fix: improve download reliability with stall detection, failure visibility, and Wikipedia status tracking

Three bugs caused downloads to hang, disappear, or leave stuck spinners:
1. Wikipedia downloads that failed never updated the DB status from 'downloading',
   leaving the spinner stuck forever. Now the worker's failed handler marks them as failed.
2. No stall detection on streaming downloads: if data stopped flowing mid-download,
   the job hung indefinitely. Added a 5-minute stall timer that aborts the stalled
   download with an error so the job can be retried.
3. Failed jobs were invisible to users since only waiting/active/delayed states were
   queried. Now failed jobs appear with error indicators in the download list.

Closes #364, closes #216

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Chris Sherwood 2026-03-18 16:52:34 -07:00 committed by Jake Turner
parent 5e290119ab
commit b0b8f07661
6 changed files with 111 additions and 21 deletions

View File

@ -12,7 +12,7 @@ export class DownloadService {
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
// Get regular file download jobs (zim, map, etc.)
const queue = this.queueService.getQueue(RunDownloadJob.queue)
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed'])
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed', 'failed'])
const fileDownloads = fileJobs.map((job) => ({
jobId: job.id!.toString(),
@ -20,11 +20,13 @@ export class DownloadService {
progress: parseInt(job.progress.toString(), 10),
filepath: normalize(job.data.filepath),
filetype: job.data.filetype,
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
failedReason: job.failedReason || undefined,
}))
// Get Ollama model download jobs
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed'])
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed', 'failed'])
const modelDownloads = modelJobs.map((job) => ({
jobId: job.id!.toString(),
@ -32,6 +34,8 @@ export class DownloadService {
progress: parseInt(job.progress.toString(), 10),
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
filetype: 'model',
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
failedReason: job.failedReason || undefined,
}))
const allDownloads = [...fileDownloads, ...modelDownloads]
@ -39,7 +43,11 @@ export class DownloadService {
// Filter by filetype if specified
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
// Sort so actively downloading items (progress > 0) appear first, then by progress descending
return filtered.sort((a, b) => b.progress - a.progress)
// Sort: active downloads first (by progress desc), then failed at the bottom
return filtered.sort((a, b) => {
if (a.status === 'failed' && b.status !== 'failed') return 1
if (a.status !== 'failed' && b.status === 'failed') return -1
return b.progress - a.progress
})
}
}

View File

@ -88,10 +88,29 @@ export async function doResumableDownload({
let lastProgressTime = Date.now()
let lastDownloadedBytes = startByte
// Stall detection: if no data arrives for 5 minutes, abort the download
const STALL_TIMEOUT_MS = 5 * 60 * 1000
let stallTimer: ReturnType<typeof setTimeout> | null = null
const clearStallTimer = () => {
if (stallTimer) {
clearTimeout(stallTimer)
stallTimer = null
}
}
const resetStallTimer = () => {
clearStallTimer()
stallTimer = setTimeout(() => {
cleanup(new Error('Download stalled - no data received for 5 minutes'))
}, STALL_TIMEOUT_MS)
}
// Progress tracking stream to monitor data flow
const progressStream = new Transform({
transform(chunk: Buffer, _: any, callback: Function) {
downloadedBytes += chunk.length
resetStallTimer()
// Update progress tracking
const now = Date.now()
@ -118,6 +137,7 @@ export async function doResumableDownload({
// Handle errors and cleanup
const cleanup = (error?: Error) => {
clearStallTimer()
progressStream.destroy()
response.data.destroy()
writeStream.destroy()
@ -136,6 +156,7 @@ export async function doResumableDownload({
})
writeStream.on('finish', async () => {
clearStallTimer()
if (onProgress) {
onProgress({
downloadedBytes,
@ -151,7 +172,8 @@ export async function doResumableDownload({
resolve(filepath)
})
// Pipe: response -> progressStream -> writeStream
// Start stall timer and pipe: response -> progressStream -> writeStream
resetStallTimer()
response.data.pipe(progressStream).pipe(writeStream)
})
}

View File

@ -65,8 +65,23 @@ export default class QueueWork extends BaseCommand {
}
)
worker.on('failed', (job, err) => {
worker.on('failed', async (job, err) => {
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
// If this was a Wikipedia download, mark it as failed in the DB
if (job?.data?.filetype === 'zim' && job?.data?.url?.includes('wikipedia_en_')) {
try {
const { DockerService } = await import('#services/docker_service')
const { ZimService } = await import('#services/zim_service')
const dockerService = new DockerService()
const zimService = new ZimService(dockerService)
await zimService.onWikipediaDownloadComplete(job.data.url, false)
} catch (e: any) {
this.logger.error(
`[${queueName}] Failed to update Wikipedia status: ${e.message}`
)
}
}
})
worker.on('completed', (job) => {

View File

@ -2,6 +2,7 @@ import useDownloads, { useDownloadsProps } from '~/hooks/useDownloads'
import HorizontalBarChart from './HorizontalBarChart'
import { extractFileName } from '~/lib/util'
import StyledSectionHeader from './StyledSectionHeader'
import { IconAlertTriangle } from '@tabler/icons-react'
interface ActiveDownloadProps {
filetype?: useDownloadsProps['filetype']
@ -17,7 +18,27 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
<div className="space-y-4">
{downloads && downloads.length > 0 ? (
downloads.map((download) => (
<div className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow">
<div
key={download.jobId}
className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
download.status === 'failed'
? 'border-red-300'
: 'border-desert-stone-light'
}`}
>
{download.status === 'failed' ? (
<div className="flex items-center gap-2">
<IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
<div className="flex-1 min-w-0">
<p className="text-sm font-medium text-gray-900 truncate">
{extractFileName(download.filepath) || download.url}
</p>
<p className="text-xs text-red-600 mt-0.5">
Download failed{download.failedReason ? `: ${download.failedReason}` : ''}
</p>
</div>
</div>
) : (
<HorizontalBarChart
items={[
{
@ -29,6 +50,7 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
},
]}
/>
)}
</div>
))
) : (

View File

@ -1,7 +1,7 @@
import { formatBytes } from '~/lib/util'
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
import classNames from 'classnames'
import { IconCheck, IconDownload, IconWorld } from '@tabler/icons-react'
import { IconCheck, IconDownload, IconWorld, IconAlertTriangle } from '@tabler/icons-react'
import StyledButton from './StyledButton'
import LoadingSpinner from './LoadingSpinner'
@ -29,8 +29,9 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
// Determine which option to highlight
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
// Check if current selection is downloading
// Check if current selection is downloading or failed
const isDownloading = currentSelection?.status === 'downloading'
const isFailed = currentSelection?.status === 'failed'
return (
<div className="w-full">
@ -55,6 +56,18 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
</div>
)}
{/* Failed status message */}
{isFailed && (
<div className="mb-4 p-3 bg-red-50 border border-red-200 rounded-lg flex items-center justify-between">
<div className="flex items-center gap-2">
<IconAlertTriangle className="w-5 h-5 text-red-600 flex-shrink-0" />
<span className="text-sm text-red-700">
Wikipedia download failed. Select a package and try again.
</span>
</div>
</div>
)}
{/* Options grid */}
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{options.map((option) => {
@ -63,6 +76,8 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
const isCurrentDownloading =
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
const isCurrentFailed =
currentSelection?.optionId === option.id && currentSelection?.status === 'failed'
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
return (
@ -100,6 +115,12 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
Downloading
</span>
)}
{isCurrentFailed && (
<span className="text-xs bg-red-500 text-white px-2 py-0.5 rounded-full flex items-center gap-1">
<IconAlertTriangle size={12} />
Failed
</span>
)}
</div>
{/* Option content */}
@ -136,7 +157,7 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
</div>
{/* Submit button for Content Explorer mode */}
{showSubmitButton && selectedOptionId && selectedOptionId !== currentSelection?.optionId && (
{showSubmitButton && selectedOptionId && (selectedOptionId !== currentSelection?.optionId || isFailed) && (
<div className="mt-4 flex justify-end">
<StyledButton
variant="primary"

View File

@ -41,6 +41,8 @@ export type DownloadJobWithProgress = {
progress: number
filepath: string
filetype: string
status?: 'active' | 'failed'
failedReason?: string
}
// Wikipedia selector types