fix: improve download reliability with stall detection, failure visibility, and Wikipedia status tracking

Three bugs caused downloads to hang, disappear, or leave stuck spinners:
1. Wikipedia downloads that failed never updated the DB status from 'downloading',
   leaving the spinner stuck forever. Now the worker's failed handler marks them as failed.
2. Streaming downloads had no stall detection - if data stopped flowing mid-download,
   the job hung indefinitely. Now a 5-minute stall timer aborts the stalled download with an error, which triggers a retry.
3. Failed jobs were invisible to users since only waiting/active/delayed states were
   queried. Now failed jobs appear with error indicators in the download list.

Closes #364, closes #216

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Chris Sherwood 2026-03-18 16:52:34 -07:00 committed by Jake Turner
parent 5e290119ab
commit b0b8f07661
6 changed files with 111 additions and 21 deletions

View File

@ -12,7 +12,7 @@ export class DownloadService {
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> { async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
// Get regular file download jobs (zim, map, etc.) // Get regular file download jobs (zim, map, etc.)
const queue = this.queueService.getQueue(RunDownloadJob.queue) const queue = this.queueService.getQueue(RunDownloadJob.queue)
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed']) const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed', 'failed'])
const fileDownloads = fileJobs.map((job) => ({ const fileDownloads = fileJobs.map((job) => ({
jobId: job.id!.toString(), jobId: job.id!.toString(),
@ -20,11 +20,13 @@ export class DownloadService {
progress: parseInt(job.progress.toString(), 10), progress: parseInt(job.progress.toString(), 10),
filepath: normalize(job.data.filepath), filepath: normalize(job.data.filepath),
filetype: job.data.filetype, filetype: job.data.filetype,
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
failedReason: job.failedReason || undefined,
})) }))
// Get Ollama model download jobs // Get Ollama model download jobs
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue) const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed']) const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed', 'failed'])
const modelDownloads = modelJobs.map((job) => ({ const modelDownloads = modelJobs.map((job) => ({
jobId: job.id!.toString(), jobId: job.id!.toString(),
@ -32,6 +34,8 @@ export class DownloadService {
progress: parseInt(job.progress.toString(), 10), progress: parseInt(job.progress.toString(), 10),
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
filetype: 'model', filetype: 'model',
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
failedReason: job.failedReason || undefined,
})) }))
const allDownloads = [...fileDownloads, ...modelDownloads] const allDownloads = [...fileDownloads, ...modelDownloads]
@ -39,7 +43,11 @@ export class DownloadService {
// Filter by filetype if specified // Filter by filetype if specified
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype) const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
// Sort so actively downloading items (progress > 0) appear first, then by progress descending // Sort: active downloads first (by progress desc), then failed at the bottom
return filtered.sort((a, b) => b.progress - a.progress) return filtered.sort((a, b) => {
if (a.status === 'failed' && b.status !== 'failed') return 1
if (a.status !== 'failed' && b.status === 'failed') return -1
return b.progress - a.progress
})
} }
} }

View File

@ -88,10 +88,29 @@ export async function doResumableDownload({
let lastProgressTime = Date.now() let lastProgressTime = Date.now()
let lastDownloadedBytes = startByte let lastDownloadedBytes = startByte
// Stall detection: if no data arrives for 5 minutes, abort the download
const STALL_TIMEOUT_MS = 5 * 60 * 1000
let stallTimer: ReturnType<typeof setTimeout> | null = null
const clearStallTimer = () => {
if (stallTimer) {
clearTimeout(stallTimer)
stallTimer = null
}
}
const resetStallTimer = () => {
clearStallTimer()
stallTimer = setTimeout(() => {
cleanup(new Error('Download stalled - no data received for 5 minutes'))
}, STALL_TIMEOUT_MS)
}
// Progress tracking stream to monitor data flow // Progress tracking stream to monitor data flow
const progressStream = new Transform({ const progressStream = new Transform({
transform(chunk: Buffer, _: any, callback: Function) { transform(chunk: Buffer, _: any, callback: Function) {
downloadedBytes += chunk.length downloadedBytes += chunk.length
resetStallTimer()
// Update progress tracking // Update progress tracking
const now = Date.now() const now = Date.now()
@ -118,6 +137,7 @@ export async function doResumableDownload({
// Handle errors and cleanup // Handle errors and cleanup
const cleanup = (error?: Error) => { const cleanup = (error?: Error) => {
clearStallTimer()
progressStream.destroy() progressStream.destroy()
response.data.destroy() response.data.destroy()
writeStream.destroy() writeStream.destroy()
@ -136,6 +156,7 @@ export async function doResumableDownload({
}) })
writeStream.on('finish', async () => { writeStream.on('finish', async () => {
clearStallTimer()
if (onProgress) { if (onProgress) {
onProgress({ onProgress({
downloadedBytes, downloadedBytes,
@ -151,7 +172,8 @@ export async function doResumableDownload({
resolve(filepath) resolve(filepath)
}) })
// Pipe: response -> progressStream -> writeStream // Start stall timer and pipe: response -> progressStream -> writeStream
resetStallTimer()
response.data.pipe(progressStream).pipe(writeStream) response.data.pipe(progressStream).pipe(writeStream)
}) })
} }

View File

@ -65,8 +65,23 @@ export default class QueueWork extends BaseCommand {
} }
) )
worker.on('failed', (job, err) => { worker.on('failed', async (job, err) => {
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`) this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
// If this was a Wikipedia download, mark it as failed in the DB
if (job?.data?.filetype === 'zim' && job?.data?.url?.includes('wikipedia_en_')) {
try {
const { DockerService } = await import('#services/docker_service')
const { ZimService } = await import('#services/zim_service')
const dockerService = new DockerService()
const zimService = new ZimService(dockerService)
await zimService.onWikipediaDownloadComplete(job.data.url, false)
} catch (e: any) {
this.logger.error(
`[${queueName}] Failed to update Wikipedia status: ${e.message}`
)
}
}
}) })
worker.on('completed', (job) => { worker.on('completed', (job) => {

View File

@ -2,6 +2,7 @@ import useDownloads, { useDownloadsProps } from '~/hooks/useDownloads'
import HorizontalBarChart from './HorizontalBarChart' import HorizontalBarChart from './HorizontalBarChart'
import { extractFileName } from '~/lib/util' import { extractFileName } from '~/lib/util'
import StyledSectionHeader from './StyledSectionHeader' import StyledSectionHeader from './StyledSectionHeader'
import { IconAlertTriangle } from '@tabler/icons-react'
interface ActiveDownloadProps { interface ActiveDownloadProps {
filetype?: useDownloadsProps['filetype'] filetype?: useDownloadsProps['filetype']
@ -17,18 +18,39 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
<div className="space-y-4"> <div className="space-y-4">
{downloads && downloads.length > 0 ? ( {downloads && downloads.length > 0 ? (
downloads.map((download) => ( downloads.map((download) => (
<div className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow"> <div
<HorizontalBarChart key={download.jobId}
items={[ className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
{ download.status === 'failed'
label: extractFileName(download.filepath) || download.url, ? 'border-red-300'
value: download.progress, : 'border-desert-stone-light'
total: '100%', }`}
used: `${download.progress}%`, >
type: download.filetype, {download.status === 'failed' ? (
}, <div className="flex items-center gap-2">
]} <IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
/> <div className="flex-1 min-w-0">
<p className="text-sm font-medium text-gray-900 truncate">
{extractFileName(download.filepath) || download.url}
</p>
<p className="text-xs text-red-600 mt-0.5">
Download failed{download.failedReason ? `: ${download.failedReason}` : ''}
</p>
</div>
</div>
) : (
<HorizontalBarChart
items={[
{
label: extractFileName(download.filepath) || download.url,
value: download.progress,
total: '100%',
used: `${download.progress}%`,
type: download.filetype,
},
]}
/>
)}
</div> </div>
)) ))
) : ( ) : (

View File

@ -1,7 +1,7 @@
import { formatBytes } from '~/lib/util' import { formatBytes } from '~/lib/util'
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads' import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
import classNames from 'classnames' import classNames from 'classnames'
import { IconCheck, IconDownload, IconWorld } from '@tabler/icons-react' import { IconCheck, IconDownload, IconWorld, IconAlertTriangle } from '@tabler/icons-react'
import StyledButton from './StyledButton' import StyledButton from './StyledButton'
import LoadingSpinner from './LoadingSpinner' import LoadingSpinner from './LoadingSpinner'
@ -29,8 +29,9 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
// Determine which option to highlight // Determine which option to highlight
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
// Check if current selection is downloading // Check if current selection is downloading or failed
const isDownloading = currentSelection?.status === 'downloading' const isDownloading = currentSelection?.status === 'downloading'
const isFailed = currentSelection?.status === 'failed'
return ( return (
<div className="w-full"> <div className="w-full">
@ -55,6 +56,18 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
</div> </div>
)} )}
{/* Failed status message */}
{isFailed && (
<div className="mb-4 p-3 bg-red-50 border border-red-200 rounded-lg flex items-center justify-between">
<div className="flex items-center gap-2">
<IconAlertTriangle className="w-5 h-5 text-red-600 flex-shrink-0" />
<span className="text-sm text-red-700">
Wikipedia download failed. Select a package and try again.
</span>
</div>
</div>
)}
{/* Options grid */} {/* Options grid */}
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4"> <div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
{options.map((option) => { {options.map((option) => {
@ -63,6 +76,8 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
currentSelection?.optionId === option.id && currentSelection?.status === 'installed' currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
const isCurrentDownloading = const isCurrentDownloading =
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading' currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
const isCurrentFailed =
currentSelection?.optionId === option.id && currentSelection?.status === 'failed'
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
return ( return (
@ -100,6 +115,12 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
Downloading Downloading
</span> </span>
)} )}
{isCurrentFailed && (
<span className="text-xs bg-red-500 text-white px-2 py-0.5 rounded-full flex items-center gap-1">
<IconAlertTriangle size={12} />
Failed
</span>
)}
</div> </div>
{/* Option content */} {/* Option content */}
@ -136,7 +157,7 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
</div> </div>
{/* Submit button for Content Explorer mode */} {/* Submit button for Content Explorer mode */}
{showSubmitButton && selectedOptionId && selectedOptionId !== currentSelection?.optionId && ( {showSubmitButton && selectedOptionId && (selectedOptionId !== currentSelection?.optionId || isFailed) && (
<div className="mt-4 flex justify-end"> <div className="mt-4 flex justify-end">
<StyledButton <StyledButton
variant="primary" variant="primary"

View File

@ -41,6 +41,8 @@ export type DownloadJobWithProgress = {
progress: number progress: number
filepath: string filepath: string
filetype: string filetype: string
status?: 'active' | 'failed'
failedReason?: string
} }
// Wikipedia selector types // Wikipedia selector types