mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-27 19:19:25 +01:00
fix: improve download reliability with stall detection, failure visibility, and Wikipedia status tracking
Three bugs caused downloads to hang, disappear, or leave stuck spinners:

1. Wikipedia downloads that failed never updated the DB status from 'downloading', leaving the spinner stuck forever. Now the worker's failed handler marks them as failed.
2. There was no stall detection on streaming downloads: if data stopped flowing mid-download, the job hung indefinitely. Added a 5-minute stall timer that aborts the download and triggers a retry.
3. Failed jobs were invisible to users since only waiting/active/delayed states were queried. Now failed jobs appear with error indicators in the download list.

Closes #364, closes #216

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5e290119ab
commit
b0b8f07661
|
|
@ -12,7 +12,7 @@ export class DownloadService {
|
|||
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
|
||||
// Get regular file download jobs (zim, map, etc.)
|
||||
const queue = this.queueService.getQueue(RunDownloadJob.queue)
|
||||
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed'])
|
||||
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed', 'failed'])
|
||||
|
||||
const fileDownloads = fileJobs.map((job) => ({
|
||||
jobId: job.id!.toString(),
|
||||
|
|
@ -20,11 +20,13 @@ export class DownloadService {
|
|||
progress: parseInt(job.progress.toString(), 10),
|
||||
filepath: normalize(job.data.filepath),
|
||||
filetype: job.data.filetype,
|
||||
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
|
||||
failedReason: job.failedReason || undefined,
|
||||
}))
|
||||
|
||||
// Get Ollama model download jobs
|
||||
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
|
||||
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed'])
|
||||
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed', 'failed'])
|
||||
|
||||
const modelDownloads = modelJobs.map((job) => ({
|
||||
jobId: job.id!.toString(),
|
||||
|
|
@ -32,6 +34,8 @@ export class DownloadService {
|
|||
progress: parseInt(job.progress.toString(), 10),
|
||||
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
|
||||
filetype: 'model',
|
||||
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
|
||||
failedReason: job.failedReason || undefined,
|
||||
}))
|
||||
|
||||
const allDownloads = [...fileDownloads, ...modelDownloads]
|
||||
|
|
@ -39,7 +43,11 @@ export class DownloadService {
|
|||
// Filter by filetype if specified
|
||||
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
|
||||
|
||||
// Sort so actively downloading items (progress > 0) appear first, then by progress descending
|
||||
return filtered.sort((a, b) => b.progress - a.progress)
|
||||
// Sort: active downloads first (by progress desc), then failed at the bottom
|
||||
return filtered.sort((a, b) => {
|
||||
if (a.status === 'failed' && b.status !== 'failed') return 1
|
||||
if (a.status !== 'failed' && b.status === 'failed') return -1
|
||||
return b.progress - a.progress
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,10 +88,29 @@ export async function doResumableDownload({
|
|||
let lastProgressTime = Date.now()
|
||||
let lastDownloadedBytes = startByte
|
||||
|
||||
// Stall detection: if no data arrives for 5 minutes, abort the download
|
||||
const STALL_TIMEOUT_MS = 5 * 60 * 1000
|
||||
let stallTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
const clearStallTimer = () => {
|
||||
if (stallTimer) {
|
||||
clearTimeout(stallTimer)
|
||||
stallTimer = null
|
||||
}
|
||||
}
|
||||
|
||||
const resetStallTimer = () => {
|
||||
clearStallTimer()
|
||||
stallTimer = setTimeout(() => {
|
||||
cleanup(new Error('Download stalled - no data received for 5 minutes'))
|
||||
}, STALL_TIMEOUT_MS)
|
||||
}
|
||||
|
||||
// Progress tracking stream to monitor data flow
|
||||
const progressStream = new Transform({
|
||||
transform(chunk: Buffer, _: any, callback: Function) {
|
||||
downloadedBytes += chunk.length
|
||||
resetStallTimer()
|
||||
|
||||
// Update progress tracking
|
||||
const now = Date.now()
|
||||
|
|
@ -118,6 +137,7 @@ export async function doResumableDownload({
|
|||
|
||||
// Handle errors and cleanup
|
||||
const cleanup = (error?: Error) => {
|
||||
clearStallTimer()
|
||||
progressStream.destroy()
|
||||
response.data.destroy()
|
||||
writeStream.destroy()
|
||||
|
|
@ -136,6 +156,7 @@ export async function doResumableDownload({
|
|||
})
|
||||
|
||||
writeStream.on('finish', async () => {
|
||||
clearStallTimer()
|
||||
if (onProgress) {
|
||||
onProgress({
|
||||
downloadedBytes,
|
||||
|
|
@ -151,7 +172,8 @@ export async function doResumableDownload({
|
|||
resolve(filepath)
|
||||
})
|
||||
|
||||
// Pipe: response -> progressStream -> writeStream
|
||||
// Start stall timer and pipe: response -> progressStream -> writeStream
|
||||
resetStallTimer()
|
||||
response.data.pipe(progressStream).pipe(writeStream)
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,8 +65,23 @@ export default class QueueWork extends BaseCommand {
|
|||
}
|
||||
)
|
||||
|
||||
worker.on('failed', (job, err) => {
|
||||
worker.on('failed', async (job, err) => {
|
||||
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
|
||||
|
||||
// If this was a Wikipedia download, mark it as failed in the DB
|
||||
if (job?.data?.filetype === 'zim' && job?.data?.url?.includes('wikipedia_en_')) {
|
||||
try {
|
||||
const { DockerService } = await import('#services/docker_service')
|
||||
const { ZimService } = await import('#services/zim_service')
|
||||
const dockerService = new DockerService()
|
||||
const zimService = new ZimService(dockerService)
|
||||
await zimService.onWikipediaDownloadComplete(job.data.url, false)
|
||||
} catch (e: any) {
|
||||
this.logger.error(
|
||||
`[${queueName}] Failed to update Wikipedia status: ${e.message}`
|
||||
)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
worker.on('completed', (job) => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import useDownloads, { useDownloadsProps } from '~/hooks/useDownloads'
|
|||
import HorizontalBarChart from './HorizontalBarChart'
|
||||
import { extractFileName } from '~/lib/util'
|
||||
import StyledSectionHeader from './StyledSectionHeader'
|
||||
import { IconAlertTriangle } from '@tabler/icons-react'
|
||||
|
||||
interface ActiveDownloadProps {
|
||||
filetype?: useDownloadsProps['filetype']
|
||||
|
|
@ -17,18 +18,39 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
|
|||
<div className="space-y-4">
|
||||
{downloads && downloads.length > 0 ? (
|
||||
downloads.map((download) => (
|
||||
<div className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow">
|
||||
<HorizontalBarChart
|
||||
items={[
|
||||
{
|
||||
label: extractFileName(download.filepath) || download.url,
|
||||
value: download.progress,
|
||||
total: '100%',
|
||||
used: `${download.progress}%`,
|
||||
type: download.filetype,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
<div
|
||||
key={download.jobId}
|
||||
className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
|
||||
download.status === 'failed'
|
||||
? 'border-red-300'
|
||||
: 'border-desert-stone-light'
|
||||
}`}
|
||||
>
|
||||
{download.status === 'failed' ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
|
||||
<div className="flex-1 min-w-0">
|
||||
<p className="text-sm font-medium text-gray-900 truncate">
|
||||
{extractFileName(download.filepath) || download.url}
|
||||
</p>
|
||||
<p className="text-xs text-red-600 mt-0.5">
|
||||
Download failed{download.failedReason ? `: ${download.failedReason}` : ''}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<HorizontalBarChart
|
||||
items={[
|
||||
{
|
||||
label: extractFileName(download.filepath) || download.url,
|
||||
value: download.progress,
|
||||
total: '100%',
|
||||
used: `${download.progress}%`,
|
||||
type: download.filetype,
|
||||
},
|
||||
]}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
))
|
||||
) : (
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import { formatBytes } from '~/lib/util'
|
||||
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
|
||||
import classNames from 'classnames'
|
||||
import { IconCheck, IconDownload, IconWorld } from '@tabler/icons-react'
|
||||
import { IconCheck, IconDownload, IconWorld, IconAlertTriangle } from '@tabler/icons-react'
|
||||
import StyledButton from './StyledButton'
|
||||
import LoadingSpinner from './LoadingSpinner'
|
||||
|
||||
|
|
@ -29,8 +29,9 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
|||
// Determine which option to highlight
|
||||
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
|
||||
|
||||
// Check if current selection is downloading
|
||||
// Check if current selection is downloading or failed
|
||||
const isDownloading = currentSelection?.status === 'downloading'
|
||||
const isFailed = currentSelection?.status === 'failed'
|
||||
|
||||
return (
|
||||
<div className="w-full">
|
||||
|
|
@ -55,6 +56,18 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Failed status message */}
|
||||
{isFailed && (
|
||||
<div className="mb-4 p-3 bg-red-50 border border-red-200 rounded-lg flex items-center justify-between">
|
||||
<div className="flex items-center gap-2">
|
||||
<IconAlertTriangle className="w-5 h-5 text-red-600 flex-shrink-0" />
|
||||
<span className="text-sm text-red-700">
|
||||
Wikipedia download failed. Select a package and try again.
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Options grid */}
|
||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||
{options.map((option) => {
|
||||
|
|
@ -63,6 +76,8 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
|||
currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
|
||||
const isCurrentDownloading =
|
||||
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
|
||||
const isCurrentFailed =
|
||||
currentSelection?.optionId === option.id && currentSelection?.status === 'failed'
|
||||
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
|
||||
|
||||
return (
|
||||
|
|
@ -100,6 +115,12 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
|||
Downloading
|
||||
</span>
|
||||
)}
|
||||
{isCurrentFailed && (
|
||||
<span className="text-xs bg-red-500 text-white px-2 py-0.5 rounded-full flex items-center gap-1">
|
||||
<IconAlertTriangle size={12} />
|
||||
Failed
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Option content */}
|
||||
|
|
@ -136,7 +157,7 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
|||
</div>
|
||||
|
||||
{/* Submit button for Content Explorer mode */}
|
||||
{showSubmitButton && selectedOptionId && selectedOptionId !== currentSelection?.optionId && (
|
||||
{showSubmitButton && selectedOptionId && (selectedOptionId !== currentSelection?.optionId || isFailed) && (
|
||||
<div className="mt-4 flex justify-end">
|
||||
<StyledButton
|
||||
variant="primary"
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ export type DownloadJobWithProgress = {
|
|||
progress: number
|
||||
filepath: string
|
||||
filetype: string
|
||||
status?: 'active' | 'failed'
|
||||
failedReason?: string
|
||||
}
|
||||
|
||||
// Wikipedia selector types
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user