mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
fix: improve download reliability with stall detection, failure visibility, and Wikipedia status tracking
Three bugs caused downloads to hang, disappear, or leave stuck spinners:

1. Wikipedia downloads that failed never updated the DB status from 'downloading', leaving the spinner stuck forever. Now the worker's failed handler marks them as failed.
2. There was no stall detection on streaming downloads: if data stopped flowing mid-download, the job hung indefinitely. Added a 5-minute stall timer that aborts the stalled download and triggers a retry.
3. Failed jobs were invisible to users because only the waiting/active/delayed states were queried. Now failed jobs appear with error indicators in the download list.

Closes #364, closes #216

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5e290119ab
commit
b0b8f07661
|
|
@ -12,7 +12,7 @@ export class DownloadService {
|
||||||
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
|
async listDownloadJobs(filetype?: string): Promise<DownloadJobWithProgress[]> {
|
||||||
// Get regular file download jobs (zim, map, etc.)
|
// Get regular file download jobs (zim, map, etc.)
|
||||||
const queue = this.queueService.getQueue(RunDownloadJob.queue)
|
const queue = this.queueService.getQueue(RunDownloadJob.queue)
|
||||||
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed'])
|
const fileJobs = await queue.getJobs(['waiting', 'active', 'delayed', 'failed'])
|
||||||
|
|
||||||
const fileDownloads = fileJobs.map((job) => ({
|
const fileDownloads = fileJobs.map((job) => ({
|
||||||
jobId: job.id!.toString(),
|
jobId: job.id!.toString(),
|
||||||
|
|
@ -20,11 +20,13 @@ export class DownloadService {
|
||||||
progress: parseInt(job.progress.toString(), 10),
|
progress: parseInt(job.progress.toString(), 10),
|
||||||
filepath: normalize(job.data.filepath),
|
filepath: normalize(job.data.filepath),
|
||||||
filetype: job.data.filetype,
|
filetype: job.data.filetype,
|
||||||
|
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
|
||||||
|
failedReason: job.failedReason || undefined,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
// Get Ollama model download jobs
|
// Get Ollama model download jobs
|
||||||
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
|
const modelQueue = this.queueService.getQueue(DownloadModelJob.queue)
|
||||||
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed'])
|
const modelJobs = await modelQueue.getJobs(['waiting', 'active', 'delayed', 'failed'])
|
||||||
|
|
||||||
const modelDownloads = modelJobs.map((job) => ({
|
const modelDownloads = modelJobs.map((job) => ({
|
||||||
jobId: job.id!.toString(),
|
jobId: job.id!.toString(),
|
||||||
|
|
@ -32,6 +34,8 @@ export class DownloadService {
|
||||||
progress: parseInt(job.progress.toString(), 10),
|
progress: parseInt(job.progress.toString(), 10),
|
||||||
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
|
filepath: job.data.modelName || 'Unknown Model', // Use model name as filepath
|
||||||
filetype: 'model',
|
filetype: 'model',
|
||||||
|
status: (job.failedReason ? 'failed' : 'active') as 'active' | 'failed',
|
||||||
|
failedReason: job.failedReason || undefined,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
const allDownloads = [...fileDownloads, ...modelDownloads]
|
const allDownloads = [...fileDownloads, ...modelDownloads]
|
||||||
|
|
@ -39,7 +43,11 @@ export class DownloadService {
|
||||||
// Filter by filetype if specified
|
// Filter by filetype if specified
|
||||||
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
|
const filtered = allDownloads.filter((job) => !filetype || job.filetype === filetype)
|
||||||
|
|
||||||
// Sort so actively downloading items (progress > 0) appear first, then by progress descending
|
// Sort: active downloads first (by progress desc), then failed at the bottom
|
||||||
return filtered.sort((a, b) => b.progress - a.progress)
|
return filtered.sort((a, b) => {
|
||||||
|
if (a.status === 'failed' && b.status !== 'failed') return 1
|
||||||
|
if (a.status !== 'failed' && b.status === 'failed') return -1
|
||||||
|
return b.progress - a.progress
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -88,10 +88,29 @@ export async function doResumableDownload({
|
||||||
let lastProgressTime = Date.now()
|
let lastProgressTime = Date.now()
|
||||||
let lastDownloadedBytes = startByte
|
let lastDownloadedBytes = startByte
|
||||||
|
|
||||||
|
// Stall detection: if no data arrives for 5 minutes, abort the download
|
||||||
|
const STALL_TIMEOUT_MS = 5 * 60 * 1000
|
||||||
|
let stallTimer: ReturnType<typeof setTimeout> | null = null
|
||||||
|
|
||||||
|
const clearStallTimer = () => {
|
||||||
|
if (stallTimer) {
|
||||||
|
clearTimeout(stallTimer)
|
||||||
|
stallTimer = null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const resetStallTimer = () => {
|
||||||
|
clearStallTimer()
|
||||||
|
stallTimer = setTimeout(() => {
|
||||||
|
cleanup(new Error('Download stalled - no data received for 5 minutes'))
|
||||||
|
}, STALL_TIMEOUT_MS)
|
||||||
|
}
|
||||||
|
|
||||||
// Progress tracking stream to monitor data flow
|
// Progress tracking stream to monitor data flow
|
||||||
const progressStream = new Transform({
|
const progressStream = new Transform({
|
||||||
transform(chunk: Buffer, _: any, callback: Function) {
|
transform(chunk: Buffer, _: any, callback: Function) {
|
||||||
downloadedBytes += chunk.length
|
downloadedBytes += chunk.length
|
||||||
|
resetStallTimer()
|
||||||
|
|
||||||
// Update progress tracking
|
// Update progress tracking
|
||||||
const now = Date.now()
|
const now = Date.now()
|
||||||
|
|
@ -118,6 +137,7 @@ export async function doResumableDownload({
|
||||||
|
|
||||||
// Handle errors and cleanup
|
// Handle errors and cleanup
|
||||||
const cleanup = (error?: Error) => {
|
const cleanup = (error?: Error) => {
|
||||||
|
clearStallTimer()
|
||||||
progressStream.destroy()
|
progressStream.destroy()
|
||||||
response.data.destroy()
|
response.data.destroy()
|
||||||
writeStream.destroy()
|
writeStream.destroy()
|
||||||
|
|
@ -136,6 +156,7 @@ export async function doResumableDownload({
|
||||||
})
|
})
|
||||||
|
|
||||||
writeStream.on('finish', async () => {
|
writeStream.on('finish', async () => {
|
||||||
|
clearStallTimer()
|
||||||
if (onProgress) {
|
if (onProgress) {
|
||||||
onProgress({
|
onProgress({
|
||||||
downloadedBytes,
|
downloadedBytes,
|
||||||
|
|
@ -151,7 +172,8 @@ export async function doResumableDownload({
|
||||||
resolve(filepath)
|
resolve(filepath)
|
||||||
})
|
})
|
||||||
|
|
||||||
// Pipe: response -> progressStream -> writeStream
|
// Start stall timer and pipe: response -> progressStream -> writeStream
|
||||||
|
resetStallTimer()
|
||||||
response.data.pipe(progressStream).pipe(writeStream)
|
response.data.pipe(progressStream).pipe(writeStream)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -65,8 +65,23 @@ export default class QueueWork extends BaseCommand {
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
worker.on('failed', (job, err) => {
|
worker.on('failed', async (job, err) => {
|
||||||
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
|
this.logger.error(`[${queueName}] Job failed: ${job?.id}, Error: ${err.message}`)
|
||||||
|
|
||||||
|
// If this was a Wikipedia download, mark it as failed in the DB
|
||||||
|
if (job?.data?.filetype === 'zim' && job?.data?.url?.includes('wikipedia_en_')) {
|
||||||
|
try {
|
||||||
|
const { DockerService } = await import('#services/docker_service')
|
||||||
|
const { ZimService } = await import('#services/zim_service')
|
||||||
|
const dockerService = new DockerService()
|
||||||
|
const zimService = new ZimService(dockerService)
|
||||||
|
await zimService.onWikipediaDownloadComplete(job.data.url, false)
|
||||||
|
} catch (e: any) {
|
||||||
|
this.logger.error(
|
||||||
|
`[${queueName}] Failed to update Wikipedia status: ${e.message}`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
worker.on('completed', (job) => {
|
worker.on('completed', (job) => {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import useDownloads, { useDownloadsProps } from '~/hooks/useDownloads'
|
||||||
import HorizontalBarChart from './HorizontalBarChart'
|
import HorizontalBarChart from './HorizontalBarChart'
|
||||||
import { extractFileName } from '~/lib/util'
|
import { extractFileName } from '~/lib/util'
|
||||||
import StyledSectionHeader from './StyledSectionHeader'
|
import StyledSectionHeader from './StyledSectionHeader'
|
||||||
|
import { IconAlertTriangle } from '@tabler/icons-react'
|
||||||
|
|
||||||
interface ActiveDownloadProps {
|
interface ActiveDownloadProps {
|
||||||
filetype?: useDownloadsProps['filetype']
|
filetype?: useDownloadsProps['filetype']
|
||||||
|
|
@ -17,18 +18,39 @@ const ActiveDownloads = ({ filetype, withHeader = false }: ActiveDownloadProps)
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
{downloads && downloads.length > 0 ? (
|
{downloads && downloads.length > 0 ? (
|
||||||
downloads.map((download) => (
|
downloads.map((download) => (
|
||||||
<div className="bg-desert-white rounded-lg p-4 border border-desert-stone-light shadow-sm hover:shadow-lg transition-shadow">
|
<div
|
||||||
<HorizontalBarChart
|
key={download.jobId}
|
||||||
items={[
|
className={`bg-desert-white rounded-lg p-4 border shadow-sm hover:shadow-lg transition-shadow ${
|
||||||
{
|
download.status === 'failed'
|
||||||
label: extractFileName(download.filepath) || download.url,
|
? 'border-red-300'
|
||||||
value: download.progress,
|
: 'border-desert-stone-light'
|
||||||
total: '100%',
|
}`}
|
||||||
used: `${download.progress}%`,
|
>
|
||||||
type: download.filetype,
|
{download.status === 'failed' ? (
|
||||||
},
|
<div className="flex items-center gap-2">
|
||||||
]}
|
<IconAlertTriangle className="w-5 h-5 text-red-500 flex-shrink-0" />
|
||||||
/>
|
<div className="flex-1 min-w-0">
|
||||||
|
<p className="text-sm font-medium text-gray-900 truncate">
|
||||||
|
{extractFileName(download.filepath) || download.url}
|
||||||
|
</p>
|
||||||
|
<p className="text-xs text-red-600 mt-0.5">
|
||||||
|
Download failed{download.failedReason ? `: ${download.failedReason}` : ''}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<HorizontalBarChart
|
||||||
|
items={[
|
||||||
|
{
|
||||||
|
label: extractFileName(download.filepath) || download.url,
|
||||||
|
value: download.progress,
|
||||||
|
total: '100%',
|
||||||
|
used: `${download.progress}%`,
|
||||||
|
type: download.filetype,
|
||||||
|
},
|
||||||
|
]}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
))
|
))
|
||||||
) : (
|
) : (
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import { formatBytes } from '~/lib/util'
|
import { formatBytes } from '~/lib/util'
|
||||||
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
|
import { WikipediaOption, WikipediaCurrentSelection } from '../../types/downloads'
|
||||||
import classNames from 'classnames'
|
import classNames from 'classnames'
|
||||||
import { IconCheck, IconDownload, IconWorld } from '@tabler/icons-react'
|
import { IconCheck, IconDownload, IconWorld, IconAlertTriangle } from '@tabler/icons-react'
|
||||||
import StyledButton from './StyledButton'
|
import StyledButton from './StyledButton'
|
||||||
import LoadingSpinner from './LoadingSpinner'
|
import LoadingSpinner from './LoadingSpinner'
|
||||||
|
|
||||||
|
|
@ -29,8 +29,9 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
||||||
// Determine which option to highlight
|
// Determine which option to highlight
|
||||||
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
|
const highlightedOptionId = selectedOptionId ?? currentSelection?.optionId ?? null
|
||||||
|
|
||||||
// Check if current selection is downloading
|
// Check if current selection is downloading or failed
|
||||||
const isDownloading = currentSelection?.status === 'downloading'
|
const isDownloading = currentSelection?.status === 'downloading'
|
||||||
|
const isFailed = currentSelection?.status === 'failed'
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="w-full">
|
<div className="w-full">
|
||||||
|
|
@ -55,6 +56,18 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{/* Failed status message */}
|
||||||
|
{isFailed && (
|
||||||
|
<div className="mb-4 p-3 bg-red-50 border border-red-200 rounded-lg flex items-center justify-between">
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<IconAlertTriangle className="w-5 h-5 text-red-600 flex-shrink-0" />
|
||||||
|
<span className="text-sm text-red-700">
|
||||||
|
Wikipedia download failed. Select a package and try again.
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Options grid */}
|
{/* Options grid */}
|
||||||
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
||||||
{options.map((option) => {
|
{options.map((option) => {
|
||||||
|
|
@ -63,6 +76,8 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
||||||
currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
|
currentSelection?.optionId === option.id && currentSelection?.status === 'installed'
|
||||||
const isCurrentDownloading =
|
const isCurrentDownloading =
|
||||||
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
|
currentSelection?.optionId === option.id && currentSelection?.status === 'downloading'
|
||||||
|
const isCurrentFailed =
|
||||||
|
currentSelection?.optionId === option.id && currentSelection?.status === 'failed'
|
||||||
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
|
const isPending = selectedOptionId === option.id && selectedOptionId !== currentSelection?.optionId
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
@ -100,6 +115,12 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
||||||
Downloading
|
Downloading
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
{isCurrentFailed && (
|
||||||
|
<span className="text-xs bg-red-500 text-white px-2 py-0.5 rounded-full flex items-center gap-1">
|
||||||
|
<IconAlertTriangle size={12} />
|
||||||
|
Failed
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Option content */}
|
{/* Option content */}
|
||||||
|
|
@ -136,7 +157,7 @@ const WikipediaSelector: React.FC<WikipediaSelectorProps> = ({
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Submit button for Content Explorer mode */}
|
{/* Submit button for Content Explorer mode */}
|
||||||
{showSubmitButton && selectedOptionId && selectedOptionId !== currentSelection?.optionId && (
|
{showSubmitButton && selectedOptionId && (selectedOptionId !== currentSelection?.optionId || isFailed) && (
|
||||||
<div className="mt-4 flex justify-end">
|
<div className="mt-4 flex justify-end">
|
||||||
<StyledButton
|
<StyledButton
|
||||||
variant="primary"
|
variant="primary"
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,8 @@ export type DownloadJobWithProgress = {
|
||||||
progress: number
|
progress: number
|
||||||
filepath: string
|
filepath: string
|
||||||
filetype: string
|
filetype: string
|
||||||
|
status?: 'active' | 'failed'
|
||||||
|
failedReason?: string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wikipedia selector types
|
// Wikipedia selector types
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user