mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
fix(GPU): persist GPU type to KV store for reliable passthrough
GPU detection results were only applied at container creation time and never persisted. If live detection failed transiently (Docker daemon hiccup, runtime temporarily unavailable), Ollama would silently fall back to CPU-only mode with no way to recover short of force-reinstall. Now _detectGPUType() persists successful detections to the KV store (gpu.type = 'nvidia' | 'amd') and uses the saved value as a fallback when live detection returns nothing. This ensures GPU config survives across container recreations regardless of transient detection failures. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
418f82f9b7
commit
fe08fc0e28
|
|
@ -691,6 +691,7 @@ export class DockerService {
|
|||
const runtimes = dockerInfo.Runtimes || {}
|
||||
if ('nvidia' in runtimes) {
|
||||
logger.info('[DockerService] NVIDIA container runtime detected via Docker API')
|
||||
await this._persistGPUType('nvidia')
|
||||
return { type: 'nvidia' }
|
||||
}
|
||||
} catch (error) {
|
||||
|
|
@ -722,12 +723,26 @@ export class DockerService {
|
|||
)
|
||||
if (amdCheck.trim()) {
|
||||
logger.info('[DockerService] AMD GPU detected via lspci')
|
||||
await this._persistGPUType('amd')
|
||||
return { type: 'amd' }
|
||||
}
|
||||
} catch (error) {
|
||||
// lspci not available, continue
|
||||
}
|
||||
|
||||
// Last resort: check if we previously detected a GPU and it's likely still present.
|
||||
// This handles cases where live detection fails transiently (e.g., Docker daemon
|
||||
// hiccup, runtime temporarily unavailable) but the hardware hasn't changed.
|
||||
try {
|
||||
const savedType = await KVStore.getValue('gpu.type')
|
||||
if (savedType === 'nvidia' || savedType === 'amd') {
|
||||
logger.info(`[DockerService] No GPU detected live, but KV store has '${savedType}' from previous detection. Using saved value.`)
|
||||
return { type: savedType as 'nvidia' | 'amd' }
|
||||
}
|
||||
} catch {
|
||||
// KV store not available, continue
|
||||
}
|
||||
|
||||
logger.info('[DockerService] No GPU detected')
|
||||
return { type: 'none' }
|
||||
} catch (error) {
|
||||
|
|
@ -736,6 +751,15 @@ export class DockerService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort write of a detected GPU type to the KV store under the 'gpu.type' key.
 *
 * Any failure while writing is logged as a warning and never rethrown, so a
 * KV store problem cannot turn a successful GPU detection into an error for
 * the caller.
 *
 * @param type - The GPU vendor that was detected ('nvidia' or 'amd').
 */
private async _persistGPUType(type: 'nvidia' | 'amd'): Promise<void> {
  try {
    await KVStore.setValue('gpu.type', type)
    logger.info(`[DockerService] Persisted GPU type '${type}' to KV store`)
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error instance; narrow before
    // reading `.message` so the log never prints "undefined".
    const reason = error instanceof Error ? error.message : String(error)
    logger.warn(`[DockerService] Failed to persist GPU type: ${reason}`)
  }
}
|
||||
|
||||
/**
|
||||
* Discover AMD GPU DRI devices dynamically.
|
||||
* Returns an array of device configurations for Docker.
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ export const KV_STORE_SCHEMA = {
|
|||
'ui.hasVisitedEasySetup': 'boolean',
|
||||
'ui.theme': 'string',
|
||||
'ai.assistantCustomName': 'string',
|
||||
'gpu.type': 'string',
|
||||
} as const
|
||||
|
||||
/**
 * Resolves a tag string to a value type: the literal tag 'boolean' yields
 * `boolean`; every other tag yields `string`.
 * NOTE(review): presumably applied to the tag values of KV_STORE_SCHEMA — confirm against usage.
 */
type KVTagToType<T extends string> = T extends 'boolean' ? boolean : string
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user