From 6c11442238a7346818aa28c07108e30dcd4096bf Mon Sep 17 00:00:00 2001 From: Chris Sherwood Date: Fri, 20 Mar 2026 08:16:43 -0700 Subject: [PATCH] fix(GPU): persist GPU type to KV store for reliable passthrough GPU detection results were only applied at container creation time and never persisted. If live detection failed transiently (Docker daemon hiccup, runtime temporarily unavailable), Ollama would silently fall back to CPU-only mode with no way to recover short of force-reinstall. Now _detectGPUType() persists successful detections to the KV store (gpu.type = 'nvidia' | 'amd') and uses the saved value as a fallback when live detection returns nothing. This ensures GPU config survives across container recreations regardless of transient detection failures. Co-Authored-By: Claude Opus 4.6 (1M context) --- admin/app/services/docker_service.ts | 24 ++++++++++++++++++++++++ admin/types/kv_store.ts | 1 + 2 files changed, 25 insertions(+) diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts index 06df75c..5d94f54 100644 --- a/admin/app/services/docker_service.ts +++ b/admin/app/services/docker_service.ts @@ -691,6 +691,7 @@ export class DockerService { const runtimes = dockerInfo.Runtimes || {} if ('nvidia' in runtimes) { logger.info('[DockerService] NVIDIA container runtime detected via Docker API') + await this._persistGPUType('nvidia') return { type: 'nvidia' } } } catch (error) { @@ -722,12 +723,26 @@ export class DockerService { ) if (amdCheck.trim()) { logger.info('[DockerService] AMD GPU detected via lspci') + await this._persistGPUType('amd') return { type: 'amd' } } } catch (error) { // lspci not available, continue } + // Last resort: check if we previously detected a GPU and it's likely still present. + // This handles cases where live detection fails transiently (e.g., Docker daemon + // hiccup, runtime temporarily unavailable) but the hardware hasn't changed. 
+ try { + const savedType = await KVStore.getValue('gpu.type') + if (savedType === 'nvidia' || savedType === 'amd') { + logger.info(`[DockerService] No GPU detected live, but KV store has '${savedType}' from previous detection. Using saved value.`) + return { type: savedType as 'nvidia' | 'amd' } + } + } catch { + // KV store not available, continue + } + logger.info('[DockerService] No GPU detected') return { type: 'none' } } catch (error) { @@ -736,6 +751,15 @@ export class DockerService { } } + private async _persistGPUType(type: 'nvidia' | 'amd'): Promise<void> { + try { + await KVStore.setValue('gpu.type', type) + logger.info(`[DockerService] Persisted GPU type '${type}' to KV store`) + } catch (error) { + logger.warn(`[DockerService] Failed to persist GPU type: ${error.message}`) + } + } + /** * Discover AMD GPU DRI devices dynamically. * Returns an array of device configurations for Docker. diff --git a/admin/types/kv_store.ts b/admin/types/kv_store.ts index d752f00..814ebd4 100644 --- a/admin/types/kv_store.ts +++ b/admin/types/kv_store.ts @@ -9,6 +9,7 @@ export const KV_STORE_SCHEMA = { 'ui.hasVisitedEasySetup': 'boolean', 'ui.theme': 'string', 'ai.assistantCustomName': 'string', + 'gpu.type': 'string', } as const type KVTagToType<T> = T extends 'boolean' ? boolean : string