fix(GPU): persist GPU type to KV store for reliable passthrough

GPU detection results were only applied at container creation time and never persisted. If live detection failed transiently (Docker daemon hiccup, runtime temporarily unavailable), Ollama would silently fall back to CPU-only mode with no way to recover short of a force-reinstall. Now _detectGPUType() persists successful detections to the KV store (gpu.type = 'nvidia' | 'amd') and uses the saved value as a fallback when live detection returns nothing. This ensures GPU config survives across container recreations regardless of transient detection failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 03:29:25 +01:00 · 2026-03-20 08:16:43 -07:00 · 2026-03-20 08:16:43 -07:00 · fe08fc0e28
commit fe08fc0e28
parent 418f82f9b7
2 changed files with 25 additions and 0 deletions
--- a/admin/app/services/docker_service.ts
+++ b/admin/app/services/docker_service.ts
@ -691,6 +691,7 @@ export class DockerService {
        const runtimes = dockerInfo.Runtimes || {}
        if ('nvidia' in runtimes) {
          logger.info('[DockerService] NVIDIA container runtime detected via Docker API')
+          await this._persistGPUType('nvidia')
          return { type: 'nvidia' }
        }
      } catch (error) {
@ -722,12 +723,26 @@ export class DockerService {
        )
        if (amdCheck.trim()) {
          logger.info('[DockerService] AMD GPU detected via lspci')
+          await this._persistGPUType('amd')
          return { type: 'amd' }
        }
      } catch (error) {
        // lspci not available, continue
      }

+      // Last resort: fall back to the GPU type saved by a previous successful detection.
+      // This handles cases where live detection fails transiently (e.g., Docker daemon
+      // hiccup, runtime temporarily unavailable); it assumes the hardware hasn't changed.
+      try {
+        const savedType = await KVStore.getValue('gpu.type')
+        if (savedType === 'nvidia' || savedType === 'amd') {
+          logger.info(`[DockerService] No GPU detected live, but KV store has '${savedType}' from previous detection. Using saved value.`)
+          return { type: savedType as 'nvidia' | 'amd' }
+        }
+      } catch {
+        // KV store not available, continue
+      }
+
      logger.info('[DockerService] No GPU detected')
      return { type: 'none' }
    } catch (error) {
@ -736,6 +751,15 @@ export class DockerService {
    }
  }

+  /**
+   * Best-effort save of a successfully detected GPU type under the 'gpu.type' KV key.
+   * A failed write is logged as a warning rather than rethrown.
+   *
+   * @param type - GPU vendor that live detection just confirmed.
+   */
+  private async _persistGPUType(type: 'nvidia' | 'amd'): Promise<void> {
+    try {
+      await KVStore.setValue('gpu.type', type)
+      logger.info(`[DockerService] Persisted GPU type '${type}' to KV store`)
+    } catch (error: unknown) {
+      // A thrown value is not guaranteed to be an Error (it may even be null/undefined).
+      // Reading `.message` blindly could throw from inside this handler, and because the
+      // call sites sit inside their own try blocks, that would turn a successful
+      // detection into a failed one.
+      const reason = error instanceof Error ? error.message : String(error)
+      logger.warn(`[DockerService] Failed to persist GPU type: ${reason}`)
+    }
+  }
+
  /**
   * Discover AMD GPU DRI devices dynamically.
   * Returns an array of device configurations for Docker.
--- a/admin/types/kv_store.ts
+++ b/admin/types/kv_store.ts
@ -9,6 +9,7 @@ export const KV_STORE_SCHEMA = {
  'ui.hasVisitedEasySetup':     'boolean',
  'ui.theme':                   'string',
  'ai.assistantCustomName':     'string',
+  'gpu.type':                   'string',
 } as const

 type KVTagToType<T extends string> = T extends 'boolean' ? boolean : string