From c16cfc3a9397f103b3fd465614b4e9cdb3a61e17 Mon Sep 17 00:00:00 2001
From: Chris Sherwood <chris@crosstalksolutions.com>
Date: Sat, 7 Feb 2026 08:26:22 -0800
Subject: [PATCH] fix(GPU): detect NVIDIA GPUs via Docker API instead of lspci
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous lspci-based GPU detection fails inside Docker containers
because lspci isn't available, causing Ollama to always run CPU-only
even when a GPU + NVIDIA Container Toolkit are present on the host.

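Replace with Docker API runtime check (docker.info() -> Runtimes) as
primary detection method. This works from inside any container that has
the Docker socket mounted and confirms that the NVIDIA Container Toolkit
is installed (the "nvidia" runtime is registered with the daemon). Keep
lspci as fallback for host-based installs: an NVIDIA match there now
means the GPU is present but the toolkit is missing (CPU-only, with a
warning), and AMD detection is unchanged.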

Also add Docker-based GPU detection to benchmark hardware info — exec
nvidia-smi inside the Ollama container to get the actual GPU model name
instead of showing "Not detected".

Tested on nomad3 (Intel Core Ultra 9 285HX + RTX 5060): AI performance
went from 12.7 tok/s (CPU) to 281.4 tok/s (GPU) — a 22x improvement.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 admin/app/services/benchmark_service.ts | 54 ++++++++++++++++++++++++
 admin/app/services/docker_service.ts    | 55 +++++++++++++++++--------
 admin/docs/faq.md                       | 24 ++++++++++-
 admin/docs/getting-started.md           |  2 +
 4 files changed, 117 insertions(+), 18 deletions(-)
diff --git a/admin/app/services/benchmark_service.ts b/admin/app/services/benchmark_service.ts
index 780be36..aed50a4 100644
--- a/admin/app/services/benchmark_service.ts
+++ b/admin/app/services/benchmark_service.ts
@@ -270,6 +270,60 @@ export class BenchmarkService {
         gpuModel = discreteGpu?.model || graphics.controllers[0]?.model || null
       }
 
+      // Fallback: Check Docker for nvidia runtime and query GPU model via nvidia-smi
+      if (!gpuModel) {
+        try {
+          const dockerInfo = await this.dockerService.docker.info()
+          const runtimes = dockerInfo.Runtimes || {}
+          if ('nvidia' in runtimes) {
+            logger.info('[BenchmarkService] NVIDIA container runtime detected, querying GPU model via nvidia-smi')
+
+            // Try to get GPU model name from the running Ollama container
+            try {
+              const containers = await this.dockerService.docker.listContainers({ all: false })
+              const ollamaContainer = containers.find((c) =>
+                c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)
+              )
+
+              if (ollamaContainer) {
+                const container = this.dockerService.docker.getContainer(ollamaContainer.Id)
+                const exec = await container.exec({
+                  Cmd: ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'],
+                  AttachStdout: true,
+                  AttachStderr: true,
+                  Tty: true,
+                })
+
+                const stream = await exec.start({ Tty: true })
+                const output = await new Promise<string>((resolve) => { // resolves with whatever was read on end, error, or after 5s
+                  let data = ''
+                  const finish = () => { clearTimeout(timeout); stream.destroy(); resolve(data) } // every exit path releases the exec stream
+                  const timeout = setTimeout(finish, 5000)
+                  stream.on('data', (chunk: Buffer) => { data += chunk.toString() }).once('end', finish).once('error', finish) // an 'error' with no listener would be thrown as an uncaught exception
+                })
+
+                const gpuName = output.replace(/[\x00-\x08]/g, '').trim()
+                if (gpuName && !gpuName.toLowerCase().includes('error') && !gpuName.toLowerCase().includes('not found')) {
+                  gpuModel = gpuName
+                  logger.info(`[BenchmarkService] GPU detected via nvidia-smi: ${gpuModel}`)
+                } else {
+                  gpuModel = 'NVIDIA GPU (model unknown)'
+                  logger.info('[BenchmarkService] NVIDIA runtime present but nvidia-smi query failed, using generic name')
+                }
+              } else {
+                gpuModel = 'NVIDIA GPU (model unknown)'
+                logger.info('[BenchmarkService] NVIDIA runtime present but Ollama container not running')
+              }
+            } catch (execError) {
+              gpuModel = 'NVIDIA GPU (model unknown)'
+              logger.warn(`[BenchmarkService] nvidia-smi exec failed: ${execError.message}`)
+            }
+          }
+        } catch (dockerError) {
+          logger.warn(`[BenchmarkService] Could not query Docker info for GPU detection: ${dockerError.message}`)
+        }
+      }
+
       // Fallback: Extract integrated GPU from CPU model name
       if (!gpuModel) {
         const cpuFullName = `${cpu.manufacturer} ${cpu.brand}`
diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts
index db5dfe4..420bb45 100644
--- a/admin/app/services/docker_service.ts
+++ b/admin/app/services/docker_service.ts
@@ -454,13 +454,13 @@ export class DockerService {
       let gpuHostConfig = containerConfig?.HostConfig || {}
 
       if (service.service_name === SERVICE_NAMES.OLLAMA) {
-        const gpuType = await this._detectGPUType()
+        const gpuResult = await this._detectGPUType()
 
-        if (gpuType === 'nvidia') {
+        if (gpuResult.type === 'nvidia') {
           this._broadcast(
             service.service_name,
             'gpu-config',
-            `NVIDIA GPU detected. Configuring container with GPU support...`
+            `NVIDIA container runtime detected. Configuring container with GPU support...`
           )
 
           // Add GPU support for NVIDIA
@@ -474,7 +474,7 @@ export class DockerService {
               },
             ],
           }
-        } else if (gpuType === 'amd') {
+        } else if (gpuResult.type === 'amd') {
           // this._broadcast(
           //   service.service_name,
           //   'gpu-config',
@@ -503,6 +503,12 @@ export class DockerService {
           //     `[DockerService] Configured ${amdDevices.length} AMD GPU devices for Ollama`
           //   )
           // }
+        } else if (gpuResult.toolkitMissing) {
+          this._broadcast(
+            service.service_name,
+            'gpu-config',
+            `NVIDIA GPU detected but NVIDIA Container Toolkit is not installed. Using CPU-only configuration. Install the toolkit and reinstall AI Assistant for GPU acceleration: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html`
+          )
         } else {
           this._broadcast(
             service.service_name,
@@ -691,44 +697,59 @@ export class DockerService {
   }
 
   /**
-   * Detect GPU type (NVIDIA or AMD) on the system.
-   * Returns 'nvidia', 'amd', or 'none'.
+   * Detect GPU type and toolkit availability.
+   * Primary: Check Docker runtimes via docker.info() (works from inside containers).
+   * Fallback: lspci for host-based installs and AMD detection.
    */
-  private async _detectGPUType(): Promise<'nvidia' | 'amd' | 'none'> {
+  private async _detectGPUType(): Promise<{ type: 'nvidia' | 'amd' | 'none'; toolkitMissing?: boolean }> {
     try {
+      // Primary: Check Docker daemon for nvidia runtime (works from inside containers)
+      try {
+        const dockerInfo = await this.docker.info()
+        const runtimes = dockerInfo.Runtimes || {}
+        if ('nvidia' in runtimes) {
+          logger.info('[DockerService] NVIDIA container runtime detected via Docker API')
+          return { type: 'nvidia' }
+        }
+      } catch (error) {
+        logger.warn(`[DockerService] Could not query Docker info for GPU runtimes: ${error.message}`)
+      }
+
+      // Fallback: lspci for host-based installs (not available inside Docker)
       const execAsync = promisify(exec)
 
-      // Check for NVIDIA GPU
+      // Check for NVIDIA GPU via lspci
       try {
         const { stdout: nvidiaCheck } = await execAsync(
           'lspci 2>/dev/null | grep -i nvidia || true'
         )
         if (nvidiaCheck.trim()) {
-          logger.info('[DockerService] NVIDIA GPU detected')
-          return 'nvidia'
+          // GPU hardware found but no nvidia runtime — toolkit not installed
+          logger.warn('[DockerService] NVIDIA GPU detected via lspci but NVIDIA Container Toolkit is not installed')
+          return { type: 'none', toolkitMissing: true }
         }
       } catch (error) {
-        // Continue to AMD check
+        // lspci not available (likely inside Docker container), continue
       }
 
-      // Check for AMD GPU
+      // Check for AMD GPU via lspci
       try {
         const { stdout: amdCheck } = await execAsync(
           'lspci 2>/dev/null | grep -iE "amd|radeon" || true'
         )
         if (amdCheck.trim()) {
-          logger.info('[DockerService] AMD GPU detected')
-          return 'amd'
+          logger.info('[DockerService] AMD GPU detected via lspci')
+          return { type: 'amd' }
         }
       } catch (error) {
-        // No GPU detected
+        // lspci not available, continue
       }
 
       logger.info('[DockerService] No GPU detected')
-      return 'none'
+      return { type: 'none' }
     } catch (error) {
       logger.warn(`[DockerService] Error detecting GPU type: ${error.message}`)
-      return 'none'
+      return { type: 'none' }
     }
   }
 
diff --git a/admin/docs/faq.md b/admin/docs/faq.md
index 39a96dc..6702637 100644
--- a/admin/docs/faq.md
+++ b/admin/docs/faq.md
@@ -110,10 +110,32 @@ The Maps feature requires downloaded map data. If you see a blank area:
 ### AI responses are slow
 
 Local AI requires significant computing power. To improve speed:
+- **Add a GPU** — An NVIDIA GPU with the NVIDIA Container Toolkit can improve AI speed by 10-20x or more
 - Close other applications on the server
 - Ensure adequate cooling (overheating causes throttling)
 - Consider using a smaller/faster AI model if available
-- Add a GPU if your hardware supports it (NVIDIA or AMD)
+
+### How do I enable GPU acceleration for AI?
+
+N.O.M.A.D. automatically detects NVIDIA GPUs when the NVIDIA Container Toolkit is installed on the host system. To set up GPU acceleration:
+
+1. **Install an NVIDIA GPU** in your server (if not already present)
+2. **Install the NVIDIA Container Toolkit** on the host — follow the [official installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
+3. **Reinstall the AI Assistant** — Go to [Apps](/settings/apps), find AI Assistant, and click **Force Reinstall**
+
+N.O.M.A.D. will detect the GPU during installation and configure the AI to use it automatically. You'll see "NVIDIA container runtime detected" in the installation progress.
+
+**Tip:** Run a [System Benchmark](/settings/benchmark) before and after to see the difference. GPU-accelerated systems typically see 100+ tokens per second vs 10-15 on CPU only.
+
+### I added/changed my GPU but AI is still slow
+
+When you add or swap a GPU, N.O.M.A.D. needs to reconfigure the AI container to use it:
+
+1. Make sure the **NVIDIA Container Toolkit** is installed on the host
+2. Go to **[Apps](/settings/apps)**
+3. Find the **AI Assistant** and click **Force Reinstall**
+
+Force Reinstall recreates the AI container with GPU support enabled. Without this step, the AI continues to run on CPU only.
 
 ### AI Chat not available
 
diff --git a/admin/docs/getting-started.md b/admin/docs/getting-started.md
index eafd240..78bba37 100644
--- a/admin/docs/getting-started.md
+++ b/admin/docs/getting-started.md
@@ -84,6 +84,8 @@ N.O.M.A.D. includes a built-in AI chat interface powered by Ollama. It runs enti
 
 **Note:** The AI Assistant must be installed first. Enable it during Easy Setup or install it from the [Apps](/settings/apps) page.
 
+**GPU Acceleration:** If your server has an NVIDIA GPU with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed, N.O.M.A.D. will automatically use it for AI — dramatically faster responses (10-20x improvement). If you add a GPU later, go to [Apps](/settings/apps) and **Force Reinstall** the AI Assistant to enable it.
+
 ---
 
 ### Knowledge Base — Document-Aware AI