From c16cfc3a9397f103b3fd465614b4e9cdb3a61e17 Mon Sep 17 00:00:00 2001 From: Chris Sherwood Date: Sat, 7 Feb 2026 08:26:22 -0800 Subject: [PATCH] fix(GPU): detect NVIDIA GPUs via Docker API instead of lspci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous lspci-based GPU detection fails inside Docker containers because lspci isn't available, causing Ollama to always run CPU-only even when a GPU + NVIDIA Container Toolkit are present on the host. Replace with Docker API runtime check (docker.info() -> Runtimes) as primary detection method. This works from inside any container via the mounted Docker socket and confirms that the NVIDIA Container Toolkit is installed and registered with Docker; a registered runtime is treated as evidence of a usable GPU, although the check does not probe the hardware itself. Keep lspci as fallback for host-based installs and AMD. Also add Docker-based GPU detection to benchmark hardware info — exec nvidia-smi inside the Ollama container to get the actual GPU model name instead of showing "Not detected". Tested on nomad3 (Intel Core Ultra 9 285HX + RTX 5060): AI performance went from 12.7 tok/s (CPU) to 281.4 tok/s (GPU) — a 22x improvement. 
Co-Authored-By: Claude Opus 4.6 --- admin/app/services/benchmark_service.ts | 54 ++++++++++++++++++++++++ admin/app/services/docker_service.ts | 55 +++++++++++++++++-------- admin/docs/faq.md | 24 ++++++++++- admin/docs/getting-started.md | 2 + 4 files changed, 117 insertions(+), 18 deletions(-) diff --git a/admin/app/services/benchmark_service.ts b/admin/app/services/benchmark_service.ts index 780be36..aed50a4 100644 --- a/admin/app/services/benchmark_service.ts +++ b/admin/app/services/benchmark_service.ts @@ -270,6 +270,60 @@ export class BenchmarkService { gpuModel = discreteGpu?.model || graphics.controllers[0]?.model || null } + // Fallback: Check Docker for nvidia runtime and query GPU model via nvidia-smi + if (!gpuModel) { + try { + const dockerInfo = await this.dockerService.docker.info() + const runtimes = dockerInfo.Runtimes || {} + if ('nvidia' in runtimes) { + logger.info('[BenchmarkService] NVIDIA container runtime detected, querying GPU model via nvidia-smi') + + // Try to get GPU model name from the running Ollama container + try { + const containers = await this.dockerService.docker.listContainers({ all: false }) + const ollamaContainer = containers.find((c) => + c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`) + ) + + if (ollamaContainer) { + const container = this.dockerService.docker.getContainer(ollamaContainer.Id) + const exec = await container.exec({ + Cmd: ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader'], + AttachStdout: true, + AttachStderr: true, + Tty: true, + }) + + const stream = await exec.start({ Tty: true }) + const output = await new Promise((resolve) => { + let data = '' + const timeout = setTimeout(() => resolve(data), 5000) + stream.on('data', (chunk: Buffer) => { data += chunk.toString() }) + stream.on('end', () => { clearTimeout(timeout); resolve(data) }) + }) + + const gpuName = output.replace(/[\x00-\x08]/g, '').trim() + if (gpuName && !gpuName.toLowerCase().includes('error') && !gpuName.toLowerCase().includes('not 
found')) { + gpuModel = gpuName + logger.info(`[BenchmarkService] GPU detected via nvidia-smi: ${gpuModel}`) + } else { + gpuModel = 'NVIDIA GPU (model unknown)' + logger.info('[BenchmarkService] NVIDIA runtime present but nvidia-smi query failed, using generic name') + } + } else { + gpuModel = 'NVIDIA GPU (model unknown)' + logger.info('[BenchmarkService] NVIDIA runtime present but Ollama container not running') + } + } catch (execError) { + gpuModel = 'NVIDIA GPU (model unknown)' + logger.warn(`[BenchmarkService] nvidia-smi exec failed: ${execError.message}`) + } + } + } catch (dockerError) { + logger.warn(`[BenchmarkService] Could not query Docker info for GPU detection: ${dockerError.message}`) + } + } + // Fallback: Extract integrated GPU from CPU model name if (!gpuModel) { const cpuFullName = `${cpu.manufacturer} ${cpu.brand}` diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts index db5dfe4..420bb45 100644 --- a/admin/app/services/docker_service.ts +++ b/admin/app/services/docker_service.ts @@ -454,13 +454,13 @@ export class DockerService { let gpuHostConfig = containerConfig?.HostConfig || {} if (service.service_name === SERVICE_NAMES.OLLAMA) { - const gpuType = await this._detectGPUType() + const gpuResult = await this._detectGPUType() - if (gpuType === 'nvidia') { + if (gpuResult.type === 'nvidia') { this._broadcast( service.service_name, 'gpu-config', - `NVIDIA GPU detected. Configuring container with GPU support...` + `NVIDIA container runtime detected. 
Configuring container with GPU support...` ) // Add GPU support for NVIDIA @@ -474,7 +474,7 @@ export class DockerService { }, ], } - } else if (gpuType === 'amd') { + } else if (gpuResult.type === 'amd') { // this._broadcast( // service.service_name, // 'gpu-config', @@ -503,6 +503,12 @@ export class DockerService { // `[DockerService] Configured ${amdDevices.length} AMD GPU devices for Ollama` // ) // } + } else if (gpuResult.toolkitMissing) { + this._broadcast( + service.service_name, + 'gpu-config', + `NVIDIA GPU detected but NVIDIA Container Toolkit is not installed. Using CPU-only configuration. Install the toolkit and reinstall AI Assistant for GPU acceleration: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html` + ) } else { this._broadcast( service.service_name, @@ -691,44 +697,59 @@ export class DockerService { } /** - * Detect GPU type (NVIDIA or AMD) on the system. - * Returns 'nvidia', 'amd', or 'none'. + * Detect GPU type and toolkit availability. + * Primary: Check Docker runtimes via docker.info() (works from inside containers). + * Fallback: lspci for host-based installs and AMD detection. 
*/ - private async _detectGPUType(): Promise<'nvidia' | 'amd' | 'none'> { + private async _detectGPUType(): Promise<{ type: 'nvidia' | 'amd' | 'none'; toolkitMissing?: boolean }> { try { + // Primary: Check Docker daemon for nvidia runtime (works from inside containers) + try { + const dockerInfo = await this.docker.info() + const runtimes = dockerInfo.Runtimes || {} + if ('nvidia' in runtimes) { + logger.info('[DockerService] NVIDIA container runtime detected via Docker API') + return { type: 'nvidia' } + } + } catch (error) { + logger.warn(`[DockerService] Could not query Docker info for GPU runtimes: ${error.message}`) + } + + // Fallback: lspci for host-based installs (not available inside Docker) const execAsync = promisify(exec) - // Check for NVIDIA GPU + // Check for NVIDIA GPU via lspci try { const { stdout: nvidiaCheck } = await execAsync( 'lspci 2>/dev/null | grep -i nvidia || true' ) if (nvidiaCheck.trim()) { - logger.info('[DockerService] NVIDIA GPU detected') - return 'nvidia' + // GPU hardware found but no nvidia runtime — toolkit not installed + logger.warn('[DockerService] NVIDIA GPU detected via lspci but NVIDIA Container Toolkit is not installed') + return { type: 'none', toolkitMissing: true } } } catch (error) { - // Continue to AMD check + // lspci not available (likely inside Docker container), continue } - // Check for AMD GPU + // Check for AMD GPU via lspci try { const { stdout: amdCheck } = await execAsync( 'lspci 2>/dev/null | grep -iE "amd|radeon" || true' ) if (amdCheck.trim()) { - logger.info('[DockerService] AMD GPU detected') - return 'amd' + logger.info('[DockerService] AMD GPU detected via lspci') + return { type: 'amd' } } } catch (error) { - // No GPU detected + // lspci not available, continue } logger.info('[DockerService] No GPU detected') - return 'none' + return { type: 'none' } } catch (error) { logger.warn(`[DockerService] Error detecting GPU type: ${error.message}`) - return 'none' + return { type: 'none' } } } diff 
--git a/admin/docs/faq.md b/admin/docs/faq.md index 39a96dc..6702637 100644 --- a/admin/docs/faq.md +++ b/admin/docs/faq.md @@ -110,10 +110,32 @@ The Maps feature requires downloaded map data. If you see a blank area: ### AI responses are slow Local AI requires significant computing power. To improve speed: +- **Add a GPU** — An NVIDIA GPU with the NVIDIA Container Toolkit can improve AI speed by 10-20x or more - Close other applications on the server - Ensure adequate cooling (overheating causes throttling) - Consider using a smaller/faster AI model if available -- Add a GPU if your hardware supports it (NVIDIA or AMD) + +### How do I enable GPU acceleration for AI? + +N.O.M.A.D. automatically detects NVIDIA GPUs when the NVIDIA Container Toolkit is installed on the host system. To set up GPU acceleration: + +1. **Install an NVIDIA GPU** in your server (if not already present) +2. **Install the NVIDIA Container Toolkit** on the host — follow the [official installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) +3. **Reinstall the AI Assistant** — Go to [Apps](/settings/apps), find AI Assistant, and click **Force Reinstall** + +N.O.M.A.D. will detect the GPU during installation and configure the AI to use it automatically. You'll see "NVIDIA container runtime detected" in the installation progress. + +**Tip:** Run a [System Benchmark](/settings/benchmark) before and after to see the difference. GPU-accelerated systems typically see 100+ tokens per second vs 10-15 on CPU only. + +### I added/changed my GPU but AI is still slow + +When you add or swap a GPU, N.O.M.A.D. needs to reconfigure the AI container to use it: + +1. Make sure the **NVIDIA Container Toolkit** is installed on the host +2. Go to **[Apps](/settings/apps)** +3. Find the **AI Assistant** and click **Force Reinstall** + +Force Reinstall recreates the AI container with GPU support enabled. Without this step, the AI continues to run on CPU only. 
### AI Chat not available diff --git a/admin/docs/getting-started.md b/admin/docs/getting-started.md index eafd240..78bba37 100644 --- a/admin/docs/getting-started.md +++ b/admin/docs/getting-started.md @@ -84,6 +84,8 @@ N.O.M.A.D. includes a built-in AI chat interface powered by Ollama. It runs enti **Note:** The AI Assistant must be installed first. Enable it during Easy Setup or install it from the [Apps](/settings/apps) page. +**GPU Acceleration:** If your server has an NVIDIA GPU with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) installed, N.O.M.A.D. will automatically use it for AI — dramatically faster responses (10-20x improvement). If you add a GPU later, go to [Apps](/settings/apps) and **Force Reinstall** the AI Assistant to enable it. + --- ### Knowledge Base — Document-Aware AI