feat(GPU): warn when GPU passthrough not working and offer one-click fix

Ollama can silently run on CPU even when the host has an NVIDIA GPU,
resulting in ~3 tok/s instead of ~167 tok/s. This happens when Ollama
was installed before the GPU toolkit, or when the container was
recreated without proper DeviceRequests. Users had zero indication.

Adds a GPU health check to the system info API response that detects
when the host has an NVIDIA runtime but nvidia-smi fails inside the
Ollama container. Shows a warning banner on the System Information
and AI Settings pages with a one-click "Reinstall AI Assistant"
button that force-reinstalls Ollama with GPU passthrough.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Chris Sherwood 2026-03-05 11:47:27 -08:00 committed by Jake Turner
parent 175d63da8b
commit 6783cda222
4 changed files with 191 additions and 2 deletions

View File

@ -4,7 +4,7 @@ import { DockerService } from '#services/docker_service'
import { ServiceSlim } from '../../types/services.js'
import logger from '@adonisjs/core/services/logger'
import si from 'systeminformation'
import { NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import { readFileSync } from 'fs'
import path, { join } from 'path'
@ -235,6 +235,13 @@ export class SystemService {
logger.error('Error reading disk info file:', error)
}
// GPU health tracking — detect when host has NVIDIA GPU but Ollama can't access it.
// Starts from the "no GPU" baseline; the checks further down only update individual
// fields in place, so the binding itself is never reassigned and can be `const`.
const gpuHealth: GpuHealthStatus = {
  status: 'no_gpu',
  hasNvidiaRuntime: false,
  ollamaGpuAccessible: false,
}
// Query Docker API for host-level info (hostname, OS, GPU runtime)
// si.osInfo() returns the container's info inside Docker, not the host's
try {
@ -255,6 +262,7 @@ export class SystemService {
if (!graphics.controllers || graphics.controllers.length === 0) {
const runtimes = dockerInfo.Runtimes || {}
if ('nvidia' in runtimes) {
gpuHealth.hasNvidiaRuntime = true
const nvidiaInfo = await this.getNvidiaSmiInfo()
if (Array.isArray(nvidiaInfo)) {
graphics.controllers = nvidiaInfo.map((gpu) => ({
@ -264,10 +272,19 @@ export class SystemService {
vram: gpu.vram,
vramDynamic: false, // assume false here, we don't actually use this field for our purposes.
}))
gpuHealth.status = 'ok'
gpuHealth.ollamaGpuAccessible = true
} else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') {
gpuHealth.status = 'ollama_not_installed'
} else {
logger.warn(`NVIDIA runtime detected but failed to get GPU info: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
gpuHealth.status = 'passthrough_failed'
logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
}
}
} else {
// si.graphics() returned controllers (host install, not Docker) — GPU is working
gpuHealth.status = 'ok'
gpuHealth.ollamaGpuAccessible = true
}
} catch {
// Docker info query failed, skip host-level enrichment
@ -282,6 +299,7 @@ export class SystemService {
fsSize,
uptime,
graphics,
gpuHealth,
}
} catch (error) {
logger.error('Error getting system info:', error)

View File

@ -19,6 +19,7 @@ import Input from '~/components/inputs/Input'
import { IconSearch, IconRefresh } from '@tabler/icons-react'
import useDebounce from '~/hooks/useDebounce'
import ActiveModelDownloads from '~/components/ActiveModelDownloads'
import { useSystemInfo } from '~/hooks/useSystemInfo'
export default function ModelsPage(props: {
models: {
@ -32,6 +33,64 @@ export default function ModelsPage(props: {
const { addNotification } = useNotifications()
const { openModal, closeAllModals } = useModals()
const { debounce } = useDebounce()
const { data: systemInfo } = useSystemInfo({})
// Initial banner state comes from the persisted 'nomad:gpu-banner-dismissed' flag.
// If reading localStorage throws, the banner is treated as not dismissed.
const [gpuBannerDismissed, setGpuBannerDismissed] = useState(() => {
  try {
    const storedFlag = localStorage.getItem('nomad:gpu-banner-dismissed')
    return storedFlag === 'true'
  } catch {
    return false
  }
})
const [reinstalling, setReinstalling] = useState(false)
// Hides the GPU warning banner and records the dismissal in localStorage.
const handleDismissGpuBanner = () => {
  const dismissedKey = 'nomad:gpu-banner-dismissed'
  setGpuBannerDismissed(true)
  try {
    localStorage.setItem(dismissedKey, 'true')
  } catch {
    // Persisting is best-effort: errors are ignored and the banner stays hidden via component state.
  }
}
/**
 * Opens a confirmation modal. When the user confirms, requests a force
 * reinstall of the `nomad_ollama` service, reports the outcome through a
 * notification, and schedules a page reload 5 seconds after a successful request.
 */
const handleForceReinstallOllama = () => {
  // Runs when the user presses "Reinstall" in the modal.
  const confirmReinstall = async () => {
    closeAllModals()
    setReinstalling(true)
    try {
      const result = await api.forceReinstallService('nomad_ollama')
      if (!result?.success) {
        throw new Error(result?.message || 'Force reinstall failed')
      }
      addNotification({
        message: `${aiAssistantName} is being reinstalled with GPU support. This page will reload shortly.`,
        type: 'success',
      })
      // Drop any stored dismissal flag (best-effort; storage errors are ignored).
      try {
        localStorage.removeItem('nomad:gpu-banner-dismissed')
      } catch {}
      setTimeout(() => window.location.reload(), 5000)
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error'
      addNotification({
        message: `Failed to reinstall: ${reason}`,
        type: 'error',
      })
      // Only the failure path re-enables the button; on success the page reloads instead.
      setReinstalling(false)
    }
  }

  openModal(
    <StyledModal
      title="Reinstall AI Assistant?"
      onConfirm={confirmReinstall}
      onCancel={closeAllModals}
      open={true}
      confirmText="Reinstall"
      cancelText="Cancel"
    >
      <p className="text-gray-700">
        This will recreate the {aiAssistantName} container with GPU support enabled.
        Your downloaded models will be preserved. The service will be briefly
        unavailable during reinstall.
      </p>
    </StyledModal>,
    'gpu-health-force-reinstall-modal'
  )
}
const [chatSuggestionsEnabled, setChatSuggestionsEnabled] = useState(
props.models.settings.chatSuggestionsEnabled
)
@ -178,6 +237,26 @@ export default function ModelsPage(props: {
className="!mt-6"
/>
)}
{isInstalled && systemInfo?.gpuHealth?.status === 'passthrough_failed' && !gpuBannerDismissed && (
<Alert
type="warning"
variant="bordered"
title="GPU Not Accessible"
message={`Your system has an NVIDIA GPU, but ${aiAssistantName} can't access it. AI is running on CPU only, which is significantly slower.`}
className="!mt-6"
dismissible={true}
onDismiss={handleDismissGpuBanner}
buttonProps={{
children: `Fix: Reinstall ${aiAssistantName}`,
icon: 'IconRefresh',
variant: 'action',
size: 'sm',
onClick: handleForceReinstallOllama,
loading: reinstalling,
disabled: reinstalling,
}}
/>
)}
<StyledSectionHeader title="Settings" className="mt-8 mb-4" />
<div className="bg-white rounded-lg border-2 border-gray-200 p-6">

View File

@ -1,3 +1,4 @@
import { useState } from 'react'
import { Head } from '@inertiajs/react'
import SettingsLayout from '~/layouts/SettingsLayout'
import { SystemInformationResponse } from '../../../types/system'
@ -6,7 +7,11 @@ import CircularGauge from '~/components/systeminfo/CircularGauge'
import HorizontalBarChart from '~/components/HorizontalBarChart'
import InfoCard from '~/components/systeminfo/InfoCard'
import Alert from '~/components/Alert'
import StyledModal from '~/components/StyledModal'
import { useSystemInfo } from '~/hooks/useSystemInfo'
import { useNotifications } from '~/context/NotificationContext'
import { useModals } from '~/context/ModalContext'
import api from '~/lib/api'
import StatusCard from '~/components/systeminfo/StatusCard'
import { IconCpu, IconDatabase, IconServer, IconDeviceDesktop, IconComponents } from '@tabler/icons-react'
@ -16,6 +21,65 @@ export default function SettingsPage(props: {
const { data: info } = useSystemInfo({
initialData: props.system.info,
})
const { addNotification } = useNotifications()
const { openModal, closeAllModals } = useModals()
// Initial banner state comes from the persisted 'nomad:gpu-banner-dismissed' flag.
// If reading localStorage throws, the banner is treated as not dismissed.
const [gpuBannerDismissed, setGpuBannerDismissed] = useState(() => {
  try {
    const storedFlag = localStorage.getItem('nomad:gpu-banner-dismissed')
    return storedFlag === 'true'
  } catch {
    return false
  }
})
const [reinstalling, setReinstalling] = useState(false)
// Hides the GPU warning banner and records the dismissal in localStorage.
const handleDismissGpuBanner = () => {
  const dismissedKey = 'nomad:gpu-banner-dismissed'
  setGpuBannerDismissed(true)
  try {
    localStorage.setItem(dismissedKey, 'true')
  } catch {
    // Persisting is best-effort: errors are ignored and the banner stays hidden via component state.
  }
}
/**
 * Opens a confirmation modal. When the user confirms, requests a force
 * reinstall of the `nomad_ollama` service, reports the outcome through a
 * notification, and schedules a page reload 5 seconds after a successful request.
 */
const handleForceReinstallOllama = () => {
  // Runs when the user presses "Reinstall" in the modal.
  const confirmReinstall = async () => {
    closeAllModals()
    setReinstalling(true)
    try {
      const result = await api.forceReinstallService('nomad_ollama')
      if (!result?.success) {
        throw new Error(result?.message || 'Force reinstall failed')
      }
      addNotification({
        message: 'AI Assistant is being reinstalled with GPU support. This page will reload shortly.',
        type: 'success',
      })
      // Drop any stored dismissal flag (best-effort; storage errors are ignored).
      try {
        localStorage.removeItem('nomad:gpu-banner-dismissed')
      } catch {}
      setTimeout(() => window.location.reload(), 5000)
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error'
      addNotification({
        message: `Failed to reinstall: ${reason}`,
        type: 'error',
      })
      // Only the failure path re-enables the button; on success the page reloads instead.
      setReinstalling(false)
    }
  }

  openModal(
    <StyledModal
      title="Reinstall AI Assistant?"
      onConfirm={confirmReinstall}
      onCancel={closeAllModals}
      open={true}
      confirmText="Reinstall"
      cancelText="Cancel"
    >
      <p className="text-gray-700">
        This will recreate the AI Assistant container with GPU support enabled.
        Your downloaded models will be preserved. The service will be briefly
        unavailable during reinstall.
      </p>
    </StyledModal>,
    'gpu-health-force-reinstall-modal'
  )
}
// Use (total - available) to reflect actual memory pressure.
// mem.used includes reclaimable buff/cache on Linux, which inflates the number.
@ -173,6 +237,27 @@ export default function SettingsPage(props: {
},
]}
/>
{info?.gpuHealth?.status === 'passthrough_failed' && !gpuBannerDismissed && (
<div className="lg:col-span-2">
<Alert
type="warning"
variant="bordered"
title="GPU Not Accessible to AI Assistant"
message="Your system has an NVIDIA GPU, but the AI Assistant can't access it. AI is running on CPU only, which is significantly slower."
dismissible={true}
onDismiss={handleDismissGpuBanner}
buttonProps={{
children: 'Fix: Reinstall AI Assistant',
icon: 'IconRefresh',
variant: 'action',
size: 'sm',
onClick: handleForceReinstallOllama,
loading: reinstalling,
disabled: reinstalling,
}}
/>
</div>
)}
{info?.graphics?.controllers && info.graphics.controllers.length > 0 && (
<InfoCard
title="Graphics"

View File

@ -1,5 +1,11 @@
import { Systeminformation } from 'systeminformation'
/**
 * GPU health summary attached to the system information response as `gpuHealth`.
 */
export type GpuHealthStatus = {
/**
 * Overall outcome of the GPU check, as set by the system service:
 * - 'no_gpu': the initial value, kept when none of the checks below change it.
 * - 'ok': nvidia-smi info was returned as a list of GPUs, or si.graphics()
 *   already reported graphics controllers.
 * - 'ollama_not_installed': the nvidia-smi lookup returned 'OLLAMA_NOT_FOUND'.
 * - 'passthrough_failed': Docker lists an `nvidia` runtime but the nvidia-smi
 *   lookup returned neither a GPU list nor 'OLLAMA_NOT_FOUND'. The UI shows
 *   its warning banner for this value.
 */
status: 'ok' | 'passthrough_failed' | 'no_gpu' | 'ollama_not_installed'
/**
 * True when Docker's runtime list contains `nvidia`. This is only checked
 * when si.graphics() reported no controllers, so it can be false while
 * `status` is 'ok'.
 */
hasNvidiaRuntime: boolean
/** Set to true together with `status: 'ok'`; false otherwise. */
ollamaGpuAccessible: boolean
}
export type SystemInformationResponse = {
cpu: Systeminformation.CpuData
mem: Systeminformation.MemData
@ -9,6 +15,7 @@ export type SystemInformationResponse = {
fsSize: Systeminformation.FsSizeData[]
uptime: Systeminformation.TimeData
graphics: Systeminformation.GraphicsData
gpuHealth?: GpuHealthStatus
}
// Type inference is not working properly with usePage and shared props, so we define this type manually