feat(GPU): warn when GPU passthrough is not working and offer one-click fix

Ollama can silently run on CPU even when the host has an NVIDIA GPU, resulting in ~3 tok/s instead of ~167 tok/s. This happens when Ollama was installed before the GPU toolkit, or when the container was recreated without proper DeviceRequests. Users had no indication that this was happening. This change adds a GPU health check to the system info API response that detects when the host has an NVIDIA runtime but nvidia-smi fails inside the Ollama container. It also shows a warning banner on the System Information and AI Settings pages with a one-click "Reinstall AI Assistant" button that force-reinstalls Ollama with GPU passthrough. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-28 03:29:25 +01:00 · 2026-03-05 11:47:27 -08:00 · 2026-03-05 11:47:27 -08:00 · 6783cda222
commit 6783cda222
parent 175d63da8b
4 changed files with 191 additions and 2 deletions
--- a/admin/app/services/system_service.ts
+++ b/admin/app/services/system_service.ts
@ -4,7 +4,7 @@ import { DockerService } from '#services/docker_service'
 import { ServiceSlim } from '../../types/services.js'
 import logger from '@adonisjs/core/services/logger'
 import si from 'systeminformation'
-import { NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
+import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
 import { SERVICE_NAMES } from '../../constants/service_names.js'
 import { readFileSync } from 'fs'
 import path, { join } from 'path'
@ -235,6 +235,13 @@ export class SystemService {
        logger.error('Error reading disk info file:', error)
      }

+      // GPU health tracking — detect when host has NVIDIA GPU but Ollama can't access it
+      let gpuHealth: GpuHealthStatus = {
+        status: 'no_gpu',
+        hasNvidiaRuntime: false,
+        ollamaGpuAccessible: false,
+      }
+
      // Query Docker API for host-level info (hostname, OS, GPU runtime)
      // si.osInfo() returns the container's info inside Docker, not the host's
      try {
@ -255,6 +262,7 @@ export class SystemService {
        if (!graphics.controllers || graphics.controllers.length === 0) {
          const runtimes = dockerInfo.Runtimes || {}
          if ('nvidia' in runtimes) {
+            gpuHealth.hasNvidiaRuntime = true
            const nvidiaInfo = await this.getNvidiaSmiInfo()
            if (Array.isArray(nvidiaInfo)) {
              graphics.controllers = nvidiaInfo.map((gpu) => ({
@ -264,10 +272,19 @@ export class SystemService {
                vram: gpu.vram,
                vramDynamic: false, // assume false here, we don't actually use this field for our purposes.
              }))
+              gpuHealth.status = 'ok'
+              gpuHealth.ollamaGpuAccessible = true
+            } else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') {
+              gpuHealth.status = 'ollama_not_installed'
            } else {
-              logger.warn(`NVIDIA runtime detected but failed to get GPU info: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
+              gpuHealth.status = 'passthrough_failed'
+              logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
            }
          }
+        } else {
+          // si.graphics() returned controllers (host install, not Docker) — GPU is working
+          gpuHealth.status = 'ok'
+          gpuHealth.ollamaGpuAccessible = true
        }
      } catch {
        // Docker info query failed, skip host-level enrichment
@ -282,6 +299,7 @@ export class SystemService {
        fsSize,
        uptime,
        graphics,
+        gpuHealth,
      }
    } catch (error) {
      logger.error('Error getting system info:', error)
--- a/admin/inertia/pages/settings/models.tsx
+++ b/admin/inertia/pages/settings/models.tsx
@ -19,6 +19,7 @@ import Input from '~/components/inputs/Input'
 import { IconSearch, IconRefresh } from '@tabler/icons-react'
 import useDebounce from '~/hooks/useDebounce'
 import ActiveModelDownloads from '~/components/ActiveModelDownloads'
+import { useSystemInfo } from '~/hooks/useSystemInfo'

 export default function ModelsPage(props: {
  models: {
@ -32,6 +33,64 @@ export default function ModelsPage(props: {
  const { addNotification } = useNotifications()
  const { openModal, closeAllModals } = useModals()
  const { debounce } = useDebounce()
+  const { data: systemInfo } = useSystemInfo({})
+
+  const [gpuBannerDismissed, setGpuBannerDismissed] = useState(() => {
+    try {
+      return localStorage.getItem('nomad:gpu-banner-dismissed') === 'true'
+    } catch {
+      return false
+    }
+  })
+  const [reinstalling, setReinstalling] = useState(false)
+
+  const handleDismissGpuBanner = () => {
+    setGpuBannerDismissed(true)
+    try {
+      localStorage.setItem('nomad:gpu-banner-dismissed', 'true')
+    } catch {}
+  }
+
+  const handleForceReinstallOllama = () => {
+    openModal(
+      <StyledModal
+        title="Reinstall AI Assistant?"
+        onConfirm={async () => {
+          closeAllModals()
+          setReinstalling(true)
+          try {
+            const response = await api.forceReinstallService('nomad_ollama')
+            if (!response || !response.success) {
+              throw new Error(response?.message || 'Force reinstall failed')
+            }
+            addNotification({
+              message: `${aiAssistantName} is being reinstalled with GPU support. This page will reload shortly.`,
+              type: 'success',
+            })
+            try { localStorage.removeItem('nomad:gpu-banner-dismissed') } catch {}
+            setTimeout(() => window.location.reload(), 5000)
+          } catch (error) {
+            addNotification({
+              message: `Failed to reinstall: ${error instanceof Error ? error.message : 'Unknown error'}`,
+              type: 'error',
+            })
+            setReinstalling(false)
+          }
+        }}
+        onCancel={closeAllModals}
+        open={true}
+        confirmText="Reinstall"
+        cancelText="Cancel"
+      >
+        <p className="text-gray-700">
+          This will recreate the {aiAssistantName} container with GPU support enabled.
+          Your downloaded models will be preserved. The service will be briefly
+          unavailable during reinstall.
+        </p>
+      </StyledModal>,
+      'gpu-health-force-reinstall-modal'
+    )
+  }
  const [chatSuggestionsEnabled, setChatSuggestionsEnabled] = useState(
    props.models.settings.chatSuggestionsEnabled
  )
@ -178,6 +237,26 @@ export default function ModelsPage(props: {
              className="!mt-6"
            />
          )}
+          {isInstalled && systemInfo?.gpuHealth?.status === 'passthrough_failed' && !gpuBannerDismissed && (
+            <Alert
+              type="warning"
+              variant="bordered"
+              title="GPU Not Accessible"
+              message={`Your system has an NVIDIA GPU, but ${aiAssistantName} can't access it. AI is running on CPU only, which is significantly slower.`}
+              className="!mt-6"
+              dismissible={true}
+              onDismiss={handleDismissGpuBanner}
+              buttonProps={{
+                children: `Fix: Reinstall ${aiAssistantName}`,
+                icon: 'IconRefresh',
+                variant: 'action',
+                size: 'sm',
+                onClick: handleForceReinstallOllama,
+                loading: reinstalling,
+                disabled: reinstalling,
+              }}
+            />
+          )}

          <StyledSectionHeader title="Settings" className="mt-8 mb-4" />
          <div className="bg-white rounded-lg border-2 border-gray-200 p-6">
--- a/admin/inertia/pages/settings/system.tsx
+++ b/admin/inertia/pages/settings/system.tsx
@ -1,3 +1,4 @@
+import { useState } from 'react'
 import { Head } from '@inertiajs/react'
 import SettingsLayout from '~/layouts/SettingsLayout'
 import { SystemInformationResponse } from '../../../types/system'
@ -6,7 +7,11 @@ import CircularGauge from '~/components/systeminfo/CircularGauge'
 import HorizontalBarChart from '~/components/HorizontalBarChart'
 import InfoCard from '~/components/systeminfo/InfoCard'
 import Alert from '~/components/Alert'
+import StyledModal from '~/components/StyledModal'
 import { useSystemInfo } from '~/hooks/useSystemInfo'
+import { useNotifications } from '~/context/NotificationContext'
+import { useModals } from '~/context/ModalContext'
+import api from '~/lib/api'
 import StatusCard from '~/components/systeminfo/StatusCard'
 import { IconCpu, IconDatabase, IconServer, IconDeviceDesktop, IconComponents } from '@tabler/icons-react'

@ -16,6 +21,65 @@ export default function SettingsPage(props: {
  const { data: info } = useSystemInfo({
    initialData: props.system.info,
  })
+  const { addNotification } = useNotifications()
+  const { openModal, closeAllModals } = useModals()
+
+  const [gpuBannerDismissed, setGpuBannerDismissed] = useState(() => {
+    try {
+      return localStorage.getItem('nomad:gpu-banner-dismissed') === 'true'
+    } catch {
+      return false
+    }
+  })
+  const [reinstalling, setReinstalling] = useState(false)
+
+  const handleDismissGpuBanner = () => {
+    setGpuBannerDismissed(true)
+    try {
+      localStorage.setItem('nomad:gpu-banner-dismissed', 'true')
+    } catch {}
+  }
+
+  const handleForceReinstallOllama = () => {
+    openModal(
+      <StyledModal
+        title="Reinstall AI Assistant?"
+        onConfirm={async () => {
+          closeAllModals()
+          setReinstalling(true)
+          try {
+            const response = await api.forceReinstallService('nomad_ollama')
+            if (!response || !response.success) {
+              throw new Error(response?.message || 'Force reinstall failed')
+            }
+            addNotification({
+              message: 'AI Assistant is being reinstalled with GPU support. This page will reload shortly.',
+              type: 'success',
+            })
+            try { localStorage.removeItem('nomad:gpu-banner-dismissed') } catch {}
+            setTimeout(() => window.location.reload(), 5000)
+          } catch (error) {
+            addNotification({
+              message: `Failed to reinstall: ${error instanceof Error ? error.message : 'Unknown error'}`,
+              type: 'error',
+            })
+            setReinstalling(false)
+          }
+        }}
+        onCancel={closeAllModals}
+        open={true}
+        confirmText="Reinstall"
+        cancelText="Cancel"
+      >
+        <p className="text-gray-700">
+          This will recreate the AI Assistant container with GPU support enabled.
+          Your downloaded models will be preserved. The service will be briefly
+          unavailable during reinstall.
+        </p>
+      </StyledModal>,
+      'gpu-health-force-reinstall-modal'
+    )
+  }

  // Use (total - available) to reflect actual memory pressure.
  // mem.used includes reclaimable buff/cache on Linux, which inflates the number.
@ -173,6 +237,27 @@ export default function SettingsPage(props: {
                  },
                ]}
              />
+              {info?.gpuHealth?.status === 'passthrough_failed' && !gpuBannerDismissed && (
+                <div className="lg:col-span-2">
+                  <Alert
+                    type="warning"
+                    variant="bordered"
+                    title="GPU Not Accessible to AI Assistant"
+                    message="Your system has an NVIDIA GPU, but the AI Assistant can't access it. AI is running on CPU only, which is significantly slower."
+                    dismissible={true}
+                    onDismiss={handleDismissGpuBanner}
+                    buttonProps={{
+                      children: 'Fix: Reinstall AI Assistant',
+                      icon: 'IconRefresh',
+                      variant: 'action',
+                      size: 'sm',
+                      onClick: handleForceReinstallOllama,
+                      loading: reinstalling,
+                      disabled: reinstalling,
+                    }}
+                  />
+                </div>
+              )}
              {info?.graphics?.controllers && info.graphics.controllers.length > 0 && (
                <InfoCard
                  title="Graphics"
--- a/admin/types/system.ts
+++ b/admin/types/system.ts
@ -1,5 +1,11 @@
 import { Systeminformation } from 'systeminformation'

+export type GpuHealthStatus = {
+  status: 'ok' | 'passthrough_failed' | 'no_gpu' | 'ollama_not_installed'
+  hasNvidiaRuntime: boolean
+  ollamaGpuAccessible: boolean
+}
+
 export type SystemInformationResponse = {
  cpu: Systeminformation.CpuData
  mem: Systeminformation.MemData
@ -9,6 +15,7 @@ export type SystemInformationResponse = {
  fsSize: Systeminformation.FsSizeData[]
  uptime: Systeminformation.TimeData
  graphics: Systeminformation.GraphicsData
+  gpuHealth?: GpuHealthStatus
 }

 // Type inference is not working properly with usePage and shared props, so we define this type manually