Support external Ollama GPU health detection

Patch from: https://github.com/KeroZelvin/project-nomad
2026-03-28 03:29:25 +01:00 · 2026-03-21 14:34:04 -07:00 · 2026-03-21 14:34:04 -07:00 · 94eea4484a
commit 94eea4484a
parent f36a7594bd
2 changed files with 230 additions and 133 deletions
--- a/admin/app/controllers/settings_controller.ts
+++ b/admin/app/controllers/settings_controller.ts
@@ -1,118 +1,123 @@
-import KVStore from '#models/kv_store';
-import { BenchmarkService } from '#services/benchmark_service';
-import { MapService } from '#services/map_service';
-import { OllamaService } from '#services/ollama_service';
-import { SystemService } from '#services/system_service';
-import { updateSettingSchema } from '#validators/settings';
-import { inject } from '@adonisjs/core';
+import KVStore from '#models/kv_store'
+import { BenchmarkService } from '#services/benchmark_service'
+import { MapService } from '#services/map_service'
+import { OllamaService } from '#services/ollama_service'
+import { SystemService } from '#services/system_service'
+import { updateSettingSchema } from '#validators/settings'
+import { inject } from '@adonisjs/core'
 import type { HttpContext } from '@adonisjs/core/http'
-import type { KVStoreKey } from '../../types/kv_store.js';
+import type { KVStoreKey } from '../../types/kv_store.js'

 @inject()
 export default class SettingsController {
-    constructor(
-        private systemService: SystemService,
-        private mapService: MapService,
-        private benchmarkService: BenchmarkService,
-        private ollamaService: OllamaService
-    ) { }
+  constructor(
+    private systemService: SystemService,
+    private mapService: MapService,
+    private benchmarkService: BenchmarkService,
+    private ollamaService: OllamaService
+  ) {}

-    async system({ inertia }: HttpContext) {
-        const systemInfo = await this.systemService.getSystemInfo();
-        return inertia.render('settings/system', {
-            system: {
-                info: systemInfo
-            }
-        });
-    }
+  async system({ inertia }: HttpContext) {
+    const systemInfo = await this.systemService.getSystemInfo()
+    return inertia.render('settings/system', {
+      system: {
+        info: systemInfo,
+      },
+    })
+  }

-    async apps({ inertia }: HttpContext) {
-        const services = await this.systemService.getServices({ installedOnly: false });
-        return inertia.render('settings/apps', {
-            system: {
-                services
-            }
-        });
-    }
-    
-    async legal({ inertia }: HttpContext) {
-        return inertia.render('settings/legal');
-    }
+  async apps({ inertia }: HttpContext) {
+    const services = await this.systemService.getServices({ installedOnly: false })
+    return inertia.render('settings/apps', {
+      system: {
+        services,
+      },
+    })
+  }

-    async support({ inertia }: HttpContext) {
-        return inertia.render('settings/support');
-    }
+  async legal({ inertia }: HttpContext) {
+    return inertia.render('settings/legal')
+  }

-    async maps({ inertia }: HttpContext) {
-        const baseAssetsCheck = await this.mapService.ensureBaseAssets();
-        const regionFiles = await this.mapService.listRegions();
-        return inertia.render('settings/maps', {
-            maps: {
-                baseAssetsExist: baseAssetsCheck,
-                regionFiles: regionFiles.files
-            }
-        });
-    }
+  async support({ inertia }: HttpContext) {
+    return inertia.render('settings/support')
+  }

-    async models({ inertia }: HttpContext) {
-        const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 });
-        const installedModels = await this.ollamaService.getModels().catch(() => [])
-        const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
-        const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName')
-        const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl')
-        return inertia.render('settings/models', {
-            models: {
-                availableModels: availableModels?.models || [],
-                installedModels: installedModels || [],
-                settings: {
-                    chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
-                    aiAssistantCustomName: aiAssistantCustomName ?? '',
-                    remoteOllamaUrl: remoteOllamaUrl ?? '',
-                }
-            }
-        });
-    }
+  async maps({ inertia }: HttpContext) {
+    const baseAssetsCheck = await this.mapService.ensureBaseAssets()
+    const regionFiles = await this.mapService.listRegions()
+    return inertia.render('settings/maps', {
+      maps: {
+        baseAssetsExist: baseAssetsCheck,
+        regionFiles: regionFiles.files,
+      },
+    })
+  }

-    async update({ inertia }: HttpContext) {
-        const updateInfo = await this.systemService.checkLatestVersion();
-        return inertia.render('settings/update', {
-            system: {
-                updateAvailable: updateInfo.updateAvailable,
-                latestVersion: updateInfo.latestVersion,
-                currentVersion: updateInfo.currentVersion
-            }
-        });
-    }
+  async models({ inertia }: HttpContext) {
+    const availableModels = await this.ollamaService.getAvailableModels({
+      sort: 'pulls',
+      recommendedOnly: false,
+      query: null,
+      limit: 15,
+    })
+    const installedModels = await this.ollamaService.getModels().catch(() => [])
+    const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
+    const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName')
+    const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl')
+    return inertia.render('settings/models', {
+      models: {
+        availableModels: availableModels?.models || [],
+        installedModels: installedModels || [],
+        settings: {
+          chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
+          aiAssistantCustomName: aiAssistantCustomName ?? '',
+          remoteOllamaUrl: remoteOllamaUrl ?? '',
+        },
+      },
+    })
+  }

-    async zim({ inertia }: HttpContext) {
-        return inertia.render('settings/zim/index')
-    }
+  async update({ inertia }: HttpContext) {
+    const updateInfo = await this.systemService.checkLatestVersion()
+    return inertia.render('settings/update', {
+      system: {
+        updateAvailable: updateInfo.updateAvailable,
+        latestVersion: updateInfo.latestVersion,
+        currentVersion: updateInfo.currentVersion,
+      },
+    })
+  }

-    async zimRemote({ inertia }: HttpContext) {
-        return inertia.render('settings/zim/remote-explorer');
-    }
+  async zim({ inertia }: HttpContext) {
+    return inertia.render('settings/zim/index')
+  }

-    async benchmark({ inertia }: HttpContext) {
-        const latestResult = await this.benchmarkService.getLatestResult();
-        const status = this.benchmarkService.getStatus();
-        return inertia.render('settings/benchmark', {
-            benchmark: {
-                latestResult,
-                status: status.status,
-                currentBenchmarkId: status.benchmarkId
-            }
-        });
-    }
+  async zimRemote({ inertia }: HttpContext) {
+    return inertia.render('settings/zim/remote-explorer')
+  }

-    async getSetting({ request, response }: HttpContext) {
-        const key = request.qs().key;
-        const value = await KVStore.getValue(key as KVStoreKey);
-        return response.status(200).send({ key, value });
-    }
+  async benchmark({ inertia }: HttpContext) {
+    const latestResult = await this.benchmarkService.getLatestResult()
+    const status = this.benchmarkService.getStatus()
+    return inertia.render('settings/benchmark', {
+      benchmark: {
+        latestResult,
+        status: status.status,
+        currentBenchmarkId: status.benchmarkId,
+      },
+    })
+  }

-    async updateSetting({ request, response }: HttpContext) {
-        const reqData = await request.validateUsing(updateSettingSchema);
-        await this.systemService.updateSetting(reqData.key, reqData.value);
-        return response.status(200).send({ success: true, message: 'Setting updated successfully' });
-    }
-}
+  async getSetting({ request, response }: HttpContext) {
+    const key = request.qs().key
+    const value = await KVStore.getValue(key as KVStoreKey)
+    return response.status(200).send({ key, value })
+  }
+
+  async updateSetting({ request, response }: HttpContext) {
+    const reqData = await request.validateUsing(updateSettingSchema)
+    await this.systemService.updateSetting(reqData.key, reqData.value)
+    return response.status(200).send({ success: true, message: 'Setting updated successfully' })
+  }
+}
--- a/admin/app/services/system_service.ts
+++ b/admin/app/services/system_service.ts
@@ -4,10 +4,15 @@ import { DockerService } from '#services/docker_service'
 import { ServiceSlim } from '../../types/services.js'
 import logger from '@adonisjs/core/services/logger'
 import si from 'systeminformation'
-import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
+import {
+  GpuHealthStatus,
+  NomadDiskInfo,
+  NomadDiskInfoRaw,
+  SystemInformationResponse,
+} from '../../types/system.js'
 import { SERVICE_NAMES } from '../../constants/service_names.js'
-import { readFileSync } from 'fs'
-import path, { join } from 'path'
+import { readFileSync } from 'node:fs'
+import path, { join } from 'node:path'
 import { getAllFilesystems, getFile } from '../utils/fs.js'
 import axios from 'axios'
 import env from '#start/env'
@@ -15,17 +20,16 @@ import KVStore from '#models/kv_store'
 import { KV_STORE_SCHEMA, KVStoreKey } from '../../types/kv_store.js'
 import { isNewerVersion } from '../utils/version.js'

-
 @inject()
 export class SystemService {
  private static appVersion: string | null = null
  private static diskInfoFile = '/storage/nomad-disk-info.json'

-  constructor(private dockerService: DockerService) { }
+  constructor(private dockerService: DockerService) {}

  async checkServiceInstalled(serviceName: string): Promise<boolean> {
-    const services = await this.getServices({ installedOnly: true });
-    return services.some(service => service.service_name === serviceName);
+    const services = await this.getServices({ installedOnly: true })
+    return services.some((service) => service.service_name === serviceName)
  }

  async getInternetStatus(): Promise<boolean> {
@@ -67,14 +71,20 @@ export class SystemService {
    return false
  }

-  async getNvidiaSmiInfo(): Promise<Array<{ vendor: string; model: string; vram: number; }> | { error: string } | 'OLLAMA_NOT_FOUND' | 'BAD_RESPONSE' | 'UNKNOWN_ERROR'> {
+  async getNvidiaSmiInfo(): Promise<
+    | Array<{ vendor: string; model: string; vram: number }>
+    | { error: string }
+    | 'OLLAMA_NOT_FOUND'
+    | 'BAD_RESPONSE'
+    | 'UNKNOWN_ERROR'
+  > {
    try {
      const containers = await this.dockerService.docker.listContainers({ all: false })
-      const ollamaContainer = containers.find((c) =>
-        c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)
-      )
+      const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
      if (!ollamaContainer) {
-        logger.info('Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.')
+        logger.info(
+          'Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.'
+        )
        return 'OLLAMA_NOT_FOUND'
      }

@@ -92,23 +102,35 @@ export class SystemService {
      const output = await new Promise<string>((resolve) => {
        let data = ''
        const timeout = setTimeout(() => resolve(data), 5000)
-        stream.on('data', (chunk: Buffer) => { data += chunk.toString() })
-        stream.on('end', () => { clearTimeout(timeout); resolve(data) })
+        stream.on('data', (chunk: Buffer) => {
+          data += chunk.toString()
+        })
+        stream.on('end', () => {
+          clearTimeout(timeout)
+          resolve(data)
+        })
      })

      // Remove any non-printable characters and trim the output
-      const cleaned = output.replace(/[\x00-\x08]/g, '').trim()
-      if (cleaned && !cleaned.toLowerCase().includes('error') && !cleaned.toLowerCase().includes('not found')) {
+      const cleaned = Array.from(output)
+        .filter((character) => character.charCodeAt(0) > 8)
+        .join('')
+        .trim()
+      if (
+        cleaned &&
+        !cleaned.toLowerCase().includes('error') &&
+        !cleaned.toLowerCase().includes('not found')
+      ) {
        // Split by newlines to handle multiple GPUs installed
-        const lines = cleaned.split('\n').filter(line => line.trim())
+        const lines = cleaned.split('\n').filter((line) => line.trim())

        // Map each line out to a useful structure for us
-        const gpus = lines.map(line => {
+        const gpus = lines.map((line) => {
          const parts = line.split(',').map((s) => s.trim())
          return {
            vendor: 'NVIDIA',
            model: parts[0] || 'NVIDIA GPU',
-            vram: parts[1] ? parseInt(parts[1], 10) : 0,
+            vram: parts[1] ? Number.parseInt(parts[1], 10) : 0,
          }
        })

@@ -117,8 +139,7 @@ export class SystemService {

      // If we got output but looks like an error, consider it a bad response from nvidia-smi
      return 'BAD_RESPONSE'
-    }
-    catch (error) {
+    } catch (error) {
      logger.error('Error getting nvidia-smi info:', error)
      if (error instanceof Error && error.message) {
        return { error: error.message }
@@ -127,6 +148,59 @@ export class SystemService {
    }
  }

+  /**
+   * Best-effort GPU probe for an Ollama container that is not running an Ollama image.
+   *
+   * Resolves to `null` when the container or its service URL is missing, when the image name
+   * contains `ollama/ollama` or starts with `ollama:`, or when any step up to `/api/tags` fails.
+   * Otherwise resolves to one placeholder entry (the vendor is hard-coded, not queried) whose
+   * `vram` is the largest `size_vram` reported by `/api/ps`, in MiB, or 0 when unavailable.
+   */
+  async getExternalOllamaGpuInfo(): Promise<Array<{
+    vendor: string
+    model: string
+    vram: number
+  }> | null> {
+    const timeout = 3000
+    try {
+      const containers = await this.dockerService.docker.listContainers({ all: false })
+      const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
+      if (!ollamaContainer) return null
+
+      const actualImage = (ollamaContainer.Image || '').toLowerCase()
+      if (actualImage.includes('ollama/ollama') || actualImage.startsWith('ollama:')) return null
+
+      const ollamaUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.OLLAMA)
+      if (!ollamaUrl) return null
+
+      // Reachability check; a failure here is handled by the outer catch and yields null.
+      await axios.get(new URL('/api/tags', ollamaUrl).toString(), { timeout })
+
+      let vramMb = 0
+      try {
+        const psResponse = await axios.get(new URL('/api/ps', ollamaUrl).toString(), { timeout })
+        const models: unknown = psResponse.data?.models
+        // Ignore malformed entries so one bad element cannot discard the whole reading.
+        const allocations = (Array.isArray(models) ? models : [])
+          .map((model) => Number(model?.size_vram))
+          .filter((bytes) => Number.isFinite(bytes) && bytes > 0)
+        vramMb = Math.round(Math.max(0, ...allocations) / (1024 * 1024))
+      } catch (psError) {
+        // VRAM is optional detail: keep 0, but log the failure instead of swallowing it.
+        logger.debug(
+          `[SystemService] External Ollama /api/ps probe failed: ${psError instanceof Error ? psError.message : psError}`
+        )
+      }
+
+      return [{ vendor: 'NVIDIA', model: 'NVIDIA GPU (external Ollama)', vram: vramMb }]
+    } catch (error) {
+      logger.info(
+        `[SystemService] External Ollama GPU probe failed: ${error instanceof Error ? error.message : error}`
+      )
+      return null
+    }
+  }
+
  async getServices({ installedOnly = true }: { installedOnly?: boolean }): Promise<ServiceSlim[]> {
    await this._syncContainersWithDatabase() // Sync up before fetching to ensure we have the latest status

@@ -273,7 +347,7 @@ export class SystemService {
              graphics.controllers = nvidiaInfo.map((gpu) => ({
                model: gpu.model,
                vendor: gpu.vendor,
-                bus: "",
+                bus: '',
                vram: gpu.vram,
                vramDynamic: false, // assume false here, we don't actually use this field for our purposes.
              }))
@@ -282,8 +356,23 @@ export class SystemService {
            } else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') {
              gpuHealth.status = 'ollama_not_installed'
            } else {
-              gpuHealth.status = 'passthrough_failed'
-              logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
+              const externalOllamaGpu = await this.getExternalOllamaGpuInfo()
+              if (externalOllamaGpu) {
+                graphics.controllers = externalOllamaGpu.map((gpu) => ({
+                  model: gpu.model,
+                  vendor: gpu.vendor,
+                  bus: '',
+                  vram: gpu.vram,
+                  vramDynamic: false,
+                }))
+                gpuHealth.status = 'ok'
+                gpuHealth.ollamaGpuAccessible = true
+              } else {
+                gpuHealth.status = 'passthrough_failed'
+                logger.warn(
+                  `NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`
+                )
+              }
            }
          }
        } else {
@@ -356,9 +445,10 @@ export class SystemService {

      logger.info(`Current version: ${currentVersion}, Latest version: ${latestVersion}`)

-      const updateAvailable = process.env.NODE_ENV === 'development'
-        ? false
-        : isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)
+      const updateAvailable =
+        process.env.NODE_ENV === 'development'
+          ? false
+          : isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)

      // Cache the results in KVStore for frontend checks
      await KVStore.setValue('system.updateAvailable', updateAvailable)
@@ -518,11 +608,14 @@ export class SystemService {
    const k = 1024
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']
    const i = Math.floor(Math.log(bytes) / Math.log(k))
-    return parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
+    return Number.parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
  }

  async updateSetting(key: KVStoreKey, value: any): Promise<void> {
-    if ((value === '' || value === undefined || value === null) && KV_STORE_SCHEMA[key] === 'string') {
+    if (
+      (value === '' || value === undefined || value === null) &&
+      KV_STORE_SCHEMA[key] === 'string'
+    ) {
      await KVStore.clearValue(key)
    } else {
      await KVStore.setValue(key, value)
@@ -620,5 +713,4 @@ export class SystemService {
        }
      })
  }
-
 }