From 94eea4484adc121e333a9708426996817037b4e7 Mon Sep 17 00:00:00 2001 From: 273-B_L0 <273-B_L0@proton.me> Date: Sat, 21 Mar 2026 14:34:04 -0700 Subject: [PATCH] Support external Ollama GPU health detection Patch from: https://github.com/KeroZelvin/project-nomad --- admin/app/controllers/settings_controller.ts | 211 ++++++++++--------- admin/app/services/system_service.ts | 152 ++++++++++--- 2 files changed, 230 insertions(+), 133 deletions(-) diff --git a/admin/app/controllers/settings_controller.ts b/admin/app/controllers/settings_controller.ts index 40667f6..9ae965e 100644 --- a/admin/app/controllers/settings_controller.ts +++ b/admin/app/controllers/settings_controller.ts @@ -1,118 +1,123 @@ -import KVStore from '#models/kv_store'; -import { BenchmarkService } from '#services/benchmark_service'; -import { MapService } from '#services/map_service'; -import { OllamaService } from '#services/ollama_service'; -import { SystemService } from '#services/system_service'; -import { updateSettingSchema } from '#validators/settings'; -import { inject } from '@adonisjs/core'; +import KVStore from '#models/kv_store' +import { BenchmarkService } from '#services/benchmark_service' +import { MapService } from '#services/map_service' +import { OllamaService } from '#services/ollama_service' +import { SystemService } from '#services/system_service' +import { updateSettingSchema } from '#validators/settings' +import { inject } from '@adonisjs/core' import type { HttpContext } from '@adonisjs/core/http' -import type { KVStoreKey } from '../../types/kv_store.js'; +import type { KVStoreKey } from '../../types/kv_store.js' @inject() export default class SettingsController { - constructor( - private systemService: SystemService, - private mapService: MapService, - private benchmarkService: BenchmarkService, - private ollamaService: OllamaService - ) { } + constructor( + private systemService: SystemService, + private mapService: MapService, + private benchmarkService: BenchmarkService, + private ollamaService: OllamaService + ) {} - async system({ inertia }: HttpContext) { - const systemInfo = await this.systemService.getSystemInfo(); - return inertia.render('settings/system', { - system: { - info: systemInfo - } - }); - } + async system({ inertia }: HttpContext) { + const systemInfo = await this.systemService.getSystemInfo() + return inertia.render('settings/system', { + system: { + info: systemInfo, + }, + }) + } - async apps({ inertia }: HttpContext) { - const services = await this.systemService.getServices({ installedOnly: false }); - return inertia.render('settings/apps', { - system: { - services - } - }); - } - - async legal({ inertia }: HttpContext) { - return inertia.render('settings/legal'); - } + async apps({ inertia }: HttpContext) { + const services = await this.systemService.getServices({ installedOnly: false }) + return inertia.render('settings/apps', { + system: { + services, + }, + }) + } - async support({ inertia }: HttpContext) { - return inertia.render('settings/support'); - } + async legal({ inertia }: HttpContext) { + return inertia.render('settings/legal') + } - async maps({ inertia }: HttpContext) { - const baseAssetsCheck = await this.mapService.ensureBaseAssets(); - const regionFiles = await this.mapService.listRegions(); - return inertia.render('settings/maps', { - maps: { - baseAssetsExist: baseAssetsCheck, - regionFiles: regionFiles.files - } - }); - } + async support({ inertia }: HttpContext) { + return inertia.render('settings/support') + } - async models({ inertia }: HttpContext) { - const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 }); - const installedModels = await this.ollamaService.getModels().catch(() => []) - const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled') - const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName') - const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl') - return inertia.render('settings/models', { - models: { - availableModels: availableModels?.models || [], - installedModels: installedModels || [], - settings: { - chatSuggestionsEnabled: chatSuggestionsEnabled ?? false, - aiAssistantCustomName: aiAssistantCustomName ?? '', - remoteOllamaUrl: remoteOllamaUrl ?? '', - } - } - }); - } + async maps({ inertia }: HttpContext) { + const baseAssetsCheck = await this.mapService.ensureBaseAssets() + const regionFiles = await this.mapService.listRegions() + return inertia.render('settings/maps', { + maps: { + baseAssetsExist: baseAssetsCheck, + regionFiles: regionFiles.files, + }, + }) + } - async update({ inertia }: HttpContext) { - const updateInfo = await this.systemService.checkLatestVersion(); - return inertia.render('settings/update', { - system: { - updateAvailable: updateInfo.updateAvailable, - latestVersion: updateInfo.latestVersion, - currentVersion: updateInfo.currentVersion - } - }); - } + async models({ inertia }: HttpContext) { + const availableModels = await this.ollamaService.getAvailableModels({ + sort: 'pulls', + recommendedOnly: false, + query: null, + limit: 15, + }) + const installedModels = await this.ollamaService.getModels().catch(() => []) + const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled') + const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName') + const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl') + return inertia.render('settings/models', { + models: { + availableModels: availableModels?.models || [], + installedModels: installedModels || [], + settings: { + chatSuggestionsEnabled: chatSuggestionsEnabled ?? false, + aiAssistantCustomName: aiAssistantCustomName ?? '', + remoteOllamaUrl: remoteOllamaUrl ?? '', + }, + }, + }) + } - async zim({ inertia }: HttpContext) { - return inertia.render('settings/zim/index') - } + async update({ inertia }: HttpContext) { + const updateInfo = await this.systemService.checkLatestVersion() + return inertia.render('settings/update', { + system: { + updateAvailable: updateInfo.updateAvailable, + latestVersion: updateInfo.latestVersion, + currentVersion: updateInfo.currentVersion, + }, + }) + } - async zimRemote({ inertia }: HttpContext) { - return inertia.render('settings/zim/remote-explorer'); - } + async zim({ inertia }: HttpContext) { + return inertia.render('settings/zim/index') + } - async benchmark({ inertia }: HttpContext) { - const latestResult = await this.benchmarkService.getLatestResult(); - const status = this.benchmarkService.getStatus(); - return inertia.render('settings/benchmark', { - benchmark: { - latestResult, - status: status.status, - currentBenchmarkId: status.benchmarkId - } - }); - } + async zimRemote({ inertia }: HttpContext) { + return inertia.render('settings/zim/remote-explorer') + } - async getSetting({ request, response }: HttpContext) { - const key = request.qs().key; - const value = await KVStore.getValue(key as KVStoreKey); - return response.status(200).send({ key, value }); - } + async benchmark({ inertia }: HttpContext) { + const latestResult = await this.benchmarkService.getLatestResult() + const status = this.benchmarkService.getStatus() + return inertia.render('settings/benchmark', { + benchmark: { + latestResult, + status: status.status, + currentBenchmarkId: status.benchmarkId, + }, + }) + } - async updateSetting({ request, response }: HttpContext) { - const reqData = await request.validateUsing(updateSettingSchema); - await this.systemService.updateSetting(reqData.key, reqData.value); - return response.status(200).send({ success: true, message: 'Setting updated successfully' }); - } -} \ No newline at end of file + async getSetting({ request, response }: HttpContext) { + const key = request.qs().key + const value = await KVStore.getValue(key as KVStoreKey) + return response.status(200).send({ key, value }) + } + + async updateSetting({ request, response }: HttpContext) { + const reqData = await request.validateUsing(updateSettingSchema) + await this.systemService.updateSetting(reqData.key, reqData.value) + return response.status(200).send({ success: true, message: 'Setting updated successfully' }) + } +} diff --git a/admin/app/services/system_service.ts b/admin/app/services/system_service.ts index 84157af..e61c68f 100644 --- a/admin/app/services/system_service.ts +++ b/admin/app/services/system_service.ts @@ -4,10 +4,15 @@ import { DockerService } from '#services/docker_service' import { ServiceSlim } from '../../types/services.js' import logger from '@adonisjs/core/services/logger' import si from 'systeminformation' -import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js' +import { + GpuHealthStatus, + NomadDiskInfo, + NomadDiskInfoRaw, + SystemInformationResponse, +} from '../../types/system.js' import { SERVICE_NAMES } from '../../constants/service_names.js' -import { readFileSync } from 'fs' -import path, { join } from 'path' +import { readFileSync } from 'node:fs' +import path, { join } from 'node:path' import { getAllFilesystems, getFile } from '../utils/fs.js' import axios from 'axios' import env from '#start/env' @@ -15,17 +20,16 @@ import KVStore from '#models/kv_store' import { KV_STORE_SCHEMA, KVStoreKey } from '../../types/kv_store.js' import { isNewerVersion } from '../utils/version.js' - @inject() export class SystemService { private static appVersion: string | null = null private static diskInfoFile = '/storage/nomad-disk-info.json' - constructor(private dockerService: DockerService) { } + constructor(private dockerService: DockerService) {} async checkServiceInstalled(serviceName: string): Promise { - const services = await this.getServices({ installedOnly: true }); - return services.some(service => service.service_name === serviceName); + const services = await this.getServices({ installedOnly: true }) + return services.some((service) => service.service_name === serviceName) } async getInternetStatus(): Promise { @@ -67,14 +71,20 @@ export class SystemService { return false } - async getNvidiaSmiInfo(): Promise | { error: string } | 'OLLAMA_NOT_FOUND' | 'BAD_RESPONSE' | 'UNKNOWN_ERROR'> { + async getNvidiaSmiInfo(): Promise< + | Array<{ vendor: string; model: string; vram: number }> + | { error: string } + | 'OLLAMA_NOT_FOUND' + | 'BAD_RESPONSE' + | 'UNKNOWN_ERROR' + > { try { const containers = await this.dockerService.docker.listContainers({ all: false }) - const ollamaContainer = containers.find((c) => - c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`) - ) + const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)) if (!ollamaContainer) { - logger.info('Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.') + logger.info( + 'Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.' + ) return 'OLLAMA_NOT_FOUND' } @@ -92,23 +102,35 @@ export class SystemService { const output = await new Promise((resolve) => { let data = '' const timeout = setTimeout(() => resolve(data), 5000) - stream.on('data', (chunk: Buffer) => { data += chunk.toString() }) - stream.on('end', () => { clearTimeout(timeout); resolve(data) }) + stream.on('data', (chunk: Buffer) => { + data += chunk.toString() + }) + stream.on('end', () => { + clearTimeout(timeout) + resolve(data) + }) }) // Remove any non-printable characters and trim the output - const cleaned = output.replace(/[\x00-\x08]/g, '').trim() - if (cleaned && !cleaned.toLowerCase().includes('error') && !cleaned.toLowerCase().includes('not found')) { + const cleaned = Array.from(output) + .filter((character) => character.charCodeAt(0) > 8) + .join('') + .trim() + if ( + cleaned && + !cleaned.toLowerCase().includes('error') && + !cleaned.toLowerCase().includes('not found') + ) { // Split by newlines to handle multiple GPUs installed - const lines = cleaned.split('\n').filter(line => line.trim()) + const lines = cleaned.split('\n').filter((line) => line.trim()) // Map each line out to a useful structure for us - const gpus = lines.map(line => { + const gpus = lines.map((line) => { const parts = line.split(',').map((s) => s.trim()) return { vendor: 'NVIDIA', model: parts[0] || 'NVIDIA GPU', - vram: parts[1] ? parseInt(parts[1], 10) : 0, + vram: parts[1] ? Number.parseInt(parts[1], 10) : 0, } }) @@ -117,8 +139,7 @@ export class SystemService { // If we got output but looks like an error, consider it a bad response from nvidia-smi return 'BAD_RESPONSE' - } - catch (error) { + } catch (error) { logger.error('Error getting nvidia-smi info:', error) if (error instanceof Error && error.message) { return { error: error.message } @@ -127,6 +148,59 @@ export class SystemService { } } + async getExternalOllamaGpuInfo(): Promise | null> { + try { + const containers = await this.dockerService.docker.listContainers({ all: false }) + const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)) + if (!ollamaContainer) { + return null + } + + const actualImage = (ollamaContainer.Image || '').toLowerCase() + if (actualImage.includes('ollama/ollama') || actualImage.startsWith('ollama:')) { + return null + } + + const ollamaUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.OLLAMA) + if (!ollamaUrl) { + return null + } + + await axios.get(new URL('/api/tags', ollamaUrl).toString(), { timeout: 3000 }) + + let vramMb = 0 + try { + const psResponse = await axios.get(new URL('/api/ps', ollamaUrl).toString(), { + timeout: 3000, + }) + const loadedModels = Array.isArray(psResponse.data?.models) ? psResponse.data.models : [] + const largestAllocation = loadedModels.reduce( + (max: number, model: { size_vram?: number | string }) => + Math.max(max, Number(model.size_vram) || 0), + 0 + ) + vramMb = largestAllocation > 0 ? Math.round(largestAllocation / (1024 * 1024)) : 0 + } catch {} + + return [ + { + vendor: 'NVIDIA', + model: 'NVIDIA GPU (external Ollama)', + vram: vramMb, + }, + ] + } catch (error) { + logger.info( + `[SystemService] External Ollama GPU probe failed: ${error instanceof Error ? error.message : error}` + ) + return null + } + } + async getServices({ installedOnly = true }: { installedOnly?: boolean }): Promise { await this._syncContainersWithDatabase() // Sync up before fetching to ensure we have the latest status @@ -273,7 +347,7 @@ export class SystemService { graphics.controllers = nvidiaInfo.map((gpu) => ({ model: gpu.model, vendor: gpu.vendor, - bus: "", + bus: '', vram: gpu.vram, vramDynamic: false, // assume false here, we don't actually use this field for our purposes. })) @@ -282,8 +356,23 @@ export class SystemService { } else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') { gpuHealth.status = 'ollama_not_installed' } else { - gpuHealth.status = 'passthrough_failed' - logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`) + const externalOllamaGpu = await this.getExternalOllamaGpuInfo() + if (externalOllamaGpu) { + graphics.controllers = externalOllamaGpu.map((gpu) => ({ + model: gpu.model, + vendor: gpu.vendor, + bus: '', + vram: gpu.vram, + vramDynamic: false, + })) + gpuHealth.status = 'ok' + gpuHealth.ollamaGpuAccessible = true + } else { + gpuHealth.status = 'passthrough_failed' + logger.warn( + `NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}` + ) + } } } } else { @@ -356,9 +445,10 @@ export class SystemService { logger.info(`Current version: ${currentVersion}, Latest version: ${latestVersion}`) - const updateAvailable = process.env.NODE_ENV === 'development' - ? false - : isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess) + const updateAvailable = + process.env.NODE_ENV === 'development' + ? false + : isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess) // Cache the results in KVStore for frontend checks await KVStore.setValue('system.updateAvailable', updateAvailable) @@ -518,11 +608,14 @@ export class SystemService { const k = 1024 const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'] const i = Math.floor(Math.log(bytes) / Math.log(k)) - return parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i] + return Number.parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i] } async updateSetting(key: KVStoreKey, value: any): Promise { - if ((value === '' || value === undefined || value === null) && KV_STORE_SCHEMA[key] === 'string') { + if ( + (value === '' || value === undefined || value === null) && + KV_STORE_SCHEMA[key] === 'string' + ) { await KVStore.clearValue(key) } else { await KVStore.setValue(key, value) @@ -620,5 +713,4 @@ export class SystemService { } }) } - }