Support external Ollama GPU health detection

Patch from: https://github.com/KeroZelvin/project-nomad
This commit is contained in:
273-B_L0 2026-03-21 14:34:04 -07:00 committed by Henry Estela
parent f36a7594bd
commit 94eea4484a
No known key found for this signature in database
GPG Key ID: 90439853E9E235BA
2 changed files with 230 additions and 133 deletions

View File

@ -1,118 +1,123 @@
import KVStore from '#models/kv_store';
import { BenchmarkService } from '#services/benchmark_service';
import { MapService } from '#services/map_service';
import { OllamaService } from '#services/ollama_service';
import { SystemService } from '#services/system_service';
import { updateSettingSchema } from '#validators/settings';
import { inject } from '@adonisjs/core';
import KVStore from '#models/kv_store'
import { BenchmarkService } from '#services/benchmark_service'
import { MapService } from '#services/map_service'
import { OllamaService } from '#services/ollama_service'
import { SystemService } from '#services/system_service'
import { updateSettingSchema } from '#validators/settings'
import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http'
import type { KVStoreKey } from '../../types/kv_store.js';
import type { KVStoreKey } from '../../types/kv_store.js'
@inject()
export default class SettingsController {
constructor(
private systemService: SystemService,
private mapService: MapService,
private benchmarkService: BenchmarkService,
private ollamaService: OllamaService
) { }
constructor(
private systemService: SystemService,
private mapService: MapService,
private benchmarkService: BenchmarkService,
private ollamaService: OllamaService
) {}
async system({ inertia }: HttpContext) {
const systemInfo = await this.systemService.getSystemInfo();
return inertia.render('settings/system', {
system: {
info: systemInfo
}
});
}
async system({ inertia }: HttpContext) {
const systemInfo = await this.systemService.getSystemInfo()
return inertia.render('settings/system', {
system: {
info: systemInfo,
},
})
}
async apps({ inertia }: HttpContext) {
const services = await this.systemService.getServices({ installedOnly: false });
return inertia.render('settings/apps', {
system: {
services
}
});
}
async legal({ inertia }: HttpContext) {
return inertia.render('settings/legal');
}
async apps({ inertia }: HttpContext) {
const services = await this.systemService.getServices({ installedOnly: false })
return inertia.render('settings/apps', {
system: {
services,
},
})
}
async support({ inertia }: HttpContext) {
return inertia.render('settings/support');
}
async legal({ inertia }: HttpContext) {
return inertia.render('settings/legal')
}
async maps({ inertia }: HttpContext) {
const baseAssetsCheck = await this.mapService.ensureBaseAssets();
const regionFiles = await this.mapService.listRegions();
return inertia.render('settings/maps', {
maps: {
baseAssetsExist: baseAssetsCheck,
regionFiles: regionFiles.files
}
});
}
async support({ inertia }: HttpContext) {
return inertia.render('settings/support')
}
async models({ inertia }: HttpContext) {
const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 });
const installedModels = await this.ollamaService.getModels().catch(() => [])
const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName')
const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl')
return inertia.render('settings/models', {
models: {
availableModels: availableModels?.models || [],
installedModels: installedModels || [],
settings: {
chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
aiAssistantCustomName: aiAssistantCustomName ?? '',
remoteOllamaUrl: remoteOllamaUrl ?? '',
}
}
});
}
async maps({ inertia }: HttpContext) {
const baseAssetsCheck = await this.mapService.ensureBaseAssets()
const regionFiles = await this.mapService.listRegions()
return inertia.render('settings/maps', {
maps: {
baseAssetsExist: baseAssetsCheck,
regionFiles: regionFiles.files,
},
})
}
async update({ inertia }: HttpContext) {
const updateInfo = await this.systemService.checkLatestVersion();
return inertia.render('settings/update', {
system: {
updateAvailable: updateInfo.updateAvailable,
latestVersion: updateInfo.latestVersion,
currentVersion: updateInfo.currentVersion
}
});
}
async models({ inertia }: HttpContext) {
const availableModels = await this.ollamaService.getAvailableModels({
sort: 'pulls',
recommendedOnly: false,
query: null,
limit: 15,
})
const installedModels = await this.ollamaService.getModels().catch(() => [])
const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName')
const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl')
return inertia.render('settings/models', {
models: {
availableModels: availableModels?.models || [],
installedModels: installedModels || [],
settings: {
chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
aiAssistantCustomName: aiAssistantCustomName ?? '',
remoteOllamaUrl: remoteOllamaUrl ?? '',
},
},
})
}
async zim({ inertia }: HttpContext) {
return inertia.render('settings/zim/index')
}
async update({ inertia }: HttpContext) {
const updateInfo = await this.systemService.checkLatestVersion()
return inertia.render('settings/update', {
system: {
updateAvailable: updateInfo.updateAvailable,
latestVersion: updateInfo.latestVersion,
currentVersion: updateInfo.currentVersion,
},
})
}
async zimRemote({ inertia }: HttpContext) {
return inertia.render('settings/zim/remote-explorer');
}
async zim({ inertia }: HttpContext) {
return inertia.render('settings/zim/index')
}
async benchmark({ inertia }: HttpContext) {
const latestResult = await this.benchmarkService.getLatestResult();
const status = this.benchmarkService.getStatus();
return inertia.render('settings/benchmark', {
benchmark: {
latestResult,
status: status.status,
currentBenchmarkId: status.benchmarkId
}
});
}
async zimRemote({ inertia }: HttpContext) {
return inertia.render('settings/zim/remote-explorer')
}
async getSetting({ request, response }: HttpContext) {
const key = request.qs().key;
const value = await KVStore.getValue(key as KVStoreKey);
return response.status(200).send({ key, value });
}
async benchmark({ inertia }: HttpContext) {
const latestResult = await this.benchmarkService.getLatestResult()
const status = this.benchmarkService.getStatus()
return inertia.render('settings/benchmark', {
benchmark: {
latestResult,
status: status.status,
currentBenchmarkId: status.benchmarkId,
},
})
}
async updateSetting({ request, response }: HttpContext) {
const reqData = await request.validateUsing(updateSettingSchema);
await this.systemService.updateSetting(reqData.key, reqData.value);
return response.status(200).send({ success: true, message: 'Setting updated successfully' });
}
}
async getSetting({ request, response }: HttpContext) {
const key = request.qs().key
const value = await KVStore.getValue(key as KVStoreKey)
return response.status(200).send({ key, value })
}
async updateSetting({ request, response }: HttpContext) {
const reqData = await request.validateUsing(updateSettingSchema)
await this.systemService.updateSetting(reqData.key, reqData.value)
return response.status(200).send({ success: true, message: 'Setting updated successfully' })
}
}

View File

@ -4,10 +4,15 @@ import { DockerService } from '#services/docker_service'
import { ServiceSlim } from '../../types/services.js'
import logger from '@adonisjs/core/services/logger'
import si from 'systeminformation'
import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
import {
GpuHealthStatus,
NomadDiskInfo,
NomadDiskInfoRaw,
SystemInformationResponse,
} from '../../types/system.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import { readFileSync } from 'fs'
import path, { join } from 'path'
import { readFileSync } from 'node:fs'
import path, { join } from 'node:path'
import { getAllFilesystems, getFile } from '../utils/fs.js'
import axios from 'axios'
import env from '#start/env'
@ -15,17 +20,16 @@ import KVStore from '#models/kv_store'
import { KV_STORE_SCHEMA, KVStoreKey } from '../../types/kv_store.js'
import { isNewerVersion } from '../utils/version.js'
@inject()
export class SystemService {
private static appVersion: string | null = null
private static diskInfoFile = '/storage/nomad-disk-info.json'
constructor(private dockerService: DockerService) { }
constructor(private dockerService: DockerService) {}
async checkServiceInstalled(serviceName: string): Promise<boolean> {
const services = await this.getServices({ installedOnly: true });
return services.some(service => service.service_name === serviceName);
const services = await this.getServices({ installedOnly: true })
return services.some((service) => service.service_name === serviceName)
}
async getInternetStatus(): Promise<boolean> {
@ -67,14 +71,20 @@ export class SystemService {
return false
}
async getNvidiaSmiInfo(): Promise<Array<{ vendor: string; model: string; vram: number; }> | { error: string } | 'OLLAMA_NOT_FOUND' | 'BAD_RESPONSE' | 'UNKNOWN_ERROR'> {
async getNvidiaSmiInfo(): Promise<
| Array<{ vendor: string; model: string; vram: number }>
| { error: string }
| 'OLLAMA_NOT_FOUND'
| 'BAD_RESPONSE'
| 'UNKNOWN_ERROR'
> {
try {
const containers = await this.dockerService.docker.listContainers({ all: false })
const ollamaContainer = containers.find((c) =>
c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)
)
const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
if (!ollamaContainer) {
logger.info('Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.')
logger.info(
'Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.'
)
return 'OLLAMA_NOT_FOUND'
}
@ -92,23 +102,35 @@ export class SystemService {
const output = await new Promise<string>((resolve) => {
let data = ''
const timeout = setTimeout(() => resolve(data), 5000)
stream.on('data', (chunk: Buffer) => { data += chunk.toString() })
stream.on('end', () => { clearTimeout(timeout); resolve(data) })
stream.on('data', (chunk: Buffer) => {
data += chunk.toString()
})
stream.on('end', () => {
clearTimeout(timeout)
resolve(data)
})
})
// Remove any non-printable characters and trim the output
const cleaned = output.replace(/[\x00-\x08]/g, '').trim()
if (cleaned && !cleaned.toLowerCase().includes('error') && !cleaned.toLowerCase().includes('not found')) {
const cleaned = Array.from(output)
.filter((character) => character.charCodeAt(0) > 8)
.join('')
.trim()
if (
cleaned &&
!cleaned.toLowerCase().includes('error') &&
!cleaned.toLowerCase().includes('not found')
) {
// Split by newlines to handle multiple GPUs installed
const lines = cleaned.split('\n').filter(line => line.trim())
const lines = cleaned.split('\n').filter((line) => line.trim())
// Map each line out to a useful structure for us
const gpus = lines.map(line => {
const gpus = lines.map((line) => {
const parts = line.split(',').map((s) => s.trim())
return {
vendor: 'NVIDIA',
model: parts[0] || 'NVIDIA GPU',
vram: parts[1] ? parseInt(parts[1], 10) : 0,
vram: parts[1] ? Number.parseInt(parts[1], 10) : 0,
}
})
@ -117,8 +139,7 @@ export class SystemService {
// If we got output but looks like an error, consider it a bad response from nvidia-smi
return 'BAD_RESPONSE'
}
catch (error) {
} catch (error) {
logger.error('Error getting nvidia-smi info:', error)
if (error instanceof Error && error.message) {
return { error: error.message }
@ -127,6 +148,59 @@ export class SystemService {
}
}
/**
 * Best-effort GPU probe for an Ollama container that is NOT running the stock
 * Ollama image (so the `nvidia-smi` exec path may not be available inside it).
 *
 * The probe succeeds when:
 *  1. a container returned by `listContainers({ all: false })` has a name
 *     matching `/${SERVICE_NAMES.OLLAMA}`,
 *  2. its image is not recognised as the stock image (`ollama/ollama…` or `ollama:…`),
 *  3. a service URL can be resolved for it, and
 *  4. `GET /api/tags` on that URL answers within the timeout.
 *
 * VRAM is then estimated from `GET /api/ps` as the largest `size_vram` among the
 * currently loaded models (treated as bytes and converted to MiB). If that call
 * fails, or no model reports a positive value, `vram` is reported as 0.
 *
 * NOTE(review): vendor/model are hard-coded to NVIDIA and a reachable `/api/tags`
 * is taken as sufficient evidence of a usable GPU — the endpoint itself does not
 * prove GPU acceleration. Confirm this is the intended heuristic.
 *
 * @returns A single-element controller list on success, or `null` when the
 *          container is missing, uses the stock image, has no URL, or the
 *          reachability check fails. Never throws.
 */
async getExternalOllamaGpuInfo(): Promise<Array<{
  vendor: string
  model: string
  vram: number
}> | null> {
  // One timeout for both HTTP probes so a hung endpoint cannot stall the caller.
  const requestTimeoutMs = 3000
  const bytesPerMb = 1024 * 1024

  try {
    const containers = await this.dockerService.docker.listContainers({ all: false })
    const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
    if (!ollamaContainer) {
      return null
    }

    // Stock images are handled by the nvidia-smi path; only probe custom/external images here.
    const actualImage = (ollamaContainer.Image || '').toLowerCase()
    if (actualImage.includes('ollama/ollama') || actualImage.startsWith('ollama:')) {
      return null
    }

    const ollamaUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.OLLAMA)
    if (!ollamaUrl) {
      return null
    }

    // Reachability check: a failure here rejects and is reported by the outer catch.
    await axios.get(new URL('/api/tags', ollamaUrl).toString(), { timeout: requestTimeoutMs })

    let vramMb = 0
    try {
      const psResponse = await axios.get(new URL('/api/ps', ollamaUrl).toString(), {
        timeout: requestTimeoutMs,
      })
      const loadedModels = Array.isArray(psResponse.data?.models) ? psResponse.data.models : []
      const largestAllocation = loadedModels.reduce(
        (max: number, model: { size_vram?: number | string } | null) => {
          // Skip null entries and non-finite values instead of aborting the whole estimate.
          const sizeVram = Number(model?.size_vram)
          return Number.isFinite(sizeVram) ? Math.max(max, sizeVram) : max
        },
        0
      )
      vramMb = largestAllocation > 0 ? Math.round(largestAllocation / bytesPerMb) : 0
    } catch (error) {
      // VRAM is optional detail: keep the best-effort fallback of 0, but leave a trace of why.
      logger.debug(
        `[SystemService] External Ollama /api/ps probe failed, reporting 0 MB VRAM: ${error instanceof Error ? error.message : String(error)}`
      )
    }

    return [
      {
        vendor: 'NVIDIA',
        model: 'NVIDIA GPU (external Ollama)',
        vram: vramMb,
      },
    ]
  } catch (error) {
    logger.info(
      `[SystemService] External Ollama GPU probe failed: ${error instanceof Error ? error.message : error}`
    )
    return null
  }
}
async getServices({ installedOnly = true }: { installedOnly?: boolean }): Promise<ServiceSlim[]> {
await this._syncContainersWithDatabase() // Sync up before fetching to ensure we have the latest status
@ -273,7 +347,7 @@ export class SystemService {
graphics.controllers = nvidiaInfo.map((gpu) => ({
model: gpu.model,
vendor: gpu.vendor,
bus: "",
bus: '',
vram: gpu.vram,
vramDynamic: false, // assume false here, we don't actually use this field for our purposes.
}))
@ -282,8 +356,23 @@ export class SystemService {
} else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') {
gpuHealth.status = 'ollama_not_installed'
} else {
gpuHealth.status = 'passthrough_failed'
logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
const externalOllamaGpu = await this.getExternalOllamaGpuInfo()
if (externalOllamaGpu) {
graphics.controllers = externalOllamaGpu.map((gpu) => ({
model: gpu.model,
vendor: gpu.vendor,
bus: '',
vram: gpu.vram,
vramDynamic: false,
}))
gpuHealth.status = 'ok'
gpuHealth.ollamaGpuAccessible = true
} else {
gpuHealth.status = 'passthrough_failed'
logger.warn(
`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`
)
}
}
}
} else {
@ -356,9 +445,10 @@ export class SystemService {
logger.info(`Current version: ${currentVersion}, Latest version: ${latestVersion}`)
const updateAvailable = process.env.NODE_ENV === 'development'
? false
: isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)
const updateAvailable =
process.env.NODE_ENV === 'development'
? false
: isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)
// Cache the results in KVStore for frontend checks
await KVStore.setValue('system.updateAvailable', updateAvailable)
@ -518,11 +608,14 @@ export class SystemService {
const k = 1024
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']
const i = Math.floor(Math.log(bytes) / Math.log(k))
return parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
return Number.parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
}
async updateSetting(key: KVStoreKey, value: any): Promise<void> {
if ((value === '' || value === undefined || value === null) && KV_STORE_SCHEMA[key] === 'string') {
if (
(value === '' || value === undefined || value === null) &&
KV_STORE_SCHEMA[key] === 'string'
) {
await KVStore.clearValue(key)
} else {
await KVStore.setValue(key, value)
@ -620,5 +713,4 @@ export class SystemService {
}
})
}
}