mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
Support external Ollama GPU health detection
Patch from: https://github.com/KeroZelvin/project-nomad
This commit is contained in:
parent
f36a7594bd
commit
94eea4484a
|
|
@ -1,118 +1,123 @@
|
|||
import KVStore from '#models/kv_store';
|
||||
import { BenchmarkService } from '#services/benchmark_service';
|
||||
import { MapService } from '#services/map_service';
|
||||
import { OllamaService } from '#services/ollama_service';
|
||||
import { SystemService } from '#services/system_service';
|
||||
import { updateSettingSchema } from '#validators/settings';
|
||||
import { inject } from '@adonisjs/core';
|
||||
import KVStore from '#models/kv_store'
|
||||
import { BenchmarkService } from '#services/benchmark_service'
|
||||
import { MapService } from '#services/map_service'
|
||||
import { OllamaService } from '#services/ollama_service'
|
||||
import { SystemService } from '#services/system_service'
|
||||
import { updateSettingSchema } from '#validators/settings'
|
||||
import { inject } from '@adonisjs/core'
|
||||
import type { HttpContext } from '@adonisjs/core/http'
|
||||
import type { KVStoreKey } from '../../types/kv_store.js';
|
||||
import type { KVStoreKey } from '../../types/kv_store.js'
|
||||
|
||||
@inject()
|
||||
export default class SettingsController {
|
||||
constructor(
|
||||
private systemService: SystemService,
|
||||
private mapService: MapService,
|
||||
private benchmarkService: BenchmarkService,
|
||||
private ollamaService: OllamaService
|
||||
) { }
|
||||
constructor(
|
||||
private systemService: SystemService,
|
||||
private mapService: MapService,
|
||||
private benchmarkService: BenchmarkService,
|
||||
private ollamaService: OllamaService
|
||||
) {}
|
||||
|
||||
async system({ inertia }: HttpContext) {
|
||||
const systemInfo = await this.systemService.getSystemInfo();
|
||||
return inertia.render('settings/system', {
|
||||
system: {
|
||||
info: systemInfo
|
||||
}
|
||||
});
|
||||
}
|
||||
/**
 * Renders the `settings/system` Inertia page.
 *
 * The value resolved by `systemService.getSystemInfo()` is passed to the page
 * as `system.info`.
 */
async system({ inertia }: HttpContext) {
  const info = await this.systemService.getSystemInfo()
  return inertia.render('settings/system', { system: { info } })
}
|
||||
|
||||
async apps({ inertia }: HttpContext) {
|
||||
const services = await this.systemService.getServices({ installedOnly: false });
|
||||
return inertia.render('settings/apps', {
|
||||
system: {
|
||||
services
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
async legal({ inertia }: HttpContext) {
|
||||
return inertia.render('settings/legal');
|
||||
}
|
||||
/**
 * Renders the `settings/apps` Inertia page.
 *
 * Services are fetched with `installedOnly: false` and passed to the page as
 * `system.services`.
 */
async apps({ inertia }: HttpContext) {
  const allServices = await this.systemService.getServices({ installedOnly: false })
  return inertia.render('settings/apps', { system: { services: allServices } })
}
|
||||
|
||||
async support({ inertia }: HttpContext) {
|
||||
return inertia.render('settings/support');
|
||||
}
|
||||
/** Renders the `settings/legal` Inertia page; no props are passed. */
async legal(ctx: HttpContext) {
  return ctx.inertia.render('settings/legal')
}
|
||||
|
||||
async maps({ inertia }: HttpContext) {
|
||||
const baseAssetsCheck = await this.mapService.ensureBaseAssets();
|
||||
const regionFiles = await this.mapService.listRegions();
|
||||
return inertia.render('settings/maps', {
|
||||
maps: {
|
||||
baseAssetsExist: baseAssetsCheck,
|
||||
regionFiles: regionFiles.files
|
||||
}
|
||||
});
|
||||
}
|
||||
/** Renders the `settings/support` Inertia page; no props are passed. */
async support(ctx: HttpContext) {
  return ctx.inertia.render('settings/support')
}
|
||||
|
||||
async models({ inertia }: HttpContext) {
|
||||
const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 });
|
||||
const installedModels = await this.ollamaService.getModels().catch(() => [])
|
||||
const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
|
||||
const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName')
|
||||
const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl')
|
||||
return inertia.render('settings/models', {
|
||||
models: {
|
||||
availableModels: availableModels?.models || [],
|
||||
installedModels: installedModels || [],
|
||||
settings: {
|
||||
chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
|
||||
aiAssistantCustomName: aiAssistantCustomName ?? '',
|
||||
remoteOllamaUrl: remoteOllamaUrl ?? '',
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
/**
 * Renders the `settings/maps` Inertia page.
 *
 * Page props:
 *  - `maps.baseAssetsExist`: the value resolved by `mapService.ensureBaseAssets()`
 *  - `maps.regionFiles`: the `files` property of `mapService.listRegions()`
 */
async maps({ inertia }: HttpContext) {
  // Deliberately sequential: the base-asset check completes before regions are listed.
  const baseAssetsExist = await this.mapService.ensureBaseAssets()
  const regions = await this.mapService.listRegions()
  return inertia.render('settings/maps', {
    maps: { baseAssetsExist, regionFiles: regions.files },
  })
}
|
||||
|
||||
async update({ inertia }: HttpContext) {
|
||||
const updateInfo = await this.systemService.checkLatestVersion();
|
||||
return inertia.render('settings/update', {
|
||||
system: {
|
||||
updateAvailable: updateInfo.updateAvailable,
|
||||
latestVersion: updateInfo.latestVersion,
|
||||
currentVersion: updateInfo.currentVersion
|
||||
}
|
||||
});
|
||||
}
|
||||
/**
 * Renders the `settings/models` Inertia page.
 *
 * Page props (under `models`):
 *  - `availableModels`: `models` from `ollamaService.getAvailableModels(...)`
 *    (sorted by pulls, limit 15), or `[]` when that is missing/empty.
 *  - `installedModels`: result of `ollamaService.getModels()`, or `[]` if that
 *    call rejects.
 *  - `settings`: three KV store values with fallbacks (`false`, `''`, `''`).
 *
 * The five lookups do not depend on each other, so they are started together
 * and awaited with `Promise.all` instead of one after another. A rejection
 * from `getAvailableModels` or any KV read still propagates to the caller,
 * exactly as it did when the calls were sequential.
 */
async models({ inertia }: HttpContext) {
  const [
    availableModels,
    installedModels,
    chatSuggestionsEnabled,
    aiAssistantCustomName,
    remoteOllamaUrl,
  ] = await Promise.all([
    this.ollamaService.getAvailableModels({
      sort: 'pulls',
      recommendedOnly: false,
      query: null,
      limit: 15,
    }),
    // Installed models are optional for this page: an unreachable Ollama yields an empty list.
    this.ollamaService.getModels().catch(() => []),
    KVStore.getValue('chat.suggestionsEnabled'),
    KVStore.getValue('ai.assistantCustomName'),
    KVStore.getValue('ai.remoteOllamaUrl'),
  ])

  return inertia.render('settings/models', {
    models: {
      availableModels: availableModels?.models || [],
      installedModels: installedModels || [],
      settings: {
        chatSuggestionsEnabled: chatSuggestionsEnabled ?? false,
        aiAssistantCustomName: aiAssistantCustomName ?? '',
        remoteOllamaUrl: remoteOllamaUrl ?? '',
      },
    },
  })
}
|
||||
|
||||
async zim({ inertia }: HttpContext) {
|
||||
return inertia.render('settings/zim/index')
|
||||
}
|
||||
/**
 * Renders the `settings/update` Inertia page.
 *
 * Forwards `updateAvailable`, `latestVersion` and `currentVersion` from
 * `systemService.checkLatestVersion()` to the page under `system`.
 */
async update({ inertia }: HttpContext) {
  const versionInfo = await this.systemService.checkLatestVersion()
  const { updateAvailable, latestVersion, currentVersion } = versionInfo
  return inertia.render('settings/update', {
    system: { updateAvailable, latestVersion, currentVersion },
  })
}
|
||||
|
||||
async zimRemote({ inertia }: HttpContext) {
|
||||
return inertia.render('settings/zim/remote-explorer');
|
||||
}
|
||||
/** Renders the `settings/zim/index` Inertia page; no props are passed. */
async zim(ctx: HttpContext) {
  return ctx.inertia.render('settings/zim/index')
}
|
||||
|
||||
async benchmark({ inertia }: HttpContext) {
|
||||
const latestResult = await this.benchmarkService.getLatestResult();
|
||||
const status = this.benchmarkService.getStatus();
|
||||
return inertia.render('settings/benchmark', {
|
||||
benchmark: {
|
||||
latestResult,
|
||||
status: status.status,
|
||||
currentBenchmarkId: status.benchmarkId
|
||||
}
|
||||
});
|
||||
}
|
||||
/** Renders the `settings/zim/remote-explorer` Inertia page; no props are passed. */
async zimRemote(ctx: HttpContext) {
  return ctx.inertia.render('settings/zim/remote-explorer')
}
|
||||
|
||||
async getSetting({ request, response }: HttpContext) {
|
||||
const key = request.qs().key;
|
||||
const value = await KVStore.getValue(key as KVStoreKey);
|
||||
return response.status(200).send({ key, value });
|
||||
}
|
||||
/**
 * Renders the `settings/benchmark` Inertia page.
 *
 * Page props (under `benchmark`):
 *  - `latestResult`: value resolved by `benchmarkService.getLatestResult()`
 *  - `status` / `currentBenchmarkId`: `status` and `benchmarkId` from the
 *    synchronous `benchmarkService.getStatus()` call
 */
async benchmark({ inertia }: HttpContext) {
  const latestResult = await this.benchmarkService.getLatestResult()
  // Read the status after the result has resolved, matching the original call order.
  const current = this.benchmarkService.getStatus()
  return inertia.render('settings/benchmark', {
    benchmark: {
      latestResult,
      status: current.status,
      currentBenchmarkId: current.benchmarkId,
    },
  })
}
|
||||
|
||||
async updateSetting({ request, response }: HttpContext) {
|
||||
const reqData = await request.validateUsing(updateSettingSchema);
|
||||
await this.systemService.updateSetting(reqData.key, reqData.value);
|
||||
return response.status(200).send({ success: true, message: 'Setting updated successfully' });
|
||||
}
|
||||
}
|
||||
/**
 * Returns the stored value for the setting named by the `key` query parameter.
 *
 * Responses:
 *  - 400 `{ success: false, message }` when `key` is missing, empty, or not a
 *    plain string (query-string parsing can produce arrays or objects).
 *  - 200 `{ key, value }` otherwise, where `value` is whatever
 *    `KVStore.getValue` resolves to.
 *
 * NOTE(review): the key is only checked to be a non-empty string. It is still
 * cast to `KVStoreKey` without being checked against the set of known keys;
 * consider validating it against the KV store schema.
 */
async getSetting({ request, response }: HttpContext) {
  const key: unknown = request.qs().key
  if (typeof key !== 'string' || key.length === 0) {
    return response
      .status(400)
      .send({ success: false, message: 'Query parameter "key" must be a non-empty string' })
  }

  const value = await KVStore.getValue(key as KVStoreKey)
  return response.status(200).send({ key, value })
}
|
||||
|
||||
/**
 * Validates the request body against `updateSettingSchema`, persists the
 * setting through `systemService.updateSetting`, and replies with a 200
 * success payload.
 */
async updateSetting({ request, response }: HttpContext) {
  const { key, value } = await request.validateUsing(updateSettingSchema)
  await this.systemService.updateSetting(key, value)

  const body = { success: true, message: 'Setting updated successfully' }
  return response.status(200).send(body)
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,15 @@ import { DockerService } from '#services/docker_service'
|
|||
import { ServiceSlim } from '../../types/services.js'
|
||||
import logger from '@adonisjs/core/services/logger'
|
||||
import si from 'systeminformation'
|
||||
import { GpuHealthStatus, NomadDiskInfo, NomadDiskInfoRaw, SystemInformationResponse } from '../../types/system.js'
|
||||
import {
|
||||
GpuHealthStatus,
|
||||
NomadDiskInfo,
|
||||
NomadDiskInfoRaw,
|
||||
SystemInformationResponse,
|
||||
} from '../../types/system.js'
|
||||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||
import { readFileSync } from 'fs'
|
||||
import path, { join } from 'path'
|
||||
import { readFileSync } from 'node:fs'
|
||||
import path, { join } from 'node:path'
|
||||
import { getAllFilesystems, getFile } from '../utils/fs.js'
|
||||
import axios from 'axios'
|
||||
import env from '#start/env'
|
||||
|
|
@ -15,17 +20,16 @@ import KVStore from '#models/kv_store'
|
|||
import { KV_STORE_SCHEMA, KVStoreKey } from '../../types/kv_store.js'
|
||||
import { isNewerVersion } from '../utils/version.js'
|
||||
|
||||
|
||||
@inject()
|
||||
export class SystemService {
|
||||
private static appVersion: string | null = null
|
||||
private static diskInfoFile = '/storage/nomad-disk-info.json'
|
||||
|
||||
constructor(private dockerService: DockerService) { }
|
||||
constructor(private dockerService: DockerService) {}
|
||||
|
||||
async checkServiceInstalled(serviceName: string): Promise<boolean> {
|
||||
const services = await this.getServices({ installedOnly: true });
|
||||
return services.some(service => service.service_name === serviceName);
|
||||
const services = await this.getServices({ installedOnly: true })
|
||||
return services.some((service) => service.service_name === serviceName)
|
||||
}
|
||||
|
||||
async getInternetStatus(): Promise<boolean> {
|
||||
|
|
@ -67,14 +71,20 @@ export class SystemService {
|
|||
return false
|
||||
}
|
||||
|
||||
async getNvidiaSmiInfo(): Promise<Array<{ vendor: string; model: string; vram: number; }> | { error: string } | 'OLLAMA_NOT_FOUND' | 'BAD_RESPONSE' | 'UNKNOWN_ERROR'> {
|
||||
async getNvidiaSmiInfo(): Promise<
|
||||
| Array<{ vendor: string; model: string; vram: number }>
|
||||
| { error: string }
|
||||
| 'OLLAMA_NOT_FOUND'
|
||||
| 'BAD_RESPONSE'
|
||||
| 'UNKNOWN_ERROR'
|
||||
> {
|
||||
try {
|
||||
const containers = await this.dockerService.docker.listContainers({ all: false })
|
||||
const ollamaContainer = containers.find((c) =>
|
||||
c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`)
|
||||
)
|
||||
const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
|
||||
if (!ollamaContainer) {
|
||||
logger.info('Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.')
|
||||
logger.info(
|
||||
'Ollama container not found for nvidia-smi info retrieval. This is expected if Ollama is not installed.'
|
||||
)
|
||||
return 'OLLAMA_NOT_FOUND'
|
||||
}
|
||||
|
||||
|
|
@ -92,23 +102,35 @@ export class SystemService {
|
|||
const output = await new Promise<string>((resolve) => {
|
||||
let data = ''
|
||||
const timeout = setTimeout(() => resolve(data), 5000)
|
||||
stream.on('data', (chunk: Buffer) => { data += chunk.toString() })
|
||||
stream.on('end', () => { clearTimeout(timeout); resolve(data) })
|
||||
stream.on('data', (chunk: Buffer) => {
|
||||
data += chunk.toString()
|
||||
})
|
||||
stream.on('end', () => {
|
||||
clearTimeout(timeout)
|
||||
resolve(data)
|
||||
})
|
||||
})
|
||||
|
||||
// Remove any non-printable characters and trim the output
|
||||
const cleaned = output.replace(/[\x00-\x08]/g, '').trim()
|
||||
if (cleaned && !cleaned.toLowerCase().includes('error') && !cleaned.toLowerCase().includes('not found')) {
|
||||
const cleaned = Array.from(output)
|
||||
.filter((character) => character.charCodeAt(0) > 8)
|
||||
.join('')
|
||||
.trim()
|
||||
if (
|
||||
cleaned &&
|
||||
!cleaned.toLowerCase().includes('error') &&
|
||||
!cleaned.toLowerCase().includes('not found')
|
||||
) {
|
||||
// Split by newlines to handle multiple GPUs installed
|
||||
const lines = cleaned.split('\n').filter(line => line.trim())
|
||||
const lines = cleaned.split('\n').filter((line) => line.trim())
|
||||
|
||||
// Map each line out to a useful structure for us
|
||||
const gpus = lines.map(line => {
|
||||
const gpus = lines.map((line) => {
|
||||
const parts = line.split(',').map((s) => s.trim())
|
||||
return {
|
||||
vendor: 'NVIDIA',
|
||||
model: parts[0] || 'NVIDIA GPU',
|
||||
vram: parts[1] ? parseInt(parts[1], 10) : 0,
|
||||
vram: parts[1] ? Number.parseInt(parts[1], 10) : 0,
|
||||
}
|
||||
})
|
||||
|
||||
|
|
@ -117,8 +139,7 @@ export class SystemService {
|
|||
|
||||
// If we got output but looks like an error, consider it a bad response from nvidia-smi
|
||||
return 'BAD_RESPONSE'
|
||||
}
|
||||
catch (error) {
|
||||
} catch (error) {
|
||||
logger.error('Error getting nvidia-smi info:', error)
|
||||
if (error instanceof Error && error.message) {
|
||||
return { error: error.message }
|
||||
|
|
@ -127,6 +148,59 @@ export class SystemService {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Best-effort GPU probe for an Ollama service that does not run the stock
 * Ollama image.
 *
 * Steps:
 *  1. Find the running container named `/${SERVICE_NAMES.OLLAMA}`; return
 *     `null` if there is none.
 *  2. Return `null` if the container image contains `ollama/ollama` or starts
 *     with `ollama:` (i.e. it looks like the stock image).
 *  3. Resolve the service URL via `dockerService.getServiceURL` and require a
 *     successful `GET /api/tags` (3 s timeout) as a reachability check.
 *  4. Query `GET /api/ps` and take the largest `size_vram` among the loaded
 *     models, treating it as bytes and converting to MiB. If this request
 *     fails, VRAM is reported as 0 rather than failing the whole probe.
 *
 * NOTE(review): `vendor` and `model` are fixed placeholder strings; this
 * probe does not identify the actual GPU. `vram` reflects the largest model
 * allocation currently loaded, not the card's total memory.
 *
 * @returns A single-element GPU list when the external Ollama is reachable,
 *          or `null` when the probe does not apply or any required step fails.
 *          This method never throws.
 */
async getExternalOllamaGpuInfo(): Promise<Array<{
  vendor: string
  model: string
  vram: number
}> | null> {
  const probeTimeoutMs = 3000
  const bytesPerMib = 1024 * 1024

  try {
    const containers = await this.dockerService.docker.listContainers({ all: false })
    const ollamaContainer = containers.find((c) => c.Names.includes(`/${SERVICE_NAMES.OLLAMA}`))
    if (!ollamaContainer) {
      return null
    }

    const actualImage = (ollamaContainer.Image || '').toLowerCase()
    if (actualImage.includes('ollama/ollama') || actualImage.startsWith('ollama:')) {
      return null
    }

    const ollamaUrl = await this.dockerService.getServiceURL(SERVICE_NAMES.OLLAMA)
    if (!ollamaUrl) {
      return null
    }

    // Reachability check: a rejection here is handled by the outer catch and yields null.
    await axios.get(new URL('/api/tags', ollamaUrl).toString(), { timeout: probeTimeoutMs })

    let vramMb = 0
    try {
      const psResponse = await axios.get(new URL('/api/ps', ollamaUrl).toString(), {
        timeout: probeTimeoutMs,
      })
      const loadedModels: Array<{ size_vram?: number | string } | null | undefined> =
        Array.isArray(psResponse.data?.models) ? psResponse.data.models : []

      let largestAllocation = 0
      for (const loaded of loadedModels) {
        // Ignore missing, non-numeric, or non-finite values instead of letting them poison the result.
        const bytes = Number(loaded?.size_vram)
        if (Number.isFinite(bytes) && bytes > largestAllocation) {
          largestAllocation = bytes
        }
      }
      vramMb = Math.round(largestAllocation / bytesPerMib)
    } catch (error) {
      // VRAM is optional detail: keep the probe successful, but leave a trace for debugging.
      logger.debug(
        `[SystemService] External Ollama /api/ps probe failed; reporting 0 MB VRAM: ${error instanceof Error ? error.message : String(error)}`
      )
    }

    return [
      {
        vendor: 'NVIDIA',
        model: 'NVIDIA GPU (external Ollama)',
        vram: vramMb,
      },
    ]
  } catch (error) {
    logger.info(
      `[SystemService] External Ollama GPU probe failed: ${error instanceof Error ? error.message : error}`
    )
    return null
  }
}
|
||||
|
||||
async getServices({ installedOnly = true }: { installedOnly?: boolean }): Promise<ServiceSlim[]> {
|
||||
await this._syncContainersWithDatabase() // Sync up before fetching to ensure we have the latest status
|
||||
|
||||
|
|
@ -273,7 +347,7 @@ export class SystemService {
|
|||
graphics.controllers = nvidiaInfo.map((gpu) => ({
|
||||
model: gpu.model,
|
||||
vendor: gpu.vendor,
|
||||
bus: "",
|
||||
bus: '',
|
||||
vram: gpu.vram,
|
||||
vramDynamic: false, // assume false here, we don't actually use this field for our purposes.
|
||||
}))
|
||||
|
|
@ -282,8 +356,23 @@ export class SystemService {
|
|||
} else if (nvidiaInfo === 'OLLAMA_NOT_FOUND') {
|
||||
gpuHealth.status = 'ollama_not_installed'
|
||||
} else {
|
||||
gpuHealth.status = 'passthrough_failed'
|
||||
logger.warn(`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`)
|
||||
const externalOllamaGpu = await this.getExternalOllamaGpuInfo()
|
||||
if (externalOllamaGpu) {
|
||||
graphics.controllers = externalOllamaGpu.map((gpu) => ({
|
||||
model: gpu.model,
|
||||
vendor: gpu.vendor,
|
||||
bus: '',
|
||||
vram: gpu.vram,
|
||||
vramDynamic: false,
|
||||
}))
|
||||
gpuHealth.status = 'ok'
|
||||
gpuHealth.ollamaGpuAccessible = true
|
||||
} else {
|
||||
gpuHealth.status = 'passthrough_failed'
|
||||
logger.warn(
|
||||
`NVIDIA runtime detected but GPU passthrough failed: ${typeof nvidiaInfo === 'string' ? nvidiaInfo : JSON.stringify(nvidiaInfo)}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
@ -356,9 +445,10 @@ export class SystemService {
|
|||
|
||||
logger.info(`Current version: ${currentVersion}, Latest version: ${latestVersion}`)
|
||||
|
||||
const updateAvailable = process.env.NODE_ENV === 'development'
|
||||
? false
|
||||
: isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)
|
||||
const updateAvailable =
|
||||
process.env.NODE_ENV === 'development'
|
||||
? false
|
||||
: isNewerVersion(latestVersion, currentVersion.trim(), earlyAccess)
|
||||
|
||||
// Cache the results in KVStore for frontend checks
|
||||
await KVStore.setValue('system.updateAvailable', updateAvailable)
|
||||
|
|
@ -518,11 +608,14 @@ export class SystemService {
|
|||
const k = 1024
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k))
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
|
||||
return Number.parseFloat((bytes / Math.pow(k, i)).toFixed(decimals)) + ' ' + sizes[i]
|
||||
}
|
||||
|
||||
async updateSetting(key: KVStoreKey, value: any): Promise<void> {
|
||||
if ((value === '' || value === undefined || value === null) && KV_STORE_SCHEMA[key] === 'string') {
|
||||
if (
|
||||
(value === '' || value === undefined || value === null) &&
|
||||
KV_STORE_SCHEMA[key] === 'string'
|
||||
) {
|
||||
await KVStore.clearValue(key)
|
||||
} else {
|
||||
await KVStore.setValue(key, value)
|
||||
|
|
@ -620,5 +713,4 @@ export class SystemService {
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user