feat(AI): add Ollama support for NVIDIA and AMD GPUs

This commit is contained in:
Jake Turner 2026-02-02 00:24:10 +00:00
parent d1f40663d3
commit a697d930fe
2 changed files with 243 additions and 2 deletions

View File

@ -7,6 +7,9 @@ import { doResumableDownloadWithRetry } from '../utils/downloads.js'
import { join } from 'path'
import { ZIM_STORAGE_PATH } from '../utils/fs.js'
import { SERVICE_NAMES } from '../../constants/service_names.js'
import { exec } from 'child_process'
import { promisify } from 'util'
import { readdir } from 'fs/promises'
@inject()
export class DockerService {
@ -444,16 +447,79 @@ export class DockerService {
)
}
// GPU-aware configuration for Ollama
let finalImage = service.container_image
let gpuHostConfig = containerConfig?.HostConfig || {}
if (service.service_name === SERVICE_NAMES.OLLAMA) {
const gpuType = await this._detectGPUType()
if (gpuType === 'nvidia') {
this._broadcast(
service.service_name,
'gpu-config',
`NVIDIA GPU detected. Configuring container with GPU support...`
)
// Add GPU support for NVIDIA
gpuHostConfig = {
...gpuHostConfig,
DeviceRequests: [
{
Driver: 'nvidia',
Count: -1, // -1 means all GPUs
Capabilities: [['gpu']],
},
],
}
} else if (gpuType === 'amd') {
this._broadcast(
service.service_name,
'gpu-config',
`AMD GPU detected. Using ROCm image and configuring container with GPU support...`
)
// Use ROCm image for AMD
finalImage = 'ollama/ollama:rocm'
// Dynamically discover and add AMD GPU devices
const amdDevices = await this._discoverAMDDevices()
if (!amdDevices || amdDevices.length === 0) {
this._broadcast(
service.service_name,
'gpu-config-error',
`Failed to discover AMD GPU devices. Proceeding with CPU-only configuration...`
)
gpuHostConfig = { ...gpuHostConfig } // No GPU devices added
logger.warn(`[DockerService] No AMD GPU devices discovered for Ollama`)
} else {
gpuHostConfig = {
...gpuHostConfig,
Devices: amdDevices,
}
logger.info(
`[DockerService] Configured ${amdDevices.length} AMD GPU devices for Ollama`
)
}
} else {
this._broadcast(
service.service_name,
'gpu-config',
`No GPU detected. Using CPU-only configuration...`
)
}
}
this._broadcast(
service.service_name,
'creating',
`Creating Docker container for service ${service.service_name}...`
)
const container = await this.docker.createContainer({
Image: service.container_image,
Image: finalImage,
name: service.service_name,
...(containerConfig?.User && { User: containerConfig.User }),
...(containerConfig?.HostConfig && { HostConfig: containerConfig.HostConfig }),
HostConfig: gpuHostConfig,
...(containerConfig?.WorkingDir && { WorkingDir: containerConfig.WorkingDir }),
...(containerConfig?.ExposedPorts && { ExposedPorts: containerConfig.ExposedPorts }),
...(containerConfig?.Env && { Env: containerConfig.Env }),
@ -603,6 +669,104 @@ export class DockerService {
}
}
/**
 * Detect the GPU vendor available on the host.
 *
 * Inspects `lspci` output, restricted to display-class devices
 * (VGA / 3D / Display controllers). The original grepped the whole
 * `lspci` listing for "amd|radeon", which false-positives on systems
 * with an AMD CPU/chipset but no AMD GPU (SMBus, USB controllers, etc.
 * all report "Advanced Micro Devices").
 *
 * @returns 'nvidia', 'amd', or 'none' when no GPU is found or detection fails.
 */
private async _detectGPUType(): Promise<'nvidia' | 'amd' | 'none'> {
  const execAsync = promisify(exec)
  try {
    // `|| true` keeps grep's "no match" exit code (1) from rejecting the
    // promise, so an empty stdout simply means "nothing matched".
    const { stdout } = await execAsync(
      'lspci 2>/dev/null | grep -iE "vga|3d|display" || true'
    )
    const gpuLines = stdout.toLowerCase()
    // Check NVIDIA first: on mixed systems (e.g. AMD iGPU + NVIDIA dGPU)
    // we prefer the NVIDIA path, matching the original precedence.
    if (gpuLines.includes('nvidia')) {
      logger.info('[DockerService] NVIDIA GPU detected')
      return 'nvidia'
    }
    if (/\bamd\b|radeon|advanced micro devices/.test(gpuLines)) {
      logger.info('[DockerService] AMD GPU detected')
      return 'amd'
    }
    logger.info('[DockerService] No GPU detected')
    return 'none'
  } catch (error) {
    // catch variables are `unknown` under strict mode — narrow before
    // reading `.message` (the original assumed it exists).
    const message = error instanceof Error ? error.message : String(error)
    logger.warn(`[DockerService] Error detecting GPU type: ${message}`)
    return 'none'
  }
}
/**
 * Discover AMD GPU device nodes to pass through to a container.
 *
 * Always includes /dev/kfd (the ROCm Kernel Fusion Driver compute
 * interface), then adds every card/render node found under /dev/dri.
 * Entries that are not device nodes are skipped: /dev/dri typically also
 * contains a `by-path/` subdirectory, which the original blindly mapped
 * as a device and which Docker cannot expose.
 *
 * @returns Docker `HostConfig.Devices` entries, or an empty array on
 *          unexpected failure (caller falls back to CPU-only).
 */
private async _discoverAMDDevices(): Promise<
  Array<{ PathOnHost: string; PathInContainer: string; CgroupPermissions: string }>
> {
  // Helper: map a host path 1:1 into the container with rwm permissions.
  const makeDevice = (path: string) => ({
    PathOnHost: path,
    PathInContainer: path,
    CgroupPermissions: 'rwm',
  })
  try {
    const devices = [makeDevice('/dev/kfd')]
    try {
      const entries = await readdir('/dev/dri')
      // Keep only real DRI device nodes (card0, card1, renderD128, ...).
      const driDevices = entries.filter((name) => /^(card|renderD)\d+$/.test(name))
      for (const device of driDevices) {
        devices.push(makeDevice(`/dev/dri/${device}`))
      }
      logger.info(
        `[DockerService] Discovered ${driDevices.length} DRI devices: ${driDevices.join(', ')}`
      )
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error)
      logger.warn(`[DockerService] Could not read /dev/dri directory: ${message}`)
      // Fall back to the conventional primary-GPU node names.
      const fallbackDevices = ['card0', 'renderD128']
      for (const device of fallbackDevices) {
        devices.push(makeDevice(`/dev/dri/${device}`))
      }
      logger.info(`[DockerService] Using fallback DRI devices: ${fallbackDevices.join(', ')}`)
    }
    return devices
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error)
    logger.error(`[DockerService] Error discovering AMD devices: ${message}`)
    return []
  }
}
private _broadcast(service: string, status: string, message: string) {
transmit.broadcast('service-installation', {
service_name: service,

View File

@ -203,6 +203,82 @@ ensure_docker_installed() {
fi
}
# Install and configure the NVIDIA container toolkit so Docker containers
# can access NVIDIA GPUs (needed for GPU-accelerated Ollama).
# Best-effort: every failure prints a warning and returns 0 so the overall
# installation continues with a CPU-only setup.
setup_nvidia_container_toolkit() {
    echo -e "${YELLOW}#${RESET} Checking for NVIDIA GPU...\\n"
    # Safely detect NVIDIA GPU via lspci
    local has_nvidia_gpu=false
    if command -v lspci &> /dev/null; then
        if lspci 2>/dev/null | grep -i nvidia &> /dev/null; then
            has_nvidia_gpu=true
            echo -e "${GREEN}#${RESET} NVIDIA GPU detected.\\n"
        fi
    fi
    # Also check nvidia-smi (covers hosts where lspci is unavailable)
    if ! $has_nvidia_gpu && command -v nvidia-smi &> /dev/null; then
        if nvidia-smi &> /dev/null; then
            has_nvidia_gpu=true
            echo -e "${GREEN}#${RESET} NVIDIA GPU detected via nvidia-smi.\\n"
        fi
    fi
    if ! $has_nvidia_gpu; then
        echo -e "${YELLOW}#${RESET} No NVIDIA GPU detected. Skipping NVIDIA container toolkit installation.\\n"
        return 0
    fi
    # Skip if nvidia-container-toolkit is already installed
    if command -v nvidia-ctk &> /dev/null; then
        echo -e "${GREEN}#${RESET} NVIDIA container toolkit is already installed.\\n"
        return 0
    fi
    echo -e "${YELLOW}#${RESET} Installing NVIDIA container toolkit...\\n"
    # Install dependencies per https://docs.ollama.com/docker - wrapped in error handling.
    # --batch --yes lets gpg overwrite an existing keyring file on a re-run
    # instead of prompting (and failing) under sudo.
    if ! curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey 2>/dev/null | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg 2>/dev/null; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to add NVIDIA container toolkit GPG key. Continuing anyway...\\n"
        return 0
    fi
    # Fetch the repo list BEFORE piping it through sed|tee: in the original
    # single pipeline, only tee's exit status was checked (no pipefail), so
    # a failed curl still wrote an empty sources.list file and "succeeded".
    local repo_list
    repo_list=$(curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list 2>/dev/null)
    if [ -z "$repo_list" ]; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to add NVIDIA container toolkit repository. Continuing anyway...\\n"
        return 0
    fi
    if ! printf '%s\n' "$repo_list" \
        | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
        | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null 2>&1; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to add NVIDIA container toolkit repository. Continuing anyway...\\n"
        return 0
    fi
    if ! sudo apt-get update 2>/dev/null; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to update package list. Continuing anyway...\\n"
        return 0
    fi
    if ! sudo apt-get install -y nvidia-container-toolkit 2>/dev/null; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to install NVIDIA container toolkit. Continuing anyway...\\n"
        return 0
    fi
    echo -e "${GREEN}#${RESET} NVIDIA container toolkit installed successfully.\\n"
    # Configure Docker to use NVIDIA runtime
    echo -e "${YELLOW}#${RESET} Configuring Docker to use NVIDIA runtime...\\n"
    if ! sudo nvidia-ctk runtime configure --runtime=docker 2>/dev/null; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to configure NVIDIA runtime for Docker. Continuing anyway...\\n"
        return 0
    fi
    # Restart Docker so the new runtime takes effect
    echo -e "${YELLOW}#${RESET} Restarting Docker service...\\n"
    if ! sudo systemctl restart docker 2>/dev/null; then
        echo -e "${YELLOW}#${RESET} Warning: Failed to restart Docker service. You may need to restart it manually.\\n"
        return 0
    fi
    echo -e "${GREEN}#${RESET} NVIDIA container toolkit configuration completed successfully.\\n"
}
get_install_confirmation(){
read -p "This script will install/update Project N.O.M.A.D. and its dependencies on your machine. Are you sure you want to continue? (y/n): " choice
case "$choice" in
@ -439,6 +515,7 @@ check_is_debug_mode
get_install_confirmation
accept_terms
ensure_docker_installed
setup_nvidia_container_toolkit
get_local_ip
create_nomad_directory
download_wait_for_it_script