diff --git a/admin/app/controllers/settings_controller.ts b/admin/app/controllers/settings_controller.ts index c21ddd6..24cb4ce 100644 --- a/admin/app/controllers/settings_controller.ts +++ b/admin/app/controllers/settings_controller.ts @@ -64,6 +64,7 @@ export default class SettingsController { const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled') const aiAssistantCustomName = await KVStore.getValue('ai.assistantCustomName') const remoteOllamaUrl = await KVStore.getValue('ai.remoteOllamaUrl') + const ollamaFlashAttention = await KVStore.getValue('ai.ollamaFlashAttention') return inertia.render('settings/models', { models: { availableModels: availableModels?.models || [], @@ -72,6 +73,7 @@ export default class SettingsController { chatSuggestionsEnabled: chatSuggestionsEnabled ?? false, aiAssistantCustomName: aiAssistantCustomName ?? '', remoteOllamaUrl: remoteOllamaUrl ?? '', + ollamaFlashAttention: ollamaFlashAttention ?? true, }, }, }) diff --git a/admin/app/services/docker_service.ts b/admin/app/services/docker_service.ts index d3edb5d..3c44332 100644 --- a/admin/app/services/docker_service.ts +++ b/admin/app/services/docker_service.ts @@ -505,6 +505,15 @@ export class DockerService { } } + const ollamaEnv: string[] = [] + if (service.service_name === SERVICE_NAMES.OLLAMA) { + ollamaEnv.push('OLLAMA_NO_CLOUD=1') + const flashAttentionEnabled = await KVStore.getValue('ai.ollamaFlashAttention') + if (flashAttentionEnabled !== false) { + ollamaEnv.push('OLLAMA_FLASH_ATTENTION=1') + } + } + this._broadcast( service.service_name, 'creating', @@ -522,7 +531,7 @@ export class DockerService { HostConfig: gpuHostConfig, ...(containerConfig?.WorkingDir && { WorkingDir: containerConfig.WorkingDir }), ...(containerConfig?.ExposedPorts && { ExposedPorts: containerConfig.ExposedPorts }), - ...(containerConfig?.Env && { Env: containerConfig.Env }), + Env: [...(containerConfig?.Env ?? []), ...ollamaEnv], ...(service.container_command ? { Cmd: service.container_command.split(' ') } : {}), // Ensure container is attached to the Nomad docker network in production ...(process.env.NODE_ENV === 'production' && { diff --git a/admin/constants/kv_store.ts b/admin/constants/kv_store.ts index 1085723..c49416c 100644 --- a/admin/constants/kv_store.ts +++ b/admin/constants/kv_store.ts @@ -1,3 +1,3 @@ import { KVStoreKey } from "../types/kv_store.js"; -export const SETTINGS_KEYS: KVStoreKey[] = ['chat.suggestionsEnabled', 'chat.lastModel', 'ui.hasVisitedEasySetup', 'ui.theme', 'system.earlyAccess', 'ai.assistantCustomName', 'ai.remoteOllamaUrl']; \ No newline at end of file +export const SETTINGS_KEYS: KVStoreKey[] = ['chat.suggestionsEnabled', 'chat.lastModel', 'ui.hasVisitedEasySetup', 'ui.theme', 'system.earlyAccess', 'ai.assistantCustomName', 'ai.remoteOllamaUrl', 'ai.ollamaFlashAttention']; \ No newline at end of file diff --git a/admin/inertia/pages/settings/models.tsx b/admin/inertia/pages/settings/models.tsx index 08d9616..d4124fe 100644 --- a/admin/inertia/pages/settings/models.tsx +++ b/admin/inertia/pages/settings/models.tsx @@ -26,7 +26,7 @@ export default function ModelsPage(props: { models: { availableModels: NomadOllamaModel[] installedModels: NomadInstalledModel[] - settings: { chatSuggestionsEnabled: boolean; aiAssistantCustomName: string; remoteOllamaUrl: string } + settings: { chatSuggestionsEnabled: boolean; aiAssistantCustomName: string; remoteOllamaUrl: string; ollamaFlashAttention: boolean } } }) { const { aiAssistantName } = usePage<{ aiAssistantName: string }>().props @@ -95,6 +95,9 @@ export default function ModelsPage(props: { const [chatSuggestionsEnabled, setChatSuggestionsEnabled] = useState( props.models.settings.chatSuggestionsEnabled ) + const [ollamaFlashAttention, setOllamaFlashAttention] = useState( + props.models.settings.ollamaFlashAttention + ) const [aiAssistantCustomName, setAiAssistantCustomName] = useState( props.models.settings.aiAssistantCustomName ) @@ -308,6 +311,15 @@ export default function ModelsPage(props: { label="Chat Suggestions" description="Display AI-generated conversation starters in the chat interface" /> + { + setOllamaFlashAttention(newVal) + updateSettingMutation.mutate({ key: 'ai.ollamaFlashAttention', value: newVal }) + }} + label="Flash Attention" + description="Enables OLLAMA_FLASH_ATTENTION=1 for improved memory efficiency. Disable if you experience instability. Takes effect after reinstalling the AI Assistant." + /> = T extends 'boolean' ? boolean : string