mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-03-28 03:29:25 +01:00
feat(ai-chat): Add support for OpenAI API
Existing Ollama API support still functions as before. The OpenAI and Ollama APIs mostly offer the same features; however, model file size is not supported by OpenAI's API, so when a user selects an OpenAI model it is shown by name only, without a size. `npm install openai` triggered some updates in admin/package-lock.json, such as adding many instances of "dev: true". This further enhances the user's ability to run the LLM on a different host.
This commit is contained in:
parent
ac87cfce59
commit
f98664921a
|
|
@ -11,7 +11,7 @@ import type { HttpContext } from '@adonisjs/core/http'
|
||||||
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
import { DEFAULT_QUERY_REWRITE_MODEL, RAG_CONTEXT_LIMITS, SYSTEM_PROMPTS } from '../../constants/ollama.js'
|
||||||
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
import { SERVICE_NAMES } from '../../constants/service_names.js'
|
||||||
import logger from '@adonisjs/core/services/logger'
|
import logger from '@adonisjs/core/services/logger'
|
||||||
import type { Message } from 'ollama'
|
type Message = { role: 'system' | 'user' | 'assistant'; content: string }
|
||||||
|
|
||||||
@inject()
|
@inject()
|
||||||
export default class OllamaController {
|
export default class OllamaController {
|
||||||
|
|
@ -201,21 +201,21 @@ export default class OllamaController {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test connectivity
|
// Test connectivity via OpenAI-compatible /v1/models endpoint (works with Ollama, LM Studio, llama.cpp, etc.)
|
||||||
try {
|
try {
|
||||||
const testResponse = await fetch(`${remoteUrl.replace(/\/$/, '')}/api/tags`, {
|
const testResponse = await fetch(`${remoteUrl.replace(/\/$/, '')}/v1/models`, {
|
||||||
signal: AbortSignal.timeout(5000),
|
signal: AbortSignal.timeout(5000),
|
||||||
})
|
})
|
||||||
if (!testResponse.ok) {
|
if (!testResponse.ok) {
|
||||||
return response.status(400).send({
|
return response.status(400).send({
|
||||||
success: false,
|
success: false,
|
||||||
message: `Could not connect to Ollama at ${remoteUrl} (HTTP ${testResponse.status}). Make sure Ollama is running with OLLAMA_HOST=0.0.0.0.`,
|
message: `Could not connect to ${remoteUrl} (HTTP ${testResponse.status}). Make sure the server is running and accessible. For Ollama, start it with OLLAMA_HOST=0.0.0.0.`,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return response.status(400).send({
|
return response.status(400).send({
|
||||||
success: false,
|
success: false,
|
||||||
message: `Could not connect to Ollama at ${remoteUrl}. Make sure the host is reachable and Ollama is running with OLLAMA_HOST=0.0.0.0.`,
|
message: `Could not connect to ${remoteUrl}. Make sure the server is running and reachable. For Ollama, start it with OLLAMA_HOST=0.0.0.0.`,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
import { inject } from '@adonisjs/core'
|
import { inject } from '@adonisjs/core'
|
||||||
import { ChatRequest, Ollama } from 'ollama'
|
import OpenAI from 'openai'
|
||||||
|
import type { ChatCompletionChunk, ChatCompletionMessageParam } from 'openai/resources/chat/completions.js'
|
||||||
|
import type { Stream } from 'openai/streaming.js'
|
||||||
import { NomadOllamaModel } from '../../types/ollama.js'
|
import { NomadOllamaModel } from '../../types/ollama.js'
|
||||||
import { FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
import { FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
||||||
import fs from 'node:fs/promises'
|
import fs from 'node:fs/promises'
|
||||||
|
|
@ -13,51 +15,91 @@ import Fuse, { IFuseOptions } from 'fuse.js'
|
||||||
import { BROADCAST_CHANNELS } from '../../constants/broadcast.js'
|
import { BROADCAST_CHANNELS } from '../../constants/broadcast.js'
|
||||||
import env from '#start/env'
|
import env from '#start/env'
|
||||||
import { NOMAD_API_DEFAULT_BASE_URL } from '../../constants/misc.js'
|
import { NOMAD_API_DEFAULT_BASE_URL } from '../../constants/misc.js'
|
||||||
|
import KVStore from '#models/kv_store'
|
||||||
|
|
||||||
const NOMAD_MODELS_API_PATH = '/api/v1/ollama/models'
|
const NOMAD_MODELS_API_PATH = '/api/v1/ollama/models'
|
||||||
const MODELS_CACHE_FILE = path.join(process.cwd(), 'storage', 'ollama-models-cache.json')
|
const MODELS_CACHE_FILE = path.join(process.cwd(), 'storage', 'ollama-models-cache.json')
|
||||||
const CACHE_MAX_AGE_MS = 24 * 60 * 60 * 1000 // 24 hours
|
const CACHE_MAX_AGE_MS = 24 * 60 * 60 * 1000 // 24 hours
|
||||||
|
|
||||||
|
export type NomadInstalledModel = {
|
||||||
|
name: string
|
||||||
|
size: number
|
||||||
|
digest?: string
|
||||||
|
details?: Record<string, any>
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NomadChatResponse = {
|
||||||
|
message: { content: string; thinking?: string }
|
||||||
|
done: boolean
|
||||||
|
model: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NomadChatStreamChunk = {
|
||||||
|
message: { content: string; thinking?: string }
|
||||||
|
done: boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
type ChatInput = {
|
||||||
|
model: string
|
||||||
|
messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }>
|
||||||
|
think?: boolean | 'medium'
|
||||||
|
stream?: boolean
|
||||||
|
}
|
||||||
|
|
||||||
@inject()
|
@inject()
|
||||||
export class OllamaService {
|
export class OllamaService {
|
||||||
private ollama: Ollama | null = null
|
private openai: OpenAI | null = null
|
||||||
private ollamaInitPromise: Promise<void> | null = null
|
private baseUrl: string | null = null
|
||||||
|
private initPromise: Promise<void> | null = null
|
||||||
|
|
||||||
constructor() { }
|
constructor() {}
|
||||||
|
|
||||||
private async _initializeOllamaClient() {
|
private async _initialize() {
|
||||||
if (!this.ollamaInitPromise) {
|
if (!this.initPromise) {
|
||||||
this.ollamaInitPromise = (async () => {
|
this.initPromise = (async () => {
|
||||||
const dockerService = new (await import('./docker_service.js')).DockerService()
|
// Check KVStore for a custom base URL (remote Ollama, LM Studio, llama.cpp, etc.)
|
||||||
const qdrantUrl = await dockerService.getServiceURL(SERVICE_NAMES.OLLAMA)
|
const customUrl = (await KVStore.getValue('ai.remoteOllamaUrl')) as string | null
|
||||||
if (!qdrantUrl) {
|
if (customUrl && customUrl.trim()) {
|
||||||
throw new Error('Ollama service is not installed or running.')
|
this.baseUrl = customUrl.trim().replace(/\/$/, '')
|
||||||
|
} else {
|
||||||
|
// Fall back to the local Ollama container managed by Docker
|
||||||
|
const dockerService = new (await import('./docker_service.js')).DockerService()
|
||||||
|
const ollamaUrl = await dockerService.getServiceURL(SERVICE_NAMES.OLLAMA)
|
||||||
|
if (!ollamaUrl) {
|
||||||
|
throw new Error('Ollama service is not installed or running.')
|
||||||
|
}
|
||||||
|
this.baseUrl = ollamaUrl.trim().replace(/\/$/, '')
|
||||||
}
|
}
|
||||||
this.ollama = new Ollama({ host: qdrantUrl })
|
|
||||||
|
this.openai = new OpenAI({
|
||||||
|
apiKey: 'nomad', // Required by SDK; not validated by Ollama/LM Studio/llama.cpp
|
||||||
|
baseURL: `${this.baseUrl}/v1`,
|
||||||
|
})
|
||||||
})()
|
})()
|
||||||
}
|
}
|
||||||
return this.ollamaInitPromise
|
return this.initPromise
|
||||||
}
|
}
|
||||||
|
|
||||||
private async _ensureDependencies() {
|
private async _ensureDependencies() {
|
||||||
if (!this.ollama) {
|
if (!this.openai) {
|
||||||
await this._initializeOllamaClient()
|
await this._initialize()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Downloads a model from the Ollama service with progress tracking. Where possible,
|
* Downloads a model from Ollama with progress tracking. Only works with Ollama backends.
|
||||||
* one should dispatch a background job instead of calling this method directly to avoid long blocking.
|
* Use dispatchModelDownload() for background job processing where possible.
|
||||||
* @param model Model name to download
|
|
||||||
* @returns Success status and message
|
|
||||||
*/
|
*/
|
||||||
async downloadModel(model: string, progressCallback?: (percent: number) => void): Promise<{ success: boolean; message: string; retryable?: boolean }> {
|
async downloadModel(
|
||||||
try {
|
model: string,
|
||||||
await this._ensureDependencies()
|
progressCallback?: (percent: number) => void
|
||||||
if (!this.ollama) {
|
): Promise<{ success: boolean; message: string; retryable?: boolean }> {
|
||||||
throw new Error('Ollama client is not initialized.')
|
await this._ensureDependencies()
|
||||||
}
|
if (!this.baseUrl) {
|
||||||
|
return { success: false, message: 'AI service is not initialized.' }
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
// See if model is already installed
|
// See if model is already installed
|
||||||
const installedModels = await this.getModels()
|
const installedModels = await this.getModels()
|
||||||
if (installedModels && installedModels.some((m) => m.name === model)) {
|
if (installedModels && installedModels.some((m) => m.name === model)) {
|
||||||
|
|
@ -65,23 +107,36 @@ export class OllamaService {
|
||||||
return { success: true, message: 'Model is already installed.' }
|
return { success: true, message: 'Model is already installed.' }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns AbortableAsyncIterator<ProgressResponse>
|
// Stream pull via Ollama native API
|
||||||
const downloadStream = await this.ollama.pull({
|
const pullResponse = await axios.post(
|
||||||
model,
|
`${this.baseUrl}/api/pull`,
|
||||||
stream: true,
|
{ model, stream: true },
|
||||||
})
|
{ responseType: 'stream', timeout: 0 }
|
||||||
|
)
|
||||||
|
|
||||||
for await (const chunk of downloadStream) {
|
await new Promise<void>((resolve, reject) => {
|
||||||
if (chunk.completed && chunk.total) {
|
let buffer = ''
|
||||||
const percent = ((chunk.completed / chunk.total) * 100).toFixed(2)
|
pullResponse.data.on('data', (chunk: Buffer) => {
|
||||||
const percentNum = parseFloat(percent)
|
buffer += chunk.toString()
|
||||||
|
const lines = buffer.split('\n')
|
||||||
this.broadcastDownloadProgress(model, percentNum)
|
buffer = lines.pop() || ''
|
||||||
if (progressCallback) {
|
for (const line of lines) {
|
||||||
progressCallback(percentNum)
|
if (!line.trim()) continue
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(line)
|
||||||
|
if (parsed.completed && parsed.total) {
|
||||||
|
const percent = parseFloat(((parsed.completed / parsed.total) * 100).toFixed(2))
|
||||||
|
this.broadcastDownloadProgress(model, percent)
|
||||||
|
if (progressCallback) progressCallback(percent)
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// ignore parse errors on partial lines
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
})
|
||||||
}
|
pullResponse.data.on('end', resolve)
|
||||||
|
pullResponse.data.on('error', reject)
|
||||||
|
})
|
||||||
|
|
||||||
logger.info(`[OllamaService] Model "${model}" downloaded successfully.`)
|
logger.info(`[OllamaService] Model "${model}" downloaded successfully.`)
|
||||||
return { success: true, message: 'Model downloaded successfully.' }
|
return { success: true, message: 'Model downloaded successfully.' }
|
||||||
|
|
@ -128,88 +183,192 @@ export class OllamaService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public async getClient() {
|
public async chat(chatRequest: ChatInput): Promise<NomadChatResponse> {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
return this.ollama!
|
if (!this.openai) {
|
||||||
}
|
throw new Error('AI client is not initialized.')
|
||||||
|
|
||||||
public async chat(chatRequest: ChatRequest & { stream?: boolean }) {
|
|
||||||
await this._ensureDependencies()
|
|
||||||
if (!this.ollama) {
|
|
||||||
throw new Error('Ollama client is not initialized.')
|
|
||||||
}
|
}
|
||||||
return await this.ollama.chat({
|
|
||||||
...chatRequest,
|
const params: any = {
|
||||||
|
model: chatRequest.model,
|
||||||
|
messages: chatRequest.messages as ChatCompletionMessageParam[],
|
||||||
stream: false,
|
stream: false,
|
||||||
})
|
}
|
||||||
|
if (chatRequest.think) {
|
||||||
|
params.think = chatRequest.think
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await this.openai.chat.completions.create(params)
|
||||||
|
const choice = response.choices[0]
|
||||||
|
|
||||||
|
return {
|
||||||
|
message: {
|
||||||
|
content: choice.message.content ?? '',
|
||||||
|
thinking: (choice.message as any).thinking ?? undefined,
|
||||||
|
},
|
||||||
|
done: true,
|
||||||
|
model: response.model,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public async chatStream(chatRequest: ChatRequest) {
|
public async chatStream(chatRequest: ChatInput): Promise<AsyncIterable<NomadChatStreamChunk>> {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
if (!this.ollama) {
|
if (!this.openai) {
|
||||||
throw new Error('Ollama client is not initialized.')
|
throw new Error('AI client is not initialized.')
|
||||||
}
|
}
|
||||||
return await this.ollama.chat({
|
|
||||||
...chatRequest,
|
const params: any = {
|
||||||
|
model: chatRequest.model,
|
||||||
|
messages: chatRequest.messages as ChatCompletionMessageParam[],
|
||||||
stream: true,
|
stream: true,
|
||||||
})
|
}
|
||||||
|
if (chatRequest.think) {
|
||||||
|
params.think = chatRequest.think
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = (await this.openai.chat.completions.create(params)) as unknown as Stream<ChatCompletionChunk>
|
||||||
|
|
||||||
|
async function* normalize(): AsyncGenerator<NomadChatStreamChunk> {
|
||||||
|
for await (const chunk of stream) {
|
||||||
|
const delta = chunk.choices[0]?.delta
|
||||||
|
yield {
|
||||||
|
message: {
|
||||||
|
content: delta?.content ?? '',
|
||||||
|
thinking: (delta as any)?.thinking ?? '',
|
||||||
|
},
|
||||||
|
done: chunk.choices[0]?.finish_reason !== null && chunk.choices[0]?.finish_reason !== undefined,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return normalize()
|
||||||
}
|
}
|
||||||
|
|
||||||
public async checkModelHasThinking(modelName: string): Promise<boolean> {
|
public async checkModelHasThinking(modelName: string): Promise<boolean> {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
if (!this.ollama) {
|
if (!this.baseUrl) return false
|
||||||
throw new Error('Ollama client is not initialized.')
|
|
||||||
|
try {
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/api/show`,
|
||||||
|
{ model: modelName },
|
||||||
|
{ timeout: 5000 }
|
||||||
|
)
|
||||||
|
return Array.isArray(response.data?.capabilities) && response.data.capabilities.includes('thinking')
|
||||||
|
} catch {
|
||||||
|
// Non-Ollama backends don't expose /api/show — assume no thinking support
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
const modelInfo = await this.ollama.show({
|
|
||||||
model: modelName,
|
|
||||||
})
|
|
||||||
|
|
||||||
return modelInfo.capabilities.includes('thinking')
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public async deleteModel(modelName: string) {
|
public async deleteModel(modelName: string): Promise<{ success: boolean; message: string }> {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
if (!this.ollama) {
|
if (!this.baseUrl) {
|
||||||
throw new Error('Ollama client is not initialized.')
|
return { success: false, message: 'AI service is not initialized.' }
|
||||||
}
|
}
|
||||||
|
|
||||||
return await this.ollama.delete({
|
try {
|
||||||
model: modelName,
|
await axios.delete(`${this.baseUrl}/api/delete`, {
|
||||||
})
|
data: { model: modelName },
|
||||||
|
timeout: 10000,
|
||||||
|
})
|
||||||
|
return { success: true, message: `Model "${modelName}" deleted.` }
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(
|
||||||
|
`[OllamaService] Failed to delete model "${modelName}": ${error instanceof Error ? error.message : error}`
|
||||||
|
)
|
||||||
|
return { success: false, message: 'Failed to delete model. This may not be an Ollama backend.' }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public async getModels(includeEmbeddings = false) {
|
/**
|
||||||
|
* Generate embeddings for the given input strings.
|
||||||
|
* Tries the Ollama native /api/embed endpoint first, falls back to /v1/embeddings.
|
||||||
|
*/
|
||||||
|
public async embed(model: string, input: string[]): Promise<{ embeddings: number[][] }> {
|
||||||
await this._ensureDependencies()
|
await this._ensureDependencies()
|
||||||
if (!this.ollama) {
|
if (!this.baseUrl || !this.openai) {
|
||||||
throw new Error('Ollama client is not initialized.')
|
throw new Error('AI service is not initialized.')
|
||||||
}
|
}
|
||||||
const response = await this.ollama.list()
|
|
||||||
if (includeEmbeddings) {
|
try {
|
||||||
return response.models
|
// Prefer Ollama native endpoint (supports batch input natively)
|
||||||
|
const response = await axios.post(
|
||||||
|
`${this.baseUrl}/api/embed`,
|
||||||
|
{ model, input },
|
||||||
|
{ timeout: 60000 }
|
||||||
|
)
|
||||||
|
return { embeddings: response.data.embeddings }
|
||||||
|
} catch {
|
||||||
|
// Fall back to OpenAI-compatible /v1/embeddings (processes one at a time then batches)
|
||||||
|
logger.info('[OllamaService] /api/embed unavailable, falling back to /v1/embeddings')
|
||||||
|
const results = await this.openai.embeddings.create({ model, input })
|
||||||
|
return { embeddings: results.data.map((e) => e.embedding) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public async getModels(includeEmbeddings = false): Promise<NomadInstalledModel[]> {
|
||||||
|
await this._ensureDependencies()
|
||||||
|
if (!this.baseUrl) {
|
||||||
|
throw new Error('AI service is not initialized.')
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Prefer the Ollama native endpoint which includes size and metadata
|
||||||
|
const response = await axios.get(`${this.baseUrl}/api/tags`, { timeout: 5000 })
|
||||||
|
// LM Studio returns HTTP 200 for unknown endpoints with an incompatible body — validate explicitly
|
||||||
|
if (!Array.isArray(response.data?.models)) {
|
||||||
|
throw new Error('Not an Ollama-compatible /api/tags response')
|
||||||
|
}
|
||||||
|
const models: NomadInstalledModel[] = response.data.models
|
||||||
|
if (includeEmbeddings) return models
|
||||||
|
return models.filter((m) => !m.name.includes('embed'))
|
||||||
|
} catch {
|
||||||
|
// Fall back to the OpenAI-compatible /v1/models endpoint (LM Studio, llama.cpp, etc.)
|
||||||
|
logger.info('[OllamaService] /api/tags unavailable, falling back to /v1/models')
|
||||||
|
try {
|
||||||
|
const modelList = await this.openai!.models.list()
|
||||||
|
const models: NomadInstalledModel[] = modelList.data.map((m) => ({ name: m.id, size: 0 }))
|
||||||
|
if (includeEmbeddings) return models
|
||||||
|
return models.filter((m) => !m.name.includes('embed'))
|
||||||
|
} catch (err) {
|
||||||
|
logger.error(
|
||||||
|
`[OllamaService] Failed to list models: ${err instanceof Error ? err.message : err}`
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Filter out embedding models
|
|
||||||
return response.models.filter((model) => !model.name.includes('embed'))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async getAvailableModels(
|
async getAvailableModels(
|
||||||
{ sort, recommendedOnly, query, limit, force }: { sort?: 'pulls' | 'name'; recommendedOnly?: boolean, query: string | null, limit?: number, force?: boolean } = {
|
{
|
||||||
|
sort,
|
||||||
|
recommendedOnly,
|
||||||
|
query,
|
||||||
|
limit,
|
||||||
|
force,
|
||||||
|
}: {
|
||||||
|
sort?: 'pulls' | 'name'
|
||||||
|
recommendedOnly?: boolean
|
||||||
|
query: string | null
|
||||||
|
limit?: number
|
||||||
|
force?: boolean
|
||||||
|
} = {
|
||||||
sort: 'pulls',
|
sort: 'pulls',
|
||||||
recommendedOnly: false,
|
recommendedOnly: false,
|
||||||
query: null,
|
query: null,
|
||||||
limit: 15,
|
limit: 15,
|
||||||
}
|
}
|
||||||
): Promise<{ models: NomadOllamaModel[], hasMore: boolean } | null> {
|
): Promise<{ models: NomadOllamaModel[]; hasMore: boolean } | null> {
|
||||||
try {
|
try {
|
||||||
const models = await this.retrieveAndRefreshModels(sort, force)
|
const models = await this.retrieveAndRefreshModels(sort, force)
|
||||||
if (!models) {
|
if (!models) {
|
||||||
// If we fail to get models from the API, return the fallback recommended models
|
|
||||||
logger.warn(
|
logger.warn(
|
||||||
'[OllamaService] Returning fallback recommended models due to failure in fetching available models'
|
'[OllamaService] Returning fallback recommended models due to failure in fetching available models'
|
||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
models: FALLBACK_RECOMMENDED_OLLAMA_MODELS,
|
models: FALLBACK_RECOMMENDED_OLLAMA_MODELS,
|
||||||
hasMore: false
|
hasMore: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -217,15 +376,13 @@ export class OllamaService {
|
||||||
const filteredModels = query ? this.fuseSearchModels(models, query) : models
|
const filteredModels = query ? this.fuseSearchModels(models, query) : models
|
||||||
return {
|
return {
|
||||||
models: filteredModels.slice(0, limit || 15),
|
models: filteredModels.slice(0, limit || 15),
|
||||||
hasMore: filteredModels.length > (limit || 15)
|
hasMore: filteredModels.length > (limit || 15),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If recommendedOnly is true, only return the first three models (if sorted by pulls, these will be the top 3)
|
|
||||||
const sortedByPulls = sort === 'pulls' ? models : this.sortModels(models, 'pulls')
|
const sortedByPulls = sort === 'pulls' ? models : this.sortModels(models, 'pulls')
|
||||||
const firstThree = sortedByPulls.slice(0, 3)
|
const firstThree = sortedByPulls.slice(0, 3)
|
||||||
|
|
||||||
// Only return the first tag of each of these models (should be the most lightweight variant)
|
|
||||||
const recommendedModels = firstThree.map((model) => {
|
const recommendedModels = firstThree.map((model) => {
|
||||||
return {
|
return {
|
||||||
...model,
|
...model,
|
||||||
|
|
@ -237,13 +394,13 @@ export class OllamaService {
|
||||||
const filteredRecommendedModels = this.fuseSearchModels(recommendedModels, query)
|
const filteredRecommendedModels = this.fuseSearchModels(recommendedModels, query)
|
||||||
return {
|
return {
|
||||||
models: filteredRecommendedModels,
|
models: filteredRecommendedModels,
|
||||||
hasMore: filteredRecommendedModels.length > (limit || 15)
|
hasMore: filteredRecommendedModels.length > (limit || 15),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
models: recommendedModels,
|
models: recommendedModels,
|
||||||
hasMore: recommendedModels.length > (limit || 15)
|
hasMore: recommendedModels.length > (limit || 15),
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -283,7 +440,6 @@ export class OllamaService {
|
||||||
|
|
||||||
const rawModels = response.data.models as NomadOllamaModel[]
|
const rawModels = response.data.models as NomadOllamaModel[]
|
||||||
|
|
||||||
// Filter out tags where cloud is truthy, then remove models with no remaining tags
|
|
||||||
const noCloud = rawModels
|
const noCloud = rawModels
|
||||||
.map((model) => ({
|
.map((model) => ({
|
||||||
...model,
|
...model,
|
||||||
|
|
@ -295,8 +451,7 @@ export class OllamaService {
|
||||||
return this.sortModels(noCloud, sort)
|
return this.sortModels(noCloud, sort)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
logger.error(
|
logger.error(
|
||||||
`[OllamaService] Failed to retrieve models from Nomad API: ${error instanceof Error ? error.message : error
|
`[OllamaService] Failed to retrieve models from Nomad API: ${error instanceof Error ? error.message : error}`
|
||||||
}`
|
|
||||||
)
|
)
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
@ -322,7 +477,6 @@ export class OllamaService {
|
||||||
|
|
||||||
return models
|
return models
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Cache doesn't exist or is invalid
|
|
||||||
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
|
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
|
||||||
logger.warn(
|
logger.warn(
|
||||||
`[OllamaService] Error reading cache: ${error instanceof Error ? error.message : error}`
|
`[OllamaService] Error reading cache: ${error instanceof Error ? error.message : error}`
|
||||||
|
|
@ -346,7 +500,6 @@ export class OllamaService {
|
||||||
|
|
||||||
private sortModels(models: NomadOllamaModel[], sort?: 'pulls' | 'name'): NomadOllamaModel[] {
|
private sortModels(models: NomadOllamaModel[], sort?: 'pulls' | 'name'): NomadOllamaModel[] {
|
||||||
if (sort === 'pulls') {
|
if (sort === 'pulls') {
|
||||||
// Sort by estimated pulls (it should be a string like "1.2K", "500", "4M" etc.)
|
|
||||||
models.sort((a, b) => {
|
models.sort((a, b) => {
|
||||||
const parsePulls = (pulls: string) => {
|
const parsePulls = (pulls: string) => {
|
||||||
const multiplier = pulls.endsWith('K')
|
const multiplier = pulls.endsWith('K')
|
||||||
|
|
@ -364,8 +517,6 @@ export class OllamaService {
|
||||||
models.sort((a, b) => a.name.localeCompare(b.name))
|
models.sort((a, b) => a.name.localeCompare(b.name))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always sort model.tags by the size field in descending order
|
|
||||||
// Size is a string like '75GB', '8.5GB', '2GB' etc. Smaller models first
|
|
||||||
models.forEach((model) => {
|
models.forEach((model) => {
|
||||||
if (model.tags && Array.isArray(model.tags)) {
|
if (model.tags && Array.isArray(model.tags)) {
|
||||||
model.tags.sort((a, b) => {
|
model.tags.sort((a, b) => {
|
||||||
|
|
@ -378,7 +529,7 @@ export class OllamaService {
|
||||||
? 1
|
? 1
|
||||||
: size.endsWith('TB')
|
: size.endsWith('TB')
|
||||||
? 1_000
|
? 1_000
|
||||||
: 0 // Unknown size format
|
: 0
|
||||||
return parseFloat(size) * multiplier
|
return parseFloat(size) * multiplier
|
||||||
}
|
}
|
||||||
return parseSize(a.size) - parseSize(b.size)
|
return parseSize(a.size) - parseSize(b.size)
|
||||||
|
|
@ -411,11 +562,11 @@ export class OllamaService {
|
||||||
const options: IFuseOptions<NomadOllamaModel> = {
|
const options: IFuseOptions<NomadOllamaModel> = {
|
||||||
ignoreDiacritics: true,
|
ignoreDiacritics: true,
|
||||||
keys: ['name', 'description', 'tags.name'],
|
keys: ['name', 'description', 'tags.name'],
|
||||||
threshold: 0.3, // lower threshold for stricter matching
|
threshold: 0.3,
|
||||||
}
|
}
|
||||||
|
|
||||||
const fuse = new Fuse(models, options)
|
const fuse = new Fuse(models, options)
|
||||||
|
|
||||||
return fuse.search(query).map(result => result.item)
|
return fuse.search(query).map((result) => result.item)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -285,8 +285,6 @@ export class RagService {
|
||||||
// Extract text from chunk results
|
// Extract text from chunk results
|
||||||
const chunks = chunkResults.map((chunk) => chunk.text)
|
const chunks = chunkResults.map((chunk) => chunk.text)
|
||||||
|
|
||||||
const ollamaClient = await this.ollamaService.getClient()
|
|
||||||
|
|
||||||
// Prepare all chunk texts with prefix and truncation
|
// Prepare all chunk texts with prefix and truncation
|
||||||
const prefixedChunks: string[] = []
|
const prefixedChunks: string[] = []
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
for (let i = 0; i < chunks.length; i++) {
|
||||||
|
|
@ -320,10 +318,7 @@ export class RagService {
|
||||||
|
|
||||||
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
|
logger.debug(`[RAG] Embedding batch ${batchIdx + 1}/${totalBatches} (${batch.length} chunks)`)
|
||||||
|
|
||||||
const response = await ollamaClient.embed({
|
const response = await this.ollamaService.embed(RagService.EMBEDDING_MODEL, batch)
|
||||||
model: RagService.EMBEDDING_MODEL,
|
|
||||||
input: batch,
|
|
||||||
})
|
|
||||||
|
|
||||||
embeddings.push(...response.embeddings)
|
embeddings.push(...response.embeddings)
|
||||||
|
|
||||||
|
|
@ -710,8 +705,6 @@ export class RagService {
|
||||||
logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
|
logger.debug(`[RAG] Extracted keywords: [${keywords.join(', ')}]`)
|
||||||
|
|
||||||
// Generate embedding for the query with search_query prefix
|
// Generate embedding for the query with search_query prefix
|
||||||
const ollamaClient = await this.ollamaService.getClient()
|
|
||||||
|
|
||||||
// Ensure query doesn't exceed token limit
|
// Ensure query doesn't exceed token limit
|
||||||
const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
|
const prefixTokens = this.estimateTokenCount(RagService.SEARCH_QUERY_PREFIX)
|
||||||
const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
|
const maxQueryTokens = RagService.MAX_SAFE_TOKENS - prefixTokens
|
||||||
|
|
@ -729,10 +722,7 @@ export class RagService {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await ollamaClient.embed({
|
const response = await this.ollamaService.embed(RagService.EMBEDDING_MODEL, [prefixedQuery])
|
||||||
model: RagService.EMBEDDING_MODEL,
|
|
||||||
input: [prefixedQuery],
|
|
||||||
})
|
|
||||||
|
|
||||||
// Perform semantic search with a higher limit to enable reranking
|
// Perform semantic search with a higher limit to enable reranking
|
||||||
const searchLimit = limit * 3 // Get more results for reranking
|
const searchLimit = limit * 3 // Get more results for reranking
|
||||||
|
|
|
||||||
|
|
@ -380,7 +380,7 @@ export default function Chat({
|
||||||
>
|
>
|
||||||
{installedModels.map((model) => (
|
{installedModels.map((model) => (
|
||||||
<option key={model.name} value={model.name}>
|
<option key={model.name} value={model.name}>
|
||||||
{model.name} ({formatBytes(model.size)})
|
{model.name}{model.size > 0 ? ` (${formatBytes(model.size)})` : ''}
|
||||||
</option>
|
</option>
|
||||||
))}
|
))}
|
||||||
</select>
|
</select>
|
||||||
|
|
|
||||||
|
|
@ -7,8 +7,7 @@ import { DownloadJobWithProgress, WikipediaState } from '../../types/downloads'
|
||||||
import { EmbedJobWithProgress } from '../../types/rag'
|
import { EmbedJobWithProgress } from '../../types/rag'
|
||||||
import type { CategoryWithStatus, CollectionWithStatus, ContentUpdateCheckResult, ResourceUpdateInfo } from '../../types/collections'
|
import type { CategoryWithStatus, CollectionWithStatus, ContentUpdateCheckResult, ResourceUpdateInfo } from '../../types/collections'
|
||||||
import { catchInternal } from './util'
|
import { catchInternal } from './util'
|
||||||
import { NomadOllamaModel, OllamaChatRequest } from '../../types/ollama'
|
import { NomadChatResponse, NomadInstalledModel, NomadOllamaModel, OllamaChatRequest } from '../../types/ollama'
|
||||||
import { ChatResponse, ModelResponse } from 'ollama'
|
|
||||||
import BenchmarkResult from '#models/benchmark_result'
|
import BenchmarkResult from '#models/benchmark_result'
|
||||||
import { BenchmarkType, RunBenchmarkResponse, SubmitBenchmarkResponse, UpdateBuilderTagResponse } from '../../types/benchmark'
|
import { BenchmarkType, RunBenchmarkResponse, SubmitBenchmarkResponse, UpdateBuilderTagResponse } from '../../types/benchmark'
|
||||||
|
|
||||||
|
|
@ -249,7 +248,7 @@ class API {
|
||||||
|
|
||||||
async getInstalledModels() {
|
async getInstalledModels() {
|
||||||
return catchInternal(async () => {
|
return catchInternal(async () => {
|
||||||
const response = await this.client.get<ModelResponse[]>('/ollama/installed-models')
|
const response = await this.client.get<NomadInstalledModel[]>('/ollama/installed-models')
|
||||||
return response.data
|
return response.data
|
||||||
})()
|
})()
|
||||||
}
|
}
|
||||||
|
|
@ -268,7 +267,7 @@ class API {
|
||||||
|
|
||||||
async sendChatMessage(chatRequest: OllamaChatRequest) {
|
async sendChatMessage(chatRequest: OllamaChatRequest) {
|
||||||
return catchInternal(async () => {
|
return catchInternal(async () => {
|
||||||
const response = await this.client.post<ChatResponse>('/ollama/chat', chatRequest)
|
const response = await this.client.post<NomadChatResponse>('/ollama/chat', chatRequest)
|
||||||
return response.data
|
return response.data
|
||||||
})()
|
})()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ import { useNotifications } from '~/context/NotificationContext'
|
||||||
import api from '~/lib/api'
|
import api from '~/lib/api'
|
||||||
import { useModals } from '~/context/ModalContext'
|
import { useModals } from '~/context/ModalContext'
|
||||||
import StyledModal from '~/components/StyledModal'
|
import StyledModal from '~/components/StyledModal'
|
||||||
import { ModelResponse } from 'ollama'
|
import type { NomadInstalledModel } from '../../../types/ollama'
|
||||||
import { SERVICE_NAMES } from '../../../constants/service_names'
|
import { SERVICE_NAMES } from '../../../constants/service_names'
|
||||||
import Switch from '~/components/inputs/Switch'
|
import Switch from '~/components/inputs/Switch'
|
||||||
import StyledSectionHeader from '~/components/StyledSectionHeader'
|
import StyledSectionHeader from '~/components/StyledSectionHeader'
|
||||||
|
|
@ -24,7 +24,7 @@ import { useSystemInfo } from '~/hooks/useSystemInfo'
|
||||||
export default function ModelsPage(props: {
|
export default function ModelsPage(props: {
|
||||||
models: {
|
models: {
|
||||||
availableModels: NomadOllamaModel[]
|
availableModels: NomadOllamaModel[]
|
||||||
installedModels: ModelResponse[]
|
installedModels: NomadInstalledModel[]
|
||||||
settings: { chatSuggestionsEnabled: boolean; aiAssistantCustomName: string; remoteOllamaUrl: string }
|
settings: { chatSuggestionsEnabled: boolean; aiAssistantCustomName: string; remoteOllamaUrl: string }
|
||||||
}
|
}
|
||||||
}) {
|
}) {
|
||||||
|
|
@ -326,8 +326,8 @@ export default function ModelsPage(props: {
|
||||||
<StyledSectionHeader title="Remote Connection" className="mt-8 mb-4" />
|
<StyledSectionHeader title="Remote Connection" className="mt-8 mb-4" />
|
||||||
<div className="bg-white rounded-lg border-2 border-gray-200 p-6">
|
<div className="bg-white rounded-lg border-2 border-gray-200 p-6">
|
||||||
<p className="text-sm text-gray-500 mb-4">
|
<p className="text-sm text-gray-500 mb-4">
|
||||||
Connect to an Ollama instance running on another machine in your local network.
|
Connect to any OpenAI-compatible API server — Ollama, LM Studio, llama.cpp, and others are all supported.
|
||||||
The remote host must be started with <code className="bg-gray-100 px-1 rounded">OLLAMA_HOST=0.0.0.0</code>.
|
For remote Ollama instances, the host must be started with <code className="bg-gray-100 px-1 rounded">OLLAMA_HOST=0.0.0.0</code>.
|
||||||
</p>
|
</p>
|
||||||
{props.models.settings.remoteOllamaUrl && (
|
{props.models.settings.remoteOllamaUrl && (
|
||||||
<p className="text-sm text-green-700 bg-green-50 border border-green-200 rounded px-3 py-2 mb-4">
|
<p className="text-sm text-green-700 bg-green-50 border border-green-200 rounded px-3 py-2 mb-4">
|
||||||
|
|
@ -339,7 +339,7 @@ export default function ModelsPage(props: {
|
||||||
<Input
|
<Input
|
||||||
name="remoteOllamaUrl"
|
name="remoteOllamaUrl"
|
||||||
label="Remote Ollama URL"
|
label="Remote Ollama URL"
|
||||||
placeholder="http://192.168.1.100:11434"
|
placeholder="http://192.168.1.100:11434 (or :1234 for OpenAI API Compatible Apps)"
|
||||||
value={remoteOllamaUrl}
|
value={remoteOllamaUrl}
|
||||||
onChange={(e) => {
|
onChange={(e) => {
|
||||||
setRemoteOllamaUrl(e.target.value)
|
setRemoteOllamaUrl(e.target.value)
|
||||||
|
|
|
||||||
22
admin/package-lock.json
generated
22
admin/package-lock.json
generated
|
|
@ -50,6 +50,7 @@
|
||||||
"maplibre-gl": "^4.7.1",
|
"maplibre-gl": "^4.7.1",
|
||||||
"mysql2": "^3.14.1",
|
"mysql2": "^3.14.1",
|
||||||
"ollama": "^0.6.3",
|
"ollama": "^0.6.3",
|
||||||
|
"openai": "^6.27.0",
|
||||||
"pdf-parse": "^2.4.5",
|
"pdf-parse": "^2.4.5",
|
||||||
"pdf2pic": "^3.2.0",
|
"pdf2pic": "^3.2.0",
|
||||||
"pino-pretty": "^13.0.0",
|
"pino-pretty": "^13.0.0",
|
||||||
|
|
@ -12640,6 +12641,27 @@
|
||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/openai": {
|
||||||
|
"version": "6.27.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/openai/-/openai-6.27.0.tgz",
|
||||||
|
"integrity": "sha512-osTKySlrdYrLYTt0zjhY8yp0JUBmWDCN+Q+QxsV4xMQnnoVFpylgKGgxwN8sSdTNw0G4y+WUXs4eCMWpyDNWZQ==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"bin": {
|
||||||
|
"openai": "bin/cli"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"ws": "^8.18.0",
|
||||||
|
"zod": "^3.25 || ^4.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"ws": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"zod": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/opencollective-postinstall": {
|
"node_modules/opencollective-postinstall": {
|
||||||
"version": "2.0.3",
|
"version": "2.0.3",
|
||||||
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
|
"resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz",
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,7 @@
|
||||||
"maplibre-gl": "^4.7.1",
|
"maplibre-gl": "^4.7.1",
|
||||||
"mysql2": "^3.14.1",
|
"mysql2": "^3.14.1",
|
||||||
"ollama": "^0.6.3",
|
"ollama": "^0.6.3",
|
||||||
|
"openai": "^6.27.0",
|
||||||
"pdf-parse": "^2.4.5",
|
"pdf-parse": "^2.4.5",
|
||||||
"pdf2pic": "^3.2.0",
|
"pdf2pic": "^3.2.0",
|
||||||
"pino-pretty": "^13.0.0",
|
"pino-pretty": "^13.0.0",
|
||||||
|
|
|
||||||
|
|
@ -44,3 +44,16 @@ export type OllamaChatResponse = {
|
||||||
}
|
}
|
||||||
done: boolean
|
done: boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type NomadInstalledModel = {
|
||||||
|
name: string
|
||||||
|
size: number
|
||||||
|
digest?: string
|
||||||
|
details?: Record<string, any>
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NomadChatResponse = {
|
||||||
|
message: { content: string; thinking?: string }
|
||||||
|
done: boolean
|
||||||
|
model: string
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user