Compare commits

...

5 Commits

Author SHA1 Message Date
cosmistack-bot
071c2a89a0 chore(release): 1.27.0-rc.1 [skip ci] 2026-02-25 17:48:12 +00:00
Jake Turner
0c952e55e1
docs: update release notes 2026-02-25 17:45:53 +00:00
Jake Turner
274351b65a
fix(AI): improved perf via rewrite and streaming logic 2026-02-25 17:42:22 +00:00
Jake Turner
e319da67ba
ops: support RC versions 2026-02-25 17:32:24 +00:00
Jake Turner
2a63d95850
feat(Models): paginate available models endpoint 2026-02-25 06:13:40 +00:00
17 changed files with 267 additions and 107 deletions

View File

@ -7,6 +7,11 @@ on:
description: 'Semantic version to label the Docker image under' description: 'Semantic version to label the Docker image under'
required: true required: true
type: string type: string
tag_latest:
description: 'Also tag this image as :latest? (Keep false for RC and beta releases)'
required: false
type: boolean
default: false
jobs: jobs:
check_authorization: check_authorization:
@ -41,4 +46,4 @@ jobs:
push: true push: true
tags: | tags: |
ghcr.io/crosstalk-solutions/project-nomad:${{ inputs.version }} ghcr.io/crosstalk-solutions/project-nomad:${{ inputs.version }}
ghcr.io/crosstalk-solutions/project-nomad:latest ${{ inputs.tag_latest && 'ghcr.io/crosstalk-solutions/project-nomad:latest' || '' }}

View File

@ -37,7 +37,10 @@ jobs:
GIT_COMMITTER_EMAIL: dev@cosmistack.com GIT_COMMITTER_EMAIL: dev@cosmistack.com
- name: Finalize release notes - name: Finalize release notes
if: steps.semver.outputs.new_release_published == 'true' # Skip for pre-releases (versions containing a hyphen, e.g. 1.27.0-rc.1)
if: |
steps.semver.outputs.new_release_published == 'true' &&
!contains(steps.semver.outputs.new_release_version, '-')
id: finalize-notes id: finalize-notes
env: env:
GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_REPOSITORY: ${{ github.repository }}
@ -55,17 +58,23 @@ jobs:
fi fi
- name: Commit finalized release notes - name: Commit finalized release notes
if: steps.semver.outputs.new_release_published == 'true' && steps.finalize-notes.outputs.has_notes == 'true' if: |
steps.semver.outputs.new_release_published == 'true' &&
steps.finalize-notes.outputs.has_notes == 'true' &&
!contains(steps.semver.outputs.new_release_version, '-')
run: | run: |
git config user.name "cosmistack-bot" git config user.name "cosmistack-bot"
git config user.email "dev@cosmistack.com" git config user.email "dev@cosmistack.com"
git remote set-url origin https://x-access-token:${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }}@github.com/${{ github.repository }}.git git remote set-url origin https://x-access-token:${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }}@github.com/${{ github.repository }}.git
git add admin/docs/release-notes.md git add admin/docs/release-notes.md
git commit -m "docs(release): finalize v${{ steps.semver.outputs.new_release_version }} release notes [skip ci]" git commit -m "docs(release): finalize v${{ steps.semver.outputs.new_release_version }} release notes [skip ci]"
git push origin master git push origin ${{ github.ref_name }}
- name: Update GitHub release body - name: Update GitHub release body
if: steps.semver.outputs.new_release_published == 'true' && steps.finalize-notes.outputs.has_notes == 'true' if: |
steps.semver.outputs.new_release_published == 'true' &&
steps.finalize-notes.outputs.has_notes == 'true' &&
!contains(steps.semver.outputs.new_release_version, '-')
env: env:
GH_TOKEN: ${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }} GH_TOKEN: ${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }}
run: | run: |

View File

@ -1,5 +1,8 @@
{ {
"branches": ["master"], "branches": [
"master",
{ "name": "rc", "prerelease": "rc" }
],
"plugins": [ "plugins": [
"@semantic-release/commit-analyzer", "@semantic-release/commit-analyzer",
"@semantic-release/release-notes-generator", "@semantic-release/release-notes-generator",

View File

@ -4,7 +4,7 @@ import { modelNameSchema } from '#validators/download'
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama' import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
import { inject } from '@adonisjs/core' import { inject } from '@adonisjs/core'
import type { HttpContext } from '@adonisjs/core/http' import type { HttpContext } from '@adonisjs/core/http'
import { SYSTEM_PROMPTS } from '../../constants/ollama.js' import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
import logger from '@adonisjs/core/services/logger' import logger from '@adonisjs/core/services/logger'
import type { Message } from 'ollama' import type { Message } from 'ollama'
@ -21,86 +21,92 @@ export default class OllamaController {
sort: reqData.sort, sort: reqData.sort,
recommendedOnly: reqData.recommendedOnly, recommendedOnly: reqData.recommendedOnly,
query: reqData.query || null, query: reqData.query || null,
limit: reqData.limit || 15,
}) })
} }
async chat({ request, response }: HttpContext) { async chat({ request, response }: HttpContext) {
const reqData = await request.validateUsing(chatSchema) const reqData = await request.validateUsing(chatSchema)
// If there are no system messages in the chat inject system prompts // Flush SSE headers immediately so the client connection is open while
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system') // pre-processing (query rewriting, RAG lookup) runs in the background.
if (!hasSystemMessage) {
const systemPrompt = {
role: 'system' as const,
content: SYSTEM_PROMPTS.default,
}
logger.debug('[OllamaController] Injecting system prompt')
reqData.messages.unshift(systemPrompt)
}
// Query rewriting for better RAG retrieval with manageable context
// Will return user's latest message if no rewriting is needed
const rewrittenQuery = await this.rewriteQueryWithContext(
reqData.messages,
reqData.model
)
logger.debug(`[OllamaController] Rewritten query for RAG: "${rewrittenQuery}"`)
if (rewrittenQuery) {
const relevantDocs = await this.ragService.searchSimilarDocuments(
rewrittenQuery,
5, // Top 5 most relevant chunks
0.3 // Minimum similarity score of 0.3
)
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
// If relevant context is found, inject as a system message
if (relevantDocs.length > 0) {
const contextText = relevantDocs
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
.join('\n\n')
const systemMessage = {
role: 'system' as const,
content: SYSTEM_PROMPTS.rag_context(contextText),
}
// Insert system message at the beginning (after any existing system messages)
const firstNonSystemIndex = reqData.messages.findIndex((msg) => msg.role !== 'system')
const insertIndex = firstNonSystemIndex === -1 ? 0 : firstNonSystemIndex
reqData.messages.splice(insertIndex, 0, systemMessage)
}
}
// Check if the model supports "thinking" capability for enhanced response generation
// If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
if (reqData.stream) { if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
// SSE streaming path
response.response.setHeader('Content-Type', 'text/event-stream') response.response.setHeader('Content-Type', 'text/event-stream')
response.response.setHeader('Cache-Control', 'no-cache') response.response.setHeader('Cache-Control', 'no-cache')
response.response.setHeader('Connection', 'keep-alive') response.response.setHeader('Connection', 'keep-alive')
response.response.flushHeaders() response.response.flushHeaders()
}
try { try {
// If there are no system messages in the chat inject system prompts
const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
if (!hasSystemMessage) {
const systemPrompt = {
role: 'system' as const,
content: SYSTEM_PROMPTS.default,
}
logger.debug('[OllamaController] Injecting system prompt')
reqData.messages.unshift(systemPrompt)
}
// Query rewriting for better RAG retrieval with manageable context
// Will return user's latest message if no rewriting is needed
const rewrittenQuery = await this.rewriteQueryWithContext(reqData.messages)
logger.debug(`[OllamaController] Rewritten query for RAG: "${rewrittenQuery}"`)
if (rewrittenQuery) {
const relevantDocs = await this.ragService.searchSimilarDocuments(
rewrittenQuery,
5, // Top 5 most relevant chunks
0.3 // Minimum similarity score of 0.3
)
logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
// If relevant context is found, inject as a system message
if (relevantDocs.length > 0) {
const contextText = relevantDocs
.map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
.join('\n\n')
const systemMessage = {
role: 'system' as const,
content: SYSTEM_PROMPTS.rag_context(contextText),
}
// Insert system message at the beginning (after any existing system messages)
const firstNonSystemIndex = reqData.messages.findIndex((msg) => msg.role !== 'system')
const insertIndex = firstNonSystemIndex === -1 ? 0 : firstNonSystemIndex
reqData.messages.splice(insertIndex, 0, systemMessage)
}
}
// Check if the model supports "thinking" capability for enhanced response generation
// If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
// Headers already flushed above
const stream = await this.ollamaService.chatStream({ ...reqData, think }) const stream = await this.ollamaService.chatStream({ ...reqData, think })
for await (const chunk of stream) { for await (const chunk of stream) {
response.response.write(`data: ${JSON.stringify(chunk)}\n\n`) response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
} }
} catch (error) {
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
} finally {
response.response.end() response.response.end()
return
} }
return
}
// Non-streaming (legacy) path // Non-streaming (legacy) path
return await this.ollamaService.chat({ ...reqData, think }) return await this.ollamaService.chat({ ...reqData, think })
} catch (error) {
if (reqData.stream) {
response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
response.response.end()
return
}
throw error
}
} }
async deleteModel({ request }: HttpContext) { async deleteModel({ request }: HttpContext) {
@ -126,17 +132,17 @@ export default class OllamaController {
} }
private async rewriteQueryWithContext( private async rewriteQueryWithContext(
messages: Message[], messages: Message[]
model: string
): Promise<string | null> { ): Promise<string | null> {
try { try {
// Get recent conversation history (last 6 messages for 3 turns) // Get recent conversation history (last 6 messages for 3 turns)
const recentMessages = messages.slice(-6) const recentMessages = messages.slice(-6)
// If there's only one user message, no rewriting needed // Skip rewriting for short conversations. Rewriting adds latency with
// little RAG benefit until there is enough context to matter.
const userMessages = recentMessages.filter(msg => msg.role === 'user') const userMessages = recentMessages.filter(msg => msg.role === 'user')
if (userMessages.length <= 1) { if (userMessages.length <= 2) {
return userMessages[0]?.content || null return userMessages[userMessages.length - 1]?.content || null
} }
const conversationContext = recentMessages const conversationContext = recentMessages
@ -150,8 +156,17 @@ export default class OllamaController {
}) })
.join('\n') .join('\n')
const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 })
const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
if (!rewriteModelAvailable) {
logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
return lastUserMessage?.content || null
}
// FUTURE ENHANCEMENT: allow the user to specify which model to use for rewriting
const response = await this.ollamaService.chat({ const response = await this.ollamaService.chat({
model, model: DEFAULT_QUERY_REWRITE_MODEL,
messages: [ messages: [
{ {
role: 'system', role: 'system',

View File

@ -51,12 +51,12 @@ export default class SettingsController {
} }
async models({ inertia }: HttpContext) { async models({ inertia }: HttpContext) {
const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null }); const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 });
const installedModels = await this.ollamaService.getModels(); const installedModels = await this.ollamaService.getModels();
const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled') const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
return inertia.render('settings/models', { return inertia.render('settings/models', {
models: { models: {
availableModels: availableModels || [], availableModels: availableModels?.models || [],
installedModels: installedModels || [], installedModels: installedModels || [],
settings: { settings: {
chatSuggestionsEnabled: parseBoolean(chatSuggestionsEnabled) chatSuggestionsEnabled: parseBoolean(chatSuggestionsEnabled)

View File

@ -183,12 +183,13 @@ export class OllamaService {
} }
async getAvailableModels( async getAvailableModels(
{ sort, recommendedOnly, query }: { sort?: 'pulls' | 'name'; recommendedOnly?: boolean, query: string | null } = { { sort, recommendedOnly, query, limit }: { sort?: 'pulls' | 'name'; recommendedOnly?: boolean, query: string | null, limit?: number } = {
sort: 'pulls', sort: 'pulls',
recommendedOnly: false, recommendedOnly: false,
query: null, query: null,
limit: 15,
} }
): Promise<NomadOllamaModel[] | null> { ): Promise<{ models: NomadOllamaModel[], hasMore: boolean } | null> {
try { try {
const models = await this.retrieveAndRefreshModels(sort) const models = await this.retrieveAndRefreshModels(sort)
if (!models) { if (!models) {
@ -196,12 +197,18 @@ export class OllamaService {
logger.warn( logger.warn(
'[OllamaService] Returning fallback recommended models due to failure in fetching available models' '[OllamaService] Returning fallback recommended models due to failure in fetching available models'
) )
return FALLBACK_RECOMMENDED_OLLAMA_MODELS return {
models: FALLBACK_RECOMMENDED_OLLAMA_MODELS,
hasMore: false
}
} }
if (!recommendedOnly) { if (!recommendedOnly) {
const filteredModels = query ? this.fuseSearchModels(models, query) : models const filteredModels = query ? this.fuseSearchModels(models, query) : models
return filteredModels return {
models: filteredModels.slice(0, limit || 15),
hasMore: filteredModels.length > (limit || 15)
}
} }
// If recommendedOnly is true, only return the first three models (if sorted by pulls, these will be the top 3) // If recommendedOnly is true, only return the first three models (if sorted by pulls, these will be the top 3)
@ -217,10 +224,17 @@ export class OllamaService {
}) })
if (query) { if (query) {
return this.fuseSearchModels(recommendedModels, query) const filteredRecommendedModels = this.fuseSearchModels(recommendedModels, query)
return {
models: filteredRecommendedModels,
hasMore: filteredRecommendedModels.length > (limit || 15)
}
} }
return recommendedModels return {
models: recommendedModels,
hasMore: recommendedModels.length > (limit || 15)
}
} catch (error) { } catch (error) {
logger.error( logger.error(
`[OllamaService] Failed to get available models: ${error instanceof Error ? error.message : error}` `[OllamaService] Failed to get available models: ${error instanceof Error ? error.message : error}`

View File

@ -18,5 +18,6 @@ export const getAvailableModelsSchema = vine.compile(
sort: vine.enum(['pulls', 'name'] as const).optional(), sort: vine.enum(['pulls', 'name'] as const).optional(),
recommendedOnly: vine.boolean().optional(), recommendedOnly: vine.boolean().optional(),
query: vine.string().trim().optional(), query: vine.string().trim().optional(),
limit: vine.number().positive().optional(),
}) })
) )

View File

@ -62,6 +62,8 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
}, },
] ]
export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // default to qwen2.5 for query rewriting with good balance of text task performance and resource usage
export const SYSTEM_PROMPTS = { export const SYSTEM_PROMPTS = {
default: ` default: `
Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred. Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.

View File

@ -1,5 +1,16 @@
# Release Notes # Release Notes
## Unreleased
### Features
- **Settings**: Added pagination support for Ollama model list
### Bug Fixes
### Improvements
- **AI Assistant**: Improved chat performance by optimizing query rewriting and response streaming logic
- **CI/CD**: Updated release workflows to support release candidate versions
## Version 1.26.0 - February 19, 2026 ## Version 1.26.0 - February 19, 2026
### Features ### Features

View File

@ -5,6 +5,10 @@ import { ChatMessage } from '../../../types/chat'
import ChatMessageBubble from './ChatMessageBubble' import ChatMessageBubble from './ChatMessageBubble'
import ChatAssistantAvatar from './ChatAssistantAvatar' import ChatAssistantAvatar from './ChatAssistantAvatar'
import BouncingDots from '../BouncingDots' import BouncingDots from '../BouncingDots'
import StyledModal from '../StyledModal'
import api from '~/lib/api'
import { DEFAULT_QUERY_REWRITE_MODEL } from '../../../constants/ollama'
import { useNotifications } from '~/context/NotificationContext'
interface ChatInterfaceProps { interface ChatInterfaceProps {
messages: ChatMessage[] messages: ChatMessage[]
@ -13,6 +17,7 @@ interface ChatInterfaceProps {
chatSuggestions?: string[] chatSuggestions?: string[]
chatSuggestionsEnabled?: boolean chatSuggestionsEnabled?: boolean
chatSuggestionsLoading?: boolean chatSuggestionsLoading?: boolean
rewriteModelAvailable?: boolean
} }
export default function ChatInterface({ export default function ChatInterface({
@ -22,11 +27,28 @@ export default function ChatInterface({
chatSuggestions = [], chatSuggestions = [],
chatSuggestionsEnabled = false, chatSuggestionsEnabled = false,
chatSuggestionsLoading = false, chatSuggestionsLoading = false,
rewriteModelAvailable = false
}: ChatInterfaceProps) { }: ChatInterfaceProps) {
const { addNotification } = useNotifications()
const [input, setInput] = useState('') const [input, setInput] = useState('')
const [downloadDialogOpen, setDownloadDialogOpen] = useState(false)
const [isDownloading, setIsDownloading] = useState(false)
const messagesEndRef = useRef<HTMLDivElement>(null) const messagesEndRef = useRef<HTMLDivElement>(null)
const textareaRef = useRef<HTMLTextAreaElement>(null) const textareaRef = useRef<HTMLTextAreaElement>(null)
const handleDownloadModel = async () => {
setIsDownloading(true)
try {
await api.downloadModel(DEFAULT_QUERY_REWRITE_MODEL)
addNotification({ type: 'success', message: 'Model download queued' })
} catch (error) {
addNotification({ type: 'error', message: 'Failed to queue model download' })
} finally {
setIsDownloading(false)
setDownloadDialogOpen(false)
}
}
const scrollToBottom = () => { const scrollToBottom = () => {
messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }) messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
} }
@ -162,6 +184,36 @@ export default function ChatInterface({
)} )}
</button> </button>
</form> </form>
{!rewriteModelAvailable && (
<div className="text-sm text-gray-500 mt-2">
The {DEFAULT_QUERY_REWRITE_MODEL} model is not installed. Consider{' '}
<button
onClick={() => setDownloadDialogOpen(true)}
className="text-desert-green underline hover:text-desert-green/80 cursor-pointer"
>
downloading it
</button>{' '}
for improved retrieval-augmented generation (RAG) performance.
</div>
)}
<StyledModal
open={downloadDialogOpen}
title={`Download ${DEFAULT_QUERY_REWRITE_MODEL}?`}
confirmText="Download"
cancelText="Cancel"
confirmIcon='IconDownload'
confirmVariant='primary'
confirmLoading={isDownloading}
onConfirm={handleDownloadModel}
onCancel={() => setDownloadDialogOpen(false)}
onClose={() => setDownloadDialogOpen(false)}
>
<p className="text-gray-700">
This will dispatch a background download job for{' '}
<span className="font-mono font-medium">{DEFAULT_QUERY_REWRITE_MODEL}</span> and may take some time to complete. The model
will be used to rewrite queries for improved RAG retrieval performance.
</p>
</StyledModal>
</div> </div>
</div> </div>
) )

View File

@ -29,7 +29,9 @@ export default function ChatMessageBubble({ message }: ChatMessageBubbleProps) {
{!message.isThinking && message.thinking && ( {!message.isThinking && message.thinking && (
<details className="mb-3 rounded border border-gray-200 bg-gray-50 text-xs"> <details className="mb-3 rounded border border-gray-200 bg-gray-50 text-xs">
<summary className="cursor-pointer px-3 py-2 font-medium text-gray-500 hover:text-gray-700 select-none"> <summary className="cursor-pointer px-3 py-2 font-medium text-gray-500 hover:text-gray-700 select-none">
Reasoning {message.thinkingDuration !== undefined
? `Thought for ${message.thinkingDuration}s`
: 'Reasoning'}
</summary> </summary>
<div className="px-3 pb-3 prose prose-xs max-w-none text-gray-600 max-h-48 overflow-y-auto border-t border-gray-200 pt-2"> <div className="px-3 pb-3 prose prose-xs max-w-none text-gray-600 max-h-48 overflow-y-auto border-t border-gray-200 pt-2">
<ReactMarkdown remarkPlugins={[remarkGfm]}>{message.thinking}</ReactMarkdown> <ReactMarkdown remarkPlugins={[remarkGfm]}>{message.thinking}</ReactMarkdown>

View File

@ -1,4 +1,4 @@
import { useState, useCallback, useEffect, useRef } from 'react' import { useState, useCallback, useEffect, useRef, useMemo } from 'react'
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query' import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
import ChatSidebar from './ChatSidebar' import ChatSidebar from './ChatSidebar'
import ChatInterface from './ChatInterface' import ChatInterface from './ChatInterface'
@ -9,6 +9,7 @@ import { useModals } from '~/context/ModalContext'
import { ChatMessage } from '../../../types/chat' import { ChatMessage } from '../../../types/chat'
import classNames from '~/lib/classNames' import classNames from '~/lib/classNames'
import { IconX } from '@tabler/icons-react' import { IconX } from '@tabler/icons-react'
import { DEFAULT_QUERY_REWRITE_MODEL } from '../../../constants/ollama'
interface ChatProps { interface ChatProps {
enabled: boolean enabled: boolean
@ -68,6 +69,10 @@ export default function Chat({
refetchOnMount: false, refetchOnMount: false,
}) })
const rewriteModelAvailable = useMemo(() => {
return installedModels.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
}, [installedModels])
const deleteAllSessionsMutation = useMutation({ const deleteAllSessionsMutation = useMutation({
mutationFn: () => api.deleteAllChatSessions(), mutationFn: () => api.deleteAllChatSessions(),
onSuccess: () => { onSuccess: () => {
@ -230,11 +235,16 @@ export default function Chat({
let fullContent = '' let fullContent = ''
let thinkingContent = '' let thinkingContent = ''
let isThinkingPhase = true let isThinkingPhase = true
let thinkingStartTime: number | null = null
let thinkingDuration: number | null = null
try { try {
await api.streamChatMessage( await api.streamChatMessage(
{ model: selectedModel || 'llama3.2', messages: chatMessages, stream: true }, { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
(chunkContent, chunkThinking, done) => { (chunkContent, chunkThinking, done) => {
if (chunkThinking.length > 0 && thinkingStartTime === null) {
thinkingStartTime = Date.now()
}
if (isFirstChunk) { if (isFirstChunk) {
isFirstChunk = false isFirstChunk = false
setIsStreamingResponse(false) setIsStreamingResponse(false)
@ -248,22 +258,27 @@ export default function Chat({
timestamp: new Date(), timestamp: new Date(),
isStreaming: true, isStreaming: true,
isThinking: chunkThinking.length > 0 && chunkContent.length === 0, isThinking: chunkThinking.length > 0 && chunkContent.length === 0,
thinkingDuration: undefined,
}, },
]) ])
} else { } else {
if (isThinkingPhase && chunkContent.length > 0) { if (isThinkingPhase && chunkContent.length > 0) {
isThinkingPhase = false isThinkingPhase = false
if (thinkingStartTime !== null) {
thinkingDuration = Math.max(1, Math.round((Date.now() - thinkingStartTime) / 1000))
}
} }
setMessages((prev) => setMessages((prev) =>
prev.map((m) => prev.map((m) =>
m.id === assistantMsgId m.id === assistantMsgId
? { ? {
...m, ...m,
content: m.content + chunkContent, content: m.content + chunkContent,
thinking: (m.thinking ?? '') + chunkThinking, thinking: (m.thinking ?? '') + chunkThinking,
isStreaming: !done, isStreaming: !done,
isThinking: isThinkingPhase, isThinking: isThinkingPhase,
} thinkingDuration: thinkingDuration ?? undefined,
}
: m : m
) )
) )
@ -391,6 +406,7 @@ export default function Chat({
chatSuggestions={chatSuggestions} chatSuggestions={chatSuggestions}
chatSuggestionsEnabled={suggestionsEnabled} chatSuggestionsEnabled={suggestionsEnabled}
chatSuggestionsLoading={chatSuggestionsLoading} chatSuggestionsLoading={chatSuggestionsLoading}
rewriteModelAvailable={rewriteModelAvailable}
/> />
</div> </div>
</div> </div>

View File

@ -196,10 +196,13 @@ class API {
})() })()
} }
async getAvailableModels(query: string | null, recommendedOnly: boolean): Promise<NomadOllamaModel[] | undefined> { async getAvailableModels(params: { query?: string; recommendedOnly?: boolean; limit?: number }) {
return catchInternal(async () => { return catchInternal(async () => {
const response = await this.client.get<NomadOllamaModel[]>('/ollama/models', { const response = await this.client.get<{
params: { sort: 'pulls', recommendedOnly, query }, models: NomadOllamaModel[]
hasMore: boolean
}>('/ollama/models', {
params: { sort: 'pulls', ...params },
}) })
return response.data return response.data
})() })()
@ -506,7 +509,7 @@ class API {
// For 409 Conflict errors, throw a specific error that the UI can handle // For 409 Conflict errors, throw a specific error that the UI can handle
if (error.response?.status === 409) { if (error.response?.status === 409) {
const err = new Error(error.response?.data?.error || 'This benchmark has already been submitted to the repository') const err = new Error(error.response?.data?.error || 'This benchmark has already been submitted to the repository')
;(err as any).status = 409 ; (err as any).status = 409
throw err throw err
} }
// For other errors, extract the message and throw // For other errors, extract the message and throw

View File

@ -152,7 +152,13 @@ export default function EasySetupWizard(props: { system: { services: ServiceSlim
const { data: recommendedModels, isLoading: isLoadingRecommendedModels } = useQuery({ const { data: recommendedModels, isLoading: isLoadingRecommendedModels } = useQuery({
queryKey: ['recommended-ollama-models'], queryKey: ['recommended-ollama-models'],
queryFn: () => api.getAvailableModels(null, true), queryFn: async () => {
const res = await api.getAvailableModels({ recommendedOnly: true })
if (!res) {
return []
}
return res.models
},
refetchOnWindowFocus: false, refetchOnWindowFocus: false,
}) })
@ -736,7 +742,7 @@ export default function EasySetupWizard(props: { system: { services: ServiceSlim
className={classNames( className={classNames(
'relative', 'relative',
selectedMapCollections.includes(collection.slug) && selectedMapCollections.includes(collection.slug) &&
'ring-4 ring-desert-green rounded-lg', 'ring-4 ring-desert-green rounded-lg',
collection.all_installed && 'opacity-75', collection.all_installed && 'opacity-75',
!isOnline && 'opacity-50 cursor-not-allowed' !isOnline && 'opacity-50 cursor-not-allowed'
)} )}

View File

@ -37,21 +37,29 @@ export default function ModelsPage(props: {
const [query, setQuery] = useState('') const [query, setQuery] = useState('')
const [queryUI, setQueryUI] = useState('') const [queryUI, setQueryUI] = useState('')
const [limit, setLimit] = useState(15)
const debouncedSetQuery = debounce((val: string) => { const debouncedSetQuery = debounce((val: string) => {
setQuery(val) setQuery(val)
}, 300) }, 300)
const { data: availableModels, isLoading } = useQuery({ const { data: availableModelData, isFetching } = useQuery({
queryKey: ['ollama', 'availableModels', query], queryKey: ['ollama', 'availableModels', query, limit],
queryFn: async () => { queryFn: async () => {
const res = await api.getAvailableModels(query, false) const res = await api.getAvailableModels({
query,
recommendedOnly: false,
limit,
})
if (!res) { if (!res) {
return [] return {
models: [],
hasMore: false,
}
} }
return res return res
}, },
initialData: props.models.availableModels, initialData: { models: props.models.availableModels, hasMore: false },
}) })
async function handleInstallModel(modelName: string) { async function handleInstallModel(modelName: string) {
@ -209,8 +217,8 @@ export default function ModelsPage(props: {
title: 'Last Updated', title: 'Last Updated',
}, },
]} ]}
data={availableModels || []} data={availableModelData?.models || []}
loading={isLoading} loading={isFetching}
expandable={{ expandable={{
expandedRowRender: (record) => ( expandedRowRender: (record) => (
<div className="pl-14"> <div className="pl-14">
@ -283,6 +291,18 @@ export default function ModelsPage(props: {
), ),
}} }}
/> />
<div className="flex justify-center mt-6">
{availableModelData?.hasMore && (
<StyledButton
variant="primary"
onClick={() => {
setLimit((prev) => prev + 15)
}}
>
Load More
</StyledButton>
)}
</div>
</main> </main>
</div> </div>
</SettingsLayout> </SettingsLayout>

View File

@ -6,6 +6,7 @@ export interface ChatMessage {
isStreaming?: boolean isStreaming?: boolean
thinking?: string thinking?: string
isThinking?: boolean isThinking?: boolean
thinkingDuration?: number
} }
export interface ChatSession { export interface ChatSession {

View File

@ -1,6 +1,6 @@
{ {
"name": "project-nomad", "name": "project-nomad",
"version": "1.26.1", "version": "1.27.0-rc.1",
"description": "\"", "description": "\"",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {