chore(release): 1.27.0-rc.1 [skip ci]

docs: update release notes
fix(AI): improved perf via rewrite and streaming logic
2026-03-28 19:49:25 +01:00 · 2026-02-25 17:48:12 +00:00 · 2026-02-25 17:45:53 +00:00 · 2026-02-25 17:42:22 +00:00 · 2026-02-25 17:32:24 +00:00 · 2026-02-25 06:13:40 +00:00
17 changed files with 267 additions and 107 deletions
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@ -7,6 +7,11 @@ on:
        description: 'Semantic version to label the Docker image under'
        required: true
        type: string
+      tag_latest:
+        description: 'Also tag this image as :latest? (Keep false for RC and beta releases)'
+        required: false
+        type: boolean
+        default: false

 jobs:
  check_authorization:
@ -41,4 +46,4 @@ jobs:
          push: true
          tags: |
            ghcr.io/crosstalk-solutions/project-nomad:${{ inputs.version }}
-            ghcr.io/crosstalk-solutions/project-nomad:latest
+            ${{ inputs.tag_latest && 'ghcr.io/crosstalk-solutions/project-nomad:latest' || '' }}
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@ -37,7 +37,10 @@ jobs:
          GIT_COMMITTER_EMAIL: dev@cosmistack.com

      - name: Finalize release notes
-        if: steps.semver.outputs.new_release_published == 'true'
+        # Skip for pre-releases (versions containing a hyphen, e.g. 1.27.0-rc.1)
+        if: |
+          steps.semver.outputs.new_release_published == 'true' &&
+          !contains(steps.semver.outputs.new_release_version, '-')
        id: finalize-notes
        env:
          GITHUB_REPOSITORY: ${{ github.repository }}
@ -55,17 +58,23 @@ jobs:
          fi

      - name: Commit finalized release notes
-        if: steps.semver.outputs.new_release_published == 'true' && steps.finalize-notes.outputs.has_notes == 'true'
+        if: |
+          steps.semver.outputs.new_release_published == 'true' &&
+          steps.finalize-notes.outputs.has_notes == 'true' &&
+          !contains(steps.semver.outputs.new_release_version, '-')
        run: |
          git config user.name "cosmistack-bot"
          git config user.email "dev@cosmistack.com"
          git remote set-url origin https://x-access-token:${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }}@github.com/${{ github.repository }}.git
          git add admin/docs/release-notes.md
          git commit -m "docs(release): finalize v${{ steps.semver.outputs.new_release_version }} release notes [skip ci]"
-          git push origin master
+          git push origin ${{ github.ref_name }}

      - name: Update GitHub release body
-        if: steps.semver.outputs.new_release_published == 'true' && steps.finalize-notes.outputs.has_notes == 'true'
+        if: |
+          steps.semver.outputs.new_release_published == 'true' &&
+          steps.finalize-notes.outputs.has_notes == 'true' &&
+          !contains(steps.semver.outputs.new_release_version, '-')
        env:
          GH_TOKEN: ${{ secrets.COSMISTACKBOT_ACCESS_TOKEN }}
        run: |
--- a/.releaserc.json
+++ b/.releaserc.json
@ -1,5 +1,8 @@
 {
-  "branches": ["master"],
+  "branches": [
+    "master",
+    { "name": "rc", "prerelease": "rc" }
+  ],
  "plugins": [
    "@semantic-release/commit-analyzer",
    "@semantic-release/release-notes-generator",
--- a/admin/app/controllers/ollama_controller.ts
+++ b/admin/app/controllers/ollama_controller.ts
@ -4,7 +4,7 @@ import { modelNameSchema } from '#validators/download'
 import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
 import { inject } from '@adonisjs/core'
 import type { HttpContext } from '@adonisjs/core/http'
-import { SYSTEM_PROMPTS } from '../../constants/ollama.js'
+import { DEFAULT_QUERY_REWRITE_MODEL, SYSTEM_PROMPTS } from '../../constants/ollama.js'
 import logger from '@adonisjs/core/services/logger'
 import type { Message } from 'ollama'

@ -21,86 +21,92 @@ export default class OllamaController {
      sort: reqData.sort,
      recommendedOnly: reqData.recommendedOnly,
      query: reqData.query || null,
+      limit: reqData.limit || 15,
    })
  }

  async chat({ request, response }: HttpContext) {
    const reqData = await request.validateUsing(chatSchema)

-    // If there are no system messages in the chat inject system prompts
-    const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
-    if (!hasSystemMessage) {
-      const systemPrompt = {
-        role: 'system' as const,
-        content: SYSTEM_PROMPTS.default,
-      }
-      logger.debug('[OllamaController] Injecting system prompt')
-      reqData.messages.unshift(systemPrompt)
-    }
-
-    // Query rewriting for better RAG retrieval with manageable context
-    // Will return user's latest message if no rewriting is needed
-    const rewrittenQuery = await this.rewriteQueryWithContext(
-      reqData.messages,
-      reqData.model
-    )
-
-    logger.debug(`[OllamaController] Rewritten query for RAG: "${rewrittenQuery}"`)
-    if (rewrittenQuery) {
-      const relevantDocs = await this.ragService.searchSimilarDocuments(
-        rewrittenQuery,
-        5, // Top 5 most relevant chunks
-        0.3 // Minimum similarity score of 0.3
-      )
-
-      logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
-
-      // If relevant context is found, inject as a system message
-      if (relevantDocs.length > 0) {
-        const contextText = relevantDocs
-          .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
-          .join('\n\n')
-
-        const systemMessage = {
-          role: 'system' as const,
-          content: SYSTEM_PROMPTS.rag_context(contextText),
-        }
-
-        // Insert system message at the beginning (after any existing system messages)
-        const firstNonSystemIndex = reqData.messages.findIndex((msg) => msg.role !== 'system')
-        const insertIndex = firstNonSystemIndex === -1 ? 0 : firstNonSystemIndex
-        reqData.messages.splice(insertIndex, 0, systemMessage)
-      }
-    }
-
-    // Check if the model supports "thinking" capability for enhanced response generation
-    // If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
-    const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
-    const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
-    
+    // Flush SSE headers immediately so the client connection is already open while
+    // pre-processing (query rewriting, RAG lookup) is awaited below.
    if (reqData.stream) {
-      logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
-      // SSE streaming path
      response.response.setHeader('Content-Type', 'text/event-stream')
      response.response.setHeader('Cache-Control', 'no-cache')
      response.response.setHeader('Connection', 'keep-alive')
      response.response.flushHeaders()
+    }

-      try {
+    try {
+      // If there are no system messages in the chat inject system prompts
+      const hasSystemMessage = reqData.messages.some((msg) => msg.role === 'system')
+      if (!hasSystemMessage) {
+        const systemPrompt = {
+          role: 'system' as const,
+          content: SYSTEM_PROMPTS.default,
+        }
+        logger.debug('[OllamaController] Injecting system prompt')
+        reqData.messages.unshift(systemPrompt)
+      }
+
+      // Query rewriting for better RAG retrieval with manageable context
+      // Will return user's latest message if no rewriting is needed
+      const rewrittenQuery = await this.rewriteQueryWithContext(reqData.messages)
+
+      logger.debug(`[OllamaController] Rewritten query for RAG: "${rewrittenQuery}"`)
+      if (rewrittenQuery) {
+        const relevantDocs = await this.ragService.searchSimilarDocuments(
+          rewrittenQuery,
+          5, // Top 5 most relevant chunks
+          0.3 // Minimum similarity score of 0.3
+        )
+
+        logger.debug(`[RAG] Retrieved ${relevantDocs.length} relevant documents for query: "${rewrittenQuery}"`)
+
+        // If relevant context is found, inject as a system message
+        if (relevantDocs.length > 0) {
+          const contextText = relevantDocs
+            .map((doc, idx) => `[Context ${idx + 1}] (Relevance: ${(doc.score * 100).toFixed(1)}%)\n${doc.text}`)
+            .join('\n\n')
+
+          const systemMessage = {
+            role: 'system' as const,
+            content: SYSTEM_PROMPTS.rag_context(contextText),
+          }
+
+          // Insert system message at the beginning (after any existing system messages)
+          const firstNonSystemIndex = reqData.messages.findIndex((msg) => msg.role !== 'system')
+          const insertIndex = firstNonSystemIndex === -1 ? 0 : firstNonSystemIndex
+          reqData.messages.splice(insertIndex, 0, systemMessage)
+        }
+      }
+
+      // Check if the model supports "thinking" capability for enhanced response generation
+      // If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
+      const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
+      const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
+
+      if (reqData.stream) {
+        logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
+        // Headers already flushed above
        const stream = await this.ollamaService.chatStream({ ...reqData, think })
        for await (const chunk of stream) {
          response.response.write(`data: ${JSON.stringify(chunk)}\n\n`)
        }
-      } catch (error) {
-        response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
-      } finally {
        response.response.end()
+        return
      }
-      return
-    }

-    // Non-streaming (legacy) path
-    return await this.ollamaService.chat({ ...reqData, think })
+      // Non-streaming (legacy) path
+      return await this.ollamaService.chat({ ...reqData, think })
+    } catch (error) {
+      if (reqData.stream) {
+        response.response.write(`data: ${JSON.stringify({ error: true })}\n\n`)
+        response.response.end()
+        return
+      }
+      throw error
+    }
  }

  async deleteModel({ request }: HttpContext) {
@ -126,17 +132,17 @@ export default class OllamaController {
  }

  private async rewriteQueryWithContext(
-    messages: Message[],
-    model: string
+    messages: Message[]
  ): Promise<string | null> {
    try {
      // Get recent conversation history (last 6 messages for 3 turns)
      const recentMessages = messages.slice(-6)

-      // If there's only one user message, no rewriting needed
+      // Skip rewriting for short conversations. Rewriting adds latency with
+      // little RAG benefit until there is enough context to matter.
      const userMessages = recentMessages.filter(msg => msg.role === 'user')
-      if (userMessages.length <= 1) {
-        return userMessages[0]?.content || null
+      if (userMessages.length <= 2) {
+        return userMessages[userMessages.length - 1]?.content || null
      }

      const conversationContext = recentMessages
@ -150,8 +156,17 @@ export default class OllamaController {
        })
        .join('\n')

+      const availableModels = await this.ollamaService.getAvailableModels({ query: null, limit: 500 })
+      const rewriteModelAvailable = availableModels?.models.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
+      if (!rewriteModelAvailable) {
+        logger.warn(`[RAG] Query rewrite model "${DEFAULT_QUERY_REWRITE_MODEL}" not available. Skipping query rewriting.`)
+        const lastUserMessage = [...messages].reverse().find(msg => msg.role === 'user')
+        return lastUserMessage?.content || null
+      }
+
+      // FUTURE ENHANCEMENT: allow the user to specify which model to use for rewriting
      const response = await this.ollamaService.chat({
-        model,
+        model: DEFAULT_QUERY_REWRITE_MODEL,
        messages: [
          {
            role: 'system',
--- a/admin/app/controllers/settings_controller.ts
+++ b/admin/app/controllers/settings_controller.ts
@ -51,12 +51,12 @@ export default class SettingsController {
    }

    async models({ inertia }: HttpContext) {
-        const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null });
+        const availableModels = await this.ollamaService.getAvailableModels({ sort: 'pulls', recommendedOnly: false, query: null, limit: 15 });
        const installedModels = await this.ollamaService.getModels();
        const chatSuggestionsEnabled = await KVStore.getValue('chat.suggestionsEnabled')
        return inertia.render('settings/models', {
            models: {
-                availableModels: availableModels || [],
+                availableModels: availableModels?.models || [],
                installedModels: installedModels || [],
                settings: {
                    chatSuggestionsEnabled: parseBoolean(chatSuggestionsEnabled)
--- a/admin/app/services/ollama_service.ts
+++ b/admin/app/services/ollama_service.ts
@ -183,12 +183,13 @@ export class OllamaService {
  }

  async getAvailableModels(
-    { sort, recommendedOnly, query }: { sort?: 'pulls' | 'name'; recommendedOnly?: boolean, query: string | null } = {
+    { sort, recommendedOnly, query, limit }: { sort?: 'pulls' | 'name'; recommendedOnly?: boolean, query: string | null, limit?: number } = {
      sort: 'pulls',
      recommendedOnly: false,
      query: null,
+      limit: 15,
    }
-  ): Promise<NomadOllamaModel[] | null> {
+  ): Promise<{ models: NomadOllamaModel[], hasMore: boolean } | null> {
    try {
      const models = await this.retrieveAndRefreshModels(sort)
      if (!models) {
@ -196,12 +197,18 @@ export class OllamaService {
        logger.warn(
          '[OllamaService] Returning fallback recommended models due to failure in fetching available models'
        )
-        return FALLBACK_RECOMMENDED_OLLAMA_MODELS
+        return {
+          models: FALLBACK_RECOMMENDED_OLLAMA_MODELS,
+          hasMore: false
+        }
      }

      if (!recommendedOnly) {
        const filteredModels = query ? this.fuseSearchModels(models, query) : models
-        return filteredModels
+        return {
+          models: filteredModels.slice(0, limit || 15),
+          hasMore: filteredModels.length > (limit || 15)
+        }
      }

      // If recommendedOnly is true, only return the first three models (if sorted by pulls, these will be the top 3)
@ -217,10 +224,17 @@ export class OllamaService {
      })

      if (query) {
-        return this.fuseSearchModels(recommendedModels, query)
+        const filteredRecommendedModels = this.fuseSearchModels(recommendedModels, query)
+        return {
+          models: filteredRecommendedModels,
+          hasMore: filteredRecommendedModels.length > (limit || 15)
+        }
      }

-      return recommendedModels
+      return {
+        models: recommendedModels,
+        hasMore: recommendedModels.length > (limit || 15)
+      }
    } catch (error) {
      logger.error(
        `[OllamaService] Failed to get available models: ${error instanceof Error ? error.message : error}`
@ -253,7 +267,7 @@ export class OllamaService {
      }

      const rawModels = response.data.models as NomadOllamaModel[]
-      
+
      // Filter out tags where cloud is truthy, then remove models with no remaining tags
      const noCloud = rawModels
        .map((model) => ({
--- a/admin/app/validators/ollama.ts
+++ b/admin/app/validators/ollama.ts
@ -18,5 +18,6 @@ export const getAvailableModelsSchema = vine.compile(
    sort: vine.enum(['pulls', 'name'] as const).optional(),
    recommendedOnly: vine.boolean().optional(),
    query: vine.string().trim().optional(),
+    limit: vine.number().positive().optional(),
  })
 )
--- a/admin/constants/ollama.ts
+++ b/admin/constants/ollama.ts
@ -62,6 +62,8 @@ export const FALLBACK_RECOMMENDED_OLLAMA_MODELS: NomadOllamaModel[] = [
  },
 ]

+export const DEFAULT_QUERY_REWRITE_MODEL = 'qwen2.5:3b' // Default model for RAG query rewriting; qwen2.5 offers a good balance of text-task performance and resource usage
+
 export const SYSTEM_PROMPTS = {
  default: `
 Format all responses using markdown for better readability. Vanilla markdown or GitHub-flavored markdown is preferred.
--- a/admin/docs/release-notes.md
+++ b/admin/docs/release-notes.md
@ -1,5 +1,16 @@
 # Release Notes

+## Unreleased
+
+### Features
+- **Settings**: Added pagination ("Load More") support for the available Ollama models list
+
+### Bug Fixes
+
+### Improvements
+- **AI Assistant**: Improved chat performance by optimizing query rewriting and response streaming logic
+- **CI/CD**: Updated release workflows to support release candidate versions
+
 ## Version 1.26.0 - February 19, 2026

 ### Features
--- a/admin/inertia/components/chat/ChatInterface.tsx
+++ b/admin/inertia/components/chat/ChatInterface.tsx
@ -5,6 +5,10 @@ import { ChatMessage } from '../../../types/chat'
 import ChatMessageBubble from './ChatMessageBubble'
 import ChatAssistantAvatar from './ChatAssistantAvatar'
 import BouncingDots from '../BouncingDots'
+import StyledModal from '../StyledModal'
+import api from '~/lib/api'
+import { DEFAULT_QUERY_REWRITE_MODEL } from '../../../constants/ollama'
+import { useNotifications } from '~/context/NotificationContext'

 interface ChatInterfaceProps {
  messages: ChatMessage[]
@ -13,6 +17,7 @@ interface ChatInterfaceProps {
  chatSuggestions?: string[]
  chatSuggestionsEnabled?: boolean
  chatSuggestionsLoading?: boolean
+  rewriteModelAvailable?: boolean
 }

 export default function ChatInterface({
@ -22,11 +27,28 @@ export default function ChatInterface({
  chatSuggestions = [],
  chatSuggestionsEnabled = false,
  chatSuggestionsLoading = false,
+  rewriteModelAvailable = false
 }: ChatInterfaceProps) {
+  const { addNotification } = useNotifications()
  const [input, setInput] = useState('')
+  const [downloadDialogOpen, setDownloadDialogOpen] = useState(false)
+  const [isDownloading, setIsDownloading] = useState(false)
  const messagesEndRef = useRef<HTMLDivElement>(null)
  const textareaRef = useRef<HTMLTextAreaElement>(null)

+  const handleDownloadModel = async () => { // Requests a download of the query-rewrite model and notifies the user of the outcome
+    setIsDownloading(true)
+    try {
+      await api.downloadModel(DEFAULT_QUERY_REWRITE_MODEL)
+      addNotification({ type: 'success', message: 'Model download queued' })
+    } catch (error) { // the caught error is not inspected; only a generic failure notification is shown
+      addNotification({ type: 'error', message: 'Failed to queue model download' })
+    } finally { // runs on success and failure alike: clear the loading flag and close the dialog
+      setIsDownloading(false)
+      setDownloadDialogOpen(false)
+    }
+  }
+
  const scrollToBottom = () => {
    messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
  }
@ -162,6 +184,36 @@ export default function ChatInterface({
            )}
          </button>
        </form>
+        {!rewriteModelAvailable && (
+          <div className="text-sm text-gray-500 mt-2">
+            The {DEFAULT_QUERY_REWRITE_MODEL} model is not installed. Consider{' '}
+            <button
+              onClick={() => setDownloadDialogOpen(true)}
+              className="text-desert-green underline hover:text-desert-green/80 cursor-pointer"
+            >
+              downloading it
+            </button>{' '}
+            for improved retrieval-augmented generation (RAG) performance.
+          </div>
+        )}
+        <StyledModal
+          open={downloadDialogOpen}
+          title={`Download ${DEFAULT_QUERY_REWRITE_MODEL}?`}
+          confirmText="Download"
+          cancelText="Cancel"
+          confirmIcon='IconDownload'
+          confirmVariant='primary'
+          confirmLoading={isDownloading}
+          onConfirm={handleDownloadModel}
+          onCancel={() => setDownloadDialogOpen(false)}
+          onClose={() => setDownloadDialogOpen(false)}
+        >
+          <p className="text-gray-700">
+            This will dispatch a background download job for{' '}
+            <span className="font-mono font-medium">{DEFAULT_QUERY_REWRITE_MODEL}</span> and may take some time to complete. The model
+            will be used to rewrite queries for improved RAG retrieval performance.
+          </p>
+        </StyledModal>
      </div>
    </div>
  )
--- a/admin/inertia/components/chat/ChatMessageBubble.tsx
+++ b/admin/inertia/components/chat/ChatMessageBubble.tsx
@ -29,7 +29,9 @@ export default function ChatMessageBubble({ message }: ChatMessageBubbleProps) {
      {!message.isThinking && message.thinking && (
        <details className="mb-3 rounded border border-gray-200 bg-gray-50 text-xs">
          <summary className="cursor-pointer px-3 py-2 font-medium text-gray-500 hover:text-gray-700 select-none">
-            Reasoning
+            {message.thinkingDuration !== undefined
+              ? `Thought for ${message.thinkingDuration}s`
+              : 'Reasoning'}
          </summary>
          <div className="px-3 pb-3 prose prose-xs max-w-none text-gray-600 max-h-48 overflow-y-auto border-t border-gray-200 pt-2">
            <ReactMarkdown remarkPlugins={[remarkGfm]}>{message.thinking}</ReactMarkdown>
--- a/admin/inertia/components/chat/index.tsx
+++ b/admin/inertia/components/chat/index.tsx
@ -1,4 +1,4 @@
-import { useState, useCallback, useEffect, useRef } from 'react'
+import { useState, useCallback, useEffect, useRef, useMemo } from 'react'
 import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'
 import ChatSidebar from './ChatSidebar'
 import ChatInterface from './ChatInterface'
@ -9,6 +9,7 @@ import { useModals } from '~/context/ModalContext'
 import { ChatMessage } from '../../../types/chat'
 import classNames from '~/lib/classNames'
 import { IconX } from '@tabler/icons-react'
+import { DEFAULT_QUERY_REWRITE_MODEL } from '../../../constants/ollama'

 interface ChatProps {
  enabled: boolean
@ -68,6 +69,10 @@ export default function Chat({
    refetchOnMount: false,
  })

+  const rewriteModelAvailable = useMemo(() => { // true when an installed model's name equals DEFAULT_QUERY_REWRITE_MODEL
+    return installedModels.some(model => model.name === DEFAULT_QUERY_REWRITE_MODEL)
+  }, [installedModels]) // recomputed only when installedModels changes
+
  const deleteAllSessionsMutation = useMutation({
    mutationFn: () => api.deleteAllChatSessions(),
    onSuccess: () => {
@ -159,7 +164,7 @@ export default function Chat({
    async (sessionId: string) => {
      // Cancel any ongoing suggestions fetch
      queryClient.cancelQueries({ queryKey: ['chatSuggestions'] })
-      
+
      setActiveSessionId(sessionId)
      // Load messages for this session
      const sessionData = await api.getChatSession(sessionId)
@ -230,11 +235,16 @@ export default function Chat({
        let fullContent = ''
        let thinkingContent = ''
        let isThinkingPhase = true
+        let thinkingStartTime: number | null = null
+        let thinkingDuration: number | null = null

        try {
          await api.streamChatMessage(
            { model: selectedModel || 'llama3.2', messages: chatMessages, stream: true },
            (chunkContent, chunkThinking, done) => {
+              if (chunkThinking.length > 0 && thinkingStartTime === null) {
+                thinkingStartTime = Date.now()
+              }
              if (isFirstChunk) {
                isFirstChunk = false
                setIsStreamingResponse(false)
@ -248,22 +258,27 @@ export default function Chat({
                    timestamp: new Date(),
                    isStreaming: true,
                    isThinking: chunkThinking.length > 0 && chunkContent.length === 0,
+                    thinkingDuration: undefined,
                  },
                ])
              } else {
                if (isThinkingPhase && chunkContent.length > 0) {
                  isThinkingPhase = false
+                  if (thinkingStartTime !== null) {
+                    thinkingDuration = Math.max(1, Math.round((Date.now() - thinkingStartTime) / 1000))
+                  }
                }
                setMessages((prev) =>
                  prev.map((m) =>
                    m.id === assistantMsgId
                      ? {
-                          ...m,
-                          content: m.content + chunkContent,
-                          thinking: (m.thinking ?? '') + chunkThinking,
-                          isStreaming: !done,
-                          isThinking: isThinkingPhase,
-                        }
+                        ...m,
+                        content: m.content + chunkContent,
+                        thinking: (m.thinking ?? '') + chunkThinking,
+                        isStreaming: !done,
+                        isThinking: isThinkingPhase,
+                        thinkingDuration: thinkingDuration ?? undefined,
+                      }
                      : m
                  )
                )
@ -391,6 +406,7 @@ export default function Chat({
          chatSuggestions={chatSuggestions}
          chatSuggestionsEnabled={suggestionsEnabled}
          chatSuggestionsLoading={chatSuggestionsLoading}
+          rewriteModelAvailable={rewriteModelAvailable}
        />
      </div>
    </div>
--- a/admin/inertia/lib/api.ts
+++ b/admin/inertia/lib/api.ts
@ -196,10 +196,13 @@ class API {
    })()
  }

-  async getAvailableModels(query: string | null, recommendedOnly: boolean): Promise<NomadOllamaModel[] | undefined> {
+  async getAvailableModels(params: { query?: string; recommendedOnly?: boolean; limit?: number }) { // GET /ollama/models; response body is typed as { models, hasMore }
    return catchInternal(async () => { // presumably yields undefined on failure (callers check for a falsy result) — confirm against catchInternal
-      const response = await this.client.get<NomadOllamaModel[]>('/ollama/models', {
-        params: { sort: 'pulls', recommendedOnly, query },
+      const response = await this.client.get<{
+        models: NomadOllamaModel[]
+        hasMore: boolean
+      }>('/ollama/models', {
+        params: { sort: 'pulls', ...params }, // sort is fixed to 'pulls'; caller-supplied query/recommendedOnly/limit are spread in
      })
      return response.data
    })()
@ -506,7 +509,7 @@ class API {
      // For 409 Conflict errors, throw a specific error that the UI can handle
      if (error.response?.status === 409) {
        const err = new Error(error.response?.data?.error || 'This benchmark has already been submitted to the repository')
-        ;(err as any).status = 409
+          ; (err as any).status = 409
        throw err
      }
      // For other errors, extract the message and throw
--- a/admin/inertia/pages/easy-setup/index.tsx
+++ b/admin/inertia/pages/easy-setup/index.tsx
@ -152,7 +152,13 @@ export default function EasySetupWizard(props: { system: { services: ServiceSlim

  const { data: recommendedModels, isLoading: isLoadingRecommendedModels } = useQuery({
    queryKey: ['recommended-ollama-models'],
-    queryFn: () => api.getAvailableModels(null, true),
+    queryFn: async () => {
+      const res = await api.getAvailableModels({ recommendedOnly: true })
+      if (!res) {
+        return []
+      }
+      return res.models
+    },
    refetchOnWindowFocus: false,
  })

@ -736,7 +742,7 @@ export default function EasySetupWizard(props: { system: { services: ServiceSlim
              className={classNames(
                'relative',
                selectedMapCollections.includes(collection.slug) &&
-                  'ring-4 ring-desert-green rounded-lg',
+                'ring-4 ring-desert-green rounded-lg',
                collection.all_installed && 'opacity-75',
                !isOnline && 'opacity-50 cursor-not-allowed'
              )}
@ -760,7 +766,7 @@ export default function EasySetupWizard(props: { system: { services: ServiceSlim

  const renderStep3 = () => {
    // Check if AI or Information capabilities are selected OR already installed
-    const isAiSelected = selectedServices.includes(SERVICE_NAMES.OLLAMA) || 
+    const isAiSelected = selectedServices.includes(SERVICE_NAMES.OLLAMA) ||
      installedServices.some((s) => s.service_name === SERVICE_NAMES.OLLAMA)
    const isInformationSelected = selectedServices.includes(SERVICE_NAMES.KIWIX) ||
      installedServices.some((s) => s.service_name === SERVICE_NAMES.KIWIX)
--- a/admin/inertia/pages/settings/models.tsx
+++ b/admin/inertia/pages/settings/models.tsx
@ -37,21 +37,29 @@ export default function ModelsPage(props: {

  const [query, setQuery] = useState('')
  const [queryUI, setQueryUI] = useState('')
+  const [limit, setLimit] = useState(15)

  const debouncedSetQuery = debounce((val: string) => {
    setQuery(val)
  }, 300)

-  const { data: availableModels, isLoading } = useQuery({
-    queryKey: ['ollama', 'availableModels', query],
+  const { data: availableModelData, isFetching } = useQuery({
+    queryKey: ['ollama', 'availableModels', query, limit],
    queryFn: async () => {
-      const res = await api.getAvailableModels(query, false)
+      const res = await api.getAvailableModels({
+        query,
+        recommendedOnly: false,
+        limit,
+      })
      if (!res) {
-        return []
+        return {
+          models: [],
+          hasMore: false,
+        }
      }
      return res
    },
-    initialData: props.models.availableModels,
+    initialData: { models: props.models.availableModels, hasMore: false },
  })

  async function handleInstallModel(modelName: string) {
@ -209,8 +217,8 @@ export default function ModelsPage(props: {
                title: 'Last Updated',
              },
            ]}
-            data={availableModels || []}
-            loading={isLoading}
+            data={availableModelData?.models || []}
+            loading={isFetching}
            expandable={{
              expandedRowRender: (record) => (
                <div className="pl-14">
@ -283,6 +291,18 @@ export default function ModelsPage(props: {
              ),
            }}
          />
+          <div className="flex justify-center mt-6">
+            {availableModelData?.hasMore && (
+              <StyledButton
+                variant="primary"
+                onClick={() => {
+                  setLimit((prev) => prev + 15)
+                }}
+              >
+                Load More
+              </StyledButton>
+            )}
+          </div>
        </main>
      </div>
    </SettingsLayout>
--- a/admin/types/chat.ts
+++ b/admin/types/chat.ts
@ -6,6 +6,7 @@ export interface ChatMessage {
  isStreaming?: boolean
  thinking?: string
  isThinking?: boolean
+  thinkingDuration?: number
 }

 export interface ChatSession {
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "project-nomad",
-  "version": "1.26.1",
+  "version": "1.27.0-rc.1",
  "description": "\"",
  "main": "index.js",
  "scripts": {
Author	SHA1	Message	Date
cosmistack-bot	071c2a89a0	chore(release): 1.27.0-rc.1 [skip ci]	2026-02-25 17:48:12 +00:00
Jake Turner	0c952e55e1	docs: update release notes	2026-02-25 17:45:53 +00:00
Jake Turner	274351b65a	fix(AI): improved perf via rewrite and streaming logic	2026-02-25 17:42:22 +00:00
Jake Turner	e319da67ba	ops: support RC versions	2026-02-25 17:32:24 +00:00
Jake Turner	2a63d95850	feat(Models): paginate available models endpoint	2026-02-25 06:13:40 +00:00