feat: add MiniMax as optional cloud LLM provider

- Add MiniMaxService with OpenAI-compatible API integration
- Route MiniMax models (MiniMax-M2.7, MiniMax-M2.7-highspeed) through
  existing chat controller alongside Ollama
- Cloud models appear in model selector when MINIMAX_API_KEY is set
- Add MINIMAX_API_KEY env var support
- Add 8 unit tests + 3 integration tests
- Update README with MiniMax mention
This commit is contained in:
PR Bot 2026-03-26 12:29:14 +08:00
parent 5c92c89813
commit ae856b268e
8 changed files with 419 additions and 17 deletions

View File

@ -37,7 +37,7 @@ For more control over the installation process, copy and paste the [Docker Compo
N.O.M.A.D. is a management UI ("Command Center") and API that orchestrates a collection of containerized tools and resources via [Docker](https://www.docker.com/). It handles installation, configuration, and updates for everything — so you don't have to.
**Built-in capabilities include:**
- **AI Chat with Knowledge Base** — local AI chat powered by [Ollama](https://ollama.com/), with document upload and semantic search (RAG via [Qdrant](https://qdrant.tech/))
- **AI Chat with Knowledge Base** — local AI chat powered by [Ollama](https://ollama.com/), with optional cloud models via [MiniMax](https://platform.minimax.io), plus document upload and semantic search (RAG via [Qdrant](https://qdrant.tech/))
- **Information Library** — offline Wikipedia, medical references, ebooks, and more via [Kiwix](https://kiwix.org/)
- **Education Platform** — Khan Academy courses with progress tracking via [Kolibri](https://learningequality.org/kolibri/)
- **Offline Maps** — downloadable regional maps via [ProtoMaps](https://protomaps.com)
@ -53,7 +53,7 @@ N.O.M.A.D. also includes built-in tools like a Wikipedia content selector, ZIM l
| Capability | Powered By | What You Get |
|-----------|-----------|-------------|
| Information Library | Kiwix | Offline Wikipedia, medical references, survival guides, ebooks |
| AI Assistant | Ollama + Qdrant | Built-in chat with document upload and semantic search |
| AI Assistant | Ollama + MiniMax + Qdrant | Built-in chat with local and cloud models, document upload and semantic search |
| Education Platform | Kolibri | Khan Academy courses, progress tracking, multi-user support |
| Offline Maps | ProtoMaps | Downloadable regional maps with search and navigation |
| Data Tools | CyberChef | Encryption, encoding, hashing, and data analysis |

View File

@ -15,4 +15,7 @@ REDIS_PORT=6379
# Storage path for NOMAD content (ZIM files, maps, etc.)
# On Windows dev, use an absolute path like: C:/nomad-storage
# On Linux production, use: /opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
NOMAD_STORAGE_PATH=/opt/project-nomad/storage
# Optional: MiniMax cloud LLM API key (enables cloud models alongside local Ollama models)
# Get your API key at https://platform.minimax.io
# MINIMAX_API_KEY=your_api_key_here

View File

@ -1,5 +1,6 @@
import { ChatService } from '#services/chat_service'
import { OllamaService } from '#services/ollama_service'
import { MiniMaxService } from '#services/minimax_service'
import { RagService } from '#services/rag_service'
import { modelNameSchema } from '#validators/download'
import { chatSchema, getAvailableModelsSchema } from '#validators/ollama'
@ -14,7 +15,8 @@ export default class OllamaController {
constructor(
private chatService: ChatService,
private ollamaService: OllamaService,
private ragService: RagService
private ragService: RagService,
private minimaxService: MiniMaxService
) { }
async availableModels({ request }: HttpContext) {
@ -103,13 +105,9 @@ export default class OllamaController {
}
}
// Check if the model supports "thinking" capability for enhanced response generation
// If gpt-oss model, it requires a text param for "think" https://docs.ollama.com/api/chat
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
// Separate sessionId from the Ollama request payload — Ollama rejects unknown fields
const { sessionId, ...ollamaRequest } = reqData
// Separate sessionId from the request payload
const { sessionId, ...chatRequest } = reqData
const isMiniMax = this.minimaxService.isMiniMaxModel(reqData.model)
// Save user message to DB before streaming if sessionId provided
let userContent: string | null = null
@ -122,9 +120,20 @@ export default class OllamaController {
}
if (reqData.stream) {
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" with think: ${think}`)
logger.debug(`[OllamaController] Initiating streaming response for model: "${reqData.model}" (provider: ${isMiniMax ? 'MiniMax' : 'Ollama'})`)
// Headers already flushed above
const stream = await this.ollamaService.chatStream({ ...ollamaRequest, think })
let stream: AsyncIterable<any>
if (isMiniMax) {
stream = this.minimaxService.chatStream(chatRequest)
} else {
// Check if the model supports "thinking" capability for enhanced response generation
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
stream = await this.ollamaService.chatStream({ ...chatRequest, think })
}
let fullContent = ''
for await (const chunk of stream) {
if (chunk.message?.content) {
@ -147,8 +156,16 @@ export default class OllamaController {
return
}
// Non-streaming (legacy) path
const result = await this.ollamaService.chat({ ...ollamaRequest, think })
// Non-streaming path
let result: any
if (isMiniMax) {
result = await this.minimaxService.chat(chatRequest)
} else {
const thinkingCapability = await this.ollamaService.checkModelHasThinking(reqData.model)
const think: boolean | 'medium' = thinkingCapability ? (reqData.model.startsWith('gpt-oss') ? 'medium' : true) : false
result = await this.ollamaService.chat({ ...chatRequest, think })
}
if (sessionId && result?.message?.content) {
await this.chatService.addMessage(sessionId, 'assistant', result.message.content)
@ -190,7 +207,9 @@ export default class OllamaController {
}
async installedModels({ }: HttpContext) {
return await this.ollamaService.getModels()
const ollamaModels = await this.ollamaService.getModels()
const minimaxModels = this.minimaxService.getModels()
return [...(ollamaModels || []), ...minimaxModels]
}
/**

View File

@ -0,0 +1,188 @@
import logger from '@adonisjs/core/services/logger'
import env from '#start/env'
// Base URL of MiniMax's OpenAI-compatible HTTP API.
const MINIMAX_BASE_URL = 'https://api.minimax.io/v1'
// Cloud model catalogue surfaced in the model selector when MINIMAX_API_KEY
// is configured. `as const` keeps the ids as literal types.
export const MINIMAX_MODELS = [
{ id: 'MiniMax-M2.7', name: 'MiniMax-M2.7' },
{ id: 'MiniMax-M2.7-highspeed', name: 'MiniMax-M2.7-highspeed' },
] as const
/**
 * Service for interacting with the MiniMax cloud LLM API.
 *
 * MiniMax provides an OpenAI-compatible chat completions API, so this service
 * uses native fetch to call it. When MINIMAX_API_KEY is set, cloud models
 * appear alongside local Ollama models in the model selector, giving users
 * an optional cloud-based alternative when internet is available.
 */
export class MiniMaxService {
  private apiKey: string | undefined

  constructor() {
    this.apiKey = env.get('MINIMAX_API_KEY')
  }

  /**
   * Whether MiniMax cloud models are available (API key is configured).
   */
  isAvailable(): boolean {
    return !!this.apiKey
  }

  /**
   * Whether the given model name is a MiniMax model.
   */
  isMiniMaxModel(model: string): boolean {
    return model.startsWith('MiniMax-')
  }

  /**
   * Returns MiniMax models in Ollama ModelResponse-compatible format
   * so they can be mixed into the installed models list. Cloud models
   * report size 0 and digest 'cloud' so the UI can distinguish them
   * from locally downloaded models.
   */
  getModels() {
    if (!this.isAvailable()) return []
    return MINIMAX_MODELS.map((m) => ({
      name: m.id,
      model: m.id,
      modified_at: new Date(),
      size: 0,
      digest: 'cloud',
      details: {
        parent_model: '',
        format: 'cloud',
        family: 'minimax',
        families: ['minimax'],
        parameter_size: 'cloud',
        quantization_level: '',
      },
    }))
  }

  /**
   * Sends a non-streaming chat request to MiniMax API (OpenAI-compatible).
   * Returns an Ollama-compatible response shape so the controller can use it
   * transparently.
   *
   * @throws Error when the API key is missing, the API responds with a
   *   non-2xx status, or the response carries no message content.
   */
  async chat(params: { model: string; messages: Array<{ role: string; content: string }> }) {
    if (!this.apiKey) {
      throw new Error('MINIMAX_API_KEY is not configured')
    }
    logger.debug(`[MiniMaxService] Sending chat request to model: ${params.model}`)
    const response = await fetch(`${MINIMAX_BASE_URL}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: params.model,
        messages: params.messages,
        temperature: 1.0,
      }),
    })
    if (!response.ok) {
      const errorText = await response.text().catch(() => 'Unknown error')
      logger.error(`[MiniMaxService] API error ${response.status}: ${errorText}`)
      throw new Error(`MiniMax API error: ${response.status} - ${errorText}`)
    }
    // Narrow the untyped JSON payload instead of dereferencing
    // data.choices[0].message.content blindly — an empty `choices` array
    // would otherwise crash with an opaque TypeError.
    const data = (await response.json()) as {
      choices?: Array<{ message?: { content?: string } }>
    }
    const content = data.choices?.[0]?.message?.content
    if (content === undefined) {
      logger.error('[MiniMaxService] API response contained no message content')
      throw new Error('MiniMax API returned no message content')
    }
    return {
      model: params.model,
      created_at: new Date().toISOString(),
      message: {
        role: 'assistant' as const,
        content,
      },
      done: true,
    }
  }

  /**
   * Sends a streaming chat request to MiniMax API. Returns an async generator
   * that yields Ollama-compatible chunk objects so the controller SSE logic
   * can forward them as-is.
   *
   * @throws Error when the API key is missing, the API responds with a
   *   non-2xx status, or the response has no body.
   */
  async *chatStream(params: {
    model: string
    messages: Array<{ role: string; content: string }>
  }) {
    if (!this.apiKey) {
      throw new Error('MINIMAX_API_KEY is not configured')
    }
    logger.debug(`[MiniMaxService] Starting streaming chat for model: ${params.model}`)
    const response = await fetch(`${MINIMAX_BASE_URL}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify({
        model: params.model,
        messages: params.messages,
        temperature: 1.0,
        stream: true,
      }),
    })
    if (!response.ok) {
      const errorText = await response.text().catch(() => 'Unknown error')
      logger.error(`[MiniMaxService] Streaming API error ${response.status}: ${errorText}`)
      throw new Error(`MiniMax API error: ${response.status} - ${errorText}`)
    }
    if (!response.body) {
      throw new Error('MiniMax API returned no response body')
    }
    const reader = response.body.getReader()
    const decoder = new TextDecoder()
    let buffer = ''
    try {
      while (true) {
        const { done, value } = await reader.read()
        if (done) {
          // Flush any bytes the decoder is still holding plus a trailing
          // line that arrived without a final newline — previously these
          // were silently dropped.
          buffer += decoder.decode()
          const tail = this.parseStreamLine(buffer, params.model)
          if (tail) yield tail
          break
        }
        buffer += decoder.decode(value, { stream: true })
        const lines = buffer.split('\n')
        buffer = lines.pop() || ''
        for (const line of lines) {
          const chunk = this.parseStreamLine(line, params.model)
          if (chunk) yield chunk
        }
      }
    } finally {
      reader.releaseLock()
    }
  }

  /**
   * Converts one SSE line from the MiniMax stream into an Ollama-compatible
   * chunk object, or null when the line carries no chunk (non-data lines,
   * blank payloads, the "[DONE]" sentinel, malformed JSON).
   */
  private parseStreamLine(line: string, model: string) {
    if (!line.startsWith('data:')) return null
    const jsonStr = line.slice(5).trim()
    if (!jsonStr || jsonStr === '[DONE]') return null
    try {
      const data = JSON.parse(jsonStr)
      const content = data.choices?.[0]?.delta?.content || ''
      const finishReason = data.choices?.[0]?.finish_reason
      return {
        model,
        created_at: new Date().toISOString(),
        message: {
          role: 'assistant' as const,
          content,
        },
        // Any non-null finish_reason ('stop', 'length', 'content_filter', …)
        // terminates an OpenAI-style stream. Comparing against 'stop' alone
        // meant length-capped or filtered responses never produced a
        // done:true chunk.
        done: finishReason != null,
      }
    } catch {
      // Skip malformed SSE chunks
      return null
    }
  }
}

View File

@ -380,7 +380,7 @@ export default function Chat({
>
{installedModels.map((model) => (
<option key={model.name} value={model.name}>
{model.name} ({formatBytes(model.size)})
{model.name} ({model.size > 0 ? formatBytes(model.size) : 'Cloud'})
</option>
))}
</select>

View File

@ -60,4 +60,11 @@ export default await Env.create(new URL('../', import.meta.url), {
|----------------------------------------------------------
*/
NOMAD_API_URL: Env.schema.string.optional(),
/*
|----------------------------------------------------------
| Variables for configuring cloud LLM providers
|----------------------------------------------------------
*/
MINIMAX_API_KEY: Env.schema.string.optional(),
})

View File

@ -0,0 +1,100 @@
import { test } from '@japa/runner'
const MINIMAX_API_KEY = process.env.MINIMAX_API_KEY
const MINIMAX_BASE_URL = 'https://api.minimax.io/v1'

test.group('MiniMax E2E', (group) => {
  group.tap((t) => t.timeout(30_000))

  // Without credentials these network tests cannot run; register a single
  // placeholder test so the group still reports instead of vanishing.
  if (!MINIMAX_API_KEY) {
    test('skipped: MINIMAX_API_KEY not set', ({ assert }) => {
      assert.isTrue(true)
    })
    return
  }

  /** POSTs a payload to the chat completions endpoint with auth headers. */
  const postCompletion = (payload: Record<string, unknown>) =>
    fetch(`${MINIMAX_BASE_URL}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${MINIMAX_API_KEY}`,
      },
      body: JSON.stringify(payload),
    })

  test('completes basic chat with MiniMax-M2.7', async ({ assert }) => {
    const res = await postCompletion({
      model: 'MiniMax-M2.7',
      messages: [{ role: 'user', content: 'Say "test passed" in exactly two words.' }],
      max_tokens: 20,
      temperature: 1.0,
    })
    assert.equal(res.status, 200)
    const body = await res.json()
    assert.isTrue(body.choices.length > 0)
    assert.isString(body.choices[0].message.content)
    assert.isTrue(body.choices[0].message.content.length > 0)
  })

  test('streams chat response from MiniMax-M2.7', async ({ assert }) => {
    const res = await postCompletion({
      model: 'MiniMax-M2.7',
      messages: [{ role: 'user', content: 'Say "hello"' }],
      max_tokens: 10,
      temperature: 1.0,
      stream: true,
    })
    assert.equal(res.status, 200)

    // Count SSE data lines, excluding the terminal "[DONE]" sentinel.
    const reader = res.body!.getReader()
    const decoder = new TextDecoder()
    let dataLineCount = 0
    let pending = ''
    for (;;) {
      const { done, value } = await reader.read()
      if (done) break
      pending += decoder.decode(value, { stream: true })
      const lines = pending.split('\n')
      pending = lines.pop() || ''
      for (const line of lines) {
        if (line.startsWith('data:') && line.slice(5).trim() !== '[DONE]') {
          dataLineCount++
        }
      }
    }
    assert.isTrue(dataLineCount > 0)
  })

  test('handles system message correctly', async ({ assert }) => {
    const res = await postCompletion({
      model: 'MiniMax-M2.7',
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: 'Say "ok"' },
      ],
      max_tokens: 10,
      temperature: 1.0,
    })
    assert.equal(res.status, 200)
    const body = await res.json()
    assert.isTrue(body.choices.length > 0)
    assert.isString(body.choices[0].message.content)
  })
})

View File

@ -0,0 +1,85 @@
import { test } from '@japa/runner'
import { MINIMAX_MODELS, MiniMaxService } from '#services/minimax_service'
test.group('MiniMaxService', () => {
  test('isMiniMaxModel returns true for MiniMax models', ({ assert }) => {
    const svc = new MiniMaxService()
    // Both published cloud model ids share the 'MiniMax-' prefix.
    assert.isTrue(svc.isMiniMaxModel('MiniMax-M2.7'))
    assert.isTrue(svc.isMiniMaxModel('MiniMax-M2.7-highspeed'))
  })

  test('isMiniMaxModel returns false for non-MiniMax models', ({ assert }) => {
    const svc = new MiniMaxService()
    for (const name of ['llama3.2:3b', 'deepseek-r1:1.5b', 'gpt-4o']) {
      assert.isFalse(svc.isMiniMaxModel(name))
    }
  })

  test('MINIMAX_MODELS contains expected models', ({ assert }) => {
    const ids = MINIMAX_MODELS.map((entry) => entry.id)
    assert.include(ids, 'MiniMax-M2.7')
    assert.include(ids, 'MiniMax-M2.7-highspeed')
    assert.lengthOf(MINIMAX_MODELS, 2)
  })

  test('getModels returns empty array when API key is not set', ({ assert }) => {
    const svc = new MiniMaxService()
    // Only meaningful when MINIMAX_API_KEY is absent from the environment.
    if (svc.isAvailable()) return
    assert.lengthOf(svc.getModels(), 0)
  })

  test('getModels returns Ollama-compatible model objects', ({ assert }) => {
    const svc = new MiniMaxService()
    // Only meaningful when an API key is configured.
    if (!svc.isAvailable()) return
    const entries = svc.getModels()
    assert.lengthOf(entries, 2)
    entries.forEach((entry) => {
      assert.properties(entry, ['name', 'model', 'modified_at', 'size', 'digest', 'details'])
      assert.equal(entry.size, 0)
      assert.equal(entry.digest, 'cloud')
      assert.equal(entry.details.family, 'minimax')
      assert.equal(entry.details.format, 'cloud')
    })
  })

  test('chat throws when API key is not set', async ({ assert }) => {
    const svc = new MiniMaxService()
    if (svc.isAvailable()) return
    await assert.rejects(
      () =>
        svc.chat({
          model: 'MiniMax-M2.7',
          messages: [{ role: 'user', content: 'hello' }],
        }),
      'MINIMAX_API_KEY is not configured'
    )
  })

  test('chatStream throws when API key is not set', async ({ assert }) => {
    const svc = new MiniMaxService()
    if (svc.isAvailable()) return
    await assert.rejects(async () => {
      // Generator bodies run lazily; pull one value to surface the throw.
      const stream = svc.chatStream({
        model: 'MiniMax-M2.7',
        messages: [{ role: 'user', content: 'hello' }],
      })
      await stream.next()
    }, 'MINIMAX_API_KEY is not configured')
  })

  test('model names match expected format', ({ assert }) => {
    MINIMAX_MODELS.forEach((entry) => {
      assert.isTrue(entry.id.startsWith('MiniMax-'))
      assert.equal(entry.id, entry.name)
    })
  })
})