mirror of
https://github.com/Crosstalk-Solutions/project-nomad.git
synced 2026-04-03 23:36:17 +02:00
feat(RAG): [wip] RAG capabilities
This commit is contained in:
parent
c78736c8da
commit
50174d2edb
|
|
@ -1,7 +1,7 @@
|
||||||
FROM node:22.16.0-alpine3.22 AS base
|
FROM node:22.16.0-alpine3.22 AS base
|
||||||
|
|
||||||
# Install bash & curl for entrypoint script compatibility
|
# Install bash & curl for entrypoint script compatibility, graphicsmagick for pdf2pic, and vips-dev & build-base for sharp
|
||||||
RUN apk add --no-cache bash curl
|
RUN apk add --no-cache bash curl graphicsmagick vips-dev build-base
|
||||||
|
|
||||||
# All deps stage
|
# All deps stage
|
||||||
FROM base AS deps
|
FROM base AS deps
|
||||||
|
|
|
||||||
18
admin/app/controllers/rag_controller.ts
Normal file
18
admin/app/controllers/rag_controller.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
import { cuid } from '@adonisjs/core/helpers'
|
||||||
|
import type { HttpContext } from '@adonisjs/core/http'
|
||||||
|
import app from '@adonisjs/core/services/app'
|
||||||
|
|
||||||
|
/**
 * HTTP controller for the RAG (retrieval-augmented generation) endpoints.
 */
export default class RagsController {
  /**
   * Stores a multipart upload (form field `file`) under `storage/uploads`
   * using a generated cuid-based file name.
   *
   * Responds with:
   * - 400 and `{ error }` when no file was sent or the file failed validation
   * - 200 and `{ fileName }` once the file has been moved into place
   *
   * @param ctx - AdonisJS HTTP context (only `request` and `response` are used)
   */
  public async upload({ request, response }: HttpContext) {
    const uploadedFile = request.file('file')
    if (!uploadedFile) {
      return response.status(400).json({ error: 'No file uploaded' })
    }

    // Reject files the bodyparser flagged as invalid instead of attempting to move them.
    if (!uploadedFile.isValid) {
      return response
        .status(400)
        .json({ error: 'Invalid file upload', details: uploadedFile.errors })
    }

    // `extname` may be undefined when the type could not be detected; avoid
    // producing a literal ".undefined" suffix in that case.
    const extension = uploadedFile.extname
    const fileName = extension ? `${cuid()}.${extension}` : cuid()

    await uploadedFile.move(app.makePath('storage/uploads'), {
      name: fileName,
    })

    // Always answer the request explicitly so the client learns the stored name.
    return response.status(200).json({ fileName })
  }
}
|
||||||
46
admin/app/models/kv_store.ts
Normal file
46
admin/app/models/kv_store.ts
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
import { DateTime } from 'luxon'
|
||||||
|
import { BaseModel, column, SnakeCaseNamingStrategy } from '@adonisjs/lucid/orm'
|
||||||
|
import type { KVStoreKey, KVStoreValue } from '../../types/kv_store.js'
|
||||||
|
|
||||||
|
/**
 * Lucid model backing a generic key/value table. Intended for miscellaneous
 * settings that do not warrant a dedicated model of their own.
 */
export default class KVStore extends BaseModel {
  static namingStrategy = new SnakeCaseNamingStrategy()

  @column({ isPrimary: true })
  declare id: number

  @column()
  declare key: KVStoreKey

  @column()
  declare value: KVStoreValue

  @column.dateTime({ autoCreate: true })
  declare created_at: DateTime

  @column.dateTime({ autoCreate: true, autoUpdate: true })
  declare updated_at: DateTime

  /**
   * Look up the value stored under `key`.
   *
   * @param key - Setting key to read
   * @returns The stored value, or `null` when the row or its value is absent
   */
  static async getValue(key: KVStoreKey): Promise<KVStoreValue> {
    const row = await this.findBy('key', key)
    if (!row) {
      return null
    }
    return row.value ?? null
  }

  /**
   * Store `value` under `key`, inserting the row when it does not exist yet.
   * An existing row is only saved again when its value actually differs.
   *
   * @param key - Setting key to write
   * @param value - Value to persist
   * @returns The created or updated model instance
   */
  static async setValue(key: KVStoreKey, value: KVStoreValue): Promise<KVStore> {
    const row = await this.firstOrCreate({ key }, { key, value })
    if (row.value === value) {
      return row
    }

    row.value = value
    await row.save()
    return row
  }
}
|
||||||
|
|
@ -426,21 +426,10 @@ export class BenchmarkService {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if the benchmark model is available, pull if not
|
// Check if the benchmark model is available, pull if not
|
||||||
const modelsResponse = await axios.get(`${ollamaAPIURL}/api/tags`)
|
const openWebUIService = new (await import('./openwebui_service.js')).OpenWebUIService(this.dockerService)
|
||||||
const models = modelsResponse.data.models || []
|
const modelResponse = await openWebUIService.downloadModelSync(AI_BENCHMARK_MODEL)
|
||||||
const hasModel = models.some((m: any) => m.name === AI_BENCHMARK_MODEL || m.name.startsWith(AI_BENCHMARK_MODEL.split(':')[0]))
|
if (!modelResponse.success) {
|
||||||
|
throw new Error(`Model does not exist and failed to download: ${modelResponse.message}`)
|
||||||
if (!hasModel) {
|
|
||||||
this._updateStatus('downloading_ai_model', `Downloading AI benchmark model (${AI_BENCHMARK_MODEL})... This may take a few minutes on first run.`)
|
|
||||||
logger.info(`[BenchmarkService] Model ${AI_BENCHMARK_MODEL} not found, downloading...`)
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Model pull can take several minutes, use longer timeout
|
|
||||||
await axios.post(`${ollamaAPIURL}/api/pull`, { name: AI_BENCHMARK_MODEL }, { timeout: 600000 })
|
|
||||||
logger.info(`[BenchmarkService] Model ${AI_BENCHMARK_MODEL} downloaded successfully`)
|
|
||||||
} catch (pullError) {
|
|
||||||
throw new Error(`Failed to download AI benchmark model (${AI_BENCHMARK_MODEL}): ${pullError.message}`)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run inference benchmark
|
// Run inference benchmark
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ export class DockerService {
|
||||||
private activeInstallations: Set<string> = new Set()
|
private activeInstallations: Set<string> = new Set()
|
||||||
public static KIWIX_SERVICE_NAME = 'nomad_kiwix_serve'
|
public static KIWIX_SERVICE_NAME = 'nomad_kiwix_serve'
|
||||||
public static OLLAMA_SERVICE_NAME = 'nomad_ollama'
|
public static OLLAMA_SERVICE_NAME = 'nomad_ollama'
|
||||||
|
public static QDRANT_SERVICE_NAME = 'nomad_qdrant'
|
||||||
public static OPEN_WEBUI_SERVICE_NAME = 'nomad_open_webui'
|
public static OPEN_WEBUI_SERVICE_NAME = 'nomad_open_webui'
|
||||||
public static CYBERCHEF_SERVICE_NAME = 'nomad_cyberchef'
|
public static CYBERCHEF_SERVICE_NAME = 'nomad_cyberchef'
|
||||||
public static FLATNOTES_SERVICE_NAME = 'nomad_flatnotes'
|
public static FLATNOTES_SERVICE_NAME = 'nomad_flatnotes'
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,9 @@ import path from 'node:path'
|
||||||
import { PassThrough } from 'node:stream'
|
import { PassThrough } from 'node:stream'
|
||||||
import { DownloadModelJob } from '#jobs/download_model_job'
|
import { DownloadModelJob } from '#jobs/download_model_job'
|
||||||
import { FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
import { FALLBACK_RECOMMENDED_OLLAMA_MODELS } from '../../constants/ollama.js'
|
||||||
|
import { chromium } from 'playwright'
|
||||||
|
import KVStore from '#models/kv_store'
|
||||||
|
import { getFile } from '../utils/fs.js'
|
||||||
|
|
||||||
const NOMAD_MODELS_API_BASE_URL = 'https://api.projectnomad.us/api/v1/ollama/models'
|
const NOMAD_MODELS_API_BASE_URL = 'https://api.projectnomad.us/api/v1/ollama/models'
|
||||||
const MODELS_CACHE_FILE = path.join(process.cwd(), 'storage', 'ollama-models-cache.json')
|
const MODELS_CACHE_FILE = path.join(process.cwd(), 'storage', 'ollama-models-cache.json')
|
||||||
|
|
@ -15,6 +18,10 @@ const CACHE_MAX_AGE_MS = 24 * 60 * 60 * 1000 // 24 hours
|
||||||
|
|
||||||
@inject()
|
@inject()
|
||||||
export class OpenWebUIService {
|
export class OpenWebUIService {
|
||||||
|
public static NOMAD_KNOWLEDGE_BASE_NAME = 'nomad-knowledge-base'
|
||||||
|
public static NOMAD_KNOWLEDGE_BASE_DESCRIP =
|
||||||
|
'Knowledge base managed by Project NOMAD, used to enhance LLM responses with up-to-date information. Do not delete.'
|
||||||
|
|
||||||
constructor(private dockerService: DockerService) {}
|
constructor(private dockerService: DockerService) {}
|
||||||
|
|
||||||
/** We need to call this in the DownloadModelJob, so it can't be private,
|
/** We need to call this in the DownloadModelJob, so it can't be private,
|
||||||
|
|
@ -200,6 +207,45 @@ export class OpenWebUIService {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Synchronous version of model download (waits for completion). Should only be used for
 * small models or in contexts where a background job is incompatible.
 *
 * The method never throws: every failure is logged and reported through the
 * returned `success` flag.
 *
 * @param model Model name to download
 * @returns Success status and message
 */
async downloadModelSync(model: string): Promise<{ success: boolean; message: string }> {
  try {
    // Skip the pull entirely when the model is already installed
    const installedModels = await this.getInstalledModels()
    if (installedModels && installedModels.some((m) => m.name === model)) {
      logger.info(`[OpenWebUIService] Model "${model}" is already installed.`)
      return { success: true, message: 'Model is already installed.' }
    }

    const ollamaAPIURL = await this.dockerService.getServiceURL(DockerService.OLLAMA_SERVICE_NAME)
    if (!ollamaAPIURL) {
      logger.warn('[OpenWebUIService] Ollama service is not running. Cannot download model.')
      return {
        success: false,
        message: 'Ollama is not running. Please start Ollama and try again.',
      }
    }

    // `stream: false` makes Ollama answer with one final JSON object instead of a
    // progress stream, so a failed pull surfaces as a request error rather than being
    // buried inside a streamed body that we never inspect.
    // The request is allowed up to 10 minutes to complete.
    await axios.post(
      `${ollamaAPIURL}/api/pull`,
      { name: model, stream: false },
      { timeout: 600000 }
    )

    logger.info(`[OpenWebUIService] Model "${model}" downloaded via API.`)
    return { success: true, message: 'Model downloaded successfully.' }
  } catch (error) {
    logger.error(
      `[OpenWebUIService] Failed to download model "${model}": ${
        error instanceof Error ? error.message : error
      }`
    )
    return { success: false, message: 'Failed to download model.' }
  }
}
|
||||||
|
|
||||||
async deleteModel(model: string): Promise<{ success: boolean; message: string }> {
|
async deleteModel(model: string): Promise<{ success: boolean; message: string }> {
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
try {
|
try {
|
||||||
|
|
@ -529,6 +575,163 @@ export class OpenWebUIService {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Resolve the ID of the NOMAD knowledge base in Open WebUI.
 *
 * A previously stored ID (KV store key `open_webui_knowledge_id`) is returned as-is.
 * Otherwise a knowledge base is created through the Open WebUI API and its ID is
 * persisted for later calls.
 *
 * @returns The knowledge base ID, or `null` when no token is available, the API
 *          response carries no `id`, or any step throws (failures are logged)
 */
async getOrCreateKnowledgeBase(): Promise<string | null> {
  try {
    // Fast path: reuse the ID we stored earlier
    const storedId = await KVStore.getValue('open_webui_knowledge_id')
    if (storedId) {
      return storedId as string
    }

    // Nothing stored yet, so a new knowledge base has to be created
    const auth = await this.getOpenWebUIToken()
    if (!auth) {
      logger.warn(
        '[OpenWebUIService] Cannot get or create knowledge base because Open WebUI token is unavailable.'
      )
      return null
    }

    const payload = {
      name: OpenWebUIService.NOMAD_KNOWLEDGE_BASE_NAME,
      description: OpenWebUIService.NOMAD_KNOWLEDGE_BASE_DESCRIP,
    }
    const requestConfig = {
      headers: {
        Authorization: `Bearer ${auth.token}`,
      },
    }
    const { data } = await axios.post(
      `${auth.url}/api/v1/knowledge/create`,
      payload,
      requestConfig
    )

    if (data && data.id) {
      await KVStore.setValue('open_webui_knowledge_id', data.id)
      return data.id
    }

    logger.error(
      `[OpenWebUIService] Invalid response when creating knowledge base: ${JSON.stringify(data)}`
    )
    return null
  } catch (error) {
    const reason = error instanceof Error ? error.message : error
    logger.error(`[OpenWebUIService] Failed to get or create knowledge base: ${reason}`)
    return null
  }
}
|
||||||
|
|
||||||
|
/**
 * Upload a local file to Open WebUI and attach it to the NOMAD knowledge base.
 *
 * Steps: resolve (or create) the knowledge base, obtain an Open WebUI token,
 * POST the file to `/api/v1/files/`, then POST the returned file ID to
 * `/api/v1/knowledge/{id}/file/add`.
 *
 * @param filepath - Path of the file to upload
 * @returns `true` once the file has been uploaded and associated; `false` when any
 *          prerequisite is missing, a response is malformed, or a request throws
 *          (failures are logged, never rethrown)
 */
async uploadFileToKnowledgeBase(filepath: string): Promise<boolean> {
  try {
    const knowledgeBaseId = await this.getOrCreateKnowledgeBase()
    if (!knowledgeBaseId) {
      logger.warn(
        '[OpenWebUIService] Cannot upload file because knowledge base ID is unavailable and could not be created.'
      )
      return false
    }

    const tokenData = await this.getOpenWebUIToken()
    if (!tokenData) {
      logger.warn(
        '[OpenWebUIService] Cannot upload file because Open WebUI token is unavailable.'
      )
      return false
    }

    const fileStream = await getFile(filepath, 'stream')
    if (!fileStream) {
      logger.warn(
        `[OpenWebUIService] Cannot upload file because it could not be read: ${filepath}`
      )
      return false
    }

    // NOTE(review): if `FormData` here is the platform global (not the `form-data`
    // package), appending a Node stream may not serialize the file contents - confirm.
    const formData = new FormData()
    formData.append('file', fileStream)

    // NOTE(review): a hand-written multipart Content-Type carries no boundary;
    // verify the HTTP client fills it in for this FormData implementation.
    const uploadRes = await axios.post(
      `${tokenData.url}/api/v1/files/`, // Trailing slash seems to be required by OWUI
      formData,
      {
        headers: {
          'Authorization': `Bearer ${tokenData.token}`,
          'Content-Type': 'multipart/form-data',
          'Accept': 'application/json',
        },
      }
    )

    if (!uploadRes.data || !uploadRes.data.id) {
      logger.error(
        `[OpenWebUIService] Invalid response when uploading file: ${JSON.stringify(
          uploadRes.data
        )}`
      )
      return false
    }

    const fileId = uploadRes.data.id

    // Now associate the uploaded file with the knowledge base. A non-2xx answer
    // makes axios throw, which is handled by the catch below.
    await axios.post(
      `${tokenData.url}/api/v1/knowledge/${knowledgeBaseId}/file/add`,
      {
        file_id: fileId,
      },
      {
        headers: {
          Authorization: `Bearer ${tokenData.token}`,
        },
      }
    )

    // Both requests succeeded - report success explicitly.
    return true
  } catch (error) {
    logger.error(
      `[OpenWebUIService] Failed to upload file to knowledge base: ${
        error instanceof Error ? error.message : error
      }`
    )
    return false
  }
}
|
||||||
|
|
||||||
|
/**
 * Obtain an Open WebUI auth token by loading the UI in a headless Chromium
 * instance and reading the `token` cookie it sets.
 *
 * @returns The token together with the Open WebUI base URL, or `null` when the
 *          service URL is unavailable, no `token` cookie exists, or anything
 *          throws (failures are logged)
 */
private async getOpenWebUIToken(): Promise<{ token: string; url: string } | null> {
  try {
    const openWebUIURL = await this.dockerService.getServiceURL(
      DockerService.OPEN_WEBUI_SERVICE_NAME
    )
    if (!openWebUIURL) {
      logger.warn('[OpenWebUIService] Open WebUI service is not running. Cannot retrieve token.')
      return null
    }

    const browser = await chromium.launch({ headless: true })
    try {
      const context = await browser.newContext()
      const page = await context.newPage()

      await page.goto(openWebUIURL)
      await page.waitForLoadState('networkidle')

      const cookies = await context.cookies()
      const tokenCookie = cookies.find((cookie) => cookie.name === 'token')

      return tokenCookie ? { token: tokenCookie.value, url: openWebUIURL } : null
    } finally {
      // Always shut the browser down, even when navigation or cookie retrieval
      // fails, so no Chromium process is left running.
      await browser.close()
    }
  } catch (error) {
    logger.error(
      `[OpenWebUIService] Failed to retrieve Open WebUI token: ${
        error instanceof Error ? error.message : error
      }`
    )
    return null
  }
}
|
||||||
|
|
||||||
private async retrieveAndRefreshModels(
|
private async retrieveAndRefreshModels(
|
||||||
sort?: 'pulls' | 'name'
|
sort?: 'pulls' | 'name'
|
||||||
): Promise<NomadOllamaModel[] | null> {
|
): Promise<NomadOllamaModel[] | null> {
|
||||||
|
|
|
||||||
262
admin/app/services/rag_service.ts
Normal file
262
admin/app/services/rag_service.ts
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
import { Ollama } from 'ollama'
|
||||||
|
import { QdrantClient } from '@qdrant/js-client-rest'
|
||||||
|
import { DockerService } from './docker_service.js'
|
||||||
|
import { inject } from '@adonisjs/core'
|
||||||
|
import logger from '@adonisjs/core/services/logger'
|
||||||
|
import { chunk } from 'llm-chunk'
|
||||||
|
import { OpenWebUIService } from './openwebui_service.js'
|
||||||
|
import sharp from 'sharp'
|
||||||
|
import { determineFileType, getFile } from '../utils/fs.js'
|
||||||
|
import { PDFParse } from 'pdf-parse'
|
||||||
|
import { createWorker } from 'tesseract.js'
|
||||||
|
import { fromBuffer } from 'pdf2pic'
|
||||||
|
|
||||||
|
@inject()
|
||||||
|
export class RagService {
|
||||||
|
private qdrant: QdrantClient | null = null
|
||||||
|
private ollama: Ollama | null = null
|
||||||
|
private qdrantInitPromise: Promise<void> | null = null
|
||||||
|
private ollamaInitPromise: Promise<void> | null = null
|
||||||
|
public static CONTENT_COLLECTION_NAME = 'open-webui_knowledge' // This is the collection name OWUI uses for uploaded knowledge
|
||||||
|
public static EMBEDDING_MODEL = 'nomic-embed-text:v1.5'
|
||||||
|
public static EMBEDDING_DIMENSION = 768 // Nomic Embed Text v1.5 dimension is 768
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
private dockerService: DockerService,
|
||||||
|
private openWebUIService: OpenWebUIService
|
||||||
|
) {}
|
||||||
|
|
||||||
|
/**
 * Lazily create the Qdrant client. Concurrent callers share one in-flight
 * initialization promise.
 *
 * A failed initialization is not cached: the shared promise is cleared on
 * rejection so a later call can retry (for example after the service starts).
 *
 * @throws Error when the Qdrant service URL cannot be resolved
 */
private async _initializeQdrantClient() {
  if (!this.qdrantInitPromise) {
    const init = (async () => {
      const qdrantUrl = await this.dockerService.getServiceURL(DockerService.QDRANT_SERVICE_NAME)
      if (!qdrantUrl) {
        throw new Error('Qdrant service is not installed or running.')
      }
      // Only add a scheme when the resolved URL lacks one, so a value that already
      // starts with http(s):// does not become "http://http://...".
      const url = /^https?:\/\//i.test(qdrantUrl) ? qdrantUrl : `http://${qdrantUrl}`
      this.qdrant = new QdrantClient({ url })
    })()

    this.qdrantInitPromise = init
    // Forget a rejected attempt; callers awaiting `init` still observe the error.
    init.catch(() => {
      if (this.qdrantInitPromise === init) {
        this.qdrantInitPromise = null
      }
    })
  }
  return this.qdrantInitPromise
}
|
||||||
|
|
||||||
|
/**
 * Lazily create the Ollama client. Concurrent callers share one in-flight
 * initialization promise.
 *
 * A failed initialization is not cached: the shared promise is cleared on
 * rejection so a later call can retry (for example after the service starts).
 *
 * @throws Error when the Ollama service URL cannot be resolved
 */
private async _initializeOllamaClient() {
  if (!this.ollamaInitPromise) {
    const init = (async () => {
      const ollamaUrl = await this.dockerService.getServiceURL(DockerService.OLLAMA_SERVICE_NAME)
      if (!ollamaUrl) {
        throw new Error('Ollama service is not installed or running.')
      }
      // Only add a scheme when the resolved URL lacks one, so a value that already
      // starts with http(s):// does not become "http://http://...".
      const host = /^https?:\/\//i.test(ollamaUrl) ? ollamaUrl : `http://${ollamaUrl}`
      this.ollama = new Ollama({ host })
    })()

    this.ollamaInitPromise = init
    // Forget a rejected attempt; callers awaiting `init` still observe the error.
    init.catch(() => {
      if (this.ollamaInitPromise === init) {
        this.ollamaInitPromise = null
      }
    })
  }
  return this.ollamaInitPromise
}
|
||||||
|
|
||||||
|
/**
 * Make sure both the Qdrant and the Ollama clients exist, initializing
 * whichever one is still missing. Qdrant is handled first, then Ollama.
 */
private async _ensureDependencies() {
  const qdrantReady = Boolean(this.qdrant)
  if (!qdrantReady) await this._initializeQdrantClient()

  const ollamaReady = Boolean(this.ollama)
  if (!ollamaReady) await this._initializeOllamaClient()
}
|
||||||
|
|
||||||
|
/**
 * Create the named Qdrant collection when it does not exist yet.
 *
 * @param collectionName - Collection to look for / create
 * @param dimensions - Vector size used when creating the collection
 *                     (defaults to `RagService.EMBEDDING_DIMENSION`)
 * @throws Rethrows any error raised while initializing clients or talking to Qdrant
 */
private async _ensureCollection(
  collectionName: string,
  dimensions: number = RagService.EMBEDDING_DIMENSION
) {
  try {
    await this._ensureDependencies()
    const collections = await this.qdrant!.getCollections()
    const collectionExists = collections.collections.some((col) => col.name === collectionName)

    if (!collectionExists) {
      await this.qdrant!.createCollection(collectionName, {
        vectors: {
          size: dimensions,
          distance: 'Cosine',
        },
      })
    }
  } catch (error) {
    // Pass the error as a merging object: a trailing argument without a matching
    // placeholder in the message would be dropped from the log output.
    logger.error({ err: error }, 'Error ensuring Qdrant collection:')
    throw error
  }
}
|
||||||
|
|
||||||
|
/**
 * Chunk `text`, embed every chunk with the configured Ollama embedding model,
 * and upsert the resulting vectors into the content collection in Qdrant.
 *
 * The collection is created and the embedding model downloaded on demand.
 *
 * @param text - Raw text to embed
 * @param metadata - Extra payload fields stored with every chunk; `text` and
 *                   `chunk_index` are always set by this method and override
 *                   same-named keys
 * @returns The number of stored chunks, or `null` when any step fails
 *          (the error is logged, not rethrown)
 */
public async embedAndStoreText(
  text: string,
  metadata: Record<string, any> = {}
): Promise<{ chunks: number } | null> {
  try {
    await this._ensureCollection(
      RagService.CONTENT_COLLECTION_NAME,
      RagService.EMBEDDING_DIMENSION
    )

    // Narrow both clients once instead of asserting non-null at each use.
    const { qdrant, ollama } = this
    if (!qdrant || !ollama) {
      throw new Error('Qdrant or Ollama client is not initialized.')
    }

    const initModelResponse = await this.openWebUIService.downloadModelSync(
      RagService.EMBEDDING_MODEL
    )
    if (!initModelResponse.success) {
      throw new Error(
        `${RagService.EMBEDDING_MODEL} does not exist and could not be downloaded: ${initModelResponse.message}`
      )
    }

    const chunks = chunk(text, {
      // These settings should provide a good balance between context and precision
      minLength: 512,
      maxLength: 1024,
      overlap: 200,
    })

    if (!chunks || chunks.length === 0) {
      throw new Error('No text chunks generated for embedding.')
    }

    // Embed sequentially so the results line up with `chunks` by index.
    const embeddings: number[][] = []
    for (const chunkText of chunks) {
      const response = await ollama.embeddings({
        model: RagService.EMBEDDING_MODEL,
        prompt: chunkText,
      })

      embeddings.push(response.embedding)
    }

    const points = chunks.map((chunkText, index) => ({
      // Qdrant accepts only unsigned integers or UUIDs as point IDs; an arbitrary
      // string such as "<timestamp>_<index>" is rejected, so generate a UUID.
      id: globalThis.crypto.randomUUID(),
      vector: embeddings[index],
      payload: {
        ...metadata,
        text: chunkText,
        chunk_index: index,
      },
    }))

    await qdrant.upsert(RagService.CONTENT_COLLECTION_NAME, { points })

    return { chunks: chunks.length }
  } catch (error) {
    // Pass the error as a merging object: a trailing argument without a matching
    // placeholder in the message would be dropped from the log output.
    logger.error({ err: error }, 'Error embedding text:')
    return null
  }
}
|
||||||
|
|
||||||
|
/**
 * Prepare an image for OCR: grayscale, normalize, sharpen, and resize it
 * (width 2000, `fit: 'inside'`) before text extraction.
 * @param filebuffer - Buffer holding the source image
 * @returns - Buffer holding the processed image
 */
private async preprocessImage(filebuffer: Buffer): Promise<Buffer> {
  const pipeline = sharp(filebuffer)
    .grayscale()
    .normalize()
    .sharpen()
    .resize({ width: 2000, fit: 'inside' })

  const processed = await pipeline.toBuffer()
  return processed
}
|
||||||
|
|
||||||
|
/**
 * Render the pages of a PDF as PNG images so they can be run through OCR.
 * Used when the PDF itself yields little or no extractable text.
 * @param filebuffer - Buffer of the PDF file
 * @returns - Image buffers for the converted pages; results without a buffer are skipped
 */
private async convertPDFtoImages(filebuffer: Buffer): Promise<Buffer[]> {
  const converter = fromBuffer(filebuffer, {
    quality: 50,
    density: 200,
    format: 'png',
  })
  const pages = await converter.bulk(-1, {
    responseType: 'buffer',
  })

  const pageBuffers: Buffer[] = []
  for (const page of pages) {
    if (page.buffer) {
      pageBuffers.push(page.buffer)
    }
  }
  return pageBuffers
}
|
||||||
|
|
||||||
|
/**
 * Extract text from a PDF file using pdf-parse.
 * The parser is always destroyed, including when text extraction throws.
 * @param filebuffer - Buffer of the PDF file
 * @returns - Extracted text
 */
private async extractPDFText(filebuffer: Buffer): Promise<string> {
  const parser = new PDFParse({ data: filebuffer })
  try {
    const data = await parser.getText()
    return data.text
  } finally {
    // Release parser resources on both the success and the failure path.
    await parser.destroy()
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode the contents of a plain text file as UTF-8.
 * @param filebuffer - Buffer of the text file
 * @returns - The decoded text
 */
private async extractTXTText(filebuffer: Buffer): Promise<string> {
  const decoded = filebuffer.toString('utf-8')
  return decoded
}
|
||||||
|
|
||||||
|
/**
 * Extract text from an image file using Tesseract.js OCR (English language data).
 * The OCR worker is always terminated, including when recognition throws.
 * @param filebuffer - Buffer of the image file
 * @returns - Extracted text
 */
private async extractImageText(filebuffer: Buffer): Promise<string> {
  const worker = await createWorker('eng')
  try {
    const result = await worker.recognize(filebuffer)
    return result.data.text
  } finally {
    // Terminate on every path so a failed recognition does not leak the worker.
    await worker.terminate()
  }
}
|
||||||
|
|
||||||
|
/**
 * Main pipeline to process and embed an uploaded file into the RAG knowledge base.
 * This includes text extraction, chunking, embedding, and storing in Qdrant.
 *
 * Extraction strategy by detected file type:
 * - image: preprocess, then OCR
 * - pdf: embedded text first; OCR of rendered pages when fewer than 100
 *   non-whitespace-trimmed characters were extracted
 * - anything else that is supported: decoded as UTF-8 text
 *
 * @param filepath - Path of the file to process
 * @returns `success: true` only when text was extracted AND stored; otherwise
 *          `success: false` with a message describing the failing stage.
 *          Errors are logged, never rethrown.
 */
public async processAndEmbedFile(
  filepath: string
): Promise<{ success: boolean; message: string }> {
  try {
    const fileType = determineFileType(filepath)
    if (fileType === 'unknown') {
      return { success: false, message: 'Unsupported file type.' }
    }

    const origFileBuffer = await getFile(filepath, 'buffer')
    if (!origFileBuffer) {
      return { success: false, message: 'Failed to read the uploaded file.' }
    }

    let extractedText = ''

    if (fileType === 'image') {
      const preprocessedBuffer = await this.preprocessImage(origFileBuffer)
      extractedText = await this.extractImageText(preprocessedBuffer)
    } else if (fileType === 'pdf') {
      extractedText = await this.extractPDFText(origFileBuffer)
      // Check if there was no extracted text or it was very minimal
      if (!extractedText || extractedText.trim().length < 100) {
        // Convert PDF pages to images for OCR
        const imageBuffers = await this.convertPDFtoImages(origFileBuffer)
        for (const imgBuffer of imageBuffers) {
          const preprocessedImg = await this.preprocessImage(imgBuffer)
          const pageText = await this.extractImageText(preprocessedImg)
          extractedText += pageText + '\n'
        }
      }
    } else {
      extractedText = await this.extractTXTText(origFileBuffer)
    }

    if (!extractedText || extractedText.trim().length === 0) {
      return { success: false, message: 'No text could be extracted from the file.' }
    }

    // embedAndStoreText reports failure by returning null rather than throwing,
    // so the result must be checked before claiming success.
    const embedResult = await this.embedAndStoreText(extractedText, {})
    if (!embedResult) {
      return { success: false, message: 'Failed to embed and store the extracted text.' }
    }

    return { success: true, message: 'File processed and embedded successfully.' }
  } catch (error) {
    // Pass the error as a merging object: a trailing argument without a matching
    // placeholder in the message would be dropped from the log output.
    logger.error({ err: error }, 'Error processing and embedding file:')
    return { success: false, message: 'Error processing and embedding file.' }
  }
}
|
||||||
|
}
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
import { mkdir, readdir, readFile, stat, unlink } from 'fs/promises'
|
import { mkdir, readdir, readFile, stat, unlink } from 'fs/promises'
|
||||||
import { join } from 'path'
|
import path, { join } from 'path'
|
||||||
import { FileEntry } from '../../types/files.js'
|
import { FileEntry } from '../../types/files.js'
|
||||||
import { createReadStream } from 'fs'
|
import { createReadStream } from 'fs'
|
||||||
import { LSBlockDevice, NomadDiskInfoRaw } from '../../types/system.js'
|
import { LSBlockDevice, NomadDiskInfoRaw } from '../../types/system.js'
|
||||||
|
|
@ -151,3 +151,16 @@ export function matchesDevice(fsPath: string, deviceName: string): boolean {
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Classify a file by its extension (case-insensitive).
 *
 * @param filename - File name or path; only its extension is inspected
 * @returns `'image'`, `'pdf'` or `'text'` for recognized extensions, otherwise `'unknown'`
 */
export function determineFileType(filename: string): 'image' | 'pdf' | 'text' | 'unknown' {
  switch (path.extname(filename).toLowerCase()) {
    case '.jpg':
    case '.jpeg':
    case '.png':
    case '.gif':
    case '.bmp':
    case '.tiff':
    case '.webp':
      return 'image'
    case '.pdf':
      return 'pdf'
    case '.txt':
    case '.md':
    case '.docx':
    case '.rtf':
      return 'text'
    default:
      return 'unknown'
  }
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,19 @@
|
||||||
|
import { BaseSchema } from '@adonisjs/lucid/schema'
|
||||||
|
|
||||||
|
/**
 * Migration for the `kv_store` table: an auto-incrementing `id`, a unique
 * non-null `key`, a nullable text `value`, and created/updated timestamps.
 */
export default class extends BaseSchema {
  protected tableName = 'kv_store'

  /** Create the table. */
  async up() {
    this.schema.createTable(this.tableName, (t) => {
      t.increments('id')
      t.string('key').unique().notNullable()
      t.text('value').nullable()
      t.timestamp('created_at')
      t.timestamp('updated_at')
    })
  }

  /** Drop the table. */
  async down() {
    this.schema.dropTable(this.tableName)
  }
}
|
||||||
|
|
@ -3,17 +3,25 @@ import { DockerService } from '#services/docker_service'
|
||||||
import { BaseSeeder } from '@adonisjs/lucid/seeders'
|
import { BaseSeeder } from '@adonisjs/lucid/seeders'
|
||||||
import { ModelAttributes } from '@adonisjs/lucid/types/model'
|
import { ModelAttributes } from '@adonisjs/lucid/types/model'
|
||||||
import env from '#start/env'
|
import env from '#start/env'
|
||||||
|
import { RagService } from '#services/rag_service'
|
||||||
|
|
||||||
export default class ServiceSeeder extends BaseSeeder {
|
export default class ServiceSeeder extends BaseSeeder {
|
||||||
// Use environment variable with fallback to production default
|
// Use environment variable with fallback to production default
|
||||||
private static NOMAD_STORAGE_ABS_PATH = env.get('NOMAD_STORAGE_PATH', '/opt/project-nomad/storage')
|
private static NOMAD_STORAGE_ABS_PATH = env.get(
|
||||||
private static DEFAULT_SERVICES: Omit<ModelAttributes<Service>, 'created_at' | 'updated_at' | 'metadata' | 'id'>[] = [
|
'NOMAD_STORAGE_PATH',
|
||||||
|
'/opt/project-nomad/storage'
|
||||||
|
)
|
||||||
|
private static DEFAULT_SERVICES: Omit<
|
||||||
|
ModelAttributes<Service>,
|
||||||
|
'created_at' | 'updated_at' | 'metadata' | 'id'
|
||||||
|
>[] = [
|
||||||
{
|
{
|
||||||
service_name: DockerService.KIWIX_SERVICE_NAME,
|
service_name: DockerService.KIWIX_SERVICE_NAME,
|
||||||
friendly_name: 'Information Library',
|
friendly_name: 'Information Library',
|
||||||
powered_by: 'Kiwix',
|
powered_by: 'Kiwix',
|
||||||
display_order: 1,
|
display_order: 1,
|
||||||
description: 'Offline access to Wikipedia, medical references, how-to guides, and encyclopedias',
|
description:
|
||||||
|
'Offline access to Wikipedia, medical references, how-to guides, and encyclopedias',
|
||||||
icon: 'IconBooks',
|
icon: 'IconBooks',
|
||||||
container_image: 'ghcr.io/kiwix/kiwix-serve:3.8.1',
|
container_image: 'ghcr.io/kiwix/kiwix-serve:3.8.1',
|
||||||
container_command: '*.zim --address=all',
|
container_command: '*.zim --address=all',
|
||||||
|
|
@ -21,9 +29,9 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/zim:/data`],
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/zim:/data`],
|
||||||
PortBindings: { '8080/tcp': [{ HostPort: '8090' }] }
|
PortBindings: { '8080/tcp': [{ HostPort: '8090' }] },
|
||||||
},
|
},
|
||||||
ExposedPorts: { '8080/tcp': {} }
|
ExposedPorts: { '8080/tcp': {} },
|
||||||
}),
|
}),
|
||||||
ui_location: '8090',
|
ui_location: '8090',
|
||||||
installed: false,
|
installed: false,
|
||||||
|
|
@ -31,6 +39,29 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
is_dependency_service: false,
|
is_dependency_service: false,
|
||||||
depends_on: null,
|
depends_on: null,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
service_name: DockerService.QDRANT_SERVICE_NAME,
|
||||||
|
friendly_name: 'Qdrant Vector Database',
|
||||||
|
powered_by: null,
|
||||||
|
display_order: 100, // Dependency service, not shown directly
|
||||||
|
description: 'Vector database for storing and searching embeddings',
|
||||||
|
icon: 'IconRobot',
|
||||||
|
container_image: 'qdrant/qdrant:latest',
|
||||||
|
container_command: null,
|
||||||
|
container_config: JSON.stringify({
|
||||||
|
HostConfig: {
|
||||||
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/qdrant:/qdrant/storage`],
|
||||||
|
PortBindings: { '6333/tcp': [{ HostPort: '6333' }], '6334/tcp': [{ HostPort: '6334' }] },
|
||||||
|
},
|
||||||
|
ExposedPorts: { '6333/tcp': {}, '6334/tcp': {} },
|
||||||
|
}),
|
||||||
|
ui_location: '6333',
|
||||||
|
installed: false,
|
||||||
|
installation_status: 'idle',
|
||||||
|
is_dependency_service: true,
|
||||||
|
depends_on: null,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
service_name: DockerService.OLLAMA_SERVICE_NAME,
|
service_name: DockerService.OLLAMA_SERVICE_NAME,
|
||||||
friendly_name: 'Ollama',
|
friendly_name: 'Ollama',
|
||||||
|
|
@ -44,15 +75,15 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/ollama:/root/.ollama`],
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/ollama:/root/.ollama`],
|
||||||
PortBindings: { '11434/tcp': [{ HostPort: '11434' }] }
|
PortBindings: { '11434/tcp': [{ HostPort: '11434' }] },
|
||||||
},
|
},
|
||||||
ExposedPorts: { '11434/tcp': {} }
|
ExposedPorts: { '11434/tcp': {} },
|
||||||
}),
|
}),
|
||||||
ui_location: null,
|
ui_location: null,
|
||||||
installed: false,
|
installed: false,
|
||||||
installation_status: 'idle',
|
installation_status: 'idle',
|
||||||
is_dependency_service: true,
|
is_dependency_service: true,
|
||||||
depends_on: null,
|
depends_on: DockerService.QDRANT_SERVICE_NAME,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
service_name: DockerService.OPEN_WEBUI_SERVICE_NAME,
|
service_name: DockerService.OPEN_WEBUI_SERVICE_NAME,
|
||||||
|
|
@ -68,9 +99,17 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
NetworkMode: 'host',
|
NetworkMode: 'host',
|
||||||
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/open-webui:/app/backend/data`],
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/open-webui:/app/backend/data`],
|
||||||
PortBindings: { '8080/tcp': [{ HostPort: '3000' }] }
|
PortBindings: { '8080/tcp': [{ HostPort: '3000' }] },
|
||||||
},
|
},
|
||||||
Env: ['WEBUI_AUTH=False', 'PORT=3000', 'OLLAMA_BASE_URL=http://127.0.0.1:11434']
|
Env: [
|
||||||
|
'WEBUI_AUTH=False',
|
||||||
|
'PORT=3000',
|
||||||
|
'OLLAMA_BASE_URL=http://127.0.0.1:11434',
|
||||||
|
'VECTOR_DB=qdrant',
|
||||||
|
'QDRANT_URI=http://127.0.0.1:6333',
|
||||||
|
'RAG_EMBEDDING_ENGINE=ollama',
|
||||||
|
`RAG_EMBEDDING_MODEL=${RagService.EMBEDDING_MODEL}`,
|
||||||
|
],
|
||||||
}),
|
}),
|
||||||
ui_location: '3000',
|
ui_location: '3000',
|
||||||
installed: false,
|
installed: false,
|
||||||
|
|
@ -90,9 +129,9 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
container_config: JSON.stringify({
|
container_config: JSON.stringify({
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
PortBindings: { '80/tcp': [{ HostPort: '8100' }] }
|
PortBindings: { '80/tcp': [{ HostPort: '8100' }] },
|
||||||
},
|
},
|
||||||
ExposedPorts: { '80/tcp': {} }
|
ExposedPorts: { '80/tcp': {} },
|
||||||
}),
|
}),
|
||||||
ui_location: '8100',
|
ui_location: '8100',
|
||||||
installed: false,
|
installed: false,
|
||||||
|
|
@ -113,10 +152,10 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
PortBindings: { '8080/tcp': [{ HostPort: '8200' }] },
|
PortBindings: { '8080/tcp': [{ HostPort: '8200' }] },
|
||||||
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/flatnotes:/data`]
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/flatnotes:/data`],
|
||||||
},
|
},
|
||||||
ExposedPorts: { '8080/tcp': {} },
|
ExposedPorts: { '8080/tcp': {} },
|
||||||
Env: ['FLATNOTES_AUTH_TYPE=none']
|
Env: ['FLATNOTES_AUTH_TYPE=none'],
|
||||||
}),
|
}),
|
||||||
ui_location: '8200',
|
ui_location: '8200',
|
||||||
installed: false,
|
installed: false,
|
||||||
|
|
@ -137,7 +176,7 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
RestartPolicy: { Name: 'unless-stopped' },
|
RestartPolicy: { Name: 'unless-stopped' },
|
||||||
PortBindings: { '8080/tcp': [{ HostPort: '8300' }] },
|
PortBindings: { '8080/tcp': [{ HostPort: '8300' }] },
|
||||||
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/kolibri:/root/.kolibri`]
|
Binds: [`${ServiceSeeder.NOMAD_STORAGE_ABS_PATH}/kolibri:/root/.kolibri`],
|
||||||
},
|
},
|
||||||
ExposedPorts: { '8080/tcp': {} },
|
ExposedPorts: { '8080/tcp': {} },
|
||||||
}),
|
}),
|
||||||
|
|
@ -146,15 +185,17 @@ export default class ServiceSeeder extends BaseSeeder {
|
||||||
installation_status: 'idle',
|
installation_status: 'idle',
|
||||||
is_dependency_service: false,
|
is_dependency_service: false,
|
||||||
depends_on: null,
|
depends_on: null,
|
||||||
}
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
async run() {
|
async run() {
|
||||||
const existingServices = await Service.query().select('service_name')
|
const existingServices = await Service.query().select('service_name')
|
||||||
const existingServiceNames = new Set(existingServices.map(service => service.service_name))
|
const existingServiceNames = new Set(existingServices.map((service) => service.service_name))
|
||||||
|
|
||||||
const newServices = ServiceSeeder.DEFAULT_SERVICES.filter(service => !existingServiceNames.has(service.service_name))
|
const newServices = ServiceSeeder.DEFAULT_SERVICES.filter(
|
||||||
|
(service) => !existingServiceNames.has(service.service_name)
|
||||||
|
)
|
||||||
|
|
||||||
await Service.createMany([...newServices])
|
await Service.createMany([...newServices])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
3
admin/inertia/components/file-uploader/index.css
Normal file
3
admin/inertia/components/file-uploader/index.css
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
/*
 * Shrink the Uppy Dashboard "add files" title when the dashboard is in its
 * `md` size class. `!important` is presumably needed to win over the rule
 * shipped in Uppy's own stylesheet (imported by the component) - verify if
 * the Uppy CSS is ever replaced.
 */
.uppy-size--md .uppy-Dashboard-AddFiles-title {
  font-size: 1.15rem !important;
}
|
||||||
84
admin/inertia/components/file-uploader/index.tsx
Normal file
84
admin/inertia/components/file-uploader/index.tsx
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
import React, { useState } from 'react'
|
||||||
|
import Uppy from '@uppy/core'
|
||||||
|
import '@uppy/core/css/style.min.css'
|
||||||
|
import '@uppy/dashboard/css/style.min.css'
|
||||||
|
import { useUppyEvent } from '@uppy/react'
|
||||||
|
import Dashboard from '@uppy/react/dashboard'
|
||||||
|
import classNames from 'classnames'
|
||||||
|
import './index.css' // Custom styles for the uploader
|
||||||
|
|
||||||
|
/** Props accepted by the `FileUploader` component. */
interface FileUploaderProps {
  /** Minimum number of files required. */
  minFiles?: number
  /** Maximum number of files that may be selected. */
  maxFiles?: number
  /** Largest accepted file size in bytes, e.g. 10485760 for 10MB. */
  maxFileSize?: number
  /** Accepted file types, e.g. `['image/*', 'application/pdf']`. */
  fileTypes?: string[]
  /** Forwarded to the Uppy Dashboard's `disabled` prop. */
  disabled?: boolean
  /** Receives the currently selected files as a `FileList` (may be empty). */
  onUpload: (files: FileList) => void
  /** Extra class name(s) applied to the Dashboard element. */
  className?: string
}
|
||||||
|
|
||||||
|
/**
 * A drag-and-drop (or click) file picker built on the Uppy Dashboard, with
 * configurable minimum/maximum file counts, maximum file size and allowed
 * file types.
 *
 * The component does not upload anything itself (the Dashboard's upload
 * button is hidden). Instead it reports the current selection to `onUpload`
 * as a `FileList` every time the set of files held by Uppy changes,
 * including when the selection becomes empty.
 */
const FileUploader: React.FC<FileUploaderProps> = ({
  minFiles = 0,
  maxFiles = 1,
  maxFileSize = 10485760, // default to 10MB
  fileTypes,
  disabled = false,
  onUpload,
  className,
}) => {
  // Lazy initialiser: the Uppy instance is created once on first render, so
  // later changes to the restriction props are not applied to it.
  const [uppy] = useState(
    () =>
      new Uppy({
        // NOTE(review): debug logging is always on - confirm whether this
        // should be limited to development builds.
        debug: true,
        restrictions: {
          maxFileSize: maxFileSize,
          minNumberOfFiles: minFiles,
          maxNumberOfFiles: maxFiles,
          allowedFileTypes: fileTypes || undefined,
        },
      })
  )

  useUppyEvent(uppy, 'state-update', (prevState, newState) => {
    // `state-update` fires for every store change (plugin/UI state, info
    // messages, ...). Uppy replaces the `files` object whenever a file is
    // added, removed or modified, so an unchanged reference means the
    // selection is the same and the parent does not need to be notified.
    if (prevState.files === newState.files) {
      return
    }

    // DataTransfer is the only way to build a real FileList in the browser.
    const dataTransfer = new DataTransfer()

    for (const file of Object.values(newState.files)) {
      if (file.data instanceof File) {
        dataTransfer.items.add(file.data)
      } else if (file.data instanceof Blob) {
        // Wrap bare blobs so the consumer always receives File objects; fall
        // back to a random name when Uppy has none for this entry.
        const wrapped = new File(
          [file.data],
          file.name || `${crypto.randomUUID()}.${file.extension}`,
          {
            type: file.type,
            lastModified: Date.now(),
          }
        )
        dataTransfer.items.add(wrapped)
      }
      // Entries without local Blob/File data are skipped.
    }

    onUpload(dataTransfer.files) // Always send new file list even if empty
  })

  return (
    <Dashboard
      uppy={uppy}
      width={'100%'}
      height={'250px'}
      hideUploadButton
      disabled={disabled}
      className={classNames(className)}
    />
  )
}
|
||||||
|
|
||||||
|
export default FileUploader
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import {
|
import {
|
||||||
IconBolt,
|
IconBolt,
|
||||||
|
IconBrain,
|
||||||
IconHelp,
|
IconHelp,
|
||||||
IconMapRoute,
|
IconMapRoute,
|
||||||
IconPlus,
|
IconPlus,
|
||||||
|
|
@ -80,6 +81,17 @@ interface DashboardItem {
|
||||||
poweredBy: string | null
|
poweredBy: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Dashboard tile linking to the `/knowledge-base` page. It is not part of the
 * always-present system items: `Home` only adds it when the
 * `nomad_open_webui` service is reported as installed.
 */
const KNOWLEDGE_BASE_ITEM: DashboardItem = {
  // What the tile shows
  label: 'Knowledge Base',
  description: 'Upload documents to your personal knowledge base for AI access',
  icon: <IconBrain size={48} />,
  poweredBy: null,

  // Where the tile points
  to: '/knowledge-base',
  target: '',

  // Listing behaviour
  installed: true,
  displayOrder: 5,
}
|
||||||
|
|
||||||
export default function Home(props: {
|
export default function Home(props: {
|
||||||
system: {
|
system: {
|
||||||
services: ServiceSlim[]
|
services: ServiceSlim[]
|
||||||
|
|
@ -114,6 +126,9 @@ export default function Home(props: {
|
||||||
|
|
||||||
// Add system items
|
// Add system items
|
||||||
items.push(...SYSTEM_ITEMS)
|
items.push(...SYSTEM_ITEMS)
|
||||||
|
if (props.system.services.find((s) => s.service_name === 'nomad_open_webui' && s.installed)) {
|
||||||
|
items.push(KNOWLEDGE_BASE_ITEM)
|
||||||
|
}
|
||||||
|
|
||||||
// Sort all items by display order
|
// Sort all items by display order
|
||||||
items.sort((a, b) => a.displayOrder - b.displayOrder)
|
items.sort((a, b) => a.displayOrder - b.displayOrder)
|
||||||
|
|
@ -130,9 +145,7 @@ export default function Home(props: {
|
||||||
>
|
>
|
||||||
<div className="flex items-center justify-center mb-2">{item.icon}</div>
|
<div className="flex items-center justify-center mb-2">{item.icon}</div>
|
||||||
<h3 className="font-bold text-2xl">{item.label}</h3>
|
<h3 className="font-bold text-2xl">{item.label}</h3>
|
||||||
{item.poweredBy && (
|
{item.poweredBy && <p className="text-sm opacity-80">Powered by {item.poweredBy}</p>}
|
||||||
<p className="text-sm opacity-80">Powered by {item.poweredBy}</p>
|
|
||||||
)}
|
|
||||||
<p className="xl:text-lg mt-2">{item.description}</p>
|
<p className="xl:text-lg mt-2">{item.description}</p>
|
||||||
</div>
|
</div>
|
||||||
</a>
|
</a>
|
||||||
|
|
|
||||||
97
admin/inertia/pages/knowledge-base.tsx
Normal file
97
admin/inertia/pages/knowledge-base.tsx
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
import { Head } from '@inertiajs/react'
|
||||||
|
import { useState } from 'react'
|
||||||
|
import FileUploader from '~/components/file-uploader'
|
||||||
|
import StyledButton from '~/components/StyledButton'
|
||||||
|
import AppLayout from '~/layouts/AppLayout'
|
||||||
|
|
||||||
|
/**
 * Talking points rendered, in order, under the "Why upload documents to your
 * Knowledge Base?" heading. The badge number is derived from the position.
 */
const KNOWLEDGE_BASE_BENEFITS = [
  {
    title: 'AI Assistant Knowledge Base Integration',
    body:
      'When you upload documents to your Knowledge Base, NOMAD processes and embeds the ' +
      'content, making it directly accessible to the AI Assistant. This allows the AI ' +
      'Assistant to reference your specific documents during conversations, providing ' +
      'more accurate and personalized responses based on your uploaded data.',
  },
  {
    title: 'Enhanced Document Processing with OCR',
    body:
      'NOMAD includes built-in Optical Character Recognition (OCR) capabilities, ' +
      'allowing it to extract text from image-based documents such as scanned PDFs or ' +
      'photos. This means that even if your documents are not in a standard text ' +
      'format, NOMAD can still process and embed their content for AI access.',
  },
  {
    title: 'Information Library Integration',
    body:
      'NOMAD will automatically discover and extract any content you save to your ' +
      'Information Library (if installed), making it instantly available to the AI ' +
      'Assistant without any extra steps.',
  },
]

/**
 * Knowledge Base page: lets the user pick a single document with the
 * `FileUploader` and explains why documents are worth uploading.
 *
 * The Upload button is not wired to any request yet (its click handler is
 * empty); it is only enabled while a file is selected.
 */
export default function KnowledgeBase() {
  // `loading` stays false until the upload action is implemented. The
  // previous synchronous setLoading(true)/setLoading(false) pair inside the
  // selection callback was batched by React into a no-op and has been removed.
  const [loading] = useState(false)
  const [files, setFiles] = useState<File[]>([])

  return (
    <AppLayout>
      <Head title="Knowledge Base" />
      <main className="px-6 lg:px-12 py-6 lg:py-8">
        <div className="bg-white rounded-lg border shadow-md overflow-hidden">
          <div className="p-6">
            <FileUploader
              minFiles={1}
              maxFiles={1}
              onUpload={(selected) => setFiles(Array.from(selected))}
            />
            <div className="flex justify-center gap-4 my-6">
              <StyledButton
                variant="primary"
                size="lg"
                icon="ArrowUpCircleIcon"
                onClick={() => {}}
                disabled={files.length === 0 || loading}
                loading={loading}
              >
                Upload
              </StyledButton>
            </div>
          </div>
          <div className="border-t bg-white p-6">
            <h3 className="text-lg font-semibold text-desert-green mb-4">
              Why upload documents to your Knowledge Base?
            </h3>
            <div className="space-y-3">
              {KNOWLEDGE_BASE_BENEFITS.map((benefit, index) => (
                <div key={benefit.title} className="flex items-start gap-3">
                  <div className="flex-shrink-0 w-6 h-6 rounded-full bg-desert-green text-white flex items-center justify-center text-sm font-bold">
                    {index + 1}
                  </div>
                  <div>
                    <p className="font-medium text-desert-stone-dark">{benefit.title}</p>
                    <p className="text-sm text-desert-stone">{benefit.body}</p>
                  </div>
                </div>
              ))}
            </div>
          </div>
        </div>
      </main>
    </AppLayout>
  )
}
|
||||||
1325
admin/package-lock.json
generated
1325
admin/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
|
|
@ -76,11 +76,15 @@
|
||||||
"@inertiajs/react": "^2.0.13",
|
"@inertiajs/react": "^2.0.13",
|
||||||
"@markdoc/markdoc": "^0.5.2",
|
"@markdoc/markdoc": "^0.5.2",
|
||||||
"@protomaps/basemaps": "^5.7.0",
|
"@protomaps/basemaps": "^5.7.0",
|
||||||
|
"@qdrant/js-client-rest": "^1.16.2",
|
||||||
"@tabler/icons-react": "^3.34.0",
|
"@tabler/icons-react": "^3.34.0",
|
||||||
"@tailwindcss/vite": "^4.1.10",
|
"@tailwindcss/vite": "^4.1.10",
|
||||||
"@tanstack/react-query": "^5.81.5",
|
"@tanstack/react-query": "^5.81.5",
|
||||||
"@tanstack/react-query-devtools": "^5.83.0",
|
"@tanstack/react-query-devtools": "^5.83.0",
|
||||||
"@tanstack/react-virtual": "^3.13.12",
|
"@tanstack/react-virtual": "^3.13.12",
|
||||||
|
"@uppy/core": "^5.2.0",
|
||||||
|
"@uppy/dashboard": "^5.1.0",
|
||||||
|
"@uppy/react": "^5.1.1",
|
||||||
"@vinejs/vine": "^3.0.1",
|
"@vinejs/vine": "^3.0.1",
|
||||||
"@vitejs/plugin-react": "^4.6.0",
|
"@vitejs/plugin-react": "^4.6.0",
|
||||||
"autoprefixer": "^10.4.21",
|
"autoprefixer": "^10.4.21",
|
||||||
|
|
@ -90,10 +94,15 @@
|
||||||
"dockerode": "^4.0.7",
|
"dockerode": "^4.0.7",
|
||||||
"edge.js": "^6.2.1",
|
"edge.js": "^6.2.1",
|
||||||
"fast-xml-parser": "^5.2.5",
|
"fast-xml-parser": "^5.2.5",
|
||||||
|
"llm-chunk": "^0.0.1",
|
||||||
"luxon": "^3.6.1",
|
"luxon": "^3.6.1",
|
||||||
"maplibre-gl": "^4.7.1",
|
"maplibre-gl": "^4.7.1",
|
||||||
"mysql2": "^3.14.1",
|
"mysql2": "^3.14.1",
|
||||||
|
"ollama": "^0.6.3",
|
||||||
|
"pdf-parse": "^2.4.5",
|
||||||
|
"pdf2pic": "^3.2.0",
|
||||||
"pino-pretty": "^13.0.0",
|
"pino-pretty": "^13.0.0",
|
||||||
|
"playwright": "^1.58.0",
|
||||||
"pmtiles": "^4.3.0",
|
"pmtiles": "^4.3.0",
|
||||||
"postcss": "^8.5.6",
|
"postcss": "^8.5.6",
|
||||||
"react": "^19.1.0",
|
"react": "^19.1.0",
|
||||||
|
|
@ -101,9 +110,11 @@
|
||||||
"react-dom": "^19.1.0",
|
"react-dom": "^19.1.0",
|
||||||
"react-map-gl": "^8.1.0",
|
"react-map-gl": "^8.1.0",
|
||||||
"reflect-metadata": "^0.2.2",
|
"reflect-metadata": "^0.2.2",
|
||||||
|
"sharp": "^0.34.5",
|
||||||
"systeminformation": "^5.27.14",
|
"systeminformation": "^5.27.14",
|
||||||
"tailwindcss": "^4.1.10",
|
"tailwindcss": "^4.1.10",
|
||||||
"tar": "^7.5.6",
|
"tar": "^7.5.6",
|
||||||
|
"tesseract.js": "^7.0.0",
|
||||||
"url-join": "^5.0.0",
|
"url-join": "^5.0.0",
|
||||||
"usehooks-ts": "^3.1.1",
|
"usehooks-ts": "^3.1.1",
|
||||||
"yaml": "^2.8.0"
|
"yaml": "^2.8.0"
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ transmit.registerRoutes()
|
||||||
router.get('/', [HomeController, 'index'])
|
router.get('/', [HomeController, 'index'])
|
||||||
router.get('/home', [HomeController, 'home'])
|
router.get('/home', [HomeController, 'home'])
|
||||||
router.on('/about').renderInertia('about')
|
router.on('/about').renderInertia('about')
|
||||||
|
router.on('/knowledge-base').renderInertia('knowledge-base')
|
||||||
router.get('/maps', [MapsController, 'index'])
|
router.get('/maps', [MapsController, 'index'])
|
||||||
|
|
||||||
router.get('/easy-setup', [EasySetupController, 'index'])
|
router.get('/easy-setup', [EasySetupController, 'index'])
|
||||||
|
|
|
||||||
3
admin/types/kv_store.ts
Normal file
3
admin/types/kv_store.ts
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
|
||||||
|
/**
 * Keys accepted by the generic key-value store. Only one key is defined so
 * far; extend this union when new settings are persisted.
 * NOTE(review): presumably holds the id of the Open WebUI knowledge
 * collection - confirm against the code that writes it.
 */
export type KVStoreKey = 'open_webui_knowledge_id'
|
||||||
|
/** Value held under a `KVStoreKey`: a string, or `null`. */
export type KVStoreValue = string | null
|
||||||
|
|
@ -27,3 +27,13 @@ export type OllamaModelListing = {
|
||||||
size: string
|
size: string
|
||||||
modified: string
|
modified: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Metadata record describing a file in an Open WebUI knowledge collection.
 * NOTE(review): field meanings below are inferred from their names - confirm
 * against the Open WebUI payloads before relying on them.
 */
export type OpenWebUIKnowledgeFileMetadata = {
  /** Presumably the origin (file name or path) of the content. */
  source: string
  /** Presumably the display name of the file. */
  name: string
  /** Presumably the id of the user who created the entry. */
  created_by: string
  /** Presumably the Open WebUI identifier of the file. */
  file_id: string
  /** Presumably the offset at which the chunk starts within the file. */
  start_index: number
  /** Presumably a content hash of the file or chunk. */
  hash: string
}
|
||||||
Loading…
Reference in New Issue
Block a user