Skip to main content

Settings API

Read and update application configuration at runtime. Manage VRAM presets, cloud model registries, Ollama connectivity and models, logging levels, TLS certificates, local embedding models, and database reset, cleanup, and seed operations.

Base path: /api/v1/settings

Related pages

Get Settings

Returns the complete application settings object with all configuration groups.

GET /api/v1/settings

Example Request

curl http://localhost:8080/api/v1/settings

Response

Status: 200 OK

Trimmed for readability

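The response includes all configuration sections. Only key fields are shown per section; lines beginning with // summarize the omitted fields and are not part of the actual JSON response. See the settings.yaml reference for the complete schema.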

{
"app_name": "Chaos Cypher",
"current_database": "default",
"data_dir": "/data",
"dark_mode": true,
"auto_enable": true,
"local_auth": {
"cookie_name": "cc_session",
"cookie_ttl_seconds": 2592000,
"cookie_secure": false
},
"llm": {
"chat_provider": "ollama",
"ollama_instances": [
{
"id": "default",
"name": "Default",
"base_url": "http://host.docker.internal:11434",
"enabled": true,
"healthy": true
}
],
"ollama_load_balancing": "round_robin",
"ollama_chat_model": "qwen3:30b-instruct",
"ollama_num_ctx": 32768,
"openai_api_key": null,
"openai_chat_model": "gpt-4.1",
"anthropic_api_key": null,
"anthropic_chat_model": "claude-sonnet-4-5",
"gemini_api_key": null,
"gemini_chat_model": "gemini-2.5-pro",
"ai_max_tokens": 65536,
"ai_temperature": 0.3,
"thinking_for_chat": true,
"enable_llm_queueing": true
// + extraction models, context windows, streaming, cost tracking, etc.
},
"queue": {
"queue_host": "valkey",
"queue_port": 6379,
"queue_database": 0
// + queue_password, queue_ssl
},
"chunking": {
"small_chunk_size": 900,
"small_chunk_overlap": 150,
"group_size": 4
// + min/max chunk sizes, extraction density, normalization
},
"search": {
"enable_vector_search": true,
"embedding_model": "Qwen/Qwen3-Embedding-0.6B",
"vector_dimensions": 1024,
"min_similarity_threshold": 0.55,
"enable_rerank": true
// + rerank model, fulltext language, candidate multiplier, etc.
},
"source_processing": {
"auto_extract_entities": true,
"entity_deduplication_mode": "semantic",
"relationship_confidence_threshold": 0.5
// + chunking strategy, dedup thresholds, max ratios, etc.
},
"export": {
"export_version": "1.0.0",
"export_license": "CC-BY-SA-4.0"
// + package name, author, description, tags
},
"lexicon": {
"url": "http://localhost:3001",
"api_path": "/api/v1"
// + timeout, token, credentials
},
"paths": {
"data_dir": "/data",
"databases_subdir": "databases",
"app_db_filename": "app.db"
// + settings paths, graphs, search, imports, static dirs
},
"priorities": { "interactive": 10, "background": 50, "default": 0 },
"timeouts": {
"llm_chat_wait": 120,
"http_request": 30,
"hot_reload_delay": 10
// + embedding, operation, worker, health check, SQLite timeouts
},
"ports": { "web_ui_api": 8080, "valkey": 6379 },
"batching": {
"embedding_batch_size": 512,
"embedding_concurrency": 4,
"max_upload_files": 20
// + PDF batching, discovery, export, graph analysis limits, etc.
},
"pagination": {
"default_page_size": 50,
"max_page_size": 1000,
"canvas_max_nodes": 100000,
"canvas_max_edges": 300000
// + list limits, history limits, citation page size
},
"retries": {
"llm_max_retries": 3,
"llm_worker_max_tries": 5,
"operations_worker_max_tries": 5
// + HTTP, SQLite, extraction retries
},
"services": {
"cortex_internal_url": "http://cortex:8080",
"valkey_internal_url": "valkey://valkey:6379"
},
"backoff": {
"retry_delays": [2.0, 4.0, 8.0, 16.0],
"max_seconds": 30
// + LLM/SQLite backoff multipliers
},
"analysis": { "quick_sample_size": 5, "extraction_max_input_chars": 8000 },
"chat_context": {
"default_context_window": 32768,
"history_allocation_percent": 0.50
// + token estimates, preview lengths, response validation
},
"workers": { "operations_max_concurrent": 8, "health_report_interval": 2 },
"cors": {
"allowed_origins": ["http://localhost:3000", "http://localhost:8080"]
// + allow_credentials, allow_methods, allow_headers
},
"custom_settings": {}
}

Update Settings

Partially update application settings. Changes are persisted to settings.yaml. When LLM or search settings change, workers are notified via Valkey pub/sub to hot-reload their providers without a restart.

PATCH /api/v1/settings

Request Body

Any valid settings fields to update. Supports nested updates by passing the top-level group key.

Example Request

curl -X PATCH http://localhost:8080/api/v1/settings \
-H 'Content-Type: application/json' \
-d '{
"llm": {
"chat_provider": "openai",
"openai_api_key": "sk-..."
},
"search": {
"min_similarity_threshold": 0.6
}
}'

Response

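Status: 200 OK

The settings object below is trimmed for readability; the actual response contains the complete updated settings object. The warning shown is illustrative -- it is returned when search.vector_dimensions is changed.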

{
"settings": {
"app_name": "Chaos Cypher",
"current_database": "default",
"llm": {
"chat_provider": "openai",
"openai_api_key": "sk-..."
},
"search": {
"min_similarity_threshold": 0.6
}
},
"warnings": [
{
"field": "search.vector_dimensions",
"message": "Vector dimensions changed. Existing embeddings may be orphaned and should be regenerated.",
"severity": "warning"
}
]
}
Automatic trigger sync

When enable_auto_embedding changes, system triggers for node.created and node.updated events are automatically updated. Only system workflows are affected -- user-created workflows remain unchanged.

Warnings

The response may include warnings when a change has side effects. For example, changing vector_dimensions warns about orphaned embeddings that need regeneration.


Reset Settings

Reset all settings to their default values. The settings.yaml file is overwritten with defaults.

POST /api/v1/settings/reset

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset

Response

Status: 200 OK

Returns the complete default Settings object (same schema as Get Settings).


Get Logging Level

Get the current application logging level.

GET /api/v1/settings/logging/level

Example Request

curl http://localhost:8080/api/v1/settings/logging/level

Response

Status: 200 OK

{
"level": "INFO",
"numeric_level": 20,
"available_levels": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
}

Set Logging Level

Change the application logging level in real-time. No restart required.

POST /api/v1/settings/logging/level

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| level | string | Yes | One of: DEBUG, INFO, WARNING, ERROR, CRITICAL |

Example Request

curl -X POST http://localhost:8080/api/v1/settings/logging/level \
-H 'Content-Type: application/json' \
-d '{"level": "WARNING"}'

Response

Status: 200 OK

{
"success": true,
"old_level": "INFO",
"new_level": "WARNING",
"message": "Logging level changed from INFO to WARNING"
}

VRAM Presets

VRAM presets provide pre-configured LLM model and parameter selections optimized for different GPU memory sizes. Presets are loaded from built-in defaults and optionally from user plugins in data/plugins/presets/.

List Presets

List all available VRAM presets, sorted by VRAM size (ascending).

GET /api/v1/settings/presets

Example Request

curl http://localhost:8080/api/v1/settings/presets

Response

Status: 200 OK

{
"presets": [
{
"name": "vram_8gb",
"display_name": "8 GB VRAM",
"description": "Small models for entry-level GPUs",
"vram_gb": 8,
"gpu_examples": ["RTX 3060 8GB", "RTX 4060"],
"version": "1.0.0",
"author": "Chaos Cypher",
"builtin": true,
"ollama_settings": {
"ollama_chat_model": "qwen3:8b-instruct",
"ollama_num_ctx": 8192,
"ollama_num_batch": 512
},
"llm_settings": {
"ai_max_tokens": 4096,
"thinking_for_chat": false,
"thinking_for_tools": false,
"thinking_for_extraction": false
}
},
{
"name": "vram_24gb",
"display_name": "24 GB VRAM",
"description": "Large models for high-end GPUs",
"vram_gb": 24,
"gpu_examples": ["RTX 3090", "RTX 4090"],
"version": "1.0.0",
"author": "Chaos Cypher",
"builtin": true,
"ollama_settings": {
"ollama_chat_model": "qwen3:30b-instruct",
"ollama_num_ctx": 32768,
"ollama_num_batch": 1024
},
"llm_settings": {
"ai_max_tokens": 65536,
"thinking_for_chat": true,
"thinking_for_tools": false,
"thinking_for_extraction": false
}
}
],
"count": 2
}

Get Preset

Get a specific VRAM preset by ID.

GET /api/v1/settings/presets/{preset_id}

Path Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| preset_id | string | Yes | Preset identifier (e.g., vram_24gb) |

Example Request

curl http://localhost:8080/api/v1/settings/presets/vram_24gb

Response

Status: 200 OK

{
"name": "vram_24gb",
"display_name": "24 GB VRAM",
"description": "Large models for high-end GPUs",
"vram_gb": 24,
"gpu_examples": ["RTX 3090", "RTX 4090"],
"version": "1.0.0",
"author": "Chaos Cypher",
"builtin": true,
"ollama_settings": {
"ollama_chat_model": "qwen3:30b-instruct",
"ollama_num_ctx": 32768,
"ollama_num_batch": 1024
},
"llm_settings": {
"ai_max_tokens": 65536,
"thinking_for_chat": true,
"thinking_for_tools": false,
"thinking_for_extraction": false
}
}
404 Not Found

Returned when no preset exists with the given ID.


Apply Preset

Apply a VRAM preset to update LLM settings. Workers are notified via Valkey pub/sub to hot-reload their providers.

POST /api/v1/settings/presets/apply

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| preset_id | string | Yes | Preset to apply (e.g., vram_24gb) |

Example Request

curl -X POST http://localhost:8080/api/v1/settings/presets/apply \
-H 'Content-Type: application/json' \
-d '{"preset_id": "vram_24gb"}'

Response

Status: 200 OK

{
"success": true,
"preset_id": "vram_24gb",
"preset_name": "24 GB VRAM",
"settings_updated": {
"ollama_chat_model": "qwen3:30b-instruct",
"ollama_num_ctx": 32768,
"ollama_num_batch": 1024,
"ai_max_tokens": 65536,
"thinking_for_chat": true,
"thinking_for_tools": false,
"thinking_for_extraction": false
},
"message": "Applied preset 'vram_24gb' successfully"
}
What gets updated

Applying a preset updates: ollama_chat_model, ollama_num_ctx, ollama_num_batch, ai_max_tokens, thinking_for_chat, thinking_for_tools, and thinking_for_extraction. All other settings (API keys, URLs, instances, etc.) are preserved.

404 Not Found

Returned when no preset exists with the given ID.


Cloud Models

The cloud model registry provides metadata about available models for cloud LLM providers (OpenAI, Anthropic, Gemini). Use these endpoints to populate model selection dropdowns and display capabilities and pricing.

List All Cloud Models

Get all available cloud LLM models grouped by provider.

GET /api/v1/settings/cloudmodels

Example Request

curl http://localhost:8080/api/v1/settings/cloudmodels

Response

Status: 200 OK

{
"providers": {
"openai": {
"display_name": "OpenAI",
"models": [
{
"id": "gpt-4.1",
"display_name": "GPT-4.1",
"context_window": 1047576,
"max_output_tokens": 32768,
"supports_vision": true,
"supports_tools": true,
"recommended": true,
"pricing": {
"input_per_million": 2.0,
"output_per_million": 8.0
},
"notes": null
}
]
},
"anthropic": {
"display_name": "Anthropic",
"models": [
{
"id": "claude-sonnet-4-5",
"display_name": "Claude Sonnet 4.5",
"context_window": 200000,
"max_output_tokens": 64000,
"supports_vision": true,
"supports_tools": true,
"recommended": true,
"pricing": {
"input_per_million": 3.0,
"output_per_million": 15.0
},
"notes": null
}
]
},
"gemini": {
"display_name": "Google Gemini",
"models": [
{
"id": "gemini-2.5-pro",
"display_name": "Gemini 2.5 Pro",
"context_window": 1048576,
"max_output_tokens": 65536,
"supports_vision": true,
"supports_tools": true,
"recommended": true,
"pricing": {
"input_per_million": 1.25,
"output_per_million": 10.0
},
"notes": null
}
]
}
}
}

List Models by Provider

Get available models for a specific cloud provider.

GET /api/v1/settings/cloudmodels/{provider}

Path Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| provider | string | Yes | Provider ID: openai, anthropic, or gemini |

Example Request

curl http://localhost:8080/api/v1/settings/cloudmodels/anthropic

Response

Status: 200 OK

[
{
"id": "claude-sonnet-4-5",
"display_name": "Claude Sonnet 4.5",
"context_window": 200000,
"max_output_tokens": 64000,
"supports_vision": true,
"supports_tools": true,
"recommended": true,
"pricing": {
"input_per_million": 3.0,
"output_per_million": 15.0
},
"notes": null
}
]
404 Not Found

Returned when no provider exists with the given ID.


Ollama Verification

Verify Ollama URL

Verify that an Ollama instance is running and reachable at the given URL. Checks basic connectivity, retrieves the list of installed models, and reports the Ollama version.

POST /api/v1/settings/ollama/verify

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| url | string | Yes | Ollama base URL to verify (e.g., http://localhost:11434) |
| timeout | integer | No | Request timeout in seconds. Uses timeouts.ollama_verify_timeout from settings if not provided |

Example Request

curl -X POST http://localhost:8080/api/v1/settings/ollama/verify \
-H 'Content-Type: application/json' \
-d '{"url": "http://localhost:11434", "timeout": 5}'

Response (Success)

Status: 200 OK

{
"success": true,
"message": "Ollama is running and reachable",
"version": "0.6.2",
"models": ["qwen3:30b-instruct", "snowflake-arctic-embed2", "llama3:8b"],
"model_count": 3,
"response_time_ms": 42,
"error_type": null
}

Response (Failure)

Status: 200 OK

{
"success": false,
"message": "Connection refused: could not connect to http://localhost:11434",
"version": null,
"models": null,
"model_count": null,
"response_time_ms": null,
"error_type": "connection_error"
}
Always returns 200

This endpoint always returns 200 OK regardless of Ollama reachability. Check the success field to determine connectivity status. The error_type field provides a machine-readable error classification when success is false.


Ollama Model Management

Manage Ollama models directly from the API -- list installed models, pull new ones, remove unused models, and inspect model details.

List Installed Models

GET /api/v1/settings/ollama/models

List all models installed on the configured Ollama instance.

Example Request

curl http://localhost:8080/api/v1/settings/ollama/models

Response

Status: 200 OK

{
"models": [
{
"name": "qwen3:30b-instruct",
"size": 18200000000,
"modified_at": "2026-03-01T12:00:00Z",
"digest": "sha256:abc123..."
},
{
"name": "snowflake-arctic-embed2",
"size": 1200000000,
"modified_at": "2026-02-15T08:00:00Z",
"digest": "sha256:def456..."
}
]
}

Pull Model

POST /api/v1/settings/ollama/models/pull

Pull (download) a model from the Ollama registry. Returns a Server-Sent Events (SSE) stream with real-time download progress.

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| model | string | Yes | Model name to pull (e.g. qwen3:30b-instruct) |

Example Request

curl -X POST http://localhost:8080/api/v1/settings/ollama/models/pull \
-H 'Content-Type: application/json' \
-d '{"model": "qwen3:8b-instruct"}'

Response (SSE Stream)

Status: 200 OK with Content-Type: text/event-stream

data: {"status": "pulling manifest"}
data: {"status": "downloading", "completed": 1048576, "total": 4800000000}
data: {"status": "downloading", "completed": 2097152, "total": 4800000000}
data: {"status": "verifying sha256 digest"}
data: {"status": "writing manifest"}
data: {"status": "success"}

Remove Model

DELETE /api/v1/settings/ollama/models/remove

Remove an installed model from Ollama.

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| model | string | Yes | Model name to remove |

Example Request

curl -X DELETE http://localhost:8080/api/v1/settings/ollama/models/remove \
-H 'Content-Type: application/json' \
-d '{"model": "qwen3:8b-instruct"}'

Response

Status: 200 OK

{
"success": true,
"message": "Model 'qwen3:8b-instruct' removed"
}

Errors

| Status | Reason |
| --- | --- |
| 404 | Model not found on Ollama instance |

Get Model Details

GET /api/v1/settings/ollama/models/{model:path}/details

Get detailed information about a specific installed Ollama model, including parameter count, quantization, and capabilities. The {model:path} parameter accepts model names containing slashes and colons (e.g. qwen3:30b-instruct).

Path Parameters

| Parameter | Type | Required | Description |
| --- | --- | --- | --- |
| model | string | Yes | Model name (e.g. qwen3:30b-instruct). Colons and slashes are allowed. |

Example Request

curl http://localhost:8080/api/v1/settings/ollama/models/qwen3:30b-instruct/details

Response

Status: 200 OK

{
"name": "qwen3:30b-instruct",
"model_info": {
"general.architecture": "qwen3",
"general.parameter_count": 30000000000,
"general.quantization_version": "Q4_K_M"
},
"details": {
"format": "gguf",
"family": "qwen3",
"parameter_size": "30B",
"quantization_level": "Q4_K_M"
}
}

Errors

| Status | Reason |
| --- | --- |
| 404 | Model not found on Ollama instance |

Reset Operations

Destructive operations that reset parts of the application database. All reset endpoints in this section (POST /api/v1/settings/reset/...) return a ResetResponse with success status and operation-specific statistics.

Irreversible

All reset operations permanently delete data and cannot be undone. Back up your database before proceeding.

Reset Workflows

Reset the workflow system (tools, workflows, triggers) to factory defaults.

POST /api/v1/settings/reset/workflows

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/workflows

Response

Status: 200 OK

{
"success": true,
"data": {
"workflows_deleted": 5,
"tools_deleted": 42,
"triggers_deleted": 4,
"workflows_created": 3,
"tools_created": 40,
"triggers_created": 2
}
}

Deletes: All custom workflows, execution history, user tools, triggers, and trigger history.

Recreates: System tools (40+), default workflows (3), default triggers (2).


Reset Chats

Delete all conversations and messages.

POST /api/v1/settings/reset/chats

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/chats

Response

Status: 200 OK

{
"success": true,
"data": {
"chats_deleted": 12,
"messages_deleted": 347
}
}

Reset Queue

Reset the queue system, cancelling all active jobs and clearing statistics.

POST /api/v1/settings/reset/queue

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/queue

Response

Status: 200 OK

{
"success": true,
"data": {
"jobs_cancelled": 2,
"tasks_cleared": 58,
"stats_cleared": true
}
}

Deletes: All active/queued jobs (cancelled), completed/failed/cancelled task records, token usage statistics, cost tracking data, task history.

Preserves: Queue configuration.


Reset Source Processing

Reset source processing history (imports, chunks, extraction jobs) while preserving committed knowledge.

POST /api/v1/settings/reset/source_processing

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/source_processing

Response

Status: 200 OK

{
"success": true,
"data": {
"source_files_deleted": 15,
"chunks_deleted": 2340,
"embeddings_deleted": 2340,
"extraction_jobs_deleted": 15,
"imports_dir_cleared": true
}
}

Deletes: All source file records, staged document chunks, entity embeddings from source processing, chunk extraction jobs and tasks, uploaded import files directory.

Preserves: Committed sources and their chunks, knowledge graph (nodes, edges), workflows, tools, triggers, conversations.


Reset Knowledge

Reset the entire knowledge base (combined reset of sources, graph, and search indices).

POST /api/v1/settings/reset/knowledge

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/knowledge

Response

Status: 200 OK

{
"success": true,
"data": {
"import_history_deleted": 15,
"graph_nodes_deleted": 450,
"graph_edges_deleted": 1200,
"graph_templates_deleted": 0,
"sources_deleted": 8,
"chunks_deleted": 4500,
"search_indices_cleared": true
}
}

Deletes: Import history and file records, discovery sessions and AI suggestions, knowledge graph (nodes, edges, templates), document sources (sources, chunks, citations, tags), search indices (full-text and vector).

Preserves: Workflows, tools, triggers, conversations, queue statistics.


Reset All

Nuclear reset -- deletes everything and recreates the database with factory defaults.

POST /api/v1/settings/reset/all

Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| confirmation | string | Yes | Must be exactly "CONFIRM" to proceed |

Example Request

curl -X POST http://localhost:8080/api/v1/settings/reset/all \
-H 'Content-Type: application/json' \
-d '{"confirmation": "CONFIRM"}'

Response

Status: 200 OK

{
"success": true,
"data": {
"app_db_deleted": true,
"graphs_deleted": true,
"search_indices_deleted": true,
"imports_deleted": true,
"queue_cleared": true,
"database_recreated": true,
"system_tools_created": 40,
"default_workflows_created": 3,
"default_triggers_created": 2
}
}

Deletes: Entire app.db file (including all knowledge graph nodes, edges, templates, search indices, queue history), and uploaded import files.

Recreates: Fresh database with system defaults, system tools (40+), default workflows (3), default triggers (2).

400 Bad Request

Returned when confirmation is not set to "CONFIRM".


Cleanup Operations

Clean Up Orphaned Graph Items

Safe maintenance operation that removes graph items with invalid references. Primarily useful for cleaning up legacy data that was created before foreign-key (FK) constraints were in place.

POST /api/v1/settings/cleanup/orphans

Example Request

curl -X POST http://localhost:8080/api/v1/settings/cleanup/orphans

Response

Status: 200 OK

{
"success": true,
"data": {
"edges_scanned": 1200,
"edges_removed": 3,
"nodes_scanned": 450,
"nodes_removed": 1,
"templates_scanned": 25,
"templates_removed": 0
}
}

Removes: Edges pointing to non-existent nodes, nodes with source_id pointing to non-existent sources, templates with source_id pointing to non-existent sources (except system templates).

Preserves: Nodes/edges with source_id=NULL (intentionally unlinked: chat, workflows, manual), system templates, all valid nodes and edges with proper references.


Seed Operations

Re-seed Default Templates

Re-seed default system templates. This is a safe operation that only creates templates that do not already exist.

POST /api/v1/settings/seed/templates

Example Request

curl -X POST http://localhost:8080/api/v1/settings/seed/templates

Response

Status: 200 OK

{
"success": true,
"data": {
"templates_created": 5,
"templates_skipped": 20,
"total_templates": 25
}
}

Creates (if missing): Default node templates (Note, Item, Person, Organization, etc.), default edge templates (link, works_at, located_in, etc.), system templates (Workflow, etc.).

Idempotent

This endpoint is safe to call multiple times. Existing templates are not modified or duplicated.


TLS Configuration

Manage TLS certificates for HTTPS. All TLS endpoints require authentication.

Get TLS Status

GET /api/v1/settings/tls/status

Returns the current TLS configuration state.

curl http://localhost:8080/api/v1/settings/tls/status

Response 200 OK

{
"enabled": true,
"mode": "selfsigned",
"cert_expires_at": "2027-05-06T12:00:00"
}

Generate Self-Signed Certificate

POST /api/v1/settings/tls/selfsigned

Generate a self-signed TLS certificate and enable HTTPS. Suitable for local development and self-hosted deployments where certificate warnings are acceptable.

curl -X POST http://localhost:8080/api/v1/settings/tls/selfsigned

Response 200 OK

{
"success": true,
"message": "Self-signed certificate generated and TLS enabled"
}

Upload Custom Certificate

POST /api/v1/settings/tls/custom

Upload a custom TLS certificate and private key (e.g. from Let's Encrypt or a CA).

curl -X POST http://localhost:8080/api/v1/settings/tls/custom \
-F "cert=@fullchain.pem" \
-F "key=@privkey.pem"

Response 200 OK

{
"success": true,
"message": "Custom certificate uploaded and TLS enabled"
}

Disable TLS

DELETE /api/v1/settings/tls

Disable TLS and revert to plain HTTP.

curl -X DELETE http://localhost:8080/api/v1/settings/tls

Response 200 OK

{
"success": true,
"message": "TLS disabled"
}

Embedding Models

Manage local embedding models (HuggingFace Sentence Transformers downloaded to the data directory).

List Curated Embedding Models

GET /api/v1/settings/embedding/models

Returns the curated list of supported embedding models with metadata. Used to populate the model selection UI.

curl http://localhost:8080/api/v1/settings/embedding/models

Response 200 OK

{
"models": [
{
"id": "Qwen/Qwen3-Embedding-0.6B",
"display_name": "Qwen3 Embedding 0.6B",
"dimensions": 1024,
"description": "Compact multilingual embedding model",
"recommended": true
}
]
}

List Downloaded Local Models

GET /api/v1/settings/embedding/local/models

Returns embedding models already downloaded to the local data directory.

curl http://localhost:8080/api/v1/settings/embedding/local/models

Response 200 OK

{
"models": [
{
"model_id": "Qwen/Qwen3-Embedding-0.6B",
"path": "/data/models/Qwen/Qwen3-Embedding-0.6B",
"size_bytes": 1200000000,
"downloaded_at": "2026-03-01T10:00:00"
}
]
}

Download Local Embedding Model

POST /api/v1/settings/embedding/local/models

Download a HuggingFace embedding model to the local data directory.

curl -X POST http://localhost:8080/api/v1/settings/embedding/local/models \
-H "Content-Type: application/json" \
-d '{"model_id": "Qwen/Qwen3-Embedding-0.6B"}'
Request Body

| Field | Type | Required | Description |
| --- | --- | --- | --- |
| model_id | string | Yes | HuggingFace model ID to download |

Response 202 Accepted

{
"task_id": "task-abc-123",
"status": "queued",
"message": "Model download queued"
}

Delete Local Embedding Model

DELETE /api/v1/settings/embedding/local/models/{model_id:path}

Remove a downloaded embedding model from the local data directory. The {model_id:path} parameter accepts model IDs containing slashes (e.g. Qwen/Qwen3-Embedding-0.6B).

curl -X DELETE "http://localhost:8080/api/v1/settings/embedding/local/models/Qwen/Qwen3-Embedding-0.6B"

Response 200 OK

{
"success": true,
"message": "Model deleted"
}
Errors

| Status | Description |
| --- | --- |
| 404 | Model not found locally |

Response Schema Reference

ResetResponse

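Returned by the database reset endpoints (/api/v1/settings/reset/...), the cleanup endpoint, and the seed endpoint. Reset Settings (POST /api/v1/settings/reset) returns the Settings object instead.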

| Field | Type | Description |
| --- | --- | --- |
| success | boolean | Whether the operation completed successfully |
| data | object | Operation-specific statistics (varies by endpoint) |

SettingsUpdateResponse

Returned by the Update Settings endpoint.

| Field | Type | Description |
| --- | --- | --- |
| settings | object | The complete updated settings object |
| warnings | list[SettingsWarning] | Warnings about side effects of the changes (may be empty) |

SettingsWarning

| Field | Type | Description |
| --- | --- | --- |
| field | string | The settings field that triggered the warning |
| message | string | Human-readable description of the side effect |
| severity | string | "warning" or "info" |

VRAMPresetResponse

| Field | Type | Description |
| --- | --- | --- |
| name | string | Preset identifier |
| display_name | string | Human-readable preset name |
| description | string | What this preset is optimized for |
| vram_gb | integer | Target GPU VRAM in gigabytes |
| gpu_examples | list[string] | Example GPUs that match this VRAM tier |
| version | string | Preset version |
| author | string | Preset author |
| builtin | boolean | Whether this is a built-in preset or user-provided |
| ollama_settings | object | Ollama model and parameter overrides |
| llm_settings | object | LLM behavior overrides |

ApplyPresetResponse

| Field | Type | Description |
| --- | --- | --- |
| success | boolean | Whether the preset was applied successfully |
| preset_id | string | The ID of the applied preset |
| preset_name | string | Display name of the applied preset |
| settings_updated | object | Key-value pairs of all settings that were changed |
| message | string | Human-readable confirmation message |

OllamaVerifyResponse

| Field | Type | Description |
| --- | --- | --- |
| success | boolean | Whether Ollama is reachable |
| message | string | Human-readable status message |
| version | string or null | Ollama version (when reachable) |
| models | list[string] or null | List of installed model names (when reachable) |
| model_count | integer or null | Number of installed models (when reachable) |
| response_time_ms | integer or null | Round-trip time in milliseconds (when reachable) |
| error_type | string or null | Machine-readable error classification (when unreachable) |

CloudModelInfo

| Field | Type | Description |
| --- | --- | --- |
| id | string | Model identifier used in API calls |
| display_name | string | Human-readable model name |
| context_window | integer | Maximum input context window in tokens |
| max_output_tokens | integer | Maximum output tokens per request |
| supports_vision | boolean | Whether the model supports image inputs |
| supports_tools | boolean | Whether the model supports tool/function calling |
| recommended | boolean | Whether this model is recommended for use |
| pricing | object or null | Pricing with input_per_million and output_per_million (USD) |
| notes | string or null | Additional notes about the model |

LoggingLevelResponse

| Field | Type | Description |
| --- | --- | --- |
| level | string | Current level name |
| numeric_level | integer | Numeric Python logging level |
| available_levels | list[string] | All valid level names |

SetLoggingLevelResponse

| Field | Type | Description |
| --- | --- | --- |
| success | boolean | Whether the level was changed |
| old_level | string | Previous logging level |
| new_level | string | New logging level |
| message | string | Human-readable confirmation |