Skip to main content

API Server

Cortex includes an API server that, by default, listens at `localhost:39281`.

Usage

Start Cortex Server


# Start the server with default settings; by default it is started on port `39281`
cortex
# Start the server on a different address and port number
# (replace <address> and <port_number> with your own values)
cortex -a <address> -p <port_number>
# Set the data folder directory (replace <dataFolderPath> with the folder to use)
cortex --dataFolder <dataFolderPath>

Run Model


# Pull the `mistral` model by POSTing to its pull endpoint on the API server
curl --request POST --url 'http://localhost:39281/v1/models/mistral/pull'
# Start the `mistral` model; the JSON body carries the model start settings
curl \
  --url 'http://localhost:39281/v1/models/mistral/start' \
  --request POST \
  --header 'Content-Type: application/json' \
  --data '{
"prompt_template": "system\n{system_message}\nuser\n{prompt}\nassistant",
"stop": [],
"ngl": 4096,
"ctx_len": 4096,
"cpu_threads": 10,
"n_batch": 2048,
"caching_enabled": true,
"grp_attn_n": 1,
"grp_attn_w": 512,
"mlock": false,
"flash_attn": true,
"cache_type": "f16",
"use_mmap": true,
"engine": "llamacpp"
}'

Show the Model State


# Check the model status by querying the model events endpoint
curl --request GET --url 'http://localhost:39281/v1/system/events/model'

Chat with Model


# Invoke the chat completions endpoint.
# The request body must be strict JSON: no trailing commas, and each key
# (such as "model") may appear only once. "model" names the model to chat
# with, and "stop" is left as an empty list.
curl --url http://localhost:39281/v1/chat/completions \
  --header "Content-Type: application/json" \
  --data '{
  "model": "mistral",
  "messages": [
    {
      "role": "user",
      "content": "Hello"
    }
  ],
  "stream": true,
  "max_tokens": 1,
  "stop": [],
  "frequency_penalty": 1,
  "presence_penalty": 1,
  "temperature": 1,
  "top_p": 1
}'

Stop Model


# Stop the `mistral` model by POSTing to its stop endpoint
curl --request POST --url 'http://localhost:39281/v1/models/mistral/stop'

Pull Model


# Pull the `mistral` model by POSTing to its pull endpoint
curl --request POST --url 'http://localhost:39281/v1/models/mistral/pull'