An ultra-lightweight HTTP server that brings PicoLM inference to your stack with full OpenAI API compatibility — no cloud required.
PicoLM Server gives you a production-ready inference API without the bloat — a drop-in replacement for OpenAI that takes a single config change.
Change your base URL. That's it. PicoLM Server speaks fluent OpenAI so your existing code just works.
# Talk to PicoLM Server with the official OpenAI Python SDK —
# only the base URL changes; everything else is standard OpenAI usage.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="your-key",
)

response = client.chat.completions.create(
    model="picolm-local",
    messages=[{"role": "user", "content": "Hello!"}],
)

print(response.choices[0].message.content)
// Point the official OpenAI Node SDK at PicoLM Server —
// only the base URL changes; the rest is standard OpenAI usage.
import OpenAI from 'openai';

const client = new OpenAI({
  baseURL: 'http://localhost:8080/v1',
  apiKey: 'your-key',
});

const response = await client.chat.completions.create({
  model: 'picolm-local',
  messages: [{ role: 'user', content: 'Hello!' }],
});

console.log(response.choices[0].message.content);
package main import ( "context" "fmt" openai "github.com/sashabaranov/go-openai" ) func main() { cfg := openai.DefaultConfig("your-key") cfg.BaseURL = "http://localhost:8080/v1" client := openai.NewClientWithConfig(cfg) resp, _ := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: "picolm-local", Messages: []openai.ChatCompletionMessage{ {Role: openai.ChatMessageRoleUser, Content: "Hello!"}, }, }, ) fmt.Println(resp.Choices[0].Message.Content) }
# Stream a chat completion from PicoLM Server using plain curl.
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-key" \
  -d '{
    "model": "picolm-local",
    "messages": [
      {"role": "user", "content": "Hello!"}
    ],
    "stream": true,
    "temperature": 0.7
  }'
Load any GGUF-formatted model that supports ChatML format. From tiny to large — if PicoLM can run it, PicoLM Server can serve it.
Three steps. One config file. You're running local inference.
Grab the source and compile the server binary with Go.
Copy the example config and point it at your PicoLM binary and GGUF model file.
Launch the server. Your OpenAI-compatible endpoint is live at localhost:8080.
Prefer containers? One command and you're done.