
Anthropic-Compatible API

LayerScale is a drop-in replacement for the Anthropic Messages API. Point your existing Anthropic SDK code at LayerScale and it works without changes.

Setup

Python SDK

Terminal window
pip install anthropic
import anthropic
client = anthropic.Anthropic(
base_url="http://localhost:8080",
api_key="your-api-key"
)

TypeScript SDK

Terminal window
npm install @anthropic-ai/sdk
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({
baseURL: "http://localhost:8080",
apiKey: "your-api-key",
});

Messages

Python

import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
message = client.messages.create(
model="meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens=300,
system="You are a helpful assistant.",
messages=[
{"role": "user", "content": "Explain the CAP theorem in simple terms."},
],
)
print(message.content[0].text)

TypeScript

import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({
baseURL: "http://localhost:8080",
apiKey: "your-api-key",
});
const message = await client.messages.create({
model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens: 300,
system: "You are a helpful assistant.",
messages: [
{ role: "user", content: "Explain the CAP theorem in simple terms." },
],
});
console.log(message.content[0].text);

REST

Terminal window
curl http://localhost:8080/v1/messages \
-H "Content-Type: application/json" \
-H "x-api-key: your-api-key" \
-d '{
"model": "meta-llama/Llama-4-Maverick-17B-128E-Original",
"max_tokens": 300,
"system": "You are a helpful assistant.",
"messages": [
{"role": "user", "content": "Explain the CAP theorem in simple terms."}
]
}'

Streaming

Python

import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
with client.messages.stream(
model="meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens=200,
messages=[
{"role": "user", "content": "Write a haiku about inference latency."},
],
) as stream:
for text in stream.text_stream:
print(text, end="", flush=True)
print()

TypeScript

import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({
baseURL: "http://localhost:8080",
apiKey: "your-api-key",
});
const stream = client.messages.stream({
model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens: 200,
messages: [
{ role: "user", content: "Write a haiku about inference latency." },
],
});
for await (const event of stream) {
if (
event.type === "content_block_delta" &&
event.delta.type === "text_delta"
) {
process.stdout.write(event.delta.text);
}
}
console.log();

Tool Use

Python

import anthropic
import json
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
tools = [
{
"name": "get_stock_price",
"description": "Get the current stock price for a ticker symbol",
"input_schema": {
"type": "object",
"properties": {
"ticker": {
"type": "string",
"description": "Stock ticker symbol, e.g. AAPL",
},
},
"required": ["ticker"],
},
}
]
messages = [{"role": "user", "content": "What's the current price of NVDA?"}]
# First call: model decides to use the tool
response = client.messages.create(
model="meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens=200,
tools=tools,
messages=messages,
)
# Check if the model wants to use a tool
tool_use_block = next(
(block for block in response.content if block.type == "tool_use"), None
)
if tool_use_block:
print(f"Model called: {tool_use_block.name}({tool_use_block.input})")
# Simulate the tool result
stock_result = {"ticker": "NVDA", "price": 875.30, "currency": "USD"}
# Second call: send the tool result back
messages.append({"role": "assistant", "content": response.content})
messages.append({
"role": "user",
"content": [
{
"type": "tool_result",
"tool_use_id": tool_use_block.id,
"content": json.dumps(stock_result),
}
],
})
final_response = client.messages.create(
model="meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens=200,
tools=tools,
messages=messages,
)
text_block = next(
(block for block in final_response.content if block.type == "text"), None
)
if text_block:
print(text_block.text)

TypeScript

import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({
baseURL: "http://localhost:8080",
apiKey: "your-api-key",
});
const tools: Anthropic.Tool[] = [
{
name: "get_stock_price",
description: "Get the current stock price for a ticker symbol",
input_schema: {
type: "object" as const,
properties: {
ticker: { type: "string", description: "Stock ticker symbol" },
},
required: ["ticker"],
},
},
];
const messages: Anthropic.MessageParam[] = [
{ role: "user", content: "What's the current price of NVDA?" },
];
const response = await client.messages.create({
model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens: 200,
tools,
messages,
});
const toolUseBlock = response.content.find(
(block): block is Anthropic.ToolUseBlock => block.type === "tool_use"
);
if (toolUseBlock) {
console.log(`Model called: ${toolUseBlock.name}(${JSON.stringify(toolUseBlock.input)})`);
const stockResult = { ticker: "NVDA", price: 875.30, currency: "USD" };
messages.push({ role: "assistant", content: response.content });
messages.push({
role: "user",
content: [
{
type: "tool_result",
tool_use_id: toolUseBlock.id,
content: JSON.stringify(stockResult),
},
],
});
const finalResponse = await client.messages.create({
model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
max_tokens: 200,
tools,
messages,
});
const textBlock = finalResponse.content.find(
(block): block is Anthropic.TextBlock => block.type === "text"
);
if (textBlock) {
console.log(textBlock.text);
}
}

Multi-Turn Conversations

import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
messages = [
{"role": "user", "content": "What is a Merkle tree?"},
]
response = client.messages.create(
model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages
)
messages.append({"role": "assistant", "content": response.content[0].text})
# Follow-up
messages.append({"role": "user", "content": "How is it used in Git?"})
response = client.messages.create(
model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages
)
print(response.content[0].text)

Migrating from Anthropic API

To switch from the Anthropic API to LayerScale, change only how the client is constructed — pass a `base_url` pointing at LayerScale and your LayerScale `api_key`:

# Before (Anthropic API)
client = anthropic.Anthropic()
# After (LayerScale)
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

Everything else (messages format, streaming, tool use, response parsing) stays the same.