Anthropic Compatible API
LayerScale is a drop-in replacement for the Anthropic Messages API. Point your existing Anthropic SDK code at LayerScale and it works without changes.
Setup
Python SDK
pip install anthropic

import anthropic
client = anthropic.Anthropic( base_url="http://localhost:8080", api_key="your-api-key")

TypeScript SDK
npm install @anthropic-ai/sdk

import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ baseURL: "http://localhost:8080", apiKey: "your-api-key",});

Messages
Python
import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
message = client.messages.create( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, system="You are a helpful assistant.", messages=[ {"role": "user", "content": "Explain the CAP theorem in simple terms."}, ],)
print(message.content[0].text)

TypeScript
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ baseURL: "http://localhost:8080", apiKey: "your-api-key",});
const message = await client.messages.create({ model: "meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens: 300, system: "You are a helpful assistant.", messages: [ { role: "user", content: "Explain the CAP theorem in simple terms." }, ],});
console.log(message.content[0].text);

REST
curl http://localhost:8080/v1/messages \ -H "Content-Type: application/json" \ -H "x-api-key: your-api-key" \ -d '{ "model": "meta-llama/Llama-4-Maverick-17B-128E-Original", "max_tokens": 300, "system": "You are a helpful assistant.", "messages": [ {"role": "user", "content": "Explain the CAP theorem in simple terms."} ] }'

Streaming
Python
import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
with client.messages.stream( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=200, messages=[ {"role": "user", "content": "Write a haiku about inference latency."}, ],) as stream: for text in stream.text_stream: print(text, end="", flush=True)

print()

TypeScript
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ baseURL: "http://localhost:8080", apiKey: "your-api-key",});
const stream = client.messages.stream({ model: "meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens: 200, messages: [ { role: "user", content: "Write a haiku about inference latency." }, ],});
for await (const event of stream) { if ( event.type === "content_block_delta" && event.delta.type === "text_delta" ) { process.stdout.write(event.delta.text); }}

console.log();

Tool Use
Python
import anthropic
import json
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
tools = [ { "name": "get_stock_price", "description": "Get the current stock price for a ticker symbol", "input_schema": { "type": "object", "properties": { "ticker": { "type": "string", "description": "Stock ticker symbol, e.g. AAPL", }, }, "required": ["ticker"], }, }]
messages = [{"role": "user", "content": "What's the current price of NVDA?"}]
# First call: model decides to use the tool
response = client.messages.create( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=200, tools=tools, messages=messages,)
# Check if the model wants to use a tool
tool_use_block = next( (block for block in response.content if block.type == "tool_use"), None)
if tool_use_block:
    print(f"Model called: {tool_use_block.name}({tool_use_block.input})")
    # Simulate the tool result
    stock_result = {"ticker": "NVDA", "price": 875.30, "currency": "USD"}
    # Second call: send the tool result back
    messages.append({"role": "assistant", "content": response.content})
    messages.append({ "role": "user", "content": [ { "type": "tool_result", "tool_use_id": tool_use_block.id, "content": json.dumps(stock_result), } ], })
final_response = client.messages.create( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=200, tools=tools, messages=messages, )
    text_block = next( (block for block in final_response.content if block.type == "text"), None )
    if text_block:
        print(text_block.text)

TypeScript
import Anthropic from "@anthropic-ai/sdk";
const client = new Anthropic({ baseURL: "http://localhost:8080", apiKey: "your-api-key",});
const tools: Anthropic.Tool[] = [ { name: "get_stock_price", description: "Get the current stock price for a ticker symbol", input_schema: { type: "object" as const, properties: { ticker: { type: "string", description: "Stock ticker symbol" }, }, required: ["ticker"], }, },];
const messages: Anthropic.MessageParam[] = [ { role: "user", content: "What's the current price of NVDA?" },];
const response = await client.messages.create({ model: "meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens: 200, tools, messages,});
const toolUseBlock = response.content.find( (block): block is Anthropic.ToolUseBlock => block.type === "tool_use");
if (toolUseBlock) { console.log(`Model called: ${toolUseBlock.name}(${JSON.stringify(toolUseBlock.input)})`);
const stockResult = { ticker: "NVDA", price: 875.30, currency: "USD" };
messages.push({ role: "assistant", content: response.content }); messages.push({ role: "user", content: [ { type: "tool_result", tool_use_id: toolUseBlock.id, content: JSON.stringify(stockResult), }, ], });
const finalResponse = await client.messages.create({ model: "meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens: 200, tools, messages, });
  const textBlock = finalResponse.content.find( (block): block is Anthropic.TextBlock => block.type === "text" );
  if (textBlock) { console.log(textBlock.text); }
}

Multi-Turn Conversations
import anthropic
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")
messages = [ {"role": "user", "content": "What is a Merkle tree?"},]
response = client.messages.create( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages)

messages.append({"role": "assistant", "content": response.content[0].text})
# Follow-up
messages.append({"role": "user", "content": "How is it used in Git?"})
response = client.messages.create( model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages)

print(response.content[0].text)

Migrating from Anthropic API
To switch from the Anthropic API to LayerScale, change two lines:
# Before (Anthropic API)
client = anthropic.Anthropic()
# After (LayerScale)
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

Everything else (messages format, streaming, tool use, response parsing) stays the same.