Anthropic Compatible API

LayerScale is a drop-in replacement for the Anthropic Messages API. Point your existing Anthropic SDK client at LayerScale by setting its base URL and API key, and the rest of your code works without changes.

Setup

Python
TypeScript

pip install anthropic

import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="your-api-key"
)

npm install @anthropic-ai/sdk

import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

Messages

Python
TypeScript
cURL

import anthropic

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

message = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens=300,
    system="You are a helpful assistant.",
    messages=[
        {"role": "user", "content": "Explain the CAP theorem in simple terms."},
    ],
)

print(message.content[0].text)

import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

const message = await client.messages.create({
    model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens: 300,
    system: "You are a helpful assistant.",
    messages: [
        { role: "user", content: "Explain the CAP theorem in simple terms." },
    ],
});

console.log(message.content[0].text);

curl http://localhost:8080/v1/messages \
  -H "Content-Type: application/json" \
  -H "x-api-key: your-api-key" \
  -d '{
    "model": "meta-llama/Llama-4-Maverick-17B-128E-Original",
    "max_tokens": 300,
    "system": "You are a helpful assistant.",
    "messages": [
      {"role": "user", "content": "Explain the CAP theorem in simple terms."}
    ]
  }'

Streaming

Python
TypeScript

import anthropic

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

with client.messages.stream(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens=200,
    messages=[
        {"role": "user", "content": "Write a haiku about inference latency."},
    ],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
print()

import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

const stream = client.messages.stream({
    model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens: 200,
    messages: [
        { role: "user", content: "Write a haiku about inference latency." },
    ],
});

for await (const event of stream) {
    if (
        event.type === "content_block_delta" &&
        event.delta.type === "text_delta"
    ) {
        process.stdout.write(event.delta.text);
    }
}
console.log();

Tool Use

Python
TypeScript

import anthropic
import json

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

tools = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a ticker symbol",
        "input_schema": {
            "type": "object",
            "properties": {
                "ticker": {
                    "type": "string",
                    "description": "Stock ticker symbol, e.g. AAPL",
                },
            },
            "required": ["ticker"],
        },
    }
]

messages = [{"role": "user", "content": "What's the current price of NVDA?"}]

# First call: model decides to use the tool
response = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens=200,
    tools=tools,
    messages=messages,
)

# Check if the model wants to use a tool
tool_use_block = next(
    (block for block in response.content if block.type == "tool_use"), None
)

if tool_use_block:
    print(f"Model called: {tool_use_block.name}({tool_use_block.input})")

    # Simulate the tool result
    stock_result = {"ticker": "NVDA", "price": 875.30, "currency": "USD"}

    # Second call: send the tool result back
    messages.append({"role": "assistant", "content": response.content})
    messages.append({
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": tool_use_block.id,
                "content": json.dumps(stock_result),
            }
        ],
    })

    final_response = client.messages.create(
        model="meta-llama/Llama-4-Maverick-17B-128E-Original",
        max_tokens=200,
        tools=tools,
        messages=messages,
    )

    text_block = next(
        (block for block in final_response.content if block.type == "text"), None
    )
    if text_block:
        print(text_block.text)

import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

const tools: Anthropic.Tool[] = [
    {
        name: "get_stock_price",
        description: "Get the current stock price for a ticker symbol",
        input_schema: {
            type: "object" as const,
            properties: {
                ticker: { type: "string", description: "Stock ticker symbol" },
            },
            required: ["ticker"],
        },
    },
];

const messages: Anthropic.MessageParam[] = [
    { role: "user", content: "What's the current price of NVDA?" },
];

const response = await client.messages.create({
    model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens: 200,
    tools,
    messages,
});

const toolUseBlock = response.content.find(
    (block): block is Anthropic.ToolUseBlock => block.type === "tool_use"
);

if (toolUseBlock) {
    console.log(`Model called: ${toolUseBlock.name}(${JSON.stringify(toolUseBlock.input)})`);

    const stockResult = { ticker: "NVDA", price: 875.30, currency: "USD" };

    messages.push({ role: "assistant", content: response.content });
    messages.push({
        role: "user",
        content: [
            {
                type: "tool_result",
                tool_use_id: toolUseBlock.id,
                content: JSON.stringify(stockResult),
            },
        ],
    });

    const finalResponse = await client.messages.create({
        model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
        max_tokens: 200,
        tools,
        messages,
    });

    const textBlock = finalResponse.content.find(
        (block): block is Anthropic.TextBlock => block.type === "text"
    );
    if (textBlock) {
        console.log(textBlock.text);
    }
}

Multi-Turn Conversations

Python
TypeScript

import anthropic

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

messages = [
    {"role": "user", "content": "What is a Merkle tree?"},
]

response = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages
)
messages.append({"role": "assistant", "content": response.content[0].text})

# Follow-up
messages.append({"role": "user", "content": "How is it used in Git?"})

response = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original", max_tokens=300, messages=messages
)
print(response.content[0].text)

import Anthropic from "@anthropic-ai/sdk";

const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

const messages: Anthropic.MessageParam[] = [
    { role: "user", content: "What is a Merkle tree?" },
];

let response = await client.messages.create({
    model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens: 300,
    messages,
});
messages.push({
    role: "assistant",
    content: (response.content[0] as Anthropic.TextBlock).text,
});

// Follow-up
messages.push({ role: "user", content: "How is it used in Git?" });

response = await client.messages.create({
    model: "meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens: 300,
    messages,
});
console.log((response.content[0] as Anthropic.TextBlock).text);

Migrating from Anthropic API

To switch from the Anthropic API to LayerScale, change only how the client is constructed: set the base URL to your LayerScale server and pass your API key.

Python
TypeScript

# Before (Anthropic API)
client = anthropic.Anthropic()

# After (LayerScale)
client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

// Before (Anthropic API)
const client = new Anthropic();

// After (LayerScale)
const client = new Anthropic({
    baseURL: "http://localhost:8080",
    apiKey: "your-api-key",
});

Everything else (messages format, streaming, tool use, response parsing) stays the same.