Skip to content

Anthropic Compatible API

LayerScale is a drop-in replacement for the Anthropic Messages API. Point your existing Anthropic SDK client at LayerScale and the rest of your code works with no other changes.

Setup

Install the Anthropic Python SDK from a terminal:

pip install anthropic

Then create a client that points at your LayerScale server:

import anthropic

# Build the standard Anthropic SDK client, but send requests to the
# LayerScale server at localhost:8080.
client = anthropic.Anthropic(
    api_key="your-api-key",
    base_url="http://localhost:8080",
)

Messages

import anthropic

# Client configured for the LayerScale endpoint at localhost:8080.
client = anthropic.Anthropic(
    api_key="your-api-key",
    base_url="http://localhost:8080",
)

# Single-turn request: one system prompt plus one user message.
conversation = [
    {"role": "user", "content": "Explain the CAP theorem in simple terms."},
]
message = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    system="You are a helpful assistant.",
    max_tokens=300,
    messages=conversation,
)

# The reply text is read from the first content block of the response.
first_block = message.content[0]
print(first_block.text)

Streaming

import anthropic

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

# Open a streaming request; the stream object is only used inside the
# `with` block.
with client.messages.stream(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens=200,
    messages=[
        {"role": "user", "content": "Write a haiku about inference latency."},
    ],
) as stream:
    # Print each text fragment as it is yielded; end="" keeps the fragments
    # on one line and flush=True pushes them to the terminal immediately.
    for text in stream.text_stream:
        print(text, end="", flush=True)

# Terminate the streamed output with a newline.
print()

Tool Use

import anthropic
import json

client = anthropic.Anthropic(base_url="http://localhost:8080", api_key="your-api-key")

# Tool definition: a name, a description, and a JSON Schema for the input.
tools = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a ticker symbol",
        "input_schema": {
            "type": "object",
            "properties": {
                "ticker": {
                    "type": "string",
                    "description": "Stock ticker symbol, e.g. AAPL",
                },
            },
            "required": ["ticker"],
        },
    }
]

messages = [{"role": "user", "content": "What's the current price of NVDA?"}]

# First call: model decides to use the tool
response = client.messages.create(
    model="meta-llama/Llama-4-Maverick-17B-128E-Original",
    max_tokens=200,
    tools=tools,
    messages=messages,
)

# Check if the model wants to use a tool: take the first "tool_use" content
# block, or None when the response contains none.
tool_use_block = next(
    (block for block in response.content if block.type == "tool_use"), None
)

# Everything below needs tool_use_block.id, so it only runs when the model
# actually requested the tool.
if tool_use_block:
    print(f"Model called: {tool_use_block.name}({tool_use_block.input})")

    # Simulate the tool result
    stock_result = {"ticker": "NVDA", "price": 875.30, "currency": "USD"}

    # Second call: send the tool result back. The assistant turn is echoed
    # first, then a user turn carries the tool_result tied to the tool_use id.
    messages.append({"role": "assistant", "content": response.content})
    messages.append({
        "role": "user",
        "content": [
            {
                "type": "tool_result",
                "tool_use_id": tool_use_block.id,
                "content": json.dumps(stock_result),
            }
        ],
    })

    final_response = client.messages.create(
        model="meta-llama/Llama-4-Maverick-17B-128E-Original",
        max_tokens=200,
        tools=tools,
        messages=messages,
    )

    # Print the first text block of the final answer, if there is one.
    text_block = next(
        (block for block in final_response.content if block.type == "text"), None
    )
    if text_block:
        print(text_block.text)

Multi-Turn Conversations

import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="your-api-key",
)

# Both turns use the same model, so name it once.
MODEL = "meta-llama/Llama-4-Maverick-17B-128E-Original"

# The running transcript; each request sends the whole list.
messages = [{"role": "user", "content": "What is a Merkle tree?"}]

# First turn.
response = client.messages.create(model=MODEL, max_tokens=300, messages=messages)

# Record the assistant's reply, then add the follow-up question so the
# second request carries the full history.
messages.extend([
    {"role": "assistant", "content": response.content[0].text},
    {"role": "user", "content": "How is it used in Git?"},
])

# Second turn.
response = client.messages.create(model=MODEL, max_tokens=300, messages=messages)
print(response.content[0].text)

Migrating from Anthropic API

To switch from the Anthropic API to LayerScale, change only how the client is constructed: pass two arguments, base_url and api_key.

# Before: client for the Anthropic API, constructed with no arguments
client = anthropic.Anthropic()

# After: the same client class, pointed at LayerScale
client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="your-api-key",
)

Everything else (messages format, streaming, tool use, response parsing) stays the same.