"""Shared LLM client for MCP server narrative tools.
|
|
|
|
Multi-provider support — selected via LLM_PROVIDER env var:
|
|
|
|
groq (default) — Groq API, OpenAI-compatible. Fast, free tier.
|
|
Needs GROQ_API_KEY.
|
|
|
|
anthropic — Direct Anthropic SDK. Needs ANTHROPIC_API_KEY.
|
|
|
|
bedrock — AWS Bedrock Converse API. Needs AWS credentials.
|
|
|
|
openai — Any OpenAI-compatible endpoint.
|
|
Set OPENAI_API_KEY and optionally OPENAI_BASE_URL.
|
|
|
|
Usage:
|
|
LLM_PROVIDER=groq GROQ_API_KEY=gsk_... python -m mcp_servers.shared
|
|
"""
|
|
|
|
import os
from typing import Literal

Provider = Literal["groq", "anthropic", "bedrock", "openai"]


def _get_provider() -> Provider:
    p = os.getenv("LLM_PROVIDER", "groq").lower()
    if p in ("groq", "anthropic", "bedrock", "openai"):
        return p
    return "groq"


async def generate(system_prompt: str, user_content: str, max_tokens: int = 1024) -> str:
    """Call an LLM and return the text response."""
    provider = _get_provider()

    if provider == "anthropic":
        return await _generate_anthropic(system_prompt, user_content, max_tokens)
    elif provider == "bedrock":
        return await _generate_bedrock(system_prompt, user_content, max_tokens)
    else:
        # groq, openai, or any OpenAI-compatible provider
        return await _generate_openai_compat(system_prompt, user_content, max_tokens)


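# Example call from a narrative tool (hypothetical caller, for illustration
# only — `scene_text` is not defined in this module):
#
#     summary = await generate(
#         system_prompt="Summarize the scene in two sentences.",
#         user_content=scene_text,
#     )

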
async def _generate_openai_compat(
    system_prompt: str, user_content: str, max_tokens: int
) -> str:
    """OpenAI-compatible API (Groq, OpenAI, local, etc.)."""
    import openai

    provider = _get_provider()

    if provider == "groq":
        api_key = os.getenv("GROQ_API_KEY")
        base_url = "https://api.groq.com/openai/v1"
        model = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
    else:
        api_key = os.getenv("OPENAI_API_KEY")
        base_url = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
        model = os.getenv("OPENAI_MODEL", "gpt-4o")

    client = openai.AsyncOpenAI(api_key=api_key, base_url=base_url)
    response = await client.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        temperature=0.7,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
    )
    # message.content is Optional in the SDK; normalize None to "".
    return response.choices[0].message.content or ""


async def _generate_anthropic(
    system_prompt: str, user_content: str, max_tokens: int
) -> str:
    """Direct Anthropic SDK (reads ANTHROPIC_API_KEY from the environment)."""
    import anthropic

    client = anthropic.AsyncAnthropic()
    response = await client.messages.create(
        model=os.getenv("ANTHROPIC_MODEL", "claude-sonnet-4-20250514"),
        max_tokens=max_tokens,
        temperature=0.7,
        system=system_prompt,
        messages=[{"role": "user", "content": user_content}],
    )
    return response.content[0].text


async def _generate_bedrock(
    system_prompt: str, user_content: str, max_tokens: int
) -> str:
    """AWS Bedrock Converse API (uses the standard AWS credential chain)."""
    import boto3

    # boto3 is synchronous, so this request blocks the event loop while it runs.
    bedrock = boto3.client(
        "bedrock-runtime",
        region_name=os.getenv("AWS_DEFAULT_REGION", "us-east-1"),
    )
    response = bedrock.converse(
        modelId=os.getenv("BEDROCK_MODEL_ID", "anthropic.claude-sonnet-4-20250514-v1:0"),
        system=[{"text": system_prompt}],
        messages=[{"role": "user", "content": [{"text": user_content}]}],
        inferenceConfig={"maxTokens": max_tokens, "temperature": 0.7},
    )
    return response["output"]["message"]["content"][0]["text"]
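

# ---------------------------------------------------------------------------
# Smoke test backing the Usage line in the module docstring. A minimal sketch:
# the prompt strings below are illustrative, and it assumes credentials for
# the selected provider are already set in the environment.
if __name__ == "__main__":
    import asyncio

    async def _smoke_test() -> None:
        reply = await generate(
            system_prompt="You are a terse assistant.",
            user_content="Reply with the single word: ok",
            max_tokens=16,
        )
        print(f"provider={_get_provider()} reply={reply!r}")

    asyncio.run(_smoke_test())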