#!/usr/bin/env python3 """ Quick LLM API smoke test. Usage: python3 scripts/test_llm_api.py --prompt "Say hello" python3 scripts/test_llm_api.py --show-config python3 scripts/test_llm_api.py --direct --show-config """ from __future__ import annotations import argparse import asyncio import json import sys from pathlib import Path import httpx from dotenv import load_dotenv PROJECT_ROOT = Path(__file__).resolve().parent.parent if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) load_dotenv(PROJECT_ROOT / ".env", override=False) def _load_llm_services(): from deeptutor.services.llm import factory from deeptutor.services.llm.config import clear_llm_config_cache, get_llm_config return factory, clear_llm_config_cache, get_llm_config factory, clear_llm_config_cache, get_llm_config = _load_llm_services() def _mask_key(key: str) -> str: if not key: return "(empty)" if len(key) <= 10: return "*" * len(key) return f"{key[:6]}...{key[-4:]}" async def run_factory_test(prompt: str, system_prompt: str, max_tokens: int, temperature: float) -> None: cfg = get_llm_config() print(f"[FactoryTest] model={cfg.model}") print("[FactoryTest] Calling factory.complete(...)") answer = await factory.complete( prompt=prompt, system_prompt=system_prompt, max_tokens=max_tokens, temperature=temperature, ) text = (answer or "").strip() print(f"[FactoryTest] OK (model={cfg.model})") print("[FactoryTest] Response preview:") print(text[:1000] if text else "(empty)") async def run_direct_test(prompt: str, system_prompt: str, max_tokens: int, temperature: float) -> None: cfg = get_llm_config() url = f"{(cfg.base_url or '').rstrip('/')}/chat/completions" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {cfg.api_key}", } payload = { "model": cfg.model, "messages": [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, ], "max_tokens": max_tokens, "temperature": temperature, } print(f"[DirectTest] POST {url}") async with httpx.AsyncClient(timeout=120) as client: resp = await client.post(url, headers=headers, json=payload) print(f"[DirectTest] status={resp.status_code}") try: body = resp.json() print("[DirectTest] JSON response:") print(json.dumps(body, ensure_ascii=False, indent=2)[:4000]) except Exception: print("[DirectTest] Non-JSON response:") print(resp.text[:2000]) async def main() -> None: parser = argparse.ArgumentParser(description="Quick LLM API smoke test") parser.add_argument("--prompt", default="Briefly explain what a derivative is.") parser.add_argument("--system", default="You are a concise helpful assistant.") parser.add_argument("--max-tokens", type=int, default=256) parser.add_argument("--temperature", type=float, default=0.2) parser.add_argument("--show-config", action="store_true") parser.add_argument( "--direct", action="store_true", help="Also call /chat/completions endpoint directly and print raw response.", ) args = parser.parse_args() clear_llm_config_cache() cfg = get_llm_config() if args.show_config: print("[Config]") print(f" binding={cfg.binding}") print(f" host={cfg.base_url}") print(f" model={cfg.model}") print(f" max_concurrency={cfg.max_concurrency}") print(f" requests_per_minute={cfg.requests_per_minute}") print(f" api_key={_mask_key(cfg.api_key)}") try: await run_factory_test(args.prompt, args.system, args.max_tokens, args.temperature) except Exception as exc: print(f"[FactoryTest] FAILED: {type(exc).__name__}: {exc}") if args.direct: try: await run_direct_test(args.prompt, args.system, args.max_tokens, args.temperature) except Exception as exc: print(f"[DirectTest] FAILED: {type(exc).__name__}: {exc}") if __name__ == "__main__": asyncio.run(main())