"""Live API smoke tests hitting a running server.

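Skipped by default; set RUN_LIVE_API_TESTS=1 to enable.
Configure the API base via API_BASE_URL (default: https://k050506koch-gpt3-dev-api.hf.space);
a trailing "/v1" is optional.
Set API_VERIFY_SSL=0 to disable TLS certificate verification for the httpx requests.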
"""
from __future__ import annotations

import os
from typing import Set

import pytest
import httpx


# Server root used when the API_BASE_URL environment variable is not set.
DEFAULT_BASE_URL = "https://k050506koch-gpt3-dev-api.hf.space"


def _normalize_base_url(raw_base_url: str) -> str:
    base_url = raw_base_url.rstrip("/")
    if base_url.endswith("/v1"):
        base_url = base_url[:-3]
    return base_url


# Live tests are opt-in: enabled only when RUN_LIVE_API_TESTS is exactly "1".
RUN_LIVE = os.environ.get("RUN_LIVE_API_TESTS") == "1"
# API base from API_BASE_URL (or the default), passed through _normalize_base_url;
# the tests append "/v1/..." to it themselves.
BASE_URL = _normalize_base_url(os.environ.get("API_BASE_URL", DEFAULT_BASE_URL))
# Passed as `verify=` to the httpx clients; only API_VERIFY_SSL=0 turns it off.
VERIFY_SSL = os.environ.get("API_VERIFY_SSL", "1") != "0"
# Prompt sent to /v1/completions by test_completion_basic.
PROMPT = "he is a doctor. His main goal is"


def _get_models(timeout: float = 10.0) -> Set[str]:
    """Return the ids of the models listed by ``GET {BASE_URL}/v1/models``.

    Any problem talking to the server or decoding its answer is reported via
    ``pytest.fail`` with a readable message, so the calling test fails with
    an explanation rather than a raw traceback.

    Args:
        timeout: Timeout in seconds handed to ``httpx.Client``.

    Returns:
        The set of ``"id"`` values found in the response's ``"data"`` list
        (empty when that list is missing, null or empty).
    """
    url = f"{BASE_URL}/v1/models"
    try:
        with httpx.Client(timeout=timeout, verify=VERIFY_SSL) as client:
            resp = client.get(url)
            resp.raise_for_status()
            data = resp.json()
    except (httpx.HTTPError, httpx.InvalidURL) as exc:
        # InvalidURL is not an HTTPError subclass; a malformed API_BASE_URL
        # deserves the same configuration hint as an unreachable server.
        pytest.fail(
            f"Unable to reach live API at {BASE_URL}/v1/models: {exc}. "
            "Set API_BASE_URL to your server root URL (with or without '/v1')."
        )
    except ValueError as exc:
        # resp.json() raises a ValueError subclass when the body is not JSON
        # (for example an HTML landing or error page).
        pytest.fail(f"Live API at {url} returned a non-JSON body: {exc}")

    try:
        # `or []` also covers an explicit `"data": null`.
        return {item["id"] for item in data.get("data") or []}
    except (AttributeError, KeyError, TypeError) as exc:
        pytest.fail(f"Unexpected /v1/models payload from {url}: {exc!r}")


@pytest.mark.skipif(not RUN_LIVE, reason="set RUN_LIVE_API_TESTS=1 to run live API tests")
def test_responses_openai_client() -> None:
    """Call the server's Responses endpoint through the OpenAI SDK client.

    Skipped when the ``openai`` package is not installed or when the target
    model is not listed by the server. Passes when the first content part of
    the first output item carries non-empty text.
    """
    OpenAI = pytest.importorskip("openai").OpenAI
    model = "GPT4-dev-177M-1511-Instruct"

    served = _get_models()
    if model not in served:
        pytest.skip(f"model {model} not available on server; available={sorted(served)}")

    sdk = OpenAI(api_key="test", base_url=f"{BASE_URL}/v1")
    reply = sdk.responses.create(model=model, input="Say hello in one sentence.")

    first_item = reply.output[0]
    first_part = first_item.content[0]
    assert first_part.text


@pytest.mark.skipif(not RUN_LIVE, reason="set RUN_LIVE_API_TESTS=1 to run live API tests")
@pytest.mark.parametrize("model", ["GPT-2", "GPT3-dev-350m-2805"])  # adjust names as available
def test_completion_basic(model: str) -> None:
    """POST a short greedy completion request and check the response shape.

    Skipped when *model* is not listed by the server. Verifies that the
    response echoes the model name, contains at least one choice whose
    ``text`` is a string, and reports ``total_tokens`` in ``usage``.
    """
    served = _get_models()
    if model not in served:
        pytest.skip(f"model {model} not available on server; available={sorted(served)}")

    # The long read timeout leaves room for first-run weight downloads.
    limits = httpx.Timeout(connect=10.0, read=600.0, write=30.0, pool=10.0)
    request_json = dict(model=model, prompt=PROMPT, max_tokens=16, temperature=0.0)

    with httpx.Client(timeout=limits, verify=VERIFY_SSL) as http:
        reply = http.post(f"{BASE_URL}/v1/completions", json=request_json)
        reply.raise_for_status()
        body = reply.json()

    assert body.get("model") == model

    choices = body.get("choices") or []
    assert len(choices) > 0
    # With temperature=0 some models return an empty completion, so only the
    # type of the text is checked, not its content.
    first_text = choices[0].get("text")
    assert isinstance(first_text, str)

    usage = body.get("usage") or {}
    assert "total_tokens" in usage