Commit cf33275

Initial commit: Incident Copilot MCP

Files changed:

- .gitignore +31 -0
- README.md +201 -0
- app.py +615 -0
- mcp_servers/__init__.py +7 -0
- mcp_servers/blaxel_server.py +93 -0
- mcp_servers/gateway.py +35 -0
- mcp_servers/logs_server.py +192 -0
- mcp_servers/modal_server.py +84 -0
- mcp_servers/nebius_server.py +116 -0
- mcp_servers/voice_elevenlabs_server.py +107 -0
- modal_deep_analysis_app.py +92 -0
- requirements.txt +10 -0
- run_gateway.py +18 -0
.gitignore
ADDED
```
# Python bytecode and cache
__pycache__/
*.py[cod]
*.pyo

# Virtual environments
.venv/
venv/
.envrc

# Environment & secret files
.env
.env.*
*.env
*.env.*

# Editor / IDE
.vscode/
.idea/
*.swp
*.swo

# OS files
.DS_Store
Thumbs.db

# Logs
*.log

# Misc
.python-version
```
README.md
ADDED
# Incident & Error Copilot (MCP + Gradio)

Incident & Error Copilot is a small incident-triage assistant built for the MCP hackathon.
It uses **Model Context Protocol (MCP)** tools to orchestrate:

- Logs & metrics (Neon Postgres or synthetic data)
- Nebius Token Factory (LLM incident summarization)
- Modal (deep log analysis)
- Blaxel (sandbox diagnostics)
- ElevenLabs (text-to-speech voice summary)

The UI is a single Gradio app; all tools are exposed via a local MCP HTTP gateway.

---

## Architecture

- **Gradio app**: `app.py`
  - Chat interface for incident descriptions / stack traces
  - Buttons for:
    - Play Voice Summary (ElevenLabs)
    - Deep Log Analysis (Modal)
    - Sandbox Health Check (Blaxel)
    - Auto Triage Incident (agent flow combining all tools)
  - Talks to MCP tools over HTTP using the official `mcp` Python client.

- **MCP gateway**: `run_gateway.py` + `mcp_servers/gateway.py`
  - `mcp_servers/gateway.py` mounts 5 FastMCP servers under different paths:
    - `/logs` → `mcp_servers/logs_server.py`
    - `/voice` → `mcp_servers/voice_elevenlabs_server.py`
    - `/nebius` → `mcp_servers/nebius_server.py`
    - `/modal` → `mcp_servers/modal_server.py`
    - `/blaxel` → `mcp_servers/blaxel_server.py`
  - Each FastMCP server exposes a Streamable HTTP endpoint at `/mcp`.
  - `run_gateway.py` starts a local uvicorn server on `http://127.0.0.1:8004` and
    mounts the gateway app (see the sketch after this section). The Gradio app calls:
    - `http://127.0.0.1:8004/logs/mcp`
    - `http://127.0.0.1:8004/voice/mcp`
    - `http://127.0.0.1:8004/nebius/mcp`
    - `http://127.0.0.1:8004/modal/mcp`
    - `http://127.0.0.1:8004/blaxel/mcp`

- **Individual MCP servers** (all FastMCP, Python):
  - `mcp_servers/logs_server.py`
    - Tools: `get_logs`, `summarize_logs`
    - Uses Neon Postgres when `NEON_DATABASE_URL` / `DATABASE_URL` is set; otherwise
      falls back to a small in-memory synthetic log store.
  - `mcp_servers/nebius_server.py`
    - Tool: `nebius_incident_summary`
    - Calls Nebius Token Factory (OpenAI-compatible) to produce a structured incident
      summary (title, severity, impact, root cause, actions).
  - `mcp_servers/modal_server.py`
    - Tool: `deep_log_analysis`
    - Sends logs to a Modal web endpoint specified by `MODAL_DEEP_ANALYSIS_URL` for
      heavy / statistical analysis.
  - `mcp_servers/voice_elevenlabs_server.py`
    - Tools: `list_voices`, `generate_incident_summary_audio`
    - Wraps the ElevenLabs TTS API and returns base64-encoded MP3 data.
  - `mcp_servers/blaxel_server.py`
    - Tool: `run_simple_diagnostic`
    - Uses the Blaxel Python SDK to create/reuse a sandbox and run a small shell
      command, returning stdout/stderr.

> Note: there is also a Blaxel-hosted unified MCP (`mcp-sgllk`) that mirrors these
> tools, but the demo flow is designed to work entirely with the local MCP gateway.
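For orientation, `run_gateway.py` itself is tiny (+18 lines in this commit, not rendered in this view); a minimal sketch of what it does, based only on the description above:

```python
# Minimal sketch of run_gateway.py (illustrative; the actual file in this
# commit is not shown here). It serves the Starlette gateway app so that
# every mounted MCP server is reachable under http://127.0.0.1:8004/<name>/mcp.
import uvicorn

from mcp_servers.gateway import app

if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8004)
```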
---

## Environment & dependencies

- **Python**: 3.11+
- **Dependencies**: see `requirements.txt` (Gradio, mcp, httpx, openai, modal,
  blaxel, psycopg, uvicorn, python-dotenv, etc.).

Secrets & configuration are loaded via standard environment variables. For local
runs, they are typically provided via `.env.blaxel`:

- `NEON_DATABASE_URL` – Postgres/Neon connection string for logs (optional; if
  absent, synthetic logs are used).
- `NEBIUS_API_KEY`, `NEBIUS_MODEL_ID`, `NEBIUS_BASE_URL` – Nebius Token Factory
  API key and model config.
- `ELEVENLABS_API_KEY` – ElevenLabs TTS key.
- `MODAL_DEEP_ANALYSIS_URL` – Modal web endpoint for deep analysis.
- `MODAL_AUTH_TOKEN` – optional bearer token for Modal.
- `BL_API_KEY`, `BL_WORKSPACE` – Blaxel credentials (used by the Blaxel SDK and also
  forwarded as headers in MCP requests; safe to omit for a pure local demo if the SDK
  is already authenticated via the CLI).

An example (redacted) `.env.blaxel` is already present in this repo for reference –
replace it with your own keys if you fork.
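For reference, a hypothetical `.env.blaxel` shape (every value below is a placeholder, not a real key or endpoint):

```bash
# Placeholder values only; substitute your own credentials.
NEON_DATABASE_URL=postgresql://user:password@your-neon-host/dbname
NEBIUS_API_KEY=your-nebius-api-key
NEBIUS_MODEL_ID=your-nebius-model-id
NEBIUS_BASE_URL=https://your-nebius-openai-compatible-endpoint/v1
ELEVENLABS_API_KEY=your-elevenlabs-key
MODAL_DEEP_ANALYSIS_URL=https://your-workspace--deep-analysis.modal.run
MODAL_AUTH_TOKEN=your-optional-modal-token
BL_API_KEY=your-blaxel-api-key
BL_WORKSPACE=your-blaxel-workspace
```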
---

## Running locally (demo flow)

1. **Install dependencies**

   ```bash
   cd mcp-action
   pip install -r requirements.txt
   ```

2. **Populate secrets**

   - Copy `.env.blaxel` (already present) or create your own with the variables
     described above.

3. **Start the MCP gateway** (Terminal 1)

   ```bash
   cd mcp-action
   python run_gateway.py
   ```

   This runs uvicorn on `http://127.0.0.1:8004` and mounts all MCP servers.

4. **Start the Gradio app** (Terminal 2)

   ```bash
   cd mcp-action
   python app.py
   ```

5. **Open the UI**

   - Visit the URL printed by Gradio (typically `http://127.0.0.1:7860`).

6. **Exercise the tools**

   - Paste an incident description or stack trace into the chat.
   - Click:
     - **Play Voice Summary (ElevenLabs)** – generates and plays an audio recap.
     - **Deep Log Analysis (Modal)** – calls the deep analysis MCP and shows a
       JSON summary plus a human-readable headline.
     - **Sandbox Health Check (Blaxel)** – runs a simple command in a Blaxel
       sandbox and prints stdout/stderr.
     - **Auto Triage Incident (Agent)** – orchestrates logs → Nebius → Modal →
       Blaxel and produces an end-to-end triage report.

---

## Deploying to Hugging Face Spaces (one-process, non-Docker)

Hugging Face Spaces can run both the MCP gateway and the Gradio app inside the
same container. One simple pattern is:

1. Create a new **Gradio** Space and point it at this repository.
2. In the Space **Settings → App** (or the "Advanced" start command field), set
   the start command to:

   ```bash
   python run_gateway.py & python app.py
   ```

   This starts the uvicorn MCP gateway in the background, then launches the
   Gradio UI in the foreground.

3. Add the same `.env.blaxel` variables to the Space **Secrets / Environment
   variables** tab so the MCP servers can reach Neon, Nebius, Modal, Blaxel and
   ElevenLabs.

4. Once the Space builds, open it and exercise the same buttons as in local
   dev. The health of the MCP gateway can be seen in the Space logs.

> If you prefer Docker-based Spaces or other hosting, the same pattern applies:
> run `run_gateway.py` and `app.py` in the same container, expose only the
> Gradio port, and keep the MCP gateway internal to the container.

---

## Suggested demo script

A short flow that shows everything end-to-end:

1. **Intro**
   - Briefly explain the goal: MCP-powered incident triage across multiple infra
     providers.

2. **Chat-based incident summary**
   - Paste a realistic stack trace / incident description.
   - Show the Nebius-generated structured summary (severity, impact, root cause,
     recommended actions).

3. **Voice recap**
   - Click **Play Voice Summary (ElevenLabs)** and play a few seconds of the
     generated MP3.

4. **Deep log analysis**
   - Click **Deep Log Analysis (Modal)**.
   - Highlight the log counts, severity distribution, latest error, etc.

5. **Sandbox diagnostics**
   - Click **Sandbox Health Check (Blaxel)**.
   - Show the sandbox command output (e.g. `uname -a` and diagnostic text).

6. **Agentic auto-triage**
   - Click **Auto Triage Incident (Agent)**.
   - Scroll through the markdown report, calling out how it orchestrates Logs →
     Nebius → Modal → Blaxel and produces a concise, judge-friendly summary.

This is the flow intended for the hackathon submission and demo video.
app.py
ADDED
````python
import ast
import base64
import json
import os
import tempfile
from typing import Any, Dict, List

from dotenv import load_dotenv
import gradio as gr
from mcp import ClientSession, types as mcp_types
from mcp.client.streamable_http import streamablehttp_client


# Load local environment (for dev) including optional .env.blaxel with Blaxel creds
load_dotenv()
load_dotenv(".env.blaxel", override=False)


# All tools now talk to a local MCP gateway (uvicorn + mcp_servers.gateway)
_MCP_GATEWAY_BASE_URL = "http://127.0.0.1:8004"

_BL_API_KEY = os.getenv("BL_API_KEY")
_BL_WORKSPACE = os.getenv("BL_WORKSPACE")

_MCP_HEADERS: Dict[str, str] = {}
if _BL_API_KEY:
    # Support both generic Authorization and Blaxel-specific header names (harmless for local)
    _MCP_HEADERS["Authorization"] = f"Bearer {_BL_API_KEY}"
    _MCP_HEADERS["X-Blaxel-Authorization"] = f"Bearer {_BL_API_KEY}"
if _BL_WORKSPACE:
    _MCP_HEADERS["X-Blaxel-Workspace"] = _BL_WORKSPACE

LOGS_SERVER_URL = f"{_MCP_GATEWAY_BASE_URL}/logs/mcp"
VOICE_SERVER_URL = f"{_MCP_GATEWAY_BASE_URL}/voice/mcp"
NEBIUS_SERVER_URL = f"{_MCP_GATEWAY_BASE_URL}/nebius/mcp"
MODAL_SERVER_URL = f"{_MCP_GATEWAY_BASE_URL}/modal/mcp"
BLAXEL_SERVER_URL = f"{_MCP_GATEWAY_BASE_URL}/blaxel/mcp"


def _prepare_spoken_text(markdown: str) -> str:
    """Convert the rich incident markdown into a concise, TTS-friendly summary.

    - Strips markdown syntax like **, ``, and leading list markers.
    - Collapses newlines into sentences.
    """

    # Remove bold/inline code markers
    text = markdown.replace("**", "").replace("`", "")

    # Remove leading list markers like "- "
    lines = []
    for line in text.splitlines():
        stripped = line.lstrip()
        if stripped.startswith("- "):
            stripped = stripped[2:]
        lines.append(stripped)

    text = " ".join(lines)
    # Normalize whitespace
    text = " ".join(text.split())

    # Add a small preface so it sounds more natural
    return f"Here is a short incident recap: {text}".strip()


async def _summarize_logs_via_mcp(service: str = "recs-api", env: str = "prod") -> Dict[str, Any]:
    async with streamablehttp_client(LOGS_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "summarize_logs",
                arguments={"service": service, "env": env},
            )

    # Prefer structured content but fall back to parsing JSON text
    data: Dict[str, Any] = result.structuredContent or {}

    if not data and getattr(result, "content", None):
        first_block = result.content[0]
        if isinstance(first_block, mcp_types.TextContent):
            try:
                data = json.loads(first_block.text)
            except Exception:
                data = {}

    if not isinstance(data, dict):
        return {}
    return data


async def _generate_voice_summary_via_mcp(text: str) -> str:
    if not text.strip():
        raise ValueError("No content available to synthesize.")

    async with streamablehttp_client(VOICE_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()

            voices_result = await session.call_tool("list_voices", arguments={})

            # Prefer structuredContent but fall back to parsing unstructured JSON
            data: Dict[str, Any] = voices_result.structuredContent or {}

            if not data and getattr(voices_result, "content", None):
                first_block = voices_result.content[0]
                if isinstance(first_block, mcp_types.TextContent):
                    try:
                        data = json.loads(first_block.text)
                    except Exception:
                        data = {}

            voices_list = data.get("voices") or data.get("data") or []
            voice_id = None
            if isinstance(voices_list, list) and voices_list:
                first_voice = voices_list[0]
                if isinstance(first_voice, dict):
                    voice_id = first_voice.get("voice_id") or first_voice.get("id")
            if not voice_id:
                raise RuntimeError("No ElevenLabs voices available from MCP server.")

            audio_result = await session.call_tool(
                "generate_incident_summary_audio",
                arguments={"text": text, "voice_id": voice_id},
            )

    # Prefer structured JSON but fall back to parsing text blocks
    audio_data: Dict[str, Any] = audio_result.structuredContent or {}
    if not audio_data and getattr(audio_result, "content", None):
        first_block = audio_result.content[0]
        if isinstance(first_block, mcp_types.TextContent):
            try:
                audio_data = json.loads(first_block.text)
            except Exception:
                audio_data = {}

    audio_b64 = audio_data.get("audio_base64")
    if not audio_b64:
        # Try one more time to extract from text content (JSON or Python literal)
        if getattr(audio_result, "content", None):
            first_block = audio_result.content[0]
            if isinstance(first_block, mcp_types.TextContent):
                raw_text = first_block.text
                parsed: Dict[str, Any] = {}
                try:
                    parsed = json.loads(raw_text)
                except Exception:
                    try:
                        parsed_obj = ast.literal_eval(raw_text)
                        if isinstance(parsed_obj, dict):
                            parsed = parsed_obj
                    except Exception:
                        parsed = {}

                if isinstance(parsed, dict):
                    audio_b64 = parsed.get("audio_base64") or audio_b64
                    if audio_b64:
                        audio_data = parsed

    if not audio_b64:
        # Try to surface any error message returned by the MCP tool
        error_details: Dict[str, Any] = {}
        if audio_data:
            error_details["structured"] = audio_data

        if getattr(audio_result, "content", None):
            first_block = audio_result.content[0]
            if isinstance(first_block, mcp_types.TextContent):
                error_details["text"] = first_block.text

        raise RuntimeError(
            "No audio_base64 field returned from ElevenLabs MCP server. "
            f"Details: {error_details}"
        )

    audio_bytes = base64.b64decode(audio_b64)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        tmp.write(audio_bytes)
        path = tmp.name

    return path


async def _get_logs_via_mcp(service: str = "recs-api", env: str = "prod") -> List[Dict[str, Any]]:
    """Fetch raw logs via MCP (from the Neon-backed or synthetic store)."""

    async with streamablehttp_client(LOGS_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "get_logs",
                arguments={"service": service, "env": env},
            )

    raw = result.structuredContent

    # FastMCP wraps json_response results under a "result" key.
    logs: List[Dict[str, Any]] = []
    if isinstance(raw, list):
        logs = raw
    elif isinstance(raw, dict):
        maybe_list = raw.get("result") or raw.get("logs")
        if isinstance(maybe_list, list):
            logs = maybe_list

    # If structuredContent is empty, fall back to parsing text (if present).
    if not logs and getattr(result, "content", None):
        first_block = result.content[0]
        if isinstance(first_block, mcp_types.TextContent):
            try:
                parsed = json.loads(first_block.text)
                if isinstance(parsed, list):
                    logs = parsed
                elif isinstance(parsed, dict):
                    maybe_list = parsed.get("result") or parsed.get("logs")
                    if isinstance(maybe_list, list):
                        logs = maybe_list
            except Exception:
                logs = []

    return logs


async def _nebius_incident_summary_via_mcp(
    user_message: str,
    logs_summary_text: str,
) -> Dict[str, Any]:
    async with streamablehttp_client(NEBIUS_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "nebius_incident_summary",
                arguments={
                    "user_description": user_message,
                    "logs_summary": logs_summary_text,
                },
            )
            return result.structuredContent or {}


async def _modal_deep_analysis_via_mcp() -> str:
    """Call the Modal MCP server for deep log analysis and pretty-print the result."""

    logs = await _get_logs_via_mcp(service="recs-api", env="prod")

    async with streamablehttp_client(MODAL_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "deep_log_analysis",
                arguments={"service": "recs-api", "env": "prod", "logs": logs},
            )

    raw = result.structuredContent or {}

    # FastMCP with json_response=True wraps tool return values under
    # a top-level "result" key. Unwrap to get the actual analysis dict.
    if isinstance(raw, dict) and "result" in raw and isinstance(raw["result"], dict):
        data: Dict[str, Any] = raw["result"]
    else:
        data = raw if isinstance(raw, dict) else {}

    if not isinstance(data, dict):
        return f"Deep analysis (Modal) returned non-dict result: {raw!r}"

    # Build a concise human-readable headline from the analysis payload.
    log_count = data.get("log_count")
    sev_counts = data.get("severity_counts") or {}
    top_region = data.get("top_region")
    latest_error = data.get("latest_error") or {}

    sev_parts = []
    if isinstance(sev_counts, dict):
        for sev, cnt in sev_counts.items():
            sev_parts.append(f"{sev}={cnt}")
    sev_str = ", ".join(sev_parts) if sev_parts else "no severity distribution available"

    latest_err_msg = latest_error.get("message") if isinstance(latest_error, dict) else None
    latest_err_ts = latest_error.get("timestamp") if isinstance(latest_error, dict) else None

    headline_parts = []
    if isinstance(log_count, int):
        headline_parts.append(f"Analyzed {log_count} logs")
    if sev_parts:
        headline_parts.append(f"severity mix: {sev_str}")
    if top_region:
        headline_parts.append(f"top region: {top_region}")
    headline = "; ".join(headline_parts) if headline_parts else "Deep log analysis summary"

    if latest_err_msg:
        if latest_err_ts:
            headline += f". Latest error at {latest_err_ts}: {latest_err_msg}"
        else:
            headline += f". Latest error: {latest_err_msg}"

    pretty = json.dumps(data, indent=2)
    return (
        f"**Deep Analysis (Modal)**\n\n{headline}\n\n"
        f"```json\n{pretty}\n```"
    )


async def _blaxel_run_diagnostic_via_mcp() -> str:
    """Run a simple sandbox diagnostic via the Blaxel MCP server."""

    async with streamablehttp_client(BLAXEL_SERVER_URL, headers=_MCP_HEADERS) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "run_simple_diagnostic",
                arguments={
                    # A slightly richer default command that demonstrates the
                    # sandbox is actually running OS-level commands.
                    "command": (
                        "echo '[sandbox] incident diagnostics start' && "
                        "uname -a && echo 'sandbox diagnostics ok'"
                    ),
                },
            )

    raw = result.structuredContent

    # FastMCP with json_response=True wraps tool return values under
    # a top-level "result" key. Unwrap that so we get the actual
    # diagnostics dict from the Blaxel MCP server.
    if isinstance(raw, dict) and "result" in raw and isinstance(raw["result"], dict):
        data: Dict[str, Any] = raw["result"]
    else:
        data = raw if isinstance(raw, dict) else {}

    # If we still don't have structured data, surface any textual error
    # returned by the MCP tool so the user can see what went wrong
    # (e.g. auth issues, quota, etc.).
    if not data and getattr(result, "content", None):
        first_block = result.content[0]
        if isinstance(first_block, mcp_types.TextContent):
            text = first_block.text.strip()
            return (
                "**Sandbox Diagnostics (Blaxel)**\n"
                "Blaxel MCP did not return structured diagnostics data. Raw response:\n\n"
                f"```text\n{text}\n```"
            )

    if not isinstance(data, dict):
        return f"Diagnostics (Blaxel) returned non-dict result: {raw!r}"

    stdout = str(data.get("stdout", "")).strip()
    stderr = str(data.get("stderr", "")).strip()
    exit_code = data.get("exit_code")

    parts = ["**Sandbox Diagnostics (Blaxel)**"]
    parts.append(f"Exit code: {exit_code}")
    if stdout:
        parts.append("\n**stdout:**\n")
        parts.append(f"```\n{stdout}\n```")
    if stderr:
        parts.append("\n**stderr:**\n")
        parts.append(f"```\n{stderr}\n```")
    return "\n".join(parts)


async def _auto_triage_incident(history: List[Dict[str, Any]]) -> str:
    """Agentic triage flow that orchestrates logs, Nebius, Modal, and Blaxel.

    It reads the latest user incident description, pulls logs via the Logs MCP
    (Neon-backed), runs Nebius for a structured incident summary, then runs
    Modal deep analysis and Blaxel sandbox diagnostics. The result is a
    markdown report describing the steps taken and the findings.
    """

    # Find the latest user message to triage
    last_user = None
    for msg in reversed(history or []):
        if msg.get("role") == "user":
            last_user = msg
            break

    if not last_user:
        return (
            "**Agent Triage Report**\n\n"
            "No user incident description found yet. "
            "Please describe an incident in the chat first."
        )

    user_message = str(last_user.get("content", ""))

    steps: List[str] = []
    steps.append("1. Read your latest incident description.")

    logs_summary_text = "No logs summary available."
    logs_data: Dict[str, Any] = {}
    nebius_data: Dict[str, Any] = {}

    # Step 2: Pull logs and build a brief summary
    try:
        logs_data = await _summarize_logs_via_mcp(service="recs-api", env="prod")
        logs_summary_text = logs_data.get("summary", logs_summary_text)
        steps.append(
            "2. Pulled recent logs for `recs-api` (prod) from Neon via the Logs MCP server."
        )
    except Exception as exc:  # pragma: no cover - defensive
        logs_summary_text = f"(Error fetching logs summary from MCP: {exc})"
        steps.append("2. Attempted to pull logs from Neon but hit an error.")

    # Step 3: Nebius incident summary
    try:
        nebius_data = await _nebius_incident_summary_via_mcp(
            user_message=user_message,
            logs_summary_text=logs_summary_text,
        )
        severity = nebius_data.get("severity", "Unknown")
        steps.append(
            f"3. Generated a structured incident summary using Nebius (severity: {severity})."
        )
    except Exception as exc:  # pragma: no cover - defensive
        nebius_data = {}
        steps.append(f"3. Nebius incident summarization failed: {exc}.")

    # Step 4: Modal deep analysis
    try:
        modal_section = await _modal_deep_analysis_via_mcp()
        steps.append(
            "4. Ran deep log analysis with Modal over the same Neon-backed logs."
        )
    except Exception as exc:  # pragma: no cover - defensive
        modal_section = f"Deep analysis (Modal) failed: {exc}"
        steps.append("4. Attempted deep log analysis with Modal but hit an error.")

    # Step 5: Blaxel sandbox diagnostics
    try:
        blaxel_section = await _blaxel_run_diagnostic_via_mcp()
        steps.append(
            "5. Executed sandbox diagnostics in a Blaxel VM to validate basic system health."
        )
    except Exception as exc:  # pragma: no cover - defensive
        blaxel_section = f"Sandbox diagnostics (Blaxel) failed: {exc}"
        steps.append("5. Attempted sandbox diagnostics with Blaxel but hit an error.")

    # Format the Nebius summary section
    if nebius_data:
        title = nebius_data.get("title", "Incident Summary")
        severity = nebius_data.get("severity", "Unknown")
        impact = nebius_data.get("impact", "Not specified")
        root_cause = nebius_data.get("root_cause", "Not specified")
        actions = nebius_data.get("actions", [])
        if isinstance(actions, list):
            actions_text = "\n".join(f"- {a}" for a in actions)
        else:
            actions_text = str(actions)

        nebius_section = (
            f"**{title}** (severity: {severity})\n\n"
            f"**Impact:** {impact}\n\n"
            f"**Probable root cause:** {root_cause}\n\n"
            "**Log summary (recs-api, prod):**\n"
            f"{logs_summary_text}\n\n"
            "**Recommended actions:**\n"
            f"{actions_text}\n"
        )
    else:
        nebius_section = (
            "**Incident Summary:** Nebius summarization was not available.\n\n"
            "**Log summary (recs-api, prod):**\n"
            f"{logs_summary_text}\n"
        )

    steps_md = "\n".join(f"- {s}" for s in steps)

    report = (
        "**Agent Triage Report**\n\n"
        "**Steps taken:**\n"
        f"{steps_md}\n\n"
        "---\n\n"
        "### Incident Summary (Nebius)\n\n"
        f"{nebius_section}\n\n"
        "### Deep Log Analysis (Modal)\n\n"
        f"{modal_section}\n\n"
        "### Sandbox Diagnostics (Blaxel)\n\n"
        f"{blaxel_section}\n"
    )
    return report


async def _chat_fn(message: str, history: List[Dict[str, Any]]) -> str:
    try:
        logs_data = await _summarize_logs_via_mcp(service="recs-api", env="prod")
        logs_summary_text = logs_data.get("summary", "No logs summary available.")
    except Exception as exc:
        logs_summary_text = f"(Error fetching logs summary from MCP: {exc})"

    try:
        nebius_data = await _nebius_incident_summary_via_mcp(
            user_message=message,
            logs_summary_text=logs_summary_text,
        )
    except Exception as exc:
        # Fall back to a simpler message if Nebius is unavailable
        return (
            "Thanks for the incident description.\n\n"
            "Here is a synthetic log summary for service `recs-api` (prod):\n"
            f"{logs_summary_text}\n\n"
            f"(Nebius incident summary call failed: {exc})"
        )

    title = nebius_data.get("title", "Incident Summary")
    severity = nebius_data.get("severity", "Unknown")
    impact = nebius_data.get("impact", "Not specified")
    root_cause = nebius_data.get("root_cause", "Not specified")
    actions = nebius_data.get("actions", [])

    if isinstance(actions, list):
        actions_text = "\n".join(f"- {a}" for a in actions)
    else:
        actions_text = str(actions)

    reply = (
        f"**{title}** (severity: {severity})\n\n"
        f"**Impact:** {impact}\n\n"
        f"**Probable root cause:** {root_cause}\n\n"
        "**Log summary (recs-api, prod):**\n"
        f"{logs_summary_text}\n\n"
        "**Recommended actions:**\n"
        f"{actions_text}\n\n"
        "This summary was generated via Nebius Token Factory. "
        "You can click **Play Voice Summary (ElevenLabs)** to hear an audio recap."
    )
    return reply


async def _voice_from_history(history: List[Dict[str, Any]]) -> str:
    last_assistant = None
    for msg in reversed(history or []):
        if msg.get("role") == "assistant":
            last_assistant = msg
            break

    if not last_assistant:
        raise ValueError("No assistant messages found to synthesize.")

    content = str(last_assistant.get("content", ""))
    spoken_text = _prepare_spoken_text(content)
    return await _generate_voice_summary_via_mcp(spoken_text)


def build_interface() -> gr.Blocks:
    with gr.Blocks(
        title="Incident & Error Copilot",
    ) as demo:
        gr.Markdown("# Incident & Error Copilot", elem_classes=["incident-header"])
        gr.Markdown(
            "Enterprise incident assistant powered by MCP servers (logs, Nebius, Modal, Blaxel, ElevenLabs).",
            elem_classes=["incident-subheader"],
        )

        with gr.Row():
            with gr.Column(scale=3):
                chat = gr.ChatInterface(
                    fn=_chat_fn,
                    textbox=gr.Textbox(
                        placeholder="Describe an incident or paste an error stack trace...",
                        label="Incident description",
                    ),
                    title="Incident Copilot",
                )

            with gr.Column(scale=2):
                gr.Markdown("### Incident Tools")
                gr.Markdown(
                    "Use these MCP-backed tools to go deeper on the current incident.",
                )

                # Voice summary panel
                with gr.Group(elem_classes=["tool-panel"]):
                    gr.Markdown(
                        "**Play Voice Summary (ElevenLabs)** — listen to the latest assistant summary as audio.",
                    )
                    voice_button = gr.Button("Play Voice Summary (ElevenLabs)")
                    audio_player = gr.Audio(
                        label="Incident Voice Summary",
                        type="filepath",
                        interactive=False,
                    )

                # Agentic auto-triage panel
                with gr.Group(elem_classes=["tool-panel"]):
                    gr.Markdown(
                        "**Auto Triage Incident (Agent)** — runs logs, Nebius, Modal and Blaxel in sequence and produces a triage report.",
                    )
                    auto_button = gr.Button("Auto Triage Incident (Agent)")
                    auto_output = gr.Markdown(label="Agent Triage Report")

                # Modal deep analysis panel
                with gr.Group(elem_classes=["tool-panel"]):
                    gr.Markdown(
                        "**Deep Log Analysis (Modal)** — calls a Modal function for detailed log statistics and latest error context.",
                    )
                    modal_button = gr.Button("Deep Log Analysis (Modal)")
                    modal_output = gr.Markdown(label="Deep Analysis Result")

                # Blaxel sandbox diagnostics panel
                with gr.Group(elem_classes=["tool-panel"]):
                    gr.Markdown(
                        "**Sandbox Health Check (Blaxel)** — runs a lightweight command inside a Blaxel sandbox and shows its output.",
                    )
                    blaxel_button = gr.Button("Sandbox Health Check (Blaxel)")
                    blaxel_output = gr.Markdown(label="Diagnostics Output")

        voice_button.click(_voice_from_history, inputs=[chat.chatbot], outputs=[audio_player])
        auto_button.click(_auto_triage_incident, inputs=[chat.chatbot], outputs=[auto_output])
        modal_button.click(_modal_deep_analysis_via_mcp, inputs=None, outputs=[modal_output])
        blaxel_button.click(_blaxel_run_diagnostic_via_mcp, inputs=None, outputs=[blaxel_output])

    return demo


if __name__ == "__main__":
    demo = build_interface()
    demo.launch()  # For local dev; HF Spaces will call `demo = build_interface()` implicitly
````
mcp_servers/__init__.py
ADDED
```python
"""MCP servers for the Incident & Error Copilot.

Servers:
- logs_server: access synthetic application logs
- voice_elevenlabs_server: generate audio summaries via ElevenLabs
Additional servers (status, runbooks) will be added here.
"""
```
mcp_servers/blaxel_server.py
ADDED
```python
"""Blaxel Sandboxed Diagnostics MCP server.

This MCP server uses the official Blaxel Python SDK to trigger simple
sandbox-based diagnostics. It expects that you have authenticated locally
(using `bl login` or environment variables like BL_API_KEY / BL_WORKSPACE).

Environment variables (one of these auth methods must be configured as per
Blaxel docs):
- BL_API_KEY, BL_WORKSPACE (recommended for remote deployment), or
- local Blaxel CLI login config.

Tools:
- run_simple_diagnostic: create or reuse a sandbox and run a shell command,
  returning stdout/stderr.

This provides a concrete, real integration point with Blaxel's infrastructure
without mocking.
"""

from __future__ import annotations

import asyncio
import os
from typing import Any, Dict

from blaxel.core import SandboxInstance
from mcp.server.fastmcp import FastMCP


mcp = FastMCP("BlaxelDiagnostics", json_response=True)


async def _get_or_create_sandbox(name: str = "incident-diagnostics") -> SandboxInstance:
    """Get an existing sandbox by name or create a new one.

    Uses the default image and region; you can later tune this via the Blaxel UI
    or by editing this server.
    """

    try:
        sandbox = await SandboxInstance.get(name)
    except Exception:
        # Per Blaxel SDK docs, create() expects a single dict payload
        # with fields like name, image, memory, ports, region, etc.
        sandbox = await SandboxInstance.create(
            {
                "name": name,
                "image": "blaxel/base-image:latest",
                "memory": 2048,  # MB
                "ports": [],
            }
        )
    return sandbox


@mcp.tool()
async def run_simple_diagnostic(command: str = "echo 'diagnostic ok'", name: str = "incident-diagnostics") -> Dict[str, Any]:
    """Run a simple shell command inside a Blaxel sandbox.

    This is a generic diagnostic hook. In practice, you can pass commands
    like curl checks against upstream services, small Python scripts, etc.
    """

    sandbox = await _get_or_create_sandbox(name)

    # Execute the command in the sandbox using the Blaxel SDK's
    # process.exec API. We request waitForCompletion so that logs are
    # available directly on the returned process object.
    process = await sandbox.process.exec(
        {
            "name": "incident-diagnostics",
            "command": command,
            "waitForCompletion": True,
        }
    )

    # According to the docs, logs will contain stdout/stderr for the
    # completed process. For the purposes of this demo we surface logs
    # as stdout and assume exit_code 0 if no exception was raised.
    logs = getattr(process, "logs", "")

    return {
        "sandbox_name": name,
        "exit_code": 0,
        "stdout": logs,
        "stderr": "",
    }


if __name__ == "__main__":
    host = os.getenv("BL_SERVER_HOST", "0.0.0.0")
    port = int(os.getenv("BL_SERVER_PORT", "8000"))
    mcp.run(transport="streamable-http", host=host, port=port)
```
mcp_servers/gateway.py
ADDED
```python
from __future__ import annotations

import contextlib

from starlette.applications import Starlette
from starlette.routing import Mount

from mcp_servers.logs_server import mcp as logs_mcp
from mcp_servers.voice_elevenlabs_server import mcp as voice_mcp
from mcp_servers.nebius_server import mcp as nebius_mcp
from mcp_servers.modal_server import mcp as modal_mcp
from mcp_servers.blaxel_server import mcp as blaxel_mcp


@contextlib.asynccontextmanager
async def lifespan(app: Starlette):
    async with contextlib.AsyncExitStack() as stack:
        await stack.enter_async_context(logs_mcp.session_manager.run())
        await stack.enter_async_context(voice_mcp.session_manager.run())
        await stack.enter_async_context(nebius_mcp.session_manager.run())
        await stack.enter_async_context(modal_mcp.session_manager.run())
        await stack.enter_async_context(blaxel_mcp.session_manager.run())
        yield


app = Starlette(
    routes=[
        Mount("/logs", logs_mcp.streamable_http_app()),
        Mount("/voice", voice_mcp.streamable_http_app()),
        Mount("/nebius", nebius_mcp.streamable_http_app()),
        Mount("/modal", modal_mcp.streamable_http_app()),
        Mount("/blaxel", blaxel_mcp.streamable_http_app()),
    ],
    lifespan=lifespan,
)
```
mcp_servers/logs_server.py
ADDED
```python
"""Logs MCP server exposing synthetic incident logs via MCP tools.

Run locally, for example:
    python -m mcp_servers.logs_server
Then connect from an MCP-compatible client using the HTTP transport.
"""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Literal, Optional

import os
import psycopg

from mcp.server.fastmcp import FastMCP


mcp = FastMCP("IncidentLogs", json_response=True)


@dataclass
class LogEntry:
    timestamp: str
    service: str
    env: Literal["prod", "staging", "dev"]
    severity: Literal["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"]
    message: str
    region: str


_NEON_DSN = os.getenv("NEON_DATABASE_URL") or os.getenv("DATABASE_URL")
_neon_conn: Optional[psycopg.Connection] = None


def _get_neon_conn() -> Optional[psycopg.Connection]:
    """Return a cached Neon/Postgres connection if a DSN is configured.

    If no NEON_DATABASE_URL / DATABASE_URL is set, returns None and the
    synthetic in-memory log store will be used instead.
    """

    global _neon_conn
    if not _NEON_DSN:
        return None
    if _neon_conn is None or _neon_conn.closed:
        _neon_conn = psycopg.connect(_NEON_DSN)
    return _neon_conn


# Very small synthetic log store for now; we can expand later.
_LOGS: List[LogEntry] = [
    LogEntry(
        timestamp="2025-11-25T22:10:00Z",
        service="recs-api",
        env="prod",
        severity="ERROR",
        message="Timeout while calling upstream model backend (eu-west-1)",
        region="eu-west-1",
    ),
    LogEntry(
        timestamp="2025-11-25T22:11:30Z",
        service="recs-api",
        env="prod",
        severity="WARN",
        message="Latency spike detected: p95=9800ms for /predict",
        region="eu-west-1",
    ),
    LogEntry(
        timestamp="2025-11-25T22:12:10Z",
        service="recs-api",
        env="prod",
        severity="INFO",
        message="Auto-scaler requested 2 additional model replicas",
        region="eu-west-1",
    ),
]


@mcp.tool()
def get_logs(
    service: str,
    env: Literal["prod", "staging", "dev"] = "prod",
    severity: Optional[Literal["DEBUG", "INFO", "WARN", "ERROR", "CRITICAL"]] = None,
    region: Optional[str] = None,
) -> list[dict]:
    """Fetch recent logs for a given service/environment.

    If a Neon/Postgres DSN is configured, this will query the `incident_logs`
    table; otherwise it falls back to the in-memory synthetic store.
    """

    conn = _get_neon_conn()
    if conn is not None:
        where_clauses = ["service = %s", "env = %s"]
        params: List[Any] = [service, env]

        if severity is not None:
            where_clauses.append("severity = %s")
            params.append(severity)
        if region is not None:
            where_clauses.append("region = %s")
            params.append(region)

        where_sql = " AND ".join(where_clauses)
        sql = (
            "SELECT timestamp, service, env, severity, message, region "
            "FROM incident_logs "
            f"WHERE {where_sql} "
            "ORDER BY timestamp DESC LIMIT 200"
        )

        with conn.cursor() as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()

        results: List[Dict[str, Any]] = []
        for ts, svc, env_val, sev, msg, reg in rows:
            if isinstance(ts, datetime):
                ts_val = ts.isoformat()
            else:
                ts_val = str(ts)
            results.append(
                {
                    "timestamp": ts_val,
                    "service": svc,
                    "env": env_val,
                    "severity": sev,
                    "message": msg,
                    "region": reg,
                }
            )
        return results

    # Fallback: synthetic in-memory log store
    def _matches(entry: LogEntry) -> bool:
        if entry.service != service:
            return False
        if entry.env != env:
            return False
        if severity is not None and entry.severity != severity:
            return False
        if region is not None and entry.region != region:
            return False
        return True

    results = [e for e in _LOGS if _matches(e)]
    return [e.__dict__ for e in results]


@mcp.tool()
def summarize_logs(service: str, env: Literal["prod", "staging", "dev"] = "prod") -> dict:
    """Provide a tiny synthetic summary of recent logs for a service.

    Returns counts by severity and a short human-readable summary.
    """

    conn = _get_neon_conn()
    if conn is not None:
        # Reuse get_logs to unify shape
        relevant_dicts = get_logs(service=service, env=env)
    else:
        relevant_dicts = [e.__dict__ for e in _LOGS if e.service == service and e.env == env]

    counts: dict[str, int] = {}
    for e in relevant_dicts:
        sev = e.get("severity")
        if sev is None:
            continue
        counts[sev] = counts.get(sev, 0) + 1

    summary: str
    if not relevant_dicts:
        summary = "No logs found for this service/env in the demo store."
    else:
        latest = max(relevant_dicts, key=lambda e: e.get("timestamp", ""))
        summary = (
            f"Latest log at {latest.get('timestamp')}: [{latest.get('severity')}] "
            f"{latest.get('message')} (region={latest.get('region')})."
        )

    return {"counts": counts, "summary": summary}


if __name__ == "__main__":
    # Expose as a streamable HTTP MCP server.
    # When running on Blaxel MCP hosting, BL_SERVER_HOST/BL_SERVER_PORT
    # are provided by the platform. Locally we default to 0.0.0.0:8000.
    host = os.getenv("BL_SERVER_HOST", "0.0.0.0")
    port = int(os.getenv("BL_SERVER_PORT", "8000"))
    mcp.run(transport="streamable-http", host=host, port=port)
```
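A note on the `incident_logs` table queried above: this commit does not create it. A hypothetical one-off seeding script, assuming exactly the six columns that `get_logs` selects:

```python
# Hypothetical seeding script for the Neon-backed path; the incident_logs
# schema is inferred from the SELECT in get_logs and is not part of this commit.
import os

import psycopg

with psycopg.connect(os.environ["NEON_DATABASE_URL"]) as conn:
    with conn.cursor() as cur:
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS incident_logs (
                timestamp timestamptz NOT NULL,
                service   text NOT NULL,
                env       text NOT NULL,
                severity  text NOT NULL,
                message   text NOT NULL,
                region    text NOT NULL
            )
            """
        )
        cur.execute(
            "INSERT INTO incident_logs "
            "(timestamp, service, env, severity, message, region) "
            "VALUES (%s, %s, %s, %s, %s, %s)",
            (
                "2025-11-25T22:10:00Z",
                "recs-api",
                "prod",
                "ERROR",
                "Timeout while calling upstream model backend (eu-west-1)",
                "eu-west-1",
            ),
        )
```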
mcp_servers/modal_server.py
ADDED
@@ -0,0 +1,84 @@
"""Modal Deep Analysis MCP server.

This server forwards deep log analysis requests to a real Modal web endpoint
that you deploy.

Environment variables:
- MODAL_DEEP_ANALYSIS_URL: required. HTTPS URL of a Modal web endpoint
  (e.g. https://modal-labs-example--deep-analysis.modal.run).
- MODAL_AUTH_TOKEN: optional. If set, sent as
  `Authorization: Bearer <MODAL_AUTH_TOKEN>` header.

The Modal endpoint is expected to accept JSON like:

    {
        "service": "recs-api",
        "env": "prod",
        "logs": [ ... ]
    }

and return a JSON object with deep analysis results, which this MCP server
returns directly to clients.
"""

from __future__ import annotations

import os
from typing import Any, Dict, List, Optional

import httpx
from mcp.server.fastmcp import FastMCP


mcp = FastMCP("ModalDeepAnalysis", json_response=True)


def _get_modal_config() -> str:
    url = os.getenv("MODAL_DEEP_ANALYSIS_URL")
    if not url:
        raise RuntimeError(
            "MODAL_DEEP_ANALYSIS_URL is not set. Set it to your Modal web "
            "endpoint URL for deep log analysis."
        )
    return url


@mcp.tool()
async def deep_log_analysis(
    service: str,
    env: str = "prod",
    logs: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Run deep log analysis using a deployed Modal function.

    This calls a real Modal web endpoint specified by MODAL_DEEP_ANALYSIS_URL.
    The endpoint should perform heavy/statistical analysis and return a JSON
    object summarizing findings (spikes, time windows, correlations, etc.).
    """

    url = _get_modal_config()
    headers: Dict[str, str] = {"Content-Type": "application/json"}
    token = os.getenv("MODAL_AUTH_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    payload = {
        "service": service,
        "env": env,
        "logs": logs or [],
    }

    async with httpx.AsyncClient() as client:
        resp = await client.post(url, json=payload, headers=headers, timeout=60)
        resp.raise_for_status()
        data = resp.json()

    if not isinstance(data, dict):
        # Normalize non-dict responses
        return {"result": data}
    return data


if __name__ == "__main__":
    host = os.getenv("BL_SERVER_HOST", "0.0.0.0")
    port = int(os.getenv("BL_SERVER_PORT", "8000"))
    mcp.run(transport="streamable-http", host=host, port=port)
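
Because the tool simply forwards the documented payload, the endpoint contract can be exercised without MCP at all. A hedged sketch using httpx; the URL is a placeholder for whatever MODAL_DEEP_ANALYSIS_URL points at, and the log entry values are illustrative:

    # Hypothetical request matching the payload shape in the docstring above.
    import httpx

    payload = {
        "service": "recs-api",
        "env": "prod",
        "logs": [
            {"timestamp": "2024-01-01T00:00:00", "service": "recs-api", "env": "prod",
             "severity": "ERROR", "message": "upstream timeout", "region": "eu-west-1"},
        ],
    }
    resp = httpx.post(
        "https://<your-workspace>--deep-log-analysis.modal.run",
        json=payload,
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json())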
mcp_servers/nebius_server.py
ADDED
@@ -0,0 +1,116 @@
"""Nebius Token Factory MCP server.

Provides tools that call the real Nebius Token Factory OpenAI-compatible API
for incident reasoning.

Environment variables:
- NEBIUS_API_KEY: required. Nebius Token Factory API key.
- NEBIUS_MODEL_ID: optional. Model identifier (default: deepseek-ai/DeepSeek-R1-0528).
- NEBIUS_BASE_URL: optional. Override base URL (default: https://api.tokenfactory.nebius.com/v1/).
"""

from __future__ import annotations

import json
import os
from typing import Any, Dict, Optional

from mcp.server.fastmcp import FastMCP
from openai import AsyncOpenAI


mcp = FastMCP("NebiusIncident", json_response=True)


def _get_client() -> AsyncOpenAI:
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "NEBIUS_API_KEY environment variable is not set; "
            "set it to a valid Nebius Token Factory API key."
        )

    base_url = os.getenv("NEBIUS_BASE_URL", "https://api.tokenfactory.nebius.com/v1/")
    return AsyncOpenAI(api_key=api_key, base_url=base_url)


def _get_model_id() -> str:
    return os.getenv("NEBIUS_MODEL_ID", "deepseek-ai/DeepSeek-R1-0528")


@mcp.tool()
async def nebius_incident_summary(
    user_description: str,
    logs_summary: Optional[str] = None,
) -> Dict[str, Any]:
    """Call the Nebius LLM to generate a structured incident summary.

    Returns a JSON object with fields like:
    - title
    - severity
    - impact
    - root_cause
    - actions (list of recommended steps)
    """

    client = _get_client()
    model = _get_model_id()

    system_prompt = (
        "You are an SRE / incident management assistant. "
        "Given an incident description and optional logs summary, "
        "produce a concise, structured JSON incident report."
    )

    user_content = {
        "user_description": user_description,
        "logs_summary": logs_summary,
    }

    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Respond ONLY with a JSON object of the form "
                        "{\"title\": str, \"severity\": str, \"impact\": str, "
                        "\"root_cause\": str, \"actions\": [str]} based on this data: "
                        f"{json.dumps(user_content)}"
                    ),
                }
            ],
        },
    ]

    response = await client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.2,
        response_format={"type": "json_object"},
    )

    # In JSON mode, message.content should be a JSON string.
    content = response.choices[0].message.content
    if isinstance(content, str):
        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            # Fall back to wrapping raw content if decoding fails.
            data = {"raw": content}
    else:
        # For safety if the SDK returns a different structure.
        data = {"raw": str(content)}

    return data


if __name__ == "__main__":
    host = os.getenv("BL_SERVER_HOST", "0.0.0.0")
    port = int(os.getenv("BL_SERVER_PORT", "8000"))
    mcp.run(transport="streamable-http", host=host, port=port)
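
The tool is an async function, so it can also be smoke-tested directly with asyncio (again assuming @mcp.tool() hands back the original callable). Requires NEBIUS_API_KEY in the environment; the incident text below is illustrative:

    # Hypothetical direct invocation of the tool for local testing.
    import asyncio
    from mcp_servers.nebius_server import nebius_incident_summary

    report = asyncio.run(
        nebius_incident_summary(
            user_description="recs-api returning 500s since 14:02 UTC",
            logs_summary="ERROR spike in eu-west-1, mostly upstream timeouts",
        )
    )
    print(report.get("title"), report.get("severity"), report.get("actions"))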
mcp_servers/voice_elevenlabs_server.py
ADDED
@@ -0,0 +1,107 @@
"""ElevenLabs Voice MCP server.

Exposes tools to list voices and generate audio summaries for incidents.

Requires the ELEVENLABS_API_KEY environment variable to be set.
"""

from __future__ import annotations

import base64
import os
from typing import Any, Dict

import httpx
from mcp.server.fastmcp import FastMCP


mcp = FastMCP("ElevenLabsVoice", json_response=True)

_BASE_URL = "https://api.elevenlabs.io/v1"


def _get_api_key() -> str:
    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "ELEVENLABS_API_KEY environment variable is not set; "
            "set it before using the ElevenLabs MCP server."
        )
    return api_key


@mcp.tool()
async def list_voices() -> dict:
    """List available ElevenLabs voices.

    Wraps GET /v1/voices and returns the JSON payload.
    """

    api_key = _get_api_key()
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            f"{_BASE_URL}/voices",
            headers={"xi-api-key": api_key},
            timeout=20,
        )
        resp.raise_for_status()
        data = resp.json()

    # Ensure we always return a plain dict for structured MCP output
    if isinstance(data, dict):
        return data
    return {"voices": data}


@mcp.tool()
async def generate_incident_summary_audio(
    text: str,
    voice_id: str,
    model_id: str = "eleven_turbo_v2",
    bitrate: str = "128k",
) -> Dict[str, Any]:
    """Generate an audio summary for an incident description or report.

    Returns base64-encoded MP3 data plus basic metadata. The client app
    (e.g. Gradio UI) can decode this into an audio player.

    Note: `bitrate` is echoed back as metadata only; it is not sent to the
    ElevenLabs API.
    """

    api_key = _get_api_key()
    payload = {
        "text": text,
        "model_id": model_id,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.7,
        },
    }

    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{_BASE_URL}/text-to-speech/{voice_id}",
            headers={
                "xi-api-key": api_key,
                "Accept": "audio/mpeg",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=60,
        )
        resp.raise_for_status()
        audio_bytes = resp.content

    audio_b64 = base64.b64encode(audio_bytes).decode("ascii")
    return {
        "audio_base64": audio_b64,
        "format": "mp3",
        "model_id": model_id,
        "voice_id": voice_id,
        "bitrate": bitrate,
    }


if __name__ == "__main__":
    # Expose as a streamable HTTP MCP server.
    host = os.getenv("BL_SERVER_HOST", "0.0.0.0")
    port = int(os.getenv("BL_SERVER_PORT", "8000"))
    mcp.run(transport="streamable-http", host=host, port=port)
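
On the client side, the decode step the docstring describes is a one-liner with base64. A minimal sketch; the helper name and file path are arbitrary:

    # Turn the tool result back into playable MP3 bytes for the Gradio audio player.
    import base64

    def save_summary_audio(result: dict, path: str = "incident_summary.mp3") -> str:
        with open(path, "wb") as f:
            f.write(base64.b64decode(result["audio_base64"]))
        return path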
modal_deep_analysis_app.py
ADDED
@@ -0,0 +1,92 @@
"""Modal app exposing a deep_log_analysis web endpoint.

This is called by the Modal MCP server (mcp_servers/modal_server.py).
It expects a JSON body of the form:

    {
        "service": "recs-api",
        "env": "prod",
        "logs": [
            {"timestamp": "...", "service": "...", "env": "...",
             "severity": "ERROR", "message": "...", "region": "..."},
            ...
        ]
    }

and returns a JSON object with some aggregate stats and a short summary.
"""

from __future__ import annotations

from collections import Counter
from typing import Any, Dict, List

import modal


# Web endpoints using modal.fastapi_endpoint now require FastAPI to be installed
# explicitly in the container image.
image = modal.Image.debian_slim().pip_install("fastapi[standard]")

app = modal.App("incident-deep-analysis", image=image)


@app.function()
@modal.fastapi_endpoint(method="POST", docs=True)
def deep_log_analysis(payload: Dict[str, Any]) -> Dict[str, Any]:
    service = payload.get("service")
    env = payload.get("env")
    logs: List[Dict[str, Any]] = payload.get("logs") or []

    # Basic stats over the logs we received
    severity_counts: Counter[str] = Counter()
    regions: Counter[str] = Counter()
    latest_error: Dict[str, Any] | None = None

    for entry in logs:
        sev = str(entry.get("severity", "UNKNOWN"))
        severity_counts[sev] += 1
        region = str(entry.get("region", "unknown"))
        regions[region] += 1

        if sev in {"ERROR", "CRITICAL"} and latest_error is None:
            # Logs are usually newest-first, so the first high-severity
            # entry we encounter is the most recent one.
            latest_error = entry

    top_region, top_region_count = (None, 0)
    if regions:
        top_region, top_region_count = regions.most_common(1)[0]

    summary_lines = []
    summary_lines.append(
        f"Deep analysis for service '{service}' in env '{env}' over {len(logs)} log entries."
    )

    if severity_counts:
        parts = [f"{sev}={count}" for sev, count in severity_counts.items()]
        summary_lines.append("Severity distribution: " + ", ".join(parts) + ".")

    if latest_error is not None:
        summary_lines.append(
            "Latest high-severity event: "
            f"[{latest_error.get('severity')}] {latest_error.get('message')} "
            f"at {latest_error.get('timestamp')} (region={latest_error.get('region')})."
        )

    if top_region is not None:
        summary_lines.append(
            f"Region with most activity: {top_region} ({top_region_count} events)."
        )

    summary = " ".join(summary_lines)

    return {
        "service": service,
        "env": env,
        "log_count": len(logs),
        "severity_counts": dict(severity_counts),
        "top_region": top_region,
        "top_region_count": top_region_count,
        "latest_error": latest_error,
        "summary": summary,
    }
requirements.txt
ADDED
@@ -0,0 +1,10 @@
gradio
mcp
httpx
pydantic
python-dotenv
openai
modal
blaxel
psycopg[binary]
uvicorn[standard]
run_gateway.py
ADDED
@@ -0,0 +1,18 @@
import os

import uvicorn
from dotenv import load_dotenv

from mcp_servers.gateway import app


if __name__ == "__main__":
    # Load local env and .env.blaxel so all MCP servers see the same secrets
    load_dotenv()
    load_dotenv(".env.blaxel", override=False)

    host = os.getenv("MCP_GATEWAY_HOST", "127.0.0.1")
    # Default to 8004 locally to avoid conflicts with other services
    port = int(os.getenv("MCP_GATEWAY_PORT", "8004"))

    uvicorn.run(app, host=host, port=port)
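
To verify the gateway end-to-end after `python run_gateway.py`, an MCP client can connect over streamable HTTP. A sketch using the mcp SDK client helpers; the /logs/mcp mount path is an assumption here (the real routes live in mcp_servers/gateway.py):

    # Hypothetical gateway check: list the tools exposed by one mounted server.
    import asyncio

    from mcp import ClientSession
    from mcp.client.streamable_http import streamablehttp_client

    async def main() -> None:
        url = "http://127.0.0.1:8004/logs/mcp"  # assumed mount path
        async with streamablehttp_client(url) as (read, write, _):
            async with ClientSession(read, write) as session:
                await session.initialize()
                tools = await session.list_tools()
                print([t.name for t in tools.tools])

    asyncio.run(main())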