// deno_julep_proxy.ts
//
// OpenAI-compatible proxy in front of the Julep API. For every chat completion
// request it creates a temporary Julep agent and session, performs a single
// non-streaming chat call, queues deletion of the temporary resources, and
// returns the result in OpenAI format (optionally as a simulated SSE stream).
import { serve } from "https://deno.land/std@0.224.0/http/server.ts"; // Deno standard HTTP server

// Julep API Base URL (fixed)
const JULEP_API_BASE = "https://api.julep.ai/api";

// Hardcoded list of models (Agent IDs in this context)
const HARDCODED_MODELS = [
  'mistral-large-2411',
  'o1',
  'text-embedding-3-large',
  'vertex_ai/text-embedding-004',
  'claude-3.5-haiku',
  'cerebras/llama-4-scout-17b-16e-instruct',
  'llama-3.1-8b',
  'magnum-v4-72b',
  'voyage-multilingual-2',
  'claude-3-haiku',
  'gpt-4o',
  'BAAI/bge-m3',
  'openrouter/meta-llama/llama-4-maverick',
  'openrouter/meta-llama/llama-4-scout',
  'claude-3.5-sonnet',
  'hermes-3-llama-3.1-70b',
  'claude-3.5-sonnet-20240620',
  'qwen-2.5-72b-instruct',
  'l3.3-euryale-70b',
  'gpt-4o-mini',
  'cerebras/llama-3.3-70b',
  'o1-preview',
  'gemini-1.5-pro-latest',
  'l3.1-euryale-70b',
  'claude-3-sonnet',
  'Alibaba-NLP/gte-large-en-v1.5',
  'openrouter/meta-llama/llama-4-scout:free',
  'llama-3.1-70b',
  'eva-qwen-2.5-72b',
  'claude-3.5-sonnet-20241022',
  'gemini-2.0-flash',
  'deepseek-chat',
  'o1-mini',
  'eva-llama-3.33-70b',
  'gemini-2.5-pro-preview-03-25',
  'gemini-1.5-pro',
  'gpt-4-turbo',
  'openrouter/meta-llama/llama-4-maverick:free',
  'o3-mini',
  'claude-3.7-sonnet',
  'voyage-3',
  'cerebras/llama-3.1-8b',
  'claude-3-opus',
];

// --- Types ---

// Define acceptable log levels
type LogLevel = 'debug' | 'info' | 'warn' | 'error' | 'trace';

/** Fields of a Julep tool call that this proxy reads. */
interface JulepToolCall {
  index?: number;
  id?: string;
  function?: { name?: string; arguments?: string };
}

/** Fields of a Julep chat choice that this proxy reads. */
interface JulepChoice {
  index?: number;
  message?: {
    role?: string;
    content?: unknown;
    tool_calls?: JulepToolCall[];
  };
  finish_reason?: string | null;
}

/** Fields of a Julep chat response that this proxy reads. */
interface JulepChatResponse {
  id?: string;
  created_at?: string | number;
  system_fingerprint?: string | null;
  choices: JulepChoice[];
  usage?: {
    prompt_tokens?: number;
    completion_tokens?: number;
    total_tokens?: number;
  };
}

/** Fields of an incoming OpenAI-style chat message that are forwarded to Julep. */
interface IncomingChatMessage {
  role?: string;
  content?: unknown;
  tool_calls?: unknown;
  tool_call_id?: string;
}

// --- Helper Functions ---

/** Returns a printable message for any thrown value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Serializes a log payload to pretty-printed JSON.
 *
 * `Error` instances are expanded to name/message/stack (plain
 * `JSON.stringify` renders them as `{}`), and payloads that cannot be
 * serialized (cycles, BigInt) fall back to a placeholder instead of throwing
 * from inside the logger.
 */
function serializeLogData(data: unknown): string {
  try {
    const json = JSON.stringify(
      data,
      (_key, value) =>
        value instanceof Error
          ? { name: value.name, message: value.message, stack: value.stack }
          : value,
      2,
    );
    // JSON.stringify returns undefined for functions/symbols.
    return json ?? String(data);
  } catch (err) {
    return `[Unserializable log data: ${errorMessage(err)}]`;
  }
}

/**
 * Writes one log line in the form `[LEVEL] <ISO timestamp> - <message>`,
 * followed by the JSON-serialized `data` when `data` is not null.
 *
 * @param level   Console method to use; falls back to `console.log` when the
 *                method is unavailable.
 * @param message Human-readable message.
 * @param data    Optional payload; `null` (the default) means "no payload".
 */
function log(level: LogLevel, message: string, data: unknown = null): void {
  const hasData = data !== null;
  const line = `[${level.toUpperCase()}] ${new Date().toISOString()} - ${message}${hasData ? ':' : ''}`;
  // Basic check that the console object has the method; otherwise fall back.
  const sink = typeof console[level] === 'function' ? console[level] : console.log;
  if (hasData) {
    sink.call(console, line, serializeLogData(data));
  } else {
    sink.call(console, line);
  }
}

/**
 * Extracts the Julep API key from the `Authorization: Bearer <key>` header.
 *
 * The scheme is matched case-insensitively and an empty token is treated as
 * missing.
 *
 * @returns The key, or `null` when the header is absent or malformed.
 */
function getJulepApiKey(req: Request): string | null {
  const authHeader = req.headers.get("Authorization");
  const match = authHeader ? /^Bearer\s+(\S.*)$/i.exec(authHeader) : null;
  const token = match?.[1]?.trim();
  if (token) {
    log('debug', 'Extracted Julep API Key successfully.');
    return token;
  }
  log('warn', 'Could not extract Julep API Key from Authorization header.');
  return null;
}

// Helper for small delays
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Sends a DELETE request for a single Julep resource and logs the outcome.
 * Never rejects: network and HTTP failures are logged only.
 */
async function deleteJulepResource(
  url: string,
  type: string,
  id: string,
  headers: HeadersInit,
): Promise<void> {
  try {
    log('debug', `Sending DELETE request for ${type} ${id} to: ${url}`);
    const response = await fetch(url, { method: "DELETE", headers });
    const responseText = await response.text(); // Get text regardless of status
    if (!response.ok) {
      log('warn', `Cleanup failed for ${type} ${id}: ${response.status} ${response.statusText}`, { body: responseText });
    } else {
      log('info', `Cleanup successful for ${type} ${id}.`, { status: response.status, body: responseText });
    }
  } catch (err) {
    log('error', `Cleanup error during fetch for ${type} ${id}: ${errorMessage(err)}`, { error: err });
  }
}

/**
 * Deletes the temporary Julep session and agent created for one request.
 *
 * The session is deleted first and awaited before the agent is deleted, so
 * the agent delete is never issued while the session delete is still in
 * flight. Failures are logged and never thrown, so callers may fire and
 * forget.
 *
 * @param agentId   Agent to delete, or `null` if none was created.
 * @param sessionId Session to delete, or `null` if none was created.
 * @param headers   Headers (including Authorization) for the Julep API.
 */
async function cleanupJulepResources(
  agentId: string | null,
  sessionId: string | null,
  headers: HeadersInit,
): Promise<void> {
  log('info', 'Attempting Julep resource cleanup.', { agentId, sessionId });

  if (!sessionId && !agentId) {
    log('info', 'No Julep resources to clean up.');
    return;
  }

  if (sessionId) {
    const sessionDeleteUrl = `${JULEP_API_BASE}/sessions/${encodeURIComponent(sessionId)}`;
    await deleteJulepResource(sessionDeleteUrl, 'session', sessionId, headers);
  }

  if (agentId) {
    // Small grace period before deleting the agent; sometimes helps if
    // session deletion is slow to propagate upstream.
    await sleep(100);
    const agentDeleteUrl = `${JULEP_API_BASE}/agents/${encodeURIComponent(agentId)}`;
    await deleteJulepResource(agentDeleteUrl, 'agent', agentId, headers);
  }

  log('info', 'Julep resource cleanup finished.', { agentId, sessionId });
}

/** Starts cleanup in the background and logs (rather than throws) on failure. */
function queueCleanup(
  agentId: string | null,
  sessionId: string | null,
  headers: HeadersInit,
  failureMessage: string,
): void {
  void cleanupJulepResources(agentId, sessionId, headers)
    .catch((err) => log('error', failureMessage, err));
}

/**
 * Reads the body of a failed upstream response as text.
 * Returns a placeholder when the body cannot be read.
 */
async function readErrorBody(response: Response, label: string): Promise<string> {
  try {
    return await response.text();
  } catch (e) {
    log('warn', `Could not read error text from ${label}: ${errorMessage(e)}`);
    return "[Could not read error body]";
  }
}

/**
 * Converts a Julep `created_at` value to Unix seconds, falling back to the
 * current time when the value is missing or not a valid date.
 */
function toUnixSeconds(createdAt: unknown): number {
  let millis = Number.NaN;
  if ((typeof createdAt === 'string' || typeof createdAt === 'number') && createdAt) {
    millis = new Date(createdAt).getTime();
  }
  return Math.floor((Number.isFinite(millis) ? millis : Date.now()) / 1000);
}

// Helper to format Julep ToolCall delta to OpenAI format
function toolCallDeltaToOpenAI(julepToolCalls: JulepToolCall[] | undefined) {
  if (!julepToolCalls) return undefined;
  return julepToolCalls.map((toolCall, index) => ({
    // Streamed tool-call deltas carry an index; default to array position.
    index: toolCall.index ?? index,
    id: toolCall.id,
    type: "function",
    function: {
      name: toolCall.function?.name,
      arguments: toolCall.function?.arguments,
    },
  }));
}

// Helper to format Julep ToolCall message to OpenAI format
function toolCallMessageToOpenAI(julepToolCalls: JulepToolCall[] | undefined) {
  if (!julepToolCalls) return undefined;
  return julepToolCalls.map((toolCall) => ({
    id: toolCall.id,
    type: "function",
    function: {
      name: toolCall.function?.name,
      arguments: toolCall.function?.arguments,
    },
  }));
}

/**
 * Replays a complete Julep chat response as an OpenAI-style SSE stream.
 *
 * For each choice it emits, in order: a role chunk, a tool_calls chunk (if
 * any), the content (string content one character per chunk, other content
 * as a single JSON-encoded chunk), and a finish_reason chunk. A final
 * `data: [DONE]` marker ends the stream.
 *
 * @param julepChatData  Parsed Julep chat response (must contain `choices`).
 * @param requestedModel Model name echoed back in every chunk.
 * @param writer         Writer for the response body stream; closed on
 *                       success and aborted on failure.
 * @param encoder        Encoder used to turn SSE text into bytes.
 * @throws Re-throws any write error after aborting the writer.
 */
async function simulateStream(
  julepChatData: JulepChatResponse,
  requestedModel: string,
  writer: WritableStreamDefaultWriter<Uint8Array>,
  encoder: TextEncoder,
): Promise<void> {
  log('info', 'Starting stream simulation.');

  // Writes one SSE data event, then pauses briefly.
  const sendChunk = async (chunk: unknown, pauseMs: number): Promise<void> => {
    await writer.write(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
    await sleep(pauseMs);
  };

  let aborted = false;
  try {
    const baseChunk = {
      id: julepChatData.id || `chatcmpl-sim-${Date.now()}`,
      object: "chat.completion.chunk",
      created: toUnixSeconds(julepChatData.created_at),
      model: requestedModel,
      system_fingerprint: julepChatData.system_fingerprint || null,
    };

    for (const [index, choice] of julepChatData.choices.entries()) {
      log('debug', `Simulating stream for choice index ${index}.`);
      const role = choice.message?.role;
      const content = choice.message?.content;
      // Julep returns complete tool calls; OpenAI clients expect them as a
      // delta array, so reformat them into delta shape.
      const toolCallsDelta = toolCallDeltaToOpenAI(choice.message?.tool_calls);
      const finishReason = choice.finish_reason;

      // 1. Send role chunk
      if (role) {
        const roleChunk = { ...baseChunk, choices: [{ index, delta: { role }, finish_reason: null }] };
        log('debug', 'Sending role chunk:', roleChunk);
        await sendChunk(roleChunk, 5);
      }

      // 2. Send tool calls chunk if tool calls exist
      if (toolCallsDelta && toolCallsDelta.length > 0) {
        const toolCallDeltaChunk = {
          ...baseChunk,
          choices: [{ index, delta: { tool_calls: toolCallsDelta }, finish_reason: null }],
        };
        log('debug', 'Sending tool_calls chunk:', toolCallDeltaChunk);
        await sendChunk(toolCallDeltaChunk, 5);
      }

      // 3. Stream content
      if (content && typeof content === 'string') {
        log('debug', `Streaming content for choice ${index} (length: ${content.length})`);
        // for..of iterates by code point, so surrogate pairs are not split.
        for (const char of content) {
          // Per-character chunks are intentionally not logged.
          await sendChunk(
            { ...baseChunk, choices: [{ index, delta: { content: char }, finish_reason: null }] },
            2, // Simulate typing delay
          );
        }
        log('debug', `Finished streaming content for choice ${index}`);
      } else if (content) {
        // Send non-string content as a single chunk (might be structured JSON etc.)
        const contentChunk = {
          ...baseChunk,
          choices: [{ index, delta: { content: JSON.stringify(content) }, finish_reason: null }],
        };
        log('debug', 'Sending non-string content chunk:', contentChunk);
        await sendChunk(contentChunk, 5);
      }

      // 4. Send finish reason chunk
      if (finishReason) {
        const finishChunk = { ...baseChunk, choices: [{ index, delta: {}, finish_reason: finishReason }] };
        log('debug', 'Sending finish reason chunk:', finishChunk);
        await sendChunk(finishChunk, 5);
      }
    }

    // 5. Send DONE marker
    log('info', 'Sending [DONE] marker.');
    await writer.write(encoder.encode('data: [DONE]\n\n'));
    log('info', 'Stream simulation completed successfully.');
  } catch (error) {
    log('error', `Error during stream simulation: ${errorMessage(error)}`, { error });
    aborted = true;
    try {
      await writer.abort(error);
    } catch { /* ignore abort error */ }
    // Propagate so the caller's background-task handler knows it failed.
    throw error;
  } finally {
    // close() after abort() always rejects, so only close on the success path.
    if (!aborted) {
      try {
        await writer.close();
      } catch { /* ignore close error (e.g. client went away) */ }
    }
    log('debug', 'Stream writer closed.');
  }
}

// --- Endpoint Handlers ---

/**
 * Handles `GET /v1/models` by returning the hardcoded model list in OpenAI
 * list format. A missing API key is logged but not rejected.
 */
async function handleModels(req: Request): Promise<Response> {
  log('info', 'Handling /v1/models request.');
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    // The models list is served without a key; only a warning is logged.
    log('warn', 'Unauthorized /v1/models request (missing API key).');
  }

  const now = Math.floor(Date.now() / 1000);
  const openaiModels = HARDCODED_MODELS.map((modelId) => ({
    id: modelId,
    object: "model",
    created: now,
    owned_by: "julep",
    permission: [{
      id: `modelperm-${modelId}-${now}`,
      object: "model_permission",
      created: now,
      allow_create_engine: false,
      allow_sampling: true,
      allow_logprobs: true,
      allow_search_indices: false,
      allow_view: true,
      allow_fine_tuning: false,
      organization: "*",
      group: null,
      is_blocking: false,
    }],
    root: modelId,
    parent: null,
  }));

  log('debug', 'Returning hardcoded models list.');
  return new Response(JSON.stringify({ data: openaiModels, object: "list" }), {
    headers: { "Content-Type": "application/json" },
    status: 200,
  });
}

/**
 * Handles `POST /v1/chat/completions`.
 *
 * Flow: validate the request, create a temporary Julep agent and session,
 * send one non-streaming chat request, queue deletion of the temporary
 * resources, then return the result either as OpenAI JSON or as a simulated
 * SSE stream when the client sent `stream: true`.
 *
 * @returns 401 without a bearer key, 400 for an invalid body or model, the
 *          upstream status when a Julep call fails, 500 for unexpected
 *          errors, otherwise 200.
 */
async function handleChatCompletions(req: Request): Promise<Response> {
  log('info', 'Handling /v1/chat/completions request.');
  const julepApiKey = getJulepApiKey(req);
  if (!julepApiKey) {
    log('error', 'Unauthorized chat completions request: Missing Julep API Key.');
    return new Response("Unauthorized: Missing or invalid Authorization header", { status: 401 });
  }

  // Define headers early, use this single object throughout
  const headers: Record<string, string> = {
    "Authorization": `Bearer ${julepApiKey}`,
    "Content-Type": "application/json",
  };
  log('debug', 'Julep API request headers prepared (key omitted).', { "Content-Type": headers["Content-Type"] });

  let agentId: string | null = null;
  let sessionId: string | null = null;

  try {
    // 1. Parse Incoming Request Body
    log('debug', 'Parsing incoming request body...');
    // Read the body once as text: a Request body can only be consumed once,
    // so the raw text must be captured before parsing to be loggable on error.
    let rawBody = "[Could not read request text]";
    let requestBody: unknown;
    try {
      rawBody = await req.text();
      requestBody = JSON.parse(rawBody);
      log('info', 'Incoming request body parsed successfully.');
      log('debug', 'Parsed request body:', requestBody);
    } catch (e) {
      log('error', `Failed to parse incoming request JSON: ${errorMessage(e)}`, { error: e });
      log('debug', 'Raw incoming request body text:', rawBody);
      return new Response(`Bad Request: Invalid JSON format. ${errorMessage(e)}`, { status: 400 });
    }

    // A JSON body of null, a primitive or an array cannot carry model/messages.
    if (typeof requestBody !== 'object' || requestBody === null || Array.isArray(requestBody)) {
      log('error', 'Invalid request body: "model" and "messages" are required.', { requestBody });
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }

    const { model, messages, stream, ...rest } = requestBody as Record<string, unknown>;
    const clientRequestedStream = stream === true;
    log('info', `Request details: model=${model}, clientRequestedStream=${clientRequestedStream}`);

    // Validate essential parameters
    if (typeof model !== 'string' || !model || !Array.isArray(messages) || messages.length === 0) {
      log('error', 'Invalid request body: "model" and "messages" are required.', { model, messages });
      return new Response("Invalid request body. 'model' and 'messages' are required.", { status: 400 });
    }
    if (!HARDCODED_MODELS.includes(model)) {
      log('error', `Invalid model requested: ${model}`);
      return new Response(`Invalid model: ${model}. Please use one of the available models.`, { status: 400 });
    }
    log('debug', 'Request parameters validated.');

    // --- Agent and Session Creation ---
    // 2. Create Agent
    const createAgentUrl = `${JULEP_API_BASE}/agents`;
    const createAgentBody = {
      name: `temp-openai-${model}-${Date.now()}`,
      model: model,
      about: `Temporary agent for OpenAI model ${model}`,
    };
    log('info', 'Attempting to create Julep Agent.', { url: createAgentUrl, body: createAgentBody });
    const createAgentResponse = await fetch(createAgentUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(createAgentBody),
    });
    log('debug', `Create Agent response status: ${createAgentResponse.status}`);

    if (!createAgentResponse.ok) {
      const errorStatus = createAgentResponse.status;
      const errorStatusText = createAgentResponse.statusText;
      const errorText = await readErrorBody(createAgentResponse, 'createAgentResponse');
      log('error', `Error creating Julep Agent: ${errorStatus} - ${errorText}`);
      // No resources to cleanup yet
      return new Response(`Error creating Julep Agent: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    try {
      const agentResponseText = await createAgentResponse.text();
      log('debug', 'Create Agent raw response text:', agentResponseText);
      const agentData = JSON.parse(agentResponseText);
      const newAgentId = agentData?.id;
      if (newAgentId === undefined || newAgentId === null || newAgentId === '') {
        // Without an id neither the session nor the cleanup can address the agent.
        throw new Error('Julep Agent response did not include an id.');
      }
      log('info', 'Julep Agent created successfully.', { agentData });
      agentId = String(newAgentId);
    } catch (e) {
      log('error', `Failed to parse Julep Agent creation response JSON: ${errorMessage(e)}`, { error: e });
      queueCleanup(agentId, sessionId, headers, 'Background cleanup failed after agent parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Agent response. ${errorMessage(e)}`, { status: 500 });
    }

    // 3. Create Session
    const createSessionUrl = `${JULEP_API_BASE}/sessions`;
    const createSessionBody = { agent: agentId }; // Julep API uses agent
    log('info', 'Attempting to create Julep Session.', { url: createSessionUrl, body: createSessionBody });
    const createSessionResponse = await fetch(createSessionUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(createSessionBody),
    });
    log('debug', `Create Session response status: ${createSessionResponse.status}`);

    if (!createSessionResponse.ok) {
      const errorStatus = createSessionResponse.status;
      const errorStatusText = createSessionResponse.statusText;
      const errorText = await readErrorBody(createSessionResponse, 'createSessionResponse');
      log('error', `Error creating Julep Session: ${errorStatus} - ${errorText}`);
      // Cleanup the agent we just created (fire-and-forget)
      queueCleanup(agentId, null, headers, 'Background cleanup failed after session creation error');
      return new Response(`Error creating Julep Session: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    try {
      const sessionResponseText = await createSessionResponse.text();
      log('debug', 'Create Session raw response text:', sessionResponseText);
      const sessionData = JSON.parse(sessionResponseText);
      const newSessionId = sessionData?.id;
      if (newSessionId === undefined || newSessionId === null || newSessionId === '') {
        // Without an id the chat URL would target "/sessions/undefined/chat".
        throw new Error('Julep Session response did not include an id.');
      }
      log('info', 'Julep Session created successfully.', { sessionData });
      sessionId = String(newSessionId);
    } catch (e) {
      log('error', `Failed to parse Julep Session creation response JSON: ${errorMessage(e)}`, { error: e });
      queueCleanup(agentId, sessionId, headers, 'Background cleanup failed after session parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Session response. ${errorMessage(e)}`, { status: 500 });
    }

    // --- Perform Chat Completion (ALWAYS non-streaming to Julep) ---
    // 4. Send Chat Request to Julep
    const chatUrl = `${JULEP_API_BASE}/sessions/${sessionId}/chat`;
    const chatBodyToJulep = {
      messages: messages.map((msg: IncomingChatMessage) => ({
        role: msg.role,
        content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
        // Forwarded as-is; assumes Julep accepts the OpenAI tool call format.
        tool_calls: msg.tool_calls,
        tool_call_id: msg.tool_call_id, // If it's a tool response message
      })),
      stream: false, // Force non-streaming
      ...rest, // Pass through other OpenAI parameters like temperature, top_p, etc.
    };
    log('info', 'Sending Chat request to Julep (forced non-stream).', { url: chatUrl });
    log('debug', 'Julep Chat Request Body:', chatBodyToJulep);

    const chatResponse = await fetch(chatUrl, {
      method: "POST",
      headers,
      body: JSON.stringify(chatBodyToJulep),
    });
    log('debug', `Julep Chat response status: ${chatResponse.status}`);

    // --- Handle Julep Response ---
    if (!chatResponse.ok) {
      const errorStatus = chatResponse.status;
      const errorStatusText = chatResponse.statusText;
      const errorText = await readErrorBody(chatResponse, 'chatResponse');
      log('error', `Error during Julep Chat Completion: ${errorStatus} - ${errorText}`);
      queueCleanup(agentId, sessionId, headers, 'Background cleanup failed after chat error');
      return new Response(`Error during Julep Chat Completion: ${errorStatusText} - ${errorText}`, { status: errorStatus });
    }

    // Julep request was successful, get the full JSON body
    let julepChatData: JulepChatResponse;
    try {
      const chatResponseText = await chatResponse.text();
      log('debug', 'Julep Chat raw response text:', chatResponseText);
      julepChatData = JSON.parse(chatResponseText);
      log('info', 'Julep chat completion successful.', { responseId: julepChatData?.id });
      log('debug', 'Julep Chat response data:', julepChatData);
    } catch (e) {
      log('error', `Failed to parse Julep Chat response JSON (status was OK): ${errorMessage(e)}`, { error: e });
      queueCleanup(agentId, sessionId, headers, 'Background cleanup failed after chat parse error');
      return new Response(`Internal Server Error: Failed to parse Julep Chat response. ${errorMessage(e)}`, { status: 500 });
    }

    // *** Trigger cleanup NOW (fire-and-forget), before returning the response/stream ***
    log('info', 'Julep chat successful, queueing cleanup.');
    queueCleanup(agentId, sessionId, headers, 'Background cleanup failed after successful chat');

    // --- Format and Return Response to Client ---
    const julepResponseData = julepChatData;
    if (!julepResponseData || !Array.isArray(julepResponseData.choices)) {
      log('error', 'Julep response format unexpected. Missing "choices".', { julepChatData });
      return new Response('Internal Server Error: Unexpected format from Julep API.', { status: 500 });
    }

    if (clientRequestedStream) {
      log('info', 'Client requested stream, starting simulation.');
      const { readable, writable } = new TransformStream<Uint8Array, Uint8Array>();
      const writer = writable.getWriter();
      const encoder = new TextEncoder();

      // Start simulation in background (fire-and-forget). The response
      // headers are already committed by then, so failures are only logged.
      void simulateStream(julepResponseData, model, writer, encoder)
        .catch((streamErr) => {
          log('error', 'Stream simulation background task failed.', { error: streamErr });
        });

      log('debug', 'Returning readable stream to client.');
      return new Response(readable, {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          "Connection": "keep-alive",
        },
        status: 200,
      });
    }

    log('info', 'Client requested non-streaming response.');
    // Format julepResponseData to OpenAI format
    const openaiCompletion = {
      id: julepResponseData.id || `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: toUnixSeconds(julepResponseData.created_at),
      model: model, // Use the originally requested model
      choices: julepResponseData.choices.map((choice, position) => ({
        // Fall back to array position when upstream omits the index.
        index: choice.index ?? position,
        message: {
          role: choice.message?.role,
          content: choice.message?.content,
          tool_calls: choice.message?.tool_calls
            ? toolCallMessageToOpenAI(choice.message.tool_calls)
            : undefined,
        },
        finish_reason: choice.finish_reason,
      })),
      usage: julepResponseData.usage
        ? {
          prompt_tokens: julepResponseData.usage.prompt_tokens,
          completion_tokens: julepResponseData.usage.completion_tokens,
          total_tokens: julepResponseData.usage.total_tokens,
        }
        : undefined,
      system_fingerprint: julepResponseData.system_fingerprint || null,
    };
    log('debug', 'Formatted non-streaming OpenAI response:', openaiCompletion);
    log('info', 'Returning non-streaming JSON response to client.');
    return new Response(JSON.stringify(openaiCompletion), {
      headers: { "Content-Type": "application/json" },
      status: 200,
    });
  } catch (error) {
    // Catch network failures and any other unexpected error in the flow above.
    log('error', `Error in handleChatCompletions (outer catch): ${errorMessage(error)}`, { error, agentId, sessionId });
    queueCleanup(agentId, sessionId, headers, 'Background cleanup failed in outer catch block');
    return new Response(`Internal Server Error: ${errorMessage(error)}`, { status: 500 });
  }
}

// --- CORS Handlers ---
const corsHeaders = {
  'Access-Control-Allow-Origin': '*', // Adjust in production!
  'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
  'Access-Control-Allow-Headers': 'Authorization, Content-Type',
};

/**
 * Answers OPTIONS requests.
 *
 * A request carrying both `Origin` and `Access-Control-Request-Method` is
 * treated as a CORS preflight and receives the CORS headers with status 204.
 * `Access-Control-Request-Headers` is not required because browsers omit it
 * when the actual request has no non-safelisted headers. Any other OPTIONS
 * request receives a plain `Allow` header.
 */
function handleOptions(request: Request): Response {
  log('debug', 'Handling OPTIONS preflight request.');
  const isPreflight = request.headers.has('Origin') &&
    request.headers.has('Access-Control-Request-Method');

  if (isPreflight) {
    log('debug', 'Returning CORS preflight headers.');
    return new Response(null, { headers: corsHeaders, status: 204 }); // Use 204 No Content for OPTIONS
  }

  // Handle standard OPTIONS request (non-CORS preflight)
  log('debug', 'Returning standard OPTIONS Allow header.');
  return new Response(null, { headers: { 'Allow': 'GET, POST, OPTIONS' } });
}

/**
 * Returns a copy of `response` with the CORS headers set. A new Response is
 * built because the headers of a received/constructed Response may be
 * immutable.
 */
function addCorsHeaders(response: Response): Response {
  const newHeaders = new Headers(response.headers);
  for (const [key, value] of Object.entries(corsHeaders)) {
    newHeaders.set(key, value);
  }
  return new Response(response.body, {
    status: response.status,
    statusText: response.statusText,
    headers: newHeaders,
  });
}

// --- Main Deno Server Entry Point ---
log('info', 'Starting Deno server...');

serve(async (request: Request) => {
  const url = new URL(request.url);
  log('info', `Incoming request: ${request.method} ${url.pathname}${url.search}`);

  // Handle CORS preflight requests first
  if (request.method === 'OPTIONS') {
    return handleOptions(request);
  }

  let response: Response;
  try {
    if (url.pathname === "/v1/models" && request.method === "GET") {
      response = await handleModels(request);
    } else if (url.pathname === "/v1/chat/completions" && request.method === "POST") {
      response = await handleChatCompletions(request);
    } else {
      log('warn', `Path not found: ${url.pathname}`);
      response = new Response("Not Found", { status: 404 });
    }
  } catch (e) {
    log('error', `Unhandled error in serve handler: ${errorMessage(e)}`, { error: e, url: request.url, method: request.method });
    response = new Response(`Internal Server Error: ${errorMessage(e)}`, { status: 500 });
  }

  // Add CORS headers to all actual responses (OPTIONS handled separately)
  return addCorsHeaders(response);
});