servTe

Paused

App Files Files Community

Ge-AI committed on May 19

Commit

11d68bb

verified ·

1 Parent(s): 939c8f7

Update openai_ondemand_adapter.py

Browse files

Files changed (1) hide show

openai_ondemand_adapter.py +262 -200

openai_ondemand_adapter.py CHANGED Viewed

@@ -254,106 +254,135 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
     accumulated_text_parts = []
     api_error_yielded = False
-    logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
-    try:
-        with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
-            if resp.status_code != 200:
-                api_error_yielded = True
-                error_text = resp.text
-                logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
-                error_payload = {
-                    "error": {
-                        "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
-                        "type": "on_demand_api_error",
-                        "code": resp.status_code
-                    }
-                }
-                yield format_openai_sse_delta(error_payload)
-                yield "data: [DONE]\n\n"
-                return "".join(accumulated_text_parts).strip(), api_error_yielded
-            first_chunk_sent = False
-            last_line_str = ""
-            for line_bytes in resp.iter_lines():
-                if not line_bytes:
-                    continue
-                line_str = line_bytes.decode("utf-8")
-                last_line_str = line_str
-                if line_str.startswith("data:"):
-                    data_part = line_str[len("data:"):].strip()
-                    if data_part == "[DONE]":
-                        logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
                         yield "data: [DONE]\n\n"
                         return "".join(accumulated_text_parts).strip(), api_error_yielded
-                    elif data_part.startswith("[ERROR]:"):
-                        api_error_yielded = True
-                        error_json_str = data_part[len("[ERROR]:"):].strip()
-                        logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
-                        try:
-                            error_obj = json.loads(error_json_str)
-                        except json.JSONDecodeError:
-                            error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
-                        yield format_openai_sse_delta({"error": error_obj})
-                        yield "data: [DONE]\n\n"
-                        return "".join(accumulated_text_parts).strip(), api_error_yielded
-                    else:
-                        try:
-                            event_data = json.loads(data_part)
-                            if event_data.get("eventType") == "fulfillment":
-                                delta_content = event_data.get("answer", "")
-                                if delta_content is None: delta_content = ""
-                                accumulated_text_parts.append(delta_content)
-                                choice_delta = {}
-                                if not first_chunk_sent:
-                                    choice_delta["role"] = "assistant"
-                                    choice_delta["content"] = delta_content
-                                    first_chunk_sent = True
-                                else:
-                                    choice_delta["content"] = delta_content
-                                if not choice_delta.get("content") and not choice_delta.get("role"):
-                                    if not (choice_delta.get("role") and not choice_delta.get("content")):
-                                        continue
-                                openai_chunk = {
-                                    "id": "chatcmpl-" + str(uuid.uuid4())[:12],
-                                    "object": "chat.completion.chunk",
-                                    "created": int(time.time()),
-                                    "model": openai_model_name_for_response,
-                                    "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
-                                }
-                                yield format_openai_sse_delta(openai_chunk)
-                        except json.JSONDecodeError:
-                            logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
-                            continue
-            if not api_error_yielded:
-                if not last_line_str.startswith("data: [DONE]"):
                      logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成，补充发送 [DONE]。Session: {session_id_for_attempt}")
                      yield "data: [DONE]\n\n"
-    except requests.exceptions.RequestException:
-        logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {current_attempt_num_logging}): Session: {session_id_for_attempt}", exc_info=False)
-        raise
-    except Exception as e:
-        logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
-        api_error_yielded = True
-        error_payload = {
-            "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
-        }
-        yield format_openai_sse_delta(error_payload)
-        yield "data: [DONE]\n\n"
-    return "".join(accumulated_text_parts).strip(), api_error_yielded
 @app.route("/v1/chat/completions", methods=["POST"])
@@ -440,12 +469,13 @@ def chat_completions():
                 logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
                 if selected_apikey_for_outer_retry:
-                    if status_code_from_exc == 524: # HTTP 524: A Timeout Occurred (Cloudflare)
                         logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
-                    # For other client/server errors that might indicate a key issue or persistent service issue with this key
-                    elif status_code_from_exc and (400 <= status_code_from_exc < 500 or status_code_from_exc in [500, 502, 503]): # excluding 524
                         keymgr.mark_bad(selected_apikey_for_outer_retry)
-                    elif not status_code_from_exc : # Network errors without a status code (e.g., connection refused, DNS failure)
                          keymgr.mark_bad(selected_apikey_for_outer_retry)
                 if key_retry_count >= max_key_retries:
@@ -458,23 +488,22 @@ def chat_completions():
             except Exception as e_outer:
                 last_exception_for_key_retry = e_outer
                 logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
-                if selected_apikey_for_outer_retry: # Mark key bad on any other unexpected exception during setup
                     keymgr.mark_bad(selected_apikey_for_outer_retry)
                 break
-        # This block is reached if all key_retry_count attempts in with_valid_key_and_session fail
-        error_message = "重试次数过多，请检查上下文长度! 或联系管理员!" # User requested message
-        error_code_str = "max_retries_check_context_contact_admin" # Custom code for this scenario
         logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
         if is_stream_request:
             def error_stream_gen():
-                yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_setup_error_max_retries", "code": error_code_str}})
                 yield "data: [DONE]\n\n"
-            return Response(error_stream_gen(), content_type='text/event-stream', status=500) # Status 500 as requested
         else:
-            return jsonify({"error": error_message, "code": error_code_str}), 500 # Status 500 as requested
     return with_valid_key_and_session(attempt_ondemand_request_wrapper)
@@ -488,7 +517,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
         accumulated_text_this_attempt = ""
-        api_error_in_attempt = False
         if empty_retry_attempt_num > 1:
             logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
@@ -505,15 +534,16 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                     if status_code_from_exc_retry_setup == 524:
                         logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
                     else:
                         keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
-                    # Final failure to get key/session for the last empty response retry
                     final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                     final_error_code = "max_retries_check_context_contact_admin"
                     logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
-                    yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code}})
                     yield "data: [DONE]\n\n"
                     return
                 time.sleep(1)
@@ -523,6 +553,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
         try:
             result_tuple = yield from _execute_one_stream_attempt(
                 current_apikey_for_attempt,
                 current_session_id_for_attempt,
@@ -532,9 +563,9 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
                 f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
             )
             accumulated_text_this_attempt = result_tuple[0]
-            api_error_in_attempt = result_tuple[1]
-        except requests.exceptions.RequestException as e_req:
             log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
             status_code_from_exc_stream = None
             if hasattr(e_req, 'response') and e_req.response is not None:
@@ -545,37 +576,46 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
             if current_apikey_for_attempt:
                 if status_code_from_exc_stream == 524:
                     logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
                 else:
                     keymgr.mark_bad(current_apikey_for_attempt)
             if empty_retry_attempt_num == 1:
                 raise e_req
             if empty_retry_attempt_num >= max_empty_response_retries:
                 final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                 final_error_code = "max_retries_check_context_contact_admin"
                 logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
-                yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code}})
                 yield "data: [DONE]\n\n"
                 return
             time.sleep(1)
-            continue
         if api_error_in_attempt:
-            logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误，已将错误信息流式传输。")
-            return
         if accumulated_text_this_attempt:
             logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
-            return
         logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
         if empty_retry_attempt_num >= max_empty_response_retries:
             final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
             final_error_code = "max_retries_check_context_contact_admin"
             logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
             yield format_openai_sse_delta({
-                "error": {"message": final_error_message, "type": "max_retries_exceeded_empty_response", "code": final_error_code}
             })
             yield "data: [DONE]\n\n"
             return
@@ -583,9 +623,6 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
         logging.info(f"【流式请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
         time.sleep(1)
-    # Fallback if loop finishes unexpectedly (shouldn't happen with current logic)
-    # This case means all max_empty_response_retries were exhausted, and the last one was also empty.
-    # The specific error for this is handled inside the loop. This is a safeguard.
     final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
     final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
     logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
@@ -594,14 +631,12 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
 def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
-    max_empty_response_retries = 5
     empty_retry_attempt_num = 0
     current_apikey_for_attempt = initial_apikey
     current_session_id_for_attempt = initial_session_id
-    url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
@@ -610,7 +645,6 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
             try:
                 current_apikey_for_attempt = keymgr.get()
                 current_session_id_for_attempt = create_session(current_apikey_for_attempt)
-                url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
                 logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
@@ -621,110 +655,138 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                     if status_code_from_exc_retry_setup_ns == 524:
                         logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
                     else:
                         keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
-                    # Final failure to get key/session for the last empty response retry
                     final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                     final_error_code = "max_retries_check_context_contact_admin"
                     logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
-                    return jsonify({"error": final_error_message, "code": final_error_code}), 500 # Status 500
                 time.sleep(1)
                 current_apikey_for_attempt = None
                 continue
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
-        logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
-        payload = { "query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync" }
-        headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
-        try:
-            resp = requests.post(url, json=payload, headers=headers, timeout=120)
-            resp.raise_for_status()
-            response_json = resp.json()
-            if "data" not in response_json or "answer" not in response_json["data"]:
-                logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
-                # This is an API format error, not an empty response.
-                # If this happens on the first attempt, it will be re-raised to with_valid_key_and_session
-                # If on a retry for empty response, it's a new kind of failure for that attempt.
-                raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}.")
-            ai_response_content = response_json["data"]["answer"]
-            if ai_response_content is None: ai_response_content = ""
-            if ai_response_content.strip():
-                logging.info(f"【同步请求】({log_attempt_str}) 成功获取非空内容。")
-                openai_response_obj = {
-                    "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
-                    "model": openai_model_name_for_response,
-                    "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
-                    "usage": {}
-                }
-                return jsonify(openai_response_obj)
-            else:
-                logging.warning(f"【同步请求】({log_attempt_str}) 返回空回复。")
-                if empty_retry_attempt_num >= max_empty_response_retries:
-                    final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
-                    final_error_code = "max_retries_check_context_contact_admin"
-                    logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
-                    return jsonify({
-                        "error": final_error_message,
                         "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                         "model": openai_model_name_for_response,
-                        "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
-                        "usage": {},
-                        "code": final_error_code
-                    }), 504 # Status 504 as requested
-                logging.info(f"【同步请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
-                time.sleep(1)
-        except requests.exceptions.RequestException as e_req:
-            log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
-            status_code_from_exc_sync = None
-            if hasattr(e_req, 'response') and e_req.response is not None:
-                status_code_from_exc_sync = e_req.response.status_code
-            logging.warning(f"【同步请求】({log_attempt_str} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
-            if current_apikey_for_attempt:
-                if status_code_from_exc_sync == 524:
-                    logging.info(f"【KeyManager】Key {log_key_display_sync} not marked bad for 524 error during sync attempt.")
-                else:
-                    keymgr.mark_bad(current_apikey_for_attempt)
-            if empty_retry_attempt_num == 1:
-                raise e_req
             if empty_retry_attempt_num >= max_empty_response_retries:
                 final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                 final_error_code = "max_retries_check_context_contact_admin"
-                logging.error(f"【同步请求】在最后一次空回复重试时发生请求错误: {e_req}")
-                return jsonify({"error": final_error_message, "code":final_error_code, "details": str(e_req)}), 500 # Status 500
-            time.sleep(1)
-            continue
-        except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
-            # This catches the ValueError from "data.answer" missing, or JSON decode errors
-            logging.error(f"【同步请求】({log_attempt_str}) 处理响应或格式时出错: {e_parse}", exc_info=True)
-            if empty_retry_attempt_num == 1:
-                 # If format error on first attempt, re-raise to be caught by with_valid_key_and_session
-                 # This implies a more fundamental issue than just an empty response.
-                 raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
-            # If it's a format error during an empty-response retry, it's problematic.
-            if empty_retry_attempt_num >= max_empty_response_retries:
-                final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
-                final_error_code = "max_retries_check_context_contact_admin"
-                logging.error(f"【同步请求】在最后一次空回复重试时发生响应解析错误: {e_parse}")
-                return jsonify({"error": final_error_message, "code": final_error_code, "details": f"Parse error: {str(e_parse)}"}), 500 # Status 500
-            time.sleep(1)
-            continue # Try next key for empty response retry
-    # Fallback if loop finishes unexpectedly (e.g. all retries were empty and the last one didn't hit the specific return)
     final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
     final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
     logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
-    return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500 # Status 500
 @app.route("/v1/models", methods=["GET"])

     accumulated_text_parts = []
     api_error_yielded = False
+    # Inner retry loop for 500 errors specifically for this attempt
+    max_500_retries_for_this_call = 5
+    current_500_retry_count = 0
+    while current_500_retry_count < max_500_retries_for_this_call:
+        current_500_retry_count += 1
+        if current_500_retry_count > 1: # Log if this is a 500-retry
+            logging.info(f"【流式请求子尝试 {current_attempt_num_logging} - 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}】Key: {keymgr.display_key(apikey_for_attempt)}")
+        else:
+             logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
+        try:
+            with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
+                if resp.status_code == 500: # Specific handling for 500 error
+                    logging.warning(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}, 500重试 {current_500_retry_count}) 收到500错误。Session: {session_id_for_attempt}")
+                    if current_500_retry_count >= max_500_retries_for_this_call:
+                        logging.error(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}) 达到500错误最大重试次数。将错误传递给上层。")
+                        # Yield a specific error for persistent 500 after retries
+                        api_error_yielded = True
+                        error_payload = {"error": {"message": f"OnDemand API persistent 500 error after {max_500_retries_for_this_call} retries (Attempt {current_attempt_num_logging}).",
+                                                   "type": "on_demand_persistent_500_error", "code": 500}}
+                        yield format_openai_sse_delta(error_payload)
                         yield "data: [DONE]\n\n"
                         return "".join(accumulated_text_parts).strip(), api_error_yielded
+                    time.sleep(1) # Wait before retrying the 500 error
+                    continue # Go to the next iteration of the 500-retry loop
+                if resp.status_code != 200: # Handle other non-200, non-500 errors
+                    api_error_yielded = True
+                    error_text = resp.text
+                    logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
+                    error_payload = {
+                        "error": {
+                            "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
+                            "type": "on_demand_api_error",
+                            "code": resp.status_code
+                        }
+                    }
+                    yield format_openai_sse_delta(error_payload)
+                    yield "data: [DONE]\n\n"
+                    return "".join(accumulated_text_parts).strip(), api_error_yielded
+                # Successful 200 response, process stream
+                first_chunk_sent = False
+                last_line_str = ""
+                for line_bytes in resp.iter_lines():
+                    if not line_bytes:
+                        continue
+                    line_str = line_bytes.decode("utf-8")
+                    last_line_str = line_str
+                    if line_str.startswith("data:"):
+                        data_part = line_str[len("data:"):].strip()
+                        if data_part == "[DONE]":
+                            logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
+                            yield "data: [DONE]\n\n"
+                            return "".join(accumulated_text_parts).strip(), api_error_yielded
+                        elif data_part.startswith("[ERROR]:"):
+                            api_error_yielded = True
+                            error_json_str = data_part[len("[ERROR]:"):].strip()
+                            logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
+                            try:
+                                error_obj = json.loads(error_json_str)
+                            except json.JSONDecodeError:
+                                error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
+                            yield format_openai_sse_delta({"error": error_obj})
+                            yield "data: [DONE]\n\n"
+                            return "".join(accumulated_text_parts).strip(), api_error_yielded
+                        else:
+                            try:
+                                event_data = json.loads(data_part)
+                                if event_data.get("eventType") == "fulfillment":
+                                    delta_content = event_data.get("answer", "")
+                                    if delta_content is None: delta_content = ""
+                                    accumulated_text_parts.append(delta_content)
+                                    choice_delta = {}
+                                    if not first_chunk_sent:
+                                        choice_delta["role"] = "assistant"
+                                        choice_delta["content"] = delta_content
+                                        first_chunk_sent = True
+                                    else:
+                                        choice_delta["content"] = delta_content
+                                    if not choice_delta.get("content") and not choice_delta.get("role"):
+                                        if not (choice_delta.get("role") and not choice_delta.get("content")):
+                                            continue
+                                    openai_chunk = {
+                                        "id": "chatcmpl-" + str(uuid.uuid4())[:12],
+                                        "object": "chat.completion.chunk",
+                                        "created": int(time.time()),
+                                        "model": openai_model_name_for_response,
+                                        "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
+                                    }
+                                    yield format_openai_sse_delta(openai_chunk)
+                            except json.JSONDecodeError:
+                                logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
+                                continue
+                if not api_error_yielded and not last_line_str.startswith("data: [DONE]"):
                      logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成，补充发送 [DONE]。Session: {session_id_for_attempt}")
                      yield "data: [DONE]\n\n"
+                return "".join(accumulated_text_parts).strip(), api_error_yielded # Success for this attempt
+        except requests.exceptions.RequestException as e_req_inner:
+            # This catches network errors or HTTP errors if resp.raise_for_status() was called (e.g. for persistent 500)
+            logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 请求时发生异常: {e_req_inner}, Key: {keymgr.display_key(apikey_for_attempt)}")
+            # If this was the last 500-retry, or another RequestException, re-raise to be handled by handle_stream_request's try-except
+            if current_500_retry_count >= max_500_retries_for_this_call or (hasattr(e_req_inner, 'response') and e_req_inner.response is not None and e_req_inner.response.status_code != 500):
+                raise e_req_inner
+            # If it was a 500 and we still have retries for 500, the loop will continue after a delay.
+            time.sleep(1) # Wait before retrying the 500 error
+            # The loop will continue to the next 500-retry.
+        except Exception as e_inner_unknown:
+            logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e_inner_unknown}, Session: {session_id_for_attempt}", exc_info=True)
+            api_error_yielded = True
+            error_payload = {
+                "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e_inner_unknown)}", "type": "unknown_streaming_error_in_attempt"}
+            }
+            yield format_openai_sse_delta(error_payload)
+            yield "data: [DONE]\n\n"
+            return "".join(accumulated_text_parts).strip(), api_error_yielded
+    # If 500-retry loop exhausted without returning (should ideally raise inside or return success)
+    logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 500错误重试循环意外结束。")
+    # This case should ideally not be reached if logic inside loop is correct.
+    # Re-raise a generic error to be caught by the caller if it does.
+    raise requests.exceptions.RequestException(f"Exhausted internal 500 retries for attempt {current_attempt_num_logging} without success or specific error propagation.")
 @app.route("/v1/chat/completions", methods=["POST"])
                 logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
                 if selected_apikey_for_outer_retry:
+                    if status_code_from_exc == 524:
                         logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
+                    elif status_code_from_exc == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 500 error.")
+                    elif status_code_from_exc and ((400 <= status_code_from_exc < 500) or status_code_from_exc in [502, 503]): # Explicitly list codes that mark bad, excluding 500, 524
                         keymgr.mark_bad(selected_apikey_for_outer_retry)
+                    elif not status_code_from_exc :
                          keymgr.mark_bad(selected_apikey_for_outer_retry)
                 if key_retry_count >= max_key_retries:
             except Exception as e_outer:
                 last_exception_for_key_retry = e_outer
                 logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
+                if selected_apikey_for_outer_retry:
                     keymgr.mark_bad(selected_apikey_for_outer_retry)
                 break
+        error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+        error_code_str = "max_retries_check_context_contact_admin"
         logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
         if is_stream_request:
             def error_stream_gen():
+                yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_max_retries_exceeded", "code": error_code_str}})
                 yield "data: [DONE]\n\n"
+            return Response(error_stream_gen(), content_type='text/event-stream', status=500)
         else:
+            return jsonify({"error": error_message, "code": error_code_str}), 500
     return with_valid_key_and_session(attempt_ondemand_request_wrapper)
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
         accumulated_text_this_attempt = ""
+        api_error_in_attempt = False # Renamed from api_error_yielded for clarity in this scope
         if empty_retry_attempt_num > 1:
             logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                     if status_code_from_exc_retry_setup == 524:
                         logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
+                    elif status_code_from_exc_retry_setup == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for stream retry.")
                     else:
                         keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
                     final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                     final_error_code = "max_retries_check_context_contact_admin"
                     logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
+                    yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code, "details": str(e_key_session)}})
                     yield "data: [DONE]\n\n"
                     return
                 time.sleep(1)
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
         try:
+            # result_tuple will be (accumulated_text, api_error_yielded_flag_from_execute)
             result_tuple = yield from _execute_one_stream_attempt(
                 current_apikey_for_attempt,
                 current_session_id_for_attempt,
                 f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
             )
             accumulated_text_this_attempt = result_tuple[0]
+            api_error_in_attempt = result_tuple[1] # This tells if _execute_one_stream_attempt itself yielded an error SSE
+        except requests.exceptions.RequestException as e_req: # Catch errors from _execute_one_stream_attempt's requests.post
             log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
             status_code_from_exc_stream = None
             if hasattr(e_req, 'response') and e_req.response is not None:
             if current_apikey_for_attempt:
                 if status_code_from_exc_stream == 524:
                     logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
+                elif status_code_from_exc_stream == 500:
+                    logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 500 error during stream attempt.")
                 else:
                     keymgr.mark_bad(current_apikey_for_attempt)
             if empty_retry_attempt_num == 1:
+                # If the very first attempt (initial_apikey) fails with RequestException,
+                # re-raise to let with_valid_key_and_session handle key rotation.
                 raise e_req
+            # If it's an empty-response retry (attempt_num > 1) that failed with RequestException
             if empty_retry_attempt_num >= max_empty_response_retries:
                 final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                 final_error_code = "max_retries_check_context_contact_admin"
                 logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
+                yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code, "details": str(e_req)}})
                 yield "data: [DONE]\n\n"
                 return
             time.sleep(1)
+            continue # To the next iteration of the empty_retry_attempt_num loop (will try new key/session)
+        # After _execute_one_stream_attempt has finished (either normally or yielded its own error)
         if api_error_in_attempt:
+            # This means _execute_one_stream_attempt handled an API error (like 429, or persistent 500) and yielded an error SSE.
+            # The stream is already complete with an error.
+            logging.warning(f"【流式请求】({log_attempt_str}) 子尝试已处理并流式传输API错误。")
+            return # Stop further empty response retries.
         if accumulated_text_this_attempt:
             logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
+            return # Stream was successful and content yielded.
+        # If we reach here, content was empty from _execute_one_stream_attempt, and no API error was yielded by it.
         logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
         if empty_retry_attempt_num >= max_empty_response_retries:
             final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
             final_error_code = "max_retries_check_context_contact_admin"
             logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
             yield format_openai_sse_delta({
+                "error": {"message": final_error_message, "type": "max_empty_retries_exceeded", "code": final_error_code}
             })
             yield "data: [DONE]\n\n"
             return
         logging.info(f"【流式请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
         time.sleep(1)
     final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
     final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
     logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
 def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
+    max_empty_response_retries = 5
     empty_retry_attempt_num = 0
     current_apikey_for_attempt = initial_apikey
     current_session_id_for_attempt = initial_session_id
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
             try:
                 current_apikey_for_attempt = keymgr.get()
                 current_session_id_for_attempt = create_session(current_apikey_for_attempt)
                 logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                     if status_code_from_exc_retry_setup_ns == 524:
                         logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
+                    elif status_code_from_exc_retry_setup_ns == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for non-stream retry.")
                     else:
                         keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
                     final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                     final_error_code = "max_retries_check_context_contact_admin"
                     logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
+                    return jsonify({"error": final_error_message, "code": final_error_code, "details": str(e_key_session)}), 500
                 time.sleep(1)
                 current_apikey_for_attempt = None
                 continue
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
+        # Inner loop for 500-error retries for the current key/session
+        max_500_retries_for_this_call = 5
+        current_500_retry_count = 0
+        while current_500_retry_count < max_500_retries_for_this_call:
+            current_500_retry_count += 1
+            if current_500_retry_count > 1:
+                logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}, 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
+            else:
+                logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
+            url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query" # Ensure URL uses current session
+            payload = { "query": final_query_to_ondemand, "endpointId": target_endpoint_id, "pluginIds": [], "responseMode": "sync" } # Use correct query and endpoint
+            headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
+            try:
+                resp = requests.post(url, json=payload, headers=headers, timeout=120)
+                if resp.status_code == 500:
+                    logging.warning(f"【OnDemand同步错误】({log_attempt_str}, 500重试 {current_500_retry_count}) 收到500错误。")
+                    if current_500_retry_count >= max_500_retries_for_this_call:
+                        logging.error(f"【OnDemand同步错误】({log_attempt_str}) 达到500错误最大重试次数。将错误传递给上层。")
+                        resp.raise_for_status() # Re-raise HTTPError(500) to be caught by outer RequestException handler
+                    time.sleep(1)
+                    continue # Next 500-retry iteration
+                resp.raise_for_status() # For other non-200 errors (will be caught by RequestException below)
+                # Successful 200 OK
+                response_json = resp.json()
+                if "data" not in response_json or "answer" not in response_json["data"]:
+                    logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
+                    raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}, 500-retry {current_500_retry_count}.")
+                ai_response_content = response_json["data"]["answer"]
+                if ai_response_content is None: ai_response_content = ""
+                if ai_response_content.strip():
+                    logging.info(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 成功获取非空内容。")
+                    openai_response_obj = {
                         "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                         "model": openai_model_name_for_response,
+                        "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
+                        "usage": {}
+                    }
+                    return jsonify(openai_response_obj) # SUCCESS
+                else:
+                    # Empty response after a 200 OK (and non-500 error)
+                    logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 返回空回复。")
+                    # This attempt (for this key/session) resulted in an empty response.
+                    # Break from the 500-retry loop to let the outer empty-response loop handle it.
+                    break # Break from current_500_retry_count loop
+            except requests.exceptions.RequestException as e_req: # Catches HTTPError (including re-raised 500) or other network issues
+                log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
+                status_code_from_exc_sync = None
+                if hasattr(e_req, 'response') and e_req.response is not None:
+                    status_code_from_exc_sync = e_req.response.status_code
+                logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
+                # Key marking is handled by with_valid_key_and_session based on what's re-raised.
+                # If it's a 500 that exhausted its inner retries, it will be re-raised.
+                # If it's another RequestException, it's also re-raised.
+                # If this was the initial overall attempt (empty_retry_attempt_num == 1)
+                # AND this specific 500-retry loop has exhausted OR it's not a 500 error that can be retried by this inner loop:
+                if current_500_retry_count >= max_500_retries_for_this_call or status_code_from_exc_sync != 500:
+                    if empty_retry_attempt_num == 1:
+                        raise e_req # Re-raise for with_valid_key_and_session to handle key rotation
+                    else:
+                        # This was an empty-response retry that then hit a persistent 500 or other RequestException.
+                        # This attempt for this key has failed. Break 500-retry loop to go to next empty-response attempt.
+                        # To signal this failure for the current key/session to the outer empty-response loop:
+                        raise e_req # This will be caught by the outer try-except in the empty_retry_attempt_num loop
+                # If it was a 500 and we still have 500-retries, the loop will continue after sleep.
+                time.sleep(1)
+                continue # To the next iteration of current_500_retry_count loop
+            except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
+                logging.error(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 处理响应或格式时出错: {e_parse}", exc_info=True)
+                if empty_retry_attempt_num == 1 and current_500_retry_count == 1 : # Format error on very first try
+                     raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
+                # If format error during a retry, it's a failure for this key/session attempt
+                # Break from 500-retry loop, let empty-response loop handle it.
+                # To signal this failure for the current key/session:
+                raise requests.exceptions.RequestException(f"Response format error during retry: {e_parse}") from e_parse
+        # If the 500-retry loop completed (either successfully got non-empty, or broke due to empty, or raised an error)
+        # Check if we are here because of an empty response (meaning the 500-retry loop broke after a 200 OK but empty content)
+        if not ai_response_content.strip() and not ( 'resp' in locals() and resp.status_code != 200 ): # Check if it was an empty response from a 200 OK
             if empty_retry_attempt_num >= max_empty_response_retries:
                 final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
                 final_error_code = "max_retries_check_context_contact_admin"
+                logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
+                return jsonify({
+                    "error": final_error_message,
+                    "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
+                    "model": openai_model_name_for_response,
+                    "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
+                    "usage": {}, "code": final_error_code
+                }), 500
+            logging.info(f"【同步请求】空回复（在500-重试循环之后），准备进行下一个空回复尝试。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
+            time.sleep(1)
+            # Outer loop (empty_retry_attempt_num) will continue
+        # If we are here and didn't return a success, it means the 500-retry loop might have been exhausted by 500s
+        # but didn't re-raise correctly, or some other path. This is a fallback.
+        # However, if it exhausted 500s, it should have re-raised an exception.
+    # Fallback if outer empty_retry_attempt_num loop finishes
     final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
     final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
     logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
+    return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
 @app.route("/v1/models", methods=["GET"])