Ge-AI committed on
Commit
66b138f
·
verified ·
1 Parent(s): c84194f

Update openai_ondemand_adapter.py

Browse files
Files changed (1) hide show
  1. openai_ondemand_adapter.py +308 -217
openai_ondemand_adapter.py CHANGED
@@ -175,6 +175,145 @@ def format_openai_sse_delta(chunk_data_dict):
175
  """将数据块格式化为 OpenAI SSE (Server-Sent Events) 流格式"""
176
  return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  @app.route("/v1/chat/completions", methods=["POST"])
179
  def chat_completions():
180
  """处理聊天补全请求,模拟 OpenAI /v1/chat/completions 接口"""
@@ -196,26 +335,34 @@ def chat_completions():
196
  is_stream_request = bool(request_data.get("stream", False))
197
 
198
  # --- 构造发送给 OnDemand 的 query 字符串 ---
199
- # 将整个对话历史格式化为一个字符串
200
- # 格式: "Role: Content\nRole: Content..."
201
- # 您可能需要根据 OnDemand API 的具体要求调整此格式
202
  formatted_query_parts = []
203
  for msg in messages:
204
  role = msg.get("role", "user").strip().capitalize()
205
- content = msg.get("content", "")
206
  content_string = ""
207
- if isinstance(content, list):
208
- for info in content:
209
- if isinstance(info, dict):
210
- for k, v in info.items():
211
- content_string += f"{k}: {v}\n{k}: {v}"
 
 
 
 
 
 
 
 
 
 
 
212
  elif isinstance(content, str):
213
- content_string = content
214
 
215
  content_string = content_string.strip()
216
  if not content_string: # 跳过空内容的消息
217
  continue
218
- formatted_query_parts.append(f"<|{role}|>: {content_string}")
219
 
220
  if not formatted_query_parts:
221
  return jsonify({"error": "No valid content found in 'messages'."}), 400
@@ -227,10 +374,6 @@ def chat_completions():
227
 
228
  # 内部函数,用于封装实际的API调用逻辑,方便重试和密钥管理
229
  def attempt_ondemand_request(current_apikey, current_session_id):
230
- # 这个函数会被 with_valid_key_and_session 调用
231
- # current_apikey 和 current_session_id 由 with_valid_key_and_session 提供
232
-
233
- # 根据是否流式请求,调用不同的处理函数
234
  if is_stream_request:
235
  return handle_stream_request(current_apikey, current_session_id, final_query_to_ondemand, target_endpoint_id, openai_model_name)
236
  else:
@@ -238,61 +381,59 @@ def chat_completions():
238
 
239
  # 装饰器/高阶函数,用于管理API密钥获取、会话创建和重试逻辑
240
  def with_valid_key_and_session(action_func):
241
- max_retries = len(ONDEMAND_APIKEYS) * 2 if ONDEMAND_APIKEYS else 1 # 每个key最多尝试2次
242
  retries_count = 0
243
  last_exception_seen = None
244
 
245
  while retries_count < max_retries:
246
  selected_apikey = None
247
  try:
248
- selected_apikey = keymgr.get() # 从KeyManager获取一个API密钥
249
-
250
- # 每次请求都创建一个新的OnDemand会话
251
  logging.info(f"【请求处理】使用 API Key: {keymgr.display_key(selected_apikey)},准备创建新会话...")
252
- ondemand_session_id = create_session(selected_apikey) # 创建新会话
253
-
254
- # 执行实际的请求操作 (流式或非流式)
255
  return action_func(selected_apikey, ondemand_session_id)
256
 
257
- except ValueError as ve: # KeyManager中没有key了
258
  logging.critical(f"【请求处理】KeyManager 错误: {ve}")
259
  last_exception_seen = ve
260
- break # 无法获取密钥,直接中断
261
- except requests.HTTPError as http_err: # 包括 create_session 或 query API 的 HTTP 错误
262
  last_exception_seen = http_err
263
  response = http_err.response
264
  logging.warning(f"【请求处理】HTTP 错误发生。状态码: {response.status_code if response else 'N/A'}, Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}")
265
  if selected_apikey and response is not None:
266
- # 根据错误码判断是否将Key标记为坏的
267
- # 401 (Unauthorized), 403 (Forbidden), 429 (Too Many Requests) 通常意味着Key有问题或达到限额
268
  if response.status_code in (401, 403, 429):
269
  keymgr.mark_bad(selected_apikey)
270
- # 某些5xx错误也可能与特定Key相关,或者只是服务端临时问题
271
- # elif response.status_code >= 500:
272
- # keymgr.mark_bad(selected_apikey) # 谨慎处理5xx,也可能标记
273
  retries_count += 1
274
  logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
275
- time.sleep(1) # 简单等待1秒后重试
276
  continue
277
- except requests.exceptions.Timeout:
278
- last_exception_seen = "Request timed out."
279
- logging.warning(f"【请求处理】请求超时。Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}")
280
  if selected_apikey:
281
- keymgr.mark_bad(selected_apikey) # 超时也可能标记Key
282
  retries_count += 1
283
  logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
284
  time.sleep(1)
285
  continue
286
- except Exception as e: # 其他所有Python异常
 
 
 
 
 
 
 
 
 
287
  last_exception_seen = e
288
  logging.error(f"【请求处理】发生意外的严重错误: {e}", exc_info=True)
289
  if selected_apikey:
290
- keymgr.mark_bad(selected_apikey) # 发生未知严重错误时,也标记当前Key
291
- retries_count += 1 # 增加重试计数,避免死循环
292
- # 对于非常严重的未知错误,可能选择直接中断而不是继续重试
293
- # break
294
 
295
- # 如果所有重试都失败了
296
  error_message = "All attempts to process the request failed after multiple retries."
297
  if last_exception_seen:
298
  error_message += f" Last known error: {str(last_exception_seen)}"
@@ -303,190 +444,147 @@ def chat_completions():
303
 
304
 
305
  def handle_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
306
- """处理流式聊天补全请求"""
307
- def generate_stream_chunks():
308
- url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
309
- payload = {
310
- "query": query_str,
311
- "endpointId": endpoint_id,
312
- "pluginIds": [], # 根据需要,通常聊天为空
313
- "responseMode": "stream"
314
- }
315
- headers = {
316
- "apikey": apikey,
317
- "Content-Type": "application/json",
318
- "Accept": "text/event-stream" # 指示服务器发送SSE
319
- }
320
 
321
- logging.info(f"【流式请求】发送到 OnDemand: Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")
322
- # logging.debug(f"【流式请求】Payload Query (first 200 chars): {query_str[:200]}...")
 
 
323
 
324
- try:
325
- with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp: # 流式请求超时可以设置长一些
326
- if resp.status_code != 200:
327
- error_text = resp.text # 尝试读取错误响应体
328
- logging.error(f"【OnDemand流错误】请求失败。状态码: {resp.status_code}, Session: {session_id}, 响应: {error_text[:500]}")
329
- # 在流中产生一个错误事件
330
- yield format_openai_sse_delta({
331
- "error": {
332
- "message": f"OnDemand API Error (Stream Init): {resp.status_code} - {error_text[:200]}",
333
- "type": "on_demand_api_error",
334
- "code": resp.status_code
335
- }
336
- })
337
- yield "data: [DONE]\n\n" # 确保流结束
338
- return # 提前退出生成器
339
-
340
- first_chunk_sent = False
341
- for line_bytes in resp.iter_lines(): # 按行迭代响应
342
- if not line_bytes: # 跳过空行 (SSE中的keep-alive)
343
- continue
344
-
345
- line_str = line_bytes.decode("utf-8")
346
-
347
- if line_str.startswith("data:"):
348
- data_part = line_str[len("data:"):].strip()
349
-
350
- if data_part == "[DONE]":
351
- logging.info(f"【OnDemand流】接收到 [DONE] 信号。Session: {session_id}")
352
- yield "data: [DONE]\n\n"
353
- break
354
- elif data_part.startswith("[ERROR]:"):
355
- error_json_str = data_part[len("[ERROR]:"):].strip()
356
- logging.warning(f"【OnDemand流】接收到错误事件: {error_json_str}。Session: {session_id}")
357
- try:
358
- error_obj = json.loads(error_json_str)
359
- yield format_openai_sse_delta({"error": error_obj})
360
- except json.JSONDecodeError:
361
- yield format_openai_sse_delta({"error": {"message": error_json_str, "type": "on_demand_stream_error_format"}})
362
- yield "data: [DONE]\n\n" # 错误后也发送DONE
363
- break
364
- else:
365
- try:
366
- event_data = json.loads(data_part)
367
- except json.JSONDecodeError:
368
- logging.warning(f"【OnDemand流】无法解析JSON数据块: {data_part[:100]}... Session: {session_id}")
369
- continue # 跳过无法解析的块
370
-
371
- # 假设OnDemand流式响应中,'fulfillment'事件包含文本块
372
- if event_data.get("eventType") == "fulfillment":
373
- delta_content = event_data.get("answer", "") # 获取文本增量
374
- if delta_content is None: delta_content = "" # 确保是字符串
375
-
376
- choice_delta = {}
377
- if not first_chunk_sent: # 第一个有效数据块
378
- choice_delta["role"] = "assistant"
379
- choice_delta["content"] = delta_content
380
- first_chunk_sent = True
381
- else:
382
- choice_delta["content"] = delta_content
383
-
384
- if not choice_delta.get("content") and not choice_delta.get("role"): # 避免发送空delta
385
- continue
386
 
387
- openai_chunk = {
388
- "id": "chatcmpl-" + str(uuid.uuid4())[:12], # 更长的随机ID
389
- "object": "chat.completion.chunk",
390
- "created": int(time.time()),
391
- "model": openai_model_name_for_response,
392
- "choices": [{
393
- "delta": choice_delta,
394
- "index": 0,
395
- "finish_reason": None # 流式传输中,finish_reason通常在最后一块或[DONE]后确定
396
- }]
397
- }
398
- yield format_openai_sse_delta(openai_chunk)
399
-
400
- # 确保如果循环正常结束(没有break且没有收到[DONE]),也发送一个[DONE]
401
- # 但通常OnDemand API应该自己发送[DONE]
402
- if not line_str.endswith("data: [DONE]"): # 简易检查
403
- logging.info(f"【OnDemand流】流迭代完成,补充发送 [DONE]。Session: {session_id}")
404
- yield "data: [DONE]\n\n"
405
 
406
- except requests.exceptions.RequestException as e:
407
- logging.error(f"【OnDemand流】请求过程中发生网络或请求异常: {e}, Session: {session_id}", exc_info=True)
408
- yield format_openai_sse_delta({
409
- "error": {
410
- "message": f"Network or request error during streaming: {str(e)}",
411
- "type": "streaming_request_exception"
412
- }
413
- })
414
- yield "data: [DONE]\n\n"
415
- except Exception as e:
416
- logging.error(f"【OnDemand流】处理流时发生未知错误: {e}, Session: {session_id}", exc_info=True)
417
- yield format_openai_sse_delta({
418
  "error": {
419
- "message": f"Unknown error during streaming: {str(e)}",
420
- "type": "unknown_streaming_error"
 
421
  }
422
- })
 
 
 
 
 
 
 
 
 
 
 
423
  yield "data: [DONE]\n\n"
424
-
425
- return Response(generate_stream_chunks(), content_type='text/event-stream')
 
 
 
426
 
427
 
428
  def handle_non_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
429
- """处理非流式聊天补全请求"""
430
  url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
431
  payload = {
432
  "query": query_str,
433
  "endpointId": endpoint_id,
434
  "pluginIds": [],
435
- "responseMode": "sync" # 同步模式
436
  }
437
  headers = {"apikey": apikey, "Content-Type": "application/json"}
438
 
439
- logging.info(f"【同步请求】发送到 OnDemand: Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")
440
- # logging.debug(f"【同步请求】Payload Query (first 200 chars): {query_str[:200]}...")
441
-
442
- try:
443
- resp = requests.post(url, json=payload, headers=headers, timeout=120) # 同步请求超时
444
- resp.raise_for_status() # 检查HTTP错误
445
-
446
- response_json = resp.json()
447
- # 验证响应结构,假设成功时 "data.answer" 包含回复文本
448
- if "data" not in response_json or "answer" not in response_json["data"]:
449
- logging.error(f"【OnDemand同步错误】响应格式不符合预期。Session: {session_id}, 响应: {str(response_json)[:500]}")
450
- raise ValueError("OnDemand API sync response missing 'data.answer' field.")
451
 
452
- ai_response_content = response_json["data"]["answer"]
453
- if ai_response_content is None: ai_response_content = "" # 确保是字符串
454
-
455
- # 构造OpenAI格式的响应
456
- openai_response_obj = {
457
- "id": "chatcmpl-" + str(uuid.uuid4())[:12],
458
- "object": "chat.completion",
459
- "created": int(time.time()),
460
- "model": openai_model_name_for_response,
461
- "choices": [
462
- {
463
- "index": 0,
464
- "message": {
465
- "role": "assistant",
466
- "content": ai_response_content
467
- },
468
- "finish_reason": "stop" # 同步模式通常意味着完成
 
 
 
 
 
 
 
 
 
 
 
 
469
  }
470
- ],
471
- "usage": { # OnDemand可能不提供usage,这里留空或估算
472
- # "prompt_tokens": estimate_tokens(query_str),
473
- # "completion_tokens": estimate_tokens(ai_response_content),
474
- # "total_tokens": estimate_tokens(query_str) + estimate_tokens(ai_response_content)
475
- }
476
- }
477
- return jsonify(openai_response_obj)
478
-
479
- except requests.exceptions.Timeout as e:
480
- logging.error(f"【OnDemand同步错误】请求超时。Session: {session_id}, Key: {keymgr.display_key(apikey)}")
481
- # 此处异常会被 with_valid_key_and_session 捕获并处理重试或返回错误
482
- raise
483
- except requests.exceptions.RequestException as e: # 包括HTTPError
484
- logging.error(f"【OnDemand同步错误】请求失败。Session: {session_id}, Key: {keymgr.display_key(apikey)}, 错误: {e}, 响应: {e.response.text[:500] if e.response else 'N/A'}")
485
- raise
486
- except (ValueError, KeyError, json.JSONDecodeError) as e: # 解析响应或结构错误
487
- logging.error(f"【OnDemand同步错误】处理响应时出错。Session: {session_id}, 错误: {e}", exc_info=True)
488
- # 包装成一个可以被上层理解的错误,或者直接让上层HTTPError处理
489
- raise requests.HTTPError(f"Error processing OnDemand sync response: {e}", response=resp if 'resp' in locals() else None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
 
492
  @app.route("/v1/models", methods=["GET"])
@@ -495,15 +593,11 @@ def list_models():
495
  model_objects = []
496
  for model_key_alias in MODEL_MAP.keys():
497
  model_objects.append({
498
- "id": model_key_alias, # 用户请求时使用的模型名
499
  "object": "model",
500
- "created": int(time.time()), # 可以用一个固定的时间戳或动态生成
501
- "owned_by": "ondemand-proxy" # 指示这些模型条目由代理提供
502
  })
503
- # 如果有默认模型且不在MODEL_MAP的key中,也可以考虑加入
504
- # if DEFAULT_ONDEMAND_MODEL not in [m["id"] for m in model_objects]:
505
- # (这取决于DEFAULT_ONDEMAND_MODEL是否也应该作为用户可选的模型ID)
506
-
507
  return jsonify({
508
  "object": "list",
509
  "data": model_objects
@@ -529,7 +623,7 @@ def health_check():
529
 
530
  if __name__ == "__main__":
531
  log_format = '[%(asctime)s] %(levelname)s in %(module)s (%(funcName)s): %(message)s'
532
- logging.basicConfig(level=logging.INFO, format=log_format)
533
 
534
  if not PRIVATE_KEY:
535
  logging.warning("****************************************************************")
@@ -554,8 +648,5 @@ if __name__ == "__main__":
554
  for user_model, od_endpoint in MODEL_MAP.items():
555
  logging.info(f" '{user_model}' -> '{od_endpoint}'")
556
 
557
- # 从环境变量读取端口,默认为7860
558
  port = int(os.environ.get("PORT", 7860))
559
- # 对于生产环境,debug通常应为False
560
- # 在HuggingFace Spaces等环境中,它们通常会处理HTTPS,所以本地运行HTTP即可
561
- app.run(host="0.0.0.0", port=port, debug=False)
 
175
  """将数据块格式化为 OpenAI SSE (Server-Sent Events) 流格式"""
176
  return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
177
 
178
# --- Helper for chat_completions: one streaming attempt ---
# Must live where it can see the module-level names it uses:
# ONDEMAND_API_BASE, keymgr, format_openai_sse_delta.
def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response, attempt_num_logging):
    """Run a single streaming query attempt against the OnDemand API.

    Args:
        apikey: OnDemand API key for this attempt.
        session_id: previously created OnDemand session id.
        query_str: the flattened conversation text to send.
        endpoint_id: OnDemand endpoint to route the query to.
        openai_model_name_for_response: model name echoed back in OpenAI-format chunks.
        attempt_num_logging: attempt counter, used only in log messages.

    Returns:
        tuple: (generated_sse_strings, accumulated_text_content, api_error_occurred)
            generated_sse_strings: list of SSE event strings produced by this attempt.
            accumulated_text_content: plain text accumulated from the stream (stripped).
            api_error_occurred: True when an API-level error was already converted
                into an SSE error event (e.g. non-200 status, in-stream [ERROR]).

    Raises:
        requests.RequestException: network-level failures (timeouts, connection
            errors) are re-raised so the caller can retry with another key/session.
    """
    url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
    payload = {
        "query": query_str,
        "endpointId": endpoint_id,
        "pluginIds": [],
        "responseMode": "stream"
    }
    headers = {
        "apikey": apikey,
        "Content-Type": "application/json",
        "Accept": "text/event-stream"
    }

    generated_sse_strings = []
    accumulated_text_parts = []
    api_error_handled_as_sse = False  # set once an API error was emitted as an SSE event
    # FIX: track whether we already emitted [DONE] with an explicit flag.  The
    # previous check inspected the last raw line with startswith("data: [DONE]"),
    # which emitted a duplicate [DONE] when upstream sent "data:[DONE]" (no space).
    done_signal_emitted = False

    logging.info(f"【流式请求子尝试 {attempt_num_logging}】发送到 OnDemand: Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")

    try:
        with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
            if resp.status_code != 200:
                # Stream could not even be initiated: surface the error as an SSE
                # event so the client still gets a well-formed stream.
                api_error_handled_as_sse = True
                error_text = resp.text
                logging.error(f"【OnDemand流错误】请求失败 (子尝试 {attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id}, 响应: {error_text[:500]}")
                error_payload = {
                    "error": {
                        "message": f"OnDemand API Error (Stream Init, Attempt {attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
                        "type": "on_demand_api_error",
                        "code": resp.status_code
                    }
                }
                generated_sse_strings.append(format_openai_sse_delta(error_payload))
                generated_sse_strings.append("data: [DONE]\n\n")
                return generated_sse_strings, "".join(accumulated_text_parts), api_error_handled_as_sse

            first_chunk_sent = False
            for line_bytes in resp.iter_lines():
                if not line_bytes:  # skip SSE keep-alive blank lines
                    continue

                line_str = line_bytes.decode("utf-8")
                if not line_str.startswith("data:"):
                    continue
                data_part = line_str[len("data:"):].strip()

                if data_part == "[DONE]":
                    logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {attempt_num_logging})。Session: {session_id}")
                    generated_sse_strings.append("data: [DONE]\n\n")
                    done_signal_emitted = True
                    break

                if data_part.startswith("[ERROR]:"):
                    # In-stream error event from OnDemand; forward it and terminate.
                    api_error_handled_as_sse = True
                    error_json_str = data_part[len("[ERROR]:"):].strip()
                    logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {attempt_num_logging}): {error_json_str}。Session: {session_id}")
                    try:
                        error_obj = json.loads(error_json_str)
                    except json.JSONDecodeError:
                        error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
                    generated_sse_strings.append(format_openai_sse_delta({"error": error_obj}))
                    generated_sse_strings.append("data: [DONE]\n\n")  # also terminate after an error
                    done_signal_emitted = True
                    break

                try:
                    event_data = json.loads(data_part)
                except json.JSONDecodeError:
                    logging.warning(f"【OnDemand流】无法解析JSON数据块 (子尝试 {attempt_num_logging}): {data_part[:100]}... Session: {session_id}")
                    continue

                # Only 'fulfillment' events carry text deltas; ignore everything else.
                if event_data.get("eventType") != "fulfillment":
                    continue

                delta_content = event_data.get("answer", "")
                if delta_content is None:
                    delta_content = ""
                accumulated_text_parts.append(delta_content)

                choice_delta = {}
                if not first_chunk_sent:
                    choice_delta["role"] = "assistant"  # first chunk announces the role
                    first_chunk_sent = True
                choice_delta["content"] = delta_content

                # FIX: skip chunks with neither content nor role.  The original had
                # an extra nested condition here that was always true inside this
                # branch (role is known falsy), i.e. dead logic around a plain skip.
                if not choice_delta.get("content") and not choice_delta.get("role"):
                    continue

                openai_chunk = {
                    "id": "chatcmpl-" + str(uuid.uuid4())[:12],
                    "object": "chat.completion.chunk",
                    "created": int(time.time()),
                    "model": openai_model_name_for_response,
                    "choices": [{
                        "delta": choice_delta,
                        "index": 0,
                        "finish_reason": None
                    }]
                }
                generated_sse_strings.append(format_openai_sse_delta(openai_chunk))

            # If the upstream stream ended without an explicit terminator, append
            # exactly one [DONE] so the client-side stream always terminates.
            if not done_signal_emitted and not api_error_handled_as_sse:
                logging.info(f"【OnDemand流】(子尝试 {attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id}")
                generated_sse_strings.append("data: [DONE]\n\n")

    except requests.exceptions.RequestException as e:
        # Network/request-level errors are handled by the outer retry logic
        # (with_valid_key_and_session), which may switch API keys.
        logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {attempt_num_logging}): {e}, Session: {session_id}", exc_info=False)
        raise  # important: re-raise for key/network-level retry handling
    except Exception as e:
        # Any other unexpected error during stream processing is converted into
        # an SSE error event so the client still receives a terminated stream.
        api_error_handled_as_sse = True
        logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {attempt_num_logging}): {e}, Session: {session_id}", exc_info=True)
        error_payload = {
            "error": {
                "message": f"Unknown error during streaming (Attempt {attempt_num_logging}): {str(e)}",
                "type": "unknown_streaming_error_in_attempt"
            }
        }
        generated_sse_strings.append(format_openai_sse_delta(error_payload))
        generated_sse_strings.append("data: [DONE]\n\n")
        # intentionally not re-raised: the error is already formatted for SSE delivery

    return generated_sse_strings, "".join(accumulated_text_parts).strip(), api_error_handled_as_sse
315
+
316
+
317
  @app.route("/v1/chat/completions", methods=["POST"])
318
  def chat_completions():
319
  """处理聊天补全请求,模拟 OpenAI /v1/chat/completions 接口"""
 
335
  is_stream_request = bool(request_data.get("stream", False))
336
 
337
  # --- 构造发送给 OnDemand 的 query 字符串 ---
 
 
 
338
  formatted_query_parts = []
339
  for msg in messages:
340
  role = msg.get("role", "user").strip().capitalize()
341
+ content = msg.get("content", "") # content可以是字符串或列表(例如包含图片时)
342
  content_string = ""
343
+ if isinstance(content, list): # 处理OpenAI content为列表的情况 (通常用于多模态)
344
+ # OnDemand的query字段可能只接受文本。这里简单拼接文本部分。
345
+ # 您可能需要根据OnDemand API如何处理多模态输入来调整此逻辑。
346
+ temp_parts = []
347
+ for item in content:
348
+ if isinstance(item, dict) and item.get("type") == "text":
349
+ temp_parts.append(item.get("text", ""))
350
+ # elif isinstance(item, dict) and item.get("type") == "image_url":
351
+ # temp_parts.append("[Image Content Not Transmitted To Text-Only OnDemand Query]") # 示例
352
+ # 按照用户原始代码逻辑处理 list content
353
+ elif isinstance(item, dict): # 用户原始逻辑
354
+ for k, v_item in item.items(): # 修改变量名 v -> v_item 避免与外层冲突
355
+ content_string += f"{k}: {v_item}\n{k}: {v_item}" # 用户原始逻辑
356
+ if not content_string and temp_parts: # 如果原始逻辑未产生字符串,但有文本部分
357
+ content_string = "\n".join(filter(None, temp_parts))
358
+
359
  elif isinstance(content, str):
360
+ content_string = content
361
 
362
  content_string = content_string.strip()
363
  if not content_string: # 跳过空内容的消息
364
  continue
365
+ formatted_query_parts.append(f"<|{role}|>: {content_string}") # 使用用户指定的格式
366
 
367
  if not formatted_query_parts:
368
  return jsonify({"error": "No valid content found in 'messages'."}), 400
 
374
 
375
  # 内部函数,用于封装实际的API调用逻辑,方便重试和密钥管理
376
  def attempt_ondemand_request(current_apikey, current_session_id):
 
 
 
 
377
  if is_stream_request:
378
  return handle_stream_request(current_apikey, current_session_id, final_query_to_ondemand, target_endpoint_id, openai_model_name)
379
  else:
 
381
 
382
  # 装饰器/高阶函数,用于管理API密钥获取、会话创建和重试逻辑
383
  def with_valid_key_and_session(action_func):
384
+ max_retries = len(ONDEMAND_APIKEYS) * 2 if ONDEMAND_APIKEYS else 1
385
  retries_count = 0
386
  last_exception_seen = None
387
 
388
  while retries_count < max_retries:
389
  selected_apikey = None
390
  try:
391
+ selected_apikey = keymgr.get()
 
 
392
  logging.info(f"【请求处理】使用 API Key: {keymgr.display_key(selected_apikey)},准备创建新会话...")
393
+ ondemand_session_id = create_session(selected_apikey)
 
 
394
  return action_func(selected_apikey, ondemand_session_id)
395
 
396
+ except ValueError as ve:
397
  logging.critical(f"【请求处理】KeyManager 错误: {ve}")
398
  last_exception_seen = ve
399
+ break
400
+ except requests.HTTPError as http_err:
401
  last_exception_seen = http_err
402
  response = http_err.response
403
  logging.warning(f"【请求处理】HTTP 错误发生。状态码: {response.status_code if response else 'N/A'}, Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}")
404
  if selected_apikey and response is not None:
 
 
405
  if response.status_code in (401, 403, 429):
406
  keymgr.mark_bad(selected_apikey)
 
 
 
407
  retries_count += 1
408
  logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
409
+ time.sleep(1)
410
  continue
411
+ except requests.exceptions.Timeout as timeout_err: # 更明确地捕获 Timeout
412
+ last_exception_seen = timeout_err # timeout_err 而不是字符串
413
+ logging.warning(f"【请求处理】请求超时。Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}, Error: {timeout_err}")
414
  if selected_apikey:
415
+ keymgr.mark_bad(selected_apikey)
416
  retries_count += 1
417
  logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
418
  time.sleep(1)
419
  continue
420
+ except requests.exceptions.RequestException as req_ex: # 其他网络相关错误
421
+ last_exception_seen = req_ex
422
+ logging.warning(f"【请求处理】网络请求错误。Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}, Error: {req_ex}")
423
+ if selected_apikey: # 对于一般网络错误,也可能标记key
424
+ keymgr.mark_bad(selected_apikey)
425
+ retries_count += 1
426
+ logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
427
+ time.sleep(1)
428
+ continue
429
+ except Exception as e:
430
  last_exception_seen = e
431
  logging.error(f"【请求处理】发生意外的严重错误: {e}", exc_info=True)
432
  if selected_apikey:
433
+ keymgr.mark_bad(selected_apikey)
434
+ retries_count += 1
435
+ # break # 对于非常严重的未知错误,可以选择直接中断
 
436
 
 
437
  error_message = "All attempts to process the request failed after multiple retries."
438
  if last_exception_seen:
439
  error_message += f" Last known error: {str(last_exception_seen)}"
 
444
 
445
 
446
def handle_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
    """Handle a streaming chat-completion request, retrying on empty replies.

    Runs up to ``max_empty_response_retries`` attempts via
    ``_execute_one_stream_attempt`` and returns a Flask ``Response`` that
    replays the buffered SSE events from the last attempt.

    NOTE(review): each attempt is fully buffered before anything is sent to the
    client, so the client does not receive true incremental streaming — confirm
    this trade-off (needed for the empty-reply retry) is intended.

    Raises:
        requests.RequestException: propagated from the attempt; handled by the
            caller's key/session retry wrapper (``with_valid_key_and_session``).
    """
    max_empty_response_retries = 5
    attempt_count = 0

    final_sse_strings_to_yield = []

    while attempt_count < max_empty_response_retries:
        attempt_count += 1

        # _execute_one_stream_attempt may raise requests.RequestException
        # (timeouts, connection errors); those propagate to
        # with_valid_key_and_session, which may retry with a different key.
        sse_strings_this_attempt, accumulated_text_this_attempt, api_error_in_attempt = \
            _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response, attempt_count)

        final_sse_strings_to_yield = sse_strings_this_attempt  # keep this attempt's output, good or bad

        if api_error_in_attempt:
            # The attempt already converted the API error into SSE events;
            # forward them to the client instead of retrying.
            logging.warning(f"【流式请求】尝试 {attempt_count} OnDemand 服务返回错误或处理内部错误,将返回此错误信息给客户端。")
            break  # exit the empty-reply retry loop and use the buffered error events

        if accumulated_text_this_attempt:
            logging.info(f"【流式请求】尝试 {attempt_count} 成功获取非空内容。")
            break  # got non-empty content; stop retrying

        # Reaching here means the stream produced no text and no API error.
        logging.warning(f"【流式请求】尝试 {attempt_count} 返回空内容。总共尝试次数 {max_empty_response_retries}。")
        if attempt_count >= max_empty_response_retries:
            logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回空回复错误。")
            # Replace the buffered output with a single SSE error event + [DONE].
            empty_error_payload = {
                "error": {
                    "message": f"Model returned an empty stream after {max_empty_response_retries} retries.",
                    "type": "empty_stream_error_after_retries",
                    "code": "empty_response"
                }
            }
            final_sse_strings_to_yield = [format_openai_sse_delta(empty_error_payload), "data: [DONE]\n\n"]
            break  # exit the loop and deliver this error

        logging.info(f"【流式请求】空回复,将在1秒后重试。当前尝试 {attempt_count}/{max_empty_response_retries}")
        time.sleep(1)  # brief pause before the next empty-reply retry

    # Final generator handed to the Response object: replays the buffered events.
    def final_generator_for_response():
        if not final_sse_strings_to_yield:  # defensive: should never happen
            logging.error("【流式请求】final_sse_strings_to_yield 为空,这不应该发生。返回通用错误。")
            yield format_openai_sse_delta({"error": {"message": "Unexpected empty result in streaming.", "type": "internal_proxy_error"}})
            yield "data: [DONE]\n\n"
        else:
            for sse_str in final_sse_strings_to_yield:
                yield sse_str

    return Response(final_generator_for_response(), content_type='text/event-stream')
500
 
501
 
502
def handle_non_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
    """Handle a synchronous (non-streaming) chat completion with empty-reply retries.

    Queries the OnDemand sync endpoint up to five times until a non-empty answer
    arrives, then wraps it in an OpenAI-style chat.completion response.

    Raises:
        requests.RequestException: re-raised so the caller's key/session retry
            wrapper (``with_valid_key_and_session``) can handle it.
    """
    url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
    headers = {"apikey": apikey, "Content-Type": "application/json"}
    payload = {
        "query": query_str,
        "endpointId": endpoint_id,
        "pluginIds": [],
        "responseMode": "sync"
    }

    max_empty_response_retries = 5

    for empty_response_retry_count in range(1, max_empty_response_retries + 1):
        logging.info(f"【同步请求】尝试 #{empty_response_retry_count}/{max_empty_response_retries}. Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")

        try:
            resp = requests.post(url, json=payload, headers=headers, timeout=120)
            resp.raise_for_status()  # 4xx/5xx bubble up to with_valid_key_and_session

            response_json = resp.json()
            # The sync API is expected to put the reply text under data.answer.
            if not ("data" in response_json and "answer" in response_json["data"]):
                logging.error(f"【OnDemand同步错误】响应格式不符合预期 (尝试 {empty_response_retry_count})。Session: {session_id}, 响应: {str(response_json)[:500]}")
                raise ValueError("OnDemand API sync response missing 'data.answer' field.")

            raw_answer = response_json["data"]["answer"]
            ai_response_content = "" if raw_answer is None else raw_answer

            if ai_response_content.strip():
                # Non-empty answer: wrap it in an OpenAI-style completion object.
                logging.info(f"【同步请求】尝试 {empty_response_retry_count} 成功获取非空内容。")
                return jsonify({
                    "id": "chatcmpl-" + str(uuid.uuid4())[:12],
                    "object": "chat.completion",
                    "created": int(time.time()),
                    "model": openai_model_name_for_response,
                    "choices": [{
                        "index": 0,
                        "message": {"role": "assistant", "content": ai_response_content},
                        "finish_reason": "stop"
                    }],
                    "usage": {}
                })

            # Empty answer: either give up with an error response or retry.
            logging.warning(f"【同步请求】尝试 {empty_response_retry_count} 返回空回复。Session: {session_id}")
            if empty_response_retry_count >= max_empty_response_retries:
                logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回空回复错误。")
                return jsonify({
                    "error": f"Model returned an empty response after {max_empty_response_retries} retries.",
                    "id": "chatcmpl-" + str(uuid.uuid4())[:12],
                    "object": "chat.completion",
                    "created": int(time.time()),
                    "model": openai_model_name_for_response,
                    "choices": [{
                        "index": 0,
                        "message": {"role": "assistant", "content": ""},
                        "finish_reason": "length"
                    }],
                    "usage": {}
                }), 500

            logging.info(f"【同步请求】空回复,将在1秒后重试。当前尝试 {empty_response_retry_count}/{max_empty_response_retries}")
            time.sleep(1)  # short pause before the next empty-reply retry

        except requests.exceptions.RequestException as e:
            # Request-level errors (including HTTPError from raise_for_status)
            # are the outer wrapper's job: it may switch API keys and retry.
            logging.warning(f"【同步请求】(尝试 {empty_response_retry_count}) 发生请求级错误: {e}. 将由上层处理重试。")
            raise
        except (ValueError, KeyError, json.JSONDecodeError) as e:
            # Malformed/unexpected response body: retrying is unlikely to help,
            # so report a gateway-style failure straight back to the client.
            logging.error(f"【同步请求】(尝试 {empty_response_retry_count}) 处理响应时出错: {e}", exc_info=True)
            return jsonify({"error": f"Error processing OnDemand sync response: {str(e)}"}), 502

    # Defensive: every loop path should have returned or raised above.
    logging.error(f"【同步请求】意外退出空回复重试循环。这不应该发生。")
    return jsonify({"error": "Unexpected error in non-stream handling after empty response retries."}), 500
588
 
589
 
590
  @app.route("/v1/models", methods=["GET"])
 
593
  model_objects = []
594
  for model_key_alias in MODEL_MAP.keys():
595
  model_objects.append({
596
+ "id": model_key_alias,
597
  "object": "model",
598
+ "created": int(time.time()),
599
+ "owned_by": "ondemand-proxy"
600
  })
 
 
 
 
601
  return jsonify({
602
  "object": "list",
603
  "data": model_objects
 
623
 
624
  if __name__ == "__main__":
625
  log_format = '[%(asctime)s] %(levelname)s in %(module)s (%(funcName)s): %(message)s'
626
+ logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper(), format=log_format)
627
 
628
  if not PRIVATE_KEY:
629
  logging.warning("****************************************************************")
 
648
  for user_model, od_endpoint in MODEL_MAP.items():
649
  logging.info(f" '{user_model}' -> '{od_endpoint}'")
650
 
 
651
  port = int(os.environ.get("PORT", 7860))
652
+ app.run(host="0.0.0.0", port=port, debug=False)