Ge-AI commited on
Commit
11d68bb
·
verified ·
1 Parent(s): 939c8f7

Update openai_ondemand_adapter.py

Browse files
Files changed (1) hide show
  1. openai_ondemand_adapter.py +262 -200
openai_ondemand_adapter.py CHANGED
@@ -254,106 +254,135 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
254
 
255
  accumulated_text_parts = []
256
  api_error_yielded = False
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
259
 
260
- try:
261
- with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
262
- if resp.status_code != 200:
263
- api_error_yielded = True
264
- error_text = resp.text
265
- logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
266
- error_payload = {
267
- "error": {
268
- "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
269
- "type": "on_demand_api_error",
270
- "code": resp.status_code
271
- }
272
- }
273
- yield format_openai_sse_delta(error_payload)
274
- yield "data: [DONE]\n\n"
275
- return "".join(accumulated_text_parts).strip(), api_error_yielded
276
-
277
- first_chunk_sent = False
278
- last_line_str = ""
279
- for line_bytes in resp.iter_lines():
280
- if not line_bytes:
281
- continue
282
-
283
- line_str = line_bytes.decode("utf-8")
284
- last_line_str = line_str
285
-
286
- if line_str.startswith("data:"):
287
- data_part = line_str[len("data:"):].strip()
288
-
289
- if data_part == "[DONE]":
290
- logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
291
  yield "data: [DONE]\n\n"
292
  return "".join(accumulated_text_parts).strip(), api_error_yielded
293
-
294
- elif data_part.startswith("[ERROR]:"):
295
- api_error_yielded = True
296
- error_json_str = data_part[len("[ERROR]:"):].strip()
297
- logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
298
- try:
299
- error_obj = json.loads(error_json_str)
300
- except json.JSONDecodeError:
301
- error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
302
- yield format_openai_sse_delta({"error": error_obj})
303
- yield "data: [DONE]\n\n"
304
- return "".join(accumulated_text_parts).strip(), api_error_yielded
305
-
306
- else:
307
- try:
308
- event_data = json.loads(data_part)
309
- if event_data.get("eventType") == "fulfillment":
310
- delta_content = event_data.get("answer", "")
311
- if delta_content is None: delta_content = ""
312
- accumulated_text_parts.append(delta_content)
313
-
314
- choice_delta = {}
315
- if not first_chunk_sent:
316
- choice_delta["role"] = "assistant"
317
- choice_delta["content"] = delta_content
318
- first_chunk_sent = True
319
- else:
320
- choice_delta["content"] = delta_content
321
-
322
- if not choice_delta.get("content") and not choice_delta.get("role"):
323
- if not (choice_delta.get("role") and not choice_delta.get("content")):
324
- continue
325
-
326
- openai_chunk = {
327
- "id": "chatcmpl-" + str(uuid.uuid4())[:12],
328
- "object": "chat.completion.chunk",
329
- "created": int(time.time()),
330
- "model": openai_model_name_for_response,
331
- "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
332
- }
333
- yield format_openai_sse_delta(openai_chunk)
334
- except json.JSONDecodeError:
335
- logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
336
- continue
337
-
338
- if not api_error_yielded:
339
- if not last_line_str.startswith("data: [DONE]"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
341
  yield "data: [DONE]\n\n"
 
 
 
 
 
 
 
 
 
 
 
342
 
343
- except requests.exceptions.RequestException:
344
- logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {current_attempt_num_logging}): Session: {session_id_for_attempt}", exc_info=False)
345
- raise
346
-
347
- except Exception as e:
348
- logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
349
- api_error_yielded = True
350
- error_payload = {
351
- "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
352
- }
353
- yield format_openai_sse_delta(error_payload)
354
- yield "data: [DONE]\n\n"
355
 
356
- return "".join(accumulated_text_parts).strip(), api_error_yielded
 
 
 
 
357
 
358
 
359
  @app.route("/v1/chat/completions", methods=["POST"])
@@ -440,12 +469,13 @@ def chat_completions():
440
  logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
441
 
442
  if selected_apikey_for_outer_retry:
443
- if status_code_from_exc == 524: # HTTP 524: A Timeout Occurred (Cloudflare)
444
  logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
445
- # For other client/server errors that might indicate a key issue or persistent service issue with this key
446
- elif status_code_from_exc and (400 <= status_code_from_exc < 500 or status_code_from_exc in [500, 502, 503]): # excluding 524
 
447
  keymgr.mark_bad(selected_apikey_for_outer_retry)
448
- elif not status_code_from_exc : # Network errors without a status code (e.g., connection refused, DNS failure)
449
  keymgr.mark_bad(selected_apikey_for_outer_retry)
450
 
451
  if key_retry_count >= max_key_retries:
@@ -458,23 +488,22 @@ def chat_completions():
458
  except Exception as e_outer:
459
  last_exception_for_key_retry = e_outer
460
  logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
461
- if selected_apikey_for_outer_retry: # Mark key bad on any other unexpected exception during setup
462
  keymgr.mark_bad(selected_apikey_for_outer_retry)
463
  break
464
 
465
- # This block is reached if all key_retry_count attempts in with_valid_key_and_session fail
466
- error_message = "重试次数过多,请检查上下文长度! 或联系管理员!" # User requested message
467
- error_code_str = "max_retries_check_context_contact_admin" # Custom code for this scenario
468
 
469
  logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
470
 
471
  if is_stream_request:
472
  def error_stream_gen():
473
- yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_setup_error_max_retries", "code": error_code_str}})
474
  yield "data: [DONE]\n\n"
475
- return Response(error_stream_gen(), content_type='text/event-stream', status=500) # Status 500 as requested
476
  else:
477
- return jsonify({"error": error_message, "code": error_code_str}), 500 # Status 500 as requested
478
 
479
  return with_valid_key_and_session(attempt_ondemand_request_wrapper)
480
 
@@ -488,7 +517,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
488
  while empty_retry_attempt_num < max_empty_response_retries:
489
  empty_retry_attempt_num += 1
490
  accumulated_text_this_attempt = ""
491
- api_error_in_attempt = False
492
 
493
  if empty_retry_attempt_num > 1:
494
  logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
@@ -505,15 +534,16 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
505
  if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
506
  if status_code_from_exc_retry_setup == 524:
507
  logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
 
 
508
  else:
509
  keymgr.mark_bad(current_apikey_for_attempt)
510
 
511
  if empty_retry_attempt_num >= max_empty_response_retries:
512
- # Final failure to get key/session for the last empty response retry
513
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
514
  final_error_code = "max_retries_check_context_contact_admin"
515
  logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
516
- yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code}})
517
  yield "data: [DONE]\n\n"
518
  return
519
  time.sleep(1)
@@ -523,6 +553,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
523
  log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
524
 
525
  try:
 
526
  result_tuple = yield from _execute_one_stream_attempt(
527
  current_apikey_for_attempt,
528
  current_session_id_for_attempt,
@@ -532,9 +563,9 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
532
  f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
533
  )
534
  accumulated_text_this_attempt = result_tuple[0]
535
- api_error_in_attempt = result_tuple[1]
536
 
537
- except requests.exceptions.RequestException as e_req:
538
  log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
539
  status_code_from_exc_stream = None
540
  if hasattr(e_req, 'response') and e_req.response is not None:
@@ -545,37 +576,46 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
545
  if current_apikey_for_attempt:
546
  if status_code_from_exc_stream == 524:
547
  logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
 
 
548
  else:
549
  keymgr.mark_bad(current_apikey_for_attempt)
550
 
551
  if empty_retry_attempt_num == 1:
 
 
552
  raise e_req
553
 
 
554
  if empty_retry_attempt_num >= max_empty_response_retries:
555
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
556
  final_error_code = "max_retries_check_context_contact_admin"
557
  logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
558
- yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code}})
559
  yield "data: [DONE]\n\n"
560
  return
561
  time.sleep(1)
562
- continue
563
 
 
564
  if api_error_in_attempt:
565
- logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误,已将错误信息流式传输。")
566
- return
 
 
567
 
568
  if accumulated_text_this_attempt:
569
  logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
570
- return
571
 
 
572
  logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
573
  if empty_retry_attempt_num >= max_empty_response_retries:
574
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
575
  final_error_code = "max_retries_check_context_contact_admin"
576
  logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
577
  yield format_openai_sse_delta({
578
- "error": {"message": final_error_message, "type": "max_retries_exceeded_empty_response", "code": final_error_code}
579
  })
580
  yield "data: [DONE]\n\n"
581
  return
@@ -583,9 +623,6 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
583
  logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
584
  time.sleep(1)
585
 
586
- # Fallback if loop finishes unexpectedly (shouldn't happen with current logic)
587
- # This case means all max_empty_response_retries were exhausted, and the last one was also empty.
588
- # The specific error for this is handled inside the loop. This is a safeguard.
589
  final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
590
  final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
591
  logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
@@ -594,14 +631,12 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
594
 
595
 
596
  def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
597
- max_empty_response_retries = 5
598
  empty_retry_attempt_num = 0
599
 
600
  current_apikey_for_attempt = initial_apikey
601
  current_session_id_for_attempt = initial_session_id
602
 
603
- url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
604
-
605
  while empty_retry_attempt_num < max_empty_response_retries:
606
  empty_retry_attempt_num += 1
607
 
@@ -610,7 +645,6 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
610
  try:
611
  current_apikey_for_attempt = keymgr.get()
612
  current_session_id_for_attempt = create_session(current_apikey_for_attempt)
613
- url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
614
  logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
615
  except (ValueError, requests.exceptions.RequestException) as e_key_session:
616
  logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
@@ -621,110 +655,138 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
621
  if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
622
  if status_code_from_exc_retry_setup_ns == 524:
623
  logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
 
 
624
  else:
625
  keymgr.mark_bad(current_apikey_for_attempt)
 
626
  if empty_retry_attempt_num >= max_empty_response_retries:
627
- # Final failure to get key/session for the last empty response retry
628
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
629
  final_error_code = "max_retries_check_context_contact_admin"
630
  logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
631
- return jsonify({"error": final_error_message, "code": final_error_code}), 500 # Status 500
632
  time.sleep(1)
633
  current_apikey_for_attempt = None
634
  continue
635
 
636
  log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
637
- logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
638
 
639
- payload = { "query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync" }
640
- headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
 
641
 
642
- try:
643
- resp = requests.post(url, json=payload, headers=headers, timeout=120)
644
- resp.raise_for_status()
645
-
646
- response_json = resp.json()
647
- if "data" not in response_json or "answer" not in response_json["data"]:
648
- logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
649
- # This is an API format error, not an empty response.
650
- # If this happens on the first attempt, it will be re-raised to with_valid_key_and_session
651
- # If on a retry for empty response, it's a new kind of failure for that attempt.
652
- raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}.")
653
 
654
- ai_response_content = response_json["data"]["answer"]
655
- if ai_response_content is None: ai_response_content = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
 
657
- if ai_response_content.strip():
658
- logging.info(f"【同步请求】({log_attempt_str}) 成功获取非空内容。")
659
- openai_response_obj = {
660
- "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
661
- "model": openai_model_name_for_response,
662
- "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
663
- "usage": {}
664
- }
665
- return jsonify(openai_response_obj)
666
- else:
667
- logging.warning(f"【同步请求】({log_attempt_str}) 返回空回复。")
668
- if empty_retry_attempt_num >= max_empty_response_retries:
669
- final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
670
- final_error_code = "max_retries_check_context_contact_admin"
671
- logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
672
- return jsonify({
673
- "error": final_error_message,
674
  "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
675
  "model": openai_model_name_for_response,
676
- "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
677
- "usage": {},
678
- "code": final_error_code
679
- }), 504 # Status 504 as requested
680
- logging.info(f"【同步请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
681
- time.sleep(1)
682
-
683
- except requests.exceptions.RequestException as e_req:
684
- log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
685
- status_code_from_exc_sync = None
686
- if hasattr(e_req, 'response') and e_req.response is not None:
687
- status_code_from_exc_sync = e_req.response.status_code
688
-
689
- logging.warning(f"【同步请求】({log_attempt_str} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
690
 
691
- if current_apikey_for_attempt:
692
- if status_code_from_exc_sync == 524:
693
- logging.info(f"【KeyManager】Key {log_key_display_sync} not marked bad for 524 error during sync attempt.")
694
- else:
695
- keymgr.mark_bad(current_apikey_for_attempt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
 
697
- if empty_retry_attempt_num == 1:
698
- raise e_req
 
 
 
 
 
 
 
 
 
 
 
699
  if empty_retry_attempt_num >= max_empty_response_retries:
700
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
701
  final_error_code = "max_retries_check_context_contact_admin"
702
- logging.error(f"【同步请求】在最后一次空回复重试时发生请求错误: {e_req}")
703
- return jsonify({"error": final_error_message, "code":final_error_code, "details": str(e_req)}), 500 # Status 500
704
- time.sleep(1)
705
- continue
706
- except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
707
- # This catches the ValueError from "data.answer" missing, or JSON decode errors
708
- logging.error(f"【同步请求】({log_attempt_str}) 处理响应或格式时出错: {e_parse}", exc_info=True)
709
- if empty_retry_attempt_num == 1:
710
- # If format error on first attempt, re-raise to be caught by with_valid_key_and_session
711
- # This implies a more fundamental issue than just an empty response.
712
- raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
713
-
714
- # If it's a format error during an empty-response retry, it's problematic.
715
- if empty_retry_attempt_num >= max_empty_response_retries:
716
- final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
717
- final_error_code = "max_retries_check_context_contact_admin"
718
- logging.error(f"【同步请求】在最后一次空回复重试时发生响应解析错误: {e_parse}")
719
- return jsonify({"error": final_error_message, "code": final_error_code, "details": f"Parse error: {str(e_parse)}"}), 500 # Status 500
720
- time.sleep(1)
721
- continue # Try next key for empty response retry
722
-
723
- # Fallback if loop finishes unexpectedly (e.g. all retries were empty and the last one didn't hit the specific return)
724
  final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
725
  final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
726
  logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
727
- return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500 # Status 500
728
 
729
 
730
  @app.route("/v1/models", methods=["GET"])
 
254
 
255
  accumulated_text_parts = []
256
  api_error_yielded = False
257
+
258
+ # Inner retry loop for 500 errors specifically for this attempt
259
+ max_500_retries_for_this_call = 5
260
+ current_500_retry_count = 0
261
+
262
+ while current_500_retry_count < max_500_retries_for_this_call:
263
+ current_500_retry_count += 1
264
+ if current_500_retry_count > 1: # Log if this is a 500-retry
265
+ logging.info(f"【流式请求子尝试 {current_attempt_num_logging} - 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}】Key: {keymgr.display_key(apikey_for_attempt)}")
266
+ else:
267
+ logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
268
 
 
269
 
270
+ try:
271
+ with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
272
+ if resp.status_code == 500: # Specific handling for 500 error
273
+ logging.warning(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}, 500重试 {current_500_retry_count}) 收到500错误。Session: {session_id_for_attempt}")
274
+ if current_500_retry_count >= max_500_retries_for_this_call:
275
+ logging.error(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}) 达到500错误最大重试次数。将错误传递给上层。")
276
+ # Yield a specific error for persistent 500 after retries
277
+ api_error_yielded = True
278
+ error_payload = {"error": {"message": f"OnDemand API persistent 500 error after {max_500_retries_for_this_call} retries (Attempt {current_attempt_num_logging}).",
279
+ "type": "on_demand_persistent_500_error", "code": 500}}
280
+ yield format_openai_sse_delta(error_payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  yield "data: [DONE]\n\n"
282
  return "".join(accumulated_text_parts).strip(), api_error_yielded
283
+ time.sleep(1) # Wait before retrying the 500 error
284
+ continue # Go to the next iteration of the 500-retry loop
285
+
286
+ if resp.status_code != 200: # Handle other non-200, non-500 errors
287
+ api_error_yielded = True
288
+ error_text = resp.text
289
+ logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
290
+ error_payload = {
291
+ "error": {
292
+ "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
293
+ "type": "on_demand_api_error",
294
+ "code": resp.status_code
295
+ }
296
+ }
297
+ yield format_openai_sse_delta(error_payload)
298
+ yield "data: [DONE]\n\n"
299
+ return "".join(accumulated_text_parts).strip(), api_error_yielded
300
+
301
+ # Successful 200 response, process stream
302
+ first_chunk_sent = False
303
+ last_line_str = ""
304
+ for line_bytes in resp.iter_lines():
305
+ if not line_bytes:
306
+ continue
307
+ line_str = line_bytes.decode("utf-8")
308
+ last_line_str = line_str
309
+
310
+ if line_str.startswith("data:"):
311
+ data_part = line_str[len("data:"):].strip()
312
+ if data_part == "[DONE]":
313
+ logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
314
+ yield "data: [DONE]\n\n"
315
+ return "".join(accumulated_text_parts).strip(), api_error_yielded
316
+ elif data_part.startswith("[ERROR]:"):
317
+ api_error_yielded = True
318
+ error_json_str = data_part[len("[ERROR]:"):].strip()
319
+ logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
320
+ try:
321
+ error_obj = json.loads(error_json_str)
322
+ except json.JSONDecodeError:
323
+ error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
324
+ yield format_openai_sse_delta({"error": error_obj})
325
+ yield "data: [DONE]\n\n"
326
+ return "".join(accumulated_text_parts).strip(), api_error_yielded
327
+ else:
328
+ try:
329
+ event_data = json.loads(data_part)
330
+ if event_data.get("eventType") == "fulfillment":
331
+ delta_content = event_data.get("answer", "")
332
+ if delta_content is None: delta_content = ""
333
+ accumulated_text_parts.append(delta_content)
334
+ choice_delta = {}
335
+ if not first_chunk_sent:
336
+ choice_delta["role"] = "assistant"
337
+ choice_delta["content"] = delta_content
338
+ first_chunk_sent = True
339
+ else:
340
+ choice_delta["content"] = delta_content
341
+ if not choice_delta.get("content") and not choice_delta.get("role"):
342
+ if not (choice_delta.get("role") and not choice_delta.get("content")):
343
+ continue
344
+ openai_chunk = {
345
+ "id": "chatcmpl-" + str(uuid.uuid4())[:12],
346
+ "object": "chat.completion.chunk",
347
+ "created": int(time.time()),
348
+ "model": openai_model_name_for_response,
349
+ "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
350
+ }
351
+ yield format_openai_sse_delta(openai_chunk)
352
+ except json.JSONDecodeError:
353
+ logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
354
+ continue
355
+
356
+ if not api_error_yielded and not last_line_str.startswith("data: [DONE]"):
357
  logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
358
  yield "data: [DONE]\n\n"
359
+ return "".join(accumulated_text_parts).strip(), api_error_yielded # Success for this attempt
360
+
361
+ except requests.exceptions.RequestException as e_req_inner:
362
+ # This catches network errors or HTTP errors if resp.raise_for_status() was called (e.g. for persistent 500)
363
+ logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 请求时发生异常: {e_req_inner}, Key: {keymgr.display_key(apikey_for_attempt)}")
364
+ # If this was the last 500-retry, or another RequestException, re-raise to be handled by handle_stream_request's try-except
365
+ if current_500_retry_count >= max_500_retries_for_this_call or (hasattr(e_req_inner, 'response') and e_req_inner.response is not None and e_req_inner.response.status_code != 500):
366
+ raise e_req_inner
367
+ # If it was a 500 and we still have retries for 500, the loop will continue after a delay.
368
+ time.sleep(1) # Wait before retrying the 500 error
369
+ # The loop will continue to the next 500-retry.
370
 
371
+ except Exception as e_inner_unknown:
372
+ logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e_inner_unknown}, Session: {session_id_for_attempt}", exc_info=True)
373
+ api_error_yielded = True
374
+ error_payload = {
375
+ "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e_inner_unknown)}", "type": "unknown_streaming_error_in_attempt"}
376
+ }
377
+ yield format_openai_sse_delta(error_payload)
378
+ yield "data: [DONE]\n\n"
379
+ return "".join(accumulated_text_parts).strip(), api_error_yielded
 
 
 
380
 
381
+ # If 500-retry loop exhausted without returning (should ideally raise inside or return success)
382
+ logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 500错误重试循环意外结束。")
383
+ # This case should ideally not be reached if logic inside loop is correct.
384
+ # Re-raise a generic error to be caught by the caller if it does.
385
+ raise requests.exceptions.RequestException(f"Exhausted internal 500 retries for attempt {current_attempt_num_logging} without success or specific error propagation.")
386
 
387
 
388
  @app.route("/v1/chat/completions", methods=["POST"])
 
469
  logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
470
 
471
  if selected_apikey_for_outer_retry:
472
+ if status_code_from_exc == 524:
473
  logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
474
+ elif status_code_from_exc == 500:
475
+ logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 500 error.")
476
+ elif status_code_from_exc and ((400 <= status_code_from_exc < 500) or status_code_from_exc in [502, 503]): # Explicitly list codes that mark bad, excluding 500, 524
477
  keymgr.mark_bad(selected_apikey_for_outer_retry)
478
+ elif not status_code_from_exc :
479
  keymgr.mark_bad(selected_apikey_for_outer_retry)
480
 
481
  if key_retry_count >= max_key_retries:
 
488
  except Exception as e_outer:
489
  last_exception_for_key_retry = e_outer
490
  logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
491
+ if selected_apikey_for_outer_retry:
492
  keymgr.mark_bad(selected_apikey_for_outer_retry)
493
  break
494
 
495
+ error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
496
+ error_code_str = "max_retries_check_context_contact_admin"
 
497
 
498
  logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
499
 
500
  if is_stream_request:
501
  def error_stream_gen():
502
+ yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_max_retries_exceeded", "code": error_code_str}})
503
  yield "data: [DONE]\n\n"
504
+ return Response(error_stream_gen(), content_type='text/event-stream', status=500)
505
  else:
506
+ return jsonify({"error": error_message, "code": error_code_str}), 500
507
 
508
  return with_valid_key_and_session(attempt_ondemand_request_wrapper)
509
 
 
517
  while empty_retry_attempt_num < max_empty_response_retries:
518
  empty_retry_attempt_num += 1
519
  accumulated_text_this_attempt = ""
520
+ api_error_in_attempt = False # Renamed from api_error_yielded for clarity in this scope
521
 
522
  if empty_retry_attempt_num > 1:
523
  logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
 
534
  if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
535
  if status_code_from_exc_retry_setup == 524:
536
  logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
537
+ elif status_code_from_exc_retry_setup == 500:
538
+ logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for stream retry.")
539
  else:
540
  keymgr.mark_bad(current_apikey_for_attempt)
541
 
542
  if empty_retry_attempt_num >= max_empty_response_retries:
 
543
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
544
  final_error_code = "max_retries_check_context_contact_admin"
545
  logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
546
+ yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code, "details": str(e_key_session)}})
547
  yield "data: [DONE]\n\n"
548
  return
549
  time.sleep(1)
 
553
  log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
554
 
555
  try:
556
+ # result_tuple will be (accumulated_text, api_error_yielded_flag_from_execute)
557
  result_tuple = yield from _execute_one_stream_attempt(
558
  current_apikey_for_attempt,
559
  current_session_id_for_attempt,
 
563
  f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
564
  )
565
  accumulated_text_this_attempt = result_tuple[0]
566
+ api_error_in_attempt = result_tuple[1] # This tells if _execute_one_stream_attempt itself yielded an error SSE
567
 
568
+ except requests.exceptions.RequestException as e_req: # Catch errors from _execute_one_stream_attempt's requests.post
569
  log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
570
  status_code_from_exc_stream = None
571
  if hasattr(e_req, 'response') and e_req.response is not None:
 
576
  if current_apikey_for_attempt:
577
  if status_code_from_exc_stream == 524:
578
  logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
579
+ elif status_code_from_exc_stream == 500:
580
+ logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 500 error during stream attempt.")
581
  else:
582
  keymgr.mark_bad(current_apikey_for_attempt)
583
 
584
  if empty_retry_attempt_num == 1:
585
+ # If the very first attempt (initial_apikey) fails with RequestException,
586
+ # re-raise to let with_valid_key_and_session handle key rotation.
587
  raise e_req
588
 
589
+ # If it's an empty-response retry (attempt_num > 1) that failed with RequestException
590
  if empty_retry_attempt_num >= max_empty_response_retries:
591
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
592
  final_error_code = "max_retries_check_context_contact_admin"
593
  logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
594
+ yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code, "details": str(e_req)}})
595
  yield "data: [DONE]\n\n"
596
  return
597
  time.sleep(1)
598
+ continue # To the next iteration of the empty_retry_attempt_num loop (will try new key/session)
599
 
600
+ # After _execute_one_stream_attempt has finished (either normally or yielded its own error)
601
  if api_error_in_attempt:
602
+ # This means _execute_one_stream_attempt handled an API error (like 429, or persistent 500) and yielded an error SSE.
603
+ # The stream is already complete with an error.
604
+ logging.warning(f"【流式请求】({log_attempt_str}) 子尝试已处理并流式传输API错误。")
605
+ return # Stop further empty response retries.
606
 
607
  if accumulated_text_this_attempt:
608
  logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
609
+ return # Stream was successful and content yielded.
610
 
611
+ # If we reach here, content was empty from _execute_one_stream_attempt, and no API error was yielded by it.
612
  logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
613
  if empty_retry_attempt_num >= max_empty_response_retries:
614
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
615
  final_error_code = "max_retries_check_context_contact_admin"
616
  logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
617
  yield format_openai_sse_delta({
618
+ "error": {"message": final_error_message, "type": "max_empty_retries_exceeded", "code": final_error_code}
619
  })
620
  yield "data: [DONE]\n\n"
621
  return
 
623
  logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
624
  time.sleep(1)
625
 
 
 
 
626
  final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
627
  final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
628
  logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
 
631
 
632
 
633
  def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
634
+ max_empty_response_retries = 5
635
  empty_retry_attempt_num = 0
636
 
637
  current_apikey_for_attempt = initial_apikey
638
  current_session_id_for_attempt = initial_session_id
639
 
 
 
640
  while empty_retry_attempt_num < max_empty_response_retries:
641
  empty_retry_attempt_num += 1
642
 
 
645
  try:
646
  current_apikey_for_attempt = keymgr.get()
647
  current_session_id_for_attempt = create_session(current_apikey_for_attempt)
 
648
  logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
649
  except (ValueError, requests.exceptions.RequestException) as e_key_session:
650
  logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
 
655
  if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
656
  if status_code_from_exc_retry_setup_ns == 524:
657
  logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
658
+ elif status_code_from_exc_retry_setup_ns == 500:
659
+ logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for non-stream retry.")
660
  else:
661
  keymgr.mark_bad(current_apikey_for_attempt)
662
+
663
  if empty_retry_attempt_num >= max_empty_response_retries:
 
664
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
665
  final_error_code = "max_retries_check_context_contact_admin"
666
  logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
667
+ return jsonify({"error": final_error_message, "code": final_error_code, "details": str(e_key_session)}), 500
668
  time.sleep(1)
669
  current_apikey_for_attempt = None
670
  continue
671
 
672
  log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
 
673
 
674
+ # Inner loop for 500-error retries for the current key/session
675
+ max_500_retries_for_this_call = 5
676
+ current_500_retry_count = 0
677
 
678
+ while current_500_retry_count < max_500_retries_for_this_call:
679
+ current_500_retry_count += 1
680
+ if current_500_retry_count > 1:
681
+ logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}, 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
682
+ else:
683
+ logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
684
+
685
+ url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query" # Ensure URL uses current session
686
+ payload = { "query": final_query_to_ondemand, "endpointId": target_endpoint_id, "pluginIds": [], "responseMode": "sync" } # Use correct query and endpoint
687
+ headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
 
688
 
689
+ try:
690
+ resp = requests.post(url, json=payload, headers=headers, timeout=120)
691
+
692
+ if resp.status_code == 500:
693
+ logging.warning(f"【OnDemand同步错误】({log_attempt_str}, 500重试 {current_500_retry_count}) 收到500错误。")
694
+ if current_500_retry_count >= max_500_retries_for_this_call:
695
+ logging.error(f"【OnDemand同步错误】({log_attempt_str}) 达到500错误最大重试次数。将错误传递给上层。")
696
+ resp.raise_for_status() # Re-raise HTTPError(500) to be caught by outer RequestException handler
697
+ time.sleep(1)
698
+ continue # Next 500-retry iteration
699
+
700
+ resp.raise_for_status() # For other non-200 errors (will be caught by RequestException below)
701
+
702
+ # Successful 200 OK
703
+ response_json = resp.json()
704
+ if "data" not in response_json or "answer" not in response_json["data"]:
705
+ logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
706
+ raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}, 500-retry {current_500_retry_count}.")
707
+
708
+ ai_response_content = response_json["data"]["answer"]
709
+ if ai_response_content is None: ai_response_content = ""
710
 
711
+ if ai_response_content.strip():
712
+ logging.info(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 成功获取非空内容。")
713
+ openai_response_obj = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714
  "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
715
  "model": openai_model_name_for_response,
716
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
717
+ "usage": {}
718
+ }
719
+ return jsonify(openai_response_obj) # SUCCESS
720
+ else:
721
+ # Empty response after a 200 OK (and non-500 error)
722
+ logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 返回空回复。")
723
+ # This attempt (for this key/session) resulted in an empty response.
724
+ # Break from the 500-retry loop to let the outer empty-response loop handle it.
725
+ break # Break from current_500_retry_count loop
 
 
 
 
726
 
727
+ except requests.exceptions.RequestException as e_req: # Catches HTTPError (including re-raised 500) or other network issues
728
+ log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
729
+ status_code_from_exc_sync = None
730
+ if hasattr(e_req, 'response') and e_req.response is not None:
731
+ status_code_from_exc_sync = e_req.response.status_code
732
+
733
+ logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
734
+
735
+ # Key marking is handled by with_valid_key_and_session based on what's re-raised.
736
+ # If it's a 500 that exhausted its inner retries, it will be re-raised.
737
+ # If it's another RequestException, it's also re-raised.
738
+
739
+ # If this was the initial overall attempt (empty_retry_attempt_num == 1)
740
+ # AND this specific 500-retry loop has exhausted OR it's not a 500 error that can be retried by this inner loop:
741
+ if current_500_retry_count >= max_500_retries_for_this_call or status_code_from_exc_sync != 500:
742
+ if empty_retry_attempt_num == 1:
743
+ raise e_req # Re-raise for with_valid_key_and_session to handle key rotation
744
+ else:
745
+ # This was an empty-response retry that then hit a persistent 500 or other RequestException.
746
+ # This attempt for this key has failed. Break 500-retry loop to go to next empty-response attempt.
747
+ # To signal this failure for the current key/session to the outer empty-response loop:
748
+ raise e_req # This will be caught by the outer try-except in the empty_retry_attempt_num loop
749
+
750
+ # If it was a 500 and we still have 500-retries, the loop will continue after sleep.
751
+ time.sleep(1)
752
+ continue # To the next iteration of current_500_retry_count loop
753
 
754
+ except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
755
+ logging.error(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 处理响应或格式时出错: {e_parse}", exc_info=True)
756
+ if empty_retry_attempt_num == 1 and current_500_retry_count == 1 : # Format error on very first try
757
+ raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
758
+ # If format error during a retry, it's a failure for this key/session attempt
759
+ # Break from 500-retry loop, let empty-response loop handle it.
760
+ # To signal this failure for the current key/session:
761
+ raise requests.exceptions.RequestException(f"Response format error during retry: {e_parse}") from e_parse
762
+
763
+
764
+ # If the 500-retry loop completed (either successfully got non-empty, or broke due to empty, or raised an error)
765
+ # Check if we are here because of an empty response (meaning the 500-retry loop broke after a 200 OK but empty content)
766
+ if not ai_response_content.strip() and not ( 'resp' in locals() and resp.status_code != 200 ): # Check if it was an empty response from a 200 OK
767
  if empty_retry_attempt_num >= max_empty_response_retries:
768
  final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
769
  final_error_code = "max_retries_check_context_contact_admin"
770
+ logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
771
+ return jsonify({
772
+ "error": final_error_message,
773
+ "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
774
+ "model": openai_model_name_for_response,
775
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
776
+ "usage": {}, "code": final_error_code
777
+ }), 500
778
+ logging.info(f"【同步请求】空回复(在500-重试循环之后),准备进行下一个空回复尝试。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
779
+ time.sleep(1)
780
+ # Outer loop (empty_retry_attempt_num) will continue
781
+ # If we are here and didn't return a success, it means the 500-retry loop might have been exhausted by 500s
782
+ # but didn't re-raise correctly, or some other path. This is a fallback.
783
+ # However, if it exhausted 500s, it should have re-raised an exception.
784
+
785
+ # Fallback if outer empty_retry_attempt_num loop finishes
 
 
 
 
 
 
786
  final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
787
  final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
788
  logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
789
+ return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
790
 
791
 
792
  @app.route("/v1/models", methods=["GET"])