Update openai_ondemand_adapter.py
Browse files- openai_ondemand_adapter.py +262 -200
openai_ondemand_adapter.py
CHANGED
|
@@ -254,106 +254,135 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 254 |
|
| 255 |
accumulated_text_parts = []
|
| 256 |
api_error_yielded = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
|
| 258 |
-
logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
|
| 259 |
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
"message": f"OnDemand API
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
}
|
| 272 |
-
}
|
| 273 |
-
yield format_openai_sse_delta(error_payload)
|
| 274 |
-
yield "data: [DONE]\n\n"
|
| 275 |
-
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 276 |
-
|
| 277 |
-
first_chunk_sent = False
|
| 278 |
-
last_line_str = ""
|
| 279 |
-
for line_bytes in resp.iter_lines():
|
| 280 |
-
if not line_bytes:
|
| 281 |
-
continue
|
| 282 |
-
|
| 283 |
-
line_str = line_bytes.decode("utf-8")
|
| 284 |
-
last_line_str = line_str
|
| 285 |
-
|
| 286 |
-
if line_str.startswith("data:"):
|
| 287 |
-
data_part = line_str[len("data:"):].strip()
|
| 288 |
-
|
| 289 |
-
if data_part == "[DONE]":
|
| 290 |
-
logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
|
| 291 |
yield "data: [DONE]\n\n"
|
| 292 |
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
|
| 341 |
yield "data: [DONE]\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
}
|
| 353 |
-
yield format_openai_sse_delta(error_payload)
|
| 354 |
-
yield "data: [DONE]\n\n"
|
| 355 |
|
| 356 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
|
| 359 |
@app.route("/v1/chat/completions", methods=["POST"])
|
|
@@ -440,12 +469,13 @@ def chat_completions():
|
|
| 440 |
logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
|
| 441 |
|
| 442 |
if selected_apikey_for_outer_retry:
|
| 443 |
-
if status_code_from_exc == 524:
|
| 444 |
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
|
| 445 |
-
|
| 446 |
-
|
|
|
|
| 447 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 448 |
-
elif not status_code_from_exc :
|
| 449 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 450 |
|
| 451 |
if key_retry_count >= max_key_retries:
|
|
@@ -458,23 +488,22 @@ def chat_completions():
|
|
| 458 |
except Exception as e_outer:
|
| 459 |
last_exception_for_key_retry = e_outer
|
| 460 |
logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
|
| 461 |
-
if selected_apikey_for_outer_retry:
|
| 462 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 463 |
break
|
| 464 |
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
error_code_str = "max_retries_check_context_contact_admin" # Custom code for this scenario
|
| 468 |
|
| 469 |
logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
|
| 470 |
|
| 471 |
if is_stream_request:
|
| 472 |
def error_stream_gen():
|
| 473 |
-
yield format_openai_sse_delta({"error": {"message": error_message, "type": "
|
| 474 |
yield "data: [DONE]\n\n"
|
| 475 |
-
return Response(error_stream_gen(), content_type='text/event-stream', status=500)
|
| 476 |
else:
|
| 477 |
-
return jsonify({"error": error_message, "code": error_code_str}), 500
|
| 478 |
|
| 479 |
return with_valid_key_and_session(attempt_ondemand_request_wrapper)
|
| 480 |
|
|
@@ -488,7 +517,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 488 |
while empty_retry_attempt_num < max_empty_response_retries:
|
| 489 |
empty_retry_attempt_num += 1
|
| 490 |
accumulated_text_this_attempt = ""
|
| 491 |
-
api_error_in_attempt = False
|
| 492 |
|
| 493 |
if empty_retry_attempt_num > 1:
|
| 494 |
logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
|
|
@@ -505,15 +534,16 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 505 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 506 |
if status_code_from_exc_retry_setup == 524:
|
| 507 |
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
|
|
|
|
|
|
|
| 508 |
else:
|
| 509 |
keymgr.mark_bad(current_apikey_for_attempt)
|
| 510 |
|
| 511 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 512 |
-
# Final failure to get key/session for the last empty response retry
|
| 513 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 514 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 515 |
logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
|
| 516 |
-
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code}})
|
| 517 |
yield "data: [DONE]\n\n"
|
| 518 |
return
|
| 519 |
time.sleep(1)
|
|
@@ -523,6 +553,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 523 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
| 524 |
|
| 525 |
try:
|
|
|
|
| 526 |
result_tuple = yield from _execute_one_stream_attempt(
|
| 527 |
current_apikey_for_attempt,
|
| 528 |
current_session_id_for_attempt,
|
|
@@ -532,9 +563,9 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 532 |
f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
|
| 533 |
)
|
| 534 |
accumulated_text_this_attempt = result_tuple[0]
|
| 535 |
-
api_error_in_attempt = result_tuple[1]
|
| 536 |
|
| 537 |
-
except requests.exceptions.RequestException as e_req:
|
| 538 |
log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
|
| 539 |
status_code_from_exc_stream = None
|
| 540 |
if hasattr(e_req, 'response') and e_req.response is not None:
|
|
@@ -545,37 +576,46 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 545 |
if current_apikey_for_attempt:
|
| 546 |
if status_code_from_exc_stream == 524:
|
| 547 |
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
|
|
|
|
|
|
|
| 548 |
else:
|
| 549 |
keymgr.mark_bad(current_apikey_for_attempt)
|
| 550 |
|
| 551 |
if empty_retry_attempt_num == 1:
|
|
|
|
|
|
|
| 552 |
raise e_req
|
| 553 |
|
|
|
|
| 554 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 555 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 556 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 557 |
logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
|
| 558 |
-
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code}})
|
| 559 |
yield "data: [DONE]\n\n"
|
| 560 |
return
|
| 561 |
time.sleep(1)
|
| 562 |
-
continue
|
| 563 |
|
|
|
|
| 564 |
if api_error_in_attempt:
|
| 565 |
-
|
| 566 |
-
|
|
|
|
|
|
|
| 567 |
|
| 568 |
if accumulated_text_this_attempt:
|
| 569 |
logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
|
| 570 |
-
return
|
| 571 |
|
|
|
|
| 572 |
logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
|
| 573 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 574 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 575 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 576 |
logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
|
| 577 |
yield format_openai_sse_delta({
|
| 578 |
-
"error": {"message": final_error_message, "type": "
|
| 579 |
})
|
| 580 |
yield "data: [DONE]\n\n"
|
| 581 |
return
|
|
@@ -583,9 +623,6 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 583 |
logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 584 |
time.sleep(1)
|
| 585 |
|
| 586 |
-
# Fallback if loop finishes unexpectedly (shouldn't happen with current logic)
|
| 587 |
-
# This case means all max_empty_response_retries were exhausted, and the last one was also empty.
|
| 588 |
-
# The specific error for this is handled inside the loop. This is a safeguard.
|
| 589 |
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 590 |
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
|
| 591 |
logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
|
|
@@ -594,14 +631,12 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 594 |
|
| 595 |
|
| 596 |
def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
|
| 597 |
-
max_empty_response_retries = 5
|
| 598 |
empty_retry_attempt_num = 0
|
| 599 |
|
| 600 |
current_apikey_for_attempt = initial_apikey
|
| 601 |
current_session_id_for_attempt = initial_session_id
|
| 602 |
|
| 603 |
-
url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
|
| 604 |
-
|
| 605 |
while empty_retry_attempt_num < max_empty_response_retries:
|
| 606 |
empty_retry_attempt_num += 1
|
| 607 |
|
|
@@ -610,7 +645,6 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
|
|
| 610 |
try:
|
| 611 |
current_apikey_for_attempt = keymgr.get()
|
| 612 |
current_session_id_for_attempt = create_session(current_apikey_for_attempt)
|
| 613 |
-
url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
|
| 614 |
logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
|
| 615 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 616 |
logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
|
@@ -621,110 +655,138 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
|
|
| 621 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 622 |
if status_code_from_exc_retry_setup_ns == 524:
|
| 623 |
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
|
|
|
|
|
|
|
| 624 |
else:
|
| 625 |
keymgr.mark_bad(current_apikey_for_attempt)
|
|
|
|
| 626 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 627 |
-
# Final failure to get key/session for the last empty response retry
|
| 628 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 629 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 630 |
logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
|
| 631 |
-
return jsonify({"error": final_error_message, "code": final_error_code}), 500
|
| 632 |
time.sleep(1)
|
| 633 |
current_apikey_for_attempt = None
|
| 634 |
continue
|
| 635 |
|
| 636 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
| 637 |
-
logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
|
| 638 |
|
| 639 |
-
|
| 640 |
-
|
|
|
|
| 641 |
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}.")
|
| 653 |
|
| 654 |
-
|
| 655 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 656 |
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 661 |
-
"model": openai_model_name_for_response,
|
| 662 |
-
"choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
|
| 663 |
-
"usage": {}
|
| 664 |
-
}
|
| 665 |
-
return jsonify(openai_response_obj)
|
| 666 |
-
else:
|
| 667 |
-
logging.warning(f"【同步请求】({log_attempt_str}) 返回空回复。")
|
| 668 |
-
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 669 |
-
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 670 |
-
final_error_code = "max_retries_check_context_contact_admin"
|
| 671 |
-
logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
|
| 672 |
-
return jsonify({
|
| 673 |
-
"error": final_error_message,
|
| 674 |
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 675 |
"model": openai_model_name_for_response,
|
| 676 |
-
"choices": [{"index": 0, "message": {"role": "assistant", "content":
|
| 677 |
-
"usage": {}
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
if hasattr(e_req, 'response') and e_req.response is not None:
|
| 687 |
-
status_code_from_exc_sync = e_req.response.status_code
|
| 688 |
-
|
| 689 |
-
logging.warning(f"【同步请求】({log_attempt_str} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
|
| 690 |
|
| 691 |
-
|
| 692 |
-
if
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
-
|
| 698 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 700 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 701 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 702 |
-
logging.error(f"
|
| 703 |
-
return jsonify({
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
logging.error(f"【同步请求】在最后一次空回复重试时发生响应解析错误: {e_parse}")
|
| 719 |
-
return jsonify({"error": final_error_message, "code": final_error_code, "details": f"Parse error: {str(e_parse)}"}), 500 # Status 500
|
| 720 |
-
time.sleep(1)
|
| 721 |
-
continue # Try next key for empty response retry
|
| 722 |
-
|
| 723 |
-
# Fallback if loop finishes unexpectedly (e.g. all retries were empty and the last one didn't hit the specific return)
|
| 724 |
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 725 |
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
|
| 726 |
logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
|
| 727 |
-
return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
|
| 728 |
|
| 729 |
|
| 730 |
@app.route("/v1/models", methods=["GET"])
|
|
|
|
| 254 |
|
| 255 |
accumulated_text_parts = []
|
| 256 |
api_error_yielded = False
|
| 257 |
+
|
| 258 |
+
# Inner retry loop for 500 errors specifically for this attempt
|
| 259 |
+
max_500_retries_for_this_call = 5
|
| 260 |
+
current_500_retry_count = 0
|
| 261 |
+
|
| 262 |
+
while current_500_retry_count < max_500_retries_for_this_call:
|
| 263 |
+
current_500_retry_count += 1
|
| 264 |
+
if current_500_retry_count > 1: # Log if this is a 500-retry
|
| 265 |
+
logging.info(f"【流式请求子尝试 {current_attempt_num_logging} - 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}】Key: {keymgr.display_key(apikey_for_attempt)}")
|
| 266 |
+
else:
|
| 267 |
+
logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
|
| 268 |
|
|
|
|
| 269 |
|
| 270 |
+
try:
|
| 271 |
+
with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
|
| 272 |
+
if resp.status_code == 500: # Specific handling for 500 error
|
| 273 |
+
logging.warning(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}, 500重试 {current_500_retry_count}) 收到500错误。Session: {session_id_for_attempt}")
|
| 274 |
+
if current_500_retry_count >= max_500_retries_for_this_call:
|
| 275 |
+
logging.error(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}) 达到500错误最大重试次数。将错误传递给上层。")
|
| 276 |
+
# Yield a specific error for persistent 500 after retries
|
| 277 |
+
api_error_yielded = True
|
| 278 |
+
error_payload = {"error": {"message": f"OnDemand API persistent 500 error after {max_500_retries_for_this_call} retries (Attempt {current_attempt_num_logging}).",
|
| 279 |
+
"type": "on_demand_persistent_500_error", "code": 500}}
|
| 280 |
+
yield format_openai_sse_delta(error_payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
yield "data: [DONE]\n\n"
|
| 282 |
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 283 |
+
time.sleep(1) # Wait before retrying the 500 error
|
| 284 |
+
continue # Go to the next iteration of the 500-retry loop
|
| 285 |
+
|
| 286 |
+
if resp.status_code != 200: # Handle other non-200, non-500 errors
|
| 287 |
+
api_error_yielded = True
|
| 288 |
+
error_text = resp.text
|
| 289 |
+
logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
|
| 290 |
+
error_payload = {
|
| 291 |
+
"error": {
|
| 292 |
+
"message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
|
| 293 |
+
"type": "on_demand_api_error",
|
| 294 |
+
"code": resp.status_code
|
| 295 |
+
}
|
| 296 |
+
}
|
| 297 |
+
yield format_openai_sse_delta(error_payload)
|
| 298 |
+
yield "data: [DONE]\n\n"
|
| 299 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 300 |
+
|
| 301 |
+
# Successful 200 response, process stream
|
| 302 |
+
first_chunk_sent = False
|
| 303 |
+
last_line_str = ""
|
| 304 |
+
for line_bytes in resp.iter_lines():
|
| 305 |
+
if not line_bytes:
|
| 306 |
+
continue
|
| 307 |
+
line_str = line_bytes.decode("utf-8")
|
| 308 |
+
last_line_str = line_str
|
| 309 |
+
|
| 310 |
+
if line_str.startswith("data:"):
|
| 311 |
+
data_part = line_str[len("data:"):].strip()
|
| 312 |
+
if data_part == "[DONE]":
|
| 313 |
+
logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
|
| 314 |
+
yield "data: [DONE]\n\n"
|
| 315 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 316 |
+
elif data_part.startswith("[ERROR]:"):
|
| 317 |
+
api_error_yielded = True
|
| 318 |
+
error_json_str = data_part[len("[ERROR]:"):].strip()
|
| 319 |
+
logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
|
| 320 |
+
try:
|
| 321 |
+
error_obj = json.loads(error_json_str)
|
| 322 |
+
except json.JSONDecodeError:
|
| 323 |
+
error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
|
| 324 |
+
yield format_openai_sse_delta({"error": error_obj})
|
| 325 |
+
yield "data: [DONE]\n\n"
|
| 326 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 327 |
+
else:
|
| 328 |
+
try:
|
| 329 |
+
event_data = json.loads(data_part)
|
| 330 |
+
if event_data.get("eventType") == "fulfillment":
|
| 331 |
+
delta_content = event_data.get("answer", "")
|
| 332 |
+
if delta_content is None: delta_content = ""
|
| 333 |
+
accumulated_text_parts.append(delta_content)
|
| 334 |
+
choice_delta = {}
|
| 335 |
+
if not first_chunk_sent:
|
| 336 |
+
choice_delta["role"] = "assistant"
|
| 337 |
+
choice_delta["content"] = delta_content
|
| 338 |
+
first_chunk_sent = True
|
| 339 |
+
else:
|
| 340 |
+
choice_delta["content"] = delta_content
|
| 341 |
+
if not choice_delta.get("content") and not choice_delta.get("role"):
|
| 342 |
+
if not (choice_delta.get("role") and not choice_delta.get("content")):
|
| 343 |
+
continue
|
| 344 |
+
openai_chunk = {
|
| 345 |
+
"id": "chatcmpl-" + str(uuid.uuid4())[:12],
|
| 346 |
+
"object": "chat.completion.chunk",
|
| 347 |
+
"created": int(time.time()),
|
| 348 |
+
"model": openai_model_name_for_response,
|
| 349 |
+
"choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
|
| 350 |
+
}
|
| 351 |
+
yield format_openai_sse_delta(openai_chunk)
|
| 352 |
+
except json.JSONDecodeError:
|
| 353 |
+
logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
|
| 354 |
+
continue
|
| 355 |
+
|
| 356 |
+
if not api_error_yielded and not last_line_str.startswith("data: [DONE]"):
|
| 357 |
logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
|
| 358 |
yield "data: [DONE]\n\n"
|
| 359 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded # Success for this attempt
|
| 360 |
+
|
| 361 |
+
except requests.exceptions.RequestException as e_req_inner:
|
| 362 |
+
# This catches network errors or HTTP errors if resp.raise_for_status() was called (e.g. for persistent 500)
|
| 363 |
+
logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 请求时发生异常: {e_req_inner}, Key: {keymgr.display_key(apikey_for_attempt)}")
|
| 364 |
+
# If this was the last 500-retry, or another RequestException, re-raise to be handled by handle_stream_request's try-except
|
| 365 |
+
if current_500_retry_count >= max_500_retries_for_this_call or (hasattr(e_req_inner, 'response') and e_req_inner.response is not None and e_req_inner.response.status_code != 500):
|
| 366 |
+
raise e_req_inner
|
| 367 |
+
# If it was a 500 and we still have retries for 500, the loop will continue after a delay.
|
| 368 |
+
time.sleep(1) # Wait before retrying the 500 error
|
| 369 |
+
# The loop will continue to the next 500-retry.
|
| 370 |
|
| 371 |
+
except Exception as e_inner_unknown:
|
| 372 |
+
logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e_inner_unknown}, Session: {session_id_for_attempt}", exc_info=True)
|
| 373 |
+
api_error_yielded = True
|
| 374 |
+
error_payload = {
|
| 375 |
+
"error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e_inner_unknown)}", "type": "unknown_streaming_error_in_attempt"}
|
| 376 |
+
}
|
| 377 |
+
yield format_openai_sse_delta(error_payload)
|
| 378 |
+
yield "data: [DONE]\n\n"
|
| 379 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
+
# If 500-retry loop exhausted without returning (should ideally raise inside or return success)
|
| 382 |
+
logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 500错误重试循环意外结束。")
|
| 383 |
+
# This case should ideally not be reached if logic inside loop is correct.
|
| 384 |
+
# Re-raise a generic error to be caught by the caller if it does.
|
| 385 |
+
raise requests.exceptions.RequestException(f"Exhausted internal 500 retries for attempt {current_attempt_num_logging} without success or specific error propagation.")
|
| 386 |
|
| 387 |
|
| 388 |
@app.route("/v1/chat/completions", methods=["POST"])
|
|
|
|
| 469 |
logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
|
| 470 |
|
| 471 |
if selected_apikey_for_outer_retry:
|
| 472 |
+
if status_code_from_exc == 524:
|
| 473 |
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
|
| 474 |
+
elif status_code_from_exc == 500:
|
| 475 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 500 error.")
|
| 476 |
+
elif status_code_from_exc and ((400 <= status_code_from_exc < 500) or status_code_from_exc in [502, 503]): # Explicitly list codes that mark bad, excluding 500, 524
|
| 477 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 478 |
+
elif not status_code_from_exc :
|
| 479 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 480 |
|
| 481 |
if key_retry_count >= max_key_retries:
|
|
|
|
| 488 |
except Exception as e_outer:
|
| 489 |
last_exception_for_key_retry = e_outer
|
| 490 |
logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
|
| 491 |
+
if selected_apikey_for_outer_retry:
|
| 492 |
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 493 |
break
|
| 494 |
|
| 495 |
+
error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 496 |
+
error_code_str = "max_retries_check_context_contact_admin"
|
|
|
|
| 497 |
|
| 498 |
logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
|
| 499 |
|
| 500 |
if is_stream_request:
|
| 501 |
def error_stream_gen():
|
| 502 |
+
yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_max_retries_exceeded", "code": error_code_str}})
|
| 503 |
yield "data: [DONE]\n\n"
|
| 504 |
+
return Response(error_stream_gen(), content_type='text/event-stream', status=500)
|
| 505 |
else:
|
| 506 |
+
return jsonify({"error": error_message, "code": error_code_str}), 500
|
| 507 |
|
| 508 |
return with_valid_key_and_session(attempt_ondemand_request_wrapper)
|
| 509 |
|
|
|
|
| 517 |
while empty_retry_attempt_num < max_empty_response_retries:
|
| 518 |
empty_retry_attempt_num += 1
|
| 519 |
accumulated_text_this_attempt = ""
|
| 520 |
+
api_error_in_attempt = False # Renamed from api_error_yielded for clarity in this scope
|
| 521 |
|
| 522 |
if empty_retry_attempt_num > 1:
|
| 523 |
logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
|
|
|
|
| 534 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 535 |
if status_code_from_exc_retry_setup == 524:
|
| 536 |
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
|
| 537 |
+
elif status_code_from_exc_retry_setup == 500:
|
| 538 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for stream retry.")
|
| 539 |
else:
|
| 540 |
keymgr.mark_bad(current_apikey_for_attempt)
|
| 541 |
|
| 542 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
|
|
|
| 543 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 544 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 545 |
logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
|
| 546 |
+
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code, "details": str(e_key_session)}})
|
| 547 |
yield "data: [DONE]\n\n"
|
| 548 |
return
|
| 549 |
time.sleep(1)
|
|
|
|
| 553 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
| 554 |
|
| 555 |
try:
|
| 556 |
+
# result_tuple will be (accumulated_text, api_error_yielded_flag_from_execute)
|
| 557 |
result_tuple = yield from _execute_one_stream_attempt(
|
| 558 |
current_apikey_for_attempt,
|
| 559 |
current_session_id_for_attempt,
|
|
|
|
| 563 |
f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
|
| 564 |
)
|
| 565 |
accumulated_text_this_attempt = result_tuple[0]
|
| 566 |
+
api_error_in_attempt = result_tuple[1] # This tells if _execute_one_stream_attempt itself yielded an error SSE
|
| 567 |
|
| 568 |
+
except requests.exceptions.RequestException as e_req: # Catch errors from _execute_one_stream_attempt's requests.post
|
| 569 |
log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
|
| 570 |
status_code_from_exc_stream = None
|
| 571 |
if hasattr(e_req, 'response') and e_req.response is not None:
|
|
|
|
| 576 |
if current_apikey_for_attempt:
|
| 577 |
if status_code_from_exc_stream == 524:
|
| 578 |
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
|
| 579 |
+
elif status_code_from_exc_stream == 500:
|
| 580 |
+
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 500 error during stream attempt.")
|
| 581 |
else:
|
| 582 |
keymgr.mark_bad(current_apikey_for_attempt)
|
| 583 |
|
| 584 |
if empty_retry_attempt_num == 1:
|
| 585 |
+
# If the very first attempt (initial_apikey) fails with RequestException,
|
| 586 |
+
# re-raise to let with_valid_key_and_session handle key rotation.
|
| 587 |
raise e_req
|
| 588 |
|
| 589 |
+
# If it's an empty-response retry (attempt_num > 1) that failed with RequestException
|
| 590 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 591 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 592 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 593 |
logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
|
| 594 |
+
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code, "details": str(e_req)}})
|
| 595 |
yield "data: [DONE]\n\n"
|
| 596 |
return
|
| 597 |
time.sleep(1)
|
| 598 |
+
continue # To the next iteration of the empty_retry_attempt_num loop (will try new key/session)
|
| 599 |
|
| 600 |
+
# After _execute_one_stream_attempt has finished (either normally or yielded its own error)
|
| 601 |
if api_error_in_attempt:
|
| 602 |
+
# This means _execute_one_stream_attempt handled an API error (like 429, or persistent 500) and yielded an error SSE.
|
| 603 |
+
# The stream is already complete with an error.
|
| 604 |
+
logging.warning(f"【流式请求】({log_attempt_str}) 子尝试已处理并流式传输API错误。")
|
| 605 |
+
return # Stop further empty response retries.
|
| 606 |
|
| 607 |
if accumulated_text_this_attempt:
|
| 608 |
logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
|
| 609 |
+
return # Stream was successful and content yielded.
|
| 610 |
|
| 611 |
+
# If we reach here, content was empty from _execute_one_stream_attempt, and no API error was yielded by it.
|
| 612 |
logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
|
| 613 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 614 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 615 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 616 |
logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
|
| 617 |
yield format_openai_sse_delta({
|
| 618 |
+
"error": {"message": final_error_message, "type": "max_empty_retries_exceeded", "code": final_error_code}
|
| 619 |
})
|
| 620 |
yield "data: [DONE]\n\n"
|
| 621 |
return
|
|
|
|
| 623 |
logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 624 |
time.sleep(1)
|
| 625 |
|
|
|
|
|
|
|
|
|
|
| 626 |
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 627 |
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
|
| 628 |
logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
|
|
|
|
| 631 |
|
| 632 |
|
| 633 |
def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
|
| 634 |
+
max_empty_response_retries = 5
|
| 635 |
empty_retry_attempt_num = 0
|
| 636 |
|
| 637 |
current_apikey_for_attempt = initial_apikey
|
| 638 |
current_session_id_for_attempt = initial_session_id
|
| 639 |
|
|
|
|
|
|
|
| 640 |
while empty_retry_attempt_num < max_empty_response_retries:
|
| 641 |
empty_retry_attempt_num += 1
|
| 642 |
|
|
|
|
| 645 |
try:
|
| 646 |
current_apikey_for_attempt = keymgr.get()
|
| 647 |
current_session_id_for_attempt = create_session(current_apikey_for_attempt)
|
|
|
|
| 648 |
logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
|
| 649 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 650 |
logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
|
|
|
| 655 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 656 |
if status_code_from_exc_retry_setup_ns == 524:
|
| 657 |
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
|
| 658 |
+
elif status_code_from_exc_retry_setup_ns == 500:
|
| 659 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for non-stream retry.")
|
| 660 |
else:
|
| 661 |
keymgr.mark_bad(current_apikey_for_attempt)
|
| 662 |
+
|
| 663 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
|
|
|
| 664 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 665 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 666 |
logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
|
| 667 |
+
return jsonify({"error": final_error_message, "code": final_error_code, "details": str(e_key_session)}), 500
|
| 668 |
time.sleep(1)
|
| 669 |
current_apikey_for_attempt = None
|
| 670 |
continue
|
| 671 |
|
| 672 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
|
|
|
| 673 |
|
| 674 |
+
# Inner loop for 500-error retries for the current key/session
|
| 675 |
+
max_500_retries_for_this_call = 5
|
| 676 |
+
current_500_retry_count = 0
|
| 677 |
|
| 678 |
+
while current_500_retry_count < max_500_retries_for_this_call:
|
| 679 |
+
current_500_retry_count += 1
|
| 680 |
+
if current_500_retry_count > 1:
|
| 681 |
+
logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}, 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
|
| 682 |
+
else:
|
| 683 |
+
logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
|
| 684 |
+
|
| 685 |
+
url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query" # Ensure URL uses current session
|
| 686 |
+
payload = { "query": final_query_to_ondemand, "endpointId": target_endpoint_id, "pluginIds": [], "responseMode": "sync" } # Use correct query and endpoint
|
| 687 |
+
headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
|
|
|
|
| 688 |
|
| 689 |
+
try:
|
| 690 |
+
resp = requests.post(url, json=payload, headers=headers, timeout=120)
|
| 691 |
+
|
| 692 |
+
if resp.status_code == 500:
|
| 693 |
+
logging.warning(f"【OnDemand同步错误】({log_attempt_str}, 500重试 {current_500_retry_count}) 收到500错误。")
|
| 694 |
+
if current_500_retry_count >= max_500_retries_for_this_call:
|
| 695 |
+
logging.error(f"【OnDemand同步错误】({log_attempt_str}) 达到500错误最大重试次数。将错误传递给上层。")
|
| 696 |
+
resp.raise_for_status() # Re-raise HTTPError(500) to be caught by outer RequestException handler
|
| 697 |
+
time.sleep(1)
|
| 698 |
+
continue # Next 500-retry iteration
|
| 699 |
+
|
| 700 |
+
resp.raise_for_status() # For other non-200 errors (will be caught by RequestException below)
|
| 701 |
+
|
| 702 |
+
# Successful 200 OK
|
| 703 |
+
response_json = resp.json()
|
| 704 |
+
if "data" not in response_json or "answer" not in response_json["data"]:
|
| 705 |
+
logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
|
| 706 |
+
raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}, 500-retry {current_500_retry_count}.")
|
| 707 |
+
|
| 708 |
+
ai_response_content = response_json["data"]["answer"]
|
| 709 |
+
if ai_response_content is None: ai_response_content = ""
|
| 710 |
|
| 711 |
+
if ai_response_content.strip():
|
| 712 |
+
logging.info(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 成功获取非空内容。")
|
| 713 |
+
openai_response_obj = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 715 |
"model": openai_model_name_for_response,
|
| 716 |
+
"choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
|
| 717 |
+
"usage": {}
|
| 718 |
+
}
|
| 719 |
+
return jsonify(openai_response_obj) # SUCCESS
|
| 720 |
+
else:
|
| 721 |
+
# Empty response after a 200 OK (and non-500 error)
|
| 722 |
+
logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 返回空回复。")
|
| 723 |
+
# This attempt (for this key/session) resulted in an empty response.
|
| 724 |
+
# Break from the 500-retry loop to let the outer empty-response loop handle it.
|
| 725 |
+
break # Break from current_500_retry_count loop
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
|
| 727 |
+
except requests.exceptions.RequestException as e_req: # Catches HTTPError (including re-raised 500) or other network issues
|
| 728 |
+
log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
|
| 729 |
+
status_code_from_exc_sync = None
|
| 730 |
+
if hasattr(e_req, 'response') and e_req.response is not None:
|
| 731 |
+
status_code_from_exc_sync = e_req.response.status_code
|
| 732 |
+
|
| 733 |
+
logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
|
| 734 |
+
|
| 735 |
+
# Key marking is handled by with_valid_key_and_session based on what's re-raised.
|
| 736 |
+
# If it's a 500 that exhausted its inner retries, it will be re-raised.
|
| 737 |
+
# If it's another RequestException, it's also re-raised.
|
| 738 |
+
|
| 739 |
+
# If this was the initial overall attempt (empty_retry_attempt_num == 1)
|
| 740 |
+
# AND this specific 500-retry loop has exhausted OR it's not a 500 error that can be retried by this inner loop:
|
| 741 |
+
if current_500_retry_count >= max_500_retries_for_this_call or status_code_from_exc_sync != 500:
|
| 742 |
+
if empty_retry_attempt_num == 1:
|
| 743 |
+
raise e_req # Re-raise for with_valid_key_and_session to handle key rotation
|
| 744 |
+
else:
|
| 745 |
+
# This was an empty-response retry that then hit a persistent 500 or other RequestException.
|
| 746 |
+
# This attempt for this key has failed. Break 500-retry loop to go to next empty-response attempt.
|
| 747 |
+
# To signal this failure for the current key/session to the outer empty-response loop:
|
| 748 |
+
raise e_req # This will be caught by the outer try-except in the empty_retry_attempt_num loop
|
| 749 |
+
|
| 750 |
+
# If it was a 500 and we still have 500-retries, the loop will continue after sleep.
|
| 751 |
+
time.sleep(1)
|
| 752 |
+
continue # To the next iteration of current_500_retry_count loop
|
| 753 |
|
| 754 |
+
except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
|
| 755 |
+
logging.error(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 处理响应或格式时出错: {e_parse}", exc_info=True)
|
| 756 |
+
if empty_retry_attempt_num == 1 and current_500_retry_count == 1 : # Format error on very first try
|
| 757 |
+
raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
|
| 758 |
+
# If format error during a retry, it's a failure for this key/session attempt
|
| 759 |
+
# Break from 500-retry loop, let empty-response loop handle it.
|
| 760 |
+
# To signal this failure for the current key/session:
|
| 761 |
+
raise requests.exceptions.RequestException(f"Response format error during retry: {e_parse}") from e_parse
|
| 762 |
+
|
| 763 |
+
|
| 764 |
+
# If the 500-retry loop completed (either successfully got non-empty, or broke due to empty, or raised an error)
|
| 765 |
+
# Check if we are here because of an empty response (meaning the 500-retry loop broke after a 200 OK but empty content)
|
| 766 |
+
if not ai_response_content.strip() and not ( 'resp' in locals() and resp.status_code != 200 ): # Check if it was an empty response from a 200 OK
|
| 767 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 768 |
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 769 |
final_error_code = "max_retries_check_context_contact_admin"
|
| 770 |
+
logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
|
| 771 |
+
return jsonify({
|
| 772 |
+
"error": final_error_message,
|
| 773 |
+
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 774 |
+
"model": openai_model_name_for_response,
|
| 775 |
+
"choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
|
| 776 |
+
"usage": {}, "code": final_error_code
|
| 777 |
+
}), 500
|
| 778 |
+
logging.info(f"【同步请求】空回复(在500-重试循环之后),准备进行下一个空回复尝试。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 779 |
+
time.sleep(1)
|
| 780 |
+
# Outer loop (empty_retry_attempt_num) will continue
|
| 781 |
+
# If we are here and didn't return a success, it means the 500-retry loop might have been exhausted by 500s
|
| 782 |
+
# but didn't re-raise correctly, or some other path. This is a fallback.
|
| 783 |
+
# However, if it exhausted 500s, it should have re-raised an exception.
|
| 784 |
+
|
| 785 |
+
# Fallback if outer empty_retry_attempt_num loop finishes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 786 |
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
|
| 787 |
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
|
| 788 |
logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
|
| 789 |
+
return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
|
| 790 |
|
| 791 |
|
| 792 |
@app.route("/v1/models", methods=["GET"])
|