Update openai_ondemand_adapter.py
Browse files- openai_ondemand_adapter.py +76 -67
openai_ondemand_adapter.py
CHANGED
|
@@ -238,13 +238,7 @@ def create_session(apikey, external_user_id=None, plugin_ids=None):
|
|
| 238 |
def format_openai_sse_delta(chunk_data_dict):
|
| 239 |
return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
|
| 240 |
|
| 241 |
-
# Modified: This function is now a generator and returns accumulated text + error flag at the end.
|
| 242 |
def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
|
| 243 |
-
"""
|
| 244 |
-
Executes one streaming request attempt.
|
| 245 |
-
Yields SSE event strings.
|
| 246 |
-
Returns a tuple: (accumulated_text_content, api_error_yielded_flag)
|
| 247 |
-
"""
|
| 248 |
url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
|
| 249 |
payload = {
|
| 250 |
"query": query_str,
|
|
@@ -259,7 +253,7 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 259 |
}
|
| 260 |
|
| 261 |
accumulated_text_parts = []
|
| 262 |
-
api_error_yielded = False
|
| 263 |
|
| 264 |
logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
|
| 265 |
|
|
@@ -278,7 +272,7 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 278 |
}
|
| 279 |
yield format_openai_sse_delta(error_payload)
|
| 280 |
yield "data: [DONE]\n\n"
|
| 281 |
-
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 282 |
|
| 283 |
first_chunk_sent = False
|
| 284 |
last_line_str = ""
|
|
@@ -295,7 +289,6 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 295 |
if data_part == "[DONE]":
|
| 296 |
logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
|
| 297 |
yield "data: [DONE]\n\n"
|
| 298 |
-
# No break here, let the function return after the loop
|
| 299 |
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 300 |
|
| 301 |
elif data_part.startswith("[ERROR]:"):
|
|
@@ -308,7 +301,7 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 308 |
error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
|
| 309 |
yield format_openai_sse_delta({"error": error_obj})
|
| 310 |
yield "data: [DONE]\n\n"
|
| 311 |
-
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 312 |
|
| 313 |
else:
|
| 314 |
try:
|
|
@@ -337,28 +330,23 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
|
|
| 337 |
"model": openai_model_name_for_response,
|
| 338 |
"choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
|
| 339 |
}
|
| 340 |
-
yield format_openai_sse_delta(openai_chunk)
|
| 341 |
except json.JSONDecodeError:
|
| 342 |
logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
|
| 343 |
continue
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
if not api_error_yielded: # If no error was yielded, and no [DONE] was in data, yield a [DONE]
|
| 348 |
-
if not last_line_str.startswith("data: [DONE]"): # Check if last processed line was not already DONE
|
| 349 |
logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
|
| 350 |
yield "data: [DONE]\n\n"
|
| 351 |
|
| 352 |
except requests.exceptions.RequestException:
|
| 353 |
-
# Network/request level errors before or during streaming
|
| 354 |
-
# These should be caught by the caller (handle_stream_request) to decide on retries
|
| 355 |
logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {current_attempt_num_logging}): Session: {session_id_for_attempt}", exc_info=False)
|
| 356 |
-
raise
|
| 357 |
|
| 358 |
except Exception as e:
|
| 359 |
-
# Unexpected Python errors during stream processing
|
| 360 |
logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
|
| 361 |
-
api_error_yielded = True
|
| 362 |
error_payload = {
|
| 363 |
"error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
|
| 364 |
}
|
|
@@ -418,7 +406,6 @@ def chat_completions():
|
|
| 418 |
|
| 419 |
def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
|
| 420 |
if is_stream_request:
|
| 421 |
-
# Pass the generator directly to Response
|
| 422 |
return Response(
|
| 423 |
handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name),
|
| 424 |
content_type='text/event-stream'
|
|
@@ -446,9 +433,17 @@ def chat_completions():
|
|
| 446 |
break
|
| 447 |
except requests.exceptions.RequestException as http_err_outer:
|
| 448 |
last_exception_for_key_retry = http_err_outer
|
| 449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
if selected_apikey_for_outer_retry:
|
| 451 |
-
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
if key_retry_count >= max_key_retries:
|
| 454 |
logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
|
|
@@ -468,28 +463,18 @@ def chat_completions():
|
|
| 468 |
if last_exception_for_key_retry:
|
| 469 |
error_message += f" Last known error during key/session phase: {str(last_exception_for_key_retry)}"
|
| 470 |
logging.error(error_message)
|
| 471 |
-
|
| 472 |
-
# This part might need adjustment if the action_func_to_wrap for stream is expected to return a Response object
|
| 473 |
-
# However, if action_func_to_wrap (attempt_ondemand_request_wrapper) for stream returns a Response,
|
| 474 |
-
# then this jsonify will only be hit if create_session or keymgr.get fails repeatedly.
|
| 475 |
if is_stream_request:
|
| 476 |
-
# Construct a generator that yields an error SSE
|
| 477 |
def error_stream_gen():
|
| 478 |
-
yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_setup_error", "code":
|
| 479 |
yield "data: [DONE]\n\n"
|
| 480 |
return Response(error_stream_gen(), content_type='text/event-stream', status=503)
|
| 481 |
else:
|
| 482 |
-
return jsonify({"error": error_message}), 503
|
| 483 |
|
| 484 |
return with_valid_key_and_session(attempt_ondemand_request_wrapper)
|
| 485 |
|
| 486 |
-
# Modified: This function is now a generator that uses `yield from`
|
| 487 |
def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
|
| 488 |
-
"""
|
| 489 |
-
Handles streaming chat completion requests with empty response retries.
|
| 490 |
-
Each empty response retry uses a new API key and session.
|
| 491 |
-
Yields SSE event strings directly.
|
| 492 |
-
"""
|
| 493 |
max_empty_response_retries = 5
|
| 494 |
empty_retry_attempt_num = 0
|
| 495 |
|
|
@@ -510,9 +495,16 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 510 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 511 |
logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
| 512 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 513 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 515 |
-
yield format_openai_sse_delta({"error": {"message": f"Failed to get new key/session for final empty stream retry. Error: {str(e_key_session)}", "type": "internal_proxy_error"}})
|
| 516 |
yield "data: [DONE]\n\n"
|
| 517 |
return
|
| 518 |
time.sleep(1)
|
|
@@ -522,8 +514,6 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 522 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
| 523 |
|
| 524 |
try:
|
| 525 |
-
# Yield from the sub-generator; result_tuple will be (accumulated_text, api_error_yielded_flag)
|
| 526 |
-
# This is where the true streaming to the client happens chunk by chunk.
|
| 527 |
result_tuple = yield from _execute_one_stream_attempt(
|
| 528 |
current_apikey_for_attempt,
|
| 529 |
current_session_id_for_attempt,
|
|
@@ -536,50 +526,52 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
|
|
| 536 |
api_error_in_attempt = result_tuple[1]
|
| 537 |
|
| 538 |
except requests.exceptions.RequestException as e_req:
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 542 |
|
| 543 |
if empty_retry_attempt_num == 1:
|
| 544 |
-
# This was the initial_apikey. Re-raise for the outer key retry mechanism in with_valid_key_and_session.
|
| 545 |
-
# The with_valid_key_and_session will then try a new key for the *entire* operation.
|
| 546 |
raise e_req
|
| 547 |
|
| 548 |
-
# If it's an empty-response retry (attempt_num > 1) that failed with RequestException
|
| 549 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 550 |
logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误。")
|
| 551 |
-
yield format_openai_sse_delta({"error": {"message": f"Request failed on final empty stream retry attempt: {str(e_req)}", "type": "internal_proxy_error"}})
|
| 552 |
yield "data: [DONE]\n\n"
|
| 553 |
return
|
| 554 |
time.sleep(1)
|
| 555 |
-
continue
|
| 556 |
|
| 557 |
-
# Check results after _execute_one_stream_attempt has finished for this attempt
|
| 558 |
if api_error_in_attempt:
|
| 559 |
logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误,已将错误信息流式传输。")
|
| 560 |
-
# Error already yielded by _execute_one_stream_attempt, so we just stop.
|
| 561 |
return
|
| 562 |
|
| 563 |
if accumulated_text_this_attempt:
|
| 564 |
logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
|
| 565 |
-
# Content already yielded by _execute_one_stream_attempt. We are done.
|
| 566 |
return
|
| 567 |
|
| 568 |
-
# If we reach here, content was empty and no API error was yielded by _execute_one_stream_attempt
|
| 569 |
logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
|
| 570 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 571 |
logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
|
| 572 |
yield format_openai_sse_delta({
|
| 573 |
-
"error": {"message":
|
| 574 |
})
|
| 575 |
yield "data: [DONE]\n\n"
|
| 576 |
return
|
| 577 |
|
| 578 |
logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 579 |
time.sleep(1)
|
| 580 |
-
|
| 581 |
logging.error("【流式请求】意外退出空回复重试循环。")
|
| 582 |
-
yield format_openai_sse_delta({"error": {"message": "Unexpected error in stream handling.", "type": "internal_proxy_error"}})
|
| 583 |
yield "data: [DONE]\n\n"
|
| 584 |
|
| 585 |
|
|
@@ -605,9 +597,15 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
|
|
| 605 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 606 |
logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
| 607 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 608 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 610 |
-
return jsonify({"error": f"Failed to get new key/session for final empty response retry. Error: {str(e_key_session)}"}), 503
|
| 611 |
time.sleep(1)
|
| 612 |
current_apikey_for_attempt = None
|
| 613 |
continue
|
|
@@ -644,32 +642,43 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
|
|
| 644 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 645 |
logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
|
| 646 |
return jsonify({
|
| 647 |
-
"error":
|
| 648 |
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 649 |
"model": openai_model_name_for_response,
|
| 650 |
-
"choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
|
| 651 |
-
"usage": {}
|
| 652 |
-
|
|
|
|
| 653 |
logging.info(f"【同步请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 654 |
time.sleep(1)
|
| 655 |
|
| 656 |
except requests.exceptions.RequestException as e_req:
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 660 |
if empty_retry_attempt_num == 1:
|
| 661 |
raise e_req
|
| 662 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 663 |
logging.error(f"【同步请求】在最后一次空回复重试时发生请求错误。")
|
| 664 |
-
return jsonify({"error": f"Request failed on final empty response retry attempt. Last error: {str(e_req)}"}), 503
|
| 665 |
time.sleep(1)
|
| 666 |
continue
|
| 667 |
except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
|
| 668 |
logging.error(f"【同步请求】({log_attempt_str}) 处理响应时出错: {e_parse}", exc_info=True)
|
| 669 |
-
return jsonify({"error": f"Error processing OnDemand sync response: {str(e_parse)}"}), 502
|
| 670 |
|
| 671 |
logging.error(f"【同步请求】意外退出空回复重试循环。")
|
| 672 |
-
return jsonify({"error": "
|
| 673 |
|
| 674 |
|
| 675 |
@app.route("/v1/models", methods=["GET"])
|
|
|
|
| 238 |
def format_openai_sse_delta(chunk_data_dict):
|
| 239 |
return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
|
| 240 |
|
|
|
|
| 241 |
def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
|
| 243 |
payload = {
|
| 244 |
"query": query_str,
|
|
|
|
| 253 |
}
|
| 254 |
|
| 255 |
accumulated_text_parts = []
|
| 256 |
+
api_error_yielded = False
|
| 257 |
|
| 258 |
logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
|
| 259 |
|
|
|
|
| 272 |
}
|
| 273 |
yield format_openai_sse_delta(error_payload)
|
| 274 |
yield "data: [DONE]\n\n"
|
| 275 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 276 |
|
| 277 |
first_chunk_sent = False
|
| 278 |
last_line_str = ""
|
|
|
|
| 289 |
if data_part == "[DONE]":
|
| 290 |
logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
|
| 291 |
yield "data: [DONE]\n\n"
|
|
|
|
| 292 |
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 293 |
|
| 294 |
elif data_part.startswith("[ERROR]:"):
|
|
|
|
| 301 |
error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
|
| 302 |
yield format_openai_sse_delta({"error": error_obj})
|
| 303 |
yield "data: [DONE]\n\n"
|
| 304 |
+
return "".join(accumulated_text_parts).strip(), api_error_yielded
|
| 305 |
|
| 306 |
else:
|
| 307 |
try:
|
|
|
|
| 330 |
"model": openai_model_name_for_response,
|
| 331 |
"choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
|
| 332 |
}
|
| 333 |
+
yield format_openai_sse_delta(openai_chunk)
|
| 334 |
except json.JSONDecodeError:
|
| 335 |
logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
|
| 336 |
continue
|
| 337 |
|
| 338 |
+
if not api_error_yielded:
|
| 339 |
+
if not last_line_str.startswith("data: [DONE]"):
|
|
|
|
|
|
|
| 340 |
logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
|
| 341 |
yield "data: [DONE]\n\n"
|
| 342 |
|
| 343 |
except requests.exceptions.RequestException:
|
|
|
|
|
|
|
| 344 |
logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {current_attempt_num_logging}): Session: {session_id_for_attempt}", exc_info=False)
|
| 345 |
+
raise
|
| 346 |
|
| 347 |
except Exception as e:
|
|
|
|
| 348 |
logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
|
| 349 |
+
api_error_yielded = True
|
| 350 |
error_payload = {
|
| 351 |
"error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
|
| 352 |
}
|
|
|
|
| 406 |
|
| 407 |
def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
|
| 408 |
if is_stream_request:
|
|
|
|
| 409 |
return Response(
|
| 410 |
handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name),
|
| 411 |
content_type='text/event-stream'
|
|
|
|
| 433 |
break
|
| 434 |
except requests.exceptions.RequestException as http_err_outer:
|
| 435 |
last_exception_for_key_retry = http_err_outer
|
| 436 |
+
status_code_from_exc = None
|
| 437 |
+
if hasattr(http_err_outer, 'response') and http_err_outer.response is not None:
|
| 438 |
+
status_code_from_exc = http_err_outer.response.status_code
|
| 439 |
+
|
| 440 |
+
logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
|
| 441 |
+
|
| 442 |
if selected_apikey_for_outer_retry:
|
| 443 |
+
if status_code_from_exc == 524:
|
| 444 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
|
| 445 |
+
else:
|
| 446 |
+
keymgr.mark_bad(selected_apikey_for_outer_retry)
|
| 447 |
|
| 448 |
if key_retry_count >= max_key_retries:
|
| 449 |
logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
|
|
|
|
| 463 |
if last_exception_for_key_retry:
|
| 464 |
error_message += f" Last known error during key/session phase: {str(last_exception_for_key_retry)}"
|
| 465 |
logging.error(error_message)
|
| 466 |
+
|
|
|
|
|
|
|
|
|
|
| 467 |
if is_stream_request:
|
|
|
|
| 468 |
def error_stream_gen():
|
| 469 |
+
yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_setup_error", "code": "proxy_error_503"}}) # Changed code
|
| 470 |
yield "data: [DONE]\n\n"
|
| 471 |
return Response(error_stream_gen(), content_type='text/event-stream', status=503)
|
| 472 |
else:
|
| 473 |
+
return jsonify({"error": error_message, "code": "proxy_error_503"}), 503 # Added code here
|
| 474 |
|
| 475 |
return with_valid_key_and_session(attempt_ondemand_request_wrapper)
|
| 476 |
|
|
|
|
| 477 |
def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
max_empty_response_retries = 5
|
| 479 |
empty_retry_attempt_num = 0
|
| 480 |
|
|
|
|
| 495 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 496 |
logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
| 497 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 498 |
+
status_code_from_exc = None
|
| 499 |
+
if hasattr(e_key_session, 'response') and e_key_session.response is not None:
|
| 500 |
+
status_code_from_exc = e_key_session.response.status_code
|
| 501 |
+
if status_code_from_exc == 524:
|
| 502 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for retry.")
|
| 503 |
+
else:
|
| 504 |
+
keymgr.mark_bad(current_apikey_for_attempt)
|
| 505 |
+
|
| 506 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 507 |
+
yield format_openai_sse_delta({"error": {"message": f"Failed to get new key/session for final empty stream retry. Error: {str(e_key_session)}", "type": "internal_proxy_error", "code": "proxy_retry_setup_failed"}})
|
| 508 |
yield "data: [DONE]\n\n"
|
| 509 |
return
|
| 510 |
time.sleep(1)
|
|
|
|
| 514 |
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
|
| 515 |
|
| 516 |
try:
|
|
|
|
|
|
|
| 517 |
result_tuple = yield from _execute_one_stream_attempt(
|
| 518 |
current_apikey_for_attempt,
|
| 519 |
current_session_id_for_attempt,
|
|
|
|
| 526 |
api_error_in_attempt = result_tuple[1]
|
| 527 |
|
| 528 |
except requests.exceptions.RequestException as e_req:
|
| 529 |
+
log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
|
| 530 |
+
status_code_from_exc_stream = None
|
| 531 |
+
if hasattr(e_req, 'response') and e_req.response is not None:
|
| 532 |
+
status_code_from_exc_stream = e_req.response.status_code
|
| 533 |
+
|
| 534 |
+
logging.warning(f"【流式请求】({log_attempt_str} using key {log_key_display}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_stream}")
|
| 535 |
+
|
| 536 |
+
if current_apikey_for_attempt:
|
| 537 |
+
if status_code_from_exc_stream == 524:
|
| 538 |
+
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
|
| 539 |
+
else:
|
| 540 |
+
keymgr.mark_bad(current_apikey_for_attempt)
|
| 541 |
|
| 542 |
if empty_retry_attempt_num == 1:
|
|
|
|
|
|
|
| 543 |
raise e_req
|
| 544 |
|
|
|
|
| 545 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 546 |
logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误。")
|
| 547 |
+
yield format_openai_sse_delta({"error": {"message": f"Request failed on final empty stream retry attempt: {str(e_req)}", "type": "internal_proxy_error", "code": "proxy_final_retry_failed"}})
|
| 548 |
yield "data: [DONE]\n\n"
|
| 549 |
return
|
| 550 |
time.sleep(1)
|
| 551 |
+
continue
|
| 552 |
|
|
|
|
| 553 |
if api_error_in_attempt:
|
| 554 |
logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误,已将错误信息流式传输。")
|
|
|
|
| 555 |
return
|
| 556 |
|
| 557 |
if accumulated_text_this_attempt:
|
| 558 |
logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
|
|
|
|
| 559 |
return
|
| 560 |
|
|
|
|
| 561 |
logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
|
| 562 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 563 |
logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
|
| 564 |
yield format_openai_sse_delta({
|
| 565 |
+
"error": {"message": "返回空回复,请重试", "type": "empty_response_after_retries", "code": "empty_response_please_retry"} # Changed message and code
|
| 566 |
})
|
| 567 |
yield "data: [DONE]\n\n"
|
| 568 |
return
|
| 569 |
|
| 570 |
logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 571 |
time.sleep(1)
|
| 572 |
+
|
| 573 |
logging.error("【流式请求】意外退出空回复重试循环。")
|
| 574 |
+
yield format_openai_sse_delta({"error": {"message": "Unexpected error in stream handling.", "type": "internal_proxy_error", "code":"unexpected_stream_exit"}})
|
| 575 |
yield "data: [DONE]\n\n"
|
| 576 |
|
| 577 |
|
|
|
|
| 597 |
except (ValueError, requests.exceptions.RequestException) as e_key_session:
|
| 598 |
logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
|
| 599 |
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
|
| 600 |
+
status_code_from_exc = None
|
| 601 |
+
if hasattr(e_key_session, 'response') and e_key_session.response is not None:
|
| 602 |
+
status_code_from_exc = e_key_session.response.status_code
|
| 603 |
+
if status_code_from_exc == 524:
|
| 604 |
+
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
|
| 605 |
+
else:
|
| 606 |
+
keymgr.mark_bad(current_apikey_for_attempt)
|
| 607 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 608 |
+
return jsonify({"error": f"Failed to get new key/session for final empty response retry. Error: {str(e_key_session)}", "code":"proxy_retry_setup_failed"}), 503
|
| 609 |
time.sleep(1)
|
| 610 |
current_apikey_for_attempt = None
|
| 611 |
continue
|
|
|
|
| 642 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 643 |
logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
|
| 644 |
return jsonify({
|
| 645 |
+
"error": "返回空回复,请重试", # Changed message
|
| 646 |
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
|
| 647 |
"model": openai_model_name_for_response,
|
| 648 |
+
"choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}], # or "error" finish_reason
|
| 649 |
+
"usage": {},
|
| 650 |
+
"code": "empty_response_please_retry" # Added code
|
| 651 |
+
}), 503 # Changed status code to 503 for "please retry"
|
| 652 |
logging.info(f"【同步请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
|
| 653 |
time.sleep(1)
|
| 654 |
|
| 655 |
except requests.exceptions.RequestException as e_req:
|
| 656 |
+
log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
|
| 657 |
+
status_code_from_exc_sync = None
|
| 658 |
+
if hasattr(e_req, 'response') and e_req.response is not None:
|
| 659 |
+
status_code_from_exc_sync = e_req.response.status_code
|
| 660 |
+
|
| 661 |
+
logging.warning(f"【同步请求】({log_attempt_str} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
|
| 662 |
+
|
| 663 |
+
if current_apikey_for_attempt:
|
| 664 |
+
if status_code_from_exc_sync == 524:
|
| 665 |
+
logging.info(f"【KeyManager】Key {log_key_display_sync} not marked bad for 524 error during sync attempt.")
|
| 666 |
+
else:
|
| 667 |
+
keymgr.mark_bad(current_apikey_for_attempt)
|
| 668 |
+
|
| 669 |
if empty_retry_attempt_num == 1:
|
| 670 |
raise e_req
|
| 671 |
if empty_retry_attempt_num >= max_empty_response_retries:
|
| 672 |
logging.error(f"【同步请求】在最后一次空回复重试时发生请求错误。")
|
| 673 |
+
return jsonify({"error": f"Request failed on final empty response retry attempt. Last error: {str(e_req)}", "code":"proxy_final_retry_failed"}), 503
|
| 674 |
time.sleep(1)
|
| 675 |
continue
|
| 676 |
except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
|
| 677 |
logging.error(f"【同步请求】({log_attempt_str}) 处理响应时出错: {e_parse}", exc_info=True)
|
| 678 |
+
return jsonify({"error": f"Error processing OnDemand sync response: {str(e_parse)}", "code": "on_demand_parse_error"}), 502
|
| 679 |
|
| 680 |
logging.error(f"【同步请求】意外退出空回复重试循环。")
|
| 681 |
+
return jsonify({"error": "返回空回复,请重试", "code": "unexpected_empty_retry_exit_please_retry"}), 503 # Changed message and code, and status
|
| 682 |
|
| 683 |
|
| 684 |
@app.route("/v1/models", methods=["GET"])
|