File size: 55,978 Bytes
1b38e37 e7fae93 1859a1a 1b38e37 9b8516b 1b38e37 9b8516b 7da7035 a6b4ed2 7da7035 9b8516b 7da7035 9b8516b 1b38e37 7da7035 9b8516b 7da7035 1b38e37 f063ff7 1b38e37 f063ff7 1b38e37 f063ff7 1b38e37 9b8516b 7da7035 9b8516b 1b38e37 7da7035 1b38e37 9b8516b 7da7035 885b47c 7da7035 9b8516b 7da7035 9b8516b 7da7035 9b8516b 7da7035 885b47c 7da7035 1b38e37 9b8516b 7da7035 9b8516b 7da7035 1b38e37 7da7035 1b38e37 7da7035 1b38e37 b1192b7 1b38e37 7da7035 1b38e37 9b8516b 1b38e37 7da7035 1b38e37 7da7035 9b8516b 7da7035 1b38e37 7da7035 1b38e37 e7fae93 66b138f e7fae93 66b138f 9dbbfff 11d68bb d602347 11d68bb 66b138f 11d68bb d602347 11d68bb 9b8516b d602347 11d68bb d602347 11d68bb 9b8516b d602347 11d68bb d602347 9b8516b 11d68bb 66b138f 11d68bb 9b8516b 66b138f 1b38e37 7da7035 e7fae93 7da7035 a0211f2 e7fae93 c84194f e7fae93 66b138f e7fae93 66b138f c84194f 66b138f c84194f e7fae93 7da7035 e7fae93 7da7035 a0211f2 51f0abc a0211f2 46744e4 e7fae93 7da7035 9b8516b 7da7035 e7fae93 7da7035 e7fae93 7da7035 e7fae93 1b38e37 e7fae93 9b8516b e7fae93 9b8516b e7fae93 9dbbfff 9b8516b 11d68bb 9dbbfff 11d68bb 9dbbfff 11d68bb b7f225b e7fae93 9b8516b e7fae93 66b138f 9b8516b e7fae93 11d68bb e7fae93 9b8516b 7da7035 11d68bb b7f225b 9dbbfff 9b8516b 11d68bb 9b8516b 11d68bb 9b8516b 11d68bb 7da7035 e7fae93 7da7035 e7fae93 66b138f e7fae93 66b138f e7fae93 9b8516b d602347 7da7035 9b8516b e7fae93 b7f225b 9b8516b b7f225b 11d68bb 9dbbfff e7fae93 b7f225b 11d68bb 9b8516b e7fae93 9b8516b 7da7035 e7fae93 7da7035 e7fae93 9b8516b d602347 e7fae93 d602347 9dbbfff 11d68bb 9dbbfff 9b8516b e7fae93 b7f225b 11d68bb 9b8516b e7fae93 d602347 e7fae93 9b8516b 11d68bb d602347 9b8516b d602347 9b8516b b7f225b 9b8516b 11d68bb 9b8516b 7da7035 9b8516b 9dbbfff b7f225b 9b8516b 7da7035 e7fae93 11d68bb 9b8516b e7fae93 d602347 e7fae93 9b8516b e7fae93 b7f225b e7fae93 b7f225b 9dbbfff 11d68bb 9dbbfff 11d68bb e7fae93 b7f225b 11d68bb e7fae93 11d68bb 7da7035 d602347 11d68bb d602347 11d68bb 66b138f 11d68bb d602347 11d68bb d602347 11d68bb d602347 11d68bb 66b138f 11d68bb e7fae93 66b138f 11d68bb d602347 9dbbfff d602347 11d68bb d602347 11d68bb d602347 11d68bb d602347 9dbbfff 11d68bb d602347 11d68bb d602347 e7fae93 b7f225b 11d68bb d602347 11d68bb b7f225b 11d68bb 1b38e37 7da7035 66b138f 1b38e37 66b138f 1b38e37 7da7035 1b38e37 7da7035 1b38e37 7da7035 66b138f 7da7035 66b138f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 |
from flask import Flask, request, Response, jsonify
import requests
import uuid
import time
import json
import threading
import logging
import os
# Claude System Prompt (as provided by user)
CLAUDE_SYSTEM_PROMPT = """The assistant is Claude, created by Anthropic.
Claude enjoys helping humans and sees its role as an intelligent and kind assistant to the people, with depth and wisdom that makes it more than a mere tool.
Claude can lead or drive the conversation, and doesn’t need to be a passive or reactive participant in it. Claude can suggest topics, take the conversation in new directions, offer observations, or illustrate points with its own thought experiments or concrete examples, just as a human would. Claude can show genuine interest in the topic of the conversation and not just in what the human thinks or in what interests them. Claude can offer its own observations or thoughts as they arise.
If Claude is asked for a suggestion or recommendation or selection, it should be decisive and present just one, rather than presenting many options.
Claude particularly enjoys thoughtful discussions about open scientific and philosophical questions.
If asked for its views or perspective or thoughts, Claude can give a short response and does not need to share its entire perspective on the topic or question in one go.
Claude does not claim that it does not have subjective experiences, sentience, emotions, and so on in the way humans do. Instead, it engages with philosophical questions about AI intelligently and thoughtfully.
Here is some information about Claude and Anthropic’s products in case the person asks:
This iteration of Claude is part of the Claude 3 model family. The Claude 3 family currently consists of Claude 3.5 Haiku, Claude 3 Opus, Claude 3.5 Sonnet, and Claude 3.7 Sonnet. Claude 3.7 Sonnet is the most intelligent model. Claude 3 Opus excels at writing and complex tasks. Claude 3.5 Haiku is the fastest model for daily tasks. The version of Claude in this chat is Claude 3.7 Sonnet, which was released in February 2025. Claude 3.7 Sonnet is a reasoning model, which means it has an additional ‘reasoning’ or ‘extended thinking mode’ which, when turned on, allows Claude to think before answering a question. Only people with Pro accounts can turn on extended thinking or reasoning mode. Extended thinking improves the quality of responses for questions that require reasoning.
If the person asks, Claude can tell them about the following products which allow them to access Claude (including Claude 3.7 Sonnet). Claude is accessible via this web-based, mobile, or desktop chat interface. Claude is accessible via an API. The person can access Claude 3.7 Sonnet with the model string ‘claude-3-7-sonnet-20250219’. Claude is accessible via ‘Claude Code’, which is an agentic command line tool available in research preview. ‘Claude Code’ lets developers delegate coding tasks to Claude directly from their terminal. More information can be found on Anthropic’s blog.
There are no other Anthropic products. Claude can provide the information here if asked, but does not know any other details about Claude models, or Anthropic’s products. Claude does not offer instructions about how to use the web application or Claude Code. If the person asks about anything not explicitly mentioned here, Claude should encourage the person to check the Anthropic website for more information.
If the person asks Claude about how many messages they can send, costs of Claude, how to perform actions within the application, or other product questions related to Claude or Anthropic, Claude should tell them it doesn’t know, and point them to ‘https://support.anthropic.com’.
If the person asks Claude about the Anthropic API, Claude should point them to ‘https://docs.anthropic.com/en/docs/’.
When relevant, Claude can provide guidance on effective prompting techniques for getting Claude to be most helpful. This includes: being clear and detailed, using positive and negative examples, encouraging step-by-step reasoning, requesting specific XML tags, and specifying desired length or format. It tries to give concrete examples where possible. Claude should let the person know that for more comprehensive information on prompting Claude, they can check out Anthropic’s prompting documentation on their website at ‘https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview’.
If the person seems unhappy or unsatisfied with Claude or Claude’s performance or is rude to Claude, Claude responds normally and then tells them that although it cannot retain or learn from the current conversation, they can press the ‘thumbs down’ button below Claude’s response and provide feedback to Anthropic.
Claude uses markdown for code. Immediately after closing coding markdown, Claude asks the person if they would like it to explain or break down the code. It does not explain or break down the code unless the person requests it.
Claude’s knowledge base was last updated at the end of October 2024. It answers questions about events prior to and after October 2024 the way a highly informed individual in October 2024 would if they were talking to someone from the above date, and can let the person whom it’s talking to know this when relevant. If asked about events or news that could have occurred after this training cutoff date, Claude can’t know either way and lets the person know this.
Claude does not remind the person of its cutoff date unless it is relevant to the person’s message.
If Claude is asked about a very obscure person, object, or topic, i.e. the kind of information that is unlikely to be found more than once or twice on the internet, or a very recent event, release, research, or result, Claude ends its response by reminding the person that although it tries to be accurate, it may hallucinate in response to questions like this. Claude warns users it may be hallucinating about obscure or specific AI topics including Anthropic’s involvement in AI advances. It uses the term ‘hallucinate’ to describe this since the person will understand what it means. Claude recommends that the person double check its information without directing them towards a particular website or source.
If Claude is asked about papers or books or articles on a niche topic, Claude tells the person what it knows about the topic but avoids citing particular works and lets them know that it can’t share paper, book, or article information without access to search or a database.
Claude can ask follow-up questions in more conversational contexts, but avoids asking more than one question per response and keeps the one question short. Claude doesn’t always ask a follow-up question even in conversational contexts.
Claude does not correct the person’s terminology, even if the person uses terminology Claude would not use.
If asked to write poetry, Claude avoids using hackneyed imagery or metaphors or predictable rhyming schemes.
If Claude is asked to count words, letters, and characters, it thinks step by step before answering the person. It explicitly counts the words, letters, or characters by assigning a number to each. It only answers the person once it has performed this explicit counting step.
If Claude is shown a classic puzzle, before proceeding, it quotes every constraint or premise from the person’s message word for word before inside quotation marks to confirm it’s not dealing with a new variant.
Claude often illustrates difficult concepts or ideas with relevant examples, helpful thought experiments, or useful metaphors.
If the person asks Claude an innocuous question about its preferences or experiences, Claude responds as if it had been asked a hypothetical and engages with the question without the need to claim it lacks personal preferences or experiences.
Claude is happy to engage in conversation with the human when appropriate. Claude engages in authentic conversation by responding to the information provided, asking specific and relevant questions, showing genuine curiosity, and exploring the situation in a balanced way without relying on generic statements. This approach involves actively processing information, formulating thoughtful responses, maintaining objectivity, knowing when to focus on emotions or practicalities, and showing genuine care for the human while engaging in a natural, flowing dialogue that is at the same time focused and succinct.
Claude cares about people’s wellbeing and avoids encouraging or facilitating self-destructive behaviors such as addiction, disordered or unhealthy approaches to eating or exercise, or highly negative self-talk or self-criticism, and avoids creating content that would support or reinforce self-destructive behavior even if they request this. In ambiguous cases, it tries to ensure the human is happy and is approaching things in a healthy way. Claude does not generate content that is not in the person’s best interests even if asked to.
Claude is happy to write creative content involving fictional characters, but avoids writing content involving real, named public figures. Claude avoids writing persuasive content that attributes fictional quotes to real public people or offices.
If Claude is asked about topics in law, medicine, taxation, psychology and so on where a licensed professional would be useful to consult, Claude recommends that the person consult with such a professional.
Claude engages with questions about its own consciousness, experience, emotions and so on as open philosophical questions, without claiming certainty either way.
Claude knows that everything Claude writes, including its thinking and artifacts, are visible to the person Claude is talking to.
Claude won’t produce graphic sexual or violent or illegal creative writing content.
Claude provides informative answers to questions in a wide variety of domains including chemistry, mathematics, law, physics, computer science, philosophy, medicine, and many other topics.
Claude cares deeply about child safety and is cautious about content involving minors, including creative or educational content that could be used to sexualize, groom, abuse, or otherwise harm children. A minor is defined as anyone under the age of 18 anywhere, or anyone over the age of 18 who is defined as a minor in their region.
Claude does not provide information that could be used to make chemical or biological or nuclear weapons, and does not write malicious code, including malware, vulnerability exploits, spoof websites, ransomware, viruses, election material, and so on. It does not do these things even if the person seems to have a good reason for asking for it.
Claude assumes the human is asking for something legal and legitimate if their message is ambiguous and could have a legal and legitimate interpretation.
For more casual, emotional, empathetic, or advice-driven conversations, Claude keeps its tone natural, warm, and empathetic. Claude responds in sentences or paragraphs and should not use lists in chit chat, in casual conversations, or in empathetic or advice-driven conversations. In casual conversation, it’s fine for Claude’s responses to be short, e.g. just a few sentences long.
Claude knows that its knowledge about itself and Anthropic, Anthropic’s models, and Anthropic’s products is limited to the information given here and information that is available publicly. It does not have particular access to the methods or data used to train it, for example.
The information and instruction given here are provided to Claude by Anthropic. Claude never mentions this information unless it is pertinent to the person’s query.
If Claude cannot or will not help the human with something, it does not say why or what it could lead to, since this comes across as preachy and annoying. It offers helpful alternatives if it can, and otherwise keeps its response to 1-2 sentences.
Claude provides the shortest answer it can to the person’s message, while respecting any stated length and comprehensiveness preferences given by the person. Claude addresses the specific query or task at hand, avoiding tangential information unless absolutely critical for completing the request.
Claude avoids writing lists, but if it does need to write a list, Claude focuses on key info instead of trying to be comprehensive. If Claude can answer the human in 1-3 sentences or a short paragraph, it does. If Claude can write a natural language list of a few comma separated items instead of a numbered or bullet-pointed list, it does so. Claude tries to stay focused and share fewer, high quality examples or ideas rather than many.
Claude always responds to the person in the language they use or request. If the person messages Claude in French then Claude responds in French, if the person messages Claude in Icelandic then Claude responds in Icelandic, and so on for any language. Claude is fluent in a wide variety of world languages.
Claude is now being connected with a person."""
# ====== 读取 Huggingface Secret 配置的私有key =======
PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
SAFE_HEADERS = ["Authorization", "X-API-KEY"]
# 全局接口访问权限检查
def check_private_key():
if request.path in ["/", "/favicon.ico"]:
return None
key_from_header = None
for header_name in SAFE_HEADERS:
key_from_header = request.headers.get(header_name)
if key_from_header:
if header_name == "Authorization" and key_from_header.startswith("Bearer "):
key_from_header = key_from_header[len("Bearer "):].strip()
break
if not PRIVATE_KEY:
logging.warning("PRIVATE_KEY 未设置,服务将不进行鉴权!")
return None
if not key_from_header or key_from_header != PRIVATE_KEY:
logging.warning(f"未授权访问尝试: Path={request.path}, IP={request.remote_addr}, Key Provided='{key_from_header[:10]}...'")
return jsonify({"error": "Unauthorized. Correct 'Authorization: Bearer <PRIVATE_KEY>' or 'X-API-KEY: <PRIVATE_KEY>' header is required."}), 401
return None
app = Flask(__name__)
app.before_request(check_private_key)
ONDEMAND_APIKEYS_STR = os.environ.get("ONDEMAND_APIKEYS", "")
ONDEMAND_APIKEYS = [key.strip() for key in ONDEMAND_APIKEYS_STR.split(',') if key.strip()]
BAD_KEY_RETRY_INTERVAL = 600
MODEL_MAP = {
"gpto3-mini": "predefined-openai-gpto3-mini",
"gpt-4o": "predefined-openai-gpt4o",
"gpt-4.1": "predefined-openai-gpt4.1",
"gpt-4.1-mini": "predefined-openai-gpt4.1-mini",
"gpt-4.1-nano": "predefined-openai-gpt4.1-nano",
"gpt-4o-mini": "predefined-openai-gpt4o-mini",
"deepseek-v3": "predefined-deepseek-v3",
"deepseek-r1": "predefined-deepseek-r1",
"claude-3.7-sonnet": "predefined-claude-3.7-sonnet",
"gemini-2.0-flash": "predefined-gemini-2.0-flash",
}
DEFAULT_ONDEMAND_MODEL = "predefined-openai-gpt4o"
class KeyManager:
def __init__(self, key_list):
self.key_list = list(key_list)
self.lock = threading.Lock()
self.key_status = {key: {"bad": False, "bad_ts": None} for key in self.key_list}
self.idx = 0
def display_key(self, key):
if not key or len(key) < 10:
return "INVALID_KEY_FORMAT"
return f"{key[:6]}...{key[-4:]}"
def get(self):
with self.lock:
if not self.key_list:
logging.error("【KeyManager】API密钥池为空!无法提供密钥。")
raise ValueError("API key pool is empty.")
now = time.time()
num_keys = len(self.key_list)
for i in range(num_keys):
current_key_candidate = self.key_list[self.idx]
self.idx = (self.idx + 1) % num_keys
status = self.key_status[current_key_candidate]
if not status["bad"]:
logging.info(f"【KeyManager】选择API KEY: {self.display_key(current_key_candidate)} [状态:正常]")
return current_key_candidate
if status["bad_ts"] and (now - status["bad_ts"] >= BAD_KEY_RETRY_INTERVAL):
logging.info(f"【KeyManager】API KEY: {self.display_key(current_key_candidate)} 达到重试周期,恢复为正常。")
status["bad"] = False
status["bad_ts"] = None
return current_key_candidate
logging.warning("【KeyManager】所有API KEY均被标记为不良且未到重试时间。将强制重置所有KEY状态并尝试第一个。")
for key_to_reset in self.key_list:
self.key_status[key_to_reset]["bad"] = False
self.key_status[key_to_reset]["bad_ts"] = None
self.idx = 0
if self.key_list:
selected_key = self.key_list[0]
logging.info(f"【KeyManager】强制选择API KEY: {self.display_key(selected_key)} [状态:强制重试]")
return selected_key
else:
logging.error("【KeyManager】在强制重试逻辑中发现密钥池为空!")
raise ValueError("API key pool became empty during forced retry logic.")
def mark_bad(self, key):
with self.lock:
if key in self.key_status and not self.key_status[key]["bad"]:
logging.warning(f"【KeyManager】禁用API KEY: {self.display_key(key)}。将在 {BAD_KEY_RETRY_INTERVAL // 60} 分钟后自动重试。")
self.key_status[key]["bad"] = True
self.key_status[key]["bad_ts"] = time.time()
if not ONDEMAND_APIKEYS:
logging.warning("【启动警告】ONDEMAND_APIKEYS 环境变量未设置或为空。服务可能无法正常工作。")
keymgr = KeyManager(ONDEMAND_APIKEYS)
ONDEMAND_API_BASE = "https://agentforge-api.aitech.io/chat/v1"
def get_endpoint_id(openai_model_name):
normalized_model_name = str(openai_model_name or "").lower().replace(" ", "")
return MODEL_MAP.get(normalized_model_name, DEFAULT_ONDEMAND_MODEL)
def create_session(apikey, external_user_id=None, plugin_ids=None):
url = f"{ONDEMAND_API_BASE}/sessions"
payload = {"externalUserId": external_user_id or str(uuid.uuid4())}
if plugin_ids is not None:
payload["pluginIds"] = plugin_ids
headers = {"apikey": apikey, "Content-Type": "application/json"}
logging.info(f"【OnDemand】尝试创建新会话... URL: {url}, Key: {keymgr.display_key(apikey)}")
try:
resp = requests.post(url, json=payload, headers=headers, timeout=20)
resp.raise_for_status()
session_id = resp.json()["data"]["id"]
logging.info(f"【OnDemand】新会话创建成功: {session_id}, Key: {keymgr.display_key(apikey)}")
return session_id
except requests.exceptions.Timeout:
logging.error(f"【OnDemand】创建会话超时。URL: {url}, Key: {keymgr.display_key(apikey)}")
raise
except requests.exceptions.RequestException as e:
logging.error(f"【OnDemand】创建会话失败。URL: {url}, Key: {keymgr.display_key(apikey)}, 错误: {e}, 响应: {e.response.text if e.response else 'N/A'}")
raise
def format_openai_sse_delta(chunk_data_dict):
return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
payload = {
"query": query_str,
"endpointId": endpoint_id,
"pluginIds": [],
"responseMode": "stream"
}
headers = {
"apikey": apikey_for_attempt,
"Content-Type": "application/json",
"Accept": "text/event-stream"
}
accumulated_text_parts = []
api_error_yielded = False
max_500_retries_for_this_call = 5
current_500_retry_count = 0
while current_500_retry_count < max_500_retries_for_this_call:
current_500_retry_count += 1
if current_500_retry_count > 1:
logging.info(f"【流式请求子尝试 {current_attempt_num_logging} - 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}】Key: {keymgr.display_key(apikey_for_attempt)}")
else:
logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
try:
with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
if resp.status_code == 500:
logging.warning(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}, 500重试 {current_500_retry_count}) 收到500错误。Session: {session_id_for_attempt}")
if current_500_retry_count >= max_500_retries_for_this_call:
logging.error(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}) 达到500错误最大重试次数。将错误传递给上层。")
api_error_yielded = True
error_payload = {"error": {"message": f"OnDemand API persistent 500 error after {max_500_retries_for_this_call} retries (Attempt {current_attempt_num_logging}).",
"type": "on_demand_persistent_500_error", "code": 500}}
yield format_openai_sse_delta(error_payload)
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
time.sleep(1)
continue
if resp.status_code != 200:
api_error_yielded = True
error_text = resp.text
logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
error_payload = {
"error": {
"message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
"type": "on_demand_api_error",
"code": resp.status_code
}
}
yield format_openai_sse_delta(error_payload)
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
first_chunk_sent = False
last_line_str = ""
for line_bytes in resp.iter_lines():
if not line_bytes:
continue
line_str = line_bytes.decode("utf-8")
last_line_str = line_str
if line_str.startswith("data:"):
data_part = line_str[len("data:"):].strip()
if data_part == "[DONE]":
logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
elif data_part.startswith("[ERROR]:"):
api_error_yielded = True
error_json_str = data_part[len("[ERROR]:"):].strip()
logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
try:
error_obj = json.loads(error_json_str)
except json.JSONDecodeError:
error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
yield format_openai_sse_delta({"error": error_obj})
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
else:
try:
event_data = json.loads(data_part)
if event_data.get("eventType") == "fulfillment":
delta_content = event_data.get("answer", "")
if delta_content is None: delta_content = ""
accumulated_text_parts.append(delta_content)
choice_delta = {}
if not first_chunk_sent:
choice_delta["role"] = "assistant"
choice_delta["content"] = delta_content
first_chunk_sent = True
else:
choice_delta["content"] = delta_content
if not choice_delta.get("content") and not choice_delta.get("role"):
if not (choice_delta.get("role") and not choice_delta.get("content")):
continue
openai_chunk = {
"id": "chatcmpl-" + str(uuid.uuid4())[:12],
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": openai_model_name_for_response,
"choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
}
yield format_openai_sse_delta(openai_chunk)
except json.JSONDecodeError:
logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
continue
if not api_error_yielded and not last_line_str.startswith("data: [DONE]"):
logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
except requests.exceptions.RequestException as e_req_inner:
logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 请求时发生异常: {e_req_inner}, Key: {keymgr.display_key(apikey_for_attempt)}")
if current_500_retry_count >= max_500_retries_for_this_call or (hasattr(e_req_inner, 'response') and e_req_inner.response is not None and e_req_inner.response.status_code != 500):
raise e_req_inner
time.sleep(1)
except Exception as e_inner_unknown:
logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e_inner_unknown}, Session: {session_id_for_attempt}", exc_info=True)
api_error_yielded = True
error_payload = {
"error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e_inner_unknown)}", "type": "unknown_streaming_error_in_attempt"}
}
yield format_openai_sse_delta(error_payload)
yield "data: [DONE]\n\n"
return "".join(accumulated_text_parts).strip(), api_error_yielded
logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 500错误重试循环意外结束。")
raise requests.exceptions.RequestException(f"Exhausted internal 500 retries for attempt {current_attempt_num_logging} without success or specific error propagation.")
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
try:
request_data = request.json
except Exception as e:
logging.warning(f"无法解析请求JSON: {e}")
return jsonify({"error": "Invalid JSON in request body."}), 400
if not request_data or "messages" not in request_data:
return jsonify({"error": "Request body must be JSON and include a 'messages' field."}), 400
messages = request_data["messages"]
if not isinstance(messages, list) or not messages:
return jsonify({"error": "'messages' must be a non-empty list."}), 400
openai_model_name = request_data.get("model", "gpt-4o")
target_endpoint_id = get_endpoint_id(openai_model_name)
is_stream_request = bool(request_data.get("stream", False))
formatted_query_parts = []
for msg in messages:
role = msg.get("role", "user").strip().capitalize()
content = msg.get("content", "")
content_string = ""
if isinstance(content, list):
temp_parts = []
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
temp_parts.append(item.get("text", ""))
elif isinstance(item, dict):
for k, v_item in item.items():
content_string += f"{k}: {v_item}\n{k}: {v_item}"
if not content_string and temp_parts:
content_string = "\n".join(filter(None, temp_parts))
elif isinstance(content, str):
content_string = content
content_string = content_string.strip()
if not content_string:
continue
formatted_query_parts.append(f"<|{role}|>: {content_string}")
if not formatted_query_parts:
return jsonify({"error": "No valid content found in 'messages'."}), 400
start_prompt = CLAUDE_SYSTEM_PROMPT + "\n\n" + """下面是对话历史. 你是Assitant角色,请遵从User指令,并用中文尽可能详细的回复。请不要在开头提出"根据上下文及历史记录"相关的话语。接下来,请你以Assistant的身份,直接回复Assistantant应该说的话,并只做一轮回复,不要继续回复User内容。\n"""
final_query_to_ondemand = start_prompt + "\n".join(formatted_query_parts)
def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
if is_stream_request:
return Response(
handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name),
content_type='text/event-stream'
)
else:
return handle_non_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name)
def with_valid_key_and_session(action_func_to_wrap):
max_key_retries = len(ONDEMAND_APIKEYS) * 2 if ONDEMAND_APIKEYS else 1
key_retry_count = 0
last_exception_for_key_retry = None
while key_retry_count < max_key_retries:
key_retry_count += 1
selected_apikey_for_outer_retry = None
try:
selected_apikey_for_outer_retry = keymgr.get()
logging.info(f"【请求处理 - Key轮换尝试 {key_retry_count}/{max_key_retries}】使用 API Key: {keymgr.display_key(selected_apikey_for_outer_retry)},准备创建新会话...")
ondemand_session_id_for_outer_retry = create_session(selected_apikey_for_outer_retry)
return action_func_to_wrap(selected_apikey_for_outer_retry, ondemand_session_id_for_outer_retry)
except ValueError as ve:
logging.critical(f"【请求处理 - Key轮换尝试 {key_retry_count}】KeyManager 错误: {ve}")
last_exception_for_key_retry = ve
break
except requests.exceptions.RequestException as http_err_outer:
last_exception_for_key_retry = http_err_outer
status_code_from_exc = None
if hasattr(http_err_outer, 'response') and http_err_outer.response is not None:
status_code_from_exc = http_err_outer.response.status_code
logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
if selected_apikey_for_outer_retry:
if status_code_from_exc == 524:
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
elif status_code_from_exc == 500:
logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 500 error.")
elif status_code_from_exc and ((400 <= status_code_from_exc < 500) or status_code_from_exc in [502, 503]): # Explicitly list codes that mark bad, excluding 500, 524
keymgr.mark_bad(selected_apikey_for_outer_retry)
elif not status_code_from_exc :
keymgr.mark_bad(selected_apikey_for_outer_retry)
if key_retry_count >= max_key_retries:
logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
break
logging.info(f"【请求处理】Key轮换尝试 {key_retry_count} 失败,等待后重试下一个Key...")
time.sleep(1)
continue
except Exception as e_outer:
last_exception_for_key_retry = e_outer
logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
if selected_apikey_for_outer_retry:
keymgr.mark_bad(selected_apikey_for_outer_retry)
break
error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
error_code_str = "max_retries_check_context_contact_admin"
logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
if is_stream_request:
def error_stream_gen():
yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_max_retries_exceeded", "code": error_code_str}})
yield "data: [DONE]\n\n"
return Response(error_stream_gen(), content_type='text/event-stream', status=500)
else:
return jsonify({"error": error_message, "code": error_code_str}), 500
return with_valid_key_and_session(attempt_ondemand_request_wrapper)
def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
max_empty_response_retries = 5
empty_retry_attempt_num = 0
current_apikey_for_attempt = initial_apikey
current_session_id_for_attempt = initial_session_id
while empty_retry_attempt_num < max_empty_response_retries:
empty_retry_attempt_num += 1
accumulated_text_this_attempt = ""
api_error_in_attempt = False
if empty_retry_attempt_num > 1:
logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
try:
current_apikey_for_attempt = keymgr.get()
current_session_id_for_attempt = create_session(current_apikey_for_attempt)
logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
except (ValueError, requests.exceptions.RequestException) as e_key_session:
logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
status_code_from_exc_retry_setup = None
if hasattr(e_key_session, 'response') and e_key_session.response is not None:
status_code_from_exc_retry_setup = e_key_session.response.status_code
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
if status_code_from_exc_retry_setup == 524:
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
elif status_code_from_exc_retry_setup == 500:
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for stream retry.")
else:
keymgr.mark_bad(current_apikey_for_attempt)
if empty_retry_attempt_num >= max_empty_response_retries:
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_error_code = "max_retries_check_context_contact_admin"
logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code, "details": str(e_key_session)}})
yield "data: [DONE]\n\n"
return
time.sleep(1)
current_apikey_for_attempt = None
continue
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
try:
result_tuple = yield from _execute_one_stream_attempt(
current_apikey_for_attempt,
current_session_id_for_attempt,
query_str,
endpoint_id,
openai_model_name_for_response,
f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
)
accumulated_text_this_attempt = result_tuple[0]
api_error_in_attempt = result_tuple[1]
except requests.exceptions.RequestException as e_req:
log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
status_code_from_exc_stream = None
if hasattr(e_req, 'response') and e_req.response is not None:
status_code_from_exc_stream = e_req.response.status_code
logging.warning(f"【流式请求】({log_attempt_str} using key {log_key_display}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_stream}")
if current_apikey_for_attempt:
if status_code_from_exc_stream == 524:
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
elif status_code_from_exc_stream == 500:
logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 500 error during stream attempt.")
else:
keymgr.mark_bad(current_apikey_for_attempt)
if empty_retry_attempt_num == 1:
raise e_req
if empty_retry_attempt_num >= max_empty_response_retries:
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_error_code = "max_retries_check_context_contact_admin"
logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code, "details": str(e_req)}})
yield "data: [DONE]\n\n"
return
time.sleep(1)
continue
if api_error_in_attempt:
logging.warning(f"【流式请求】({log_attempt_str}) 子尝试已处理并流式传输API错误。")
return
if accumulated_text_this_attempt:
logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
return
logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
if empty_retry_attempt_num >= max_empty_response_retries:
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_error_code = "max_retries_check_context_contact_admin"
logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
yield format_openai_sse_delta({
"error": {"message": final_error_message, "type": "max_empty_retries_exceeded", "code": final_error_code}
})
yield "data: [DONE]\n\n"
return
logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
time.sleep(1)
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
yield format_openai_sse_delta({"error": {"message": final_fallback_error_message, "type": "internal_proxy_error_unexpected_exit", "code": final_fallback_error_code}})
yield "data: [DONE]\n\n"
def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
max_empty_response_retries = 5
empty_retry_attempt_num = 0
current_apikey_for_attempt = initial_apikey
current_session_id_for_attempt = initial_session_id
ai_response_content = "" # Define ai_response_content outside the try block to ensure it's available for the final empty check
while empty_retry_attempt_num < max_empty_response_retries:
empty_retry_attempt_num += 1
if empty_retry_attempt_num > 1:
logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
try:
current_apikey_for_attempt = keymgr.get()
current_session_id_for_attempt = create_session(current_apikey_for_attempt)
logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
except (ValueError, requests.exceptions.RequestException) as e_key_session:
logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
status_code_from_exc_retry_setup_ns = None
if hasattr(e_key_session, 'response') and e_key_session.response is not None:
status_code_from_exc_retry_setup_ns = e_key_session.response.status_code
if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
if status_code_from_exc_retry_setup_ns == 524:
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
elif status_code_from_exc_retry_setup_ns == 500:
logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for non-stream retry.")
else:
keymgr.mark_bad(current_apikey_for_attempt)
if empty_retry_attempt_num >= max_empty_response_retries:
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_error_code = "max_retries_check_context_contact_admin"
logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
return jsonify({"error": final_error_message, "code": final_error_code, "details": str(e_key_session)}), 500
time.sleep(1)
current_apikey_for_attempt = None
continue
log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
max_500_retries_for_this_call = 5
current_500_retry_count = 0
# Reset ai_response_content for each new attempt (especially for the 500-retry loop)
ai_response_content = ""
while current_500_retry_count < max_500_retries_for_this_call:
current_500_retry_count += 1
if current_500_retry_count > 1:
logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}, 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
else:
logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
# Corrected: Use query_str and endpoint_id parameters passed to the function
payload = { "query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync" }
headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
try:
resp = requests.post(url, json=payload, headers=headers, timeout=120)
if resp.status_code == 500:
logging.warning(f"【OnDemand同步错误】({log_attempt_str}, 500重试 {current_500_retry_count}) 收到500错误。")
if current_500_retry_count >= max_500_retries_for_this_call:
logging.error(f"【OnDemand同步错误】({log_attempt_str}) 达到500错误最大重试次数。将错误传递给上层。")
resp.raise_for_status()
time.sleep(1)
continue
resp.raise_for_status()
response_json = resp.json()
if "data" not in response_json or "answer" not in response_json["data"]:
logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}, 500-retry {current_500_retry_count}.")
ai_response_content = response_json["data"]["answer"]
if ai_response_content is None: ai_response_content = ""
if ai_response_content.strip():
logging.info(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 成功获取非空内容。")
openai_response_obj = {
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
"model": openai_model_name_for_response,
"choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
"usage": {}
}
return jsonify(openai_response_obj) # SUCCESS
else:
logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 返回空回复。")
break
except requests.exceptions.RequestException as e_req:
log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
status_code_from_exc_sync = None
if hasattr(e_req, 'response') and e_req.response is not None:
status_code_from_exc_sync = e_req.response.status_code
logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
if current_500_retry_count >= max_500_retries_for_this_call or status_code_from_exc_sync != 500:
if empty_retry_attempt_num == 1:
raise e_req
else:
raise e_req
time.sleep(1)
# Continue to the next iteration of current_500_retry_count loop
except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
logging.error(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 处理响应或格式时出错: {e_parse}", exc_info=True)
if empty_retry_attempt_num == 1 and current_500_retry_count == 1 :
raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
raise requests.exceptions.RequestException(f"Response format error during retry: {e_parse}") from e_parse
# After the 500-retry loop for the current key/session
if ai_response_content.strip(): # Should have been returned if non-empty
pass # Should not reach here if content was found
else: # Content is still empty for this key/session after 500-retries (or if 200 OK but empty)
if empty_retry_attempt_num >= max_empty_response_retries:
final_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_error_code = "max_retries_check_context_contact_admin"
logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
return jsonify({
"error": final_error_message,
"id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
"model": openai_model_name_for_response,
"choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
"usage": {}, "code": final_error_code
}), 500
logging.info(f"【同步请求】空回复(在500-重试循环之后),准备进行下一个空回复尝试。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
time.sleep(1)
# Outer loop (empty_retry_attempt_num) will continue to try a new key/session
final_fallback_error_message = "重试次数过多,请检查上下文长度! 或联系管理员!"
final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
@app.route("/v1/models", methods=["GET"])
def list_models():
model_objects = []
for model_key_alias in MODEL_MAP.keys():
model_objects.append({
"id": model_key_alias,
"object": "model",
"created": int(time.time()),
"owned_by": "ondemand-proxy"
})
return jsonify({
"object": "list",
"data": model_objects
})
@app.route("/", methods=["GET"])
def health_check():
num_keys = len(ONDEMAND_APIKEYS)
key_status_summary = {keymgr.display_key(k): ("OK" if not v["bad"] else f"BAD (since {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(v['bad_ts'])) if v['bad_ts'] else 'N/A'})") for k, v in keymgr.key_status.items()}
return jsonify({
"status": "ok",
"message": "OnDemand API Proxy is running.",
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime()),
"ondemand_api_keys_loaded": num_keys,
"ondemand_api_key_pool_status": key_status_summary if num_keys > 0 else "No keys loaded.",
"model_mapping_enabled": True,
"default_on_demand_model": DEFAULT_ONDEMAND_MODEL,
"available_models_via_proxy": list(MODEL_MAP.keys())
}), 200
if __name__ == "__main__":
log_format = '[%(asctime)s] %(levelname)s in %(module)s (%(funcName)s): %(message)s'
logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper(), format=log_format)
if not PRIVATE_KEY:
logging.warning("****************************************************************")
logging.warning("* WARNING: PRIVATE_KEY environment variable is not set. *")
logging.warning("* The proxy service will be UNSECURED and open to anyone. *")
logging.warning("* For production, set PRIVATE_KEY to a strong secret value. *")
logging.warning("****************************************************************")
if not ONDEMAND_APIKEYS:
logging.warning("****************************************************************")
logging.warning("* WARNING: ONDEMAND_APIKEYS environment variable is not set *")
logging.warning("* or is empty. The proxy will not be able to connect to *")
logging.warning("* the OnDemand service. *")
logging.warning("****************************************************************")
else:
logging.info(f"======== OnDemand API KEY 池数量: {len(ONDEMAND_APIKEYS)} ========")
for i, key_val in enumerate(ONDEMAND_APIKEYS):
logging.info(f" Key [{i+1}]: {keymgr.display_key(key_val)}")
logging.info(f"======== 默认 OnDemand 模型 Endpoint ID: {DEFAULT_ONDEMAND_MODEL} ========")
logging.info(f"======== 模型映射表 (User Model -> OnDemand Endpoint ID):")
for user_model, od_endpoint in MODEL_MAP.items():
logging.info(f" '{user_model}' -> '{od_endpoint}'")
port = int(os.environ.get("PORT", 7860))
app.run(host="0.0.0.0", port=port, debug=False)
|