Spaces:
Build error
Build error
| import re | |
| from huggingface_hub import InferenceClient | |
| from openai import OpenAI | |
| from together import Together | |
| from src.text_poa_graph import TextPOAGraph | |
def extract_context(text_poa_graph, node_id):
    """Return, for every sequence, its text up to and including ``node_id``.

    Args:
        text_poa_graph: Graph object exposing ``_seq_paths`` (mapping of
            sequence label to an ordered list of node ids) and ``nodedict``
            (mapping of node id to a node with ``text`` and ``variations``).
        node_id: Id of the node at which each context is cut off (inclusive).

    Returns:
        dict mapping each sequence label to the space-joined text of the
        nodes on its path from the start through ``node_id``. For each node
        the label-specific entry in ``variations`` is used when present,
        otherwise the node's ``text``.

    Raises:
        ValueError: If ``node_id`` does not occur on one of the paths
            (propagated from ``list.index``).
    """

    def render(seq_label, nid):
        # Prefer the wording this particular sequence used for the node.
        node = text_poa_graph.nodedict[nid]
        return node.variations.get(seq_label, node.text)

    result = {}
    for seq_label, seq_path in text_poa_graph._seq_paths.items():
        cutoff = seq_path.index(node_id) + 1  # slice end, so node_id is included
        prefix = seq_path[:cutoff]
        result[seq_label] = " ".join(render(seq_label, nid) for nid in prefix)
    return result
def extract_alternative_paths(text_poa_graph: TextPOAGraph, node_id):
    """Extract each sequence's text from just after ``node_id`` to the next consensus node.

    Args:
        text_poa_graph: Graph exposing ``_seq_paths`` (label -> ordered list of
            node ids), ``nodedict`` (node id -> node with ``text`` and
            ``variations``) and ``consensus_node_ids`` (container of node ids).
        node_id: Id of the node after which the alternative segments start.

    Returns:
        dict mapping each sequence label to the space-joined text of the nodes
        that follow ``node_id`` on that sequence's path, up to and including
        the first later node whose id is in ``consensus_node_ids``. If no such
        node follows, the value is the empty string.

    Raises:
        ValueError: If ``node_id`` does not occur on one of the paths
            (propagated from ``list.index``).
    """
    consensus_ids = text_poa_graph.consensus_node_ids
    alternative_paths = {}
    for label, path in text_poa_graph._seq_paths.items():
        start = path.index(node_id) + 1
        # Track the *position* of the next consensus node rather than its id:
        # a truthiness test on the id would wrongly discard a node with id 0.
        end = next(
            (pos for pos in range(start, len(path)) if path[pos] in consensus_ids),
            None,
        )
        alternative_segment = [] if end is None else path[start : end + 1]
        alternative_paths[label] = " ".join(
            text_poa_graph.nodedict[nid].variations.get(label, text_poa_graph.nodedict[nid].text)
            for nid in alternative_segment
        )
    return alternative_paths
def is_same_branch(text_poa_graph: TextPOAGraph, node_id, lable_1, label_2):
    """Check whether two sequences continue to the same node right after ``node_id``.

    Args:
        text_poa_graph: Graph exposing ``_seq_paths`` (label -> ordered list of
            node ids).
        node_id: Id of the node that both paths are expected to contain.
        lable_1: Label of the first sequence (spelling kept for compatibility
            with existing callers).
        label_2: Label of the second sequence.

    Returns:
        True if the node immediately following ``node_id`` is the same on both
        paths. A path that ends at ``node_id`` is treated as having no
        successor, so two paths that both end there compare equal, and a path
        that ends there never matches one that continues.

    Raises:
        KeyError: If either label is not in ``_seq_paths``.
        ValueError: If ``node_id`` does not occur on one of the paths.
    """
    path_1 = text_poa_graph._seq_paths[lable_1]
    path_2 = text_poa_graph._seq_paths[label_2]
    pos_1 = path_1.index(node_id) + 1
    pos_2 = path_2.index(node_id) + 1
    # Guard the lookups: node_id may be the final node of a path, in which
    # case indexing one past it would raise IndexError.
    next_1 = path_1[pos_1] if pos_1 < len(path_1) else None
    next_2 = path_2[pos_2] if pos_2 < len(path_2) else None
    return next_1 == next_2
def extract_equivalent_classes(text_poa_graph: TextPOAGraph, node_id, selected_labels):
    """Group sequence labels by the branch they take right after ``node_id``.

    Labels are processed in the order given. Each label joins the first
    existing group whose first member satisfies ``is_same_branch`` with it;
    if there is no such group, the label starts a new one.

    Args:
        text_poa_graph: Graph passed through to ``is_same_branch``.
        node_id: Id of the node after which branches are compared.
        selected_labels: Sequence labels to partition; a falsy value yields
            an empty result.

    Returns:
        A list of groups, each a list of labels, in order of first appearance.
    """
    groups = []
    if not selected_labels:
        return groups

    for candidate in selected_labels:
        for members in groups:
            # Only the group's first member is compared against the candidate.
            if is_same_branch(text_poa_graph, node_id, members[0], candidate):
                members.append(candidate)
                break
        else:
            # No existing group matched: the candidate opens a new group.
            groups.append([candidate])
    return groups
def verify_correctness_pairwise(
    full_text_1: str, full_text_2: str, verification_model: str, problem: str, api: str = "openai"
):
    """Pairwise verification of two partial solution paths.

    Builds a comparison prompt from the problem and both partial solutions,
    sends it to a chat-completion endpoint, prints the two solutions and the
    raw response, and parses the last ``(a, b)`` score pair from the response.

    Args:
        full_text_1: Text of the first partial solution.
        full_text_2: Text of the second partial solution.
        verification_model: Model name passed to the chat-completion call.
        problem: Problem statement inserted into the prompt.
        api: Backend to use: ``"openai"``, ``"hf"`` or ``"together"``.

    Returns:
        The last score pair found in the response, as a tuple of the matched
        strings (e.g. ``("1", "0")`` or ``("1.0", "0")``). If the response
        contains no such pair, or has no text content, the int tuple
        ``(0, 0)`` is returned.
        NOTE(review): the two cases use different element types (str vs int);
        left unchanged because callers are not visible here.

    Raises:
        ValueError: If ``api`` is not one of the supported backends.
    """
    if api == "openai":
        client = OpenAI()
    elif api == "hf":
        client = InferenceClient()
    elif api == "together":
        client = Together()
    else:
        raise ValueError(f"Invalid API: {api}")
    prompt = f"""
You will be given a problem and 2 partial solutions.
Your task is to use comparison as an EFFICIENCY TOOL to quickly identify potential errors.
You will be given guidelines to follow, and you will be penalized if you do not follow them.
Problem: {problem}
Partial Solution 1: {full_text_1}
Partial Solution 2: {full_text_2}
CRITICAL GUIDELINES:
- DO NOT penalize a solution for being incomplete or having missing steps
- DO NOT make a comparison of which solution is better
- DO NOT consider steps incorrect just because they differ between solutions
- DO NOT prematurely evaluate based on final answers or future steps
- DO NOT expect both solutions to be at the same stage of completion
- DO NOT consider a step incorrect just because it lacks sufficient detail or justification
KEY EFFICIENCY PRINCIPLE:
- Use agreement between solutions as evidence of correctness
- Use disagreement as a signal to investigate more deeply
- Only label a step as an error if it contains a specific mathematical mistake
- Incompleteness is not a mathematical error.
Here are the instructions for how to complete your task:
EFFICIENT VERIFICATION APPROACH:
1. QUICK COMPARISON (Use this to focus your attention):
- Immediately identify where the solutions differ in approach or results
- Use these differences as "error hotspots" to prioritize your verification
- When solutions agree, you can generally assume that part is correct
- When solutions disagree, investigate those specific points deeply
2. TARGETED VERIFICATION (Only where needed):
- Most important: Do not consider any incomplete steps as errors
- Focus your mathematical verification on the "hotspots" identified above
- Check mathematical validity only at points of difference or uncertainty
- Avoid line-by-line checking of steps where solutions agree
- For each potential error spot, verify if the mathematical reasoning is valid
- If an intermediate step is later corrected, do not penalize the solution for having the incorrect intermediate step
After your targeted verification, propose a score tuple (score_1, score_2):
- Score (1,1) if both partial solutions are valid
- Score (1,0) if only the first solution is valid
- Score (0,1) if only the second solution is valid
- Score (0,0) if both solutions are invalid
In case you score a solution as 0, you must give an explanation for each check below:
3. FINAL CHECKS:
- If you score a solution as 0, you MUST identify the specific mathematical error.
- You must also double check the problem statement. Reconsider your score and determine if you have misinterpreted the problem statement.
- You must also check whether you have penalized a solution for being incomplete or having missing steps.
Before outputting your final score, you must answer these questions:
STOP! Did you give a score of 0 to a solution that was incomplete?
STOP! Did you penalize a solution for being incomplete or having missing steps?
STOP! Did you make a comparison of which solution is better?
STOP! Did you consider steps incorrect just because they differ between solutions?
STOP! Did you prematurely evaluate based on final answers?
STOP! Did you consider a step incorrect just because it lacks sufficient detail or justification?
Now give your final score:
Final score:
"""
    completion = client.chat.completions.create(
        model=verification_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,
    )
    # The message content can be None (e.g. a refusal); treat that as an empty
    # response so parsing falls through to the default score instead of
    # raising AttributeError on ``.strip()``.
    content = completion.choices[0].message.content
    response = (content or "").strip()
    print(full_text_1)
    print(full_text_2)
    print(f"Correctness score: {response} \n")
    # The model may mention several tuples while reasoning; the last one is
    # taken as the final score.
    score_match = re.findall(r"\(\s*([01](?:\.0)?)\s*,\s*([01](?:\.0)?)\s*\)", response)
    score = score_match[-1] if score_match else (0, 0)
    return score
def self_complete(verification_prompt: str, verification_model: str, api: str = "openai"):
    """Send a single prompt to a chat-completion endpoint and return the reply text.

    The prompt is printed to stdout before the request is made.

    Args:
        verification_prompt: User message sent to the model.
        verification_model: Model name passed to the chat-completion call.
        api: Backend to use: ``"openai"``, ``"hf"`` or ``"together"``.

    Returns:
        The stripped text content of the first choice, or an empty string if
        the response carries no text content.

    Raises:
        ValueError: If ``api`` is not one of the supported backends.
    """
    print(verification_prompt)
    if api == "openai":
        client = OpenAI()
    elif api == "hf":
        client = InferenceClient()
    elif api == "together":
        client = Together()
    else:
        raise ValueError(f"Invalid API: {api}")
    completion = client.chat.completions.create(
        model=verification_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": verification_prompt},
        ],
        temperature=0.0,
    )
    # The message content can be None; avoid AttributeError on ``.strip()``.
    content = completion.choices[0].message.content
    response = (content or "").strip()
    return response