Upload 6 files
- app/webui/README.md +0 -0
- app/webui/app.py +147 -0
- app/webui/patch.py +131 -0
- app/webui/process.py +136 -0
- src/translation_agent/__init__.py +1 -0
- src/translation_agent/utils.py +687 -0
app/webui/README.md
ADDED
File without changes
app/webui/app.py
ADDED
@@ -0,0 +1,147 @@
import re

import gradio as gr
from process import model_load, lang_detector, diff_texts, translator  # read_doc is defined below
from llama_index.core import SimpleDirectoryReader


def huanik(
    endpoint,
    model,
    api_key,
    source_lang,
    target_lang,
    source_text,
    country,
    max_tokens,
    context_window,
    num_output,
):
    if not source_text or source_lang == target_lang:
        raise gr.Error("Please check that the content or options are entered correctly.")

    try:
        model_load(endpoint, model, api_key, context_window, num_output)
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred: {e}")

    source_text = re.sub(r"\n+", "\n", source_text)

    init_translation, reflect_translation, final_translation = translator(
        source_lang=source_lang,
        target_lang=target_lang,
        source_text=source_text,
        country=country,
        max_tokens=max_tokens,
    )

    final_diff = gr.HighlightedText(
        diff_texts(init_translation, final_translation),
        label="Diff translation",
        combine_adjacent=True,
        show_legend=True,
        visible=True,
        color_map={"removed": "red", "added": "green"},
    )

    return init_translation, reflect_translation, final_translation, final_diff


def update_model(endpoint):
    endpoint_model_map = {
        "Groq": "llama3-70b-8192",
        "OpenAI": "gpt-4o",
        "Cohere": "command-r",
        "TogetherAI": "Qwen/Qwen2-72B-Instruct",
        "Ollama": "llama3",
        "Huggingface": "mistralai/Mistral-7B-Instruct-v0.3",
    }
    return gr.update(value=endpoint_model_map[endpoint])


def read_doc(file):
    # SimpleDirectoryReader expects a list of file paths; join the parsed
    # documents into one string so the result can fill the source textbox.
    docs = SimpleDirectoryReader(input_files=[file]).load_data()
    return "\n".join(doc.text for doc in docs)


TITLE = """
<h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
"""

CSS = """
h1 {
    text-align: center;
    display: block;
    height: 10vh;
    align-content: center;
}
footer {
    visibility: hidden;
}
"""

with gr.Blocks(theme="soft", css=CSS) as demo:
    gr.Markdown(TITLE)
    with gr.Row():
        with gr.Column(scale=1):
            endpoint = gr.Dropdown(
                label="Endpoint",
                choices=["Groq", "OpenAI", "Cohere", "TogetherAI", "Ollama", "Huggingface"],
                value="Groq",
            )
            model = gr.Textbox(label="Model", value="llama3-70b-8192")
            api_key = gr.Textbox(label="API_KEY", type="password")
            source_lang = gr.Textbox(
                label="Source Lang(Auto-Detect)",
                value="English",
            )
            target_lang = gr.Textbox(
                label="Target Lang",
                value="Spanish",
            )
            country = gr.Textbox(label="Country", value="Argentina", max_lines=1)
            with gr.Accordion("Advanced Options", open=False):
                max_tokens = gr.Slider(
                    label="Max Tokens Per Chunk",
                    minimum=512,
                    maximum=2048,
                    value=1000,
                    step=8,
                )
                context_window = gr.Slider(
                    label="Context Window",
                    minimum=512,
                    maximum=8192,
                    value=4096,
                    step=8,
                )
                num_output = gr.Slider(
                    label="Output Num",
                    minimum=256,
                    maximum=8192,
                    value=512,
                    step=8,
                )
        with gr.Column(scale=4):
            source_text = gr.Textbox(
                label="Source Text",
                value="How we live is so different from how we ought to live that he who studies "
                "what ought to be done rather than what is done will learn the way to his downfall "
                "rather than to his preservation.",
                lines=5,
            )
            with gr.Tab("Final"):
                output_final = gr.Textbox(label="Final Translation", lines=3, show_copy_button=True)
            with gr.Tab("Initial"):
                output_init = gr.Textbox(label="Init Translation", lines=3, show_copy_button=True)
            with gr.Tab("Reflection"):
                output_reflect = gr.Textbox(label="Reflection", lines=3, show_copy_button=True)
            with gr.Tab("Diff"):
                output_diff = gr.HighlightedText(visible=False)
    with gr.Row():
        submit = gr.Button(value="Submit")
        upload = gr.UploadButton("Upload")
        clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])

    endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
    source_text.change(lang_detector, source_text, source_lang)
    submit.click(
        fn=huanik,
        inputs=[endpoint, model, api_key, source_lang, target_lang, source_text,
                country, max_tokens, context_window, num_output],
        outputs=[output_init, output_reflect, output_final, output_diff],
    )
    upload.upload(fn=read_doc, inputs=upload, outputs=source_text)

if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False, share=False)
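For a quick sanity check of the endpoint-to-model mapping above, the dropdown callback can be called outside the UI. A minimal sketch, assuming it is run from app/webui/ (the exact shape of the gr.update payload varies by Gradio version):

# Hypothetical direct call to the dropdown callback defined above.
from app import update_model  # assumes cwd is app/webui/

payload = update_model("OpenAI")
print(payload)  # an update payload carrying value="gpt-4o" for the Model textbox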
app/webui/patch.py
ADDED
@@ -0,0 +1,131 @@
# a monkey patch to use llama-index completion
from typing import Optional, Union, Callable
from functools import wraps
from src.translation_agent.utils import *


from llama_index.llms.groq import Groq
from llama_index.llms.cohere import Cohere
from llama_index.llms.openai import OpenAI
from llama_index.llms.together import TogetherLLM
from llama_index.llms.ollama import Ollama
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

from llama_index.core import Settings
from llama_index.core.llms import ChatMessage


# Add your LLMs here

def model_load(
    endpoint: str,
    model: str,
    api_key: Optional[str] = None,
    context_window: int = 4096,
    num_output: int = 512,
):
    if endpoint == "Groq":
        llm = Groq(
            model=model,
            api_key=api_key,
        )
    elif endpoint == "Cohere":
        llm = Cohere(
            model=model,
            api_key=api_key,
        )
    elif endpoint == "OpenAI":
        llm = OpenAI(
            model=model,
            api_key=api_key,
        )
    elif endpoint == "TogetherAI":
        llm = TogetherLLM(
            model=model,
            api_key=api_key,
        )
    elif endpoint == "Ollama":  # matches the "Ollama" choice in the UI dropdown
        llm = Ollama(
            model=model,
            request_timeout=120.0,
        )
    elif endpoint == "Huggingface":
        llm = HuggingFaceInferenceAPI(
            model_name=model,
            token=api_key,
            task="text-generation",
        )
    else:
        raise ValueError(f"Unknown endpoint: {endpoint}")
    Settings.llm = llm
    # maximum input size to the LLM
    Settings.context_window = context_window

    # number of tokens reserved for text generation
    Settings.num_output = num_output



def completion_wrapper(func: Callable) -> Callable:
    @wraps(func)
    def wrapper(
        prompt: str,
        system_message: str = "You are a helpful assistant.",
        temperature: float = 0.3,
        json_mode: bool = False,
    ) -> Union[str, dict]:
        """
        Generate a completion using the llama-index LLM configured in Settings.

        Args:
            prompt (str): The user's prompt or query.
            system_message (str, optional): The system message to set the context for the assistant.
                Defaults to "You are a helpful assistant.".
            temperature (float, optional): The sampling temperature for controlling the randomness of the generated text.
                Defaults to 0.3.
            json_mode (bool, optional): Whether to request the response in JSON format.
                Defaults to False.

        Returns:
            Union[str, dict]: The generated completion. With the llama-index
                backends used here, both branches return the message content
                as a string.
        """
        llm = Settings.llm
        if llm.class_name() == "HuggingFaceInferenceAPI":
            # The HF inference backend takes the system prompt as an attribute
            # rather than a system-role message.
            llm.system_prompt = system_message
            messages = [
                ChatMessage(role="user", content=prompt),
            ]
            response = llm.chat(
                messages=messages,
                temperature=temperature,
                top_p=1,
            )
            return response.message.content
        else:
            messages = [
                ChatMessage(role="system", content=system_message),
                ChatMessage(role="user", content=prompt),
            ]

            if json_mode:
                response = llm.chat(
                    temperature=temperature,
                    top_p=1,
                    response_format={"type": "json_object"},
                    messages=messages,
                )
                return response.message.content
            else:
                response = llm.chat(
                    temperature=temperature,
                    top_p=1,
                    messages=messages,
                )
                return response.message.content

    return wrapper

openai_completion = get_completion
get_completion = completion_wrapper(openai_completion)
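Once model_load has run, every completion the agent requests through get_completion is served by the selected llama-index backend instead of the OpenAI client in utils.py. A minimal usage sketch, assuming a valid Groq API key (the key string below is a placeholder) and that it is run from app/webui/:

# Hypothetical usage of the patched completion path defined above.
from patch import model_load, get_completion

model_load("Groq", "llama3-70b-8192", api_key="YOUR_GROQ_API_KEY")  # placeholder key
print(get_completion("Say hello in Spanish.", system_message="You are a terse assistant."))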
app/webui/process.py
ADDED
@@ -0,0 +1,136 @@
import re

from polyglot.detect import Detector
from polyglot.text import Text
from difflib import Differ
from icecream import ic
from patch import *
from llama_index.core.node_parser import SentenceSplitter

def lang_detector(text):
    min_chars = 5
    if len(text) < min_chars:
        return "Input text too short"
    try:
        detector = Detector(text).language
        lang_info = str(detector)
        # Detector's string form looks like "name: English ...";
        # extract the human-readable language name.
        name = re.search(r"name: (\w+)", lang_info).group(1)
        return name
    except Exception as e:
        return f"ERROR:{str(e)}"

def tokenize(text):
    # Use polyglot to tokenize the text
    polyglot_text = Text(text)
    words = polyglot_text.words

    # Check if the text contains spaces
    if ' ' in text:
        # Create a list of words and spaces
        tokens = []
        for word in words:
            tokens.append(word)
            tokens.append(' ')  # Add space after each word
        return tokens[:-1]  # Remove the last space
    else:
        return words


def diff_texts(text1, text2):
    tokens1 = tokenize(text1)
    tokens2 = tokenize(text2)

    d = Differ()
    diff_result = list(d.compare(tokens1, tokens2))

    highlighted_text = []
    for token in diff_result:
        word = token[2:]
        category = None
        if token[0] == '+':
            category = 'added'
        elif token[0] == '-':
            category = 'removed'
        elif token[0] == '?':
            continue  # Ignore the hints line

        highlighted_text.append((word, category))

    return highlighted_text

# modified from src.translation_agent.utils.translate
def translator(
    source_lang,
    target_lang,
    source_text,
    country,
    max_tokens=MAX_TOKENS_PER_CHUNK
):
    """Translate the source_text from source_lang to target_lang."""
    num_tokens_in_text = num_tokens_in_string(source_text)

    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        ic("Translating text as single chunk")

        # Note: use `yield from B()` if you put a yield inside function B()
        init_translation = one_chunk_initial_translation(
            source_lang, target_lang, source_text
        )

        reflection = one_chunk_reflect_on_translation(
            source_lang, target_lang, source_text, init_translation, country
        )

        final_translation = one_chunk_improve_translation(
            source_lang, target_lang, source_text, init_translation, reflection
        )

        return init_translation, reflection, final_translation

    else:
        ic("Translating text as multiple chunks")

        token_size = calculate_chunk_size(
            token_count=num_tokens_in_text, token_limit=max_tokens
        )

        ic(token_size)

        # using llama-index's sentence splitter
        text_parser = SentenceSplitter(
            chunk_size=token_size,
        )

        source_text_chunks = text_parser.split_text(source_text)

        translation_1_chunks = multichunk_initial_translation(
            source_lang, target_lang, source_text_chunks
        )

        init_translation = "".join(translation_1_chunks)

        reflection_chunks = multichunk_reflect_on_translation(
            source_lang,
            target_lang,
            source_text_chunks,
            translation_1_chunks,
            country,
        )

        reflection = "".join(reflection_chunks)

        translation_2_chunks = multichunk_improve_translation(
            source_lang,
            target_lang,
            source_text_chunks,
            translation_1_chunks,
            reflection_chunks,
        )

        final_translation = "".join(translation_2_chunks)

        return init_translation, reflection, final_translation
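diff_texts returns (token, category) pairs in exactly the shape gr.HighlightedText expects. An illustrative call; the exact tokenization depends on polyglot's models, so the output shown is indicative only:

# Illustrative call to diff_texts defined above; output values are indicative.
pairs = diff_texts("the quick fox", "the quick brown fox")
# e.g. [('the', None), (' ', None), ('quick', None), (' ', None),
#       ('brown', 'added'), (' ', 'added'), ('fox', None)]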
src/translation_agent/__init__.py
ADDED
@@ -0,0 +1 @@
from .utils import translate
src/translation_agent/utils.py
ADDED
@@ -0,0 +1,687 @@
import os
from typing import List
from typing import Union

import openai
import tiktoken
from dotenv import load_dotenv
from icecream import ic
from langchain_text_splitters import RecursiveCharacterTextSplitter


load_dotenv()  # read local .env file
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

MAX_TOKENS_PER_CHUNK = (
    1000  # if text is more than this many tokens, we'll break it up into
    # discrete chunks to translate one chunk at a time
)


def get_completion(
    prompt: str,
    system_message: str = "You are a helpful assistant.",
    model: str = "gpt-4-turbo",
    temperature: float = 0.3,
    json_mode: bool = False,
) -> Union[str, dict]:
    """
    Generate a completion using the OpenAI API.

    Args:
        prompt (str): The user's prompt or query.
        system_message (str, optional): The system message to set the context for the assistant.
            Defaults to "You are a helpful assistant.".
        model (str, optional): The name of the OpenAI model to use for generating the completion.
            Defaults to "gpt-4-turbo".
        temperature (float, optional): The sampling temperature for controlling the randomness of the generated text.
            Defaults to 0.3.
        json_mode (bool, optional): Whether to return the response in JSON format.
            Defaults to False.

    Returns:
        Union[str, dict]: The generated completion.
            If json_mode is True, returns the completion as a JSON-formatted string.
            If json_mode is False, returns the generated text as a string.
    """

    if json_mode:
        response = client.chat.completions.create(
            model=model,
            temperature=temperature,
            top_p=1,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content
    else:
        response = client.chat.completions.create(
            model=model,
            temperature=temperature,
            top_p=1,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt},
            ],
        )
        return response.choices[0].message.content


def one_chunk_initial_translation(
    source_lang: str, target_lang: str, source_text: str
) -> str:
    """
    Translate the entire text as one chunk using an LLM.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for translation.
        source_text (str): The text to be translated.

    Returns:
        str: The translated text.
    """

    system_message = f"You are an expert linguist, specializing in translation from {source_lang} to {target_lang}."

    translation_prompt = f"""This is an {source_lang} to {target_lang} translation, please provide the {target_lang} translation for this text. \
Do not provide any explanations or text apart from the translation.
{source_lang}: {source_text}

{target_lang}:"""

    # translation_prompt is an f-string and already fully formatted; the
    # redundant .format() call has been dropped (it was a no-op and could
    # raise on source texts containing braces).
    translation = get_completion(translation_prompt, system_message=system_message)

    return translation


def one_chunk_reflect_on_translation(
    source_lang: str,
    target_lang: str,
    source_text: str,
    translation_1: str,
    country: str = "",
) -> str:
    """
    Use an LLM to reflect on the translation, treating the entire text as one chunk.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language of the translation.
        source_text (str): The original text in the source language.
        translation_1 (str): The initial translation of the source text.
        country (str): Country specified for the target language.

    Returns:
        str: The LLM's reflection on the translation, providing constructive criticism and suggestions for improvement.
    """

    system_message = f"You are an expert linguist specializing in translation from {source_lang} to {target_lang}. \
You will be provided with a source text and its translation and your goal is to improve the translation."

    if country != "":
        reflection_prompt = f"""Your task is to carefully read a source text and a translation from {source_lang} to {target_lang}, and then give constructive criticism and helpful suggestions to improve the translation. \
The final style and tone of the translation should match the style of {target_lang} colloquially spoken in {country}.

The source text and initial translation, delimited by XML tags <SOURCE_TEXT></SOURCE_TEXT> and <TRANSLATION></TRANSLATION>, are as follows:

<SOURCE_TEXT>
{source_text}
</SOURCE_TEXT>

<TRANSLATION>
{translation_1}
</TRANSLATION>

When writing suggestions, pay attention to whether there are ways to improve the translation's
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text and takes into account any cultural context),
(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by ensuring you use equivalent idioms in {target_lang}).

Write a list of specific, helpful and constructive suggestions for improving the translation.
Each suggestion should address one specific part of the translation.
Output only the suggestions and nothing else."""

    else:
        reflection_prompt = f"""Your task is to carefully read a source text and a translation from {source_lang} to {target_lang}, and then give constructive criticism and helpful suggestions to improve the translation.

The source text and initial translation, delimited by XML tags <SOURCE_TEXT></SOURCE_TEXT> and <TRANSLATION></TRANSLATION>, are as follows:

<SOURCE_TEXT>
{source_text}
</SOURCE_TEXT>

<TRANSLATION>
{translation_1}
</TRANSLATION>

When writing suggestions, pay attention to whether there are ways to improve the translation's
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text and takes into account any cultural context),
(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by ensuring you use equivalent idioms in {target_lang}).

Write a list of specific, helpful and constructive suggestions for improving the translation.
Each suggestion should address one specific part of the translation.
Output only the suggestions and nothing else."""

    # reflection_prompt is an f-string and already fully formatted; the
    # redundant .format() call has been dropped.
    reflection = get_completion(reflection_prompt, system_message=system_message)
    return reflection


def one_chunk_improve_translation(
    source_lang: str,
    target_lang: str,
    source_text: str,
    translation_1: str,
    reflection: str,
) -> str:
    """
    Use the reflection to improve the translation, treating the entire text as one chunk.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for the translation.
        source_text (str): The original text in the source language.
        translation_1 (str): The initial translation of the source text.
        reflection (str): Expert suggestions and constructive criticism for improving the translation.

    Returns:
        str: The improved translation based on the expert suggestions.
    """

    system_message = f"You are an expert linguist, specializing in translation editing from {source_lang} to {target_lang}."

    prompt = f"""Your task is to carefully read, then edit, a translation from {source_lang} to {target_lang}, taking into
account a list of expert suggestions and constructive criticisms.

The source text, the initial translation, and the expert linguist suggestions are delimited by XML tags <SOURCE_TEXT></SOURCE_TEXT>, <TRANSLATION></TRANSLATION> and <EXPERT_SUGGESTIONS></EXPERT_SUGGESTIONS> \
as follows:

<SOURCE_TEXT>
{source_text}
</SOURCE_TEXT>

<TRANSLATION>
{translation_1}
</TRANSLATION>

<EXPERT_SUGGESTIONS>
{reflection}
</EXPERT_SUGGESTIONS>

Please take into account the expert suggestions when editing the translation. Edit the translation by ensuring:

(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text),
(iv) terminology (inappropriate for context, inconsistent use), or
(v) other errors.

Output only the new translation and nothing else."""

    translation_2 = get_completion(prompt, system_message)

    return translation_2


def one_chunk_translate_text(
    source_lang: str, target_lang: str, source_text: str, country: str = ""
) -> str:
    """
    Translate a single chunk of text from the source language to the target language.

    This function performs a two-step translation process:
    1. Get an initial translation of the source text.
    2. Reflect on the initial translation and generate an improved translation.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for the translation.
        source_text (str): The text to be translated.
        country (str): Country specified for the target language.

    Returns:
        str: The improved translation of the source text.
    """
    translation_1 = one_chunk_initial_translation(
        source_lang, target_lang, source_text
    )

    reflection = one_chunk_reflect_on_translation(
        source_lang, target_lang, source_text, translation_1, country
    )
    translation_2 = one_chunk_improve_translation(
        source_lang, target_lang, source_text, translation_1, reflection
    )

    return translation_2


def num_tokens_in_string(
    input_str: str, encoding_name: str = "cl100k_base"
) -> int:
    """
    Calculate the number of tokens in a given string using a specified encoding.

    Args:
        input_str (str): The input string to be tokenized.
        encoding_name (str, optional): The name of the encoding to use. Defaults to "cl100k_base",
            which is the most commonly used encoder (used by GPT-4).

    Returns:
        int: The number of tokens in the input string.

    Example:
        >>> text = "Hello, how are you?"
        >>> num_tokens = num_tokens_in_string(text)
        >>> print(num_tokens)
        6
    """
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(input_str))
    return num_tokens


def multichunk_initial_translation(
    source_lang: str, target_lang: str, source_text_chunks: List[str]
) -> List[str]:
    """
    Translate a text in multiple chunks from the source language to the target language.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for translation.
        source_text_chunks (List[str]): A list of text chunks to be translated.

    Returns:
        List[str]: A list of translated text chunks.
    """

    system_message = f"You are an expert linguist, specializing in translation from {source_lang} to {target_lang}."

    translation_prompt = """Your task is to provide a professional translation from {source_lang} to {target_lang} of PART of a text.

The source text is below, delimited by XML tags <SOURCE_TEXT> and </SOURCE_TEXT>. Translate only the part within the source text
delimited by <TRANSLATE_THIS> and </TRANSLATE_THIS>. You can use the rest of the source text as context, but do not translate any
of the other text. Do not output anything other than the translation of the indicated part of the text.

<SOURCE_TEXT>
{tagged_text}
</SOURCE_TEXT>

To reiterate, you should translate only this part of the text, shown here again between <TRANSLATE_THIS> and </TRANSLATE_THIS>:
<TRANSLATE_THIS>
{chunk_to_translate}
</TRANSLATE_THIS>

Output only the translation of the portion you are asked to translate, and nothing else.
"""

    translation_chunks = []
    for i in range(len(source_text_chunks)):
        # Will translate chunk i
        tagged_text = (
            "".join(source_text_chunks[0:i])
            + "<TRANSLATE_THIS>"
            + source_text_chunks[i]
            + "</TRANSLATE_THIS>"
            + "".join(source_text_chunks[i + 1 :])
        )

        prompt = translation_prompt.format(
            source_lang=source_lang,
            target_lang=target_lang,
            tagged_text=tagged_text,
            chunk_to_translate=source_text_chunks[i],
        )

        translation = get_completion(prompt, system_message=system_message)
        translation_chunks.append(translation)

    return translation_chunks


def multichunk_reflect_on_translation(
    source_lang: str,
    target_lang: str,
    source_text_chunks: List[str],
    translation_1_chunks: List[str],
    country: str = "",
) -> List[str]:
    """
    Provides constructive criticism and suggestions for improving a partial translation.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language of the translation.
        source_text_chunks (List[str]): The source text divided into chunks.
        translation_1_chunks (List[str]): The translated chunks corresponding to the source text chunks.
        country (str): Country specified for the target language.

    Returns:
        List[str]: A list of reflections containing suggestions for improving each translated chunk.
    """

    system_message = f"You are an expert linguist specializing in translation from {source_lang} to {target_lang}. \
You will be provided with a source text and its translation and your goal is to improve the translation."

    if country != "":
        reflection_prompt = """Your task is to carefully read a source text and part of a translation of that text from {source_lang} to {target_lang}, and then give constructive criticism and helpful suggestions for improving the translation.
The final style and tone of the translation should match the style of {target_lang} colloquially spoken in {country}.

The source text is below, delimited by XML tags <SOURCE_TEXT> and </SOURCE_TEXT>, and the part that has been translated
is delimited by <TRANSLATE_THIS> and </TRANSLATE_THIS> within the source text. You can use the rest of the source text
as context for critiquing the translated part.

<SOURCE_TEXT>
{tagged_text}
</SOURCE_TEXT>

To reiterate, only part of the text is being translated, shown here again between <TRANSLATE_THIS> and </TRANSLATE_THIS>:
<TRANSLATE_THIS>
{chunk_to_translate}
</TRANSLATE_THIS>

The translation of the indicated part, delimited below by <TRANSLATION> and </TRANSLATION>, is as follows:
<TRANSLATION>
{translation_1_chunk}
</TRANSLATION>

When writing suggestions, pay attention to whether there are ways to improve the translation's:
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text and takes into account any cultural context),
(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by ensuring you use equivalent idioms in {target_lang}).

Write a list of specific, helpful and constructive suggestions for improving the translation.
Each suggestion should address one specific part of the translation.
Output only the suggestions and nothing else."""

    else:
        reflection_prompt = """Your task is to carefully read a source text and part of a translation of that text from {source_lang} to {target_lang}, and then give constructive criticism and helpful suggestions for improving the translation.

The source text is below, delimited by XML tags <SOURCE_TEXT> and </SOURCE_TEXT>, and the part that has been translated
is delimited by <TRANSLATE_THIS> and </TRANSLATE_THIS> within the source text. You can use the rest of the source text
as context for critiquing the translated part.

<SOURCE_TEXT>
{tagged_text}
</SOURCE_TEXT>

To reiterate, only part of the text is being translated, shown here again between <TRANSLATE_THIS> and </TRANSLATE_THIS>:
<TRANSLATE_THIS>
{chunk_to_translate}
</TRANSLATE_THIS>

The translation of the indicated part, delimited below by <TRANSLATION> and </TRANSLATION>, is as follows:
<TRANSLATION>
{translation_1_chunk}
</TRANSLATION>

When writing suggestions, pay attention to whether there are ways to improve the translation's:
(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text and takes into account any cultural context),
(iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by ensuring you use equivalent idioms in {target_lang}).

Write a list of specific, helpful and constructive suggestions for improving the translation.
Each suggestion should address one specific part of the translation.
Output only the suggestions and nothing else."""

    reflection_chunks = []
    for i in range(len(source_text_chunks)):
        # Will reflect on chunk i
        tagged_text = (
            "".join(source_text_chunks[0:i])
            + "<TRANSLATE_THIS>"
            + source_text_chunks[i]
            + "</TRANSLATE_THIS>"
            + "".join(source_text_chunks[i + 1 :])
        )
        if country != "":
            prompt = reflection_prompt.format(
                source_lang=source_lang,
                target_lang=target_lang,
                tagged_text=tagged_text,
                chunk_to_translate=source_text_chunks[i],
                translation_1_chunk=translation_1_chunks[i],
                country=country,
            )
        else:
            prompt = reflection_prompt.format(
                source_lang=source_lang,
                target_lang=target_lang,
                tagged_text=tagged_text,
                chunk_to_translate=source_text_chunks[i],
                translation_1_chunk=translation_1_chunks[i],
            )

        reflection = get_completion(prompt, system_message=system_message)
        reflection_chunks.append(reflection)

    return reflection_chunks


def multichunk_improve_translation(
    source_lang: str,
    target_lang: str,
    source_text_chunks: List[str],
    translation_1_chunks: List[str],
    reflection_chunks: List[str],
) -> List[str]:
    """
    Improves the translation of a text from source language to target language by considering expert suggestions.

    Args:
        source_lang (str): The source language of the text.
        target_lang (str): The target language for translation.
        source_text_chunks (List[str]): The source text divided into chunks.
        translation_1_chunks (List[str]): The initial translation of each chunk.
        reflection_chunks (List[str]): Expert suggestions for improving each translated chunk.

    Returns:
        List[str]: The improved translation of each chunk.
    """

    system_message = f"You are an expert linguist, specializing in translation editing from {source_lang} to {target_lang}."

    improvement_prompt = """Your task is to carefully read, then improve, a translation from {source_lang} to {target_lang}, taking into
account a set of expert suggestions and constructive criticisms. Below, the source text, initial translation, and expert suggestions are provided.

The source text is below, delimited by XML tags <SOURCE_TEXT> and </SOURCE_TEXT>, and the part that has been translated
is delimited by <TRANSLATE_THIS> and </TRANSLATE_THIS> within the source text. You can use the rest of the source text
as context, but need to provide a translation only of the part indicated by <TRANSLATE_THIS> and </TRANSLATE_THIS>.

<SOURCE_TEXT>
{tagged_text}
</SOURCE_TEXT>

To reiterate, only part of the text is being translated, shown here again between <TRANSLATE_THIS> and </TRANSLATE_THIS>:
<TRANSLATE_THIS>
{chunk_to_translate}
</TRANSLATE_THIS>

The translation of the indicated part, delimited below by <TRANSLATION> and </TRANSLATION>, is as follows:
<TRANSLATION>
{translation_1_chunk}
</TRANSLATION>

The expert suggestions for the indicated part, delimited below by <EXPERT_SUGGESTIONS> and </EXPERT_SUGGESTIONS>, are as follows:
<EXPERT_SUGGESTIONS>
{reflection_chunk}
</EXPERT_SUGGESTIONS>

Taking into account the expert suggestions, rewrite the translation to improve it, paying attention
to whether there are ways to improve the translation's

(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),
(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules and ensuring there are no unnecessary repetitions),
(iii) style (by ensuring the translation reflects the style of the source text),
(iv) terminology (inappropriate for context, inconsistent use), or
(v) other errors.

Output only the new translation of the indicated part and nothing else."""

    translation_2_chunks = []
    for i in range(len(source_text_chunks)):
        # Will improve chunk i
        tagged_text = (
            "".join(source_text_chunks[0:i])
            + "<TRANSLATE_THIS>"
            + source_text_chunks[i]
            + "</TRANSLATE_THIS>"
            + "".join(source_text_chunks[i + 1 :])
        )

        prompt = improvement_prompt.format(
            source_lang=source_lang,
            target_lang=target_lang,
            tagged_text=tagged_text,
            chunk_to_translate=source_text_chunks[i],
            translation_1_chunk=translation_1_chunks[i],
            reflection_chunk=reflection_chunks[i],
        )

        translation_2 = get_completion(prompt, system_message=system_message)
        translation_2_chunks.append(translation_2)

    return translation_2_chunks


def multichunk_translation(
    source_lang, target_lang, source_text_chunks, country: str = ""
):
    """
    Improves the translation of multiple text chunks based on an initial translation and reflection.

    Args:
        source_lang (str): The source language of the text chunks.
        target_lang (str): The target language for translation.
        source_text_chunks (List[str]): The list of source text chunks to be translated.
        country (str): Country specified for the target language.

    Returns:
        List[str]: The list of improved translations for each source text chunk.
    """

    translation_1_chunks = multichunk_initial_translation(
        source_lang, target_lang, source_text_chunks
    )

    reflection_chunks = multichunk_reflect_on_translation(
        source_lang,
        target_lang,
        source_text_chunks,
        translation_1_chunks,
        country,
    )

    translation_2_chunks = multichunk_improve_translation(
        source_lang,
        target_lang,
        source_text_chunks,
        translation_1_chunks,
        reflection_chunks,
    )

    return translation_2_chunks


def calculate_chunk_size(token_count: int, token_limit: int) -> int:
    """
    Calculate the chunk size based on the token count and token limit.

    Args:
        token_count (int): The total number of tokens.
        token_limit (int): The maximum number of tokens allowed per chunk.

    Returns:
        int: The calculated chunk size.

    Description:
        This function calculates the chunk size based on the given token count and token limit.
        If the token count is less than or equal to the token limit, the function returns the token count as the chunk size.
        Otherwise, it calculates the number of chunks needed to accommodate all the tokens within the token limit.
        The chunk size is determined by dividing the token limit by the number of chunks.
        If there are remaining tokens after dividing the token count by the token limit,
        the chunk size is adjusted by adding the remaining tokens divided by the number of chunks.

    Example:
        >>> calculate_chunk_size(1000, 500)
        500
        >>> calculate_chunk_size(1530, 500)
        389
        >>> calculate_chunk_size(2242, 500)
        496
    """

    if token_count <= token_limit:
        return token_count

    num_chunks = (token_count + token_limit - 1) // token_limit
    chunk_size = token_count // num_chunks

    remaining_tokens = token_count % token_limit
    if remaining_tokens > 0:
        chunk_size += remaining_tokens // num_chunks

    return chunk_size


def translate(
    source_lang,
    target_lang,
    source_text,
    country,
    max_tokens=MAX_TOKENS_PER_CHUNK,
):
    """Translate the source_text from source_lang to target_lang."""

    num_tokens_in_text = num_tokens_in_string(source_text)

    ic(num_tokens_in_text)

    if num_tokens_in_text < max_tokens:
        ic("Translating text as single chunk")

        final_translation = one_chunk_translate_text(
            source_lang, target_lang, source_text, country
        )

        return final_translation

    else:
        ic("Translating text as multiple chunks")

        token_size = calculate_chunk_size(
            token_count=num_tokens_in_text, token_limit=max_tokens
        )

        ic(token_size)

        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            model_name="gpt-4",
            chunk_size=token_size,
            chunk_overlap=0,
        )

        source_text_chunks = text_splitter.split_text(source_text)

        translation_2_chunks = multichunk_translation(
            source_lang, target_lang, source_text_chunks, country
        )

        return "".join(translation_2_chunks)
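Taken together, translate() is the package's single entry point: it picks the single-chunk or multichunk path by token count and returns the improved translation, as re-exported from __init__.py. A minimal end-to-end sketch, assuming the package is importable and OPENAI_API_KEY is set in the environment or a local .env file (per the load_dotenv call above):

# Minimal usage sketch of the translate() entry point defined above.
# Assumes OPENAI_API_KEY is available via the environment or a .env file.
from translation_agent import translate

result = translate(
    source_lang="English",
    target_lang="Spanish",
    source_text="The quick brown fox jumps over the lazy dog.",
    country="Mexico",
)
print(result)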