Upload 12 files
Browse files
- app/__init__.py +0 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/webui/README.md +79 -5
- app/webui/__pycache__/__init__.cpython-310.pyc +0 -0
- app/webui/__pycache__/app.cpython-310.pyc +0 -0
- app/webui/__pycache__/patch.cpython-310.pyc +0 -0
- app/webui/__pycache__/process.cpython-310.pyc +0 -0
- app/webui/app.py +9 -10
- app/webui/patch.py +19 -16
- app/webui/process.py +12 -20
- app/webui/requirements.txt +12 -0
app/__init__.py
ADDED
|
File without changes
|
app/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (136 Bytes). View file
|
|
|
app/webui/README.md
CHANGED
|
@@ -1,7 +1,81 @@
|
|
| 1 |
-
# Tranlsation-Agent-Webui
|
| 2 |
|
| 3 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
git clone https://github.com/andrewyng/translation-agent.git
|
| 6 |
-
cd translation-agent\app\webui
|
| 7 |
-
pip install -
|
|
|
|
|
|
|
| 1 |
|
| 2 |
+
## Translation Agent WebUI
|
| 3 |
+
|
| 4 |
+
This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
|
| 5 |
+
|
| 6 |
+
**Features:**
|
| 7 |
+
|
| 8 |
+
- **Tokenized Text:** Displays translated text with tokenization, highlighting differences between original and translated words.
|
| 9 |
+
- **Document Upload:** Supports uploading various document formats (PDF, TXT, DOC, etc.) for translation.
|
| 10 |
+
- **Multiple API Support:** Integrates with popular language models like:
|
| 11 |
+
- Groq
|
| 12 |
+
- OpenAI
|
| 13 |
+
- Cohere
|
| 14 |
+
- Ollama
|
| 15 |
+
- Together AI
|
| 16 |
+
- Hugging Face Inference API
|
| 17 |
+
...
|
| 18 |
+
Llama Index supported, easily extendable
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
**Getting Started**
|
| 22 |
+
|
| 23 |
+
1. **Install Dependencies:**
|
| 24 |
+
**Linux (using Python venv)**
|
| 25 |
+
```bash
|
| 26 |
+
git clone https://github.com/andrewyng/translation-agent.git
|
| 27 |
+
cd translation-agent
|
| 28 |
+
python -m venv web_ui
|
| 29 |
+
source web_ui/bin/activate
|
| 30 |
+
pip install -r app/webui/requirements.txt
|
| 31 |
+
|
| 32 |
+
```
|
| 33 |
+
**Windows**
|
| 34 |
+
```bash
|
| 35 |
+
git clone https://github.com/andrewyng/translation-agent.git
|
| 36 |
+
cd translation-agent
|
| 37 |
+
python -m venv web_ui
|
| 38 |
+
.\web_ui\Scripts\activate
|
| 39 |
+
pip install -r app/webui/requirements.txt
|
| 40 |
+
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
2. **Set API Keys:**
|
| 44 |
+
- Rename `.env.sample` to `.env` and add your API keys for each service:
|
| 45 |
+
|
| 46 |
+
```
|
| 47 |
+
OPENAI_API_KEY="sk-xxxxx" # Keep this field
|
| 48 |
+
GROQ_API_KEY="xxxxx"
|
| 49 |
+
COHERE_API_KEY="xxxxx"
|
| 50 |
+
TOGETHER_API_KEY="xxxxx"
|
| 51 |
+
HF_TOKEN="xxxxx"
|
| 52 |
+
```
|
| 53 |
+
- Alternatively, you can enter the API key in the web UI's `API_KEY` field.
|
| 54 |
+
|
| 55 |
+
3. **Run the Web UI:**
|
| 56 |
+
```bash
|
| 57 |
+
python -m app.webui.app
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
4. **Access the Web UI:**
|
| 61 |
+
Open your web browser and navigate to `http://127.0.0.1:7860/`.
|
| 62 |
+
|
| 63 |
+
**Usage:**
|
| 64 |
+
|
| 65 |
+
1. Select your desired translation API from the Endpoint dropdown menu.
|
| 66 |
+
2. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
|
| 67 |
+
3. Input the source text or upload your document file.
|
| 68 |
+
4. Submit to get the translation; the UI will display the translated text with tokenization and highlight differences.
|
| 69 |
+
|
| 70 |
+
**Customization:**
|
| 71 |
+
|
| 72 |
+
- **Add New LLMs:** Modify the `patch.py` file to integrate additional LLMs.
|
| 73 |
+
|
| 74 |
+
**Contributing:**
|
| 75 |
+
|
| 76 |
+
Contributions are welcome! Feel free to open issues or submit pull requests.
|
| 77 |
+
|
| 78 |
+
**License:**
|
| 79 |
+
|
| 80 |
+
This project is licensed under the MIT License.
|
| 81 |
|
|
|
|
|
|
|
|
|
app/webui/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (142 Bytes). View file
|
|
|
app/webui/__pycache__/app.cpython-310.pyc
ADDED
|
Binary file (4.22 kB). View file
|
|
|
app/webui/__pycache__/patch.cpython-310.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
app/webui/__pycache__/process.cpython-310.pyc
ADDED
|
Binary file (2.33 kB). View file
|
|
|
app/webui/app.py
CHANGED
|
@@ -7,7 +7,7 @@ sys.path.insert(0, project_root)
|
|
| 7 |
|
| 8 |
import re
|
| 9 |
import gradio as gr
|
| 10 |
-
from app.webui.process import model_load,
|
| 11 |
from llama_index.core import SimpleDirectoryReader
|
| 12 |
|
| 13 |
def huanik(
|
|
@@ -63,8 +63,8 @@ def update_model(endpoint):
|
|
| 63 |
return gr.update(value=endpoint_model_map[endpoint])
|
| 64 |
|
| 65 |
def read_doc(file):
|
| 66 |
-
docs = SimpleDirectoryReader(input_files=file).load_data()
|
| 67 |
-
return docs
|
| 68 |
|
| 69 |
TITLE = """
|
| 70 |
<h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
|
|
@@ -82,7 +82,7 @@ CSS = """
|
|
| 82 |
}
|
| 83 |
"""
|
| 84 |
|
| 85 |
-
with gr.Blocks(theme="soft", css=CSS) as demo:
|
| 86 |
gr.Markdown(TITLE)
|
| 87 |
with gr.Row():
|
| 88 |
with gr.Column(scale=1):
|
|
@@ -94,7 +94,7 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
| 94 |
model = gr.Textbox(label="Model", value="gpt-4o", )
|
| 95 |
api_key = gr.Textbox(label="API_KEY", type="password", )
|
| 96 |
source_lang = gr.Textbox(
|
| 97 |
-
label="Source Lang
|
| 98 |
value="English",
|
| 99 |
)
|
| 100 |
target_lang = gr.Textbox(
|
|
@@ -130,14 +130,14 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
| 130 |
value="How we live is so different from how we ought to live that he who studies "+\
|
| 131 |
"what ought to be done rather than what is done will learn the way to his downfall "+\
|
| 132 |
"rather than to his preservation.",
|
| 133 |
-
lines=
|
| 134 |
)
|
| 135 |
with gr.Tab("Final"):
|
| 136 |
-
output_final = gr.Textbox(label="FInal Translation", lines=
|
| 137 |
with gr.Tab("Initial"):
|
| 138 |
-
output_init = gr.Textbox(label="Init Translation", lines=
|
| 139 |
with gr.Tab("Reflection"):
|
| 140 |
-
output_reflect = gr.Textbox(label="Reflection", lines=
|
| 141 |
with gr.Tab("Diff"):
|
| 142 |
output_diff = gr.HighlightedText(visible = False)
|
| 143 |
with gr.Row():
|
|
@@ -146,7 +146,6 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
| 146 |
clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
|
| 147 |
|
| 148 |
endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
|
| 149 |
-
source_text.change(lang_detector, source_text, source_lang)
|
| 150 |
submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
|
| 151 |
upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
|
| 152 |
|
|
|
|
| 7 |
|
| 8 |
import re
|
| 9 |
import gradio as gr
|
| 10 |
+
from app.webui.process import model_load, diff_texts, translator
|
| 11 |
from llama_index.core import SimpleDirectoryReader
|
| 12 |
|
| 13 |
def huanik(
|
|
|
|
| 63 |
return gr.update(value=endpoint_model_map[endpoint])
|
| 64 |
|
| 65 |
def read_doc(file):
|
| 66 |
+
docs = SimpleDirectoryReader(input_files=[file]).load_data()
|
| 67 |
+
return docs[0].text
|
| 68 |
|
| 69 |
TITLE = """
|
| 70 |
<h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
|
|
|
|
| 82 |
}
|
| 83 |
"""
|
| 84 |
|
| 85 |
+
with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
|
| 86 |
gr.Markdown(TITLE)
|
| 87 |
with gr.Row():
|
| 88 |
with gr.Column(scale=1):
|
|
|
|
| 94 |
model = gr.Textbox(label="Model", value="gpt-4o", )
|
| 95 |
api_key = gr.Textbox(label="API_KEY", type="password", )
|
| 96 |
source_lang = gr.Textbox(
|
| 97 |
+
label="Source Lang",
|
| 98 |
value="English",
|
| 99 |
)
|
| 100 |
target_lang = gr.Textbox(
|
|
|
|
| 130 |
value="How we live is so different from how we ought to live that he who studies "+\
|
| 131 |
"what ought to be done rather than what is done will learn the way to his downfall "+\
|
| 132 |
"rather than to his preservation.",
|
| 133 |
+
lines=10,
|
| 134 |
)
|
| 135 |
with gr.Tab("Final"):
|
| 136 |
+
output_final = gr.Textbox(label="FInal Translation", lines=10, show_copy_button=True)
|
| 137 |
with gr.Tab("Initial"):
|
| 138 |
+
output_init = gr.Textbox(label="Init Translation", lines=10, show_copy_button=True)
|
| 139 |
with gr.Tab("Reflection"):
|
| 140 |
+
output_reflect = gr.Textbox(label="Reflection", lines=10, show_copy_button=True)
|
| 141 |
with gr.Tab("Diff"):
|
| 142 |
output_diff = gr.HighlightedText(visible = False)
|
| 143 |
with gr.Row():
|
|
|
|
| 146 |
clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
|
| 147 |
|
| 148 |
endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
|
|
|
|
| 149 |
submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
|
| 150 |
upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
|
| 151 |
|
app/webui/patch.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
# a monkey patch to use llama-index completion
|
| 2 |
import os
|
| 3 |
-
from typing import Union
|
| 4 |
-
|
| 5 |
-
from src.translation_agent.utils import *
|
| 6 |
-
|
| 7 |
|
| 8 |
from llama_index.llms.groq import Groq
|
| 9 |
from llama_index.llms.cohere import Cohere
|
|
@@ -28,12 +26,12 @@ def model_load(
|
|
| 28 |
if endpoint == "Groq":
|
| 29 |
llm = Groq(
|
| 30 |
model=model,
|
| 31 |
-
api_key=api_key,
|
| 32 |
)
|
| 33 |
elif endpoint == "Cohere":
|
| 34 |
llm = Cohere(
|
| 35 |
model=model,
|
| 36 |
-
api_key=api_key,
|
| 37 |
)
|
| 38 |
elif endpoint == "OpenAI":
|
| 39 |
llm = OpenAI(
|
|
@@ -43,16 +41,16 @@ def model_load(
|
|
| 43 |
elif endpoint == "TogetherAI":
|
| 44 |
llm = TogetherLLM(
|
| 45 |
model=model,
|
| 46 |
-
api_key=api_key,
|
| 47 |
)
|
| 48 |
-
elif endpoint == "
|
| 49 |
llm = Ollama(
|
| 50 |
model=model,
|
| 51 |
request_timeout=120.0)
|
| 52 |
elif endpoint == "Huggingface":
|
| 53 |
llm = HuggingFaceInferenceAPI(
|
| 54 |
model_name=model,
|
| 55 |
-
token=api_key,
|
| 56 |
task="text-generation",
|
| 57 |
)
|
| 58 |
Settings.llm = llm
|
|
@@ -63,10 +61,7 @@ def model_load(
|
|
| 63 |
Settings.num_output = num_output
|
| 64 |
|
| 65 |
|
| 66 |
-
|
| 67 |
-
def completion_wrapper(func: Callable) -> Callable:
|
| 68 |
-
@wraps(func)
|
| 69 |
-
def wrapper(
|
| 70 |
prompt: str,
|
| 71 |
system_message: str = "You are a helpful assistant.",
|
| 72 |
temperature: float = 0.3,
|
|
@@ -126,7 +121,15 @@ def completion_wrapper(func: Callable) -> Callable:
|
|
| 126 |
)
|
| 127 |
return response.message.content
|
| 128 |
|
| 129 |
-
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# a monkey patch to use llama-index completion
|
| 2 |
import os
|
| 3 |
+
from typing import Union
|
| 4 |
+
import src.translation_agent.utils as utils
|
|
|
|
|
|
|
| 5 |
|
| 6 |
from llama_index.llms.groq import Groq
|
| 7 |
from llama_index.llms.cohere import Cohere
|
|
|
|
| 26 |
if endpoint == "Groq":
|
| 27 |
llm = Groq(
|
| 28 |
model=model,
|
| 29 |
+
api_key=api_key if api_key else os.getenv("GROQ_API_KEY"),
|
| 30 |
)
|
| 31 |
elif endpoint == "Cohere":
|
| 32 |
llm = Cohere(
|
| 33 |
model=model,
|
| 34 |
+
api_key=api_key if api_key else os.getenv("COHERE_API_KEY"),
|
| 35 |
)
|
| 36 |
elif endpoint == "OpenAI":
|
| 37 |
llm = OpenAI(
|
|
|
|
| 41 |
elif endpoint == "TogetherAI":
|
| 42 |
llm = TogetherLLM(
|
| 43 |
model=model,
|
| 44 |
+
api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"),
|
| 45 |
)
|
| 46 |
+
elif endpoint == "Ollama":
|
| 47 |
llm = Ollama(
|
| 48 |
model=model,
|
| 49 |
request_timeout=120.0)
|
| 50 |
elif endpoint == "Huggingface":
|
| 51 |
llm = HuggingFaceInferenceAPI(
|
| 52 |
model_name=model,
|
| 53 |
+
token=api_key if api_key else os.getenv("HF_TOKEN"),
|
| 54 |
task="text-generation",
|
| 55 |
)
|
| 56 |
Settings.llm = llm
|
|
|
|
| 61 |
Settings.num_output = num_output
|
| 62 |
|
| 63 |
|
| 64 |
+
def get_completion(
|
|
|
|
|
|
|
|
|
|
| 65 |
prompt: str,
|
| 66 |
system_message: str = "You are a helpful assistant.",
|
| 67 |
temperature: float = 0.3,
|
|
|
|
| 121 |
)
|
| 122 |
return response.message.content
|
| 123 |
|
| 124 |
+
utils.get_completion = get_completion
|
| 125 |
|
| 126 |
+
one_chunk_initial_translation = utils.one_chunk_initial_translation
|
| 127 |
+
one_chunk_reflect_on_translation = utils.one_chunk_reflect_on_translation
|
| 128 |
+
one_chunk_improve_translation = utils.one_chunk_improve_translation
|
| 129 |
+
one_chunk_translate_text = utils.one_chunk_translate_text
|
| 130 |
+
num_tokens_in_string = utils.num_tokens_in_string
|
| 131 |
+
multichunk_initial_translation = utils.multichunk_initial_translation
|
| 132 |
+
multichunk_reflect_on_translation = utils.multichunk_reflect_on_translation
|
| 133 |
+
multichunk_improve_translation = utils.multichunk_improve_translation
|
| 134 |
+
multichunk_translation = utils.multichunk_translation
|
| 135 |
+
calculate_chunk_size =utils.calculate_chunk_size
|
app/webui/process.py
CHANGED
|
@@ -1,34 +1,26 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
from difflib import Differ
|
| 4 |
from icecream import ic
|
| 5 |
-
from app.webui.patch import
|
|
|
|
|
|
|
| 6 |
from llama_index.core.node_parser import SentenceSplitter
|
| 7 |
|
| 8 |
-
def lang_detector(text):
|
| 9 |
-
min_chars = 5
|
| 10 |
-
if len(text) < min_chars:
|
| 11 |
-
return "Input text too short"
|
| 12 |
-
try:
|
| 13 |
-
detector = Detector(text).language
|
| 14 |
-
lang_info = str(detector)
|
| 15 |
-
code = re.search(r"name: (\w+)", lang_info).group(1)
|
| 16 |
-
return code
|
| 17 |
-
except Exception as e:
|
| 18 |
-
return f"ERROR:{str(e)}"
|
| 19 |
|
| 20 |
-
|
| 21 |
-
# Use polyglot to tokenize the text
|
| 22 |
-
polyglot_text = Text(text)
|
| 23 |
-
words = polyglot_text.words
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
# Check if the text contains spaces
|
| 26 |
if ' ' in text:
|
| 27 |
# Create a list of words and spaces
|
| 28 |
tokens = []
|
| 29 |
for word in words:
|
| 30 |
tokens.append(word)
|
| 31 |
-
|
|
|
|
| 32 |
return tokens[:-1] # Remove the last space
|
| 33 |
else:
|
| 34 |
return words
|
|
@@ -62,7 +54,7 @@ def translator(
|
|
| 62 |
target_lang,
|
| 63 |
source_text,
|
| 64 |
country,
|
| 65 |
-
max_tokens=
|
| 66 |
):
|
| 67 |
"""Translate the source_text from source_lang to target_lang."""
|
| 68 |
num_tokens_in_text = num_tokens_in_string(source_text)
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import nltk
|
| 3 |
from difflib import Differ
|
| 4 |
from icecream import ic
|
| 5 |
+
from app.webui.patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
|
| 6 |
+
from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
|
| 7 |
+
|
| 8 |
from llama_index.core.node_parser import SentenceSplitter
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
nltk.download('punkt', quiet=True)
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
+
def tokenize(text):
|
| 14 |
+
# Use nltk to tokenize the text
|
| 15 |
+
words = nltk.word_tokenize(text)
|
| 16 |
# Check if the text contains spaces
|
| 17 |
if ' ' in text:
|
| 18 |
# Create a list of words and spaces
|
| 19 |
tokens = []
|
| 20 |
for word in words:
|
| 21 |
tokens.append(word)
|
| 22 |
+
if not word.startswith("'") and not word.endswith("'"): # Avoid adding space after punctuation
|
| 23 |
+
tokens.append(' ') # Add space after each word
|
| 24 |
return tokens[:-1] # Remove the last space
|
| 25 |
else:
|
| 26 |
return words
|
|
|
|
| 54 |
target_lang,
|
| 55 |
source_text,
|
| 56 |
country,
|
| 57 |
+
max_tokens=1000,
|
| 58 |
):
|
| 59 |
"""Translate the source_text from source_lang to target_lang."""
|
| 60 |
num_tokens_in_text = num_tokens_in_string(source_text)
|
app/webui/requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
llama-index
|
| 2 |
+
llama-index-llms-groq
|
| 3 |
+
llama-index-llms-openai
|
| 4 |
+
llama-index-llms-cohere
|
| 5 |
+
llama-index-llms-together
|
| 6 |
+
llama-index-llms-ollama
|
| 7 |
+
llama-index-llms-huggingface-api
|
| 8 |
+
tiktoken
|
| 9 |
+
icecream
|
| 10 |
+
nltk
|
| 11 |
+
langchain-text-splitters
|
| 12 |
+
gradio
|