AiCoderv2 committed on
Commit
9fd140b
·
verified ·
1 Parent(s): 7948a19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -131
app.py CHANGED
@@ -1,7 +1,7 @@
1
  from transformers import pipeline
2
  import gradio as gr
3
 
4
- # Initialize translation pipeline with Hugging Face's multilingual model
5
  translator = pipeline("translation", model="facebook/m2m100_418M")
6
 
7
  def translate_text(text, src_lang_code, tgt_lang_code):
@@ -20,123 +20,123 @@ def translate_text(text, src_lang_code, tgt_lang_code):
20
  )
21
  return result[0]['translation_text']
22
 
23
- # Comprehensive language dictionary (ISO 639-1 codes)
24
  LANGUAGES = {
25
- "Afrikaans": "af",
26
- "Albanian": "sq",
27
- "Amharic": "am",
28
- "Arabic": "ar",
29
- "Armenian": "hy",
30
- "Azerbaijani": "az",
31
- "Basque": "eu",
32
- "Belarusian": "be",
33
- "Bengali": "bn",
34
- "Bosnian": "bs",
35
- "Bulgarian": "bg",
36
- "Catalan": "ca",
37
- "Cebuano": "ceb",
38
- "Chichewa": "ny",
39
- "Chinese (Simplified)": "zh",
40
- "Chinese (Traditional)": "zh",
41
- "Corsican": "co",
42
- "Croatian": "hr",
43
- "Czech": "cs",
44
- "Danish": "da",
45
- "Dutch": "nl",
46
- "English": "en",
47
- "Esperanto": "eo",
48
- "Estonian": "et",
49
- "Filipino": "tl",
50
- "Finnish": "fi",
51
- "French": "fr",
52
- "Frisian": "fy",
53
- "Galician": "gl",
54
- "Georgian": "ka",
55
- "German": "de",
56
- "Greek": "el",
57
- "Gujarati": "gu",
58
- "Haitian Creole": "ht",
59
- "Hausa": "ha",
60
- "Hawaiian": "haw",
61
- "Hebrew": "he",
62
- "Hindi": "hi",
63
- "Hmong": "hmn",
64
- "Hungarian": "hu",
65
- "Icelandic": "is",
66
- "Igbo": "ig",
67
- "Indonesian": "id",
68
- "Irish": "ga",
69
- "Italian": "it",
70
- "Japanese": "ja",
71
- "Javanese": "jv",
72
- "Kannada": "kn",
73
- "Kazakh": "kk",
74
- "Khmer": "km",
75
- "Kinyarwanda": "rw",
76
- "Korean": "ko",
77
- "Kurdish (Kurmanji)": "ku",
78
- "Kyrgyz": "ky",
79
- "Lao": "lo",
80
- "Latin": "la",
81
- "Latvian": "lv",
82
- "Lithuanian": "lt",
83
- "Luxembourgish": "lb",
84
- "Macedonian": "mk",
85
- "Malagasy": "mg",
86
- "Malay": "ms",
87
- "Malayalam": "ml",
88
- "Maltese": "mt",
89
- "Maori": "mi",
90
- "Marathi": "mr",
91
- "Mongolian": "mn",
92
- "Myanmar (Burmese)": "my",
93
- "Nepali": "ne",
94
- "Norwegian": "no",
95
- "Odia (Oriya)": "or",
96
- "Pashto": "ps",
97
- "Persian": "fa",
98
- "Polish": "pl",
99
- "Portuguese": "pt",
100
- "Punjabi": "pa",
101
- "Romanian": "ro",
102
- "Russian": "ru",
103
- "Samoan": "sm",
104
- "Scots Gaelic": "gd",
105
- "Serbian": "sr",
106
- "Sesotho": "st",
107
- "Shona": "sn",
108
- "Sindhi": "sd",
109
- "Sinhala": "si",
110
- "Slovak": "sk",
111
- "Slovenian": "sl",
112
- "Somali": "so",
113
- "Spanish": "es",
114
- "Sundanese": "su",
115
- "Swahili": "sw",
116
- "Swedish": "sv",
117
- "Tajik": "tg",
118
- "Tamil": "ta",
119
- "Tatar": "tt",
120
- "Telugu": "te",
121
- "Thai": "th",
122
- "Turkish": "tr",
123
- "Turkmen": "tk",
124
- "Ukrainian": "uk",
125
- "Urdu": "ur",
126
- "Uyghur": "ug",
127
- "Uzbek": "uz",
128
- "Vietnamese": "vi",
129
- "Welsh": "cy",
130
- "Xhosa": "xh",
131
- "Yiddish": "yi",
132
- "Yoruba": "yo",
133
- "Zulu": "zu"
134
  }
135
 
136
  # Create sorted lists for dropdowns
137
  language_names = sorted(LANGUAGES.keys())
138
- default_src = "English"
139
- default_tgt = "Spanish"
140
 
141
  # Gradio interface
142
  with gr.Blocks(title="Universal Translator") as demo:
@@ -169,18 +169,7 @@ with gr.Blocks(title="Universal Translator") as demo:
169
  )
170
 
171
  translate_btn = gr.Button("Translate", variant="primary")
172
- translate_btn.click(
173
- translate_text,
174
- inputs=[
175
- input_text,
176
- gr.Textbox(visible=False, value=""), # Source language code (will be set in function)
177
- gr.Textbox(visible=False, value="") # Target language code (will be set in function)
178
- ],
179
- outputs=output_text,
180
- api_name="translate"
181
- )
182
 
183
- # Update button click to include language codes
184
  def translate_wrapper(text, src_lang_name, tgt_lang_name):
185
  src_code = LANGUAGES[src_lang_name]
186
  tgt_code = LANGUAGES[tgt_lang_name]
@@ -194,12 +183,12 @@ with gr.Blocks(title="Universal Translator") as demo:
194
 
195
  gr.Examples(
196
  [
197
- ["Hello, how are you today?", "English", "French"],
198
- ["The weather is beautiful", "English", "German"],
199
- ["I love programming with Python", "English", "Spanish"],
200
- ["Machine learning is fascinating", "English", "Chinese (Simplified)"],
201
- ["こんにちは、元気ですか?", "Japanese", "English"],
202
- ["Bonjour, comment allez-vous?", "French", "Russian"]
203
  ],
204
  inputs=[input_text, src_lang, tgt_lang]
205
  )
@@ -207,7 +196,7 @@ with gr.Blocks(title="Universal Translator") as demo:
207
  gr.Markdown("### Supported Languages")
208
  gr.Markdown(f"- **Total Languages**: {len(language_names)}")
209
  gr.Markdown("- **Model**: [facebook/m2m100_418M](https://huggingface.co/facebook/m2m100_418M)")
210
- gr.Markdown("- **Note**: First translation may take 10-20 seconds (model loading)")
211
 
212
  if __name__ == "__main__":
213
  demo.launch()
 
1
  from transformers import pipeline
2
  import gradio as gr
3
 
4
+ # Initialize translation pipeline
5
  translator = pipeline("translation", model="facebook/m2m100_418M")
6
 
7
  def translate_text(text, src_lang_code, tgt_lang_code):
 
20
  )
21
  return result[0]['translation_text']
22
 
23
+ # Language dictionary with native text examples
24
  LANGUAGES = {
25
+ "Afrikaans - Afrikaans": "af",
26
+ "Albanian - Shqip": "sq",
27
+ "Amharic - አማርኛ": "am",
28
+ "Arabic - العربية": "ar",
29
+ "Armenian - Հայերեն": "hy",
30
+ "Azerbaijani - Azərbaycanca": "az",
31
+ "Basque - Euskara": "eu",
32
+ "Belarusian - Беларуская": "be",
33
+ "Bengali - বাংলা": "bn",
34
+ "Bosnian - Bosanski": "bs",
35
+ "Bulgarian - Български": "bg",
36
+ "Catalan - Català": "ca",
37
+ "Cebuano - Cebuano": "ceb",
38
+ "Chichewa - Nyanja": "ny",
39
+ "Chinese Simplified - 中文": "zh",
40
+ "Chinese Traditional - 中文": "zh",
41
+ "Corsican - Corsu": "co",
42
+ "Croatian - Hrvatski": "hr",
43
+ "Czech - Čeština": "cs",
44
+ "Danish - Dansk": "da",
45
+ "Dutch - Nederlands": "nl",
46
+ "English - English": "en",
47
+ "Esperanto - Esperanto": "eo",
48
+ "Estonian - Eesti": "et",
49
+ "Filipino - Filipino": "tl",
50
+ "Finnish - Suomi": "fi",
51
+ "French - Français": "fr",
52
+ "Frisian - Frysk": "fy",
53
+ "Galician - Galego": "gl",
54
+ "Georgian - ქართული": "ka",
55
+ "German - Deutsch": "de",
56
+ "Greek - Ελληνικά": "el",
57
+ "Gujarati - ગુજરાતી": "gu",
58
+ "Haitian Creole - Kreyòl ayisyen": "ht",
59
+ "Hausa - Hausa": "ha",
60
+ "Hawaiian - ʻŌlelo Hawaiʻi": "haw",
61
+ "Hebrew - עברית": "he",
62
+ "Hindi - हिन्दी": "hi",
63
+ "Hmong - Hmong": "hmn",
64
+ "Hungarian - Magyar": "hu",
65
+ "Icelandic - Íslenska": "is",
66
+ "Igbo - Asụsụ Igbo": "ig",
67
+ "Indonesian - Bahasa Indonesia": "id",
68
+ "Irish - Gaeilge": "ga",
69
+ "Italian - Italiano": "it",
70
+ "Japanese - 日本語": "ja",
71
+ "Javanese - Basa Jawa": "jv",
72
+ "Kannada - ಕನ್ನಡ": "kn",
73
+ "Kazakh - Қазақша": "kk",
74
+ "Khmer - ភាសាខ្មែរ": "km",
75
+ "Kinyarwanda - Kinyarwanda": "rw",
76
+ "Korean - 한국어": "ko",
77
+ "Kurdish - Kurdî": "ku",
78
+ "Kyrgyz - Кыргызча": "ky",
79
+ "Lao - ພາສາລາວ": "lo",
80
+ "Latin - Latina": "la",
81
+ "Latvian - Latviešu": "lv",
82
+ "Lithuanian - Lietuvių": "lt",
83
+ "Luxembourgish - Lëtzebuergesch": "lb",
84
+ "Macedonian - Македонски": "mk",
85
+ "Malagasy - Malagasy": "mg",
86
+ "Malay - Bahasa Melayu": "ms",
87
+ "Malayalam - മലയാളം": "ml",
88
+ "Maltese - Malti": "mt",
89
+ "Maori - Te Reo Māori": "mi",
90
+ "Marathi - मराठी": "mr",
91
+ "Mongolian - Монгол": "mn",
92
+ "Myanmar - မြန်မာဘာသာ": "my",
93
+ "Nepali - नेपाली": "ne",
94
+ "Norwegian - Norsk": "no",
95
+ "Odia - ଓଡ଼ିଆ": "or",
96
+ "Pashto - پښتو": "ps",
97
+ "Persian - فارسی": "fa",
98
+ "Polish - Polski": "pl",
99
+ "Portuguese - Português": "pt",
100
+ "Punjabi - ਪੰਜਾਬੀ": "pa",
101
+ "Romanian - Română": "ro",
102
+ "Russian - Русский": "ru",
103
+ "Samoan - Gagana Samoa": "sm",
104
+ "Scots Gaelic - Gàidhlig": "gd",
105
+ "Serbian - Српски": "sr",
106
+ "Sesotho - Sesotho": "st",
107
+ "Shona - ChiShona": "sn",
108
+ "Sindhi - سنڌي": "sd",
109
+ "Sinhala - සිංහල": "si",
110
+ "Slovak - Slovenčina": "sk",
111
+ "Slovenian - Slovenščina": "sl",
112
+ "Somali - Soomaali": "so",
113
+ "Spanish - Español": "es",
114
+ "Sundanese - Basa Sunda": "su",
115
+ "Swahili - Kiswahili": "sw",
116
+ "Swedish - Svenska": "sv",
117
+ "Tajik - Тоҷикӣ": "tg",
118
+ "Tamil - தமிழ்": "ta",
119
+ "Tatar - Татарча": "tt",
120
+ "Telugu - తెలుగు": "te",
121
+ "Thai - ไทย": "th",
122
+ "Turkish - Türkçe": "tr",
123
+ "Turkmen - Türkmençe": "tk",
124
+ "Ukrainian - Українська": "uk",
125
+ "Urdu - اردو": "ur",
126
+ "Uyghur - ئۇيغۇرچە": "ug",
127
+ "Uzbek - O'zbekcha": "uz",
128
+ "Vietnamese - Tiếng Việt": "vi",
129
+ "Welsh - Cymraeg": "cy",
130
+ "Xhosa - IsiXhosa": "xh",
131
+ "Yiddish - יידיש": "yi",
132
+ "Yoruba - Èdè Yorùbá": "yo",
133
+ "Zulu - IsiZulu": "zu"
134
  }
135
 
136
  # Create sorted lists for dropdowns
137
  language_names = sorted(LANGUAGES.keys())
138
+ default_src = "English - English"
139
+ default_tgt = "Spanish - Español"
140
 
141
  # Gradio interface
142
  with gr.Blocks(title="Universal Translator") as demo:
 
169
  )
170
 
171
  translate_btn = gr.Button("Translate", variant="primary")
 
 
 
 
 
 
 
 
 
 
172
 
 
173
  def translate_wrapper(text, src_lang_name, tgt_lang_name):
174
  src_code = LANGUAGES[src_lang_name]
175
  tgt_code = LANGUAGES[tgt_lang_name]
 
183
 
184
  gr.Examples(
185
  [
186
+ ["Hello, how are you today?", "English - English", "French - Français"],
187
+ ["The weather is beautiful", "English - English", "German - Deutsch"],
188
+ ["I love programming with Python", "English - English", "Spanish - Español"],
189
+ ["Machine learning is fascinating", "English - English", "Chinese Simplified - 中文"],
190
+ ["こんにちは、元気ですか?", "Japanese - 日本語", "English - English"],
191
+ ["Bonjour, comment allez-vous?", "French - Français", "Russian - Русский"]
192
  ],
193
  inputs=[input_text, src_lang, tgt_lang]
194
  )
 
196
  gr.Markdown("### Supported Languages")
197
  gr.Markdown(f"- **Total Languages**: {len(language_names)}")
198
  gr.Markdown("- **Model**: [facebook/m2m100_418M](https://huggingface.co/facebook/m2m100_418M)")
199
+ gr.Markdown("- **Tip**: Each language shows how it's written in its own script (e.g., Russian - Русский)")
200
 
201
  if __name__ == "__main__":
202
  demo.launch()