Spaces:
Sleeping
Sleeping
Commit
·
becf438
1
Parent(s):
4f8b3ce
Upd syntax
Browse files
- app.py +2 -2
- vi/processing.py +5 -5
app.py
CHANGED
|
@@ -200,7 +200,7 @@ def root():
|
|
| 200 |
async function startJob(dataset) {{
|
| 201 |
const log = document.getElementById("log");
|
| 202 |
const vietnameseToggle = document.getElementById("vietnameseTranslation");
|
| 203 |
-
const isVietnameseMode = vietnameseToggle.checked;
|
| 204 |
|
| 205 |
log.innerHTML = "⏳ Starting job for <b>" + dataset + "</b>" + (isVietnameseMode ? " with Vietnamese translation" : "") + "...";
|
| 206 |
try {{
|
|
@@ -240,7 +240,7 @@ def root():
|
|
| 240 |
async function startRagJob(dataset) {{
|
| 241 |
const log = document.getElementById("log");
|
| 242 |
const vietnameseToggle = document.getElementById("vietnameseTranslation");
|
| 243 |
-
const isVietnameseMode = vietnameseToggle.checked;
|
| 244 |
|
| 245 |
log.innerHTML = "⏳ Starting RAG processing for <b>" + dataset + "</b>" + (isVietnameseMode ? " with Vietnamese translation" : "") + "...";
|
| 246 |
try {{
|
|
|
|
| 200 |
async function startJob(dataset) {{
|
| 201 |
const log = document.getElementById("log");
|
| 202 |
const vietnameseToggle = document.getElementById("vietnameseTranslation");
|
| 203 |
+
const isVietnameseMode = vietnameseToggle ? vietnameseToggle.checked : false;
|
| 204 |
|
| 205 |
log.innerHTML = "⏳ Starting job for <b>" + dataset + "</b>" + (isVietnameseMode ? " with Vietnamese translation" : "") + "...";
|
| 206 |
try {{
|
|
|
|
| 240 |
async function startRagJob(dataset) {{
|
| 241 |
const log = document.getElementById("log");
|
| 242 |
const vietnameseToggle = document.getElementById("vietnameseTranslation");
|
| 243 |
+
const isVietnameseMode = vietnameseToggle ? vietnameseToggle.checked : false;
|
| 244 |
|
| 245 |
log.innerHTML = "⏳ Starting RAG processing for <b>" + dataset + "</b>" + (isVietnameseMode ? " with Vietnamese translation" : "") + "...";
|
| 246 |
try {{
|
vi/processing.py
CHANGED
|
@@ -99,7 +99,7 @@ def _validate_vi_translation(original: str, translated: str) -> bool:
|
|
| 99 |
# If no Vietnamese characters but significantly different from original, accept it
|
| 100 |
# (some translations might not have Vietnamese diacritics)
|
| 101 |
if len(translated) > len(original) * 0.5 and len(translated) < len(original) * 2.0:
|
| 102 |
-
|
| 103 |
|
| 104 |
return False
|
| 105 |
|
|
@@ -155,7 +155,7 @@ def translate_sft_row(row: Dict[str, Any], translator, text_fields: List[str] =
|
|
| 155 |
logger.debug(f" Are they the same? {original == translated}")
|
| 156 |
|
| 157 |
# Validate and sanitize translated field
|
| 158 |
-
|
| 159 |
translated_sft[field] = _vi_sanitize_text(translated)
|
| 160 |
logger.debug(f"✅ Successfully translated field '{field}'")
|
| 161 |
# Add success statistics if stats available
|
|
@@ -172,7 +172,7 @@ def translate_sft_row(row: Dict[str, Any], translator, text_fields: List[str] =
|
|
| 172 |
except Exception as e:
|
| 173 |
logger.error(f"Failed to translate field '{field}': {e}")
|
| 174 |
translated_sft[field] = sft_data[field]
|
| 175 |
-
|
| 176 |
# Keep original if field doesn't exist or is empty
|
| 177 |
translated_sft[field] = sft_data.get(field, "")
|
| 178 |
|
|
@@ -233,7 +233,7 @@ def translate_rag_row(row: Dict[str, Any], translator, text_fields: List[str] =
|
|
| 233 |
logger.debug(f" Are they the same? {original == translated}")
|
| 234 |
|
| 235 |
# Validate and sanitize translated field
|
| 236 |
-
|
| 237 |
translated_row[field] = _vi_sanitize_text(translated)
|
| 238 |
logger.debug(f"✅ Successfully translated RAG field '{field}'")
|
| 239 |
# Add success statistics if stats available
|
|
@@ -250,7 +250,7 @@ def translate_rag_row(row: Dict[str, Any], translator, text_fields: List[str] =
|
|
| 250 |
except Exception as e:
|
| 251 |
logger.error(f"Failed to translate RAG field '{field}': {e}")
|
| 252 |
translated_row[field] = row[field]
|
| 253 |
-
|
| 254 |
# Keep original if field doesn't exist or is empty
|
| 255 |
translated_row[field] = row.get(field, "")
|
| 256 |
|
|
|
|
| 99 |
# If no Vietnamese characters but significantly different from original, accept it
|
| 100 |
# (some translations might not have Vietnamese diacritics)
|
| 101 |
if len(translated) > len(original) * 0.5 and len(translated) < len(original) * 2.0:
|
| 102 |
+
return True
|
| 103 |
|
| 104 |
return False
|
| 105 |
|
|
|
|
| 155 |
logger.debug(f" Are they the same? {original == translated}")
|
| 156 |
|
| 157 |
# Validate and sanitize translated field
|
| 158 |
+
if _validate_vi_translation(original, translated):
|
| 159 |
translated_sft[field] = _vi_sanitize_text(translated)
|
| 160 |
logger.debug(f"✅ Successfully translated field '{field}'")
|
| 161 |
# Add success statistics if stats available
|
|
|
|
| 172 |
except Exception as e:
|
| 173 |
logger.error(f"Failed to translate field '{field}': {e}")
|
| 174 |
translated_sft[field] = sft_data[field]
|
| 175 |
+
else:
|
| 176 |
# Keep original if field doesn't exist or is empty
|
| 177 |
translated_sft[field] = sft_data.get(field, "")
|
| 178 |
|
|
|
|
| 233 |
logger.debug(f" Are they the same? {original == translated}")
|
| 234 |
|
| 235 |
# Validate and sanitize translated field
|
| 236 |
+
if _validate_vi_translation(original, translated):
|
| 237 |
translated_row[field] = _vi_sanitize_text(translated)
|
| 238 |
logger.debug(f"✅ Successfully translated RAG field '{field}'")
|
| 239 |
# Add success statistics if stats available
|
|
|
|
| 250 |
except Exception as e:
|
| 251 |
logger.error(f"Failed to translate RAG field '{field}': {e}")
|
| 252 |
translated_row[field] = row[field]
|
| 253 |
+
else:
|
| 254 |
# Keep original if field doesn't exist or is empty
|
| 255 |
translated_row[field] = row.get(field, "")
|
| 256 |
|