Spaces:
Sleeping
Sleeping
Commit
·
96c5332
1
Parent(s):
d3ae861
Upd logger module
Browse files
- requirements.txt +1 -0
- utils/augment.py +7 -0
requirements.txt
CHANGED
|
@@ -21,5 +21,6 @@ transformers
|
|
| 21 |
torch
|
| 22 |
sentencepiece
|
| 23 |
sacremoses
|
|
|
|
| 24 |
|
| 25 |
# Note: Heavy local LLM dependencies (accelerate, bitsandbytes, etc.) are excluded for cloud mode
|
|
|
|
| 21 |
torch
|
| 22 |
sentencepiece
|
| 23 |
sacremoses
|
| 24 |
+
protobuf
|
| 25 |
|
| 26 |
# Note: Heavy local LLM dependencies (accelerate, bitsandbytes, etc.) are excluded for cloud mode
|
utils/augment.py
CHANGED
|
@@ -5,6 +5,13 @@ import random
|
|
| 5 |
from typing import Dict, Tuple
|
| 6 |
import ftfy
|
| 7 |
import langid
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
P_EMAIL = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 10 |
P_PHONE = re.compile(r"(?:(?:\+?\d{1,3})?[\s-]?)?(?:\(?\d{2,4}\)?[\s-]?)?\d{3,4}[\s-]?\d{3,4}")
|
|
|
|
| 5 |
from typing import Dict, Tuple
|
| 6 |
import ftfy
|
| 7 |
import langid
|
| 8 |
+
import logging
|
| 9 |
+
|
| 10 |
+
# Module logger
|
| 11 |
+
logger = logging.getLogger("augment")
|
| 12 |
+
if not logger.handlers:
|
| 13 |
+
logger.setLevel(logging.INFO)
|
| 14 |
+
logger.addHandler(logging.StreamHandler())
|
| 15 |
|
| 16 |
P_EMAIL = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
|
| 17 |
P_PHONE = re.compile(r"(?:(?:\+?\d{1,3})?[\s-]?)?(?:\(?\d{2,4}\)?[\s-]?)?\d{3,4}[\s-]?\d{3,4}")
|