Spaces:

cassandrasestier
/

MoodHelper

Sleeping

App Files Files Community

cassandrasestier committed on Nov 1, 2025

Commit

d969648

verified ·

1 Parent(s): 3e7d5d4

Update app.py

Browse files

Files changed (1) hide show

app.py +160 -89

app.py CHANGED Viewed

@@ -1,62 +1,44 @@
 # ================================
 # 🪞 MoodMirror+ — Conversational Emotional Self-Care
-# Advice + Inspirational quotes + Emotion-based color + SQLite DB
-# GoEmotions model + loads GoEmotions dataset ("simplified" config)
 # ================================
 import os
 import re
 import random
 import sqlite3
 from datetime import datetime
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
 from datasets import load_dataset
-# --- Storage paths (robust across local dev vs. HF Spaces) ---
 def _pick_data_dir():
-    # Prefer /data if it exists AND is writable (Spaces with persistent storage).
     if os.path.isdir("/data") and os.access("/data", os.W_OK):
         return "/data"
-    # Otherwise, fall back to the repo working directory.
     return os.getcwd()
 DATA_DIR = os.getenv("MM_DATA_DIR", _pick_data_dir())
 os.makedirs(DATA_DIR, exist_ok=True)
 DB_PATH = os.path.join(DATA_DIR, "moodmirror.db")
 print(f"[MM] Using data dir: {DATA_DIR}")
-print(f"[MM] SQLite path: {DB_PATH}")
-# --- Load GoEmotions dataset ("simplified") ---
-# This pulls from: google-research-datasets/go_emotions
-# The "simplified" config uses train/validation/test splits and label indices.
-try:
-    ds = load_dataset("google-research-datasets/go_emotions", "simplified")
-    LABEL_NAMES = ds["train"].features["labels"].feature.names  # e.g. ['admiration', ..., 'neutral']
-    print("[MM] GoEmotions dataset loaded.")
-except Exception as e:
-    ds = None
-    LABEL_NAMES = None
-    print(f"[WARN] Could not load GoEmotions dataset: {e}")
-# --- GoEmotions model (multi-label: 27 emotions + neutral) ---
-MODEL_ID = "SamLowe/roberta-base-go_emotions"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
-pipe = TextClassificationPipeline(
-    model=model,
-    tokenizer=tokenizer,
-    return_all_scores=True,      # list of dicts for every label
-    function_to_apply="sigmoid", # multi-label probabilities per label
-    device=0 if torch.cuda.is_available() else -1,
-)
-# --- Regex detection ---
 CRISIS_RE = re.compile(r"\b(self[- ]?harm|suicid|kill myself|end my life|overdose|cutting|i don.?t want to live|can.?t go on)\b", re.I)
 CLOSING_RE = re.compile(r"\b(thanks?|thank you|that'?s all|bye|goodbye|see you|take care|ok bye|no thanks?)\b", re.I)
-# --- Crisis resources ---
 CRISIS_NUMBERS = {
     "United States": "Call or text **988** (24/7 Suicide & Crisis Lifeline). If in immediate danger, call **911**.",
     "Canada": "Call or text **988** (Suicide Crisis Helpline, 24/7). If in immediate danger, call **911**.",
@@ -66,7 +48,6 @@ CRISIS_NUMBERS = {
     "Other / Not listed": "Call your local emergency number (**112/911**) or search “suicide crisis hotline” + your country.",
 }
-# --- Psychology-informed suggestions ---
 SUGGESTIONS = {
     "sadness": "Be gentle with yourself. Rest, cry, or connect — emotions pass when they’re acknowledged.",
     "fear": "Ground yourself: 5 things you see, 4 you feel, 3 you hear, 2 you smell, 1 you taste.",
@@ -81,7 +62,6 @@ SUGGESTIONS = {
     "neutral": "Take a mindful moment: breathe deeply and release any hidden tension in your shoulders.",
 }
-# --- Inspirational quotes (short & emotionally tuned) ---
 QUOTES = {
     "sadness": [
         "“Even the darkest night will end and the sun will rise.” – Victor Hugo",
@@ -133,7 +113,7 @@ COLOR_MAP = {
     "neutral": "#F5F5F5",
 }
-# --- Map GoEmotions → app categories (27 emotions + neutral) ---
 GOEMO_TO_APP = {
     "admiration": "gratitude",
     "amusement": "joy",
@@ -165,11 +145,10 @@ GOEMO_TO_APP = {
     "neutral": "neutral",
 }
-THRESHOLD = 0.35  # tune to be more/less sensitive
-# --- SQLite setup ---
 def get_conn():
-    # timeout helps if multiple requests hit the DB at once
     return sqlite3.connect(DB_PATH, check_same_thread=False, timeout=10)
 def init_db():
@@ -189,8 +168,7 @@ def init_db():
         conn.commit()
     finally:
         try:
-            if conn is not None:
-                conn.close()
         except Exception:
             pass
@@ -206,34 +184,130 @@ def log_session(country, msg, emotion):
         conn.commit()
     finally:
         try:
-            if conn is not None:
-                conn.close()
         except Exception:
             pass
-# --- Emotion detection (multi-label via model) ---
-def detect_emotions(text: str):
     """
-    Returns:
-      - chosen: list of (label, score) above threshold, sorted desc
-      - main_app: top mapped category for UI/tips/colors
     """
     try:
-        preds = pipe(text)[0]  # list of {'label': 'joy', 'score': 0.82} for all labels
-        chosen = [p for p in preds if p["score"] >= THRESHOLD]
-        chosen.sort(key=lambda x: x["score"], reverse=True)
-        # Map to app categories and pick the strongest mapped bucket
-        bucket = {}
-        for p in chosen:
-            app_label = GOEMO_TO_APP.get(p["label"].lower(), "neutral")
-            bucket[app_label] = max(bucket.get(app_label, 0.0), float(p["score"]))
-        main_app = max(bucket, key=bucket.get) if bucket else "neutral"
-        return chosen, main_app
-    except Exception:
-        return [], "neutral"
-# --- Chat logic ---
 def crisis_block(country):
     msg = CRISIS_NUMBERS.get(country, CRISIS_NUMBERS["Other / Not listed"])
     return (
@@ -243,19 +317,16 @@ def crisis_block(country):
     )
 def chat_step(message, history, country, save_session):
-    # Crisis check
     if CRISIS_RE.search(message):
         return crisis_block(country), "#FFD6E7"
     if CLOSING_RE.search(message):
         return ("You're very welcome 💛 Take care of yourself. Small steps matter. 🌿", "#FFFFFF")
-    # Focus on the most recent ~100 words (simple heuristic)
     recent = " ".join(message.split()[-100:])
     detected, main = detect_emotions(recent)
     color = COLOR_MAP.get(main, "#FFFFFF")
-    # Save anonymized session
     if save_session:
         log_session(country, message, main)
@@ -274,20 +345,14 @@ def chat_step(message, history, country, save_session):
     if not history:
         reply += "\n\n*Can you tell me a bit more about what’s behind that feeling?*"
     return reply, color
-# --- Helper: sample dataset rows for UI preview ---
-def sample_goemotions(n=5, split="train", seed=42):
-    if ds is None:
-        return [{"text": "Dataset not loaded", "labels": []}]
-    rows = ds[split].shuffle(seed=seed).select(range(min(n, len(ds[split]))))
-    out = []
-    names = LABEL_NAMES or []
-    for text, labs in zip(rows["text"], rows["labels"]):
-        out.append({"text": text, "labels": [names[i] for i in labs]})
-    return out
-# --- Gradio interface ---
 init_db()
 custom_css = """
@@ -296,11 +361,11 @@ custom_css = """
 @keyframes blink { 50% {opacity: 0.4;} }
 """
-with gr.Blocks(css=custom_css, title="🪞 MoodMirror+ (GoEmotions Edition)") as demo:
     style_injector = gr.HTML("")
     gr.Markdown(
         "### 🪞 MoodMirror+ — Emotional Support & Inspiration 🌸\n"
-        "Share how you feel — I’ll respond with care, science-based advice, or inspiring thoughts.\n\n"
         "_Not medical advice. If you feel unsafe, please reach out for help immediately._"
     )
@@ -308,13 +373,13 @@ with gr.Blocks(css=custom_css, title="🪞 MoodMirror+ (GoEmotions Edition)") as
         country = gr.Dropdown(choices=list(CRISIS_NUMBERS.keys()), value="Other / Not listed", label="Country")
         save_ok = gr.Checkbox(value=False, label="Save anonymized session (no personal data)")
-    chat = gr.Chatbot(height=350)
     msg = gr.Textbox(placeholder="Type how you feel...", label="Your message")
     send = gr.Button("Send")
     typing = gr.Markdown("", elem_classes="typing")
-    # Dataset preview UI
-    with gr.Accordion("🔎 Preview GoEmotions samples (from the linked dataset)", open=False):
         with gr.Row():
             n_examples = gr.Slider(1, 10, value=5, step=1, label="Number of examples")
             split = gr.Dropdown(["train", "validation", "test"], value="train", label="Split")
@@ -322,9 +387,13 @@ with gr.Blocks(css=custom_css, title="🪞 MoodMirror+ (GoEmotions Edition)") as
         table = gr.Dataframe(headers=["text", "labels"], row_count=5, wrap=True)
         def refresh_samples(n, split_name):
-            rows = sample_goemotions(int(n), split=split_name)
-            # Convert to a list of [text, "label1, label2, ..."] rows for display
-            return [[r["text"], ", ".join(r["labels"])] for r in rows]
         refresh.click(refresh_samples, inputs=[n_examples, split], outputs=[table])
@@ -337,8 +406,10 @@ with gr.Blocks(css=custom_css, title="🪞 MoodMirror+ (GoEmotions Edition)") as
         style_tag = f"<style>:root,body,.gradio-container{{background:{color}!important;}}</style>"
         yield chat_hist + [[user_msg, reply]], "", style_tag, ""
-    send.click(respond, inputs=[msg, chat, country, save_ok], outputs=[chat, typing, style_injector, msg], queue=True)
-    msg.submit(respond, inputs=[msg, chat, country, save_ok], outputs=[chat, typing, style_injector, msg], queue=True)
 if __name__ == "__main__":
     demo.queue()

 # ================================
 # 🪞 MoodMirror+ — Conversational Emotional Self-Care
+# Uses ONLY the GoEmotions dataset (no pretrained model)
+# Trains TF-IDF + OneVsRest Logistic Regression on first run, caches to /data
 # ================================
 import os
 import re
 import random
 import sqlite3
+import joblib
 from datetime import datetime
 import gradio as gr
 from datasets import load_dataset
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.preprocessing import MultiLabelBinarizer
+from sklearn.linear_model import LogisticRegression
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.pipeline import Pipeline
+from sklearn.metrics import f1_score
+# ---------------- Storage paths (robust local vs. HF Spaces) ----------------
 def _pick_data_dir():
     """Return the directory used for the app's on-disk files.

     ``/data`` is returned when it exists as a directory and the process has
     write access to it; otherwise the current working directory is returned.
     """
     persistent_dir = "/data"
     writable = os.path.isdir(persistent_dir) and os.access(persistent_dir, os.W_OK)
     return persistent_dir if writable else os.getcwd()
 # MM_DATA_DIR, when set, overrides the directory chosen by _pick_data_dir().
 DATA_DIR = os.getenv("MM_DATA_DIR", _pick_data_dir())
 # Create the directory if needed; the SQLite DB and the model cache live under it.
 os.makedirs(DATA_DIR, exist_ok=True)
 DB_PATH = os.path.join(DATA_DIR, "moodmirror.db")
+MODEL_PATH = os.path.join(DATA_DIR, "goemo_sklearn.joblib")     # joblib bundle: version, pipeline, mlb, label_names
+MODEL_VERSION = "v1-tfidf-lr-ovr"                               # bump when training changes so a stale cache is retrained
 print(f"[MM] Using data dir: {DATA_DIR}")
+print(f"[MM] SQLite path:   {DB_PATH}")
+print(f"[MM] Model path:    {MODEL_PATH}")
+# ---------------- Crisis & regex ----------------
 # Crisis phrases, matched case-insensitively between word boundaries.
 # Stems carry a trailing \w* so inflected forms still end on a word boundary:
 # without it "suicid" followed by \b can never match "suicide" or "suicidal",
 # and "overdose" misses "overdosed"/"overdosing".
 CRISIS_RE = re.compile(
     r"\b(self[- ]?harm\w*|suicid\w*|kill myself|end my life|overdos\w*|cutting"
     r"|i don.?t want to live|can.?t go on)\b",
     re.I,
 )
 # Sign-off phrases; a match makes chat_step answer with its farewell message.
 CLOSING_RE = re.compile(r"\b(thanks?|thank you|that'?s all|bye|goodbye|see you|take care|ok bye|no thanks?)\b", re.I)
 CRISIS_NUMBERS = {
     "United States": "Call or text **988** (24/7 Suicide & Crisis Lifeline). If in immediate danger, call **911**.",
     "Canada": "Call or text **988** (Suicide Crisis Helpline, 24/7). If in immediate danger, call **911**.",
     "Other / Not listed": "Call your local emergency number (**112/911**) or search “suicide crisis hotline” + your country.",
 }
 SUGGESTIONS = {
     "sadness": "Be gentle with yourself. Rest, cry, or connect — emotions pass when they’re acknowledged.",
     "fear": "Ground yourself: 5 things you see, 4 you feel, 3 you hear, 2 you smell, 1 you taste.",
     "neutral": "Take a mindful moment: breathe deeply and release any hidden tension in your shoulders.",
 }
 QUOTES = {
     "sadness": [
         "“Even the darkest night will end and the sun will rise.” – Victor Hugo",
     "neutral": "#F5F5F5",
 }
+# Map GoEmotions label -> your UI buckets
 GOEMO_TO_APP = {
     "admiration": "gratitude",
     "amusement": "joy",
     "neutral": "neutral",
 }
+THRESHOLD = 0.30  # probability threshold for selecting labels
+# ---------------- SQLite helpers ----------------
 def get_conn():
     """Open and return a new SQLite connection to ``DB_PATH``.

     The connection is opened with ``check_same_thread=False`` and a
     ``timeout`` of 10 (seconds, per the sqlite3 API).
     """
     connect_options = {"check_same_thread": False, "timeout": 10}
     return sqlite3.connect(DB_PATH, **connect_options)
 def init_db():
         conn.commit()
     finally:
         try:
+            if conn: conn.close()
         except Exception:
             pass
         conn.commit()
     finally:
         try:
+            if conn: conn.close()
         except Exception:
             pass
+# ---------------- Train / Load model from DATASET ONLY ----------------
+def load_goemotions_dataset():
+    """Load the GoEmotions "simplified" dataset together with its label names.
+
+    Returns:
+        A ``(ds, label_names)`` pair: the object returned by ``load_dataset``
+        and the names read from the train split's ``labels`` feature.
+    """
+    # In the "simplified" config each example has 'text' and 'labels' (list[int] indices).
+    dataset = load_dataset("google-research-datasets/go_emotions", "simplified")
+    labels_feature = dataset["train"].features["labels"]
+    return dataset, labels_feature.feature.names
+def _prepare_xy(split):
+    """Return the ``(texts, labels)`` columns of a dataset split.
+
+    ``split`` is indexed by column name. Per the loader's note, ``labels``
+    holds one list of integer label indices per example.
+    """
+    texts, label_lists = split["text"], split["labels"]
+    return texts, label_lists
+def _load_cached_bundle():
+    """Return the cached model bundle from MODEL_PATH, or None if it cannot be used."""
+    if not os.path.isfile(MODEL_PATH):
+        return None
+    print("[MM] Loading cached classifier...")
+    try:
+        # NOTE(review): joblib.load unpickles arbitrary objects; MODEL_PATH must
+        # only ever hold a file written by this app.
+        bundle = joblib.load(MODEL_PATH)
+    except Exception as e:
+        # A truncated file, or one pickled under other library versions, should
+        # lead to a retrain rather than leave the app without a classifier.
+        print(f"[WARN] Could not read cached model ({e}); retraining...")
+        return None
+    if not isinstance(bundle, dict) or bundle.get("version") != MODEL_VERSION:
+        print("[MM] Cached model version mismatch; retraining...")
+        return None
+    if any(key not in bundle for key in ("pipeline", "mlb", "label_names")):
+        print("[WARN] Cached model is incomplete; retraining...")
+        return None
+    return bundle
+def train_or_load_model():
+    """Return ``(pipeline, mlb, label_names)`` from the cache or by training.
+
+    A usable cache at MODEL_PATH (readable, matching MODEL_VERSION) is returned
+    as is. Otherwise the GoEmotions dataset is loaded, a TF-IDF +
+    One-vs-Rest logistic-regression pipeline is fitted on the train split,
+    its macro F1 on the validation split is printed, and the result is cached.
+
+    Returns:
+        pipeline: the fitted sklearn ``Pipeline``.
+        mlb: the fitted ``MultiLabelBinarizer`` (label index -> multi-hot column).
+        label_names: label names from the dataset, indexed by label id.
+
+    Raises:
+        Whatever dataset loading or fitting raises; a failure to *save* the
+        cache is only reported, because the trained model is still usable.
+    """
+    # Try cache first
+    cached = _load_cached_bundle()
+    if cached is not None:
+        return cached["pipeline"], cached["mlb"], cached["label_names"]
+    print("[MM] Loading GoEmotions dataset...")
+    ds, label_names = load_goemotions_dataset()
+    print("[MM] Preparing data...")
+    X_train, y_train_idx = _prepare_xy(ds["train"])
+    X_val, y_val_idx = _prepare_xy(ds["validation"])
+    # MultiLabelBinarizer to convert list[int] -> multi-hot; classes are fixed
+    # up front so every label gets a column even if absent from a split.
+    mlb = MultiLabelBinarizer(classes=list(range(len(label_names))))
+    Y_train = mlb.fit_transform(y_train_idx)
+    Y_val = mlb.transform(y_val_idx)
+    # Build pipeline
+    # - TfidfVectorizer with simple English settings
+    # - LogisticRegression (saga) in One-vs-Rest for multi-label probabilities
+    # Parallelism is requested once, across labels, on the One-vs-Rest wrapper;
+    # each wrapped estimator solves a single binary problem.
+    clf = Pipeline(steps=[
+        ("tfidf", TfidfVectorizer(
+            lowercase=True,
+            ngram_range=(1, 2),
+            min_df=2,
+            max_df=0.9,
+            strip_accents="unicode",
+        )),
+        ("ovr", OneVsRestClassifier(
+            LogisticRegression(
+                solver="saga",
+                max_iter=1000,
+                class_weight="balanced",
+            ),
+            n_jobs=-1,
+        )),
+    ])
+    print("[MM] Training classifier (this happens once; cached afterward)...")
+    clf.fit(X_train, Y_train)
+    # Quick validation metric: macro F1 averaged over the label columns;
+    # zero_division=0 scores an undefined per-label F1 as 0.
+    Y_val_pred = clf.predict(X_val)
+    macro_f1 = f1_score(Y_val, Y_val_pred, average="macro", zero_division=0)
+    print(f"[MM] Validation macro F1: {macro_f1:.3f}")
+    # Cache model: write to a temporary file and rename it into place so an
+    # interrupted save never leaves a half-written MODEL_PATH behind.
+    tmp_path = MODEL_PATH + ".tmp"
+    try:
+        joblib.dump({
+            "version": MODEL_VERSION,
+            "pipeline": clf,
+            "mlb": mlb,
+            "label_names": label_names,
+        }, tmp_path)
+        os.replace(tmp_path, MODEL_PATH)
+        print(f"[MM] Saved classifier to {MODEL_PATH}")
+    except Exception as e:
+        # The freshly trained model still serves this run even if caching failed.
+        print(f"[WARN] Could not cache classifier: {e}")
+    return clf, mlb, label_names
+# Train/load at startup (runs once, at import time).
+# On any failure the three globals are set to None; classify_text() then returns
+# no labels and detect_emotions() falls back to "neutral" instead of crashing.
+try:
+    CLASSIFIER, MLB, LABEL_NAMES = train_or_load_model()
+except Exception as e:
+    print(f"[WARN] Failed to train/load classifier: {e}")
+    CLASSIFIER, MLB, LABEL_NAMES = None, None, None
+# ---------------- Inference using ONLY the trained classifier ----------------
+def classify_text(text: str):
     """
+    Score ``text`` with the trained classifier.
+
+    Returns a list of ``(label_name, probability)`` tuples for every label whose
+    probability is at least ``THRESHOLD``, sorted by probability, highest first.
+    Returns ``[]`` when no classifier is available or ``text`` is empty/blank.
     """
+    # Compare against None explicitly: the truthiness of an estimator object is
+    # incidental (e.g. a Pipeline's depends on its number of steps).
+    if CLASSIFIER is None or MLB is None or not LABEL_NAMES:
+        return []
+    # Blank input carries no signal; skip scoring it.
+    if not text or not text.strip():
+        return []
+    # The text is scored as a one-item batch; row 0 holds one value per label.
     try:
+        proba = CLASSIFIER.predict_proba([text])[0]
+    except AttributeError:
+        # If estimator doesn't support predict_proba (shouldn't happen with LR),
+        # fall back to decision_function -> sigmoid
+        from scipy.special import expit
+        scores = CLASSIFIER.decision_function([text])[0]
+        proba = expit(scores)
+    # Keep labels at or above the threshold, strongest first (stable for ties).
+    ranked = sorted(
+        ((i, float(p)) for i, p in enumerate(proba) if p >= THRESHOLD),
+        key=lambda pair: pair[1],
+        reverse=True,
+    )
+    return [(LABEL_NAMES[i], p) for i, p in ranked]
+def detect_emotions(text: str):
+    """Classify ``text`` and pick the app category to drive the reply.
+
+    Returns ``(chosen, main)``: ``chosen`` is the list from ``classify_text``
+    and ``main`` is the app category (via ``GOEMO_TO_APP``) with the highest
+    probability among them. Returns ``([], "neutral")`` when nothing is chosen.
+    """
+    scored = classify_text(text)
+    if not scored:
+        return [], "neutral"
+    # Keep, per app category, the highest probability of any label mapped to it.
+    strongest = {}
+    for goemo_label, prob in scored:
+        category = GOEMO_TO_APP.get(goemo_label.lower(), "neutral")
+        previous = strongest.get(category, 0.0)
+        strongest[category] = prob if prob > previous else previous
+    if strongest:
+        top_category = max(strongest.items(), key=lambda item: item[1])[0]
+    else:
+        top_category = "neutral"
+    return scored, top_category
+# ---------------- Chat logic ----------------
 def crisis_block(country):
     msg = CRISIS_NUMBERS.get(country, CRISIS_NUMBERS["Other / Not listed"])
     return (
     )
 def chat_step(message, history, country, save_session):
     if CRISIS_RE.search(message):
         return crisis_block(country), "#FFD6E7"
     if CLOSING_RE.search(message):
         return ("You're very welcome 💛 Take care of yourself. Small steps matter. 🌿", "#FFFFFF")
     recent = " ".join(message.split()[-100:])
     detected, main = detect_emotions(recent)
     color = COLOR_MAP.get(main, "#FFFFFF")
     if save_session:
         log_session(country, message, main)
     if not history:
         reply += "\n\n*Can you tell me a bit more about what’s behind that feeling?*"
+    # (Optional) append detected emotions summary
+    if detected:
+        summary = ", ".join([f"{lbl} ({p:.2f})" for lbl, p in detected[:3]])
+        reply += f"\n\nDetected: {summary}"
     return reply, color
+# ---------------- Gradio UI ----------------
 init_db()
 custom_css = """
 @keyframes blink { 50% {opacity: 0.4;} }
 """
+with gr.Blocks(css=custom_css, title="🪞 MoodMirror+ (Dataset-only Edition)") as demo:
     style_injector = gr.HTML("")
     gr.Markdown(
         "### 🪞 MoodMirror+ — Emotional Support & Inspiration 🌸\n"
+        "Powered only by the **GoEmotions dataset** (trained locally on startup).\n\n"
         "_Not medical advice. If you feel unsafe, please reach out for help immediately._"
     )
         country = gr.Dropdown(choices=list(CRISIS_NUMBERS.keys()), value="Other / Not listed", label="Country")
         save_ok = gr.Checkbox(value=False, label="Save anonymized session (no personal data)")
+    chat = gr.Chatbot(height=360)
     msg = gr.Textbox(placeholder="Type how you feel...", label="Your message")
     send = gr.Button("Send")
     typing = gr.Markdown("", elem_classes="typing")
+    # Optional: dataset sample preview (for transparency)
+    with gr.Accordion("🔎 Preview GoEmotions samples", open=False):
         with gr.Row():
             n_examples = gr.Slider(1, 10, value=5, step=1, label="Number of examples")
             split = gr.Dropdown(["train", "validation", "test"], value="train", label="Split")
         table = gr.Dataframe(headers=["text", "labels"], row_count=5, wrap=True)
         def refresh_samples(n, split_name):
+            """Return ``[text, "label, label, ..."]`` rows sampled from one split.
+
+            Any exception is turned into a single row carrying the error text.
+            """
+            try:
+                dataset = load_dataset("google-research-datasets/go_emotions", "simplified")
+                label_names = dataset["train"].features["labels"].feature.names
+                chosen_split = dataset[split_name]
+                # Fixed seed: the same rows come back for the same split and count.
+                shuffled = chosen_split.shuffle(seed=42)
+                count = min(int(n), len(chosen_split))
+                sample = shuffled.select(range(count))
+                table_rows = []
+                for text, label_ids in zip(sample["text"], sample["labels"]):
+                    readable = ", ".join([label_names[i] for i in label_ids])
+                    table_rows.append([text, readable])
+                return table_rows
+            except Exception as e:
+                return [[f"Dataset load error: {e}", ""]]
         refresh.click(refresh_samples, inputs=[n_examples, split], outputs=[table])
         style_tag = f"<style>:root,body,.gradio-container{{background:{color}!important;}}</style>"
         yield chat_hist + [[user_msg, reply]], "", style_tag, ""
+    send.click(respond, inputs=[msg, chat, country, save_ok],
+               outputs=[chat, typing, style_injector, msg], queue=True)
+    msg.submit(respond, inputs=[msg, chat, country, save_ok],
+               outputs=[chat, typing, style_injector, msg], queue=True)
 if __name__ == "__main__":
     demo.queue()