cassandrasestier commited on
Commit
7b615b8
·
verified ·
1 Parent(s): 24a867b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -10
app.py CHANGED
@@ -1,8 +1,10 @@
1
  # ================================
2
  # 🪞 MoodMirror+ — Conversational Emotional Self-Care
3
- # Dataset-only: trains a TF-IDF + OneVsRest Logistic Regression on GoEmotions
4
- # Cache du modèle + DB dans /data quand dispo (HF Spaces: activer Persistent storage)
5
- # Toujours donner au moins 1 conseil + parfois une citation
 
 
6
  # ================================
7
  import os
8
  import re
@@ -30,7 +32,7 @@ DATA_DIR = os.getenv("MM_DATA_DIR", _pick_data_dir())
30
  os.makedirs(DATA_DIR, exist_ok=True)
31
  DB_PATH = os.path.join(DATA_DIR, "moodmirror.db")
32
  MODEL_PATH = os.path.join(DATA_DIR, "goemo_sklearn.joblib")
33
- MODEL_VERSION = "v1-tfidf-lr-ovr"
34
 
35
  print(f"[MM] Using data dir: {DATA_DIR}")
36
  print(f"[MM] SQLite path: {DB_PATH}")
@@ -238,7 +240,77 @@ GOEMO_TO_APP = {
238
  "sadness": "sadness", "surprise": "neutral", "neutral": "neutral",
239
  }
240
 
241
- THRESHOLD = 0.30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  # ---------------- SQLite helpers ----------------
244
  def get_conn():
@@ -317,15 +389,36 @@ except Exception as e:
317
 
318
  # ---------------- Emotion detection ----------------
319
  def classify_text(text: str):
320
- if not CLASSIFIER:
 
 
 
 
 
 
321
  return []
 
 
 
322
  try:
323
- proba = CLASSIFIER.predict_proba([text])[0]
324
  except AttributeError:
325
  from scipy.special import expit
326
- proba = expit(CLASSIFIER.decision_function([text])[0])
327
- idxs = [i for i, p in enumerate(proba) if p >= THRESHOLD]
 
 
 
 
 
 
328
  idxs.sort(key=lambda i: proba[i], reverse=True)
 
 
 
 
 
 
329
  return [(LABEL_NAMES[i], float(proba[i])) for i in idxs]
330
 
331
  def detect_emotions(text: str):
@@ -336,7 +429,7 @@ def detect_emotions(text: str):
336
  for label, p in chosen:
337
  app = GOEMO_TO_APP.get(label.lower(), "neutral")
338
  bucket[app] = max(bucket.get(app, 0.0), p)
339
- return max(bucket, key=bucket.get)
340
 
341
  # ---------------- Reply composer ----------------
342
  def compose_support(main_emotion: str, is_first_msg: bool) -> str:
 
1
  # ================================
2
  # 🪞 MoodMirror+ — Conversational Emotional Self-Care
3
+ # Dataset-only: TF-IDF + OneVsRest Logistic Regression on GoEmotions
4
+ # - Persists model & SQLite DB to /data (enable Persistent storage on HF Spaces)
5
+ # - Always give at least one advice tip; sometimes add a quote
6
+ # - Implicit emotion recognition (emoji/slang/negations hints)
7
+ # - Dynamic threshold + Top-1 fallback
8
  # ================================
9
  import os
10
  import re
 
32
  os.makedirs(DATA_DIR, exist_ok=True)
33
  DB_PATH = os.path.join(DATA_DIR, "moodmirror.db")
34
  MODEL_PATH = os.path.join(DATA_DIR, "goemo_sklearn.joblib")
35
+ MODEL_VERSION = "v2-tfidf-lr-ovr-implicit" # bump when training recipe changes
36
 
37
  print(f"[MM] Using data dir: {DATA_DIR}")
38
  print(f"[MM] SQLite path: {DB_PATH}")
 
240
  "sadness": "sadness", "surprise": "neutral", "neutral": "neutral",
241
  }
242
 
243
+ # --- Threshold & implicit-emotion controls ---
244
+ THRESHOLD = 0.30 # standard selection threshold
245
+ MIN_THRESHOLD = 0.12 # floor if model is unsure
246
+ TOP1_FALLBACK = True # ensure at least one label if nothing passes threshold
247
+
248
+ # --- Implicit cues (emojis, slang, negations, intensity) ---
249
+ EMOJI_HINTS = {
250
+ "😒": "sadness", "😭": "sadness", "😞": "sadness", "πŸ’”": "grief",
251
+ "😑": "anger", "🀬": "anger", "😀": "anger",
252
+ "😱": "fear", "😨": "fear", "😰": "fear",
253
+ "😌": "relief", "πŸ™‚": "joy", "😊": "joy", "πŸ˜„": "joy", "😍": "love",
254
+ "πŸ€—": "love", "πŸ’–": "love", "πŸ™": "gratitude",
255
+ "πŸ’€": "boredom", "πŸ₯±": "boredom", "πŸ€”": "curiosity",
256
+ "😐": "neutral", "😢": "neutral"
257
+ }
258
+ SLANG_HINTS = {
259
+ "idk": "confusion", "meh": "boredom", "ugh": "annoyance", "nah": "disapproval",
260
+ "wtf": "anger", "omg": "surprise", "lol": "amusement", "lmao": "amusement",
261
+ "miss you": "grief", "miss her": "grief", "miss him": "grief",
262
+ "im fine": "sadness", "i'm fine": "sadness"
263
+ }
264
+ NEGATION_PATTERNS = [
265
+ ("not happy", "sadness"),
266
+ ("not okay", "sadness"),
267
+ ("not ok", "sadness"),
268
+ ("not fine", "sadness"),
269
+ ("no hope", "sadness"),
270
+ ("no energy", "sadness"),
271
+ ("no motivation", "boredom"),
272
+ ("not safe", "fear"),
273
+ ("not calm", "nervousness"),
274
+ ("not sure", "confusion"),
275
+ ]
276
+ INTENSIFIERS = ["!!", "!!!", "?!", "?!?", "soooo", "very", "really", "super", "extremely"]
277
+
278
+ def _lower(s: str) -> str:
279
+ return s.lower() if isinstance(s, str) else ""
280
+
281
def _cue_in_text(cue: str, lowered: str) -> bool:
    """Return True when *cue* occurs in *lowered* as a standalone word or phrase.

    A plain substring test fires inside unrelated words ("ugh" in "though",
    "meh" in "somehow", "very" in "every") and lets "not ok" match inside
    "not okay". The cue is therefore anchored with look-arounds on whichever
    of its ends is a word character. Punctuation cues such as "!!" or "?!"
    stay unanchored so they still match directly after a word. The cue's last
    character may repeat, so elongated spellings ("ughhh", "sooooooo") keep
    matching.
    """
    if not cue:
        return False
    left = r"(?<!\w)" if re.match(r"\w", cue[0]) else ""
    right = r"(?!\w)" if re.match(r"\w", cue[-1]) else ""
    tail = re.escape(cue[-1]) + "*"
    return re.search(left + re.escape(cue) + tail + right, lowered) is not None


def augment_text_for_classifier(text: str) -> str:
    """Append ``emo_<label>`` hint tokens for implicit emotional cues in *text*.

    Cues are read from the module-level tables: emoji keys of ``EMOJI_HINTS``
    (looked up one character at a time on the original text), slang phrases in
    ``SLANG_HINTS`` and negated phrases in ``NEGATION_PATTERNS`` (both matched
    as whole words/phrases on the lower-cased text). The hint tokens are meant
    to help TF-IDF pick up emotions that are not explicitly named.

    Args:
        text: Raw user message.

    Returns:
        ``text`` unchanged when it is empty/``None`` or no cue fires;
        otherwise ``text`` followed by a space and the space-separated hint
        tokens. When any ``INTENSIFIERS`` entry is present, every hint token
        appears twice.
    """
    if not text:
        return text
    lowered = _lower(text)

    hints = [EMOJI_HINTS[ch] for ch in text if ch in EMOJI_HINTS]
    hints.extend(
        label for cue, label in SLANG_HINTS.items() if _cue_in_text(cue, lowered)
    )
    hints.extend(
        label for cue, label in NEGATION_PATTERNS if _cue_in_text(cue, lowered)
    )

    if not hints:
        return text

    # Intensity: duplicate the hints so they weigh more.
    if any(_cue_in_text(cue, lowered) for cue in INTENSIFIERS):
        hints = hints * 2

    return text + " " + " ".join(f"emo_{label}" for label in hints)
314
 
315
  # ---------------- SQLite helpers ----------------
316
  def get_conn():
 
389
 
390
  # ---------------- Emotion detection ----------------
391
def classify_text(text: str):
    """Return the labels predicted for *text* as ``[(label_name, prob), ...]``.

    The list is sorted by descending probability.

    Steps:
      1. Augment the input with implicit-cue hint tokens.
      2. Score it with ``CLASSIFIER.predict_proba`` when available, otherwise
         with a sigmoid over ``CLASSIFIER.decision_function``.
      3. Keep every label at or above a dynamic threshold: ``THRESHOLD`` when
         the best score reaches it, else 80% of the best score, floored at
         ``MIN_THRESHOLD``.
      4. When nothing passes and ``TOP1_FALLBACK`` is set, return the single
         best label.

    Returns an empty list when no classifier or label names are loaded, or
    when the classifier yields no scores.
    """
    if not CLASSIFIER or not LABEL_NAMES:
        return []

    augmented = augment_text_for_classifier(text)

    # Choose the scoring method by capability. Wrapping the call itself in
    # ``except AttributeError`` would also swallow AttributeErrors raised
    # while vectorising/predicting and silently retry via decision_function.
    predict_proba = getattr(CLASSIFIER, "predict_proba", None)
    if predict_proba is not None:
        proba = predict_proba([augmented])[0]
    else:
        from scipy.special import expit
        proba = expit(CLASSIFIER.decision_function([augmented])[0])

    if not len(proba):
        return []

    # Index of the best score; reused for the Top-1 fallback below.
    top = max(range(len(proba)), key=lambda i: proba[i])
    best = float(proba[top])

    # Dynamic threshold: relax towards the best score when the model is unsure.
    thr = THRESHOLD if best >= THRESHOLD else max(MIN_THRESHOLD, best * 0.8)

    idxs = sorted(
        (i for i, p in enumerate(proba) if p >= thr),
        key=lambda i: proba[i],
        reverse=True,
    )

    # Top-1 fallback: guarantee at least one label.
    if not idxs and TOP1_FALLBACK:
        idxs = [top]

    return [(LABEL_NAMES[i], float(proba[i])) for i in idxs]
423
 
424
  def detect_emotions(text: str):
 
429
  for label, p in chosen:
430
  app = GOEMO_TO_APP.get(label.lower(), "neutral")
431
  bucket[app] = max(bucket.get(app, 0.0), p)
432
+ return max(bucket, key=bucket.get) if bucket else "neutral"
433
 
434
  # ---------------- Reply composer ----------------
435
  def compose_support(main_emotion: str, is_first_msg: bool) -> str: