OliverPerrin committed on
Commit
7977c7d
·
1 Parent(s): 18fc263

Clean up demo_gradio.py with consistent commenting style

Browse files
Files changed (1) hide show
  1. scripts/demo_gradio.py +76 -32
scripts/demo_gradio.py CHANGED
@@ -1,4 +1,14 @@
1
- """Minimal Gradio demo for LexiMind multitask model."""
 
 
 
 
 
 
 
 
 
 
2
 
3
  from __future__ import annotations
4
 
@@ -8,8 +18,12 @@ from pathlib import Path
8
 
9
  import gradio as gr
10
 
 
 
 
11
  SCRIPT_DIR = Path(__file__).resolve().parent
12
  PROJECT_ROOT = SCRIPT_DIR.parent
 
13
  if str(PROJECT_ROOT) not in sys.path:
14
  sys.path.insert(0, str(PROJECT_ROOT))
15
 
@@ -21,45 +35,71 @@ from src.utils.logging import configure_logging, get_logger
21
  configure_logging()
22
  logger = get_logger(__name__)
23
 
 
 
24
  OUTPUTS_DIR = PROJECT_ROOT / "outputs"
25
  EVAL_REPORT_PATH = OUTPUTS_DIR / "evaluation_report.json"
26
 
 
 
 
 
 
 
 
 
 
 
27
  _pipeline = None
28
 
29
 
30
  def get_pipeline():
 
31
  global _pipeline
32
- if _pipeline is None:
33
- checkpoint_path = Path("checkpoints/best.pt")
34
- if not checkpoint_path.exists():
35
- checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
36
- hf_hub_download(
37
- repo_id="OliverPerrin/LexiMind-Model",
38
- filename="best.pt",
39
- local_dir="checkpoints",
40
- local_dir_use_symlinks=False,
41
- )
42
- _pipeline, _ = create_inference_pipeline(
43
- tokenizer_dir="artifacts/hf_tokenizer/",
44
- checkpoint_path="checkpoints/best.pt",
45
- labels_path="artifacts/labels.json",
46
  )
 
 
 
 
 
 
47
  return _pipeline
48
 
49
 
 
 
 
50
  def analyze(text: str) -> str:
51
- """Run all three tasks and return results as formatted text."""
 
 
 
 
52
  if not text or not text.strip():
53
  return "Please enter some text to analyze."
54
 
55
  try:
56
  pipe = get_pipeline()
57
 
58
- # Summarization
59
  summary = pipe.summarize([text], max_length=128)[0].strip() or "(empty)"
60
-
61
- # Emotion detection
62
  emotions = pipe.predict_emotions([text], threshold=0.5)[0]
 
 
 
63
  if emotions.labels:
64
  emotion_str = ", ".join(
65
  f"{lbl} ({score:.1%})"
@@ -68,10 +108,6 @@ def analyze(text: str) -> str:
68
  else:
69
  emotion_str = "No strong emotions detected"
70
 
71
- # Topic classification
72
- topic = pipe.predict_topics([text])[0]
73
- topic_str = f"{topic.label} ({topic.confidence:.1%})"
74
-
75
  return f"""## Summary
76
  {summary}
77
 
@@ -79,7 +115,7 @@ def analyze(text: str) -> str:
79
  {emotion_str}
80
 
81
  ## Topic
82
- {topic_str}
83
  """
84
  except Exception as e:
85
  logger.error("Analysis failed: %s", e, exc_info=True)
@@ -87,7 +123,7 @@ def analyze(text: str) -> str:
87
 
88
 
89
  def get_metrics() -> str:
90
- """Load evaluation metrics as markdown."""
91
  if not EVAL_REPORT_PATH.exists():
92
  return "No evaluation report found. Run `scripts/evaluate.py` first."
93
 
@@ -95,6 +131,7 @@ def get_metrics() -> str:
95
  with open(EVAL_REPORT_PATH) as f:
96
  r = json.load(f)
97
 
 
98
  lines = [
99
  "## Model Performance\n",
100
  "| Task | Metric | Score |",
@@ -108,10 +145,13 @@ def get_metrics() -> str:
108
  "| Label | Precision | Recall | F1 |",
109
  "|-------|-----------|--------|-----|",
110
  ]
111
- for k, v in r["topic"]["classification_report"].items():
112
- if isinstance(v, dict) and "precision" in v:
 
 
113
  lines.append(
114
- f"| {k} | {v['precision']:.3f} | {v['recall']:.3f} | {v['f1-score']:.3f} |"
 
115
  )
116
 
117
  return "\n".join(lines)
@@ -119,15 +159,16 @@ def get_metrics() -> str:
119
  return f"Error loading metrics: {e}"
120
 
121
 
122
- SAMPLE = """Artificial intelligence is rapidly transforming technology. Machine learning algorithms process vast amounts of data, identifying patterns with unprecedented accuracy. From healthcare to finance, AI is revolutionizing industries worldwide. However, ethical considerations around privacy and bias remain critical challenges."""
123
 
124
  with gr.Blocks(title="LexiMind Demo") as demo:
125
  gr.Markdown(
126
- "# LexiMind NLP Demo\nMulti-task model: summarization, emotion detection, topic classification."
 
127
  )
128
 
129
  with gr.Tab("Analyze"):
130
- text_input = gr.Textbox(label="Input Text", lines=6, value=SAMPLE)
131
  analyze_btn = gr.Button("Analyze", variant="primary")
132
  output = gr.Markdown(label="Results")
133
  analyze_btn.click(fn=analyze, inputs=text_input, outputs=output)
@@ -135,6 +176,9 @@ with gr.Blocks(title="LexiMind Demo") as demo:
135
  with gr.Tab("Metrics"):
136
  gr.Markdown(get_metrics())
137
 
 
 
 
138
  if __name__ == "__main__":
139
- get_pipeline() # Pre-load
140
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ """
2
+ Gradio demo for LexiMind multi-task NLP model.
3
+
4
+ Provides a simple web interface for the three core tasks:
5
+ - Summarization: Generates concise summaries of input text
6
+ - Emotion Detection: Identifies emotional content with confidence scores
7
+ - Topic Classification: Categorizes text into predefined topics
8
+
9
+ Author: Oliver Perrin
10
+ Date: 2025-12-04
11
+ """
12
 
13
  from __future__ import annotations
14
 
 
18
 
19
  import gradio as gr
20
 
21
+ # --------------- Path Setup ---------------
22
+ # Ensure local src package is importable when running script directly
23
+
24
  SCRIPT_DIR = Path(__file__).resolve().parent
25
  PROJECT_ROOT = SCRIPT_DIR.parent
26
+
27
  if str(PROJECT_ROOT) not in sys.path:
28
  sys.path.insert(0, str(PROJECT_ROOT))
29
 
 
35
  configure_logging()
36
  logger = get_logger(__name__)
37
 
38
+ # --------------- Constants ---------------
39
+
40
  OUTPUTS_DIR = PROJECT_ROOT / "outputs"
41
  EVAL_REPORT_PATH = OUTPUTS_DIR / "evaluation_report.json"
42
 
43
+ SAMPLE_TEXT = (
44
+ "Artificial intelligence is rapidly transforming technology. "
45
+ "Machine learning algorithms process vast amounts of data, identifying "
46
+ "patterns with unprecedented accuracy. From healthcare to finance, AI is "
47
+ "revolutionizing industries worldwide. However, ethical considerations "
48
+ "around privacy and bias remain critical challenges."
49
+ )
50
+
51
+ # --------------- Pipeline Management ---------------
52
+
53
  _pipeline = None
54
 
55
 
56
def get_pipeline():
    """Lazy-load the inference pipeline, downloading the checkpoint if needed.

    Returns:
        The module-level cached pipeline. Created on first call; subsequent
        calls return the same object.
    """
    global _pipeline
    if _pipeline is not None:
        return _pipeline

    checkpoint_path = Path("checkpoints/best.pt")

    # Download from HuggingFace Hub if checkpoint doesn't exist locally.
    if not checkpoint_path.exists():
        checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
        # local_dir_use_symlinks is deprecated (a no-op since
        # huggingface_hub 0.23) and has been removed from this call.
        hf_hub_download(
            repo_id="OliverPerrin/LexiMind-Model",
            filename="best.pt",
            local_dir="checkpoints",
        )

    # NOTE(review): paths are relative to the current working directory,
    # not PROJECT_ROOT — the script assumes it is launched from the repo
    # root; confirm before anchoring these to PROJECT_ROOT.
    _pipeline, _ = create_inference_pipeline(
        tokenizer_dir="artifacts/hf_tokenizer/",
        checkpoint_path=str(checkpoint_path),
        labels_path="artifacts/labels.json",
    )
    return _pipeline
80
 
81
 
82
+ # --------------- Core Functions ---------------
83
+
84
+
85
  def analyze(text: str) -> str:
86
+ """
87
+ Run all three tasks on input text.
88
+
89
+ Returns markdown-formatted results for display in Gradio.
90
+ """
91
  if not text or not text.strip():
92
  return "Please enter some text to analyze."
93
 
94
  try:
95
  pipe = get_pipeline()
96
 
97
+ # Run each task
98
  summary = pipe.summarize([text], max_length=128)[0].strip() or "(empty)"
 
 
99
  emotions = pipe.predict_emotions([text], threshold=0.5)[0]
100
+ topic = pipe.predict_topics([text])[0]
101
+
102
+ # Format emotion results
103
  if emotions.labels:
104
  emotion_str = ", ".join(
105
  f"{lbl} ({score:.1%})"
 
108
  else:
109
  emotion_str = "No strong emotions detected"
110
 
 
 
 
 
111
  return f"""## Summary
112
  {summary}
113
 
 
115
  {emotion_str}
116
 
117
  ## Topic
118
+ {topic.label} ({topic.confidence:.1%})
119
  """
120
  except Exception as e:
121
  logger.error("Analysis failed: %s", e, exc_info=True)
 
123
 
124
 
125
  def get_metrics() -> str:
126
+ """Load evaluation metrics from JSON and format as markdown tables."""
127
  if not EVAL_REPORT_PATH.exists():
128
  return "No evaluation report found. Run `scripts/evaluate.py` first."
129
 
 
131
  with open(EVAL_REPORT_PATH) as f:
132
  r = json.load(f)
133
 
134
+ # Build overall metrics table
135
  lines = [
136
  "## Model Performance\n",
137
  "| Task | Metric | Score |",
 
145
  "| Label | Precision | Recall | F1 |",
146
  "|-------|-----------|--------|-----|",
147
  ]
148
+
149
+ # Add per-class metrics
150
+ for label, metrics in r["topic"]["classification_report"].items():
151
+ if isinstance(metrics, dict) and "precision" in metrics:
152
  lines.append(
153
+ f"| {label} | {metrics['precision']:.3f} | "
154
+ f"{metrics['recall']:.3f} | {metrics['f1-score']:.3f} |"
155
  )
156
 
157
  return "\n".join(lines)
 
159
  return f"Error loading metrics: {e}"
160
 
161
 
162
+ # --------------- Gradio Interface ---------------
163
 
164
  with gr.Blocks(title="LexiMind Demo") as demo:
165
  gr.Markdown(
166
+ "# LexiMind NLP Demo\n"
167
+ "Multi-task model: summarization, emotion detection, topic classification."
168
  )
169
 
170
  with gr.Tab("Analyze"):
171
+ text_input = gr.Textbox(label="Input Text", lines=6, value=SAMPLE_TEXT)
172
  analyze_btn = gr.Button("Analyze", variant="primary")
173
  output = gr.Markdown(label="Results")
174
  analyze_btn.click(fn=analyze, inputs=text_input, outputs=output)
 
176
  with gr.Tab("Metrics"):
177
  gr.Markdown(get_metrics())
178
 
179
+
180
+ # --------------- Entry Point ---------------
181
+
182
  if __name__ == "__main__":
183
+ get_pipeline() # Pre-load to fail fast if checkpoint missing
184
  demo.launch(server_name="0.0.0.0", server_port=7860)