Spaces:
Running
Running
Upload 7 files.
Browse files- LICENSE +21 -0
- __init__.py +1 -0
- agent.py +430 -0
- app.py +518 -0
- config.py +130 -0
- requirements.txt +17 -0
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 AskVeracity
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Root package initialization
|
agent.py
ADDED
|
@@ -0,0 +1,430 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent module for the Fake News Detector application.
|
| 3 |
+
|
| 4 |
+
This module implements a LangGraph-based agent that orchestrates
|
| 5 |
+
the fact-checking process. It defines the agent setup, tools,
|
| 6 |
+
and processing pipeline for claim verification.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
import logging
|
| 12 |
+
import traceback
|
| 13 |
+
from langchain_core.tools import tool
|
| 14 |
+
from langchain.prompts import PromptTemplate
|
| 15 |
+
from langgraph.prebuilt import create_react_agent
|
| 16 |
+
|
| 17 |
+
from utils.models import get_llm_model
|
| 18 |
+
from utils.performance import PerformanceTracker
|
| 19 |
+
from modules.claim_extraction import extract_claims
|
| 20 |
+
from modules.evidence_retrieval import retrieve_combined_evidence
|
| 21 |
+
from modules.classification import classify_with_llm, aggregate_evidence
|
| 22 |
+
from modules.explanation import generate_explanation
|
| 23 |
+
|
| 24 |
+
# Configure logger
|
| 25 |
+
logger = logging.getLogger("misinformation_detector")
|
| 26 |
+
|
| 27 |
+
# Reference to global performance tracker
|
| 28 |
+
performance_tracker = PerformanceTracker()
|
| 29 |
+
|
| 30 |
+
# Define LangGraph Tools
|
| 31 |
+
@tool
def claim_extractor(query):
    """
    Tool that extracts factual claims from a given text.

    Args:
        query (str): Text containing potential factual claims

    Returns:
        str: Extracted factual claim
    """
    # NOTE: the docstring text is left untouched on purpose; LangChain's
    # @tool decorator uses it as the description shown to the agent.

    # The tracker is notified before extraction runs, so the call is
    # recorded even when extract_claims() raises.
    performance_tracker.log_claim_processed()
    extracted_claim = extract_claims(query)
    return extracted_claim
|
| 44 |
+
|
| 45 |
+
@tool
def evidence_retriever(query):
    """
    Tool that retrieves evidence from multiple sources for a claim.

    Args:
        query (str): The factual claim to gather evidence for

    Returns:
        list: List of evidence items from various sources
    """
    # NOTE: the docstring text is left untouched on purpose; LangChain's
    # @tool decorator uses it as the description shown to the agent.

    # Thin wrapper: all retrieval work is delegated to the evidence module.
    gathered_evidence = retrieve_combined_evidence(query)
    return gathered_evidence
|
| 57 |
+
|
| 58 |
+
@tool
def truth_classifier(query, evidence):
    """
    Tool that classifies the truthfulness of a claim based on evidence.

    Args:
        query (str): The factual claim to classify
        evidence (list): Evidence items to evaluate

    Returns:
        str: JSON string containing verdict, confidence, and results
    """
    # NOTE: the docstring text is left untouched on purpose; LangChain's
    # @tool decorator uses it as the description shown to the agent.
    import json

    # Classify each evidence item, then fold the per-item results into a
    # single verdict and confidence score.
    per_item_results = classify_with_llm(query, evidence)
    truth_label, confidence = aggregate_evidence(per_item_results)

    logger.info(f"Classification results: {len(per_item_results)} items")
    logger.info(f"Aggregate result: {truth_label}, confidence: {confidence}")

    # Any verdict mentioning "True" or "False" is reported with a
    # confidence floor of 0.6.
    if any(marker in truth_label for marker in ("True", "False")):
        confidence = max(confidence, 0.6)

    # Serialized to a JSON string; format_response() parses it back.
    return json.dumps({
        "verdict": truth_label,
        "confidence": confidence,
        "results": per_item_results
    })
|
| 91 |
+
|
| 92 |
+
@tool
def explanation_generator(claim, evidence_results, truth_label):
    """
    Tool that generates a human-readable explanation for the verdict.

    Args:
        claim (str): The factual claim being verified
        evidence_results (list): Evidence items and classification results
        truth_label (str): The verdict (True/False/Uncertain)

    Returns:
        str: Natural language explanation of the verdict
    """
    # NOTE: the docstring text is left untouched on purpose; LangChain's
    # @tool decorator uses it as the description shown to the agent.
    explanation = generate_explanation(claim, evidence_results, truth_label)

    # Only the log preview is guarded: slicing None would raise TypeError
    # and turn a missing explanation into a tool failure. The return value
    # itself is passed through unchanged.
    if explanation is None:
        logger.warning("Explanation generator returned no explanation")
    else:
        logger.info(f"Generated explanation: {explanation[:100]}...")
    return explanation
|
| 108 |
+
|
| 109 |
+
def setup_agent():
    """
    Create and configure a ReAct agent with the fact-checking tools.

    Checks that an OpenAI API key is available, then builds a LangGraph
    ReAct agent from the model returned by get_llm_model() and the four
    tools defined in this module (claim extraction, evidence retrieval,
    classification, and explanation generation).

    Returns:
        object: The graph returned by create_react_agent

    Raises:
        ValueError: If OPENAI_API_KEY is unset, empty, or only whitespace
        Exception: Any error raised while loading the model or building
            the agent is logged and re-raised unchanged
    """
    # Fail fast when the key is missing or blank
    if not os.environ.get("OPENAI_API_KEY", "").strip():
        logger.error("OPENAI_API_KEY environment variable not set or empty.")
        raise ValueError("OpenAI API key is required")

    # Tools the agent is allowed to call
    tools = [
        claim_extractor,
        evidence_retriever,
        truth_classifier,
        explanation_generator
    ]

    # Text-style ReAct format instructions; {tool_names} is left as a
    # PromptTemplate placeholder after the f-string below is evaluated.
    FORMAT_INSTRUCTIONS_TEMPLATE = """
Use the following format:
Question: the input question you must answer
Action: the action to take, should be one of: {tool_names}
Action Input: the input to the action
Observation: the result of the action
... (this Action/Action Input/Observation can repeat N times)
Final Answer: the final answer to the original input question
"""

    # NOTE(review): this prompt is built but never handed to
    # create_react_agent() below, so these instructions currently have no
    # effect on the agent. Wiring it in needs a decision on the installed
    # langgraph version's parameter name and on whether text-style ReAct
    # instructions suit a tool-calling agent - TODO confirm intent.
    prompt = PromptTemplate(
        input_variables=["input", "tool_names"],
        template=f"""
You are a fact-checking assistant that verifies claims by gathering evidence and
determining their truthfulness. Follow these exact steps in sequence:

1. Call claim_extractor to extract the main factual claim
2. Call evidence_retriever to gather evidence about the claim
3. Call truth_classifier to evaluate the claim using the evidence
4. Call explanation_generator to explain the result
5. Provide your Final Answer that summarizes everything

Execute these steps in order without unnecessary thinking steps between tool calls.
Be direct and efficient in your verification process.

{FORMAT_INSTRUCTIONS_TEMPLATE}
"""
    )

    try:
        # Get the LLM model
        model = get_llm_model()

        # Build the agent graph (no timeout is configured here)
        graph = create_react_agent(model, tools=tools)
    except Exception as e:
        logger.error(f"Error creating agent: {str(e)}")
        # Bare raise keeps the original exception and traceback intact
        raise

    logger.info("Agent created successfully")
    return graph
|
| 177 |
+
|
| 178 |
+
def process_claim(claim, agent=None, recursion_limit=20):
    """
    Run a claim through the fact-checking agent and return a structured result.

    The claim is sent to the agent as a single user message; the raw agent
    response is then converted by format_response().

    Args:
        claim (str): The factual claim to be verified
        agent (object, optional): Initialized LangGraph agent. When None,
            an error is logged and None is returned.
        recursion_limit (int, optional): Value passed to the agent as the
            "recursion_limit" config entry. Default: 20.

    Returns:
        dict | None: The dictionary produced by format_response() on
        success, {"error": <message>} when invoking or formatting raises,
        or None when no agent was supplied.
    """
    # Guard clause: nothing can be done without an agent
    if agent is None:
        logger.error("Agent not initialized. Call setup_agent() first.")
        return None

    started_at = time.time()
    logger.info(f"Processing claim with agent: {claim}")

    try:
        # Single user message as input, recursion limit as run config
        response = agent.invoke(
            {"messages": [("user", claim)]},
            {"recursion_limit": recursion_limit},
        )

        result = format_response(response)

        # Wall-clock duration of invoke + formatting
        logger.info(f"Claim processed in {time.time() - started_at:.2f} seconds")
        return result

    except Exception as exc:
        # Best-effort boundary: report the failure to the caller as data
        logger.error(f"Error processing claim with agent: {str(exc)}")
        logger.error(traceback.format_exc())
        return {"error": str(exc)}
|
| 233 |
+
|
| 234 |
+
# Limits applied to the returned result to keep its size bounded
_MAX_EVIDENCE_CHARS = 500
_MAX_THOUGHTS = 10


def _parse_evidence_content(content):
    """Normalize evidence_retriever output into a list of evidence items."""
    import ast

    if isinstance(content, list):
        return content

    # Tool output may arrive as the string representation of a list
    if isinstance(content, str) and content.startswith("[") and content.endswith("]"):
        try:
            parsed = ast.literal_eval(content)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid literal: keep the raw text as a single item
            return [content]
        return parsed if isinstance(parsed, list) else [content]

    logger.warning(f"Evidence retrieved is not a list: {type(content)}")
    return [content]


def _apply_classifier_output(content, result):
    """Copy verdict, confidence and per-item results from truth_classifier JSON into result."""
    import json

    # Log the incoming content for debugging
    logger.info(f"Truth classifier content type: {type(content)}")
    logger.info(f"Truth classifier content: {content}")

    if not isinstance(content, str):
        logger.warning(f"Unexpected truth_classifier content format: {content}")
        return

    try:
        parsed = json.loads(content)
        result["classification"] = parsed.get("verdict", "Uncertain")
        result["confidence"] = float(parsed.get("confidence", 0.2))
        result["classification_results"] = parsed.get("results", [])
        logger.info(f"Extracted from JSON: verdict={result['classification']}, confidence={result['confidence']}")
    except json.JSONDecodeError:
        logger.warning(f"Could not parse truth classifier JSON: {content}")
    except Exception as e:
        logger.warning(f"Error extracting from truth classifier output: {e}")


def format_response(response):
    """
    Format the agent's response into a structured result.

    Walks the messages in the agent response, collects the output of each
    fact-checking tool, and assembles a result dictionary. If the
    classifier ran but the explanation tool did not, an explanation is
    generated here as a fallback.

    Args:
        response (dict): Raw response from the LangGraph agent; must
            contain a "messages" entry

    Returns:
        dict: One of
            - {"error": "Invalid response format"} when response is empty
              or has no "messages" key
            - the structured result with keys claim, evidence,
              evidence_count, classification, confidence, explanation,
              final_answer, thoughts, performance (plus
              classification_results when the classifier output parsed)
            - an error dictionary (error, traceback, classification,
              confidence, explanation) when formatting itself raises
    """
    try:
        if not response or "messages" not in response:
            return {"error": "Invalid response format"}

        messages = response.get("messages", [])

        # Defaults used when a tool produced no usable output
        result = {
            "claim": None,
            "evidence": [],
            "evidence_count": 0,
            "classification": "Uncertain",
            "confidence": 0.2,  # Default low confidence
            "explanation": "Insufficient evidence to evaluate this claim.",
            "final_answer": None,
            "thoughts": []
        }

        # Track if we found results from each tool
        found_tools = {
            "claim_extractor": False,
            "evidence_retriever": False,
            "truth_classifier": False,
            "explanation_generator": False
        }

        last_index = len(messages) - 1

        for idx, message in enumerate(messages):
            message_type = getattr(message, "type", "")

            # Extract agent thoughts. LangChain AI messages report their
            # type as "ai"; "assistant" is still accepted for compatibility
            # with the previous check.
            if hasattr(message, "content") and message_type in ("ai", "assistant"):
                content = message.content
                if isinstance(content, str) and "Thought:" in content:
                    thought = content.split("Thought:", 1)[1].split("\n")[0].strip()
                    result["thoughts"].append(thought)

            if message_type == "tool":
                tool_name = getattr(message, "name", "unknown")
                content = message.content

                if tool_name in found_tools:
                    found_tools[tool_name] = True

                if tool_name == "claim_extractor":
                    if content:
                        result["claim"] = content

                elif tool_name == "evidence_retriever":
                    if content:
                        evidence_items = _parse_evidence_content(content)
                        result["evidence"] = evidence_items
                        result["evidence_count"] = len(evidence_items)

                elif tool_name == "truth_classifier":
                    _apply_classifier_output(content, result)

                elif tool_name == "explanation_generator":
                    if content:
                        result["explanation"] = content
                        logger.info(f"Found explanation from tool: {content[:100]}...")

            # A non-tool message in last position is the agent's final answer
            elif idx == last_index and hasattr(message, "content"):
                result["final_answer"] = message.content

        # Log which tools weren't found
        missing_tools = [name for name, found in found_tools.items() if not found]
        if missing_tools:
            logger.warning(f"Missing tool outputs in response: {', '.join(missing_tools)}")

        # FALLBACK: classification is available but the agent never called
        # the explanation tool, so generate the explanation here.
        if found_tools["truth_classifier"] and not found_tools["explanation_generator"]:
            logger.info("Explanation generator was not called by the agent, using fallback explanation generation")

            try:
                classification_results = result.get("classification_results", [])

                # Prefer per-item classification results over raw evidence
                explanation_evidence = classification_results if classification_results else result["evidence"]

                explanation = generate_explanation(
                    result["claim"],
                    explanation_evidence,
                    result["classification"],
                    result["confidence"],
                )

                if explanation:
                    logger.info(f"Generated fallback explanation: {explanation[:100]}...")
                    result["explanation"] = explanation
            except Exception as e:
                logger.error(f"Error generating fallback explanation: {e}")

        # Keep evidence_count consistent with the evidence actually held
        if result["evidence_count"] > 0 and not result["evidence"]:
            logger.warning("Evidence count is non-zero but evidence list is empty. This is a data inconsistency.")
            result["evidence_count"] = 0

        # Add debug info about the final result
        logger.info(f"Final classification: {result['classification']}, confidence: {result['confidence']}")
        logger.info(f"Final explanation: {result['explanation'][:100]}...")

        # Add performance metrics
        result["performance"] = performance_tracker.get_summary()

        # Memory management: truncate long string evidence items and cap
        # the number of thoughts kept.
        if isinstance(result["evidence"], list):
            result["evidence"] = [
                ev[:_MAX_EVIDENCE_CHARS - 3] + "..."
                if isinstance(ev, str) and len(ev) > _MAX_EVIDENCE_CHARS
                else ev
                for ev in result["evidence"]
            ]

        result["thoughts"] = result["thoughts"][:_MAX_THOUGHTS]

        return result

    except Exception as e:
        # Capture the traceback once so the log and the payload agree
        formatted_traceback = traceback.format_exc()
        logger.error(f"Error formatting agent response: {str(e)}")
        logger.error(formatted_traceback)
        return {
            "error": str(e),
            "traceback": formatted_traceback,
            "classification": "Error",
            "confidence": 0.1,
            "explanation": "An error occurred while processing this claim."
        }
|
app.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Main Streamlit application for the Fake News Detector.
|
| 3 |
+
|
| 4 |
+
This module implements the user interface for claim verification,
|
| 5 |
+
rendering the results and handling user interactions. It also
|
| 6 |
+
manages the application lifecycle including initialization and cleanup.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import streamlit as st
|
| 10 |
+
import time
|
| 11 |
+
import json
|
| 12 |
+
import os
|
| 13 |
+
import logging
|
| 14 |
+
import atexit
|
| 15 |
+
import sys
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
# Configure logging first, before other imports
|
| 19 |
+
logging.basicConfig(
|
| 20 |
+
level=logging.INFO,
|
| 21 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 22 |
+
handlers=[logging.StreamHandler()]
|
| 23 |
+
)
|
| 24 |
+
logger = logging.getLogger("misinformation_detector")
|
| 25 |
+
|
| 26 |
+
# Check for critical environment variables
|
| 27 |
+
if not os.environ.get("OPENAI_API_KEY"):
|
| 28 |
+
logger.warning("OPENAI_API_KEY not set. Please configure this in your Hugging Face Spaces secrets.")
|
| 29 |
+
|
| 30 |
+
# Import our modules
|
| 31 |
+
from utils.models import initialize_models
|
| 32 |
+
from utils.performance import PerformanceTracker
|
| 33 |
+
|
| 34 |
+
# Import agent functionality
|
| 35 |
+
import agent
|
| 36 |
+
|
| 37 |
+
# Initialize performance tracker
|
| 38 |
+
performance_tracker = PerformanceTracker()
|
| 39 |
+
|
| 40 |
+
# Ensure data directory exists
|
| 41 |
+
data_dir = Path("data")
|
| 42 |
+
if not data_dir.exists():
|
| 43 |
+
logger.info("Creating data directory")
|
| 44 |
+
data_dir.mkdir(exist_ok=True)
|
| 45 |
+
|
| 46 |
+
# Set page configuration
|
| 47 |
+
st.set_page_config(
|
| 48 |
+
page_title="AskVeracity",
|
| 49 |
+
page_icon="🔍",
|
| 50 |
+
layout="wide",
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
# Hide the "Press ⌘+Enter to apply" text with CSS
|
| 54 |
+
st.markdown("""
|
| 55 |
+
<style>
|
| 56 |
+
/* Hide the shortcut text that appears at the bottom of text areas */
|
| 57 |
+
.stTextArea div:has(textarea) + div {
|
| 58 |
+
visibility: hidden !important;
|
| 59 |
+
height: 0px !important;
|
| 60 |
+
position: absolute !important;
|
| 61 |
+
}
|
| 62 |
+
</style>
|
| 63 |
+
""", unsafe_allow_html=True)
|
| 64 |
+
|
| 65 |
+
@st.cache_resource
def get_agent():
    """
    Build the fact-checking agent once and reuse the cached instance.

    Initializes the models and then creates the agent via
    agent.setup_agent(). The st.cache_resource decorator stores the
    returned object so the body is not re-executed on every script rerun;
    per the Streamlit documentation the cached resource is shared across
    sessions rather than created per session.

    Returns:
        object: The agent returned by agent.setup_agent()
    """
    logger.info("Initializing models and agent (cached)")

    # Models are initialized before the agent is assembled
    initialize_models()
    fact_checking_agent = agent.setup_agent()
    return fact_checking_agent
|
| 80 |
+
|
| 81 |
+
def cleanup_resources():
    """
    Clean up resources when app is closed.

    Clears Streamlit's ``st.cache_data`` cache and resets the module-level
    performance tracker. Any exception raised while doing so is logged and
    swallowed so that interpreter shutdown is never interrupted.
    """
    try:
        # Clear any cached data
        st.cache_data.clear()

        # Reset performance tracker
        performance_tracker.reset()

        # Log cleanup
        logger.info("Resources cleaned up successfully")
    except Exception as e:
        # Best effort: cleanup runs at exit, so report the problem instead of raising
        logger.error(f"Error during cleanup: {e}")
|
| 99 |
+
|
| 100 |
+
# Register cleanup handler
atexit.register(cleanup_resources)

# App title and description
st.title("🔍 AskVeracity")
st.markdown("""
This is a simple AI-powered tool - a fact-checking system that analyzes claims to determine
their truthfulness by gathering and analyzing evidence from various sources, such as Wikipedia,
news outlets, and academic repositories.
""")
|
| 110 |
+
|
| 111 |
+
# Sidebar with app information
with st.sidebar:
    st.header("About")
    st.info(
        "This system uses a combination of NLP techniques and LLMs to "
        "extract claims, gather evidence, and classify the truthfulness of statements."
    )

    # Application information
    st.markdown("### How It Works")
    st.info(
        "1. Enter any recent news or a factual claim\n"
        "2. Our AI gathers evidence from Wikipedia, news sources, and academic repositories\n"
        "3. The system analyzes the evidence to determine truthfulness\n"
        "4. Results show the verdict with supporting evidence"
    )

    # Our Mission
    st.markdown("### Our Mission")
    st.info(
        "AskVeracity aims to combat misinformation in real-time through an open-source application built with accessible tools. "
        "We believe in empowering people with factual information to make informed decisions."
    )

    # Limitations and Usage
    st.markdown("### Limitations")
    st.warning(
        "Due to resource constraints, AskVeracity may not always provide real-time results with perfect accuracy. "
        "Performance is typically best with widely-reported news and information published within the last 48 hours. "
        "Additionally, the system evaluates claims based on current evidence - a claim that was true in the past "
        "may be judged false if circumstances have changed, and vice versa."
    )

    # Best Practices
    st.markdown("### Best Practices")
    st.success(
        "For optimal results:\n\n"
        "• Keep claims short and precise\n\n"
        "• Include key details in your claim\n\n"
        "• Phrase claims as direct statements rather than questions\n\n"
        "• Be specific about who said what"
    )

    # Example comparison
    with st.expander("📝 Examples of Effective Claims"):
        # Content lines share one indent so the markdown is not parsed as a code block
        st.markdown("""
        **Less precise:** "Country A-Country B Relations Are Moving in Positive Direction as per Country B Minister John Doe."

        **More precise:** "Country B's External Affairs Minister John Doe has claimed that Country A-Country B Relations Are Moving in Positive Direction."
        """)

    # Important Notes
    st.markdown("### Important Notes")
    st.info(
        "• AskVeracity covers general topics and is not specialized in any single domain or location\n\n"
        "• Results can vary based on available evidence and LLM behavior\n\n"
        "• The system is designed to indicate uncertainty when evidence is insufficient\n\n"
        "• AskVeracity is not a chatbot and does not maintain conversation history\n\n"
        "• We recommend cross-verifying critical information with additional sources"
    )

    # Privacy Information
    st.markdown("### Data Privacy")
    st.info(
        "We do not collect or store any data about the claims you submit. "
        "Your interactions are processed by OpenAI's API. Please refer to "
        "[OpenAI's privacy policy](https://openai.com/policies/privacy-policy) for details on their data handling practices."
    )

    # Feedback Section
    st.markdown("### Feedback")
    st.success(
        "AskVeracity is evolving and we welcome your feedback to help us improve. "
        "Please reach out to us with questions, suggestions, or concerns."
    )
|
| 186 |
+
|
| 187 |
+
# Initialize session state variables.
# Each key is created only when absent, so values survive Streamlit reruns.
_SESSION_DEFAULTS = {
    'processing': False,       # True while a claim is being verified
    'claim_to_process': "",    # claim text captured when the button was clicked
    'has_result': False,       # True once a result is stored
    'result': None,            # result object returned by the agent
    'total_time': 0,           # wall-clock seconds of the last verification
    'fresh_state': True,       # True when the input screen should be shown
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
| 200 |
+
|
| 201 |
+
# Main interface
st.markdown("### Enter a claim to verify")

# Input area. The label is given a real value (Streamlit discourages empty
# labels for accessibility reasons) and stays hidden via label_visibility.
claim_input = st.text_area(
    "Claim to verify",
    height=100,
    placeholder=(
        "Examples: The Eiffel Tower is located in Rome, Italy. "
        "Meta recently released its Llama 4 large language model. "
        "Justin Trudeau is not the Canadian Prime Minister anymore. "
        "China retaliated with 125% tariffs against U.S. imports. "
        "A recent piece of news."
    ),
    key="claim_input_area",
    label_visibility="collapsed",
    max_chars=None,
)

# Information about result variability
st.caption("""
💡 **Note:** Results may vary slightly each time, even for the same claim. This is by design, allowing our system to:
- Incorporate the most recent evidence available
- Benefit from the AI's ability to consider multiple perspectives
- Adapt to evolving information landscapes
""")

st.warning("⏱️ **Note:** Processing times may vary from 10 seconds to 2 minutes depending on query complexity, available evidence, and current API response times.")

# Button for verifying claim; disabled while a verification is in flight
verify_button = st.button(
    "Verify Claim",
    type="primary",
    disabled=st.session_state.processing,
    key="verify_btn"
)
|
| 235 |
+
|
| 236 |
+
import ast
import html
import math
import re

# Host part of the first "URL: http(s)://..." marker inside an evidence string.
# Whitespace is excluded so a URL without a path does not swallow following text.
_EVIDENCE_URL_RE = re.compile(r'URL: https?://(?:www\.)?([^/\s]+)')


def _clamp_confidence(raw, default=0.6):
    """Return *raw* as a float limited to [0, 1], or *default* when it is missing, NaN or non-numeric."""
    if raw is None:
        return default
    try:
        value = float(raw)
    except (ValueError, TypeError):
        return default
    if math.isnan(value):
        return default
    return min(max(value, 0.0), 1.0)


def _normalize_evidence(raw):
    """Coerce the result's ``evidence`` field into a list.

    Lists are returned unchanged. A string is parsed as a Python literal and
    used if it turns out to be a list, otherwise it is wrapped in a
    one-element list. Any other truthy value is stringified and wrapped;
    falsy values give an empty list.
    """
    if isinstance(raw, list):
        return raw
    if isinstance(raw, str):
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a literal: treat the whole string as a single piece of evidence
            return [raw]
        return parsed if isinstance(parsed, list) else [raw]
    return [str(raw)] if raw else []


def _is_displayable(item):
    """Return True for non-empty, non-whitespace evidence strings."""
    return isinstance(item, str) and bool(item.strip())


def _render_evidence_preview(items, limit=3):
    """Show up to *limit* evidence strings in expanders; show an info note if none qualify."""
    shown = False
    for i, ev in enumerate(items[:limit]):
        if _is_displayable(ev):
            with st.expander(f"Evidence {i+1}", expanded=i == 0):
                st.text(ev)
            shown = True
    if not shown:
        st.info("No detailed classification results available.")


# Create a clean interface
if st.session_state.fresh_state:
    # Show a clean interface for the first query or when we need to reset
    analysis_placeholder = st.empty()

    # Show a failure recorded by the previous run. It is stored in session
    # state because anything rendered right before st.rerun() is replaced by
    # the next run before the user can read it.
    if "last_error" in st.session_state:
        st.error(st.session_state["last_error"])
        del st.session_state["last_error"]

    # When button is clicked and not already processing
    if verify_button and not st.session_state.processing:
        if not claim_input or not claim_input.strip():
            st.error("Please enter a claim to verify.")
        else:
            # Store the claim and set processing state
            st.session_state.claim_to_process = claim_input
            st.session_state.processing = True
            st.session_state.fresh_state = False
            # Force a rerun to refresh UI
            st.rerun()

else:
    # This is either during processing or showing results

    # Create a container for processing and results
    analysis_container = st.container()

    with analysis_container:
        # If we're processing, show the processing UI
        if st.session_state.processing:
            st.subheader("🔄 Processing...")
            status = st.empty()
            status.text("Verifying claim... (this may take a while)")
            progress_bar = st.progress(0)

            try:
                # Initialize models and agent if needed. Done inside the try so
                # an initialization failure also resets the processing flag.
                if 'agent_initialized' not in st.session_state:
                    with st.spinner("Initializing system..."):
                        st.session_state.agent = get_agent()
                        st.session_state.agent_initialized = True

                # Use the stored claim for processing
                claim_to_process = st.session_state.claim_to_process

                # Process the claim with the agent
                start_time = time.time()
                result = agent.process_claim(claim_to_process, st.session_state.agent)
                total_time = time.time() - start_time

                # Update progress as claim processing completes
                progress_bar.progress(100)

                # Check for None result
                if result is None:
                    st.session_state["last_error"] = "Failed to process the claim. Please try again."
                    st.session_state.fresh_state = True
                else:
                    # If result exists but key values are missing, provide default values
                    fallback_values = {
                        "classification": "Uncertain",
                        "confidence": 0.6,  # Default to 0.6 instead of 0.0
                        "explanation": "Insufficient evidence was found to determine the truthfulness of this claim.",
                    }
                    for field, fallback in fallback_values.items():
                        if result.get(field) is None:
                            result[field] = fallback

                    # Update result with timing information
                    if "processing_times" not in result:
                        result["processing_times"] = {"total": total_time}

                    # Store the result and timing information
                    st.session_state.result = result
                    st.session_state.total_time = total_time
                    st.session_state.has_result = True

                st.session_state.processing = False

                # Clear processing indicators before showing results
                status.empty()
                progress_bar.empty()

            except Exception as e:
                # Handle any exceptions and reset processing state
                logger.error(f"Error during claim processing: {str(e)}")
                st.session_state["last_error"] = f"An error occurred: {str(e)}"
                st.session_state.processing = False
                st.session_state.fresh_state = True

            # Rerun in every case: to display the results, or to return to the
            # input screen with the button re-enabled and the error shown.
            st.rerun()

        # Display results if available
        elif st.session_state.has_result and st.session_state.result:
            result = st.session_state.result
            total_time = st.session_state.total_time
            claim_to_process = st.session_state.claim_to_process

            st.subheader("📊 Verification Results")

            result_col1, result_col2 = st.columns([2, 1])

            with result_col1:
                # Display both original and processed claim if they differ
                processed_claim = result.get("claim")
                if processed_claim and processed_claim != claim_to_process:
                    st.markdown(f"**Original Claim:** {claim_to_process}")
                    st.markdown(f"**Processed Claim:** {processed_claim}")
                else:
                    st.markdown(f"**Claim:** {claim_to_process}")

                # Make verdict colorful based on classification
                truth_label = str(result.get('classification') or 'Uncertain')
                if "True" in truth_label:
                    verdict_color = "green"
                elif "False" in truth_label:
                    verdict_color = "red"
                else:
                    verdict_color = "gray"

                # The label is model output rendered as raw HTML, so escape it first
                st.markdown(
                    f"**Verdict:** <span style='color:{verdict_color};font-size:1.2em'>{html.escape(truth_label)}</span>",
                    unsafe_allow_html=True,
                )

                # Make sure confidence is a numeric value between 0 and 1
                confidence_value = _clamp_confidence(result.get("confidence"))

                # Display the confidence
                st.markdown(f"**Confidence:** {confidence_value:.2%}")
                st.markdown(f"**Explanation:** {result.get('explanation', 'No explanation available.')}")

                # Add disclaimer about cross-verification
                st.info("⚠️ **Note:** Please cross-verify important information with additional reliable sources.")

            with result_col2:
                st.markdown("**Processing Time**")
                times = result.get("processing_times", {"total": total_time})
                st.markdown(f"- **Total:** {times.get('total', total_time):.2f}s")

                # Show agent thoughts
                thoughts = result.get("thoughts") or []
                if thoughts:
                    st.markdown("**AI Reasoning Process**")
                    for i, thought in enumerate(thoughts[:5]):  # Show top 5 thoughts
                        st.markdown(f"{i+1}. {thought}")
                    if len(thoughts) > 5:
                        with st.expander("Show all reasoning steps"):
                            for i, thought in enumerate(thoughts):
                                st.markdown(f"{i+1}. {thought}")

            # Display evidence
            st.subheader("📝 Evidence")

            # Ensure evidence is a list and count what was actually returned
            evidence = _normalize_evidence(result.get("evidence", []))
            evidence_count = len(evidence)
            has_evidence = any(evidence)

            if not has_evidence:
                st.warning("No relevant evidence was found for this claim. The verdict may not be reliable.")
            else:
                st.markdown(f"Retrieved {evidence_count} pieces of evidence")

                # Get classification results
                classification_results = result.get("classification_results", [])

                # Create tabs for different evidence categories
                evidence_tabs = st.tabs(["All Evidence", "Top Evidence", "Evidence Details"])

                with evidence_tabs[0]:
                    for i, ev in enumerate(evidence):
                        if _is_displayable(ev):  # Only show non-empty evidence
                            with st.expander(f"Evidence {i+1}", expanded=i == 0):
                                st.text(ev)

                with evidence_tabs[1]:
                    # Keep only entries in the expected format that carry evidence text
                    valid_results = [
                        res for res in (classification_results or [])
                        if isinstance(res, dict)
                        and "confidence" in res
                        and "label" in res
                        and res.get("evidence")
                    ]

                    if valid_results:
                        # Coerce confidences so None or text values cannot break sorting or formatting
                        sorted_results = sorted(
                            valid_results,
                            key=lambda res: _clamp_confidence(res.get("confidence"), default=0.0),
                            reverse=True,
                        )
                        for i, res in enumerate(sorted_results[:3]):
                            res_confidence = _clamp_confidence(res.get("confidence"), default=0.0)
                            with st.expander(f"Top Evidence {i+1} (Confidence: {res_confidence:.2%})", expanded=i == 0):
                                st.text(res.get("evidence", "No evidence text available"))
                                st.markdown(f"**Classification:** {res.get('label', 'unknown')}")
                    else:
                        # No usable classification details: just show the first pieces of evidence
                        _render_evidence_preview(evidence)

                with evidence_tabs[2]:
                    evidence_sources = {}
                    for ev in evidence:
                        if not ev or not isinstance(ev, str):
                            continue

                        # Extract source info from evidence text
                        url_match = _EVIDENCE_URL_RE.search(ev)
                        source = url_match.group(1) if url_match else "Unknown"
                        evidence_sources[source] = evidence_sources.get(source, 0) + 1

                    # Display evidence source distribution
                    if evidence_sources:
                        st.markdown("**Evidence Source Distribution**")
                        for source, count in evidence_sources.items():
                            st.markdown(f"- {source}: {count} item(s)")
                    else:
                        st.info("No source information available in the evidence.")

            # Button to start a new verification
            if st.button("Verify Another Claim", type="primary", key="new_verify_btn"):
                # Reset to fresh state for a new verification
                st.session_state.fresh_state = True
                st.session_state.has_result = False
                st.session_state.result = None
                st.rerun()
|
| 512 |
+
|
| 513 |
+
# Footer with additional information
st.markdown("---")
st.caption("""
**AskVeracity** is an open-source tool designed to help combat misinformation through transparent evidence gathering and analysis.
While we strive for accuracy, the system has inherent limitations based on available data sources, API constraints, and the evolving nature of information.
""")
|
config.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration module for the Fake News Detector application.
|
| 3 |
+
|
| 4 |
+
This module handles loading configuration parameters, API keys,
|
| 5 |
+
and source credibility data needed for the fact checking system.
|
| 6 |
+
It manages environment variables and file-based configurations.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import json
|
| 11 |
+
import logging
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
# Configure logger
logger = logging.getLogger("misinformation_detector")

# Base paths: the data directory lives next to this module
ROOT_DIR = Path(__file__).parent.absolute()
DATA_DIR = ROOT_DIR / "data"

# Ensure data directory exists (no error if it is already there)
DATA_DIR.mkdir(exist_ok=True)
|
| 23 |
+
|
| 24 |
+
# First try to get API keys from Streamlit secrets, then fall back to environment variables.
# A key that is configured nowhere resolves to an empty string.
#
# SECURITY: never put real credentials here as fallback defaults. Anything
# committed to this file is public; keys that were committed previously should
# be treated as compromised and revoked/rotated with their providers.
try:
    import streamlit as st
    OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
    NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", ""))
    FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", ""))
except (AttributeError, ImportError, FileNotFoundError):
    # Streamlit is not installed, has no secrets API, or no secrets file exists
    # (st.secrets raises FileNotFoundError in that case): use the environment only.
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
    FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "")
|
| 47 |
+
|
| 48 |
+
# Log secrets status (but not the values)
if OPENAI_API_KEY:
    logger.info("OPENAI_API_KEY is set")
else:
    logger.warning("OPENAI_API_KEY not set. The application will not function properly.")

if NEWS_API_KEY:
    logger.info("NEWS_API_KEY is set")
else:
    logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.")

if FACTCHECK_API_KEY:
    logger.info("FACTCHECK_API_KEY is set")
else:
    logger.warning("FACTCHECK_API_KEY not set. Fact-checking evidence will be limited.")

# Set API key in environment to ensure it's available to all components
# (this also exports a key that came from Streamlit secrets)
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
|
| 66 |
+
|
| 67 |
+
# Source credibility file path (JSON file inside the data directory)
source_cred_file = DATA_DIR / "source_credibility.json"
|
| 69 |
+
|
| 70 |
+
def load_source_credibility(path=None) -> dict:
    """
    Load source credibility data from JSON file

    Args:
        path: Optional path of the JSON file to read. Defaults to the
            module-level ``source_cred_file``.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1)
              Empty dict if file is not found or has errors
    """
    cred_path = source_cred_file if path is None else Path(path)
    try:
        # Open directly instead of checking exists() first; JSON is read as UTF-8
        with open(cred_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.warning(f"Source credibility file not found: {cred_path}")
        return {}
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: invalid JSON or invalid UTF-8
        logger.error(f"Error loading source credibility file: {e}")
        return {}

    if not isinstance(data, dict):
        # Callers expect a mapping; valid JSON of another type counts as an error
        logger.error(f"Error loading source credibility file: expected a JSON object, got {type(data).__name__}")
        return {}
    return data
|
| 88 |
+
|
| 89 |
+
# Load source credibility once at module import
SOURCE_CREDIBILITY = load_source_credibility()
|
| 91 |
+
|
| 92 |
+
# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},  # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},  # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},  # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},  # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}  # 300 RSS requests per hour
}
|
| 102 |
+
|
| 103 |
+
# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,  # exponential backoff
}
|
| 109 |
+
|
| 110 |
+
# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,  # Maximum age of RSS items to consider
    "timeout_seconds": 5,  # Timeout for RSS feed requests
    "max_workers": 5  # Number of parallel workers for fetching feeds
}
|
| 117 |
+
|
| 118 |
+
# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = {
    "similarity_weight": 0.4,  # Weight for semantic similarity
    "entity_overlap_weight": 0.3,  # Weight for entity matching
    "base_weight": 0.3,  # Base relevance weight
    "temporal_boost": 1.2,  # Boost for recent evidence
    "temporal_penalty": 0.7,  # Penalty for outdated evidence
    "authority_boosts": {
        "scientific_consensus": 1.8,
        "fact_check": 1.5,
        "high_authority": 1.3
    }
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.32.0
|
| 2 |
+
langchain>=0.1.6
|
| 3 |
+
langchain_openai>=0.0.5
|
| 4 |
+
langchain_core>=0.1.25
|
| 5 |
+
langgraph>=0.0.27
|
| 6 |
+
transformers==4.36.2
|
| 7 |
+
requests==2.31.0
|
| 8 |
+
beautifulsoup4==4.12.2
|
| 9 |
+
langdetect==1.0.9
|
| 10 |
+
spacy==3.7.2
|
| 11 |
+
SPARQLWrapper==2.0.0
|
| 12 |
+
python-dotenv==1.0.0
|
| 13 |
+
pydantic==2.5.3
|
| 14 |
+
feedparser==6.0.10
|
| 15 |
+
scikit-learn>=1.3.0
|
| 16 |
+
numpy>=1.21.0
|
| 17 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.0/en_core_web_sm-3.7.0-py3-none-any.whl
|