Spaces:
Runtime error
Runtime error
project transferred to LangGraph implementation. |
Browse filesAdd .gitignore, enhance app.py with Azure OpenAI integration, update README for setup instructions, and modify requirements.txt for new dependencies
- .gitignore +149 -0
- README.md +36 -1
- app.py +79 -24
- requirements.txt +7 -1
- retriever.py +80 -26
- test_tavily.py +182 -0
- tools.py +113 -50
.gitignore
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment variables
|
| 2 |
+
.env
|
| 3 |
+
|
| 4 |
+
# Python
|
| 5 |
+
# Byte-compiled / optimized / DLL files
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
|
| 10 |
+
# C extensions
|
| 11 |
+
*.so
|
| 12 |
+
|
| 13 |
+
# Distribution / packaging
|
| 14 |
+
.Python
|
| 15 |
+
build/
|
| 16 |
+
develop-eggs/
|
| 17 |
+
dist/
|
| 18 |
+
downloads/
|
| 19 |
+
eggs/
|
| 20 |
+
.eggs/
|
| 21 |
+
lib/
|
| 22 |
+
lib64/
|
| 23 |
+
parts/
|
| 24 |
+
sdist/
|
| 25 |
+
var/
|
| 26 |
+
wheels/
|
| 27 |
+
pip-wheel-metadata/
|
| 28 |
+
share/python-wheels/
|
| 29 |
+
*.egg-info/
|
| 30 |
+
.installed.cfg
|
| 31 |
+
*.egg
|
| 32 |
+
MANIFEST
|
| 33 |
+
|
| 34 |
+
# PyInstaller
|
| 35 |
+
# Usually these files are written by a python script from a template
|
| 36 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 37 |
+
*.manifest
|
| 38 |
+
*.spec
|
| 39 |
+
|
| 40 |
+
# Installer logs
|
| 41 |
+
pip-log.txt
|
| 42 |
+
pip-delete-this-directory.txt
|
| 43 |
+
|
| 44 |
+
# Unit test / coverage reports
|
| 45 |
+
htmlcov/
|
| 46 |
+
.tox/
|
| 47 |
+
.nox/
|
| 48 |
+
.coverage
|
| 49 |
+
.coverage.*
|
| 50 |
+
.cache
|
| 51 |
+
nosetests.xml
|
| 52 |
+
coverage.xml
|
| 53 |
+
*.cover
|
| 54 |
+
*.py,cover
|
| 55 |
+
.hypothesis/
|
| 56 |
+
.pytest_cache/
|
| 57 |
+
|
| 58 |
+
# Translations
|
| 59 |
+
*.mo
|
| 60 |
+
*.pot
|
| 61 |
+
|
| 62 |
+
# Django stuff:
|
| 63 |
+
*.log
|
| 64 |
+
local_settings.py
|
| 65 |
+
db.sqlite3
|
| 66 |
+
db.sqlite3-journal
|
| 67 |
+
|
| 68 |
+
# Flask stuff:
|
| 69 |
+
instance/
|
| 70 |
+
.webassets-cache
|
| 71 |
+
|
| 72 |
+
# Scrapy stuff:
|
| 73 |
+
.scrapy
|
| 74 |
+
|
| 75 |
+
# Sphinx documentation
|
| 76 |
+
docs/_build/
|
| 77 |
+
|
| 78 |
+
# PyBuilder
|
| 79 |
+
target/
|
| 80 |
+
|
| 81 |
+
# Jupyter Notebook
|
| 82 |
+
.ipynb_checkpoints
|
| 83 |
+
|
| 84 |
+
# IPython
|
| 85 |
+
profile_default/
|
| 86 |
+
ipython_config.py
|
| 87 |
+
|
| 88 |
+
# pyenv
|
| 89 |
+
.python-version
|
| 90 |
+
|
| 91 |
+
# pipenv
|
| 92 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 93 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 94 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 95 |
+
# install all needed dependencies.
|
| 96 |
+
#Pipfile.lock
|
| 97 |
+
|
| 98 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
| 99 |
+
__pypackages__/
|
| 100 |
+
|
| 101 |
+
# Celery stuff
|
| 102 |
+
celerybeat-schedule
|
| 103 |
+
celerybeat.pid
|
| 104 |
+
|
| 105 |
+
# SageMath parsed files
|
| 106 |
+
*.sage.py
|
| 107 |
+
|
| 108 |
+
# Environments
|
| 109 |
+
.env
|
| 110 |
+
.venv
|
| 111 |
+
env/
|
| 112 |
+
venv/
|
| 113 |
+
ENV/
|
| 114 |
+
env.bak/
|
| 115 |
+
venv.bak/
|
| 116 |
+
|
| 117 |
+
# Spyder project settings
|
| 118 |
+
.spyderproject
|
| 119 |
+
.spyproject
|
| 120 |
+
|
| 121 |
+
# Rope project settings
|
| 122 |
+
.ropeproject
|
| 123 |
+
|
| 124 |
+
# mkdocs documentation
|
| 125 |
+
/site
|
| 126 |
+
|
| 127 |
+
# mypy
|
| 128 |
+
.mypy_cache/
|
| 129 |
+
.dmypy.json
|
| 130 |
+
dmypy.json
|
| 131 |
+
|
| 132 |
+
# Pyre type checker
|
| 133 |
+
.pyre/
|
| 134 |
+
|
| 135 |
+
# IDE
|
| 136 |
+
.vscode/
|
| 137 |
+
.idea/
|
| 138 |
+
*.swp
|
| 139 |
+
*.swo
|
| 140 |
+
*~
|
| 141 |
+
|
| 142 |
+
# OS
|
| 143 |
+
.DS_Store
|
| 144 |
+
.DS_Store?
|
| 145 |
+
._*
|
| 146 |
+
.Spotlight-V100
|
| 147 |
+
.Trashes
|
| 148 |
+
ehthumbs.db
|
| 149 |
+
Thumbs.db
|
README.md
CHANGED
|
@@ -10,4 +10,39 @@ pinned: false
|
|
| 10 |
license: apache-2.0
|
| 11 |
---
|
| 12 |
|
| 13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
license: apache-2.0
|
| 11 |
---
|
| 12 |
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
|
| 15 |
+
# Agentic RAG with LangGraph
|
| 16 |
+
|
| 17 |
+
A simple agentic RAG system using LangGraph with Azure OpenAI.
|
| 18 |
+
|
| 19 |
+
## Setup
|
| 20 |
+
|
| 21 |
+
1. **Install dependencies:**
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
2. **Create `.env` file in the project root:**
|
| 28 |
+
|
| 29 |
+
```
|
| 30 |
+
AZURE_OPENAI_ENDPOINT=your_azure_endpoint
|
| 31 |
+
AZURE_OPENAI_API_KEY=your_api_key
|
| 32 |
+
AZURE_OPENAI_DEPLOYMENT_NAME=your_deployment_name
|
| 33 |
+
AZURE_OPENAI_API_VERSION=2024-02-01
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## Run
|
| 37 |
+
|
| 38 |
+
```bash
|
| 39 |
+
python app.py
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
The agent will automatically run a sample query (about a guest from Qwen and their latest AI model) and show the response.
|
| 43 |
+
|
| 44 |
+
## Tools Available
|
| 45 |
+
|
| 46 |
+
- **Guest Info**: Retrieves guest information from the dataset
|
| 47 |
+
- **Weather Info**: Provides dummy weather data
|
| 48 |
+
- **Hub Stats**: Gets Hugging Face model statistics
|
app.py
CHANGED
|
@@ -1,33 +1,88 @@
|
|
| 1 |
-
import
|
| 2 |
-
import
|
| 3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
-
|
| 6 |
-
from tools import DuckDuckGoSearchTool, WeatherInfoTool, HubStatsTool
|
| 7 |
-
from retriever import load_guest_dataset
|
| 8 |
|
| 9 |
-
# Initialize the Hugging Face model
|
| 10 |
-
model = HfApiModel()
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
# Initialize the weather tool
|
| 16 |
-
weather_info_tool = WeatherInfoTool()
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
| 30 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from typing import TypedDict, Annotated
from dotenv import load_dotenv
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langchain_openai import AzureChatOpenAI
from retriever import guest_info_tool
from tools import weather_info_tool, hub_stats_tool, news_search_tool

# Pull Azure credentials from the project-root .env file.
load_dotenv()

# Azure OpenAI chat model driving the agent.
# FIX: the README documents AZURE_OPENAI_DEPLOYMENT_NAME and
# AZURE_OPENAI_API_VERSION, but the code previously read DEPLOYMENT_NAME and
# OPENAI_API_VERSION — a .env written per the README silently yielded None.
# Accept both spellings (README name first, legacy name as fallback).
chat = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") or os.getenv("DEPLOYMENT_NAME"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION") or os.getenv("OPENAI_API_VERSION"),
    temperature=0.75,
    streaming=True,
    verbose=True,
)

# Tools the agent may call; bind them so the model can emit tool calls.
tools = [guest_info_tool, weather_info_tool, hub_stats_tool, news_search_tool]
chat_with_tools = chat.bind_tools(tools)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class AgentState(TypedDict):
    """Graph state: the running conversation, merged via ``add_messages``."""
    # add_messages appends new messages instead of overwriting the list.
    messages: Annotated[list[AnyMessage], add_messages]


def assistant(state: AgentState):
    """LLM node: run the tool-bound chat model over the message history."""
    reply = chat_with_tools.invoke(state["messages"])
    return {"messages": [reply]}
|
| 38 |
|
|
|
|
|
|
|
| 39 |
|
| 40 |
+
# Assemble the agent graph: one LLM node plus one tool-execution node.
builder = StateGraph(AgentState)

# Nodes: these do the work.
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Edges: tools_condition routes to "tools" when the latest assistant message
# contains tool calls, and ends the run otherwise.
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    tools_condition,
)
builder.add_edge("tools", "assistant")

# Compile with debug mode for verbosity
alfred = builder.compile(debug=True)

messages = [HumanMessage(
    content="One of our guests is from Qwen. What can you tell me about their most recent popular AI model(search about it )?")]

print("🔍 Starting Agent Execution...")
print("="*50)

# Stream (rather than invoke) so each graph step is printed as it happens.
final_messages = None
for update in alfred.stream({"messages": messages}):
    print(f"📍 Current Step: {list(update.keys())}")
    for node_name, node_output in update.items():
        print(f"🔧 Node '{node_name}' output:")
        if 'messages' in node_output:
            latest_message = node_output['messages'][-1]
            # Remember the full history so the final answer can be shown below.
            final_messages = node_output['messages']
            print(f"  Type: {type(latest_message).__name__}")
            if hasattr(latest_message, 'content'):
                print(f"  Content: {latest_message.content[:200]}...")
            if hasattr(latest_message, 'tool_calls') and latest_message.tool_calls:
                print(f"  Tool Calls: {latest_message.tool_calls}")
        print("-" * 30)

print("\n"*3)
print("="*50)
print("🎩 Alfred's Final Response:")
if final_messages:
    print(final_messages[-1].content)
|
requirements.txt
CHANGED
|
@@ -2,4 +2,10 @@ datasets
|
|
| 2 |
smolagents
|
| 3 |
langchain-community
|
| 4 |
rank_bm25
|
| 5 |
-
duckduckgo-search
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
smolagents
|
| 3 |
langchain-community
|
| 4 |
rank_bm25
|
| 5 |
+
duckduckgo-search
|
| 6 |
+
python-dotenv
|
| 7 |
+
langchain
|
| 8 |
+
langchain-openai
|
| 9 |
+
langgraph
|
| 10 |
+
huggingface_hub
|
| 11 |
+
langchain-tavily
|
retriever.py
CHANGED
|
@@ -1,36 +1,30 @@
|
|
| 1 |
-
from
|
| 2 |
from langchain_community.retrievers import BM25Retriever
|
| 3 |
from langchain.docstore.document import Document
|
|
|
|
| 4 |
import datasets
|
|
|
|
|
|
|
|
|
|
| 5 |
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def __init__(self, docs):
|
| 19 |
-
self.is_initialized = False
|
| 20 |
-
self.retriever = BM25Retriever.from_documents(docs)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
def forward(self, query: str):
|
| 24 |
-
results = self.retriever.get_relevant_documents(query)
|
| 25 |
-
if results:
|
| 26 |
-
return "\n\n".join([doc.page_content for doc in results[:3]])
|
| 27 |
-
else:
|
| 28 |
-
return "No matching guest information found."
|
| 29 |
|
| 30 |
|
| 31 |
def load_guest_dataset():
|
| 32 |
# Load the dataset
|
| 33 |
-
guest_dataset = datasets.load_dataset(
|
|
|
|
| 34 |
|
| 35 |
# Convert dataset entries into Document objects
|
| 36 |
docs = [
|
|
@@ -46,8 +40,68 @@ def load_guest_dataset():
|
|
| 46 |
for guest in guest_dataset
|
| 47 |
]
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import Tool
from langchain_community.retrievers import BM25Retriever
from langchain.docstore.document import Document
from langchain_core.messages import HumanMessage
import datasets
from langchain_openai import AzureChatOpenAI
import os
from dotenv import load_dotenv

load_dotenv()

# Create the LLM instance once at module level so every conversation-starter
# request reuses the same client instead of rebuilding it per call.
# FIX: the README documents AZURE_OPENAI_DEPLOYMENT_NAME and
# AZURE_OPENAI_API_VERSION, but the code previously read DEPLOYMENT_NAME and
# OPENAI_API_VERSION.  Accept both spellings (README name first).
conversation_llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") or os.getenv("DEPLOYMENT_NAME"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION") or os.getenv("OPENAI_API_VERSION"),
    temperature=0.75,
    streaming=False,
    verbose=False,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def load_guest_dataset():
|
| 25 |
# Load the dataset
|
| 26 |
+
guest_dataset = datasets.load_dataset(
|
| 27 |
+
"agents-course/unit3-invitees", split="train")
|
| 28 |
|
| 29 |
# Convert dataset entries into Document objects
|
| 30 |
docs = [
|
|
|
|
| 40 |
for guest in guest_dataset
|
| 41 |
]
|
| 42 |
|
| 43 |
+
return docs
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
docs = load_guest_dataset()
|
| 47 |
+
bm25_retriever = BM25Retriever.from_documents(docs)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def generate_conversation_starter(description: str) -> str:
    """Generate a short conversation starter from a guest description.

    Builds a one-shot prompt (Grace Hopper example) and asks the module-level
    Azure LLM; falls back to a generic suggestion if the call fails.
    """
    prompt = (
        f"Generate a very simple and short conversation starter from the description of the person.\n\n"
        f"For example:\n"
        f"Description: Rear Admiral Grace Hopper was a trailblazer in computer programming and helped invent the first compiler. "
        f"She's passionate about teaching and loves telling stories about debugging.\n\n"
        f"Conversation Starter: Ask her about the time she found a real bug in a computer — she loves that story!\n\n"
        f"Description: {description}\n\n"
        f"Conversation Starter:"
    )
    try:
        reply = conversation_llm.invoke([HumanMessage(content=prompt)])
        return reply.content.strip()
    except Exception:
        # Best-effort tool: never propagate LLM errors to the agent loop.
        return "Ask them about their background and interests!"
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def retrieve_info_from_name(query: str) -> str:
    """Retrieves detailed information about gala guests based on their name or relation.

    Runs a BM25 lookup for *query*, formats up to three matching guest
    entries, and appends an LLM-generated conversation starter to any entry
    that carries a "Description:" line.

    Args:
        query: Guest name (or relation) to search for.

    Returns:
        The formatted guest entries separated by a "=" divider line, or a
        "not found" message when the retriever returns nothing.
    """
    results = bm25_retriever.invoke(query)
    if not results:
        return "No matching guest information found."

    guest_info_with_starters = []
    for i, doc in enumerate(results[:3], 1):
        guest_info = doc.page_content

        # Pull the description out of the entry's "Description:" line, if any.
        description = ""
        for line in guest_info.split('\n'):
            if line.startswith("Description:"):
                description = line.replace("Description:", "").strip()
                break

        result_text = f"Guest {i}:\n{guest_info}"

        # Only call the LLM when there is a description to work from.
        if description:
            conversation_starter = generate_conversation_starter(description)
            result_text += f"\n💬 Conversation Starter: {conversation_starter}"

        guest_info_with_starters.append(result_text)

    # BUG FIX: the original expression `"\n\n" + "="*50 + "\n\n".join(...)`
    # bound the join only to "\n\n", so the "=" divider appeared once at the
    # top and guests were separated by a bare blank line.  Join with the full
    # divider so every entry is separated by the "=" rule.
    separator = "\n\n" + "=" * 50 + "\n\n"
    return separator + separator.join(guest_info_with_starters)
|
| 101 |
|
| 102 |
|
| 103 |
+
# LangChain Tool wrapper exposing the BM25-backed guest lookup to the agent.
guest_info_tool = Tool(
    name="guest_info_retriever",
    func=retrieve_info_from_name,
    description="Retrieves detailed information about gala guests based on their name or relation, including conversation starters."
)
|
test_tavily.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from langchain_tavily import TavilySearch
|
| 4 |
+
|
| 5 |
+
# Load environment variables
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_tavily_search():
    """Test the Tavily search functionality independently.

    Prints diagnostics at every stage (key check, init, raw response,
    formatted output); returns None — results are read from stdout.
    """

    print("🔍 Testing Tavily Search Tool...")
    print("="*50)

    # Check if API key is available
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        print("❌ Error: TAVILY_API_KEY not found in environment variables")
        print("Please add TAVILY_API_KEY to your .env file")
        return

    # Show only a key prefix/suffix so the full secret never hits the logs.
    print(f"✅ API Key found: {api_key[:10]}...{api_key[-4:]}")

    try:
        # Initialize Tavily search
        print("\n🚀 Initializing Tavily search...")
        tavily_search = TavilySearch(
            api_key=api_key,
            max_results=2,
            topic="general",
            search_depth="basic",
            include_answer=True,
            include_raw_content=False,
            include_images=False
        )
        print("✅ Tavily search initialized successfully")

        # Test search query
        test_query = "latest portugal won"
        print(f"\n🔎 Searching for: '{test_query}'")

        # Perform the search
        results = tavily_search.invoke({"query": test_query})

        print(f"\n📊 Results type: {type(results)}")
        print(
            f"📊 Results length: {len(results) if hasattr(results, '__len__') else 'N/A'}")

        # Display results
        print("\n📰 Raw Results:")
        print("-" * 30)
        print(results)
        print("-" * 30)

        # Process and format results
        # NOTE(review): test_tavily_raw below expects a dict with a 'results'
        # key; this branch only fires when invoke returns a bare list —
        # confirm which shape the installed langchain_tavily version emits.
        if isinstance(results, list) and results:
            print(f"\n✅ Found {len(results)} results")
            formatted_news = f"📰 Latest News about '{test_query}':\n\n"

            for i, result in enumerate(results, 1):
                print(f"\n🔍 Processing result {i}:")
                print(f"  Type: {type(result)}")

                if isinstance(result, dict):
                    print(f"  Keys: {list(result.keys())}")
                    title = result.get('title', 'No title')
                    content = result.get('content', 'No content available')
                    url = result.get('url', 'No URL')

                    formatted_news += f"{i}. **{title}**\n"
                    formatted_news += f"   Summary: {content[:200]}...\n"
                    formatted_news += f"   Source: {url}\n\n"
                else:
                    # Handle case where result is a string
                    formatted_news += f"{i}. {str(result)[:300]}...\n\n"

            print(f"\n📝 Formatted Output:")
            print("="*50)
            print(formatted_news)
            print("="*50)

        else:
            print(f"❌ No results found or unexpected result format")
            print(f"Results: {results}")

    except Exception as e:
        print(f"❌ Error during search: {e}")
        print(f"Error type: {type(e)}")
        import traceback
        print(f"Full traceback:\n{traceback.format_exc()}")
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def test_tavily_raw():
    """Test the raw Tavily search functionality.

    Returns True when a search succeeds and the response carries a
    'results' key, False on any failure (missing key, API error, bad shape).
    """

    print("🔍 Testing Raw Tavily Search...")
    print("="*50)

    # Guard clause: nothing to test without an API key.
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        print("❌ Error: TAVILY_API_KEY not found in environment variables")
        return False

    try:
        searcher = TavilySearch(
            api_key=api_key,
            max_results=2,
            topic="general",
            search_depth="basic",
            include_answer=True,
            include_raw_content=False,
            include_images=False,
        )

        test_query = "latest portugal won"
        print(f"🔎 Searching for: '{test_query}'")

        response = searcher.invoke({"query": test_query})

        print(f"✅ Raw search successful!")
        print(f"📊 Results type: {type(response)}")
        if isinstance(response, dict) and 'results' in response:
            print(f"📊 Number of results: {len(response['results'])}")
            return True
        return False

    except Exception as e:
        print(f"❌ Raw search failed: {e}")
        return False
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def test_tools_function():
    """Test our get_latest_news function from tools.py.

    Returns True on success, False on any error.
    NOTE(review): get_latest_news is currently commented out in tools.py, so
    the import below raises and this test reports failure until it is
    re-enabled.
    """

    print("\n🔧 Testing get_latest_news Function...")
    print("="*50)

    try:
        # Import lazily so a broken tools.py fails this test, not the module.
        from tools import get_latest_news

        test_query = "portugal won"
        print(f"🔎 Testing with query: '{test_query}'")

        output = get_latest_news(test_query)

        print("✅ Function executed successfully!")
        print("\n📰 Function Output:")
        print("-" * 50)
        print(output)
        print("-" * 50)
        return True

    except Exception as e:
        print(f"❌ Function test failed: {e}")
        import traceback
        print(f"Full traceback:\n{traceback.format_exc()}")
        return False
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
if __name__ == "__main__":
    print("🧪 Running Tavily Tests...\n")

    # Run the raw API check first; only exercise the tools.py wrapper when
    # the underlying Tavily access actually works.
    if test_tavily_raw():
        if test_tools_function():
            print("\n🎉 All tests passed! The news search tool is working correctly.")
        else:
            print("\n⚠️ Raw search works but our function has issues.")
    else:
        print("\n❌ Raw search failed - check your API key and connection.")
|
tools.py
CHANGED
|
@@ -1,56 +1,119 @@
|
|
| 1 |
-
from
|
| 2 |
-
from smolagents import Tool
|
| 3 |
import random
|
|
|
|
| 4 |
from huggingface_hub import list_models
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
# Initialize the DuckDuckGo search tool
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
}
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
|
|
|
|
|
| 1 |
+
from langchain.tools import Tool
import random
import os
from huggingface_hub import list_models
# from langchain_tavily import TavilySearch
from langchain_community.tools import DuckDuckGoSearchRun

from dotenv import load_dotenv

# Load API keys / config from the project-root .env file.
load_dotenv()


# Initialize the DuckDuckGo search tool
# (serves as the news-search backend while the Tavily integration is disabled)
search_tool = DuckDuckGoSearchRun()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def get_weather_info(location: str) -> str:
    """Fetches dummy weather information for a given location.

    No real weather API is called: one of three canned condition/temperature
    pairs is picked at random and formatted into a one-line report.
    """
    canned_reports = [
        {"condition": "Rainy", "temp_c": 15},
        {"condition": "Clear", "temp_c": 25},
        {"condition": "Windy", "temp_c": 20},
    ]
    picked = random.choice(canned_reports)
    return f"Weather in {location}: {picked['condition']}, {picked['temp_c']}°C"
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def get_hub_stats(author: str) -> str:
    """Fetches the most downloaded model from a specific author on the Hugging Face Hub.

    Returns a human-readable sentence; network or API failures are reported
    as an error string rather than raised.
    """
    try:
        # Ask the Hub for the single most-downloaded model by this author.
        top_models = list(list_models(
            author=author, sort="downloads", direction=-1, limit=1))
        if not top_models:
            return f"No models found for author {author}."
        best = top_models[0]
        return f"The most downloaded model by {author} is {best.id} with {best.downloads:,} downloads."
    except Exception as e:
        return f"Error fetching models for {author}: {str(e)}"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# def get_latest_news(topic: str) -> str:
|
| 48 |
+
# """Fetches the latest news about a specific topic using Tavily search."""
|
| 49 |
+
# try:
|
| 50 |
+
# # Initialize Tavily search with API key from environment
|
| 51 |
+
# tavily_search = TavilySearch(
|
| 52 |
+
# api_key=os.getenv("TAVILY_API_KEY"),
|
| 53 |
+
# max_results=2,
|
| 54 |
+
# topic="general",
|
| 55 |
+
# search_depth="basic",
|
| 56 |
+
# include_answer=True,
|
| 57 |
+
# include_raw_content=False,
|
| 58 |
+
# include_images=False
|
| 59 |
+
# )
|
| 60 |
+
|
| 61 |
+
# # Search for news about the topic
|
| 62 |
+
# response = tavily_search.invoke(
|
| 63 |
+
# {"query": f"latest news about {topic}"})
|
| 64 |
+
|
| 65 |
+
# # Handle the correct Tavily response format
|
| 66 |
+
# if isinstance(response, dict) and 'results' in response:
|
| 67 |
+
# results = response['results']
|
| 68 |
+
# answer = response.get('answer', '')
|
| 69 |
+
|
| 70 |
+
# if results:
|
| 71 |
+
# # Format the results nicely
|
| 72 |
+
# formatted_news = f"📰 Latest News about '{topic}':\n\n"
|
| 73 |
+
|
| 74 |
+
# # Add AI-generated answer if available
|
| 75 |
+
# if answer:
|
| 76 |
+
# formatted_news += f"🤖 **Quick Summary**: {answer}\n\n"
|
| 77 |
+
|
| 78 |
+
# # Add detailed results
|
| 79 |
+
# formatted_news += "📋 **Detailed Results**:\n\n"
|
| 80 |
+
# for i, result in enumerate(results, 1):
|
| 81 |
+
# title = result.get('title', 'No title')
|
| 82 |
+
# content = result.get('content', 'No content available')
|
| 83 |
+
# url = result.get('url', 'No URL')
|
| 84 |
+
# score = result.get('score', 0)
|
| 85 |
+
|
| 86 |
+
# formatted_news += f"{i}. **{title}**\n"
|
| 87 |
+
# formatted_news += f" 📄 Summary: {content}\n"
|
| 88 |
+
# formatted_news += f" 🔗 Source: {url}\n"
|
| 89 |
+
# formatted_news += f" ⭐ Relevance: {score:.2f}\n\n"
|
| 90 |
+
|
| 91 |
+
# return formatted_news
|
| 92 |
+
# else:
|
| 93 |
+
# return f"No recent news found about '{topic}'. Please try a different search term."
|
| 94 |
+
# else:
|
| 95 |
+
# return f"Unexpected response format from search. Raw response: {str(response)[:500]}..."
|
| 96 |
+
|
| 97 |
+
# except Exception as e:
|
| 98 |
+
# return f"Error fetching news about '{topic}': {str(e)}. Please check your Tavily API key and try again."
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# Expose the plain functions above as LangChain Tool objects for the agent.
weather_info_tool = Tool(
    name="weather_info",
    func=get_weather_info,
    description="Fetches dummy weather information for a given location."
)

hub_stats_tool = Tool(
    name="hub_stats",
    func=get_hub_stats,
    description="Fetches the most downloaded model from a specific author on the Hugging Face Hub."
)

# news_search_tool = Tool(
#     name="news_search",
#     func=get_latest_news,
#     description="Fetches the latest news about a specific topic using Tavily search. Provide a topic or keyword to search for recent news articles."
# )

# While the Tavily integration is disabled, back the news tool with the
# DuckDuckGo search tool initialized above.
news_search_tool = search_tool
|