GitHub Actions committed
Commit: 6e614bb
Parent(s): 2d58a98

Deploy backend from GitHub Actions

🚀 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
- main.py +1 -1
- rag/chat.py +89 -30
- rag/qdrant_client.py +1 -1
- rag/retrieval.py +8 -3
main.py
CHANGED
@@ -329,7 +329,7 @@ async def health_check(request: Request):
     return health_status
 
 
-@app.post("/chat")
+@app.post("/api/chat")
 @limiter.limit(f"{settings.rate_limit_requests}/{settings.rate_limit_window}minute")
 async def chat_endpoint(
     request: Request,
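The only functional change in main.py is the route: the chat endpoint moves from /chat to /api/chat, so any client that hard-codes the old path now gets a 404. A minimal sketch of calling the renamed endpoint and reading its SSE stream; the host, port, and request payload fields are illustrative assumptions, not taken from this diff:

```python
# Sketch of a client for the renamed endpoint; host and payload shape are assumed.
import json
import requests

def ask(query: str, session_id: str = "demo") -> None:
    resp = requests.post(
        "http://localhost:8000/api/chat",  # was "/chat" before this commit
        json={"query": query, "session_id": session_id},
        stream=True,
        timeout=60,
    )
    resp.raise_for_status()
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or not raw.startswith("data: "):
            continue  # skip blank keep-alive lines between SSE messages
        data = raw[len("data: "):]
        if data == "[DONE]":  # terminator emitted by the chat handler
            break
        print(json.loads(data).get("content", ""), end="", flush=True)

if __name__ == "__main__":
    ask("What is physical AI?")
```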
rag/chat.py
CHANGED
@@ -58,7 +58,7 @@ class ChatHandler:
         self.retrieval_engine = RetrievalEngine(
             qdrant_manager=qdrant_manager,
             embedder=self.embedder,
-            score_threshold=0.
+            score_threshold=0.5,  # Lowered to 0.5 to better match document scores
             enable_mmr=True,
             mmr_lambda=0.5
         )
@@ -77,7 +77,7 @@ class ChatHandler:
         Returns:
             Adaptive threshold value
         """
-        base_threshold = 0.
+        base_threshold = 0.5
 
         # Lower threshold for very specific queries (longer)
         if query_length > 100:
@@ -196,15 +196,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-
-
-
-
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -351,15 +375,22 @@ class ChatHandler:
 
             response_time = (datetime.utcnow() - start_time).total_seconds()
 
-
-
-
-
-
-
-
-
-
+            # Return greeting as JSON response
+            greeting_response = {
+                "type": "final",
+                "answer": answer,
+                "sources": [],
+                "session_id": session_id,
+                "query": query,
+                "response_time": response_time,
+                "tokens_used": self.count_tokens(answer),
+                "context_used": False,
+                "model": self.model,
+                "has_context": False
+            }
+            yield f"data: {json.dumps(greeting_response)}\n\n"
+            yield f"data: [DONE]\n\n"
+            return
 
             # Get or create conversation context
             context = self._get_or_create_context(session_id)
@@ -400,15 +431,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-
-
-
-
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -485,15 +540,19 @@ class ChatHandler:
             # Calculate response time
             response_time = (datetime.utcnow() - start_time).total_seconds()
 
-
-
-
-
-
-
-
-
-
+            # Final response
+            final_response = {
+                "type": "final",
+                "answer": answer,
+                "sources": [citation.to_dict() if hasattr(citation, 'to_dict') else citation for citation in citations],
+                "session_id": session_id,
+                "query": query,
+                "response_time": response_time,
+                "tokens_used": tokens_used,
+                "model": self.model
+            }
+            yield f"data: {json.dumps(final_response)}\n\n"
+            yield f"data: [DONE]\n\n"
 
         except Exception as e:
             logger.error(f"Chat failed: {str(e)}", exc_info=True)
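Both no-result branches stream their fallback text through self._format_sse_message, a helper this diff does not show. To be consistent with the raw f"data: ..." lines yielded elsewhere in the same file, it would need to frame a JSON payload as one Server-Sent Events message; a hypothetical sketch, not the repository's actual implementation:

```python
# Hypothetical sketch of the SSE framing assumed by the streamed messages above;
# the real _format_sse_message in rag/chat.py is not shown in this diff.
import json
from typing import Any, Dict

def format_sse_message(payload: Dict[str, Any]) -> str:
    """Frame a JSON payload as a single Server-Sent Events message."""
    # SSE: each message is a "data: <text>" line followed by a blank line.
    return f"data: {json.dumps(payload)}\n\n"

# Example: the "done" event emitted after the fallback message finishes streaming.
print(format_sse_message({"type": "done", "no_results": True}), end="")
```

Streaming the canned message word by word with a 50 ms sleep mimics token-by-token model output, so the frontend can reuse its normal rendering path instead of special-casing the no-results reply.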
rag/qdrant_client.py
CHANGED
@@ -178,7 +178,7 @@ class QdrantManager:
         self,
         query_embedding: List[float],
         limit: int = 5,
-        score_threshold: float = 0.
+        score_threshold: float = 0.5,
         filters: Optional[Dict[str, Any]] = None
     ) -> List[Dict[str, Any]]:
         """Search for similar chunks using vector similarity."""
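The only change in qdrant_client.py is the default score_threshold on the search helper. For reference, qdrant-client applies the same parameter server-side: points scoring below the threshold are dropped before the limit is taken. A minimal sketch against a local instance, where the collection name, vector size, and query vector are illustrative assumptions:

```python
# Minimal sketch of a thresholded similarity search with qdrant-client.
# Collection name, vector size, and query vector are illustrative assumptions.
from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333")

hits = client.search(
    collection_name="book_chunks",  # hypothetical collection
    query_vector=[0.1] * 384,       # hypothetical 384-dim embedding
    limit=5,
    score_threshold=0.5,            # points scoring below 0.5 are excluded
)
for hit in hits:
    print(hit.score, hit.payload)
```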
rag/retrieval.py
CHANGED
@@ -36,7 +36,7 @@ class RetrievalEngine:
         qdrant_manager: QdrantManager,
         embedder: EmbeddingGenerator,
         default_k: int = 5,
-        score_threshold: float = 0.
+        score_threshold: float = 0.5,  # Lowered to 0.5 to better match document scores
         max_context_tokens: int = 4000,
         enable_mmr: bool = True,
         mmr_lambda: float = 0.5
@@ -177,6 +177,9 @@ class RetrievalEngine:
 
         # Apply similarity threshold filtering
         logger.info(f"Applying threshold filter: {len(chunks)} chunks before filtering, threshold={threshold}")
+        # Debug: Log scores of first few chunks
+        for i, chunk in enumerate(chunks[:5]):
+            logger.info(f"Chunk {i} score: {chunk.score}, content preview: {chunk.content[:100]}...")
         initial_count = len(chunks)
         chunks = [
             chunk for chunk in chunks
@@ -184,9 +187,11 @@ class RetrievalEngine:
         ]
         logger.info(f"After threshold filter: {len(chunks)} chunks remaining (filtered out {initial_count - len(chunks)} chunks)")
 
-        # Apply MMR if enabled and we have enough results
-        if use_mmr and len(chunks) >
+        # Apply MMR if enabled and we have enough results (but not for very few results)
+        if use_mmr and len(chunks) > 3:
             chunks = await self._apply_mmr(query_embedding, chunks, max_results, lambda_param)
+        elif use_mmr and len(chunks) <= 3:
+            logger.info(f"Skipping MMR due to low result count: {len(chunks)} chunks")
 
         # Sort by score and limit
         chunks.sort(key=lambda x: x.score, reverse=True)
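The retrieval changes add score logging ahead of the threshold filter and skip MMR re-ranking when three or fewer chunks survive it, which makes sense: maximal marginal relevance only pays off when there are redundant candidates to trade away. For reference, a sketch of the standard MMR selection rule; this is the textbook formulation, not the repository's _apply_mmr:

```python
# Textbook MMR selection over cosine similarities; a sketch, not rag/retrieval.py's
# actual _apply_mmr. Assumes unit-normalized embeddings so dot product = cosine.
import numpy as np

def mmr(query_vec: np.ndarray, doc_vecs: np.ndarray, k: int, lam: float = 0.5) -> list[int]:
    """Return indices of k documents balancing relevance and diversity."""
    relevance = doc_vecs @ query_vec  # sim(query, doc) for each candidate
    selected: list[int] = []
    candidates = list(range(len(doc_vecs)))
    while candidates and len(selected) < k:
        if not selected:
            # First pick is purely by relevance.
            best = max(candidates, key=lambda i: relevance[i])
        else:
            # Later picks penalize similarity to already-selected documents.
            def score(i: int) -> float:
                redundancy = max(doc_vecs[i] @ doc_vecs[j] for j in selected)
                return lam * relevance[i] - (1 - lam) * redundancy
            best = max(candidates, key=score)
        selected.append(best)
        candidates.remove(best)
    return selected

# With lam=0.5 (the mmr_lambda used above), relevance and diversity weigh equally.
docs = np.random.default_rng(0).normal(size=(10, 384))
docs /= np.linalg.norm(docs, axis=1, keepdims=True)
print(mmr(docs[0], docs, k=5, lam=0.5))
```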