GitHub Actions committed
Commit: 6e614bb
Parent(s): 2d58a98

Deploy backend from GitHub Actions

🚀 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
- main.py +1 -1
- rag/chat.py +89 -30
- rag/qdrant_client.py +1 -1
- rag/retrieval.py +8 -3
main.py
CHANGED
@@ -329,7 +329,7 @@ async def health_check(request: Request):
     return health_status
 
 
-@app.post("/chat")
+@app.post("/api/chat")
 @limiter.limit(f"{settings.rate_limit_requests}/{settings.rate_limit_window}minute")
 async def chat_endpoint(
     request: Request,
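The only functional change in main.py is the route: the chat endpoint moves from /chat to /api/chat, so any client that hard-codes the old path now gets a 404. A minimal sketch of calling the renamed endpoint and reading its SSE stream; the host, port, and request payload fields are illustrative assumptions, not taken from this diff:

```python
# Sketch of a client for the renamed endpoint; host and payload shape are assumed.
import json
import requests

def ask(query: str, session_id: str = "demo") -> None:
    resp = requests.post(
        "http://localhost:8000/api/chat",  # was "/chat" before this commit
        json={"query": query, "session_id": session_id},
        stream=True,
        timeout=60,
    )
    resp.raise_for_status()
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw or not raw.startswith("data: "):
            continue  # skip blank keep-alive lines between SSE messages
        data = raw[len("data: "):]
        if data == "[DONE]":  # terminator emitted by the chat handler
            break
        print(json.loads(data).get("content", ""), end="", flush=True)

if __name__ == "__main__":
    ask("What is physical AI?")
```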
rag/chat.py
CHANGED
@@ -58,7 +58,7 @@ class ChatHandler:
         self.retrieval_engine = RetrievalEngine(
             qdrant_manager=qdrant_manager,
             embedder=self.embedder,
-            score_threshold=0.
+            score_threshold=0.5,  # Lowered to 0.5 to better match document scores
             enable_mmr=True,
             mmr_lambda=0.5
         )
@@ -77,7 +77,7 @@ class ChatHandler:
         Returns:
             Adaptive threshold value
         """
-        base_threshold = 0.
+        base_threshold = 0.5
 
         # Lower threshold for very specific queries (longer)
         if query_length > 100:
@@ -196,15 +196,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-
-
-
-
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -351,15 +375,22 @@ class ChatHandler:
 
             response_time = (datetime.utcnow() - start_time).total_seconds()
 
-
-
-
-
-
-
-
-
-
+            # Return greeting as JSON response
+            greeting_response = {
+                "type": "final",
+                "answer": answer,
+                "sources": [],
+                "session_id": session_id,
+                "query": query,
+                "response_time": response_time,
+                "tokens_used": self.count_tokens(answer),
+                "context_used": False,
+                "model": self.model,
+                "has_context": False
+            }
+            yield f"data: {json.dumps(greeting_response)}\n\n"
+            yield f"data: [DONE]\n\n"
+            return
 
             # Get or create conversation context
             context = self._get_or_create_context(session_id)
@@ -400,15 +431,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-
-
-
-
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -485,15 +540,19 @@ class ChatHandler:
             # Calculate response time
             response_time = (datetime.utcnow() - start_time).total_seconds()
 
-
-
-
-
-
-
-
-
-
+            # Final response
+            final_response = {
+                "type": "final",
+                "answer": answer,
+                "sources": [citation.to_dict() if hasattr(citation, 'to_dict') else citation for citation in citations],
+                "session_id": session_id,
+                "query": query,
+                "response_time": response_time,
+                "tokens_used": tokens_used,
+                "model": self.model
+            }
+            yield f"data: {json.dumps(final_response)}\n\n"
+            yield f"data: [DONE]\n\n"
 
         except Exception as e:
             logger.error(f"Chat failed: {str(e)}", exc_info=True)
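Both no-result branches stream their fallback text through self._format_sse_message, a helper this diff does not show. To be consistent with the raw f"data: ..." lines yielded elsewhere in the same file, it would need to frame a JSON payload as one Server-Sent Events message; a hypothetical sketch, not the repository's actual implementation:

```python
# Hypothetical sketch of the SSE framing assumed by the streamed messages above;
# the real _format_sse_message in rag/chat.py is not shown in this diff.
import json
from typing import Any, Dict

def format_sse_message(payload: Dict[str, Any]) -> str:
    """Frame a JSON payload as a single Server-Sent Events message."""
    # SSE: each message is a "data: <text>" line followed by a blank line.
    return f"data: {json.dumps(payload)}\n\n"

# Example: the "done" event emitted after the fallback message finishes streaming.
print(format_sse_message({"type": "done", "no_results": True}), end="")
```

Streaming the canned message word by word with a 50 ms sleep mimics token-by-token model output, so the frontend can reuse its normal rendering path instead of special-casing the no-results reply.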
rag/qdrant_client.py
CHANGED
@@ -178,7 +178,7 @@ class QdrantManager:
         self,
         query_embedding: List[float],
         limit: int = 5,
-        score_threshold: float = 0.
+        score_threshold: float = 0.5,
         filters: Optional[Dict[str, Any]] = None
     ) -> List[Dict[str, Any]]:
         """Search for similar chunks using vector similarity."""
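The only change in qdrant_client.py is the default score_threshold on the search helper. For reference, qdrant-client applies the same parameter server-side: points scoring below the threshold are dropped before the limit is taken. A minimal sketch against a local instance, where the collection name, vector size, and query vector are illustrative assumptions:

```python
# Minimal sketch of a thresholded similarity search with qdrant-client.
# Collection name, vector size, and query vector are illustrative assumptions.
from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333")

hits = client.search(
    collection_name="book_chunks",  # hypothetical collection
    query_vector=[0.1] * 384,       # hypothetical 384-dim embedding
    limit=5,
    score_threshold=0.5,            # points scoring below 0.5 are excluded
)
for hit in hits:
    print(hit.score, hit.payload)
```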
rag/retrieval.py
CHANGED
@@ -36,7 +36,7 @@ class RetrievalEngine:
         qdrant_manager: QdrantManager,
         embedder: EmbeddingGenerator,
         default_k: int = 5,
-        score_threshold: float = 0.
+        score_threshold: float = 0.5,  # Lowered to 0.5 to better match document scores
         max_context_tokens: int = 4000,
         enable_mmr: bool = True,
         mmr_lambda: float = 0.5
@@ -177,6 +177,9 @@ class RetrievalEngine:
 
         # Apply similarity threshold filtering
         logger.info(f"Applying threshold filter: {len(chunks)} chunks before filtering, threshold={threshold}")
+        # Debug: Log scores of first few chunks
+        for i, chunk in enumerate(chunks[:5]):
+            logger.info(f"Chunk {i} score: {chunk.score}, content preview: {chunk.content[:100]}...")
         initial_count = len(chunks)
         chunks = [
             chunk for chunk in chunks
@@ -184,9 +187,11 @@ class RetrievalEngine:
         ]
         logger.info(f"After threshold filter: {len(chunks)} chunks remaining (filtered out {initial_count - len(chunks)} chunks)")
 
-        # Apply MMR if enabled and we have enough results
-        if use_mmr and len(chunks) >
+        # Apply MMR if enabled and we have enough results (but not for very few results)
+        if use_mmr and len(chunks) > 3:
             chunks = await self._apply_mmr(query_embedding, chunks, max_results, lambda_param)
+        elif use_mmr and len(chunks) <= 3:
+            logger.info(f"Skipping MMR due to low result count: {len(chunks)} chunks")
 
         # Sort by score and limit
         chunks.sort(key=lambda x: x.score, reverse=True)
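The retrieval changes add score logging ahead of the threshold filter and skip MMR re-ranking when three or fewer chunks survive it, which makes sense: maximal marginal relevance only pays off when there are redundant candidates to trade away. For reference, a sketch of the standard MMR selection rule; this is the textbook formulation, not the repository's _apply_mmr:

```python
# Textbook MMR selection over cosine similarities; a sketch, not rag/retrieval.py's
# actual _apply_mmr. Assumes unit-normalized embeddings so dot product = cosine.
import numpy as np

def mmr(query_vec: np.ndarray, doc_vecs: np.ndarray, k: int, lam: float = 0.5) -> list[int]:
    """Return indices of k documents balancing relevance and diversity."""
    relevance = doc_vecs @ query_vec  # sim(query, doc) for each candidate
    selected: list[int] = []
    candidates = list(range(len(doc_vecs)))
    while candidates and len(selected) < k:
        if not selected:
            # First pick is purely by relevance.
            best = max(candidates, key=lambda i: relevance[i])
        else:
            # Later picks penalize similarity to already-selected documents.
            def score(i: int) -> float:
                redundancy = max(doc_vecs[i] @ doc_vecs[j] for j in selected)
                return lam * relevance[i] - (1 - lam) * redundancy
            best = max(candidates, key=score)
        selected.append(best)
        candidates.remove(best)
    return selected

# With lam=0.5 (the mmr_lambda used above), relevance and diversity weigh equally.
docs = np.random.default_rng(0).normal(size=(10, 384))
docs /= np.linalg.norm(docs, axis=1, keepdims=True)
print(mmr(docs[0], docs, k=5, lam=0.5))
```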