GitHub Actions committed
Commit 6e614bb · 1 parent: 2d58a98

Deploy backend from GitHub Actions


🚀 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

Files changed (4)
  1. main.py +1 -1
  2. rag/chat.py +89 -30
  3. rag/qdrant_client.py +1 -1
  4. rag/retrieval.py +8 -3
main.py CHANGED
@@ -329,7 +329,7 @@ async def health_check(request: Request):
     return health_status
 
 
-@app.post("/chat")
+@app.post("/api/chat")
 @limiter.limit(f"{settings.rate_limit_requests}/{settings.rate_limit_window}minute")
 async def chat_endpoint(
     request: Request,
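
Note: after this change, clients that POST to /chat will 404 and must target /api/chat instead. A minimal client sketch for smoke-testing the renamed route follows; the host, port, and payload keys ("query", "session_id") are assumptions, since the request schema is not part of this diff.

# Minimal sketch of calling the renamed endpoint (hypothetical payload/port).
import json
import requests

resp = requests.post(
    "http://localhost:8000/api/chat",   # previously /chat
    json={"query": "What is physical AI?", "session_id": "demo"},
    stream=True,                        # the handler streams SSE frames
    timeout=60,
)
for line in resp.iter_lines(decode_unicode=True):
    if line.startswith("data: "):
        payload = line[len("data: "):]
        if payload == "[DONE]":        # terminator yielded in rag/chat.py
            break
        print(json.loads(payload))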
rag/chat.py CHANGED
@@ -58,7 +58,7 @@ class ChatHandler:
         self.retrieval_engine = RetrievalEngine(
             qdrant_manager=qdrant_manager,
             embedder=self.embedder,
-            score_threshold=0.7,  # Updated to 0.7 for better precision
+            score_threshold=0.5,  # Lowered to 0.5 to better match document scores
             enable_mmr=True,
             mmr_lambda=0.5
         )
@@ -77,7 +77,7 @@ class ChatHandler:
         Returns:
             Adaptive threshold value
         """
-        base_threshold = 0.7
+        base_threshold = 0.5
 
         # Lower threshold for very specific queries (longer)
         if query_length > 100:
@@ -196,15 +196,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle appropriately
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-                from api.exceptions import ContentNotFoundError
-                raise ContentNotFoundError(
-                    query=query,
-                    threshold=self.retrieval_engine.score_threshold
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -351,15 +375,22 @@ class ChatHandler:
 
                 response_time = (datetime.utcnow() - start_time).total_seconds()
 
-                return ChatResponse(
-                    answer=answer,
-                    sources=[],
-                    session_id=session_id,
-                    query=query,
-                    response_time=response_time,
-                    tokens_used=self.count_tokens(answer),
-                    model=self.model
-                )
+                # Return greeting as JSON response
+                greeting_response = {
+                    "type": "final",
+                    "answer": answer,
+                    "sources": [],
+                    "session_id": session_id,
+                    "query": query,
+                    "response_time": response_time,
+                    "tokens_used": self.count_tokens(answer),
+                    "context_used": False,
+                    "model": self.model,
+                    "has_context": False
+                }
+                yield f"data: {json.dumps(greeting_response)}\n\n"
+                yield f"data: [DONE]\n\n"
+                return
 
             # Get or create conversation context
             context = self._get_or_create_context(session_id)
@@ -400,15 +431,39 @@ class ChatHandler:
             )
             retrieved_docs = retrieved_docs[:k]
 
-            # If still no results, handle appropriately
+            # If still no results, handle gracefully
             if not retrieved_docs:
                 logger.info(f"No content found for query: {query[:100]}...")
-                from api.exceptions import ContentNotFoundError
-                raise ContentNotFoundError(
-                    query=query,
-                    threshold=self.retrieval_engine.score_threshold
+
+                # Provide a helpful response when no content is found
+                no_content_response = (
+                    "I couldn't find specific information about that topic in the book. "
+                    "This book covers Physical AI & Humanoid Robotics. Try asking about:\n"
+                    "• Introduction to physical AI\n"
+                    "• Types of humanoid robots\n"
+                    "• AI control systems\n"
+                    "• Robot locomotion\n"
+                    "• Specific chapters or sections"
                 )
 
+                # Stream the helpful response
+                words = no_content_response.split()
+                for word in words:
+                    yield self._format_sse_message({
+                        "type": "chunk",
+                        "content": word + " "
+                    })
+                    await asyncio.sleep(0.05)
+
+                yield self._format_sse_message({
+                    "type": "done",
+                    "session_id": session_id,
+                    "response_time": 0.1,
+                    "tokens_used": self.count_tokens(no_content_response),
+                    "no_results": True
+                })
+                return
+
             # Log monitoring metrics
             logger.info(
                 "Retrieval metrics - query_length=%d, retrieved_count=%d, threshold=%.2f, session_id=%s",
@@ -485,15 +540,19 @@ class ChatHandler:
             # Calculate response time
             response_time = (datetime.utcnow() - start_time).total_seconds()
 
-            return ChatResponse(
-                answer=answer,
-                sources=citations,
-                session_id=session_id,
-                query=query,
-                response_time=response_time,
-                tokens_used=tokens_used,
-                model=self.model
-            )
+            # Final response
+            final_response = {
+                "type": "final",
+                "answer": answer,
+                "sources": [citation.to_dict() if hasattr(citation, 'to_dict') else citation for citation in citations],
+                "session_id": session_id,
+                "query": query,
+                "response_time": response_time,
+                "tokens_used": tokens_used,
+                "model": self.model
+            }
+            yield f"data: {json.dumps(final_response)}\n\n"
+            yield f"data: [DONE]\n\n"
 
         except Exception as e:
             logger.error(f"Chat failed: {str(e)}", exc_info=True)
rag/qdrant_client.py CHANGED
@@ -178,7 +178,7 @@ class QdrantManager:
         self,
         query_embedding: List[float],
         limit: int = 5,
-        score_threshold: float = 0.7,
+        score_threshold: float = 0.5,
         filters: Optional[Dict[str, Any]] = None
     ) -> List[Dict[str, Any]]:
         """Search for similar chunks using vector similarity."""
rag/retrieval.py CHANGED
@@ -36,7 +36,7 @@ class RetrievalEngine:
         qdrant_manager: QdrantManager,
         embedder: EmbeddingGenerator,
         default_k: int = 5,
-        score_threshold: float = 0.7,  # Updated to 0.7 for better precision
+        score_threshold: float = 0.5,  # Lowered to 0.5 to better match document scores
         max_context_tokens: int = 4000,
         enable_mmr: bool = True,
         mmr_lambda: float = 0.5
@@ -177,6 +177,9 @@ class RetrievalEngine:
 
         # Apply similarity threshold filtering
         logger.info(f"Applying threshold filter: {len(chunks)} chunks before filtering, threshold={threshold}")
+        # Debug: Log scores of first few chunks
+        for i, chunk in enumerate(chunks[:5]):
+            logger.info(f"Chunk {i} score: {chunk.score}, content preview: {chunk.content[:100]}...")
         initial_count = len(chunks)
         chunks = [
             chunk for chunk in chunks
@@ -184,9 +187,11 @@ class RetrievalEngine:
         ]
         logger.info(f"After threshold filter: {len(chunks)} chunks remaining (filtered out {initial_count - len(chunks)} chunks)")
 
-        # Apply MMR if enabled and we have enough results
-        if use_mmr and len(chunks) > 1:
+        # Apply MMR if enabled and we have enough results (but not for very few results)
+        if use_mmr and len(chunks) > 3:
             chunks = await self._apply_mmr(query_embedding, chunks, max_results, lambda_param)
+        elif use_mmr and len(chunks) <= 3:
+            logger.info(f"Skipping MMR due to low result count: {len(chunks)} chunks")
 
         # Sort by score and limit
         chunks.sort(key=lambda x: x.score, reverse=True)
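
For readers unfamiliar with the step being gated here: _apply_mmr itself is unchanged and its body is not shown, but maximal marginal relevance generically re-ranks results by trading query relevance against redundancy among already-selected chunks. A generic sketch follows (not the repository's implementation; mmr_lambda=0.5 above corresponds to lam below):

import numpy as np

def mmr_select(query_vec, doc_vecs, k, lam=0.5):
    """Generic MMR: pick up to k vectors that are relevant to the query
    while penalizing similarity to vectors already chosen."""
    def cos(a, b):
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-12))

    relevance = [cos(query_vec, d) for d in doc_vecs]
    selected, remaining = [], list(range(len(doc_vecs)))
    while remaining and len(selected) < k:
        def mmr_score(i):
            redundancy = max((cos(doc_vecs[i], doc_vecs[j]) for j in selected), default=0.0)
            return lam * relevance[i] - (1 - lam) * redundancy
        best = max(remaining, key=mmr_score)
        selected.append(best)
        remaining.remove(best)
    return selected  # indices into doc_vecs, in selection order

The len(chunks) > 3 gate added above simply skips this re-ranking when so few chunks survive the threshold that diversity pruning would only discard relevant context.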