|
|
|
|
|
""" |
|
|
Test script to check RAG system status and document retrieval. |
|
|
""" |
|
|
|
|
|
import asyncio |
|
|
import sys |
|
|
import os |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
sys.path.append(str(Path(__file__).parent)) |
|
|
|
|
|
from rag.qdrant_client import QdrantManager |
|
|
from rag.embeddings import EmbeddingGenerator |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
async def check_system():
    """Print a status report for the RAG system.

    Runs three checks in order and prints the outcome of each:

    1. Whether the expected environment variables are set.
    2. Whether Qdrant can be reached, which collections it lists and the
       collection stats it reports.
    3. Whether a sample query returns documents from a similarity search.

    Exceptions raised by the checks are caught and printed, so the report
    always ends with the closing separator line. Returns None.
    """
    separator = "=" * 60
    print(separator)
    print("RAG SYSTEM STATUS CHECK")
    print(separator)

    _print_environment()

    print("\n2. Connecting to Qdrant...")
    qdrant_manager = None
    # Tracks whether initialize() succeeded, so that errors are attributed to
    # the right phase and close() is only called on an opened connection.
    connected = False
    try:
        qdrant_manager = QdrantManager(
            url=os.getenv("QDRANT_URL"),
            api_key=os.getenv("QDRANT_API_KEY"),
        )
        await qdrant_manager.initialize()
        connected = True
        print(" [OK] Connected to Qdrant")

        await _report_collections(qdrant_manager)
        await _check_document_search(qdrant_manager)
    except Exception as e:
        if connected:
            print(f" [ERROR] Qdrant status check failed: {e}")
        else:
            print(f" [ERROR] Failed to connect to Qdrant: {e}")
    finally:
        # Release the connection even when a check above raised.
        if connected:
            try:
                await qdrant_manager.close()
            except Exception as e:
                print(f" [ERROR] Failed to close Qdrant connection: {e}")

    print("\n" + separator)


def _print_environment():
    """Print which of the environment variables used by the RAG system are set."""
    print("\n1. Checking environment variables:")
    # Only report presence of the API key; never echo the secret itself.
    key_status = "[OK] Configured" if os.getenv("OPENAI_API_KEY") else "[ERROR] Missing"
    print(f" OPENAI_API_KEY: {key_status}")
    print(f" QDRANT_URL: {os.getenv('QDRANT_URL', 'Not set')}")
    print(f" BOOK_CONTENT_PATH: {os.getenv('BOOK_CONTENT_PATH', 'Not set')}")


async def _report_collections(qdrant_manager):
    """Print the collections and collection stats reported by *qdrant_manager*."""
    collections = await qdrant_manager.list_collections()
    print(f" Collections: {collections}")

    # NOTE(review): stats is assumed to be a dict-like object exposing a
    # "vector_count" key — confirm against QdrantManager.get_collection_stats.
    stats = await qdrant_manager.get_collection_stats()
    print(f" Collection stats: {stats}")

    vector_count = stats.get("vector_count", 0)
    if vector_count == 0:
        print(" [WARNING] No documents found in collection!")
        print("\n POSSIBLE SOLUTIONS:")
        print(" 1. Run ingestion: python scripts/ingest.py --content-path ./book_content --force-reindex")
        print(" 2. Check if BOOK_CONTENT_PATH is correct")
        print(" 3. Verify documents exist at the specified path")
    else:
        print(f" [OK] Found {vector_count} documents in collection")


async def _check_document_search(qdrant_manager):
    """Embed a sample query, search Qdrant with it and print the top results.

    Any exception is caught and printed so that a failing search does not
    abort the rest of the status report.
    """
    print("\n3. Testing document search...")
    preview_length = 200
    try:
        embedder = EmbeddingGenerator(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="text-embedding-3-small",
        )

        test_query = "What is humanoid robotics?"
        query_result = await embedder.generate_embedding(test_query)
        query_embedding = query_result["embedding"]

        # A deliberately low score threshold: the goal is to see whether
        # anything at all can be retrieved, not to judge relevance.
        search_results = await qdrant_manager.search_similar(
            query_embedding=query_embedding,
            limit=3,
            score_threshold=0.1,
        )

        print(f" Query: {test_query}")
        print(f" Results found: {len(search_results)}")

        if not search_results:
            print(" [WARNING] No documents retrieved even with low threshold!")
            return

        print("\n Top results:")
        for position, result in enumerate(search_results, start=1):
            score = result.get("score", 0)
            content = result.get("content", "")
            # Only mark the preview as truncated when text was actually cut.
            if len(content) > preview_length:
                content = content[:preview_length] + "..."
            file_name = result.get("metadata", {}).get("file_name", "unknown")
            print(f"\n Result {position}:")
            print(f" Score: {score:.4f}")
            print(f" File: {file_name}")
            print(f" Content: {content}")
    except Exception as e:
        print(f" [ERROR] Search test failed: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async status check to completion on a
    # fresh event loop.
    status_check = check_system()
    asyncio.run(status_check)