# models_config.py
# Optimized for NVIDIA T4 Medium (16GB VRAM) with 4-bit quantization
# UPDATED: Local models only - no API fallback
LLM_CONFIG = {
    "primary_provider": "local",
    "models": {
        "reasoning_primary": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Single primary model for all text tasks
            "task": "general_reasoning",
            "max_tokens": 8000,  # Reduced from 10000
            "temperature": 0.7,
            "fallback": "microsoft/Phi-3-mini-4k-instruct",  # Verified non-gated 3.8B fallback model
            "is_chat_model": True,
            "use_4bit_quantization": True,  # Enable 4-bit quantization for 16GB T4
            "use_8bit_quantization": False
        },
        "embedding_specialist": {
            "model_id": "intfloat/e5-large-v2",  # 1024-dim embeddings for semantic similarity
            "task": "embeddings",
            "vector_dimensions": 1024,
            "purpose": "semantic_similarity",
            "is_chat_model": False
        },
        "classification_specialist": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Same model for all text tasks
            "task": "intent_classification",
            "max_length": 512,
            "specialization": "fast_inference",
            "latency_target": "<100ms",
            "is_chat_model": True,
            "use_4bit_quantization": True,
            "fallback": "microsoft/Phi-3-mini-4k-instruct"  # Verified non-gated 3.8B fallback model
        },
        "safety_checker": {
            "model_id": "Qwen/Qwen2.5-7B-Instruct",  # Same model for all text tasks
            "task": "content_moderation",
            "confidence_threshold": 0.85,
            "purpose": "bias_detection",
            "is_chat_model": True,
            "use_4bit_quantization": True,
            "fallback": "microsoft/Phi-3-mini-4k-instruct"  # Verified non-gated 3.8B fallback model
        }
    },
    "routing_logic": {
        "strategy": "task_based_routing",
        "fallback_chain": ["primary"],  # No API fallback
        "load_balancing": "single_model_reuse"
    },
    "quantization_settings": {
        "default_4bit": True,  # Enable 4-bit quantization by default for 16GB T4
        "default_8bit": False,
        "bnb_4bit_compute_dtype": "float16",
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_quant_type": "nf4"
    }
}
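

# --- Usage sketch (illustrative, not part of the shipped config) ---
# A minimal sketch of how the quantization_settings above could be mapped
# onto a transformers BitsAndBytesConfig when loading one of the chat model
# entries (is_chat_model=True). The helper name load_quantized_model and the
# device_map choice are assumptions for illustration; only the bnb_4bit_*
# fields mirror keys actually defined in this file. The embedding entry
# (intfloat/e5-large-v2) is not a causal LM and would need AutoModel or
# sentence-transformers instead.
def load_quantized_model(model_cfg: dict):
    """Hypothetical loader: build a BitsAndBytesConfig from the shared
    quantization_settings and load one chat-model entry with transformers."""
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    quant = LLM_CONFIG["quantization_settings"]
    bnb_config = None
    if model_cfg.get("use_4bit_quantization", quant["default_4bit"]):
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=getattr(torch, quant["bnb_4bit_compute_dtype"]),
            bnb_4bit_use_double_quant=quant["bnb_4bit_use_double_quant"],
            bnb_4bit_quant_type=quant["bnb_4bit_quant_type"],
        )

    tokenizer = AutoTokenizer.from_pretrained(model_cfg["model_id"])
    model = AutoModelForCausalLM.from_pretrained(
        model_cfg["model_id"],
        quantization_config=bnb_config,
        device_map="auto",  # let accelerate place layers on the T4 automatically
    )
    return model, tokenizer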
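

# --- Fallback sketch (illustrative, not part of the shipped config) ---
# One way the per-model "fallback" entries could be honoured: attempt the
# primary model_id first and retry with the non-gated Phi-3 fallback if
# loading fails (e.g. a gated-repo access error or CUDA out-of-memory).
# load_with_fallback is a hypothetical helper built on the sketch above.
def load_with_fallback(model_key: str):
    """Hypothetical wrapper: try the primary model, then its fallback."""
    cfg = LLM_CONFIG["models"][model_key]
    try:
        return load_quantized_model(cfg)
    except Exception as exc:  # gated repo, OOM, network error, ...
        fallback_id = cfg.get("fallback")
        if fallback_id is None:
            raise
        print(f"Primary model failed ({exc}); retrying with {fallback_id}")
        # Retry with a copy of the entry pointing at the fallback model id.
        return load_quantized_model({**cfg, "model_id": fallback_id})


# Example usage (hypothetical):
#   model, tokenizer = load_with_fallback("reasoning_primary")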