# Docker Compose configuration file - GPU deployment
version: '3.8'
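#
# Usage sketch (assumes this file is the project's docker-compose.yml;
# otherwise pass it explicitly with `docker compose -f <file>`):
#   docker compose up -d --build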
services:
  adaptive-rag:
    build:
      context: .
      dockerfile: Dockerfile.gpu
    container_name: adaptive-rag-gpu
    restart: unless-stopped
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
      - TOKENIZERS_PARALLELISM=false
      - HF_HOME=/app/models
      - TRANSFORMERS_CACHE=/app/models
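      # Note: recent transformers releases prefer HF_HOME over the older
      # TRANSFORMERS_CACHE variable; both are set here so either code path
      # caches models under /app/models.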
    env_file:
      - .env
    ports:
      - "8000:8000"
      - "8001:8001"  # optional: monitoring port
    volumes:
      - ./data:/app/data
      - ./models:/app/models
      - ./logs:/app/logs
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
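    # GPU reservations like the one above require NVIDIA drivers and the
    # NVIDIA Container Toolkit on the host; without them, Compose cannot
    # hand the device through to the container.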
    depends_on:
      - ollama
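    # Note: depends_on only orders container startup; it does not wait for
    # Ollama to be ready to serve requests. Adding a healthcheck to the
    # ollama service plus `condition: service_healthy` here is one way to
    # enforce readiness, if needed.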

  ollama:
    image: ollama/ollama:latest
    container_name: ollama-gpu
    restart: unless-stopped
    ports:
      - "11434:11434"
    volumes:
      - ollama-data:/root/.ollama
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
command: ["ollama", "serve"]

  # optional: monitoring service
  nvidia-smi-exporter:
    image: mindprince/nvidia_gpu_prometheus_exporter:0.1
    container_name: gpu-monitor
    restart: unless-stopped
    ports:
      - "9445:9445"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
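    # To consume these metrics, point a Prometheus scrape config at
    # gpu-monitor:9445 (or the published host port 9445).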

volumes:
  ollama-data:
    driver: local
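# The named volume persists downloaded Ollama models across container
# restarts and image rebuilds.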