Commit b3e31f5 · Jae-Won Chung committed
1 Parent(s): e795d0f

Update docker-compose files
deployment/docker-compose-0.yaml
CHANGED

@@ -1,7 +1,7 @@
 services:
   MPT-7B:
     container_name: worker0
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
     shm_size: 1g
     networks:
@@ -19,7 +19,7 @@ services:
              capabilities: [gpu]
   Llama2-7B:
     container_name: worker1
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "/weights/metaai/Llama-2-7b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
     shm_size: 1g
     networks:
@@ -38,7 +38,7 @@ services:
              capabilities: [gpu]
   Vicuna-13B:
     container_name: worker2
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "lmsys/vicuna-13b-v1.5", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
     shm_size: 1g
     networks:
@@ -56,7 +56,7 @@ services:
              capabilities: [gpu]
   Llama2-13B:
     container_name: worker3
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "/weights/metaai/Llama-2-13b-chat-hf", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
     shm_size: 1g
     networks:
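For context, every worker service in this file follows the same shape, and only the image tag changes in this commit. Below is a minimal sketch of one service with the newly pinned tag; the deploy/GPU-reservation layout, device id, and network name are illustrative assumptions (only the image, command, shm_size, networks, and capabilities: [gpu] lines actually appear in the hunks above):

services:
  MPT-7B:
    container_name: worker0
    image: mlenergy/tgi:v1.0.0
    command: ["--model-id", "mosaicml/mpt-7b-chat", "--num-shard", "1", "--otlp-endpoint", "http://jaeger:4317"]
    shm_size: 1g
    networks:
      - default                      # placeholder network name, not taken from this diff
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia         # assumed GPU reservation layout
              device_ids: ["0"]      # placeholder device id
              capabilities: [gpu]    # matches the context line in the hunks above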
deployment/docker-compose-1.yaml
CHANGED

@@ -1,7 +1,7 @@
 services:
   Llama2-70B-INT8:
     container_name: worker4
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
     shm_size: 1g
     environment:
@@ -21,7 +21,7 @@ services:
              capabilities: [gpu]
   MPT-30B:
     container_name: worker5
-    image: mlenergy/tgi:
+    image: mlenergy/tgi:v1.0.0
     command: ["--model-id", "mosaicml/mpt-30b-chat", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317"]
     shm_size: 1g
     networks:
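The 70B worker in docker-compose-1.yaml differs from the single-GPU workers: it shards across two GPUs and quantizes with bitsandbytes, and it declares an environment: block whose contents are not part of this diff. A minimal sketch of that service with the pinned tag, assuming the environment block carries a Hugging Face access token for the gated meta-llama weights (an assumption, not shown in this commit):

services:
  Llama2-70B-INT8:
    container_name: worker4
    image: mlenergy/tgi:v1.0.0
    command: ["--model-id", "meta-llama/Llama-2-70b-chat-hf", "--num-shard", "2", "--otlp-endpoint", "http://jaeger:4317", "--quantize", "bitsandbytes"]
    shm_size: 1g
    environment:
      HUGGING_FACE_HUB_TOKEN: ${HF_TOKEN}   # assumed placeholder; the real environment block is not shown in this diff

Pinning all six workers to an explicit mlenergy/tgi:v1.0.0 tag keeps them on the same TGI build across pulls; the stacks can then be started as usual, e.g. docker compose -f deployment/docker-compose-0.yaml up -d.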