jerryzh168 committed on
Commit
7421b93
·
verified ·
1 Parent(s): cc5b425

Upload Gemma3ForConditionalGeneration

Browse files
config.json CHANGED
@@ -39,7 +39,7 @@
39
  }
40
  },
41
  "_type": "Int4WeightOnlyConfig",
42
- "_version": 1
43
  }
44
  },
45
  "quant_type_kwargs": {}
 
39
  }
40
  },
41
  "_type": "Int4WeightOnlyConfig",
42
+ "_version": 2
43
  }
44
  },
45
  "quant_type_kwargs": {}
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a6a9149e08d1dc203c84fbf19996d037e1d5c5e44803c143e7b12405218a805
3
- size 4987253534
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538b990ced48d6dca85707e82960f5d82522e8fa1dee7ce94c6bf6d36f8026c4
3
+ size 4975899385
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88eaa4edc5e786f4788d61a962656f7328265f4f686121a201a609e02ada46f8
3
- size 3548090296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baa4d2f81b76200df8e943c0e4eab95336b5782b36c09e8a731e54191ff2a0dd
3
+ size 3190968481
pytorch_model.bin.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 8534683872
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "pytorch_model-00001-of-00002.bin",
@@ -148,13 +148,13 @@
148
  "language_model.model.layers.18.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
149
  "language_model.model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
150
  "language_model.model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
151
- "language_model.model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
152
- "language_model.model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
153
  "language_model.model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
154
- "language_model.model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
155
- "language_model.model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
156
- "language_model.model.layers.19.post_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
157
- "language_model.model.layers.19.pre_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
158
  "language_model.model.layers.19.self_attn.k_norm.weight": "pytorch_model-00001-of-00002.bin",
159
  "language_model.model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
160
  "language_model.model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
@@ -174,19 +174,19 @@
174
  "language_model.model.layers.2.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
175
  "language_model.model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
176
  "language_model.model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
177
- "language_model.model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
178
- "language_model.model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
179
- "language_model.model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
180
- "language_model.model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
181
- "language_model.model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
182
- "language_model.model.layers.20.post_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
183
- "language_model.model.layers.20.pre_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
184
- "language_model.model.layers.20.self_attn.k_norm.weight": "pytorch_model-00002-of-00002.bin",
185
- "language_model.model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
186
- "language_model.model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
187
- "language_model.model.layers.20.self_attn.q_norm.weight": "pytorch_model-00002-of-00002.bin",
188
- "language_model.model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
189
- "language_model.model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
190
  "language_model.model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
191
  "language_model.model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
192
  "language_model.model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
@@ -194,12 +194,12 @@
194
  "language_model.model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
195
  "language_model.model.layers.21.post_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
196
  "language_model.model.layers.21.pre_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
197
- "language_model.model.layers.21.self_attn.k_norm.weight": "pytorch_model-00002-of-00002.bin",
198
- "language_model.model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
199
- "language_model.model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
200
- "language_model.model.layers.21.self_attn.q_norm.weight": "pytorch_model-00002-of-00002.bin",
201
- "language_model.model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
202
- "language_model.model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
203
  "language_model.model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
204
  "language_model.model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
205
  "language_model.model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
 
1
  {
2
  "metadata": {
3
+ "total_size": 8166129312
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "pytorch_model-00001-of-00002.bin",
 
148
  "language_model.model.layers.18.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
149
  "language_model.model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
150
  "language_model.model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
151
+ "language_model.model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
152
+ "language_model.model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
153
  "language_model.model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
154
+ "language_model.model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
155
+ "language_model.model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
156
+ "language_model.model.layers.19.post_feedforward_layernorm.weight": "pytorch_model-00001-of-00002.bin",
157
+ "language_model.model.layers.19.pre_feedforward_layernorm.weight": "pytorch_model-00001-of-00002.bin",
158
  "language_model.model.layers.19.self_attn.k_norm.weight": "pytorch_model-00001-of-00002.bin",
159
  "language_model.model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
160
  "language_model.model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
 
174
  "language_model.model.layers.2.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
175
  "language_model.model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
176
  "language_model.model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
177
+ "language_model.model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
178
+ "language_model.model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
179
+ "language_model.model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
180
+ "language_model.model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
181
+ "language_model.model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
182
+ "language_model.model.layers.20.post_feedforward_layernorm.weight": "pytorch_model-00001-of-00002.bin",
183
+ "language_model.model.layers.20.pre_feedforward_layernorm.weight": "pytorch_model-00001-of-00002.bin",
184
+ "language_model.model.layers.20.self_attn.k_norm.weight": "pytorch_model-00001-of-00002.bin",
185
+ "language_model.model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
186
+ "language_model.model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
187
+ "language_model.model.layers.20.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
188
+ "language_model.model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
189
+ "language_model.model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
190
  "language_model.model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
191
  "language_model.model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
192
  "language_model.model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
 
194
  "language_model.model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
195
  "language_model.model.layers.21.post_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
196
  "language_model.model.layers.21.pre_feedforward_layernorm.weight": "pytorch_model-00002-of-00002.bin",
197
+ "language_model.model.layers.21.self_attn.k_norm.weight": "pytorch_model-00001-of-00002.bin",
198
+ "language_model.model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
199
+ "language_model.model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
200
+ "language_model.model.layers.21.self_attn.q_norm.weight": "pytorch_model-00001-of-00002.bin",
201
+ "language_model.model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
202
+ "language_model.model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
203
  "language_model.model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
204
  "language_model.model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
205
  "language_model.model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",