Text Classification
Transformers
PyTorch
Arabic
English
distilbert
chemistry
biology
finance
legal
music
code
art
climate
medical
emotion
endpoints-template
Instructions to use PetraAI/Zalmati with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use PetraAI/Zalmati with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="PetraAI/Zalmati")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("PetraAI/Zalmati")
model = AutoModelForSequenceClassification.from_pretrained("PetraAI/Zalmati")
```
- Notebooks
- Google Colab
- Kaggle
| from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Tokenizer | |
| import argparse, os | |
| import sys | |
| import json | |
| from conversion.tokenize import tokenize | |
| from conversion.quantize import embeddings, measure_quant, quant | |
| from conversion.optimize import optimize | |
| from conversion.compile import compile_model | |
| # import tracemalloc | |
| # tracemalloc.start() | |
# Command-line interface of the conversion script.
# --in_dir and --cal_dataset use "" as their "not given" value; the script
# turns that into None after parsing.
parser = argparse.ArgumentParser(description = "Convert model to ExLlamaV2")
parser.add_argument("-i", "--in_dir", type = str, help = "Input directory", default = "")
# Required: the parsed value is passed straight to os.path.abspath(), which
# raises TypeError on None. Rejecting a missing -o here yields a normal argparse
# usage error instead.
parser.add_argument("-o", "--out_dir", type = str, required = True, help = "Output directory")
parser.add_argument("-c", "--cal_dataset", type = str, help = "Calibration dataset (.parquet file)", default = "")
parser.add_argument("-r", "--dataset_rows", type = int, default = 100, help = "Number of rows to apply from dataset")
parser.add_argument("-mr", "--measurement_rows", type = int, default = 16, help = "Number of rows to apply from dataset when measuring")
parser.add_argument("-gr", "--gpu_rows", type = int, default = 16, help = "Threshold for paging hidden state to CPU")
parser.add_argument("-l", "--length", type = int, default = 2048, help = "Max no. tokens per sample")
parser.add_argument("-ml", "--measurement_length", type = int, default = 2048, help = "Max no. tokens per sample when measuring")
parser.add_argument("-b", "--bits", type = float, default = 4.156, help = "Target bits per weight")
parser.add_argument("-hb", "--head_bits", type = int, default = 6, help = "Target bits per weight (head layer)")
# Optional path to a JSON file from an earlier run whose measurement is reused.
parser.add_argument("-m", "--measurement", type = str, help = "Reuse previous measurement")
args = parser.parse_args()

# Arguments
#
# Paths are made absolute. An empty string for --in_dir / --cal_dataset means
# "not given" and is represented as None from here on.

in_dir = None if args.in_dir == "" else os.path.abspath(args.in_dir)
out_dir = os.path.abspath(args.out_dir)
cal_dataset = None if args.cal_dataset == "" else os.path.abspath(args.cal_dataset)
dataset_rows = args.dataset_rows
measurement_rows = args.measurement_rows
gpu_rows = args.gpu_rows
length = args.length
measurement_length = args.measurement_length
bits = args.bits
head_bits = args.head_bits
reuse_measurement = args.measurement

# The output directory is never created by this script; it has to exist already.
# isdir() (rather than exists()) also rejects a regular file at that path, which
# would otherwise only fail later when the directory is listed.
if not os.path.isdir(out_dir):
    print(f" ## Error: Directory not found: {out_dir}")
    # Non-zero status so callers and shell pipelines can detect the failure.
    sys.exit(1)
# Job file

job_file = os.path.join(out_dir, "job.json")

# Resolve the model directory before it is handed to the model config.
# An -i given on the command line is used as-is. Without -i, fall back to the
# input directory recorded in an existing job file, so that a job can be resumed
# with only -o. If neither is available, stop with a readable error here instead
# of passing None to the config.
# NOTE(review): how ExLlamaV2Config.prepare() reacts to model_dir = None is not
# visible in this file - confirm.
model_dir = in_dir
if model_dir is None and os.path.exists(job_file):
    with open(job_file, "r") as f:
        model_dir = json.load(f).get("in_dir")

if model_dir is None:
    print(f" ## Error: No input directory specified")
    sys.exit(1)

# Create model without loading weights

config = ExLlamaV2Config()
config.model_dir = model_dir
config.prepare()

model = ExLlamaV2(config)
model.load(lazy = True)

tokenizer = ExLlamaV2Tokenizer(config)
| # Create new job | |
def save_job():
    """Write the module-level `job` dict to `job_file` as indented JSON.

    Takes no arguments; it reads the globals `job` and `job_file`. It is also
    passed to the conversion stages as a callback.

    The JSON text is produced before any file is opened, then written to a
    temporary file next to `job_file` and moved into place with os.replace().
    A serialization error or an interruption mid-write therefore leaves the
    previously saved job file intact instead of truncated.

    Raises:
        TypeError: if `job` contains a value that json cannot serialize.
        OSError: if the temporary file cannot be written or moved into place.
    """
    payload = json.dumps(job, indent = 4)

    tmp_file = job_file + ".tmp"
    with open(tmp_file, "w") as f:
        f.write(payload)

    # Same directory as the target, so the move replaces job_file in one step.
    os.replace(tmp_file, job_file)
# No job file in the output directory yet: validate the inputs, build a fresh
# job description from the command-line options and save it.
if not os.path.exists(job_file):

    print(f" -- Beginning new job")

    if os.listdir(out_dir):
        print(f" !! Warning: Output directory is not empty: {out_dir}")

    # Both the input directory and the calibration dataset are mandatory for a
    # new job. Exit with a non-zero status so callers can detect the failure.
    if in_dir is None:
        print(f" ## Error: No input directory specified")
        sys.exit(1)

    if cal_dataset is None:
        print(f" ## Error: No calibration dataset specified")
        sys.exit(1)

    job = {
        "in_dir": in_dir,
        "out_dir": out_dir,
        "cal_dataset": cal_dataset,
        "dataset_rows": dataset_rows,
        "measurement_rows": measurement_rows,
        "gpu_rows": gpu_rows,
        "length": length,
        "measurement_length": measurement_length,
        "bits": bits,
        "head_bits": head_bits,
        "progress": "begin",
    }

    # Optionally import the measurement results stored in a JSON file given with
    # -m. The presence of "reuse_measurement" in the job is what the main loop
    # checks to skip the measurement stages.
    if reuse_measurement is not None:
        with open(reuse_measurement, "r") as f:
            imp_measurement = json.load(f)
        job["measurement"] = imp_measurement["measurement"]
        job["last_module_idx"] = imp_measurement["last_module_idx"]
        job["base_perplexity"] = imp_measurement["base_perplexity"]
        job["reuse_measurement"] = reuse_measurement

    save_job()

# Resume existing job

else:

    print(f" -- Resuming job")
    print(f" !! Note: Overriding options with settings from existing job")

    with open(job_file, "r") as f:
        job = json.load(f)

    # A job carrying an "invalid" key is refused.
    # NOTE(review): nothing in this file sets that key - presumably one of the
    # conversion stages does; confirm.
    if "invalid" in job:
        print(" ** Error: Corrupted job")
        sys.exit(1)

    # The only setting taken from the command line on resume: the output
    # directory given now replaces the one stored in the job.
    job["out_dir"] = out_dir
# Feedback: echo the effective settings. Everything except the output directory
# is read from the job, so a resumed job shows its stored values.

print(f" -- Input: {job['in_dir']}")
print(f" -- Output: {out_dir}")
print(f" -- Calibration dataset: {job['cal_dataset']}, {job['dataset_rows']} / {job['measurement_rows']} ({job['gpu_rows']}) rows, {job['length']} tokens per sample")
print(f" -- Target bits per weight: {job['bits']} (decoder), {job['head_bits']} (head)")

# Make sure subfolders exist
# exist_ok avoids the check-then-create race and is a no-op on resume, when the
# folder is already there.

out_tensor_dir = os.path.join(job["out_dir"], "out_tensor")
os.makedirs(out_tensor_dir, exist_ok = True)
# Do the things
#
# Small state machine driven by job["progress"]. Each pass runs exactly one
# stage, records the next state in the job and saves it, so an interrupted run
# resumes at the stage it had reached. State order:
#
#   begin -> initial_embeddings -> measure_quant -> optimize -> tokens_cal
#         -> embeddings -> quant -> compile -> finished
#
# A job that reuses an imported measurement jumps from "begin" straight to
# "optimize".

while True:

    progress = job["progress"]

    if progress == "begin":

        if "reuse_measurement" in job:

            print(f" -- Reusing measurement: {job['reuse_measurement']}")
            job["progress"] = "optimize"
            save_job()

        else:

            print(f" -- Tokenizing samples (measurement)...")
            tokenize(job, save_job, tokenizer, measure = True)
            job["progress"] = "initial_embeddings"
            save_job()

    elif progress == "initial_embeddings":

        print(f" -- Token embeddings (measurement)...")
        embeddings(job, save_job, model)
        job["progress"] = "measure_quant"
        save_job()

    elif progress == "measure_quant":

        print(f" -- Measuring quantization impact...")
        measure_quant(job, save_job, model)
        job["progress"] = "optimize"
        save_job()

    elif progress == "optimize":

        print(f" -- Optimizing...")
        optimize(job, save_job)
        job["progress"] = "tokens_cal"
        save_job()

    elif progress == "tokens_cal":

        print(f" -- Tokenizing samples...")
        tokenize(job, save_job, tokenizer)
        job["progress"] = "embeddings"
        save_job()

    elif progress == "embeddings":

        print(f" -- Token embeddings again...")
        embeddings(job, save_job, model)
        job["progress"] = "quant"
        save_job()

    elif progress == "quant":

        print(f" -- Quantizing...")
        quant(job, save_job, model)
        job["progress"] = "compile"
        save_job()

    elif progress == "compile":

        print(f" -- Compiling output file...")
        compile_model(job, save_job, model)
        job["progress"] = "finished"
        save_job()

    elif progress == "finished":

        break

    else:

        # An unrecognized state (for example a hand-edited job file) matches no
        # stage; without this branch the loop would spin forever.
        print(f" ## Error: Unknown job progress state: {progress}")
        sys.exit(1)

print(f" -- Finished")