Werli committed on
Commit b0b0867 · verified · 1 Parent(s): 83782c8

Upload 6 files

Files changed (5)
  1. app.py +851 -823
  2. modules/classifyTags.py +429 -256
  3. modules/media_handler.py +212 -0
  4. modules/pixai.py +801 -810
  5. modules/video_processor.py +206 -0
app.py CHANGED
@@ -1,823 +1,851 @@
- import os, io, json, requests, spaces, argparse, traceback, tempfile, zipfile, re, ast, time
- import gradio as gr
- import numpy as np
- import huggingface_hub
- import onnxruntime as ort
- import pandas as pd
- from datetime import datetime, timezone
- from collections import defaultdict
- from PIL import Image, ImageOps
- from apscheduler.schedulers.background import BackgroundScheduler
- from modules.classifyTags import categorize_tags_output, generate_tags_json
- from modules.pixai import create_pixai_interface
- from modules.booru import create_booru_interface
-
- """ For GPU install all the requirements.txt and the following:
- pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu126
- pip install onnxruntime-gpu
- """
-
- """ It's recommended to create a venv:
- python -m venv venv
- venv\Scripts\activate
- pip install ...
- python app.py
- """
-
- TITLE = 'Multi-Tagger v1.4'
- DESCRIPTION = '\nMulti-Tagger is a versatile application for advanced image analysis and captioning. Supports <b>CUDA</b> and <b>CPU</b>.\n'
-
- SWINV2_MODEL_DSV3_REPO = 'SmilingWolf/wd-swinv2-tagger-v3'
- CONV_MODEL_DSV3_REPO = 'SmilingWolf/wd-convnext-tagger-v3'
- VIT_MODEL_DSV3_REPO = 'SmilingWolf/wd-vit-tagger-v3'
- VIT_LARGE_MODEL_DSV3_REPO = 'SmilingWolf/wd-vit-large-tagger-v3'
- EVA02_LARGE_MODEL_DSV3_REPO = 'SmilingWolf/wd-eva02-large-tagger-v3'
- MOAT_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-moat-tagger-v2'
- SWIN_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-swinv2-tagger-v2'
- CONV_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-convnext-tagger-v2'
- CONV2_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-convnextv2-tagger-v2'
- VIT_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-vit-tagger-v2'
- EVA02_LARGE_MODEL_IS_DSV1_REPO = 'deepghs/idolsankaku-eva02-large-tagger-v1'
- SWINV2_MODEL_IS_DSV1_REPO = 'deepghs/idolsankaku-swinv2-tagger-v1'
-
- # Global variables for model components (for memory management)
- CURRENT_MODEL = None
- CURRENT_MODEL_NAME = None
- CURRENT_TAGS_DF = None
- CURRENT_TAG_NAMES = None
- CURRENT_RATING_INDEXES = None
- CURRENT_GENERAL_INDEXES = None
- CURRENT_CHARACTER_INDEXES = None
- CURRENT_MODEL_TARGET_SIZE = None
-
- # Custom CSS for gallery styling
- css = """
- #custom-gallery {--row-height: 180px;display: grid;grid-auto-rows: min-content;gap: 10px;}
- #custom-gallery .thumbnail-item {height: var(--row-height);width: 100%;position: relative;overflow: hidden;border-radius: 8px;box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);transition: transform 0.2s ease, box-shadow 0.2s ease;}
- #custom-gallery .thumbnail-item:hover {transform: translateY(-3px);box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);}
- #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: contain;margin: 0 auto;display: block;}
- #custom-gallery .thumbnail-item img.portrait {max-width: 100%;}
- #custom-gallery .thumbnail-item img.landscape {max-height: 100%;}
- .gallery-container {max-height: 500px;overflow-y: auto;padding-right: 0px;--size-80: 500px;}
- .thumbnails {display: flex;position: absolute;bottom: 0;width: 120px;overflow-x: scroll;padding-top: 320px;padding-bottom: 280px;padding-left: 4px;flex-wrap: wrap;}
- #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: initial;width: fit-content;margin: 0px auto;display: block;}
- """
-
- MODEL_FILENAME = 'model.onnx'
- LABEL_FILENAME = 'selected_tags.csv'
-
- class Timer:
-     """Utility class for measuring execution time of different operations"""
-
-     def __init__(self):
-         self.start_time = time.perf_counter()
-         self.checkpoints = [('Start', self.start_time)]
-
-     def checkpoint(self, label='Checkpoint'):
-         """Add a checkpoint with a label"""
-         now = time.perf_counter()
-         self.checkpoints.append((label, now))
-
-     def report(self, is_clear_checkpoints=True):
-         """Report time elapsed since last checkpoint"""
-         max_label_length = max(len(label) for (label, _) in self.checkpoints) if self.checkpoints else 0
-         prev_time = self.checkpoints[0][1] if self.checkpoints else self.start_time
-
-         for (label, curr_time) in self.checkpoints[1:]:
-             elapsed = curr_time - prev_time
-             print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
-             prev_time = curr_time
-
-         if is_clear_checkpoints:
-             self.checkpoints.clear()
-             self.checkpoint()
-
-     def report_all(self):
-         """Report all checkpoint times including total execution time"""
-         print('\n> Execution Time Report:')
-         max_label_length = max(len(label) for (label, _) in self.checkpoints) if len(self.checkpoints) > 0 else 0
-         prev_time = self.start_time
-
-         for (label, curr_time) in self.checkpoints[1:]:
-             elapsed = curr_time - prev_time
-             print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
-             prev_time = curr_time
-
-         total_time = self.checkpoints[-1][1] - self.start_time if self.checkpoints else 0
-         print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n")
-         self.checkpoints.clear()
-
-     def restart(self):
-         """Restart the timer"""
-         self.start_time = time.perf_counter()
-         self.checkpoints = [('Start', self.start_time)]
-
- def parse_args() -> argparse.Namespace:
-     """Parse command line arguments"""
-     parser = argparse.ArgumentParser()
-     parser.add_argument('--score-slider-step', type=float, default=0.05)
-     parser.add_argument('--score-general-threshold', type=float, default=0.35)
-     parser.add_argument('--score-character-threshold', type=float, default=0.85)
-     parser.add_argument('--share', action='store_true')
-     return parser.parse_args()
-
- def load_labels(dataframe) -> tuple:
-     """Load tag names and their category indexes from the dataframe"""
-     name_series = dataframe['name']
-     tag_names = name_series.tolist()
-
-     # Find indexes for different tag categories
-     rating_indexes = list(np.where(dataframe['category'] == 9)[0])
-     general_indexes = list(np.where(dataframe['category'] == 0)[0])
-     character_indexes = list(np.where(dataframe['category'] == 4)[0])
-
-     return tag_names, rating_indexes, general_indexes, character_indexes
-
- def mcut_threshold(probs):
-     """Calculate threshold using Maximum Change in second derivative (MCut) method"""
-     sorted_probs = probs[probs.argsort()[::-1]]
-     difs = sorted_probs[:-1] - sorted_probs[1:]
-     t = difs.argmax()
-     thresh = (sorted_probs[t] + sorted_probs[t + 1]) / 2
-     return thresh
-
- def _download_model_files(model_repo):
-     """Download model files from HuggingFace Hub"""
-     csv_path = huggingface_hub.hf_hub_download(model_repo, LABEL_FILENAME)
-     model_path = huggingface_hub.hf_hub_download(model_repo, MODEL_FILENAME)
-     return csv_path, model_path
-
- def create_optimized_ort_session(model_path):
-     """Create an optimized ONNX Runtime session with GPU support"""
-     # Configure session options for better performance
-     sess_options = ort.SessionOptions()
-     sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-     sess_options.intra_op_num_threads = 0 # Use all available cores
-     sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
-     sess_options.enable_mem_pattern = True
-     sess_options.enable_cpu_mem_arena = True
-
-     # Check available providers
-     available_providers = ort.get_available_providers()
-     print(f"Available ONNX Runtime providers: {available_providers}")
-
-     # Configure execution providers (prefer CUDA if available)
-     providers = []
-
-     # Use CUDA if available
-     if 'CUDAExecutionProvider' in available_providers:
-         providers.append('CUDAExecutionProvider')
-         print("Using CUDA provider for ONNX inference")
-     else:
-         print("CUDA provider not available, falling back to CPU")
-
-     # Always include CPU as fallback
-     providers.append('CPUExecutionProvider')
-
-     try:
-         session = ort.InferenceSession(model_path, sess_options, providers=providers)
-         print(f"Model loaded with providers: {session.get_providers()}")
-         return session
-     except Exception as e:
-         print(f"Failed to create ONNX session: {e}")
-         raise
-
- def _load_model_components_optimized(model_repo):
-     """Load and optimize model components"""
-     global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_TAG_NAMES
-     global CURRENT_RATING_INDEXES, CURRENT_GENERAL_INDEXES, CURRENT_CHARACTER_INDEXES, CURRENT_MODEL_TARGET_SIZE
-
-     # Only reload if model changed
-     if model_repo == CURRENT_MODEL_NAME and CURRENT_MODEL is not None:
-         return
-
-     # Download files
-     csv_path, model_path = _download_model_files(model_repo)
-
-     # Load optimized ONNX model
-     CURRENT_MODEL = create_optimized_ort_session(model_path)
-
-     # Load tags
-     tags_df = pd.read_csv(csv_path)
-     tag_names, rating_indexes, general_indexes, character_indexes = load_labels(tags_df)
-
-     # Store in global variables
-     CURRENT_TAGS_DF = tags_df
-     CURRENT_TAG_NAMES = tag_names
-     CURRENT_RATING_INDEXES = rating_indexes
-     CURRENT_GENERAL_INDEXES = general_indexes
-     CURRENT_CHARACTER_INDEXES = character_indexes
-
-     # Get model input size
-     _, height, width, _ = CURRENT_MODEL.get_inputs()[0].shape
-     CURRENT_MODEL_TARGET_SIZE = height
-     CURRENT_MODEL_NAME = model_repo
-
- def _raw_predict(image_array, model_session):
-     """Run raw prediction using the model session"""
-     input_name = model_session.get_inputs()[0].name
-     label_name = model_session.get_outputs()[0].name
-     preds = model_session.run([label_name], {input_name: image_array})[0]
-     return preds[0].astype(float)
-
- def unload_model():
-     """Explicitly unload the current model from memory"""
-     global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_TAG_NAMES
-     global CURRENT_RATING_INDEXES, CURRENT_GENERAL_INDEXES, CURRENT_CHARACTER_INDEXES, CURRENT_MODEL_TARGET_SIZE
-
-     # Delete the model session
-     if CURRENT_MODEL is not None:
-         del CURRENT_MODEL
-         CURRENT_MODEL = None
-
-     # Clear other large objects
-     CURRENT_TAGS_DF = None
-     CURRENT_TAG_NAMES = None
-     CURRENT_RATING_INDEXES = None
-     CURRENT_GENERAL_INDEXES = None
-     CURRENT_CHARACTER_INDEXES = None
-     CURRENT_MODEL_TARGET_SIZE = None
-     CURRENT_MODEL_NAME = None
-
-     # Force garbage collection
-     import gc
-     gc.collect()
-
-     # Clear CUDA cache if using GPU
-     try:
-         import torch
-         if torch.cuda.is_available():
-             torch.cuda.empty_cache()
-     except ImportError:
-         pass
-
- def cleanup_after_processing():
-     """Cleanup resources after processing"""
-     unload_model()
-
- class Predictor:
-     """Main predictor class for handling image tagging"""
-
-     def __init__(self):
-         self.model_components = None
-         self.last_loaded_repo = None
-
-     def load_model(self, model_repo):
-         """Load model if not already loaded"""
-         if model_repo == self.last_loaded_repo and self.model_components is not None:
-             return
-         _load_model_components_optimized(model_repo)
-         self.last_loaded_repo = model_repo
-
-     def prepare_image(self, path):
-         """Prepare image for model input"""
-         image = Image.open(path)
-         image = image.convert('RGBA')
-         target_size = CURRENT_MODEL_TARGET_SIZE
-
-         # Create white background and composite
-         canvas = Image.new('RGBA', image.size, (255, 255, 255))
-         canvas.alpha_composite(image)
-         image = canvas.convert('RGB')
-
-         # Pad to square
-         image_shape = image.size
-         max_dim = max(image_shape)
-         pad_left = (max_dim - image_shape[0]) // 2
-         pad_top = (max_dim - image_shape[1]) // 2
-         padded_image = Image.new('RGB', (max_dim, max_dim), (255, 255, 255))
-         padded_image.paste(image, (pad_left, pad_top))
-
-         # Resize if needed
-         if max_dim != target_size:
-             padded_image = padded_image.resize((target_size, target_size), Image.BICUBIC)
-
-         # Convert to array and preprocess
-         image_array = np.asarray(padded_image, dtype=np.float32)
-         image_array = image_array[:, :, ::-1] # BGR to RGB
-         return np.expand_dims(image_array, axis=0)
-
-     def create_file(self, content: str, directory: str, fileName: str) -> str:
-         """Create a file with the given content"""
-         file_path = os.path.join(directory, fileName)
-         if fileName.endswith('.json'):
-             with open(file_path, 'w', encoding='utf-8') as file:
-                 file.write(content)
-         else:
-             with open(file_path, 'w+', encoding='utf-8') as file:
-                 file.write(content)
-         return file_path
-
-     def predict(self, gallery, model_repo, model_repo_2, general_thresh, general_mcut_enabled,
-                 character_thresh, character_mcut_enabled, characters_merge_enabled,
-                 additional_tags_prepend, additional_tags_append, tag_results, progress=gr.Progress()):
-         """Main prediction function for processing images"""
-         tag_results.clear()
-         gallery_len = len(gallery)
-         print(f"Predict load model: {model_repo}, gallery length: {gallery_len}")
-
-         timer = Timer()
-         progressRatio = 1
-         progressTotal = gallery_len + 1
-         current_progress = 0
-         txt_infos = []
-         output_dir = tempfile.mkdtemp()
-
-         if not os.path.exists(output_dir):
-             os.makedirs(output_dir)
-
-         # Load initial model
-         self.load_model(model_repo)
-         current_progress += progressRatio / progressTotal
-         progress(current_progress, desc='Initialize wd model finished')
-         timer.checkpoint("Initialize wd model")
-         timer.report()
-
-         name_counters = defaultdict(int)
-
-         for (idx, value) in enumerate(gallery):
-             try:
-                 # Handle duplicate filenames
-                 image_path = value[0]
-                 image_name = os.path.splitext(os.path.basename(image_path))[0]
-                 name_counters[image_name] += 1
-                 if name_counters[image_name] > 1:
-                     image_name = f"{image_name}_{name_counters[image_name]:02d}"
-
-                 # Prepare image
-                 image = self.prepare_image(image_path)
-                 print(f"Gallery {idx:02d}: Starting run first model ({model_repo})...")
-
-                 # Load and run first model
-                 self.load_model(model_repo)
-                 preds = _raw_predict(image, CURRENT_MODEL)
-                 labels = list(zip(CURRENT_TAG_NAMES, preds))
-
-                 # Process ratings
-                 ratings_names = [labels[i] for i in CURRENT_RATING_INDEXES]
-                 rating = dict(ratings_names)
-
-                 # Process general tags
-                 general_names = [labels[i] for i in CURRENT_GENERAL_INDEXES]
-                 if general_mcut_enabled:
-                     general_probs = np.array([x[1] for x in general_names])
-                     general_thresh_temp = mcut_threshold(general_probs)
-                 else:
-                     general_thresh_temp = general_thresh
-
-                 general_res = [x for x in general_names if x[1] > general_thresh_temp]
-                 general_res = dict(general_res)
-
-                 # Process character tags
-                 character_names = [labels[i] for i in CURRENT_CHARACTER_INDEXES]
-                 if character_mcut_enabled:
-                     character_probs = np.array([x[1] for x in character_names])
-                     character_thresh_temp = mcut_threshold(character_probs)
-                     character_thresh_temp = max(0.15, character_thresh_temp)
-                 else:
-                     character_thresh_temp = character_thresh
-
-                 character_res = [x for x in character_names if x[1] > character_thresh_temp]
-                 character_res = dict(character_res)
-                 character_list_1 = list(character_res.keys())
-
-                 # Sort general tags by confidence
-                 sorted_general_list_1 = sorted(general_res.items(), key=lambda x: x[1], reverse=True)
-                 sorted_general_list_1 = [x[0] for x in sorted_general_list_1]
-
-                 # Handle second model if provided
-                 if model_repo_2 and model_repo_2 != model_repo:
-                     print(f"Gallery {idx:02d}: Starting run second model ({model_repo_2})...")
-                     self.load_model(model_repo_2)
-                     preds_2 = _raw_predict(image, CURRENT_MODEL)
-                     labels_2 = list(zip(CURRENT_TAG_NAMES, preds_2))
-
-                     # Process general tags from second model
-                     general_names_2 = [labels_2[i] for i in CURRENT_GENERAL_INDEXES]
-                     if general_mcut_enabled:
-                         general_probs_2 = np.array([x[1] for x in general_names_2])
-                         general_thresh_temp_2 = mcut_threshold(general_probs_2)
-                     else:
-                         general_thresh_temp_2 = general_thresh
-
-                     general_res_2 = [x for x in general_names_2 if x[1] > general_thresh_temp_2]
-                     general_res_2 = dict(general_res_2)
-
-                     # Process character tags from second model
-                     character_names_2 = [labels_2[i] for i in CURRENT_CHARACTER_INDEXES]
-                     if character_mcut_enabled:
-                         character_probs_2 = np.array([x[1] for x in character_names_2])
-                         character_thresh_temp_2 = mcut_threshold(character_probs_2)
-                         character_thresh_temp_2 = max(0.15, character_thresh_temp_2)
-                     else:
-                         character_thresh_temp_2 = character_thresh
-
-                     character_res_2 = [x for x in character_names_2 if x[1] > character_thresh_temp_2]
-                     character_res_2 = dict(character_res_2)
-                     character_list_2 = list(character_res_2.keys())
-
-                     # Sort general tags from second model
-                     sorted_general_list_2 = sorted(general_res_2.items(), key=lambda x: x[1], reverse=True)
-                     sorted_general_list_2 = [x[0] for x in sorted_general_list_2]
-
-                     # Combine results from both models
-                     combined_character_list = list(set(character_list_1 + character_list_2))
-                     combined_general_list = list(set(sorted_general_list_1 + sorted_general_list_2))
-                 else:
-                     combined_character_list = character_list_1
-                     combined_general_list = sorted_general_list_1
-
-                 # Remove characters from general tags if merging is disabled
-                 if not characters_merge_enabled:
-                     combined_character_list = [item for item in combined_character_list
-                                                if item not in combined_general_list]
-
-                 # Handle additional tags
-                 prepend_list = [tag.strip() for tag in additional_tags_prepend.split(',') if tag.strip()]
-                 append_list = [tag.strip() for tag in additional_tags_append.split(',') if tag.strip()]
-
-                 # Avoid duplicates in prepend/append lists
-                 if prepend_list and append_list:
-                     append_list = [item for item in append_list if item not in prepend_list]
-
-                 # Remove prepended tags from main list
-                 if prepend_list:
-                     combined_general_list = [item for item in combined_general_list if item not in prepend_list]
-
-                 # Remove appended tags from main list
-                 if append_list:
-                     combined_general_list = [item for item in combined_general_list if item not in append_list]
-
-                 # Combine all tags
-                 combined_general_list = prepend_list + combined_general_list + append_list
-
-                 # Format output string
-                 sorted_general_strings = ', '.join(
-                     (combined_character_list if characters_merge_enabled else []) +
-                     combined_general_list
-                 ).replace('(', '\\(').replace(')', '\\)').replace('_', ' ')
-
-                 # Generate categorized output
-                 categorized_strings = categorize_tags_output(sorted_general_strings, character_res).replace('(', '\\(').replace(')', '\\)')
-                 categorized_json = generate_tags_json(sorted_general_strings, character_res)
-
-                 # Create output files
-                 txt_content = f"Output (string): {sorted_general_strings}\n\nCategorized Output: {categorized_strings}"
-                 txt_file = self.create_file(txt_content, output_dir, f"{image_name}_output.txt")
-                 txt_infos.append({'path': txt_file, 'name': f"{image_name}_output.txt"})
-
-                 # Save image copy
-                 image_path = value[0]
-                 image = Image.open(image_path)
-                 image.save(os.path.join(output_dir, f"{image_name}.png"), format='PNG')
-                 txt_infos.append({'path': os.path.join(output_dir, f"{image_name}.png"), 'name': f"{image_name}.png"})
-
-                 # Create tags text file
-                 txt_file = self.create_file(sorted_general_strings, output_dir, image_name + '.txt')
-                 # Create categorized tags file
-                 categorized_file = self.create_file(categorized_strings, output_dir, f"{image_name}_categorized.txt")
-                 txt_infos.append({'path': categorized_file, 'name': f"{image_name}_categorized.txt"})
-                 txt_infos.append({'path': txt_file, 'name': image_name + '.txt'})
-
-                 # Create JSON file
-                 json_content = json.dumps(categorized_json, indent=2, ensure_ascii=False)
-                 json_file = self.create_file(json_content, output_dir, f"{image_name}_categorized.json")
-                 txt_infos.append({'path': json_file, 'name': f"{image_name}_categorized.json"})
-
-                 # Store results
-                 tag_results[image_path] = {
-                     'strings': sorted_general_strings,
-                     'categorized_strings': categorized_strings,
-                     'categorized_json': categorized_json,
-                     'rating': rating,
-                     'character_res': character_res,
-                     'general_res': general_res
-                 }
-
-                 # Update progress
-                 current_progress += progressRatio / progressTotal
-                 progress(current_progress, desc=f"image{idx:02d}, predict finished")
-                 timer.checkpoint(f"image{idx:02d}, predict finished")
-                 timer.report()
-
-             except Exception as e:
-                 print(traceback.format_exc())
-                 print('Error predict: ' + str(e))
-
-         # Create download zip
-         download = []
-         if txt_infos is not None and len(txt_infos) > 0:
-             downloadZipPath = os.path.join(
-                 output_dir,
-                 'Multi-Tagger-' + datetime.now().strftime('%Y%m%d-%H%M%S') + '.zip'
-             )
-             with zipfile.ZipFile(downloadZipPath, 'w', zipfile.ZIP_DEFLATED) as taggers_zip:
-                 for info in txt_infos:
-                     taggers_zip.write(info['path'], arcname=info['name'])
-             # If using GPU, model will auto unload after zip file creation
-             cleanup_after_processing() # Comment here to turn off this behavior
-             download.append(downloadZipPath)
-
-         progress(1, desc=f"Predict completed")
-         timer.report_all()
-         print('Predict is complete.')
-
-         # Return first image results as default
-         first_image_results = '', {}, {}, {}, '', {}
-         if gallery and len(gallery) > 0:
-             first_image_path = gallery[0][0]
-             if first_image_path in tag_results:
-                 first_result = tag_results[first_image_path]
-                 character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-                                                       for name in first_result['character_res'].keys()])
-                 first_image_results = (
-                     first_result['strings'],
-                     first_result['rating'],
-                     character_tags_formatted,
-                     first_result['general_res'],
-                     first_result.get('categorized_strings', ''),
-                     first_result.get('categorized_json', {})
-                 )
-
-
-         return (
-             download,
-             first_image_results[0],
-             first_image_results[1],
-             first_image_results[2],
-             first_image_results[3],
-             first_image_results[4],
-             first_image_results[5],
-             tag_results
-         )
-
- def get_selection_from_gallery(gallery: list, tag_results: dict, selected_state: gr.SelectData):
-     # Return first image results if no selection
-     if not selected_state and gallery and len(gallery) > 0:
-         first_image_path = gallery[0][0]
-         if first_image_path in tag_results:
-             first_result = tag_results[first_image_path]
-             character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-                                                   for name in first_result['character_res'].keys()])
-             return (
-                 first_result['strings'],
-                 first_result['rating'],
-                 character_tags_formatted,
-                 first_result['general_res'],
-                 first_result.get('categorized_strings', ''),
-                 first_result.get('categorized_json', {})
-             )
-
-     if not selected_state:
-         return '', {}, '', {}, '', {}
-
-     # Get selected image path
-     selected_value = selected_state.value
-     image_path = None
-
-     if isinstance(selected_value, dict) and 'image' in selected_value:
-         image_path = selected_value['image']['path']
-     elif isinstance(selected_value, (list, tuple)) and len(selected_value) > 0:
-         image_path = selected_value[0]
-     else:
-         image_path = str(selected_value)
-
-     # Return stored results
-     if image_path in tag_results:
-         result = tag_results[image_path]
-
-         character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-                                               for name in result['character_res'].keys()])
-         return (
-             result['strings'],
-             result['rating'],
-             character_tags_formatted,
-             result['general_res'],
-             result.get('categorized_strings', ''),
-             result.get('categorized_json', {})
-         )
-
-     return '', {}, '', {}, '', {}
-
- def append_gallery(gallery: list, image: str):
-     """Add a single image to the gallery"""
-     if gallery is None:
-         gallery = []
-     if not image:
-         return gallery, None
-     gallery.append(image)
-     return gallery, None
-
- def extend_gallery(gallery: list, images):
-     """Add multiple images to the gallery"""
-     if gallery is None:
-         gallery = []
-     if not images:
-         return gallery
-     gallery.extend(images)
-     return gallery
-
- # Parse arguments and initialize predictor
- args = parse_args()
- predictor = Predictor()
- dropdown_list = [
-     EVA02_LARGE_MODEL_DSV3_REPO, VIT_LARGE_MODEL_DSV3_REPO, SWINV2_MODEL_DSV3_REPO,
-     CONV_MODEL_DSV3_REPO, VIT_MODEL_DSV3_REPO, MOAT_MODEL_DSV2_REPO,
-     SWIN_MODEL_DSV2_REPO, CONV_MODEL_DSV2_REPO, CONV2_MODEL_DSV2_REPO,
-     VIT_MODEL_DSV2_REPO, EVA02_LARGE_MODEL_IS_DSV1_REPO, SWINV2_MODEL_IS_DSV1_REPO
- ]
-
- def _restart_space():
-     """Restart the HuggingFace Space periodically for stability"""
-     HF_TOKEN = os.getenv('HF_TOKEN')
-     if not HF_TOKEN:
-         raise ValueError('HF_TOKEN environment variable is not set.')
-     huggingface_hub.HfApi().restart_space(
-         repo_id='Werli/Multi-Tagger',
-         token=HF_TOKEN,
-         factory_reboot=False
-     )
-
- # Setup scheduler for periodic restarts
- scheduler = BackgroundScheduler()
- restart_space_job = scheduler.add_job(_restart_space, 'interval', seconds=172800)
- scheduler.start()
- next_run_time_utc = restart_space_job.next_run_time.astimezone(timezone.utc)
- NEXT_RESTART = f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC) - The space will restart every 2 days to ensure stability and performance. It uses a background scheduler to handle the restart process."
-
- with gr.Blocks(title=TITLE, css=css, theme='Werli/Purple-Crimson-Gradio-Theme', fill_width=True) as demo:
-     gr.Markdown(value=f"<h1 style='text-align: center; margin-bottom: 1rem'>{TITLE}</h1>")
-     gr.Markdown(value=f"<p style='text-align: center;'>{DESCRIPTION}</p>")
-
-     with gr.Tab(label='Waifu Diffusion'):
-         with gr.Row():
-             with gr.Column():
-
-                 with gr.Column(variant='panel'):
-                     image_input = gr.Image(
-                         label='Upload an Image or clicking paste from clipboard button',
-                         type='filepath',
-                         sources=['upload', 'clipboard'],
-                         height=150
-                     )
-                     with gr.Row():
-                         upload_button = gr.UploadButton(
-                             'Upload multiple images',
-                             file_types=['image'],
-                             file_count='multiple',
-                             size='sm'
-                         )
-                     gallery = gr.Gallery(
-                         columns=2,
-                         show_share_button=False,
-                         interactive=True,
-                         height='auto',
-                         label='Grid of images',
-                         preview=False,
-                         elem_id='custom-gallery'
-                     )
-                     submit = gr.Button(value='Analyze Images', variant='primary', size='lg')
-                 with gr.Column(variant='panel'):
-                     model_repo = gr.Dropdown(
-                         dropdown_list,
-                         value=EVA02_LARGE_MODEL_DSV3_REPO,
-                         label='1st Model'
-                     )
-                     PLUS = '+?'
-                     gr.Markdown(value=f"<p style='text-align: center;'>{PLUS}</p>")
-                     model_repo_2 = gr.Dropdown(
-                         [None] + dropdown_list,
-                         value=None,
-                         label='2nd Model (Optional)',
-                         info='Select another model for diversified results.'
-                     )
-
-                     with gr.Row():
-                         general_thresh = gr.Slider(
-                             0, 1,
-                             step=args.score_slider_step,
-                             value=args.score_general_threshold,
-                             label='General Tags Threshold',
-                             scale=3
-                         )
-                         general_mcut_enabled = gr.Checkbox(
-                             value=False,
-                             label='Use MCut threshold',
-                             scale=1
-                         )
-
-                     with gr.Row():
-                         character_thresh = gr.Slider(
-                             0, 1,
-                             step=args.score_slider_step,
-                             value=args.score_character_threshold,
-                             label='Character Tags Threshold',
-                             scale=3
-                         )
-                         character_mcut_enabled = gr.Checkbox(
-                             value=False,
-                             label='Use MCut threshold',
-                             scale=1
-                         )
-
-                     with gr.Row():
-                         characters_merge_enabled = gr.Checkbox(
-                             value=False,
-                             label='Merge characters into the string output',
-                             scale=1
-                         )
-
-                     with gr.Row():
-                         additional_tags_prepend = gr.Text(
-                             label='Prepend Additional tags (comma split)'
-                         )
-                         additional_tags_append = gr.Text(
-                             label='Append Additional tags (comma split)'
-                         )
-
-                     with gr.Row():
-                         clear = gr.ClearButton(
-                             components=[
-                                 gallery, model_repo, general_thresh, general_mcut_enabled,
-                                 character_thresh, character_mcut_enabled, characters_merge_enabled,
-                                 additional_tags_prepend, additional_tags_append
-                             ],
-                             variant='secondary',
-                             size='lg'
-                         )
-
-             with gr.Column(variant='panel'):
-                 download_file = gr.File(label='Download')
-                 character_res = gr.Textbox(
-                     label="Character tags",
-                     show_copy_button=True,
-                     lines=3
-                 )
-                 sorted_general_strings = gr.Textbox(
-                     label='Output',
-                     show_label=True,
-                     show_copy_button=True,
-                     lines=5
-                 )
-                 categorized_strings = gr.Textbox(
-                     label='Categorized',
-                     show_label=True,
-                     show_copy_button=True,
-                     lines=5
-                 )
-                 tags_json = gr.JSON(
-                     label='Categorized Tags (JSON)',
-                     visible=True
-                 )
-                 rating = gr.Label(label='Rating')
-                 general_res = gr.Textbox(
-                     label="General tags",
-                     show_copy_button=True,
-                     lines=3,
-                     visible=False # Temp
-                 )
-                 # State to store results
-                 tag_results = gr.State({})
-
-         # Event handlers
-         image_input.change(
-             append_gallery,
-             inputs=[gallery, image_input],
-             outputs=[gallery, image_input]
-         )
-
-         upload_button.upload(
-             extend_gallery,
-             inputs=[gallery, upload_button],
-             outputs=gallery
-         )
-
-         gallery.select(
-             get_selection_from_gallery,
-             inputs=[gallery, tag_results],
-             outputs=[sorted_general_strings, rating, character_res, general_res, categorized_strings, tags_json]
-         )
-
-         submit.click(
-             predictor.predict,
-             inputs=[
-                 gallery, model_repo, model_repo_2, general_thresh, general_mcut_enabled,
-                 character_thresh, character_mcut_enabled, characters_merge_enabled,
-                 additional_tags_prepend, additional_tags_append, tag_results
-             ],
-             outputs=[download_file, sorted_general_strings, rating, character_res, general_res, categorized_strings, tags_json, tag_results]
-         )
-
-         gr.Examples(
-             [['images/1girl.png', EVA02_LARGE_MODEL_DSV3_REPO, 0.35, False, 0.85, False]],
-             inputs=[image_input, model_repo, general_thresh, general_mcut_enabled, character_thresh, character_mcut_enabled]
-         )
-         gr.Markdown('[Based on SmilingWolf/wd-tagger](https://huggingface.co/spaces/SmilingWolf/wd-tagger) <p style="text-align:right"><a href="https://huggingface.co/spaces/John6666/danbooru-tags-transformer-v2-with-wd-tagger-b">Prompt Enhancer</a></p>')
-     with gr.Tab("PixAI"):
-         pixai_interface = create_pixai_interface()
-     with gr.Tab("Booru Image Fetcher"):
-         booru_interface = create_booru_interface()
-
-     gr.Markdown(NEXT_RESTART)
-
- demo.queue(max_size=5).launch(show_error=True)
+ import os, io, json, requests, spaces, argparse, traceback, tempfile, zipfile, re, ast, time
+ import gradio as gr
+ import numpy as np
+ import huggingface_hub
+ import onnxruntime as ort
+ import pandas as pd
+ from datetime import datetime, timezone
+ from collections import defaultdict
+ from PIL import Image, ImageOps
+ from apscheduler.schedulers.background import BackgroundScheduler
+ from modules.classifyTags import categorize_tags_output, generate_tags_json, process_tags_for_misc
+ from modules.pixai import create_pixai_interface
+ from modules.booru import create_booru_interface
+ from modules.multi_comfy import create_multi_comfy
+ from modules.media_handler import handle_single_media_upload, handle_multiple_media_uploads
+
+ """ For GPU install all the requirements.txt and the following:
+ pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu126
+ pip install onnxruntime-gpu
+ """
+
+ """ It's recommended to create a venv:
+ python -m venv venv
+ venv\Scripts\activate
+ pip install ...
+ python app.py
+ """
+
+ TITLE = 'Multi-Tagger v1.4'
+ DESCRIPTION = '\nMulti-Tagger is a versatile application for advanced image analysis and captioning. Supports <b>CUDA</b> and <b>CPU</b>.\n'
+
+ SWINV2_MODEL_DSV3_REPO = 'SmilingWolf/wd-swinv2-tagger-v3'
+ CONV_MODEL_DSV3_REPO = 'SmilingWolf/wd-convnext-tagger-v3'
+ VIT_MODEL_DSV3_REPO = 'SmilingWolf/wd-vit-tagger-v3'
+ VIT_LARGE_MODEL_DSV3_REPO = 'SmilingWolf/wd-vit-large-tagger-v3'
+ EVA02_LARGE_MODEL_DSV3_REPO = 'SmilingWolf/wd-eva02-large-tagger-v3'
+ MOAT_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-moat-tagger-v2'
+ SWIN_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-swinv2-tagger-v2'
+ CONV_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-convnext-tagger-v2'
+ CONV2_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-convnextv2-tagger-v2'
+ VIT_MODEL_DSV2_REPO = 'SmilingWolf/wd-v1-4-vit-tagger-v2'
+ EVA02_LARGE_MODEL_IS_DSV1_REPO = 'deepghs/idolsankaku-eva02-large-tagger-v1'
+ SWINV2_MODEL_IS_DSV1_REPO = 'deepghs/idolsankaku-swinv2-tagger-v1'
+
+ # Global variables for model components (for memory management)
+ CURRENT_MODEL = None
+ CURRENT_MODEL_NAME = None
+ CURRENT_TAGS_DF = None
+ CURRENT_TAG_NAMES = None
+ CURRENT_RATING_INDEXES = None
+ CURRENT_GENERAL_INDEXES = None
+ CURRENT_CHARACTER_INDEXES = None
+ CURRENT_MODEL_TARGET_SIZE = None
+
+ # Custom CSS for gallery styling
+ css = """
+ #custom-gallery {--row-height: 180px;display: grid;grid-auto-rows: min-content;gap: 10px;}
+ #custom-gallery .thumbnail-item {height: var(--row-height);width: 100%;position: relative;overflow: hidden;border-radius: 8px;box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);transition: transform 0.2s ease, box-shadow 0.2s ease;}
+ #custom-gallery .thumbnail-item:hover {transform: translateY(-3px);box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);}
+ #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: contain;margin: 0 auto;display: block;}
+ #custom-gallery .thumbnail-item img.portrait {max-width: 100%;}
+ #custom-gallery .thumbnail-item img.landscape {max-height: 100%;}
+ .gallery-container {max-height: 500px;overflow-y: auto;padding-right: 0px;--size-80: 500px;}
+ .thumbnails {display: flex;position: absolute;bottom: 0;width: 120px;overflow-x: scroll;padding-top: 320px;padding-bottom: 280px;padding-left: 4px;flex-wrap: wrap;}
+ #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: initial;width: fit-content;margin: 0px auto;display: block;}
+ """
+
+ MODEL_FILENAME = 'model.onnx'
+ LABEL_FILENAME = 'selected_tags.csv'
+
+ class Timer:
+     """Utility class for measuring execution time of different operations"""
+
+     def __init__(self):
+         self.start_time = time.perf_counter()
+         self.checkpoints = [('Start', self.start_time)]
+
+     def checkpoint(self, label='Checkpoint'):
+         """Add a checkpoint with a label"""
+         now = time.perf_counter()
+         self.checkpoints.append((label, now))
+
+     def report(self, is_clear_checkpoints=True):
+         """Report time elapsed since last checkpoint"""
+         max_label_length = max(len(label) for (label, _) in self.checkpoints) if self.checkpoints else 0
+         prev_time = self.checkpoints[0][1] if self.checkpoints else self.start_time
+
+         for (label, curr_time) in self.checkpoints[1:]:
+             elapsed = curr_time - prev_time
+             print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
+             prev_time = curr_time
+
+         if is_clear_checkpoints:
+             self.checkpoints.clear()
+             self.checkpoint()
+
+     def report_all(self):
+         """Report all checkpoint times including total execution time"""
+         print('\n> Execution Time Report:')
+         max_label_length = max(len(label) for (label, _) in self.checkpoints) if len(self.checkpoints) > 0 else 0
+         prev_time = self.start_time
+
+         for (label, curr_time) in self.checkpoints[1:]:
+             elapsed = curr_time - prev_time
+             print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
+             prev_time = curr_time
+
+         total_time = self.checkpoints[-1][1] - self.start_time if self.checkpoints else 0
+         print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n")
+         self.checkpoints.clear()
+
+     def restart(self):
+         """Restart the timer"""
+         self.start_time = time.perf_counter()
+         self.checkpoints = [('Start', self.start_time)]
+
+ def parse_args() -> argparse.Namespace:
+     """Parse command line arguments"""
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--score-slider-step', type=float, default=0.05)
+     parser.add_argument('--score-general-threshold', type=float, default=0.35)
+     parser.add_argument('--score-character-threshold', type=float, default=0.85)
+     parser.add_argument('--share', action='store_true')
+     return parser.parse_args()
+
+ def load_labels(dataframe) -> tuple:
+     """Load tag names and their category indexes from the dataframe"""
+     name_series = dataframe['name']
+     tag_names = name_series.tolist()
+
+     # Find indexes for different tag categories
+     rating_indexes = list(np.where(dataframe['category'] == 9)[0])
+     general_indexes = list(np.where(dataframe['category'] == 0)[0])
+     character_indexes = list(np.where(dataframe['category'] == 4)[0])
+
+     return tag_names, rating_indexes, general_indexes, character_indexes
+
+ def mcut_threshold(probs):
+     """Calculate threshold using Maximum Change in second derivative (MCut) method"""
+     sorted_probs = probs[probs.argsort()[::-1]]
+     difs = sorted_probs[:-1] - sorted_probs[1:]
+     t = difs.argmax()
+     thresh = (sorted_probs[t] + sorted_probs[t + 1]) / 2
+     return thresh
+
+ def _download_model_files(model_repo):
+     """Download model files from HuggingFace Hub"""
+     csv_path = huggingface_hub.hf_hub_download(model_repo, LABEL_FILENAME)
+     model_path = huggingface_hub.hf_hub_download(model_repo, MODEL_FILENAME)
+     return csv_path, model_path
+
+ def create_optimized_ort_session(model_path):
+     """Create an optimized ONNX Runtime session with GPU support"""
+     # Configure session options for better performance
+     sess_options = ort.SessionOptions()
+     sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+     sess_options.intra_op_num_threads = 0 # Use all available cores
+     sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
+     sess_options.enable_mem_pattern = True
+     sess_options.enable_cpu_mem_arena = True
+
+     # Check available providers
+     available_providers = ort.get_available_providers()
+     print(f"Available ONNX Runtime providers: {available_providers}")
+
+     # Configure execution providers (prefer CUDA if available)
+     providers = []
+
+     # Use CUDA if available
+     if 'CUDAExecutionProvider' in available_providers:
+         providers.append('CUDAExecutionProvider')
+         print("Using CUDA provider for ONNX inference")
+     else:
+         print("CUDA provider not available, falling back to CPU")
+
+     # Always include CPU as fallback
+     providers.append('CPUExecutionProvider')
+
+     try:
+         session = ort.InferenceSession(model_path, sess_options, providers=providers)
+         print(f"Model loaded with providers: {session.get_providers()}")
+         return session
+     except Exception as e:
+         print(f"Failed to create ONNX session: {e}")
+         raise
+
+ def _load_model_components_optimized(model_repo):
+     """Load and optimize model components"""
+     global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_TAG_NAMES
+     global CURRENT_RATING_INDEXES, CURRENT_GENERAL_INDEXES, CURRENT_CHARACTER_INDEXES, CURRENT_MODEL_TARGET_SIZE
+
+     # Only reload if model changed
+     if model_repo == CURRENT_MODEL_NAME and CURRENT_MODEL is not None:
+         return
+
+     # Download files
+     csv_path, model_path = _download_model_files(model_repo)
+
+     # Load optimized ONNX model
+     CURRENT_MODEL = create_optimized_ort_session(model_path)
+
+     # Load tags
+     tags_df = pd.read_csv(csv_path)
+     tag_names, rating_indexes, general_indexes, character_indexes = load_labels(tags_df)
+
+     # Store in global variables
+     CURRENT_TAGS_DF = tags_df
+     CURRENT_TAG_NAMES = tag_names
+     CURRENT_RATING_INDEXES = rating_indexes
+     CURRENT_GENERAL_INDEXES = general_indexes
+     CURRENT_CHARACTER_INDEXES = character_indexes
+
+     # Get model input size
+     _, height, width, _ = CURRENT_MODEL.get_inputs()[0].shape
+     CURRENT_MODEL_TARGET_SIZE = height
+     CURRENT_MODEL_NAME = model_repo
+
+ def _raw_predict(image_array, model_session):
+     """Run raw prediction using the model session"""
+     input_name = model_session.get_inputs()[0].name
+     label_name = model_session.get_outputs()[0].name
+     preds = model_session.run([label_name], {input_name: image_array})[0]
+     return preds[0].astype(float)
+
+ def unload_model():
+     """Explicitly unload the current model from memory"""
+     global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_TAG_NAMES
+     global CURRENT_RATING_INDEXES, CURRENT_GENERAL_INDEXES, CURRENT_CHARACTER_INDEXES, CURRENT_MODEL_TARGET_SIZE
+
+     # Delete the model session
+     if CURRENT_MODEL is not None:
+         del CURRENT_MODEL
+         CURRENT_MODEL = None
+
+     # Clear other large objects
+     CURRENT_TAGS_DF = None
+     CURRENT_TAG_NAMES = None
+     CURRENT_RATING_INDEXES = None
+     CURRENT_GENERAL_INDEXES = None
+     CURRENT_CHARACTER_INDEXES = None
+     CURRENT_MODEL_TARGET_SIZE = None
+     CURRENT_MODEL_NAME = None
+
+     # Force garbage collection
+     import gc
+     gc.collect()
+
+     # Clear CUDA cache if using GPU
+     try:
+         import torch
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+     except ImportError:
+         pass
+
+ def cleanup_after_processing():
+     """Cleanup resources after processing"""
+     unload_model()
+
+ class Predictor:
+     """Main predictor class for handling image tagging"""
+
+     def __init__(self):
+         self.model_components = None
+         self.last_loaded_repo = None
+
+     def load_model(self, model_repo):
+         """Load model if not already loaded"""
+         if model_repo == self.last_loaded_repo and self.model_components is not None:
+             return
+         _load_model_components_optimized(model_repo)
+         self.last_loaded_repo = model_repo
+
+     def prepare_image(self, path):
+         """Prepare image for model input"""
+         image = Image.open(path)
+         image = image.convert('RGBA')
+         target_size = CURRENT_MODEL_TARGET_SIZE
+
+         # Create white background and composite
+         canvas = Image.new('RGBA', image.size, (255, 255, 255))
+         canvas.alpha_composite(image)
+         image = canvas.convert('RGB')
+
+         # Pad to square
+         image_shape = image.size
+         max_dim = max(image_shape)
+         pad_left = (max_dim - image_shape[0]) // 2
+         pad_top = (max_dim - image_shape[1]) // 2
+         padded_image = Image.new('RGB', (max_dim, max_dim), (255, 255, 255))
+         padded_image.paste(image, (pad_left, pad_top))
+
+         # Resize if needed
+         if max_dim != target_size:
+             padded_image = padded_image.resize((target_size, target_size), Image.BICUBIC)
+
+         # Convert to array and preprocess
+         image_array = np.asarray(padded_image, dtype=np.float32)
+         image_array = image_array[:, :, ::-1] # BGR to RGB
+         return np.expand_dims(image_array, axis=0)
+
+     def create_file(self, content: str, directory: str, fileName: str) -> str:
+         """Create a file with the given content"""
+         file_path = os.path.join(directory, fileName)
+         if fileName.endswith('.json'):
+             with open(file_path, 'w', encoding='utf-8') as file:
+                 file.write(content)
+         else:
+             with open(file_path, 'w+', encoding='utf-8') as file:
+                 file.write(content)
+         return file_path
+
+     def predict(self, gallery, model_repo, model_repo_2, general_thresh, general_mcut_enabled,
+                 character_thresh, character_mcut_enabled, characters_merge_enabled,
+                 additional_tags_prepend, additional_tags_append, tag_results, progress=gr.Progress()):
+         """Main prediction function for processing images"""
+         tag_results.clear()
+         gallery_len = len(gallery)
+         print(f"Predict load model: {model_repo}, gallery length: {gallery_len}")
+
+         timer = Timer()
+         progressRatio = 1
+         progressTotal = gallery_len + 1
+         current_progress = 0
+         txt_infos = []
+         output_dir = tempfile.mkdtemp()
+
+         if not os.path.exists(output_dir):
+             os.makedirs(output_dir)
+
+         # Load initial model
+         self.load_model(model_repo)
+         current_progress += progressRatio / progressTotal
+         progress(current_progress, desc='Initialize wd model finished')
+         timer.checkpoint("Initialize wd model")
+         timer.report()
+
+         name_counters = defaultdict(int)
+
+         for (idx, value) in enumerate(gallery):
+             try:
+                 # Handle duplicate filenames
+                 image_path = value[0]
+                 image_name = os.path.splitext(os.path.basename(image_path))[0]
+                 name_counters[image_name] += 1
+                 if name_counters[image_name] > 1:
+                     image_name = f"{image_name}_{name_counters[image_name]:02d}"
+
+                 # Prepare image
+                 image = self.prepare_image(image_path)
+                 print(f"Gallery {idx:02d}: Starting run first model ({model_repo})...")
+
+                 # Load and run first model
+                 self.load_model(model_repo)
+                 preds = _raw_predict(image, CURRENT_MODEL)
+                 labels = list(zip(CURRENT_TAG_NAMES, preds))
+
+                 # Process ratings
+                 ratings_names = [labels[i] for i in CURRENT_RATING_INDEXES]
+                 rating = dict(ratings_names)
+
+                 # Process general tags
+                 general_names = [labels[i] for i in CURRENT_GENERAL_INDEXES]
+                 if general_mcut_enabled:
+                     general_probs = np.array([x[1] for x in general_names])
+                     general_thresh_temp = mcut_threshold(general_probs)
+                 else:
+                     general_thresh_temp = general_thresh
+
+                 general_res = [x for x in general_names if x[1] > general_thresh_temp]
+                 general_res = dict(general_res)
+
+                 # Process character tags
+                 character_names = [labels[i] for i in CURRENT_CHARACTER_INDEXES]
+                 if character_mcut_enabled:
+                     character_probs = np.array([x[1] for x in character_names])
+                     character_thresh_temp = mcut_threshold(character_probs)
+                     character_thresh_temp = max(0.15, character_thresh_temp)
+                 else:
+                     character_thresh_temp = character_thresh
+
+                 character_res = [x for x in character_names if x[1] > character_thresh_temp]
+                 character_res = dict(character_res)
+                 character_list_1 = list(character_res.keys())
+
+                 # Sort general tags by confidence
+                 sorted_general_list_1 = sorted(general_res.items(), key=lambda x: x[1], reverse=True)
+                 sorted_general_list_1 = [x[0] for x in sorted_general_list_1]
+
+                 # Handle second model if provided
+                 if model_repo_2 and model_repo_2 != model_repo:
+                     print(f"Gallery {idx:02d}: Starting run second model ({model_repo_2})...")
+                     self.load_model(model_repo_2)
+                     preds_2 = _raw_predict(image, CURRENT_MODEL)
+                     labels_2 = list(zip(CURRENT_TAG_NAMES, preds_2))
+
+                     # Process general tags from second model
+                     general_names_2 = [labels_2[i] for i in CURRENT_GENERAL_INDEXES]
+                     if general_mcut_enabled:
+                         general_probs_2 = np.array([x[1] for x in general_names_2])
+                         general_thresh_temp_2 = mcut_threshold(general_probs_2)
+                     else:
+                         general_thresh_temp_2 = general_thresh
+
+                     general_res_2 = [x for x in general_names_2 if x[1] > general_thresh_temp_2]
+                     general_res_2 = dict(general_res_2)
+
+                     # Process character tags from second model
+                     character_names_2 = [labels_2[i] for i in CURRENT_CHARACTER_INDEXES]
+                     if character_mcut_enabled:
+                         character_probs_2 = np.array([x[1] for x in character_names_2])
+                         character_thresh_temp_2 = mcut_threshold(character_probs_2)
+                         character_thresh_temp_2 = max(0.15, character_thresh_temp_2)
+                     else:
+                         character_thresh_temp_2 = character_thresh
+
+                     character_res_2 = [x for x in character_names_2 if x[1] > character_thresh_temp_2]
+                     character_res_2 = dict(character_res_2)
+                     character_list_2 = list(character_res_2.keys())
+
+                     # Sort general tags from second model
+                     sorted_general_list_2 = sorted(general_res_2.items(), key=lambda x: x[1], reverse=True)
+                     sorted_general_list_2 = [x[0] for x in sorted_general_list_2]
+
+                     # Combine results from both models
+                     combined_character_list = list(set(character_list_1 + character_list_2))
+                     combined_general_list = list(set(sorted_general_list_1 + sorted_general_list_2))
+                 else:
+                     combined_character_list = character_list_1
+                     combined_general_list = sorted_general_list_1
+
+                 # Remove characters from general tags if merging is disabled
+                 if not characters_merge_enabled:
+                     combined_character_list = [item for item in combined_character_list
+                                                if item not in combined_general_list]
+
+                 # Handle additional tags
+                 prepend_list = [tag.strip() for tag in additional_tags_prepend.split(',') if tag.strip()]
+                 append_list = [tag.strip() for tag in additional_tags_append.split(',') if tag.strip()]
+
+                 # Avoid duplicates in prepend/append lists
+                 if prepend_list and append_list:
+                     append_list = [item for item in append_list if item not in prepend_list]
+
+                 # Remove prepended tags from main list
+                 if prepend_list:
+                     combined_general_list = [item for item in combined_general_list if item not in prepend_list]
+
+                 # Remove appended tags from main list
+                 if append_list:
+                     combined_general_list = [item for item in combined_general_list if item not in append_list]
+
+                 # Combine all tags
+                 combined_general_list = prepend_list + combined_general_list + append_list
+
+                 # Format output string
+                 sorted_general_strings = ', '.join(
+                     (combined_character_list if characters_merge_enabled else []) +
+                     combined_general_list
+                 ).replace('(', '\\(').replace(')', '\\)').replace('_', ' ')
+
+                 # Generate categorized output
+                 categorized_strings = categorize_tags_output(sorted_general_strings, character_res).replace('(', '\\(').replace(')', '\\)')
+                 categorized_json = generate_tags_json(sorted_general_strings, character_res)
+
+                 # Create output files
+                 txt_content = f"Output (string): {sorted_general_strings}\n\nCategorized Output: {categorized_strings}"
+                 txt_file = self.create_file(txt_content, output_dir, f"{image_name}_output.txt")
+                 txt_infos.append({'path': txt_file, 'name': f"{image_name}_output.txt"})
+
+                 # Save image copy
+                 image_path = value[0]
+                 image = Image.open(image_path)
+                 image.save(os.path.join(output_dir, f"{image_name}.png"), format='PNG')
+                 txt_infos.append({'path': os.path.join(output_dir, f"{image_name}.png"), 'name': f"{image_name}.png"})
+
+                 # Create tags text file
+                 txt_file = self.create_file(sorted_general_strings, output_dir, image_name + '.txt')
+                 # Create categorized tags file
+                 categorized_file = self.create_file(categorized_strings, output_dir, f"{image_name}_categorized.txt")
+                 txt_infos.append({'path': categorized_file, 'name': f"{image_name}_categorized.txt"})
+                 txt_infos.append({'path': txt_file, 'name': image_name + '.txt'})
+
+                 # Create JSON file
+                 json_content = json.dumps(categorized_json, indent=2, ensure_ascii=False)
+                 json_file = self.create_file(json_content, output_dir, f"{image_name}_categorized.json")
+                 txt_infos.append({'path': json_file, 'name': f"{image_name}_categorized.json"})
+
+                 # Store results
+                 tag_results[image_path] = {
+                     'strings': sorted_general_strings,
+                     'categorized_strings': categorized_strings,
+                     'categorized_json': categorized_json,
+                     'rating': rating,
+                     'character_res': character_res,
+                     'general_res': general_res
+                 }
+
+                 # Update progress
+                 current_progress += progressRatio / progressTotal
+                 progress(current_progress, desc=f"image{idx:02d}, predict finished")
+                 timer.checkpoint(f"image{idx:02d}, predict finished")
+                 timer.report()
+
+             except Exception as e:
+                 print(traceback.format_exc())
+                 print('Error predict: ' + str(e))
+
+         # Create download zip
+         download = []
+         if txt_infos is not None and len(txt_infos) > 0:
+             downloadZipPath = os.path.join(
+                 output_dir,
+                 'Multi-Tagger-' + datetime.now().strftime('%Y%m%d-%H%M%S') + '.zip'
+             )
+             with zipfile.ZipFile(downloadZipPath, 'w', zipfile.ZIP_DEFLATED) as taggers_zip:
+                 for info in txt_infos:
+                     taggers_zip.write(info['path'], arcname=info['name'])
+             # If using GPU, model will auto unload after zip file creation
+             cleanup_after_processing() # Comment here to turn off this behavior
+             download.append(downloadZipPath)
+
+         progress(1, desc=f"Predict completed")
+         timer.report_all()
+         print('Predict is complete.')
+
+         # Return first image results as default
+         first_image_results = '', {}, {}, {}, '', {}
+         if gallery and len(gallery) > 0:
+             first_image_path = gallery[0][0]
+             if first_image_path in tag_results:
+                 first_result = tag_results[first_image_path]
+                 character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
+                                                       for name in first_result['character_res'].keys()])
+                 first_image_results = (
+                     first_result['strings'],
+                     first_result['rating'],
+                     character_tags_formatted,
+                     first_result['general_res'],
+                     first_result.get('categorized_strings', ''),
+                     first_result.get('categorized_json', {})
+                 )
+
+
+         return (
+             download,
+             first_image_results[0],
+             first_image_results[1],
+             first_image_results[2],
+             first_image_results[3],
+             first_image_results[4],
+             first_image_results[5],
+             tag_results
+         )
+
+ def get_selection_from_gallery(gallery: list, tag_results: dict, selected_state: gr.SelectData):
+     # Return first image results if no selection
+     if not selected_state and gallery and len(gallery) > 0:
+         first_image_path = gallery[0][0]
+         if first_image_path in tag_results:
+             first_result = tag_results[first_image_path]
+             character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
+                                                   for name in first_result['character_res'].keys()])
+             return (
+                 first_result['strings'],
+                 first_result['rating'],
+                 character_tags_formatted,
+                 first_result['general_res'],
+                 first_result.get('categorized_strings', ''),
+                 first_result.get('categorized_json', {})
+             )
+
+     if not selected_state:
+         return '', {}, '', {}, '', {}
+
+     # Get selected image path
+     selected_value = selected_state.value
+     image_path = None
+
+     if isinstance(selected_value, dict) and 'image' in selected_value:
+         image_path = selected_value['image']['path']
+     elif isinstance(selected_value, (list, tuple)) and len(selected_value) > 0:
+         image_path = selected_value[0]
+     else:
+         image_path = str(selected_value)
+
+     # Return stored results
+     if image_path in tag_results:
+         result = tag_results[image_path]
+
+         character_tags_formatted = ", ".join([name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
+                                               for name in result['character_res'].keys()])
+         return (
+             result['strings'],
+             result['rating'],
+             character_tags_formatted,
+             result['general_res'],
+             result.get('categorized_strings', ''),
+             result.get('categorized_json', {})
+         )
+
+     return '', {}, '', {}, '', {}
+
+ def append_gallery(gallery: list, image: str):
+     """Add a single media file (image or video) to the gallery"""
+     return handle_single_media_upload(image, gallery)
+
+ def extend_gallery(gallery: list, images):
+     """Add multiple media files (images or videos) to the gallery"""
+     return handle_multiple_media_uploads(images, gallery)
+
+ # Parse arguments and initialize predictor
+ args = parse_args()
+ predictor = Predictor()
+ dropdown_list = [
+     EVA02_LARGE_MODEL_DSV3_REPO, VIT_LARGE_MODEL_DSV3_REPO, SWINV2_MODEL_DSV3_REPO,
+     CONV_MODEL_DSV3_REPO, VIT_MODEL_DSV3_REPO, MOAT_MODEL_DSV2_REPO,
+     SWIN_MODEL_DSV2_REPO, CONV_MODEL_DSV2_REPO, CONV2_MODEL_DSV2_REPO,
+     VIT_MODEL_DSV2_REPO, EVA02_LARGE_MODEL_IS_DSV1_REPO, SWINV2_MODEL_IS_DSV1_REPO
+ ]
+
+ def _restart_space():
+     """Restart the HuggingFace Space periodically for stability"""
+     HF_TOKEN = os.getenv('HF_TOKEN')
+     if not HF_TOKEN:
+         raise ValueError('HF_TOKEN environment variable is not set.')
+     huggingface_hub.HfApi().restart_space(
+         repo_id='Werli/Multi-Tagger',
+         token=HF_TOKEN,
+         factory_reboot=False
+     )
+
+ # Setup scheduler for periodic restarts
+ scheduler = BackgroundScheduler()
+ restart_space_job = scheduler.add_job(_restart_space, 'interval', seconds=172800)
+ scheduler.start()
+ next_run_time_utc = restart_space_job.next_run_time.astimezone(timezone.utc)
+ NEXT_RESTART = f"Next Restart: {next_run_time_utc.strftime('%Y-%m-%d %H:%M:%S')} (UTC) - The space will restart every 2 days to ensure stability and performance. It uses a background scheduler to handle the restart process."
+
+
+ with gr.Blocks(title=TITLE, css=css, theme="Werli/Purple-Crimson-Gradio-Theme", fill_width=True) as demo:
+     gr.Markdown(value=f"<h1 style='text-align: center; margin-bottom: 1rem'>{TITLE}</h1>")
+     gr.Markdown(value=f"<p style='text-align: center;'>{DESCRIPTION}</p>")
+
+     with gr.Tab(label='Waifu Diffusion'):
+         with gr.Row():
+             with gr.Column():
+
+                 with gr.Column(variant='panel'):
+                     image_input = gr.Image(
+                         label='Upload an Image (or paste from clipboard)',
+                         type='filepath',
+                         sources=['upload', 'clipboard'],
654
+ height=150
655
+ )
656
+ with gr.Row():
657
+ upload_button = gr.UploadButton(
658
+ 'Upload multiple images or videos',
659
+ file_types=['image', 'video'],
660
+ file_count='multiple',
661
+ size='sm'
662
+ )
663
+ gallery = gr.Gallery(
664
+ columns=2,
665
+ show_share_button=False,
666
+ interactive=True,
667
+ height='auto',
668
+ label='Grid of images',
669
+ preview=False,
670
+ elem_id='custom-gallery'
671
+ )
672
+ submit = gr.Button(value='Analyze Images', variant='primary', size='lg')
673
+ with gr.Column(variant='panel'):
674
+ model_repo = gr.Dropdown(
675
+ dropdown_list,
676
+ value=EVA02_LARGE_MODEL_DSV3_REPO,
677
+ label='1st Model'
678
+ )
679
+ PLUS = '+?'
680
+ gr.Markdown(value=f"<p style='text-align: center;'>{PLUS}</p>")
681
+ model_repo_2 = gr.Dropdown(
682
+ [None] + dropdown_list,
683
+ value=None,
684
+ label='2nd Model (Optional)',
685
+ info='Select another model for diversified results.'
686
+ )
687
+
688
+ with gr.Row():
689
+ general_thresh = gr.Slider(
690
+ 0, 1,
691
+ step=args.score_slider_step,
692
+ value=args.score_general_threshold,
693
+ label='General Tags Threshold',
694
+ scale=3
695
+ )
696
+ general_mcut_enabled = gr.Checkbox(
697
+ value=False,
698
+ label='Use MCut threshold',
699
+ scale=1
700
+ )
701
+
702
+ with gr.Row():
703
+ character_thresh = gr.Slider(
704
+ 0, 1,
705
+ step=args.score_slider_step,
706
+ value=args.score_character_threshold,
707
+ label='Character Tags Threshold',
708
+ scale=3
709
+ )
710
+ character_mcut_enabled = gr.Checkbox(
711
+ value=False,
712
+ label='Use MCut threshold',
713
+ scale=1
714
+ )
715
+
716
+ with gr.Row():
717
+ characters_merge_enabled = gr.Checkbox(
718
+ value=False,
719
+ label='Merge characters into the string output',
720
+ scale=1
721
+ )
722
+
723
+ with gr.Row():
724
+ additional_tags_prepend = gr.Text(
725
+ label='Prepend Additional tags (comma split)'
726
+ )
727
+ additional_tags_append = gr.Text(
728
+ label='Append Additional tags (comma split)'
729
+ )
730
+
731
+ with gr.Row():
732
+ clear = gr.ClearButton(
733
+ components=[
734
+ gallery, model_repo, general_thresh, general_mcut_enabled,
735
+ character_thresh, character_mcut_enabled, characters_merge_enabled,
736
+ additional_tags_prepend, additional_tags_append
737
+ ],
738
+ variant='secondary',
739
+ size='lg'
740
+ )
741
+
742
+ with gr.Column(variant='panel'):
743
+ download_file = gr.File(label='Download')
744
+ character_res = gr.Textbox(
745
+ label="Character tags",
746
+ show_copy_button=True,
747
+ lines=3
748
+ )
749
+ sorted_general_strings = gr.Textbox(
750
+ label='Output',
751
+ show_label=True,
752
+ show_copy_button=True,
753
+ lines=5
754
+ )
755
+ categorized_strings = gr.Textbox(
756
+ label='Categorized',
757
+ show_label=True,
758
+ show_copy_button=True,
759
+ lines=5
760
+ )
761
+ tags_json = gr.JSON(
762
+ label='Categorized Tags (JSON)',
763
+ visible=True
764
+ )
765
+ rating = gr.Label(label='Rating')
766
+ general_res = gr.Textbox(
767
+ label="General tags",
768
+ show_copy_button=True,
769
+ lines=3,
770
+ visible=False # Temp
771
+ )
772
+ # State to store results
773
+ tag_results = gr.State({})
774
+
775
+ # Event handlers
776
+ image_input.change(
777
+ append_gallery,
778
+ inputs=[gallery, image_input],
779
+ outputs=[gallery, image_input]
780
+ )
781
+
782
+ upload_button.upload(
783
+ extend_gallery,
784
+ inputs=[gallery, upload_button],
785
+ outputs=gallery
786
+ )
787
+
788
+ gallery.select(
789
+ get_selection_from_gallery,
790
+ inputs=[gallery, tag_results],
791
+ outputs=[sorted_general_strings, rating, character_res, general_res, categorized_strings, tags_json]
792
+ )
793
+
794
+ submit.click(
795
+ predictor.predict,
796
+ inputs=[
797
+ gallery, model_repo, model_repo_2, general_thresh, general_mcut_enabled,
798
+ character_thresh, character_mcut_enabled, characters_merge_enabled,
799
+ additional_tags_prepend, additional_tags_append, tag_results
800
+ ],
801
+ outputs=[download_file, sorted_general_strings, rating, character_res, general_res, categorized_strings, tags_json, tag_results]
802
+ )
803
+
804
+ gr.Examples(
805
+ [['images/1girl.png', EVA02_LARGE_MODEL_DSV3_REPO, 0.35, False, 0.85, False]],
806
+ inputs=[image_input, model_repo, general_thresh, general_mcut_enabled, character_thresh, character_mcut_enabled]
807
+ )
808
+ gr.Markdown('[Based on SmilingWolf/wd-tagger](https://huggingface.co/spaces/SmilingWolf/wd-tagger) <p style="text-align:right"><a href="https://huggingface.co/spaces/John6666/danbooru-tags-transformer-v2-with-wd-tagger-b">Prompt Enhancer</a></p>')
809
+ with gr.Tab("PixAI"):
810
+ pixai_interface = create_pixai_interface()
811
+ with gr.Tab("Booru Image Fetcher"):
812
+ booru_interface = create_booru_interface()
813
+ with gr.Tab("ComfyUI Extractor"):
814
+ comfy_interface = create_multi_comfy()
815
+ with gr.Tab(label="Misc"):
816
+ with gr.Row():
817
+ with gr.Column(variant="panel"):
818
+ tag_string = gr.Textbox(
819
+ label="Input Tags",
820
+ placeholder="1girl, cat, horns, blue hair, ...\nor\n? 1girl 1234567? cat 1234567? horns 1234567? blue hair 1234567? ...",
821
+ lines=4
822
+ )
823
+ submit_button = gr.Button(value="START", variant="primary", size="lg")
824
+ with gr.Column(variant="panel"):
825
+ cleaned_tags_output = gr.Textbox(
826
+ label="Cleaned Tags",
827
+ show_label=True,
828
+ show_copy_button=True,
829
+ lines=4,
830
+ info="Tags with ? and numbers removed, formatted with commas. Useful for clearing tags from Booru sites."
831
+ )
832
+ classify_tags_for_display = gr.Textbox(
833
+ label="Categorized (string)",
834
+ show_label=True,
835
+ show_copy_button=True,
836
+ lines=8,
837
+ info="Tags organized by categories"
838
+ )
839
+ generate_categorized_json = gr.JSON(
840
+ label="Categorized JSON (tags)"
841
+ )
842
+
843
+ # Fix the event handler to properly call the function
844
+ submit_button.click(
845
+ process_tags_for_misc,
846
+ inputs=[tag_string],
847
+ outputs=[cleaned_tags_output, classify_tags_for_display, generate_categorized_json]
848
+ )
849
+ gr.Markdown(NEXT_RESTART)
850
+
851
+ demo.queue(max_size=5).launch(show_error=True)
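
For reference, each entry in the `tag_results` state that `predict` fills and `get_selection_from_gallery` reads back is keyed by the gallery image path. A minimal sketch of the expected shape (the keys match the `tag_results[image_path] = {...}` assignment above; the path and values here are made up for illustration, not real model output):

    # Illustrative only: one tag_results entry as stored by predict().
    tag_results = {
        '/tmp/gradio/1girl.png': {
            'strings': '1girl, solo, long hair',             # flat tag string
            'categorized_strings': '1girl, long hair, solo', # reordered by category
            'categorized_json': {'Subject': ['1girl']},      # per-category dict
            'rating': {'general': 0.92},                     # label -> confidence
            'character_res': {},                             # character tag -> confidence
            'general_res': {'solo': 0.98, 'long_hair': 0.95} # general tag -> confidence
        }
    }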
modules/classifyTags.py CHANGED
@@ -1,9 +1,10 @@
 import re
 from collections import defaultdict
+from typing import List, Dict, Tuple, Optional, Set
 
 # Test: Define priority tags that should always come first
 PRIORITY_TAGS = [
-    '1girl', '2girls', '3girls', '4girls', '5girls', '6+girls', 'multiple_girls',
+    '1girl', '2girls', '3girls', '4girls', '5girls', '6+girls', 'multiple_girls', '1other',
     '1boy', '2boys', '3boys', '4boys', '5boys', '6+boys', 'multiple_boys',
     'male_focus', 'female_focus', 'other_focus'
 ]
@@ -45,293 +46,465 @@ categories = {
 'Others':['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023', '2024', 'artist', 'artist_name', 'artistic_error', 'asian', '(company)', 'character_name', 'content_rating', 'copyright', 'cover_page', 'dated', 'english_text', 'japan', 'layer', 'logo', 'name', 'numbered', 'page_number', 'pixiv_id', 'language', 'reference_sheet', 'signature', 'speech_bubble', 'subtitled', 'text', 'thank_you', 'typo', 'username', 'wallpaper', 'watermark', 'web_address', 'screwdriver', 'translated'],
 'Quality Tags':['masterpiece', '_quality', 'highres', 'absurdres', 'ultra-detailed', 'lowres']}
 
-# Build a trie for efficient prefix matching
-class TrieNode:
-    def __init__(self):
-        self.children = {}
-        self.category = None
-
-class TagTrie:
-    def __init__(self):
-        self.root = TrieNode()
-        self._build_trie()
-
-    def _build_trie(self):
-        for category, tags in categories.items():
-            for tag in tags:
-                node = self.root
-                for char in tag:
-                    if char not in node.children:
-                        node.children[char] = TrieNode()
-                    node = node.children[char]
-                node.category = category
-
-    def find_category(self, tag):
-        node = self.root
-        matched_category = None
-
-        # Try exact match first
-        for char in tag:
-            if char in node.children:
-                node = node.children[char]
-                if node.category:
-                    matched_category = node.category
-            else:
-                break
-
-        # If exact match found, return it
-        if matched_category and node.children == {}:
-            return matched_category
-
-        # If partial match found, check if it's a valid prefix
-        if matched_category:
-            return matched_category
-
-        # Try substring matching for longer than 3 characters
-        for i in range(len(tag)):
-            for j in range(i+4, len(tag)+1): # Only check substrings longer than 3 chars
-                substring = tag[i:j]
-                node = self.root
-                valid = True
-                for char in substring:
-                    if char in node.children:
-                        node = node.children[char]
-                    else:
-                        valid = False
-                        break
-                if valid and node.category:
-                    return node.category
-
-        return None
-
-tag_trie = TagTrie()
-
-def normalize_tag(tag):
-    """Normalize tag by converting spaces/hyphens to underscores"""
-    return re.sub(r'[-\s]+', '_', tag.strip())
-
-def classify_single_tag(tag):
-    """Classify a single tag into its category"""
-    normalized_tag = normalize_tag(tag)
-
-    # Try exact match through Trie lookup first
-    category = tag_trie.find_category(normalized_tag)
-
-    # If no match and has underscores, try parts
-    if not category and '_' in normalized_tag:
-        parts = normalized_tag.split('_')
-        for part in parts:
-            if len(part) > 3: # Only check parts longer than 3 characters
-                category = tag_trie.find_category(part)
-                if category:
-                    break
-
-    # Special handling for escaped parentheses
-    if not category and ('\\(' in normalized_tag or '\\)' in normalized_tag):
-        unescaped = normalized_tag.replace('\\(', '(').replace('\\)', ')')
-        category = tag_trie.find_category(unescaped)
-
-        if not category and '_' in unescaped:
-            parts = unescaped.split('_')
-            for part in parts:
-                if len(part) > 3:
-                    category = tag_trie.find_category(part)
-                    if category:
-                        break
-
-    return category if category else 'Uncategorized'
-
-def extract_priority_and_character_tags(tags_list, character_tags):
-    """
-    Extract priority tags and character tags from the tags list
-
-    Args:
-        tags_list (list): List of all tags
-        character_tags (dict): Dictionary of character tags with confidence scores
-
-    Returns:
-        tuple: (priority_tags, character_tag_names, remaining_tags)
-    """
-    priority_tags_found = []
-    character_tag_names = list(character_tags.keys()) if character_tags else []
-    remaining_tags = []
-
-    # Convert priority tags to set for faster lookup
-    priority_set = set(PRIORITY_TAGS)
-
-    for tag in tags_list:
-        if tag in priority_set:
-            priority_tags_found.append(tag)
-        elif tag in character_tag_names:
-            # Character tags are already handled separately
-            remaining_tags.append(tag)
-        else:
-            remaining_tags.append(tag)
-
-    return priority_tags_found, character_tag_names, remaining_tags
-
-def classify_tags_for_display(tag_string, character_tags=None):
-    """
-    Classify a string of tags and organize them by categories with priority ordering for display
-
-    Args:
-        tag_string (str): Comma-separated tags string
-        character_tags (dict): Dictionary of character tags with confidence scores
-
-    Returns:
-        str: Categorized and organized tags as a comma-separated string
-    """
-    if not tag_string:
-        return ""
-
-    # Split tags by common delimiters
-    delimiters = r'[,\n\r\.!?]+'
-    raw_tags = re.split(delimiters, tag_string)
-
-    # Clean and normalize tags
-    cleaned_tags = []
-    for tag in raw_tags:
-        tag = tag.strip()
-        if tag:
-            cleaned_tags.append(tag)
-
-    # Extract priority and character tags
-    priority_tags_found, character_tag_names, remaining_tags = extract_priority_and_character_tags(cleaned_tags, character_tags)
-
-    # Classify remaining tags
-    categorized = defaultdict(list)
-    uncategorized = []
-
-    for tag in remaining_tags:
-        # Skip character tags as they're already in their own list
-        if tag in character_tag_names:
-            continue
-
-        category = classify_single_tag(tag)
-        if category == 'Uncategorized':
-            uncategorized.append(tag)
-        else:
-            categorized[category].append(tag)
-
-    # Build result string with priority ordering
-    result_parts = []
-
-    # 1. Add priority subject tags first
-    result_parts.extend(priority_tags_found)
-
-    # 2. Add character tags next
-    result_parts.extend(character_tag_names)
-
-    # 3. Add categorized tags in category order
-    for category in categories.keys():
-        if category in categorized and categorized[category]:
-            result_parts.extend(categorized[category])
-
-    # 4. Add uncategorized tags at the end
-    result_parts.extend(uncategorized)
-
-    # Process tags: replace underscores with spaces and handle escaped characters
-    processed_tags = []
-    for tag in result_parts:
-        processed_tag = tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')')
-        processed_tags.append(processed_tag)
-
-    return ', '.join(processed_tags)
-
-def generate_categorized_json(tag_string, character_tags=None):
-    """
-    Generate JSON object organizing tags by categories
-
-    Args:
-        tag_string (str): Comma-separated tags string
-        character_tags (dict): Dictionary of character tags with confidence scores
-
-    Returns:
-        dict: JSON-compatible dictionary with categories as keys and tag lists as values
-    """
-    if not tag_string:
-        return {}
-
-    # Split tags by common delimiters
-    delimiters = r'[,\n\r\.!?]+'
-    raw_tags = re.split(delimiters, tag_string)
-
-    # Clean and normalize tags
-    cleaned_tags = []
-    for tag in raw_tags:
-        tag = tag.strip()
-        if tag:
-            cleaned_tags.append(tag)
-
-    # Extract priority and character tags
-    priority_tags_found, character_tag_names, remaining_tags = extract_priority_and_character_tags(cleaned_tags, character_tags)
-
-    # Classify remaining tags
-    categorized = defaultdict(list)
-    uncategorized = []
-
-    for tag in remaining_tags:
-        # Skip character tags as they're already in their own list
-        if tag in character_tag_names:
-            continue
-
-        category = classify_single_tag(tag)
-        if category == 'Uncategorized':
-            uncategorized.append(tag)
-        else:
-            # Store the original tag (with underscores) for JSON
-            categorized[category].append(tag)
-
-    # Build JSON result
-    json_result = {}
-
-    # Add special categories if they have content
-    if priority_tags_found:
-        # Process priority tags for display (replace underscores with spaces) # Replacement is not 100% necessary, but will do anyway
-        processed_priority = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')') for tag in priority_tags_found]
-        json_result['Subject'] = processed_priority
-
-    if character_tag_names:
-        # Process character tags for display
-        processed_characters = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')') for tag in character_tag_names]
-        json_result['Characters'] = processed_characters
-
-    # Add categorized tags (process for display)
-    for category, tags in categorized.items():
-        if tags:
-            processed_tags = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')') for tag in tags]
-            json_result[category] = processed_tags
-
-    # Add uncategorized tags if any
-    if uncategorized:
-        processed_uncategorized = [tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')') for tag in uncategorized]
-        json_result['Uncategorized'] = processed_uncategorized
-
-    return json_result
-
-def categorize_tags_output(tag_string, character_tags=None):
-    """
-    Main function to categorize tags output for display
-
-    Args:
-        tag_string (str): Raw tags string from the model
-        character_tags (dict): Dictionary of character tags with confidence scores
-
-    Returns:
-        str: Organized, categorized tags string
-    """
-    return classify_tags_for_display(tag_string, character_tags)
-
-def generate_tags_json(tag_string, character_tags=None):
-    """
-    Main function to generate categorized JSON
-
-    Args:
-        tag_string (str): Raw tags string from the model
-        character_tags (dict): Dictionary of character tags with confidence scores
-
-    Returns:
-        dict: JSON object with categorized tags
-    """
-    return generate_categorized_json(tag_string, character_tags)
+class TagCleaner:
+    """Handles tag cleaning and normalization operations."""
+
+    @staticmethod
+    def clean_raw_tags(tag_string: str) -> List[str]:
+        """
+        Clean raw tags by removing question marks, numbers, and normalizing format.
+
+        Args:
+            tag_string: Raw tags string with potential ? and numbers
+
+        Returns:
+            List of cleaned tags
+        """
+        if not tag_string:
+            return []
+
+        # Split by common delimiters
+        delimiters = r'[,\n\r\.!?]+'
+        raw_tags = re.split(delimiters, tag_string)
+
+        cleaned_tags = []
+
+        for tag in raw_tags:
+            tag = tag.strip()
+            if not tag:
+                continue
+
+            # Remove question marks
+            tag = tag.replace('?', '')
+            # Test
+            # Remove standalone numbers (4+ digit IDs) but keep numbers that are part of tags
+            # This removes "123456" but keeps "3girls", "1boy", and years like "2025".
+            # First, remove patterns like "tag 123456" or "123456 tag" (excluding years)
+            tag = re.sub(r'\s+(?!19\d{2}|20\d{2})\d{4,}\b|\b(?!19\d{2}|20\d{2})\d{4,}\s+', ' ', tag)
+            # Then remove any remaining standalone numbers (excluding years and 3-digit numbers)
+            tag = re.sub(r'\b(?!19\d{2}|20\d{2})\d{4,}\b', '', tag)
+            # Finally, remove any remaining 5+ digit numbers that might be attached to tags
+            tag = re.sub(r'\b\w+\d{5,}\b|\b\d{5,}\w+\b', lambda m: re.sub(r'\d{5,}', '', m.group()), tag)
+
+            # Clean up extra spaces
+            tag = re.sub(r'\s+', ' ', tag).strip()
+
+            if tag: # Only add if tag is not empty after cleaning
+                cleaned_tags.append(tag)
+
+        return cleaned_tags
+
+    @staticmethod
+    def normalize_tag(tag: str) -> str:
+        """
+        Normalize tag by converting spaces/hyphens to underscores.
+
+        Args:
+            tag: Raw tag string
+
+        Returns:
+            Normalized tag string
+        """
+        return re.sub(r'[-\s]+', '_', tag.strip())
+
+    @staticmethod
+    def format_tags_for_display(tags: List[str]) -> str:
+        """
+        Format tags as a comma-separated string for display.
+
+        Args:
+            tags: List of tags
+
+        Returns:
+            Comma-separated string
+        """
+        return ', '.join(tags)
+
+
+class CategoryMatcher:
+    """Optimized category matching using precomputed lookup tables."""
+
+    def __init__(self, categories_dict: Dict[str, List[str]]):
+        """Initialize with categories dictionary."""
+        self.categories = categories_dict
+        self._build_lookup_tables()
+
+    def _build_lookup_tables(self):
+        """Build efficient lookup tables for category matching."""
+        self.tag_to_category = {}
+        self.priority_set = set(PRIORITY_TAGS)
+
+        # Build direct lookup table
+        for category, tags in self.categories.items():
+            for tag in tags:
+                self.tag_to_category[tag] = category
+                # Also add escaped version for matching
+                if '(' in tag or ')' in tag:
+                    escaped_tag = tag.replace('(', '\\(').replace(')', '\\)')
+                    self.tag_to_category[escaped_tag] = category
+
+    def find_category(self, tag: str) -> Optional[str]:
+        """
+        Find the category for a given tag.
+
+        Args:
+            tag: Tag to categorize
+
+        Returns:
+            Category name or None if not found
+        """
+        # Try exact match first
+        if tag in self.tag_to_category:
+            return self.tag_to_category[tag]
+
+        # Try normalized version
+        normalized_tag = TagCleaner.normalize_tag(tag)
+        if normalized_tag in self.tag_to_category:
+            return self.tag_to_category[normalized_tag]
+
+        # Try partial matching for compound tags
+        if '_' in normalized_tag:
+            parts = normalized_tag.split('_')
+            for part in parts:
+                if len(part) > 3 and part in self.tag_to_category:
+                    return self.tag_to_category[part]
+
+        # Try substring matching for longer tags
+        for i in range(len(tag)):
+            for j in range(i + 4, len(tag) + 1): # Only check substrings longer than 3 chars
+                substring = tag[i:j]
+                if substring in self.tag_to_category:
+                    return self.tag_to_category[substring]
+
+        return None
+
+
+class TagClassifier:
+    """Main tag classification engine."""
+
+    def __init__(self, categories_dict: Dict[str, List[str]] = None):
+        """
+        Initialize the classifier.
+
+        Args:
+            categories_dict: Dictionary of categories and their tags
+        """
+        self.categories = categories_dict or categories
+        self.matcher = CategoryMatcher(self.categories)
+        self.priority_set = set(PRIORITY_TAGS)
+
+    def extract_special_tags(self, tags: List[str], character_tags: Optional[Dict] = None) -> Tuple[List[str], List[str], List[str]]:
+        """
+        Extract priority and character tags from the tags list.
+
+        Args:
+            tags: List of all tags
+            character_tags: Dictionary of character tags with confidence scores
+
+        Returns:
+            Tuple of (priority_tags, character_tag_names, remaining_tags)
+        """
+        priority_tags_found = []
+        character_tag_names = list(character_tags.keys()) if character_tags else []
+        remaining_tags = []
+
+        for tag in tags:
+            if tag in self.priority_set:
+                priority_tags_found.append(tag)
+            elif tag in character_tag_names:
+                remaining_tags.append(tag) # Character tags are handled separately
+            else:
+                remaining_tags.append(tag)
+
+        return priority_tags_found, character_tag_names, remaining_tags
+
+    def classify_tags(self, tags: List[str], character_tags: Optional[Dict] = None) -> Dict[str, List[str]]:
+        """
+        Classify tags into categories.
+
+        Args:
+            tags: List of tags to classify
+            character_tags: Dictionary of character tags with confidence scores
+
+        Returns:
+            Dictionary with categories as keys and tag lists as values
+        """
+        # Extract special tags first
+        priority_tags, character_tag_names, remaining_tags = self.extract_special_tags(tags, character_tags)
+
+        # Classify remaining tags
+        categorized = defaultdict(list)
+        uncategorized = []
+
+        for tag in remaining_tags:
+            # Skip character tags as they're handled separately
+            if tag in character_tag_names:
+                continue
+
+            category = self.matcher.find_category(tag)
+            if category:
+                categorized[category].append(tag)
+            else:
+                uncategorized.append(tag)
+
+        # Build result dictionary
+        result = {}
+
+        # Add special categories if they have content
+        if priority_tags:
+            result['Subject'] = priority_tags
+
+        if character_tag_names:
+            result['Characters'] = character_tag_names
+
+        # Add categorized tags
+        for category in self.categories.keys():
+            if category in categorized and categorized[category]:
+                result[category] = categorized[category]
+
+        # Add uncategorized tags if any
+        if uncategorized:
+            result['Uncategorized'] = uncategorized
+
+        return result
+
+    def get_ordered_tags_string(self, tags: List[str], character_tags: Optional[Dict] = None) -> str:
+        """
+        Get tags ordered by priority and categories as a string.
+
+        Args:
+            tags: List of tags to order
+            character_tags: Dictionary of character tags with confidence scores
+
+        Returns:
+            Ordered comma-separated string
+        """
+        # Extract special tags
+        priority_tags, character_tag_names, remaining_tags = self.extract_special_tags(tags, character_tags)
+
+        # Classify remaining tags
+        categorized = defaultdict(list)
+        uncategorized = []
+
+        for tag in remaining_tags:
+            if tag in character_tag_names:
+                continue
+
+            category = self.matcher.find_category(tag)
+            if category and category != 'Uncategorized':
+                categorized[category].append(tag)
+            else:
+                uncategorized.append(tag)
+
+        # Build ordered result
+        result_parts = []
+
+        # 1. Add priority subject tags first
+        result_parts.extend(priority_tags)
+
+        # 2. Add character tags next
+        result_parts.extend(character_tag_names)
+
+        # 3. Add categorized tags in category order
+        for category in self.categories.keys():
+            if category in categorized and categorized[category]:
+                result_parts.extend(categorized[category])
+
+        # 4. Add uncategorized tags at the end
+        result_parts.extend(uncategorized)
+
+        # Process tags for display
+        processed_tags = []
+        for tag in result_parts:
+            processed_tag = tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')')
+            processed_tags.append(processed_tag)
+
+        return ', '.join(processed_tags)
+
+
+class TagFormatter:
+    """Handles output formatting for different display types."""
+
+    @staticmethod
+    def format_for_display(categorized_tags: Dict[str, List[str]]) -> str:
+        """
+        Format categorized tags as a display string.
+
+        Args:
+            categorized_tags: Dictionary of categorized tags
+
+        Returns:
+            Formatted string for display
+        """
+        result_parts = []
+
+        # Order categories for display
+        display_order = ['Subject', 'Characters'] + [cat for cat in categories.keys() if cat not in ['Subject', 'Characters']] + ['Uncategorized']
+
+        for category in display_order:
+            if category in categorized_tags and categorized_tags[category]:
+                # Process tags for display
+                processed_tags = []
+                for tag in categorized_tags[category]:
+                    processed_tag = tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')')
+                    processed_tags.append(processed_tag)
+
+                result_parts.extend(processed_tags)
+
+        return ', '.join(result_parts)
+
+    @staticmethod
+    def format_for_json(categorized_tags: Dict[str, List[str]]) -> Dict[str, List[str]]:
+        """
+        Format categorized tags as JSON-compatible dictionary.
+
+        Args:
+            categorized_tags: Dictionary of categorized tags
+
+        Returns:
+            JSON-compatible dictionary
+        """
+        json_result = {}
+
+        for category, tags in categorized_tags.items():
+            if tags:
+                # Process tags for display
+                processed_tags = []
+                for tag in tags:
+                    processed_tag = tag.replace('_', ' ').replace('\\(', '(').replace('\\)', ')')
+                    processed_tags.append(processed_tag)
+
+                json_result[category] = processed_tags
+
+        return json_result
+
+
+# Global classifier instance
+_classifier = TagClassifier()
+_cleaner = TagCleaner()
+_formatter = TagFormatter()
+
+
+# Public API Functions
+def clean_tags(tag_string: str) -> List[str]:
+    """
+    Clean tags by removing question marks and numbers.
+
+    Args:
+        tag_string: Raw tags string with potential ? and numbers
+
+    Returns:
+        List of cleaned tags
+    """
+    return _cleaner.clean_raw_tags(tag_string)
+
+
+def clean_and_format_tags(tag_string: str) -> str:
+    """
+    Clean tags and format them as a comma-separated string.
+
+    Args:
+        tag_string: Raw tags string with potential ? and numbers
+
+    Returns:
+        Comma-separated cleaned tags
+    """
+    cleaned_tags = clean_tags(tag_string)
+    return _cleaner.format_tags_for_display(cleaned_tags)
+
+
+def categorize_tags_output(tag_string: str, character_tags: Optional[Dict] = None) -> str:
+    """
+    Main function to categorize tags output for display.
+
+    Args:
+        tag_string: Raw tags string from the model
+        character_tags: Dictionary of character tags with confidence scores
+
+    Returns:
+        Organized, categorized tags string
+    """
+    # Clean tags first
+    cleaned_tags = clean_tags(tag_string)
+
+    # Get ordered string
+    return _classifier.get_ordered_tags_string(cleaned_tags, character_tags)
+
+
+def generate_tags_json(tag_string: str, character_tags: Optional[Dict] = None) -> Dict[str, List[str]]:
+    """
+    Main function to generate categorized JSON.
+
+    Args:
+        tag_string: Raw tags string from the model
+        character_tags: Dictionary of character tags with confidence scores
+
+    Returns:
+        JSON object with categorized tags
+    """
+    # Clean tags first
+    cleaned_tags = clean_tags(tag_string)
+
+    # Classify tags
+    categorized = _classifier.classify_tags(cleaned_tags, character_tags)
+
+    # Format for JSON
+    return _formatter.format_for_json(categorized)
+
+
+def process_tags_for_misc(tag_string: str) -> Tuple[str, str, Dict[str, List[str]]]:
+    """
+    Process tags for the Misc tab - clean and categorize them.
+
+    Args:
+        tag_string: Raw tags string with potential ? and numbers
+
+    Returns:
+        Tuple of (cleaned_tags_string, categorized_string, categorized_json)
+    """
+    # Clean the tags first
+    cleaned_tags_string = clean_and_format_tags(tag_string)
+
+    # Then categorize the cleaned tags
+    categorized_string = categorize_tags_output(tag_string)
+    categorized_json = generate_tags_json(tag_string)
+
+    return cleaned_tags_string, categorized_string, categorized_json
+
+
+# Legacy compatibility functions
+def classify_tags_for_display(tag_string: str, character_tags: Optional[Dict] = None) -> str:
+    """Legacy function - use categorize_tags_output instead."""
+    return categorize_tags_output(tag_string, character_tags)
+
+
+def generate_categorized_json(tag_string: str, character_tags: Optional[Dict] = None) -> Dict[str, List[str]]:
+    """Legacy function - use generate_tags_json instead."""
+    return generate_tags_json(tag_string, character_tags)
+
+
+"""
+How to test:
+python -c "
+from modules.classifyTags import process_tags_for_misc, clean_tags
+
+# Test example
+test_input = 'tags here'
+
+print('Input:', test_input)
+print()
+
+# Test cleaning
+cleaned = clean_tags(test_input)
+print('Cleaned tags:', cleaned)
+print()
+
+# Test full processing
+cleaned_str, categorized_str, categorized_json = process_tags_for_misc(test_input)
+
+print('Cleaned output:', cleaned_str)
+print('Categorized output:', categorized_str)
+print('Categorized JSON:', categorized_json)
+"
+"""
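
The How-to-test snippet above uses a placeholder input; a concrete run of the same pipeline, with the expected results reasoned out from the cleaning regexes rather than captured from a live session:

    from modules.classifyTags import clean_tags, process_tags_for_misc

    # Booru-style paste: '?' separators and 7-digit post IDs around each tag.
    booru_input = '? 1girl 1234567? cat 1234567? blue hair 1234567'

    print(clean_tags(booru_input))
    # Expected: ['1girl', 'cat', 'blue hair'] - the '?' delimiters and long IDs
    # are stripped, while counts like '1girl' and years like '2024' survive.

    cleaned_str, categorized_str, categorized_json = process_tags_for_misc(booru_input)
    # cleaned_str is the comma-joined list; categorized_str/categorized_json
    # reorder the same tags through TagClassifier and TagFormatter.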
modules/media_handler.py ADDED
@@ -0,0 +1,212 @@
+import os
+import tempfile
+from typing import List, Union, Tuple, Optional
+from modules.video_processor import is_video_file, process_video_upload, SUPPORTED_VIDEO_FORMATS
+
+# Supported image formats
+SUPPORTED_IMAGE_FORMATS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp', '.gif']
+
+def get_media_type(file_path: str) -> str:
+    """
+    Determine if a file is an image, video, or unsupported.
+
+    Args:
+        file_path: Path to the file
+
+    Returns:
+        'image', 'video', or 'unsupported'
+    """
+    if not file_path:
+        return 'unsupported'
+
+    _, ext = os.path.splitext(file_path.lower())
+
+    if ext in SUPPORTED_IMAGE_FORMATS:
+        return 'image'
+    elif ext in SUPPORTED_VIDEO_FORMATS:
+        return 'video'
+    else:
+        return 'unsupported'
+
+def is_supported_media(file_path: str) -> bool:
+    """Check if the file is a supported image or video format."""
+    return get_media_type(file_path) in ['image', 'video']
+
+def create_gallery_item(file_path: str) -> Optional[Tuple[str, str]]:
+    """
+    Create a gallery-compatible item from a media file path.
+
+    Args:
+        file_path: Path to the media file
+
+    Returns:
+        Tuple suitable for gallery (file_path, filename) or None if unsupported
+    """
+    if not os.path.exists(file_path):
+        return None
+
+    if not is_supported_media(file_path):
+        return None
+
+    filename = os.path.basename(file_path)
+    return (file_path, filename)
+
+def process_single_media_upload(file_path: str, max_video_duration: int = 30, frame_interval: int = 1) -> List[str]:
+    """
+    Process a single media file upload (image or video).
+
+    Args:
+        file_path: Path to the uploaded file
+        max_video_duration: Maximum duration to process for videos (seconds)
+        frame_interval: Interval between frames for videos (seconds)
+
+    Returns:
+        List of file paths to be added to gallery (images or extracted frames)
+    """
+    if not file_path or not os.path.exists(file_path):
+        return []
+
+    media_type = get_media_type(file_path)
+
+    if media_type == 'image':
+        # For images, just return the original path
+        return [file_path]
+    elif media_type == 'video':
+        # For videos, extract frames
+        frame_paths, _ = process_video_upload(file_path, max_video_duration, frame_interval)
+        return frame_paths
+    else:
+        # Unsupported format
+        return []
+
+def process_multiple_media_uploads(
+    file_paths: List[str],
+    max_video_duration: int = 30,
+    frame_interval: int = 1
+) -> List[str]:
+    """
+    Process multiple media file uploads.
+
+    Args:
+        file_paths: List of paths to uploaded files
+        max_video_duration: Maximum duration to process for videos (seconds)
+        frame_interval: Interval between frames for videos (seconds)
+
+    Returns:
+        List of file paths to be added to gallery (images and extracted frames)
+    """
+    all_paths = []
+
+    for file_path in file_paths:
+        processed_paths = process_single_media_upload(file_path, max_video_duration, frame_interval)
+        all_paths.extend(processed_paths)
+
+    return all_paths
+
+def handle_single_media_upload(file_path: str, gallery: List, max_video_duration: int = 30, frame_interval: int = 1) -> Tuple[List, Optional[str]]:
+    """
+    Handle a single media file upload and update gallery.
+
+    Args:
+        file_path: Path to the uploaded file
+        gallery: Current gallery list
+        max_video_duration: Maximum duration to process for videos (seconds)
+        frame_interval: Interval between frames for videos (seconds)
+
+    Returns:
+        Tuple of (updated_gallery, None) for Gradio compatibility
+    """
+    if gallery is None:
+        gallery = []
+
+    if not file_path:
+        return gallery, None
+
+    # Process the media file
+    processed_paths = process_single_media_upload(file_path, max_video_duration, frame_interval)
+
+    # Create gallery items and add to gallery
+    for path in processed_paths:
+        gallery_item = create_gallery_item(path)
+        if gallery_item:
+            gallery.append(gallery_item)
+
+    return gallery, None
+
+def handle_multiple_media_uploads(
+    file_paths: List,
+    gallery: List,
+    max_video_duration: int = 30,
+    frame_interval: int = 1
+) -> List:
+    """
+    Handle multiple media file uploads and update gallery.
+
+    Args:
+        file_paths: List of uploaded file paths
+        gallery: Current gallery list
+        max_video_duration: Maximum duration to process for videos (seconds)
+        frame_interval: Interval between frames for videos (seconds)
+
+    Returns:
+        Updated gallery list
+    """
+    if gallery is None:
+        gallery = []
+
+    if not file_paths:
+        return gallery
+
+    # Process all media files
+    processed_paths = process_multiple_media_uploads(file_paths, max_video_duration, frame_interval)
+
+    # Create gallery items and add to gallery
+    for path in processed_paths:
+        gallery_item = create_gallery_item(path)
+        if gallery_item:
+            gallery.append(gallery_item)
+
+    return gallery
+
+def get_supported_formats() -> dict:
+    """Get dictionary of supported file formats."""
+    return {
+        'images': SUPPORTED_IMAGE_FORMATS,
+        'videos': SUPPORTED_VIDEO_FORMATS,
+        'all': SUPPORTED_IMAGE_FORMATS + SUPPORTED_VIDEO_FORMATS
+    }
+
+def validate_media_files(file_paths: List[str]) -> Tuple[List[str], List[str]]:
+    """
+    Validate a list of media files.
+
+    Args:
+        file_paths: List of file paths to validate
+
+    Returns:
+        Tuple of (valid_files, invalid_files)
+    """
+    valid_files = []
+    invalid_files = []
+
+    for file_path in file_paths:
+        if is_supported_media(file_path):
+            valid_files.append(file_path)
+        else:
+            invalid_files.append(file_path)
+
+    return valid_files, invalid_files
+
+# Export functions
+__all__ = [
+    'get_media_type',
+    'is_supported_media',
+    'create_gallery_item',
+    'process_single_media_upload',
+    'process_multiple_media_uploads',
+    'handle_single_media_upload',
+    'handle_multiple_media_uploads',
+    'get_supported_formats',
+    'validate_media_files',
+    'SUPPORTED_IMAGE_FORMATS'
+]
+ ]
modules/pixai.py CHANGED
@@ -1,810 +1,801 @@
-import os, json, zipfile, tempfile, time, traceback
-import gradio as gr
-import pandas as pd
-import numpy as np
-import onnxruntime as ort
-from collections import defaultdict
-from typing import Union, Dict, Any, Tuple, List
-from PIL import Image
-from huggingface_hub import hf_hub_download
-from huggingface_hub.errors import EntryNotFoundError
-from datetime import datetime
-
-# Global variables for model components (for memory management)
-CURRENT_MODEL = None
-CURRENT_MODEL_NAME = None
-CURRENT_TAGS_DF = None
-CURRENT_D_IPS = None
-CURRENT_PREPROCESS_FUNC = None
-CURRENT_THRESHOLDS = None
-CURRENT_CATEGORY_NAMES = None
-
-css = """
-#custom-gallery {--row-height: 180px;display: grid;grid-auto-rows: min-content;gap: 10px;}
-#custom-gallery .thumbnail-item {height: var(--row-height);width: 100%;position: relative;overflow: hidden;border-radius: 8px;box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);transition: transform 0.2s ease, box-shadow 0.2s ease;}
-#custom-gallery .thumbnail-item:hover {transform: translateY(-3px);box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);}
-#custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: contain;margin: 0 auto;display: block;}
-#custom-gallery .thumbnail-item img.portrait {max-width: 100%;}
-#custom-gallery .thumbnail-item img.landscape {max-height: 100%;}
-.gallery-container {max-height: 500px;overflow-y: auto;padding-right: 0px;--size-80: 500px;}
-.thumbnails {display: flex;position: absolute;bottom: 0;width: 120px;overflow-x: scroll;padding-top: 320px;padding-bottom: 280px;padding-left: 4px;flex-wrap: wrap;}
-#custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: initial;width: fit-content;margin: 0px auto;display: block;}
-"""
-
-def preprocess_on_gpu(img, device='cuda'):
-    """Preprocess image on GPU using PyTorch"""
-    import torch
-    import torchvision.transforms as transforms
-    # Convert PIL to tensor and move to GPU
-    transform = transforms.Compose([transforms.Resize((448, 448)), transforms.ToTensor(), transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
-    # Move to GPU if available
-    tensor_img = transform(img).unsqueeze(0)
-    if torch.cuda.is_available():
-        tensor_img = tensor_img.to(device)
-    return tensor_img.cpu().numpy()
-
-class Timer: # Report the execution time & process
-    def __init__(self):
-        self.start_time = time.perf_counter()
-        self.checkpoints = [('Start', self.start_time)]
-
-    def checkpoint(self, label='Checkpoint'):
-        now = time.perf_counter()
-        self.checkpoints.append((label, now))
-
-    def report(self, is_clear_checkpoints=True):
-        max_label_length = max(len(label) for (label, _) in self.checkpoints) if self.checkpoints else 0
-        prev_time = self.checkpoints[0][1] if self.checkpoints else self.start_time
-
-        for (label, curr_time) in self.checkpoints[1:]:
-            elapsed = curr_time - prev_time
-            print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
-            prev_time = curr_time
-
-        if is_clear_checkpoints:
-            self.checkpoints.clear()
-            self.checkpoint()
-
-    def report_all(self):
-        print('\n> Execution Time Report:')
-        max_label_length = max(len(label) for (label, _) in self.checkpoints) if len(self.checkpoints) > 0 else 0
-        prev_time = self.start_time
-
-        for (label, curr_time) in self.checkpoints[1:]:
-            elapsed = curr_time - prev_time
-            print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
-            prev_time = curr_time
-
-        total_time = self.checkpoints[-1][1] - self.start_time if self.checkpoints else 0
-        print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n") # Performance tests
-        self.checkpoints.clear()
-
-    def restart(self):
-        self.start_time = time.perf_counter()
-        self.checkpoints = [('Start', self.start_time)]
-
-def _get_repo_id(model_name: str) -> str:
-    """Get the repository ID for the specified model name."""
-    if '/' in model_name:
-        return model_name
-    else:
-        return f'deepghs/pixai-tagger-{model_name}-onnx'
-
-def _download_model_files(model_name: str):
-    """Download all required model files."""
-    repo_id = _get_repo_id(model_name)
-
-    # Download the necessary files using hf_hub_download instead of local cache...
-    model_path = hf_hub_download(
-        repo_id=repo_id,
-        filename='model.onnx',
-        library_name="pixai-tagger"
-    )
-    tags_path = hf_hub_download(
-        repo_id=repo_id,
-        filename='selected_tags.csv',
-        library_name="pixai-tagger"
-    )
-    preprocess_path = hf_hub_download(
-        repo_id=repo_id,
-        filename='preprocess.json',
-        library_name="pixai-tagger"
-    )
-    try:
-        thresholds_path = hf_hub_download(
-            repo_id=repo_id,
-            filename='thresholds.csv',
-            library_name="pixai-tagger"
-        )
-    except EntryNotFoundError:
-        thresholds_path = None
-
-    return model_path, tags_path, preprocess_path, thresholds_path
-
-def create_optimized_ort_session(model_path):
-    """Create an optimized ONNX Runtime session with GPU support"""
-    # Test: Session options for better performance
-    sess_options = ort.SessionOptions()
-    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    sess_options.intra_op_num_threads = 0 # Use all available cores
-    sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
-    sess_options.enable_mem_pattern = True
-    sess_options.enable_cpu_mem_arena = True
-
-    # Check available providers
-    available_providers = ort.get_available_providers()
-    print(f"Available ONNX Runtime providers: {available_providers}")
-
-    # Use appropriate execution providers (in order of preference)
-    providers = []
-
-    # Use CUDA if available
-    if 'CUDAExecutionProvider' in available_providers:
-        cuda_provider = ('CUDAExecutionProvider', {
-            'device_id': 0,
-            'arena_extend_strategy': 'kNextPowerOfTwo',
-            'gpu_mem_limit': 4 * 1024 * 1024 * 1024, # 4GB VRAM
-            'cudnn_conv_algo_search': 'EXHAUSTIVE',
-            'do_copy_in_default_stream': True,
-        })
-        providers.append(cuda_provider)
-        print("Using CUDA provider for ONNX inference")
-    else:
-        print("CUDA provider not available, falling back to CPU")
-
-    # Always include CPU as fallback (FOR HF)
-    providers.append('CPUExecutionProvider')
-
-    try:
-        session = ort.InferenceSession(model_path, sess_options, providers=providers)
-        print(f"Model loaded with providers: {session.get_providers()}")
-        return session
-    except Exception as e:
-        print(f"Failed to create ONNX session: {e}")
-        raise
-
-def _load_model_components_optimized(model_name: str):
-    global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_D_IPS
-    global CURRENT_PREPROCESS_FUNC, CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES
-
-    # Only reload if model changed
-    if CURRENT_MODEL_NAME != model_name:
-        # Download files
-        model_path, tags_path, preprocess_path, thresholds_path = _download_model_files(model_name)
-
-        # Load optimized ONNX model
-        CURRENT_MODEL = create_optimized_ort_session(model_path)
-
-        # Load tags
-        CURRENT_TAGS_DF = pd.read_csv(tags_path)
-        CURRENT_D_IPS = {}
-
-        if 'ips' in CURRENT_TAGS_DF.columns:
-            CURRENT_TAGS_DF['ips'] = CURRENT_TAGS_DF['ips'].fillna('{}').map(json.loads)
-            for name, ips in zip(CURRENT_TAGS_DF['name'], CURRENT_TAGS_DF['ips']):
-                if ips:
-                    CURRENT_D_IPS[name] = ips
-
-        # Load preprocessing
-        with open(preprocess_path, 'r') as f:
-            data_ = json.load(f)
-        # Simple preprocessing function
-        def transform(img):
-            # Ensure image is in RGB mode
-            if img.mode != 'RGB':
-                img = img.convert('RGB')
-
-            # Resize to 448x448 <- Very important.
-            img = img.resize((448, 448), Image.Resampling.LANCZOS)
-
-            # Convert to numpy array and normalize
-            img_array = np.array(img).astype(np.float32)
-
-            # Normalize pixel values to [0, 1]
-            img_array = img_array / 255.0
-
-            # Normalize with ImageNet mean and std
-            mean = np.array([0.48145466, 0.4578275, 0.40821073]).astype(np.float32)
-            std = np.array([0.26862954, 0.26130258, 0.27577711]).astype(np.float32)
-            img_array = (img_array - mean) / std
-
-            # Transpose to (C, H, W)
-            img_array = np.transpose(img_array, (2, 0, 1))
-            return img_array
-
-        CURRENT_PREPROCESS_FUNC = transform
-
-        # Load thresholds
-        CURRENT_THRESHOLDS = {}
-        CURRENT_CATEGORY_NAMES = {}
-
-        if thresholds_path and os.path.exists(thresholds_path):
-            df_category_thresholds = pd.read_csv(thresholds_path, keep_default_na=False)
-            for item in df_category_thresholds.to_dict('records'):
-                if item['category'] not in CURRENT_THRESHOLDS:
-                    CURRENT_THRESHOLDS[item['category']] = item['threshold']
-                    CURRENT_CATEGORY_NAMES[item['category']] = item['name']
-        else:
-            # Default thresholds if file doesn't exist
-            CURRENT_THRESHOLDS = {0: 0.3, 4: 0.85, 9: 0.85}
-            CURRENT_CATEGORY_NAMES = {0: 'general', 4: 'character', 9: 'rating'}
-
-        CURRENT_MODEL_NAME = model_name
-
-    return (CURRENT_MODEL, CURRENT_TAGS_DF, CURRENT_D_IPS, CURRENT_PREPROCESS_FUNC,
-            CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES)
-
-def _raw_predict(image: Image.Image, model_name: str):
-    """Make a raw prediction with the PixAI tagger model."""
-    try:
-        # Ensure we have a PIL Image
-        if not isinstance(image, Image.Image):
-            raise ValueError("Input must be a PIL Image") # <-
-
-        # Load model components
-        model, _, _, preprocess_func, _, _ = _load_model_components_optimized(model_name)
-
-        # Preprocess image
-        input_tensor = preprocess_func(image)
-
-        # Add batch dimension
-        if len(input_tensor.shape) == 3:
-            input_tensor = np.expand_dims(input_tensor, axis=0)
-
-        # Run inference
-        output_names = [output.name for output in model.get_outputs()]
-        output_values = model.run(output_names, {'input': input_tensor.astype(np.float32)})
-
-        return {name: value[0] for name, value in zip(output_names, output_values)}
-
-    except Exception as e:
-        raise RuntimeError(f"Error processing image: {str(e)}")
-
-def get_pixai_tags(
-    image: Union[str, Image.Image],
-    model_name: str = 'deepghs/pixai-tagger-v0.9-onnx',
-    thresholds: Union[float, Dict[Any, float]] = None,
-    fmt='all'
-):
-    try:
-        # Load image if it's a path
-        if isinstance(image, str):
-            pil_image = Image.open(image)
-        elif isinstance(image, Image.Image):
-            pil_image = image
-        else:
-            raise ValueError("Image must be a file path or PIL Image")
-
-        # Load model components
-        _, df_tags, d_ips, _, default_thresholds, category_names = _load_model_components_optimized(model_name)
-
-        values = _raw_predict(pil_image, model_name)
-        prediction = values.get('prediction', np.array([]))
-
-        if prediction.size == 0:
-            raise RuntimeError("Model did not return valid predictions")
-
-        tags = {}
-
-        # Process tags by category
-        for category in sorted(set(df_tags['category'].tolist())):
-            mask = df_tags['category'] == category
-            tag_names = df_tags.loc[mask, 'name']
-            category_pred = prediction[mask]
-
-            # Determine threshold for this category
-            if isinstance(thresholds, float):
-                category_threshold = thresholds
-            elif isinstance(thresholds, dict) and \
-                    (category in thresholds or category_names.get(category, '') in thresholds):
-                if category in thresholds:
-                    category_threshold = thresholds[category]
-                elif category_names.get(category, '') in thresholds:
-                    category_threshold = thresholds[category_names[category]]
-                else:
-                    category_threshold = 0.85
-            else:
-                category_threshold = default_thresholds.get(category, 0.85)
-
-            # Apply threshold
-            pred_mask = category_pred >= category_threshold
-            filtered_tag_names = tag_names[pred_mask].tolist()
-            filtered_predictions = category_pred[pred_mask].tolist()
-
-            # Sort by confidence
-            cate_tags = dict(sorted(
-                zip(filtered_tag_names, filtered_predictions),
-                key=lambda x: (-x[1], x[0])
-            ))
-
-            category_name = category_names.get(category, f"category_{category}")
-            values[category_name] = cate_tags
-            tags.update(cate_tags)
-
-        values['tag'] = tags
-
-        # Handle IPs if available
-        if 'ips' in df_tags.columns:
-            ips_mapping, ips_counts = {}, defaultdict(int)
-            for tag, _ in tags.items():
-                if tag in d_ips:
-                    ips_mapping[tag] = d_ips[tag]
-                    for ip_name in d_ips[tag]:
-                        ips_counts[ip_name] += 1
-            values['ips_mapping'] = ips_mapping
-            values['ips_count'] = dict(ips_counts)
-            values['ips'] = [x for x, _ in sorted(ips_counts.items(), key=lambda x: (-x[1], x[0]))]
-
-        # Return based on format
-        if fmt == 'all':
-            # Return all available categories
-            available_categories = [category_names.get(cat, f"category_{cat}")
-                                    for cat in sorted(set(df_tags['category'].tolist()))]
-            return tuple(values.get(cat, {}) for cat in available_categories)
-        elif fmt in values:
-            return values[fmt]
-        else:
-            return values
-
-    except Exception as e:
-        raise RuntimeError(f"Error processing image: {str(e)}")
-
-def format_ips_output(ips_result, ips_mapping):
-    """Format IP detection output as a single string with proper escaping."""
-    if not ips_result and not ips_mapping:
-        return ""
-
-    # Format detected IPs
-    ips_list = []
-    if ips_result:
-        ips_list = [ip.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-                    for ip in ips_result]
-
-    # Format character-to-IP mapping
-    mapping_list = []
-    if ips_mapping:
-        for char, ips in ips_mapping.items():
-            formatted_char = char.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-            formatted_ips = [ip.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
-                             for ip in ips]
-            mapping_list.append(f"{formatted_char}: {', '.join(formatted_ips)}")
-
-    # Combine all into a single string
-    result_parts = []
-    if ips_list:
-        result_parts.append(", ".join(ips_list))
-    if mapping_list:
-        result_parts.extend(mapping_list)
-
-    return ", ".join(result_parts)
-
-def process_single_image(
-    image_path,
-    model_name="deepghs/pixai-tagger-v0.9-onnx", ###
-    general_threshold=0.3,
-    character_threshold=0.85,
-    progress=None,
-    idx=0,
-    total_images=1
-):
-    """Process a single image and return all formatted outputs."""
-    try:
-        if image_path is None:
-            return "", "", "", "", {}, {}
-
-        if progress:
-            progress((idx)/total_images, desc=f"Processing image {idx+1}/{total_images}")
-
-        # Load image from path
-        pil_image = Image.open(image_path)
-
-        # Set thresholds
-        thresholds = {
-            'general': general_threshold,
-            'character': character_threshold
-        }
-
-        # Get all tag categories
-        all_categories = get_pixai_tags(
-            pil_image, model_name, thresholds, fmt='all'
-        )
-
-        # Ensure we have at least 3 categories (general, character, rating)
-        while len(all_categories) < 3:
-            all_categories += ({},)
-
-        general_tags = all_categories[0] if len(all_categories) > 0 else {}
-        character_tags = all_categories[1] if len(all_categories) > 1 else {}
-        rating_tags = all_categories[2] if len(all_categories) > 2 else {}
-
-        # Get IP detection data
-        ips_result = get_pixai_tags(pil_image, model_name, thresholds, fmt='ips') or []
-        ips_mapping = get_pixai_tags(pil_image, model_name, thresholds, fmt='ips_mapping') or {}
-
-        # Format character tags (names only)
-        character_names = [name.replace("(", "\\(").replace(")", "\\)").replace("_", " ") # Replacement shouldn't be necessary here, but I'll do anyway
-                           for name in character_tags.keys()]
427
- character_output = ", ".join(character_names)
428
-
429
- # Format general tags (names only)
430
- general_names = [name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
431
- for name in general_tags.keys()]
432
- general_output = ", ".join(general_names)
433
-
434
- # Format IP detection output
435
- ips_output = format_ips_output(ips_result, ips_mapping)
436
-
437
- # Format combined tags (Character tags first, then General tags, then IP tags)
438
- combined_parts = []
439
- if character_names:
440
- combined_parts.append(", ".join(character_names))
441
- if general_names:
442
- combined_parts.append(", ".join(general_names))
443
- if ips_output:
444
- combined_parts.append(ips_output)
445
-
446
- combined_output = ", ".join(combined_parts)
447
-
448
- # Get detailed JSON data
449
- json_data = {
450
- "character_tags": character_tags,
451
- "general_tags": general_tags,
452
- "rating_tags": rating_tags,
453
- "ips_result": ips_result,
454
- "ips_mapping": ips_mapping
455
- }
456
-
457
- # Format rating as label-compatible dict
458
- rating_output = {k.replace("(", "\\(").replace(")", "\\)").replace("_", " "): v
459
- for k, v in rating_tags.items()}
460
-
461
- return (
462
- character_output, # Character tags
463
- general_output, # General tags
464
- ips_output, # IP Detection
465
- combined_output, # Combined tags
466
- json_data, # Detailed JSON
467
- rating_output # Rating <- Not working atm
468
- )
469
- except Exception as e:
470
- error_msg = f"Error: {str(e)}"
471
- # Return error message for all 6 outputs
472
- return error_msg, error_msg, error_msg, error_msg, {}, {} # 6
473
-
474
- """GPU"""
475
- def unload_model():
476
- """Explicitly unload the current model from memory"""
477
- global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_D_IPS
478
- global CURRENT_PREPROCESS_FUNC, CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES
479
- # Delete the model session
480
- if CURRENT_MODEL is not None:
481
- del CURRENT_MODEL
482
- CURRENT_MODEL = None
483
- # Clear other large objects
484
- CURRENT_TAGS_DF = None
485
- CURRENT_D_IPS = None
486
- CURRENT_PREPROCESS_FUNC = None
487
- CURRENT_THRESHOLDS = None
488
- CURRENT_CATEGORY_NAMES = None
489
- CURRENT_MODEL_NAME = None
490
- # Force garbage collection
491
- import gc
492
- gc.collect()
493
- # Clear CUDA cache if using GPU
494
- try:
495
- import torch
496
- if torch.cuda.is_available():
497
- torch.cuda.empty_cache()
498
- except ImportError:
499
- pass
500
- # print("Model unloaded and memory cleared")
501
- def cleanup_after_processing():
502
- unload_model()
503
-
504
- def process_gallery_images(
505
- gallery,
506
- model_name,
507
- general_threshold,
508
- character_threshold,
509
- progress=gr.Progress()
510
- ):
511
- """Process all images in the gallery and return results with download file."""
512
- if not gallery:
513
- return [], "", "", "", {}, {}, {}, None
514
-
515
- tag_results = {}
516
- txt_infos = []
517
- output_dir = tempfile.mkdtemp()
518
-
519
- if not os.path.exists(output_dir):
520
- os.makedirs(output_dir)
521
-
522
- total_images = len(gallery)
523
- timer = Timer()
524
-
525
- try:
526
- for idx, image_data in enumerate(gallery):
527
- try:
528
- image_path = image_data[0] if isinstance(image_data, (list, tuple)) else image_data
529
-
530
- # Process image
531
- results = process_single_image(
532
- image_path, model_name, general_threshold, character_threshold,
533
- progress, idx, total_images
534
- )
535
-
536
- # Store results
537
- tag_results[image_path] = {
538
- 'character_tags': results[0],
539
- 'general_tags': results[1],
540
- 'ips_detection': results[2],
541
- 'combined_tags': results[3],
542
- 'json_data': results[4],
543
- 'rating': results[5]
544
- }
545
-
546
- # Create output files with descriptive names
547
- image_name = os.path.splitext(os.path.basename(image_path))[0]
548
-
549
- # Save all output files with descriptive prefixes
550
- files_to_create = [
551
- (f"character_tags-{image_name}.txt", results[0]),
552
- (f"general_tags-{image_name}.txt", results[1]),
553
- (f"ips_detection-{image_name}.txt", results[2]),
554
- (f"combined_tags-{image_name}.txt", results[3]),
555
- (f"detailed_json-{image_name}.json", json.dumps(results[4], indent=4, ensure_ascii=False))
556
- ]
557
-
558
- for file_name, content in files_to_create:
559
- file_path = os.path.join(output_dir, file_name)
560
- with open(file_path, 'w', encoding='utf-8') as f:
561
- f.write(content if isinstance(content, str) else content)
562
- txt_infos.append({'path': file_path, 'name': file_name})
563
-
564
- # Copy original image
565
- original_image = Image.open(image_path)
566
- image_copy_path = os.path.join(output_dir, f"{image_name}{os.path.splitext(image_path)[1]}")
567
- original_image.save(image_copy_path)
568
- txt_infos.append({'path': image_copy_path, 'name': f"{image_name}{os.path.splitext(image_path)[1]}"})
569
-
570
- timer.checkpoint(f"image{idx:02d}, processed")
571
-
572
- except Exception as e:
573
- print(f"Error processing image {image_path}: {str(e)}")
574
- print(traceback.format_exc())
575
- continue
576
-
577
- # Create zip file
578
- download_zip_path = os.path.join(output_dir, f"Multi-Tagger-{datetime.now().strftime('%Y%m%d-%H%M%S')}.zip")
579
- with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
580
- for info in txt_infos:
581
- zipf.write(info['path'], arcname=info['name'])
582
- # If using GPU, model will auto unload after zip file creation
583
- cleanup_after_processing() # Comment here to turn off this behavior
584
-
585
- progress(1.0, desc="Processing complete")
586
- timer.report_all()
587
- print('Processing is complete.')
588
-
589
- # Return first image results as default if available even if we are tagging 1000+ images.
590
- first_image_results = ("", "", "", {}, {}, "") # 6
591
- if gallery and len(gallery) > 0:
592
- first_image_path = gallery[0][0] if isinstance(gallery[0], (list, tuple)) else gallery[0]
593
- if first_image_path in tag_results:
594
- result = tag_results[first_image_path]
595
- first_image_results = (
596
- result['character_tags'],
597
- result['general_tags'],
598
- result['combined_tags'],
599
- result['json_data'],
600
- result['rating'],
601
- result['ips_detection']
602
- )
603
-
604
- return tag_results, first_image_results[0], first_image_results[1], first_image_results[2], first_image_results[3], first_image_results[4], first_image_results[5], download_zip_path
605
-
606
- except Exception as e:
607
- print(f"Error in process_gallery_images: {str(e)}")
608
- print(traceback.format_exc())
609
- progress(1.0, desc="Processing failed")
610
- return {}, "", "", "", {}, {}, "", None
611
-
612
- def get_selection_from_gallery(gallery, tag_results, selected_state: gr.SelectData):
613
- """Handle gallery image selection and update UI with stored results."""
614
- if not selected_state or not tag_results:
615
- return "", "", "", {}, {}, ""
616
-
617
- # Get selected image path
618
- selected_value = selected_state.value
619
- if isinstance(selected_value, dict) and 'image' in selected_value:
620
- image_path = selected_value['image']['path']
621
- elif isinstance(selected_value, (list, tuple)) and len(selected_value) > 0:
622
- image_path = selected_value[0]
623
- else:
624
- image_path = str(selected_value)
625
-
626
- # Retrieve stored results
627
- if image_path in tag_results:
628
- result = tag_results[image_path]
629
- return (
630
- result['character_tags'],
631
- result['general_tags'],
632
- result['combined_tags'],
633
- result['json_data'],
634
- result['rating'],
635
- result['ips_detection']
636
- )
637
-
638
- # Return empty if not found
639
- return "", "", "", {}, {}, ""
640
-
641
- def append_gallery(gallery, image):
642
- """Add a single image to the gallery."""
643
- if gallery is None:
644
- gallery = []
645
- if not image:
646
- return gallery, None
647
- gallery.append(image)
648
- return gallery, None
649
-
650
- def extend_gallery(gallery, images):
651
- """Add multiple images to the gallery."""
652
- if gallery is None:
653
- gallery = []
654
- if not images:
655
- return gallery
656
- gallery.extend(images)
657
- return gallery
658
-
659
- def create_pixai_interface():
660
- """Create the PixAI Gradio interface"""
661
- with gr.Blocks(css=css, fill_width=True) as demo:
662
- # gr.Markdown("Upload anime-style images to extract tags using PixAI")
663
- # State to store results
664
- tag_results = gr.State({})
665
- selected_image = gr.Textbox(label='Selected Image', visible=False)
666
-
667
- with gr.Row():
668
- with gr.Column():
669
- # Image upload section
670
- with gr.Column(variant='panel'):
671
- image_input = gr.Image(
672
- label='Upload an Image or clicking paste from clipboard button',
673
- type='filepath',
674
- sources=['upload', 'clipboard'],
675
- height=150
676
- )
677
- with gr.Row():
678
- upload_button = gr.UploadButton(
679
- 'Upload multiple images',
680
- file_types=['image'],
681
- file_count='multiple',
682
- size='sm'
683
- )
684
- gallery = gr.Gallery(
685
- columns=2,
686
- show_share_button=False,
687
- interactive=True,
688
- height='auto',
689
- label='Grid of images',
690
- preview=False,
691
- elem_id='custom-gallery'
692
- )
693
- run_button = gr.Button("Analyze Images", variant="primary", size='lg')
694
- model_dropdown = gr.Dropdown(
695
- choices=["deepghs/pixai-tagger-v0.9-onnx"],
696
- value="deepghs/pixai-tagger-v0.9-onnx",
697
- label="Model"
698
- )
699
- # Threshold controls
700
- with gr.Row():
701
- general_threshold = gr.Slider(
702
- minimum=0.0, maximum=1.0, value=0.30, step=0.05,
703
- label="General Tags Threshold", scale=3
704
- )
705
- character_threshold = gr.Slider(
706
- minimum=0.0, maximum=1.0, value=0.85, step=0.05,
707
- label="Character Tags Threshold", scale=3
708
- )
709
-
710
- with gr.Row():
711
- clear = gr.ClearButton(
712
- components=[gallery, model_dropdown, general_threshold, character_threshold],
713
- variant='secondary',
714
- size='lg'
715
- )
716
- clear.add([tag_results])
717
- detailed_json_output = gr.JSON(label="Detailed JSON")
718
-
719
- with gr.Column(variant='panel'):
720
-
721
- download_file = gr.File(label="Download")
722
-
723
- # Output blocks
724
- character_tags_output = gr.Textbox(
725
- label="Character tags",
726
- show_copy_button=True,
727
- lines=3
728
- )
729
- general_tags_output = gr.Textbox(
730
- label="General tags",
731
- show_copy_button=True,
732
- lines=3
733
- )
734
- ips_detection_output = gr.Textbox(
735
- label="IPs Detection",
736
- show_copy_button=True,
737
- lines=5
738
- )
739
- combined_tags_output = gr.Textbox(
740
- label="Combined tags",
741
- show_copy_button=True,
742
- lines=6
743
- )
744
- rating_output = gr.Label(label="Rating")
745
-
746
- # Clear button targets
747
- clear.add([
748
- download_file,
749
- character_tags_output,
750
- general_tags_output,
751
- ips_detection_output,
752
- combined_tags_output,
753
- rating_output,
754
- detailed_json_output
755
- ])
756
-
757
- # Event handlers
758
- image_input.change(
759
- append_gallery,
760
- inputs=[gallery, image_input],
761
- outputs=[gallery, image_input]
762
- )
763
-
764
- upload_button.upload(
765
- extend_gallery,
766
- inputs=[gallery, upload_button],
767
- outputs=gallery
768
- )
769
-
770
- gallery.select(
771
- get_selection_from_gallery,
772
- inputs=[gallery, tag_results],
773
- outputs=[
774
- character_tags_output,
775
- general_tags_output,
776
- combined_tags_output,
777
- detailed_json_output,
778
- rating_output,
779
- ips_detection_output
780
- ]
781
- )
782
-
783
- run_button.click(
784
- process_gallery_images,
785
- inputs=[gallery, model_dropdown, general_threshold, character_threshold],
786
- outputs=[
787
- tag_results,
788
- character_tags_output,
789
- general_tags_output,
790
- combined_tags_output,
791
- detailed_json_output,
792
- rating_output,
793
- ips_detection_output,
794
- download_file
795
- ]
796
- )
797
-
798
- gr.Markdown('[Based on Source code for imgutils.tagging.pixai](https://dghs-imgutils.deepghs.org/main/_modules/imgutils/tagging/pixai.html) & [pixai-labs/pixai-tagger-demo](https://huggingface.co/spaces/pixai-labs/pixai-tagger-demo)')
799
-
800
- return demo
801
-
802
- # Export public API
803
- __all__ = [
804
- 'get_pixai_tags',
805
- 'process_single_image',
806
- 'process_gallery_images',
807
- 'create_pixai_interface',
808
- 'unload_model',
809
- 'cleanup_after_processing'
810
- ]
 
1
+ import os, json, zipfile, tempfile, time, traceback
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+ import onnxruntime as ort
6
+ from collections import defaultdict
7
+ from typing import Union, Dict, Any, Tuple, List
8
+ from PIL import Image
9
+ from huggingface_hub import hf_hub_download
10
+ from huggingface_hub.errors import EntryNotFoundError
11
+ from datetime import datetime
12
+ from modules.media_handler import handle_single_media_upload, handle_multiple_media_uploads
13
+
14
+ # Global variables for model components (for memory management)
15
+ CURRENT_MODEL = None
16
+ CURRENT_MODEL_NAME = None
17
+ CURRENT_TAGS_DF = None
18
+ CURRENT_D_IPS = None
19
+ CURRENT_PREPROCESS_FUNC = None
20
+ CURRENT_THRESHOLDS = None
21
+ CURRENT_CATEGORY_NAMES = None
22
+
23
+ css = """
24
+ #custom-gallery {--row-height: 180px;display: grid;grid-auto-rows: min-content;gap: 10px;}
25
+ #custom-gallery .thumbnail-item {height: var(--row-height);width: 100%;position: relative;overflow: hidden;border-radius: 8px;box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);transition: transform 0.2s ease, box-shadow 0.2s ease;}
26
+ #custom-gallery .thumbnail-item:hover {transform: translateY(-3px);box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);}
27
+ #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: contain;margin: 0 auto;display: block;}
28
+ #custom-gallery .thumbnail-item img.portrait {max-width: 100%;}
29
+ #custom-gallery .thumbnail-item img.landscape {max-height: 100%;}
30
+ .gallery-container {max-height: 500px;overflow-y: auto;padding-right: 0px;--size-80: 500px;}
31
+ .thumbnails {display: flex;position: absolute;bottom: 0;width: 120px;overflow-x: scroll;padding-top: 320px;padding-bottom: 280px;padding-left: 4px;flex-wrap: wrap;}
32
+ #custom-gallery .thumbnail-item img {width: auto;height: 100%;max-width: 100%;max-height: var(--row-height);object-fit: initial;width: fit-content;margin: 0px auto;display: block;}
33
+ """
34
+
35
+ def preprocess_on_gpu(img, device='cuda'):
36
+ """Preprocess image on GPU using PyTorch"""
37
+ import torch
38
+ import torchvision.transforms as transforms
39
+ # Convert PIL to tensor and move to GPU
40
+ transform = transforms.Compose([transforms.Resize((448, 448)), transforms.ToTensor(), transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])])
41
+ # Move to GPU if available
42
+ tensor_img = transform(img).unsqueeze(0)
43
+ if torch.cuda.is_available():
44
+ tensor_img = tensor_img.to(device)
45
+ return tensor_img.cpu().numpy()
46
+
47
+ class Timer: # Report the execution time & process
48
+ def __init__(self):
49
+ self.start_time = time.perf_counter()
50
+ self.checkpoints = [('Start', self.start_time)]
51
+
52
+ def checkpoint(self, label='Checkpoint'):
53
+ now = time.perf_counter()
54
+ self.checkpoints.append((label, now))
55
+
56
+ def report(self, is_clear_checkpoints=True):
57
+ max_label_length = max(len(label) for (label, _) in self.checkpoints) if self.checkpoints else 0
58
+ prev_time = self.checkpoints[0][1] if self.checkpoints else self.start_time
59
+
60
+ for (label, curr_time) in self.checkpoints[1:]:
61
+ elapsed = curr_time - prev_time
62
+ print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
63
+ prev_time = curr_time
64
+
65
+ if is_clear_checkpoints:
66
+ self.checkpoints.clear()
67
+ self.checkpoint()
68
+
69
+ def report_all(self):
70
+ print('\n> Execution Time Report:')
71
+ max_label_length = max(len(label) for (label, _) in self.checkpoints) if len(self.checkpoints) > 0 else 0
72
+ prev_time = self.start_time
73
+
74
+ for (label, curr_time) in self.checkpoints[1:]:
75
+ elapsed = curr_time - prev_time
76
+ print(f"{label.ljust(max_label_length)}: {elapsed:.3f} seconds")
77
+ prev_time = curr_time
78
+
79
+ total_time = self.checkpoints[-1][1] - self.start_time if self.checkpoints else 0
80
+ print(f"{'Total Execution Time'.ljust(max_label_length)}: {total_time:.3f} seconds\n") # Performance tests
81
+ self.checkpoints.clear()
82
+
83
+ def restart(self):
84
+ self.start_time = time.perf_counter()
85
+ self.checkpoints = [('Start', self.start_time)]
86
+
87
+ def _get_repo_id(model_name: str) -> str:
88
+ """Get the repository ID for the specified model name."""
89
+ if '/' in model_name:
90
+ return model_name
91
+ else:
92
+ return f'deepghs/pixai-tagger-{model_name}-onnx'
93
+
94
+ def _download_model_files(model_name: str):
95
+ """Download all required model files."""
96
+ repo_id = _get_repo_id(model_name)
97
+
98
+ # Download the required files with hf_hub_download (huggingface_hub manages the cache)
99
+ model_path = hf_hub_download(
100
+ repo_id=repo_id,
101
+ filename='model.onnx',
102
+ library_name="pixai-tagger"
103
+ )
104
+ tags_path = hf_hub_download(
105
+ repo_id=repo_id,
106
+ filename='selected_tags.csv',
107
+ library_name="pixai-tagger"
108
+ )
109
+ preprocess_path = hf_hub_download(
110
+ repo_id=repo_id,
111
+ filename='preprocess.json',
112
+ library_name="pixai-tagger"
113
+ )
114
+ try:
115
+ thresholds_path = hf_hub_download(
116
+ repo_id=repo_id,
117
+ filename='thresholds.csv',
118
+ library_name="pixai-tagger"
119
+ )
120
+ except EntryNotFoundError:
121
+ thresholds_path = None
122
+
123
+ return model_path, tags_path, preprocess_path, thresholds_path
124
+
125
+ def create_optimized_ort_session(model_path):
126
+ """Create an optimized ONNX Runtime session with GPU support"""
127
+ # Test: Session options for better performance
128
+ sess_options = ort.SessionOptions()
129
+ sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
130
+ sess_options.intra_op_num_threads = 0 # Use all available cores
131
+ sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
132
+ sess_options.enable_mem_pattern = True
133
+ sess_options.enable_cpu_mem_arena = True
134
+
135
+ # Check available providers
136
+ available_providers = ort.get_available_providers()
137
+ print(f"Available ONNX Runtime providers: {available_providers}")
138
+
139
+ # Use appropriate execution providers (in order of preference)
140
+ providers = []
141
+
142
+ # Use CUDA if available
143
+ if 'CUDAExecutionProvider' in available_providers:
144
+ cuda_provider = ('CUDAExecutionProvider', {
145
+ 'device_id': 0,
146
+ 'arena_extend_strategy': 'kNextPowerOfTwo',
147
+ 'gpu_mem_limit': 4 * 1024 * 1024 * 1024, # 4GB VRAM
148
+ 'cudnn_conv_algo_search': 'EXHAUSTIVE',
149
+ 'do_copy_in_default_stream': True,
150
+ })
151
+ providers.append(cuda_provider)
152
+ print("Using CUDA provider for ONNX inference")
153
+ else:
154
+ print("CUDA provider not available, falling back to CPU")
155
+
156
+ # Always include CPU as fallback (FOR HF)
157
+ providers.append('CPUExecutionProvider')
158
+
159
+ try:
160
+ session = ort.InferenceSession(model_path, sess_options, providers=providers)
161
+ print(f"Model loaded with providers: {session.get_providers()}")
162
+ return session
163
+ except Exception as e:
164
+ print(f"Failed to create ONNX session: {e}")
165
+ raise
166
+
167
+ def _load_model_components_optimized(model_name: str):
168
+ global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_D_IPS
169
+ global CURRENT_PREPROCESS_FUNC, CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES
170
+
171
+ # Only reload if model changed
172
+ if CURRENT_MODEL_NAME != model_name:
173
+ # Download files
174
+ model_path, tags_path, preprocess_path, thresholds_path = _download_model_files(model_name)
175
+
176
+ # Load optimized ONNX model
177
+ CURRENT_MODEL = create_optimized_ort_session(model_path)
178
+
179
+ # Load tags
180
+ CURRENT_TAGS_DF = pd.read_csv(tags_path)
181
+ CURRENT_D_IPS = {}
182
+
183
+ if 'ips' in CURRENT_TAGS_DF.columns:
184
+ CURRENT_TAGS_DF['ips'] = CURRENT_TAGS_DF['ips'].fillna('{}').map(json.loads)
185
+ for name, ips in zip(CURRENT_TAGS_DF['name'], CURRENT_TAGS_DF['ips']):
186
+ if ips:
187
+ CURRENT_D_IPS[name] = ips
188
+
189
+ # Load preprocessing
190
+ with open(preprocess_path, 'r') as f:
191
+ data_ = json.load(f)
192
+ # Simple preprocessing function
193
+ def transform(img):
194
+ # Ensure image is in RGB mode
195
+ if img.mode != 'RGB':
196
+ img = img.convert('RGB')
197
+
198
+ # Resize to 448x448 (the model's required input size)
199
+ img = img.resize((448, 448), Image.Resampling.LANCZOS)
200
+
201
+ # Convert to numpy array and normalize
202
+ img_array = np.array(img).astype(np.float32)
203
+
204
+ # Normalize pixel values to [0, 1]
205
+ img_array = img_array / 255.0
206
+
207
+ # Normalize with ImageNet mean and std
208
+ mean = np.array([0.48145466, 0.4578275, 0.40821073]).astype(np.float32)
209
+ std = np.array([0.26862954, 0.26130258, 0.27577711]).astype(np.float32)
210
+ img_array = (img_array - mean) / std
211
+
212
+ # Transpose to (C, H, W)
213
+ img_array = np.transpose(img_array, (2, 0, 1))
214
+ return img_array
215
+
216
+ CURRENT_PREPROCESS_FUNC = transform
217
+
218
+ # Load thresholds
219
+ CURRENT_THRESHOLDS = {}
220
+ CURRENT_CATEGORY_NAMES = {}
221
+
222
+ if thresholds_path and os.path.exists(thresholds_path):
223
+ df_category_thresholds = pd.read_csv(thresholds_path, keep_default_na=False)
224
+ for item in df_category_thresholds.to_dict('records'):
225
+ if item['category'] not in CURRENT_THRESHOLDS:
226
+ CURRENT_THRESHOLDS[item['category']] = item['threshold']
227
+ CURRENT_CATEGORY_NAMES[item['category']] = item['name']
228
+ else:
229
+ # Default thresholds if file doesn't exist
230
+ CURRENT_THRESHOLDS = {0: 0.3, 4: 0.85, 9: 0.85}
231
+ CURRENT_CATEGORY_NAMES = {0: 'general', 4: 'character', 9: 'rating'}
232
+
233
+ CURRENT_MODEL_NAME = model_name
234
+
235
+ return (CURRENT_MODEL, CURRENT_TAGS_DF, CURRENT_D_IPS, CURRENT_PREPROCESS_FUNC,
236
+ CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES)
237
+
238
+ def _raw_predict(image: Image.Image, model_name: str):
239
+ """Make a raw prediction with the PixAI tagger model."""
240
+ try:
241
+ # Ensure we have a PIL Image
242
+ if not isinstance(image, Image.Image):
243
+ raise ValueError("Input must be a PIL Image")
244
+
245
+ # Load model components
246
+ model, _, _, preprocess_func, _, _ = _load_model_components_optimized(model_name)
247
+
248
+ # Preprocess image
249
+ input_tensor = preprocess_func(image)
250
+
251
+ # Add batch dimension
252
+ if len(input_tensor.shape) == 3:
253
+ input_tensor = np.expand_dims(input_tensor, axis=0)
254
+
255
+ # Run inference
256
+ output_names = [output.name for output in model.get_outputs()]
257
+ output_values = model.run(output_names, {'input': input_tensor.astype(np.float32)})
258
+
259
+ return {name: value[0] for name, value in zip(output_names, output_values)}
260
+
261
+ except Exception as e:
262
+ raise RuntimeError(f"Error processing image: {str(e)}")
263
+
264
+ def get_pixai_tags(
265
+ image: Union[str, Image.Image],
266
+ model_name: str = 'deepghs/pixai-tagger-v0.9-onnx',
267
+ thresholds: Union[float, Dict[Any, float]] = None,
268
+ fmt='all'
269
+ ):
270
+ try:
271
+ # Load image if it's a path
272
+ if isinstance(image, str):
273
+ pil_image = Image.open(image)
274
+ elif isinstance(image, Image.Image):
275
+ pil_image = image
276
+ else:
277
+ raise ValueError("Image must be a file path or PIL Image")
278
+
279
+ # Load model components
280
+ _, df_tags, d_ips, _, default_thresholds, category_names = _load_model_components_optimized(model_name)
281
+
282
+ values = _raw_predict(pil_image, model_name)
283
+ prediction = values.get('prediction', np.array([]))
284
+
285
+ if prediction.size == 0:
286
+ raise RuntimeError("Model did not return valid predictions")
287
+
288
+ tags = {}
289
+
290
+ # Process tags by category
291
+ for category in sorted(set(df_tags['category'].tolist())):
292
+ mask = df_tags['category'] == category
293
+ tag_names = df_tags.loc[mask, 'name']
294
+ category_pred = prediction[mask]
295
+
296
+ # Determine threshold for this category
297
+ if isinstance(thresholds, float):
298
+ category_threshold = thresholds
299
+ elif isinstance(thresholds, dict) and \
300
+ (category in thresholds or category_names.get(category, '') in thresholds):
301
+ if category in thresholds:
302
+ category_threshold = thresholds[category]
303
+ elif category_names.get(category, '') in thresholds:
304
+ category_threshold = thresholds[category_names[category]]
305
+ else:
306
+ category_threshold = 0.85
307
+ else:
308
+ category_threshold = default_thresholds.get(category, 0.85)
309
+
310
+ # Apply threshold
311
+ pred_mask = category_pred >= category_threshold
312
+ filtered_tag_names = tag_names[pred_mask].tolist()
313
+ filtered_predictions = category_pred[pred_mask].tolist()
314
+
315
+ # Sort by confidence
316
+ cate_tags = dict(sorted(
317
+ zip(filtered_tag_names, filtered_predictions),
318
+ key=lambda x: (-x[1], x[0])
319
+ ))
320
+
321
+ category_name = category_names.get(category, f"category_{category}")
322
+ values[category_name] = cate_tags
323
+ tags.update(cate_tags)
324
+
325
+ values['tag'] = tags
326
+
327
+ # Handle IPs if available
328
+ if 'ips' in df_tags.columns:
329
+ ips_mapping, ips_counts = {}, defaultdict(int)
330
+ for tag, _ in tags.items():
331
+ if tag in d_ips:
332
+ ips_mapping[tag] = d_ips[tag]
333
+ for ip_name in d_ips[tag]:
334
+ ips_counts[ip_name] += 1
335
+ values['ips_mapping'] = ips_mapping
336
+ values['ips_count'] = dict(ips_counts)
337
+ values['ips'] = [x for x, _ in sorted(ips_counts.items(), key=lambda x: (-x[1], x[0]))]
338
+
339
+ # Return based on format
340
+ if fmt == 'all':
341
+ # Return all available categories
342
+ available_categories = [category_names.get(cat, f"category_{cat}")
343
+ for cat in sorted(set(df_tags['category'].tolist()))]
344
+ return tuple(values.get(cat, {}) for cat in available_categories)
345
+ elif fmt in values:
346
+ return values[fmt]
347
+ else:
348
+ return values
349
+
350
+ except Exception as e:
351
+ raise RuntimeError(f"Error processing image: {str(e)}")
352
+
353
+ def format_ips_output(ips_result, ips_mapping):
354
+ """Format IP detection output as a single string with proper escaping."""
355
+ if not ips_result and not ips_mapping:
356
+ return ""
357
+
358
+ # Format detected IPs
359
+ ips_list = []
360
+ if ips_result:
361
+ ips_list = [ip.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
362
+ for ip in ips_result]
363
+
364
+ # Format character-to-IP mapping
365
+ mapping_list = []
366
+ if ips_mapping:
367
+ for char, ips in ips_mapping.items():
368
+ formatted_char = char.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
369
+ formatted_ips = [ip.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
370
+ for ip in ips]
371
+ mapping_list.append(f"{formatted_char}: {', '.join(formatted_ips)}")
372
+
373
+ # Combine all into a single string
374
+ result_parts = []
375
+ if ips_list:
376
+ result_parts.append(", ".join(ips_list))
377
+ if mapping_list:
378
+ result_parts.extend(mapping_list)
379
+
380
+ return ", ".join(result_parts)
381
+
382
+ def process_single_image(
383
+ image_path,
384
+ model_name="deepghs/pixai-tagger-v0.9-onnx",
385
+ general_threshold=0.3,
386
+ character_threshold=0.85,
387
+ progress=None,
388
+ idx=0,
389
+ total_images=1
390
+ ):
391
+ """Process a single image and return all formatted outputs."""
392
+ try:
393
+ if image_path is None:
394
+ return "", "", "", "", {}, {}
395
+
396
+ if progress:
397
+ progress((idx)/total_images, desc=f"Processing image {idx+1}/{total_images}")
398
+
399
+ # Load image from path
400
+ pil_image = Image.open(image_path)
401
+
402
+ # Set thresholds
403
+ thresholds = {
404
+ 'general': general_threshold,
405
+ 'character': character_threshold
406
+ }
407
+
408
+ # Get all tag categories
409
+ all_categories = get_pixai_tags(
410
+ pil_image, model_name, thresholds, fmt='all'
411
+ )
412
+
413
+ # Ensure we have at least 3 categories (general, character, rating)
414
+ while len(all_categories) < 3:
415
+ all_categories += ({},)
416
+
417
+ general_tags = all_categories[0] if len(all_categories) > 0 else {}
418
+ character_tags = all_categories[1] if len(all_categories) > 1 else {}
419
+ rating_tags = all_categories[2] if len(all_categories) > 2 else {}
420
+
421
+ # Get IP detection data
422
+ ips_result = get_pixai_tags(pil_image, model_name, thresholds, fmt='ips') or []
423
+ ips_mapping = get_pixai_tags(pil_image, model_name, thresholds, fmt='ips_mapping') or {}
424
+
425
+ # Format character tags (names only)
426
+ character_names = [name.replace("(", "\\(").replace(")", "\\)").replace("_", " ") # Escaping shouldn't be necessary here, but apply it anyway for consistency
427
+ for name in character_tags.keys()]
428
+ character_output = ", ".join(character_names)
429
+
430
+ # Format general tags (names only)
431
+ general_names = [name.replace("(", "\\(").replace(")", "\\)").replace("_", " ")
432
+ for name in general_tags.keys()]
433
+ general_output = ", ".join(general_names)
434
+
435
+ # Format IP detection output
436
+ ips_output = format_ips_output(ips_result, ips_mapping)
437
+
438
+ # Format combined tags (Character tags first, then General tags, then IP tags)
439
+ combined_parts = []
440
+ if character_names:
441
+ combined_parts.append(", ".join(character_names))
442
+ if general_names:
443
+ combined_parts.append(", ".join(general_names))
444
+ if ips_output:
445
+ combined_parts.append(ips_output)
446
+
447
+ combined_output = ", ".join(combined_parts)
448
+
449
+ # Get detailed JSON data
450
+ json_data = {
451
+ "character_tags": character_tags,
452
+ "general_tags": general_tags,
453
+ "rating_tags": rating_tags,
454
+ "ips_result": ips_result,
455
+ "ips_mapping": ips_mapping
456
+ }
457
+
458
+ # Format rating as label-compatible dict
459
+ rating_output = {k.replace("(", "\\(").replace(")", "\\)").replace("_", " "): v
460
+ for k, v in rating_tags.items()}
461
+
462
+ return (
463
+ character_output, # Character tags
464
+ general_output, # General tags
465
+ ips_output, # IP Detection
466
+ combined_output, # Combined tags
467
+ json_data, # Detailed JSON
468
+ rating_output # Rating (not working at the moment)
469
+ )
470
+ except Exception as e:
471
+ error_msg = f"Error: {str(e)}"
472
+ # Return error message for all 6 outputs
473
+ return error_msg, error_msg, error_msg, error_msg, {}, {}
474
+
475
+ """GPU"""
476
+ def unload_model():
477
+ """Explicitly unload the current model from memory"""
478
+ global CURRENT_MODEL, CURRENT_MODEL_NAME, CURRENT_TAGS_DF, CURRENT_D_IPS
479
+ global CURRENT_PREPROCESS_FUNC, CURRENT_THRESHOLDS, CURRENT_CATEGORY_NAMES
480
+ # Delete the model session
481
+ if CURRENT_MODEL is not None:
482
+ del CURRENT_MODEL
483
+ CURRENT_MODEL = None
484
+ # Clear other large objects
485
+ CURRENT_TAGS_DF = None
486
+ CURRENT_D_IPS = None
487
+ CURRENT_PREPROCESS_FUNC = None
488
+ CURRENT_THRESHOLDS = None
489
+ CURRENT_CATEGORY_NAMES = None
490
+ CURRENT_MODEL_NAME = None
491
+ # Force garbage collection
492
+ import gc
493
+ gc.collect()
494
+ # Clear CUDA cache if using GPU
495
+ try:
496
+ import torch
497
+ if torch.cuda.is_available():
498
+ torch.cuda.empty_cache()
499
+ except ImportError:
500
+ pass
501
+ # print("Model unloaded and memory cleared")
502
+ def cleanup_after_processing():
503
+ unload_model()
504
+
505
+ def process_gallery_images(
506
+ gallery,
507
+ model_name,
508
+ general_threshold,
509
+ character_threshold,
510
+ progress=gr.Progress()
511
+ ):
512
+ """Process all images in the gallery and return results with download file."""
513
+ if not gallery:
514
+ return {}, "", "", "", {}, {}, "", None
515
+
516
+ tag_results = {}
517
+ txt_infos = []
518
+ output_dir = tempfile.mkdtemp()
519
+
520
+ if not os.path.exists(output_dir):
521
+ os.makedirs(output_dir)
522
+
523
+ total_images = len(gallery)
524
+ timer = Timer()
525
+
526
+ try:
527
+ for idx, image_data in enumerate(gallery):
528
+ try:
529
+ image_path = image_data[0] if isinstance(image_data, (list, tuple)) else image_data
530
+
531
+ # Process image
532
+ results = process_single_image(
533
+ image_path, model_name, general_threshold, character_threshold,
534
+ progress, idx, total_images
535
+ )
536
+
537
+ # Store results
538
+ tag_results[image_path] = {
539
+ 'character_tags': results[0],
540
+ 'general_tags': results[1],
541
+ 'ips_detection': results[2],
542
+ 'combined_tags': results[3],
543
+ 'json_data': results[4],
544
+ 'rating': results[5]
545
+ }
546
+
547
+ # Create output files with descriptive names
548
+ image_name = os.path.splitext(os.path.basename(image_path))[0]
549
+
550
+ # Save all output files with descriptive prefixes
551
+ files_to_create = [
552
+ (f"character_tags-{image_name}.txt", results[0]),
553
+ (f"general_tags-{image_name}.txt", results[1]),
554
+ (f"ips_detection-{image_name}.txt", results[2]),
555
+ (f"combined_tags-{image_name}.txt", results[3]),
556
+ (f"detailed_json-{image_name}.json", json.dumps(results[4], indent=4, ensure_ascii=False))
557
+ ]
558
+
559
+ for file_name, content in files_to_create:
560
+ file_path = os.path.join(output_dir, file_name)
561
+ with open(file_path, 'w', encoding='utf-8') as f:
562
+ f.write(content if isinstance(content, str) else str(content))
563
+ txt_infos.append({'path': file_path, 'name': file_name})
564
+
565
+ # Copy original image
566
+ original_image = Image.open(image_path)
567
+ image_copy_path = os.path.join(output_dir, f"{image_name}{os.path.splitext(image_path)[1]}")
568
+ original_image.save(image_copy_path)
569
+ txt_infos.append({'path': image_copy_path, 'name': f"{image_name}{os.path.splitext(image_path)[1]}"})
570
+
571
+ timer.checkpoint(f"image{idx:02d}, processed")
572
+
573
+ except Exception as e:
574
+ print(f"Error processing image {image_path}: {str(e)}")
575
+ print(traceback.format_exc())
576
+ continue
577
+
578
+ # Create zip file
579
+ download_zip_path = os.path.join(output_dir, f"Multi-Tagger-{datetime.now().strftime('%Y%m%d-%H%M%S')}.zip")
580
+ with zipfile.ZipFile(download_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
581
+ for info in txt_infos:
582
+ zipf.write(info['path'], arcname=info['name'])
583
+ # If using GPU, model will auto unload after zip file creation
584
+ cleanup_after_processing() # Comment out this call to disable auto-unloading
585
+
586
+ progress(1.0, desc="Processing complete")
587
+ timer.report_all()
588
+ print('Processing is complete.')
589
+
590
+ # Default the displayed outputs to the first image's results, even when tagging 1000+ images.
591
+ first_image_results = ("", "", "", {}, {}, "") # character, general, combined, json, rating, ips
592
+ if gallery and len(gallery) > 0:
593
+ first_image_path = gallery[0][0] if isinstance(gallery[0], (list, tuple)) else gallery[0]
594
+ if first_image_path in tag_results:
595
+ result = tag_results[first_image_path]
596
+ first_image_results = (
597
+ result['character_tags'],
598
+ result['general_tags'],
599
+ result['combined_tags'],
600
+ result['json_data'],
601
+ result['rating'],
602
+ result['ips_detection']
603
+ )
604
+
605
+ return tag_results, first_image_results[0], first_image_results[1], first_image_results[2], first_image_results[3], first_image_results[4], first_image_results[5], download_zip_path
606
+
607
+ except Exception as e:
608
+ print(f"Error in process_gallery_images: {str(e)}")
609
+ print(traceback.format_exc())
610
+ progress(1.0, desc="Processing failed")
611
+ return {}, "", "", "", {}, {}, "", None
612
+
613
+ def get_selection_from_gallery(gallery, tag_results, selected_state: gr.SelectData):
614
+ """Handle gallery image selection and update UI with stored results."""
615
+ if not selected_state or not tag_results:
616
+ return "", "", "", {}, {}, ""
617
+
618
+ # Get selected image path
619
+ selected_value = selected_state.value
620
+ if isinstance(selected_value, dict) and 'image' in selected_value:
621
+ image_path = selected_value['image']['path']
622
+ elif isinstance(selected_value, (list, tuple)) and len(selected_value) > 0:
623
+ image_path = selected_value[0]
624
+ else:
625
+ image_path = str(selected_value)
626
+
627
+ # Retrieve stored results
628
+ if image_path in tag_results:
629
+ result = tag_results[image_path]
630
+ return (
631
+ result['character_tags'],
632
+ result['general_tags'],
633
+ result['combined_tags'],
634
+ result['json_data'],
635
+ result['rating'],
636
+ result['ips_detection']
637
+ )
638
+
639
+ # Return empty if not found
640
+ return "", "", "", {}, {}, ""
641
+
642
+ def append_gallery(gallery, image):
643
+ """Add a single media file (image or video) to the gallery."""
644
+ return handle_single_media_upload(image, gallery)
645
+
646
+ def extend_gallery(gallery, images):
647
+ """Add multiple media files (images or videos) to the gallery."""
648
+ return handle_multiple_media_uploads(images, gallery)
649
+
650
+ def create_pixai_interface():
651
+ """Create the PixAI Gradio interface"""
652
+ with gr.Blocks(css=css, fill_width=True) as demo:
653
+ # gr.Markdown("Upload anime-style images to extract tags using PixAI")
654
+ # State to store results
655
+ tag_results = gr.State({})
656
+ selected_image = gr.Textbox(label='Selected Image', visible=False)
657
+
658
+ with gr.Row():
659
+ with gr.Column():
660
+ # Image upload section
661
+ with gr.Column(variant='panel'):
662
+ image_input = gr.Image(
663
+ label='Upload an Image (or paste from clipboard)',
664
+ type='filepath',
665
+ sources=['upload', 'clipboard'],
666
+ height=150
667
+ )
668
+ with gr.Row():
669
+ upload_button = gr.UploadButton(
670
+ 'Upload multiple images or videos',
671
+ file_types=['image', 'video'],
672
+ file_count='multiple',
673
+ size='sm'
674
+ )
675
+ gallery = gr.Gallery(
676
+ columns=2,
677
+ show_share_button=False,
678
+ interactive=True,
679
+ height='auto',
680
+ label='Grid of images',
681
+ preview=False,
682
+ elem_id='custom-gallery'
683
+ )
684
+ run_button = gr.Button("Analyze Images", variant="primary", size='lg')
685
+ model_dropdown = gr.Dropdown(
686
+ choices=["deepghs/pixai-tagger-v0.9-onnx"],
687
+ value="deepghs/pixai-tagger-v0.9-onnx",
688
+ label="Model"
689
+ )
690
+ # Threshold controls
691
+ with gr.Row():
692
+ general_threshold = gr.Slider(
693
+ minimum=0.0, maximum=1.0, value=0.30, step=0.05,
694
+ label="General Tags Threshold", scale=3
695
+ )
696
+ character_threshold = gr.Slider(
697
+ minimum=0.0, maximum=1.0, value=0.85, step=0.05,
698
+ label="Character Tags Threshold", scale=3
699
+ )
700
+
701
+ with gr.Row():
702
+ clear = gr.ClearButton(
703
+ components=[gallery, model_dropdown, general_threshold, character_threshold],
704
+ variant='secondary',
705
+ size='lg'
706
+ )
707
+ clear.add([tag_results])
708
+ detailed_json_output = gr.JSON(label="Detailed JSON")
709
+
710
+ with gr.Column(variant='panel'):
711
+
712
+ download_file = gr.File(label="Download")
713
+
714
+ # Output blocks
715
+ character_tags_output = gr.Textbox(
716
+ label="Character tags",
717
+ show_copy_button=True,
718
+ lines=3
719
+ )
720
+ general_tags_output = gr.Textbox(
721
+ label="General tags",
722
+ show_copy_button=True,
723
+ lines=3
724
+ )
725
+ ips_detection_output = gr.Textbox(
726
+ label="IPs Detection",
727
+ show_copy_button=True,
728
+ lines=5
729
+ )
730
+ combined_tags_output = gr.Textbox(
731
+ label="Combined tags",
732
+ show_copy_button=True,
733
+ lines=6
734
+ )
735
+ rating_output = gr.Label(label="Rating")
736
+
737
+ # Clear button targets
738
+ clear.add([
739
+ download_file,
740
+ character_tags_output,
741
+ general_tags_output,
742
+ ips_detection_output,
743
+ combined_tags_output,
744
+ rating_output,
745
+ detailed_json_output
746
+ ])
747
+
748
+ # Event handlers
749
+ image_input.change(
750
+ append_gallery,
751
+ inputs=[gallery, image_input],
752
+ outputs=[gallery, image_input]
753
+ )
754
+
755
+ upload_button.upload(
756
+ extend_gallery,
757
+ inputs=[gallery, upload_button],
758
+ outputs=gallery
759
+ )
760
+
761
+ gallery.select(
762
+ get_selection_from_gallery,
763
+ inputs=[gallery, tag_results],
764
+ outputs=[
765
+ character_tags_output,
766
+ general_tags_output,
767
+ combined_tags_output,
768
+ detailed_json_output,
769
+ rating_output,
770
+ ips_detection_output
771
+ ]
772
+ )
773
+
774
+ run_button.click(
775
+ process_gallery_images,
776
+ inputs=[gallery, model_dropdown, general_threshold, character_threshold],
777
+ outputs=[
778
+ tag_results,
779
+ character_tags_output,
780
+ general_tags_output,
781
+ combined_tags_output,
782
+ detailed_json_output,
783
+ rating_output,
784
+ ips_detection_output,
785
+ download_file
786
+ ]
787
+ )
788
+
789
+ gr.Markdown('[Based on Source code for imgutils.tagging.pixai](https://dghs-imgutils.deepghs.org/main/_modules/imgutils/tagging/pixai.html) & [pixai-labs/pixai-tagger-demo](https://huggingface.co/spaces/pixai-labs/pixai-tagger-demo)')
790
+
791
+ return demo
792
+
793
+ # Export public API
794
+ __all__ = [
795
+ 'get_pixai_tags',
796
+ 'process_single_image',
797
+ 'process_gallery_images',
798
+ 'create_pixai_interface',
799
+ 'unload_model',
800
+ 'cleanup_after_processing'
801
+ ]
 
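For reference, a minimal usage sketch of the public API exported above. This is an editorial illustration, not part of the commit: 'sample.png' is a placeholder path, and the thresholds simply mirror the UI defaults (0.30 general, 0.85 character).

from modules.pixai import get_pixai_tags, process_single_image, unload_model

# Per-category thresholds, keyed by category name ('general', 'character', ...)
thresholds = {'general': 0.30, 'character': 0.85}

# fmt='tag' returns one flat {tag: confidence} dict across all categories
tags = get_pixai_tags('sample.png', thresholds=thresholds, fmt='tag')
for name, score in sorted(tags.items(), key=lambda kv: -kv[1])[:10]:
    print(f"{name}: {score:.3f}")

# Or get the formatted strings the Gradio UI displays
character, general, ips, combined, json_data, rating = process_single_image('sample.png')
print(combined)

unload_model()  # release the ONNX session and cached tag data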
 
modules/video_processor.py ADDED
@@ -0,0 +1,206 @@
1
+ import os
2
+ import cv2
3
+ import tempfile
4
+ from typing import List, Tuple, Optional
5
+ from PIL import Image
6
+ import logging
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # Supported video formats
13
+ SUPPORTED_VIDEO_FORMATS = ['.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv', '.m4v']
14
+
15
+ def is_video_file(file_path: str) -> bool:
16
+ """Check if the file is a supported video format."""
17
+ if not file_path:
18
+ return False
19
+ _, ext = os.path.splitext(file_path.lower())
20
+ return ext in SUPPORTED_VIDEO_FORMATS
21
+
22
+ def get_video_duration(video_path: str) -> float:
23
+ """Get the duration of a video in seconds."""
24
+ try:
25
+ cap = cv2.VideoCapture(video_path)
26
+ if not cap.isOpened():
27
+ logger.error(f"Could not open video: {video_path}")
28
+ return 0.0
29
+
30
+ fps = cap.get(cv2.CAP_PROP_FPS)
31
+ frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
32
+
33
+ if fps <= 0:
34
+ logger.warning(f"Invalid FPS for video {video_path}, using fallback method")
35
+ # Fallback: seek to the end of the video and read the elapsed time
38
+ cap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1.0)
39
+ duration = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
40
+ else:
41
+ duration = frame_count / fps
42
+
43
+ cap.release()
44
+ return max(0.0, duration) # Ensure non-negative duration
45
+
46
+ except Exception as e:
47
+ logger.error(f"Error getting video duration for {video_path}: {str(e)}")
48
+ return 0.0
49
+
50
+ def extract_frames_from_video(
51
+ video_path: str,
52
+ max_duration: int = 30,
53
+ frame_interval: int = 1,
54
+ output_dir: Optional[str] = None
55
+ ) -> List[str]:
56
+ """
57
+ Extract frames from a video at specified intervals.
58
+
59
+ Args:
60
+ video_path: Path to the video file
61
+ max_duration: Maximum duration to process (seconds)
62
+ frame_interval: Interval between frames (seconds)
63
+ output_dir: Directory to save frames (creates temp if None)
64
+
65
+ Returns:
66
+ List of paths to extracted frame images
67
+ """
68
+ if not os.path.exists(video_path):
69
+ logger.error(f"Video file does not exist: {video_path}")
70
+ return []
71
+
72
+ if not is_video_file(video_path):
73
+ logger.error(f"Unsupported video format: {video_path}")
74
+ return []
75
+
76
+ # Create output directory if not provided
77
+ if output_dir is None:
78
+ output_dir = tempfile.mkdtemp(prefix="video_frames_")
79
+
80
+ try:
81
+ # Get video info
82
+ duration = get_video_duration(video_path)
83
+ logger.info(f"Video duration: {duration:.2f} seconds")
84
+
85
+ # Limit duration if necessary
86
+ process_duration = min(duration, max_duration)
87
+ logger.info(f"Processing {process_duration:.2f} seconds of video")
88
+
89
+ # Open video
90
+ cap = cv2.VideoCapture(video_path)
91
+ if not cap.isOpened():
92
+ logger.error(f"Could not open video: {video_path}")
93
+ return []
94
+
95
+ fps = cap.get(cv2.CAP_PROP_FPS)
96
+ if fps <= 0:
97
+ logger.error(f"Invalid FPS: {fps}")
98
+ cap.release()
99
+ return []
100
+
101
+ # Calculate frame positions
102
+ frame_positions = []
103
+ current_time = 0
104
+ while current_time < process_duration:
105
+ frame_number = int(current_time * fps)
106
+ frame_positions.append(frame_number)
107
+ current_time += frame_interval
108
+
109
+ logger.info(f"Extracting {len(frame_positions)} frames")
110
+
111
+ # Extract frames
112
+ frame_paths = []
113
+ video_name = os.path.splitext(os.path.basename(video_path))[0]
114
+
115
+ for i, frame_number in enumerate(frame_positions):
116
+ # Set position to desired frame
117
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
118
+
119
+ ret, frame = cap.read()
120
+ if not ret:
121
+ logger.warning(f"Could not read frame {frame_number}")
122
+ continue
123
+
124
+ # Convert BGR to RGB
125
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
126
+
127
+ # Convert to PIL Image
128
+ pil_image = Image.fromarray(frame_rgb)
129
+
130
+ # Save frame
131
+ frame_filename = f"{video_name}_frame_{i+1:03d}.png"
132
+ frame_path = os.path.join(output_dir, frame_filename)
133
+ pil_image.save(frame_path, 'PNG')
134
+ frame_paths.append(frame_path)
135
+
136
+ logger.debug(f"Saved frame {i+1}/{len(frame_positions)}: {frame_filename}")
137
+
138
+ cap.release()
139
+ logger.info(f"Successfully extracted {len(frame_paths)} frames from {video_path}")
140
+ return frame_paths
141
+
142
+ except Exception as e:
143
+ logger.error(f"Error extracting frames from {video_path}: {str(e)}")
144
+ return []
145
+
146
+ def process_video_upload(video_path: str, max_duration: int = 30, frame_interval: int = 1) -> Tuple[List[str], str]:
147
+ """
148
+ Process a video upload and extract frames.
149
+
150
+ Args:
151
+ video_path: Path to the uploaded video
152
+ max_duration: Maximum duration to process (seconds)
153
+ frame_interval: Interval between frames (seconds)
154
+
155
+ Returns:
156
+ Tuple of (list of frame paths, output directory)
157
+ """
158
+ # Create temporary directory for frames
159
+ output_dir = tempfile.mkdtemp(prefix="video_frames_")
160
+
161
+ # Extract frames
162
+ frame_paths = extract_frames_from_video(
163
+ video_path,
164
+ max_duration,
165
+ frame_interval,
166
+ output_dir
167
+ )
168
+
169
+ return frame_paths, output_dir
170
+
171
+ def get_video_info(video_path: str) -> dict:
172
+ """Get detailed information about a video file."""
173
+ try:
174
+ cap = cv2.VideoCapture(video_path)
175
+ if not cap.isOpened():
176
+ return {"error": "Could not open video"}
177
+
178
+ fps = cap.get(cv2.CAP_PROP_FPS)
179
+ frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
180
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
181
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
182
+ duration = frame_count / fps if fps > 0 else 0
183
+
184
+ cap.release()
185
+
186
+ return {
187
+ "duration": duration,
188
+ "fps": fps,
189
+ "frame_count": frame_count,
190
+ "width": width,
191
+ "height": height,
192
+ "resolution": f"{width}x{height}"
193
+ }
194
+
195
+ except Exception as e:
196
+ return {"error": str(e)}
197
+
198
+ # Export functions
199
+ __all__ = [
200
+ 'is_video_file',
201
+ 'get_video_duration',
202
+ 'extract_frames_from_video',
203
+ 'process_video_upload',
204
+ 'get_video_info',
205
+ 'SUPPORTED_VIDEO_FORMATS'
206
+ ]
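To close, a minimal end-to-end sketch (again editorial, with 'clip.mp4' as a placeholder path) showing how these helpers pair with the tagger: extract frames at the default 1-second interval, capped at 30 seconds, then tag each frame.

import os
from modules.video_processor import get_video_info, process_video_upload
from modules.pixai import process_single_image

info = get_video_info('clip.mp4')
if 'error' not in info:
    print(f"{info['resolution']} @ {info['fps']:.1f} fps, {info['duration']:.1f}s")

# Extract up to 30 frames (one per second), then tag each one
frame_paths, frames_dir = process_video_upload('clip.mp4', max_duration=30, frame_interval=1)
for path in frame_paths:
    _, _, _, combined, _, _ = process_single_image(path)
    print(os.path.basename(path), '->', combined)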