import gradio as gr
import os
import spaces
import torch
from diffusers import AuraFlowPipeline, Lumina2Pipeline, NewbiePipeline
from transformers import AutoModel
import random
import numpy as np
from PIL import Image
import copy
import warnings
import math
import time
from stablepy import SCHEDULER_CONFIG_MAP, FLUX_SCHEDULE_TYPES, FLUX_SCHEDULE_TYPE_OPTIONS

from constants import BASE_PROMPT_NEWBIE, BASE_NEG_PROMPT_NEWBIE, EXAMPLES_NEWBIE, BASE_NEG_PROMPT_PONY7, BASE_PROMPT_NETA
from pipeline_newbie_img2img import NewbieImg2ImgPipeline
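

# Keep only the flow-match schedulers from stablepy's full scheduler map.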
FLOW_MATCH_ONLY_MAP = {
    k: v for k, v in SCHEDULER_CONFIG_MAP.items() if "FlowMatch" in k
}
# The SDE DPM++ variants are left out of the NewBie sampler dropdown.
SAMPLER_NEWBIE = [
    k for k in FLOW_MATCH_ONLY_MAP
    if k not in ("FlowMatch DPM++ SDE", "FlowMatch DPM++ 3M SDE")
]
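

# Quiet tokenizer fork-parallelism warnings and other library noise.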
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")
NEWBIE_TOKEN_LIMIT = 1100
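

# NewBie: the custom text encoder is loaded separately (trust_remote_code) and
# handed to the pipeline in bf16.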
model_path = "Disty0/NewBie-image-Exp0.1-Diffusers"
text_encoder_2 = AutoModel.from_pretrained(model_path, subfolder="text_encoder_2", trust_remote_code=True, torch_dtype=torch.bfloat16)
pipe_newbie = NewbiePipeline.from_pretrained(model_path, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16)
pipe_newbie.to("cuda")
del text_encoder_2  # the pipeline keeps its own reference
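# Keep a pristine copy of the default scheduler so per-request sampler swaps
# can be undone after each generation.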
newbie_default_scheduler = copy.deepcopy(pipe_newbie.scheduler)
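# The img2img pipeline reuses the txt2img components, so no extra memory is used.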
pipe_newbie_img2img = NewbieImg2ImgPipeline(**pipe_newbie.components).to("cuda")
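

# Pony v7, served by the stock AuraFlow pipeline.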
pipe_pony = AuraFlowPipeline.from_pretrained("purplesmartai/pony-v7-base", torch_dtype=torch.bfloat16)
pipe_pony.to("cuda")
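

# NetaYume v3.5, an anime-focused Lumina 2 checkpoint.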
pipe_netayume = Lumina2Pipeline.from_pretrained(
    "duongve/NetaYume-Lumina-Image-2.0-Diffusers-v35-pretrained",
    torch_dtype=torch.bfloat16
)
pipe_netayume.to("cuda")


def set_sampler(pipe, sampler_name, schedule_type, default_scheduler):
    # Rebuild the scheduler from the saved default's config on every call, so
    # register_to_config() below can never mutate the pristine default copy.
    scheduler_class, config = FLOW_MATCH_ONLY_MAP[sampler_name]
    pipe.scheduler = scheduler_class.from_config(default_scheduler.config, **config)

    # Overlay the selected schedule-type options, if any.
    flux_schedule_config = FLUX_SCHEDULE_TYPES.get(schedule_type)
    if flux_schedule_config:
        pipe.scheduler.register_to_config(**flux_schedule_config)

    return pipe
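

def scaled_sigmas(num_inference_steps, sigmas_factor):
    """Linear flow-match sigma schedule (1.0 -> 1/steps), scaled by a user factor.

    Factored out of the three generator functions below; per the UI hints,
    factors below 1.0 add detail and factors above 1.0 simplify the image.
    """
    steps = int(num_inference_steps)
    sigmas = np.linspace(1.0, 1 / steps, steps) * sigmas_factor
    return sigmas.tolist()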


def get_newbie_token_details(prompt, system_prompt, tokenizer):
    if prompt is None:
        prompt = ""
    if system_prompt is None:
        system_prompt = ""

    # The encoder input is: system prompt + " <Prompt Start> " separator + user
    # prompt, plus a two-token allowance for special tokens.
    t_sys = tokenizer(str(system_prompt), add_special_tokens=False)["input_ids"]
    t_sep = tokenizer(" <Prompt Start> ", add_special_tokens=False)["input_ids"]
    t_prm = tokenizer(str(prompt), add_special_tokens=False)["input_ids"]

    total_tokens = len(t_sys) + len(t_sep) + len(t_prm) + 2

    # Pad the sequence length up to the next multiple of 512.
    if total_tokens <= 512:
        sequence_length = 512
    else:
        sequence_length = math.ceil(total_tokens / 512) * 512

    return total_tokens, sequence_length


def check_token_count(prompt, system_prompt):
    try:
        # Crude debounce: with trigger_mode="always_last" on the change events,
        # only the last call in a burst of keystrokes produces a visible update.
        time.sleep(2)

        tokenizer = pipe_newbie.tokenizer_2
        total, seq_len = get_newbie_token_details(prompt, system_prompt, tokenizer)

        if total > NEWBIE_TOKEN_LIMIT:
            return gr.update(
                value=f"<div style='color: #ef4444; border: 1px solid #ef4444; background-color: #fef2f2; padding: 8px; border-radius: 5px; font-weight: bold; width: 100%; text-align: center;'>"
                      f"⚠️ Token limit exceeded! ({total}/{NEWBIE_TOKEN_LIMIT}). <br>"
                      f"Text will be truncated.</div>",
                visible=True
            )
        else:
            return gr.update(
                value=f"<div style='color: #6b7280; font-size: 0.9em; text-align: right; width: 100%;'> {total}/{min(seq_len, NEWBIE_TOKEN_LIMIT)}</div>",
                visible=True
            )
    except Exception:
        return gr.update(visible=False)


@spaces.GPU()
def generate_image_newbie(prompt, negative_prompt, system_prompt, height, width, num_inference_steps, guidance_scale, cfg_trunc_ratio, cfg_normalization, seed, sigmas_factor, sampler, schedule_type, image, strength, progress=gr.Progress(track_tqdm=True)):
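    # A negative seed means "randomize"; the seed actually used is returned.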
    if seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))
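
    # Reject over-long prompts up front and cap the padded sequence length.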
    total_tokens, seq_len = get_newbie_token_details(prompt, system_prompt, pipe_newbie.tokenizer_2)
    if total_tokens > NEWBIE_TOKEN_LIMIT:
        raise ValueError(f"The prompt is longer than the allowed limit of {NEWBIE_TOKEN_LIMIT} tokens.")
    seq_len = min(seq_len, NEWBIE_TOKEN_LIMIT)

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "height": int(height),
        "width": int(width),
        "num_inference_steps": int(num_inference_steps),
        "guidance_scale": guidance_scale,
        "system_prompt": system_prompt,
        "cfg_trunc_ratio": cfg_trunc_ratio,
        "cfg_normalization": cfg_normalization,
        "generator": generator,
        "max_sequence_length": int(seq_len)
    }

    if sigmas_factor != 1.0:
        pipeline_args["sigmas"] = scaled_sigmas(num_inference_steps, sigmas_factor)

    if image is not None:
        # Image-to-image: route through the img2img pipeline and shrink the
        # reference to fit the target resolution (aspect ratio preserved).
        pipe_task_nb = pipe_newbie_img2img
        if isinstance(image, np.ndarray):
            img_pil = Image.fromarray(image)
        else:
            img_pil = Image.open(image)
        img_pil.thumbnail((int(width), int(height)), Image.Resampling.LANCZOS)
        pipeline_args["image"] = img_pil
        pipeline_args["strength"] = strength
    else:
        pipe_task_nb = pipe_newbie

    set_sampler(pipe_task_nb, sampler, schedule_type, newbie_default_scheduler)

    output_image = pipe_task_nb(**pipeline_args).images[0]
    # Restore the default scheduler so the next request starts from a clean state.
    pipe_task_nb.scheduler = newbie_default_scheduler

    return output_image, seed


@spaces.GPU()
def generate_image_pony(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, sigmas_factor, seed, progress=gr.Progress(track_tqdm=True)):
    if seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt,
        "height": int(height),
        "width": int(width),
        "num_inference_steps": int(num_inference_steps),
        "guidance_scale": guidance_scale,
        "generator": generator,
    }

    if sigmas_factor != 1.0:
        pipeline_args["sigmas"] = scaled_sigmas(num_inference_steps, sigmas_factor)

    image = pipe_pony(**pipeline_args).images[0]
    return image, seed


@spaces.GPU()
def generate_image_netayume(prompt, negative_prompt, system_prompt, height, width, guidance_scale, num_inference_steps, cfg_trunc_ratio, cfg_normalization, seed, sigmas_factor, progress=gr.Progress(track_tqdm=True)):
    if seed < 0:
        seed = random.randint(0, 2**32 - 1)

    generator = torch.Generator("cuda").manual_seed(int(seed))

    pipeline_args = {
        "prompt": prompt,
        "negative_prompt": negative_prompt if negative_prompt and negative_prompt.strip() else None,
        "system_prompt": system_prompt,
        "height": int(height),
        "width": int(width),
        "guidance_scale": guidance_scale,
        "num_inference_steps": int(num_inference_steps),
        "cfg_trunc_ratio": cfg_trunc_ratio,
        "cfg_normalization": cfg_normalization,
        "generator": generator,
    }

    if sigmas_factor != 1.0:
        pipeline_args["sigmas"] = scaled_sigmas(num_inference_steps, sigmas_factor)

    image = pipe_netayume(**pipeline_args).images[0]

    return image, seed


with gr.Blocks(theme=gr.themes.Soft(), title="Image Generation Playground") as demo:
    gr.Markdown("# Image Generation Playground")
    with gr.Tabs():
        with gr.Tab(label="NewBie Image"):
            gr.Markdown("## 🐣 NewBie Image Exp0.1")
            gr.Markdown("A 3.5B-parameter experimental DiT model built on Next-DiT and Lumina insights.")
            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_newbie = gr.Textbox(
                        label="Prompt",
                        value=BASE_PROMPT_NEWBIE,
                        lines=3
                    )

                    token_counter_display = gr.HTML(
                        value="<div style='color: #6b7280; font-size: 0.9em; text-align: right;'>Token usage: Calculating...</div>",
                        visible=True
                    )

                    negative_prompt_newbie = gr.Textbox(
                        label="Negative Prompt",
                        value=BASE_NEG_PROMPT_NEWBIE,
                        lines=2
                    )

                    system_prompt_newbie = gr.Dropdown(
                        label="System Prompt",
                        choices=[
                            "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
                            "You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process.",
                        ],
                        allow_custom_value=True,
                        value="You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts."
                    )

                    with gr.Row():
                        height_newbie = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=1264)
                        width_newbie = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=832)
                    with gr.Row():
                        steps_newbie = gr.Slider(label="Inference Steps", minimum=1, maximum=100, step=1, value=30)
                        guidance_scale_newbie = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.1, value=6.5)
                    with gr.Row():
                        sigmas_newbie = gr.Slider(label="Sigmas Factor", info="Lower values increase detail and complexity. Higher values simplify and clean the image.", minimum=0.9, maximum=1.1, step=0.001, value=0.99)
                        seed_newbie = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    with gr.Accordion("More settings", open=False):
                        with gr.Row():
                            sampler_newbie = gr.Dropdown(label="Sampler", choices=SAMPLER_NEWBIE, value="FlowMatch DPM++ 2M SDE")
                            schedule_type_newbie = gr.Dropdown(label="Schedule Type", choices=FLUX_SCHEDULE_TYPE_OPTIONS, value=FLUX_SCHEDULE_TYPE_OPTIONS[0])
                        with gr.Row():
                            cfg_norm_newbie = gr.Checkbox(label="CFG Normalization", value=True)
                            cfg_trunc_newbie = gr.Slider(label="CFG Truncation Ratio", minimum=0.0, maximum=1.0, step=0.05, value=1.0)

                    with gr.Row():
                        image_newbie = gr.Image(label="Reference image", interactive=True)
                        strength_newbie = gr.Slider(label="Reference Image Adherence", info="Lower values = strong adherence; higher values = weak adherence.", minimum=0.1, maximum=1.0, step=0.01, value=0.65)

                    generate_btn_newbie = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_newbie = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_newbie = gr.Number(label="Used Seed", interactive=False)

            gr.Examples(
                examples=EXAMPLES_NEWBIE,
                inputs=[prompt_newbie],
                label="Example Prompts"
            )

        with gr.Tab(label="Pony v7"):
            gr.Markdown("## ✨ Pony v7 AuraFlow")
            gr.Markdown("Generate images from text prompts using the AuraFlow model.")
            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_pony = gr.Textbox(label="Prompt", value="Score_9, ", lines=3)
                    neg_prompt_pony = gr.Textbox(
                        label="Negative Prompt",
                        value=BASE_NEG_PROMPT_PONY7,
                        lines=3
                    )
                    with gr.Row():
                        height_pony = gr.Slider(label="Height", minimum=512, maximum=1536, step=64, value=1024)
                        width_pony = gr.Slider(label="Width", minimum=512, maximum=1536, step=64, value=1024)
                    with gr.Row():
                        steps_pony = gr.Slider(label="Inference Steps", minimum=1, maximum=100, step=1, value=30)
                        cfg_pony = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=20.0, step=0.1, value=3.5)
                    with gr.Row():
                        sigmas_pony = gr.Slider(label="Sigmas Factor", minimum=0.95, maximum=1.05, step=0.01, value=0.99)
                        seed_pony = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    generate_btn_pony = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_pony = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_pony = gr.Number(label="Used Seed", interactive=False)

        with gr.Tab(label="NetaYume v3.5"):
            gr.Markdown("## 🌌 NetaYume v3.5 Lumina")
            gr.Markdown("Generate images from text prompts using the Lumina 2 model with a focus on anime aesthetics.")
            with gr.Row(variant="panel"):
                with gr.Column(scale=2):
                    prompt_neta = gr.Textbox(
                        label="Prompt",
                        value=BASE_PROMPT_NETA,
                        lines=5
                    )
                    neg_prompt_neta = gr.Textbox(label="Negative Prompt", value="low quality, bad quality, blurry, low resolution, deformed, ugly, bad anatomy", placeholder="Enter concepts to avoid...", lines=2)
                    system_prompt_neta = gr.Dropdown(
                        label="System Prompt",
                        choices=[
                            "You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process.",
                            "You are an assistant designed to generate high-quality images based on user prompts and danbooru tags.",
                            "You are an assistant designed to generate superior images with the superior degree of image-text alignment based on textual prompts or user prompts.",
                            "You are an assistant designed to generate high-quality images with the highest degree of image-text alignment based on textual prompts."
                        ],
                        value="You are an advanced assistant designed to generate high-quality images from user prompts, utilizing danbooru tags to accurately guide the image creation process."
                    )
                    with gr.Row():
                        height_neta = gr.Slider(label="Height", minimum=512, maximum=2048, step=64, value=1536)
                        width_neta = gr.Slider(label="Width", minimum=512, maximum=2048, step=64, value=1024)
                    with gr.Row():
                        cfg_neta = gr.Slider(label="Guidance Scale (CFG)", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
                        steps_neta = gr.Slider(label="Sampling Steps", minimum=10, maximum=100, step=1, value=50)
                    with gr.Row():
                        cfg_trunc_neta = gr.Slider(label="CFG Truncation Ratio", minimum=0.0, maximum=10.0, step=0.1, value=6.0)
                        sigmas_neta = gr.Slider(label="Sigmas Factor", minimum=0.9, maximum=1.1, step=0.01, value=1.0)
                    with gr.Row():
                        cfg_norm_neta = gr.Checkbox(label="CFG Normalization", value=False)
                        seed_neta = gr.Number(label="Seed (-1 for random)", value=-1, precision=0)

                    generate_btn_neta = gr.Button("Generate", variant="primary")

                with gr.Column(scale=1):
                    image_output_neta = gr.Image(label="Generated Image", format="png", interactive=False)
                    used_seed_neta = gr.Number(label="Used Seed", interactive=False)

    # Live token counter: recount whenever either prompt field changes.
    gr.on(
        triggers=[prompt_newbie.change, system_prompt_newbie.change],
        fn=check_token_count,
        inputs=[prompt_newbie, system_prompt_newbie],
        outputs=token_counter_display,
        show_progress="hidden",
        queue=False,
        trigger_mode="always_last",
        api_name=False
    )
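
    # Populate the token counter once when the page loads.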
    demo.load(
        fn=check_token_count,
        inputs=[prompt_newbie, system_prompt_newbie],
        outputs=token_counter_display,
        queue=False,
        trigger_mode="always_last",
        api_name=False
    )
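
    # Wire each tab's Generate button to its pipeline.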
    generate_btn_newbie.click(
        fn=generate_image_newbie,
        inputs=[
            prompt_newbie,
            negative_prompt_newbie,
            system_prompt_newbie,
            height_newbie,
            width_newbie,
            steps_newbie,
            guidance_scale_newbie,
            cfg_trunc_newbie,
            cfg_norm_newbie,
            seed_newbie,
            sigmas_newbie,
            sampler_newbie,
            schedule_type_newbie,
            image_newbie,
            strength_newbie,
        ],
        outputs=[image_output_newbie, used_seed_newbie]
    )

    generate_btn_pony.click(
        fn=generate_image_pony,
        inputs=[prompt_pony, neg_prompt_pony, height_pony, width_pony, steps_pony, cfg_pony, sigmas_pony, seed_pony],
        outputs=[image_output_pony, used_seed_pony]
    )

    generate_btn_neta.click(
        fn=generate_image_netayume,
        inputs=[prompt_neta, neg_prompt_neta, system_prompt_neta, height_neta, width_neta, cfg_neta, steps_neta, cfg_trunc_neta, cfg_norm_neta, seed_neta, sigmas_neta],
        outputs=[image_output_neta, used_seed_neta]
    )


if __name__ == "__main__":
    demo.launch(show_error=True)