| | import spaces |
| | import os |
| | import torch |
| | import random |
| | from huggingface_hub import snapshot_download |
| | from kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline |
| | from kolors.models.modeling_chatglm import ChatGLMModel |
| | from kolors.models.tokenization_chatglm import ChatGLMTokenizer |
| | from diffusers import UNet2DConditionModel, AutoencoderKL |
| | from diffusers import EulerDiscreteScheduler |
| | import gradio as gr |
| |
|
| | |
# Local directory holding the files of the "Kwai-Kolors/Kolors" Hub repository.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")


def _component_dir(name):
    """Return the path of sub-directory *name* inside the checkpoint."""
    return os.path.join(ckpt_dir, name)


# Load every pipeline component from its own sub-directory. The text encoder,
# VAE and UNet are cast to half precision; the tokenizer and the scheduler
# are loaded as-is.
text_encoder = ChatGLMModel.from_pretrained(
    _component_dir('text_encoder'),
    torch_dtype=torch.float16,
).half()
tokenizer = ChatGLMTokenizer.from_pretrained(_component_dir('text_encoder'))
vae = AutoencoderKL.from_pretrained(_component_dir("vae"), revision=None).half()
scheduler = EulerDiscreteScheduler.from_pretrained(_component_dir("scheduler"))
unet = UNet2DConditionModel.from_pretrained(_component_dir("unet"), revision=None).half()

# Assemble the components into one pipeline and move it to the CUDA device.
pipe = StableDiffusionXLPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False,
).to("cuda")
| |
|
@spaces.GPU(duration=200)
def generate_image(prompt, negative_prompt, height, width, num_inference_steps, guidance_scale, num_images_per_prompt, use_random_seed, seed, progress=gr.Progress(track_tqdm=True)):
    """Run the Kolors pipeline once and return the images with the seed used.

    Args:
        prompt: Text describing the desired image.
        negative_prompt: Text describing what the image should avoid.
        height: Output height in pixels.
        width: Output width in pixels.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_images_per_prompt: How many images to produce for the prompt.
        use_random_seed: When true, ignore ``seed`` and draw a fresh one.
        seed: Seed to use when ``use_random_seed`` is false. ``None`` (an
            empty Seed field) falls back to a random seed instead of failing.
        progress: Not used in the body; declared so Gradio can report the
            pipeline's tqdm progress in the UI.

    Returns:
        A ``(images, seed)`` tuple: the images produced by the pipeline and
        the integer seed that was actually used to generate them.
    """
    # An empty Seed field is delivered as None; int(None) would raise a
    # TypeError, so treat it like a request for a random seed. The chosen
    # seed is still returned, so the run stays reproducible.
    if use_random_seed or seed is None:
        seed = random.randint(0, 2**32 - 1)
    else:
        seed = int(seed)

    # Seed a generator on the pipeline's device.
    generator = torch.Generator(pipe.device).manual_seed(seed)

    # Slider values may arrive as floats; sizes, step counts and image counts
    # must be integers, so coerce them before handing them to the pipeline.
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=int(height),
        width=int(width),
        num_inference_steps=int(num_inference_steps),
        guidance_scale=guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
        generator=generator,
    ).images
    return images, seed
| |
|
# HTML shown under the title: a tagline followed by a row of project links.
description = """
<p align="center">Effective Training of Diffusion Model for Photorealistic Text-to-Image Synthesis</p>
<p><center>
<a href="https://kolors.kuaishou.com/" target="_blank">[Official Website]</a>
<a href="https://github.com/Kwai-Kolors/Kolors/blob/master/imgs/Kolors_paper.pdf" target="_blank">[Tech Report]</a>
<a href="https://huggingface.co/Kwai-Kolors/Kolors" target="_blank">[Model Page]</a>
<a href="https://github.com/Kwai-Kolors/Kolors" target="_blank">[Github]</a>
</center></p>
"""

# Components are created in the order of generate_image's parameters:
# the two prompts first, then the advanced settings.
_prompt_inputs = [
    gr.Textbox(label="Prompt"),
    gr.Textbox(label="Negative Prompt"),
]
_advanced_inputs = [
    gr.Slider(minimum=512, maximum=2048, value=1024, step=64, label="Height"),
    gr.Slider(minimum=512, maximum=2048, value=1024, step=64, label="Width"),
    gr.Slider(minimum=20, maximum=50, value=20, step=1, label="Number of Inference Steps"),
    gr.Slider(minimum=1, maximum=20, value=5, step=0.5, label="Guidance Scale"),
    gr.Slider(minimum=1, maximum=4, value=1, step=1, label="Number of images per prompt"),
    gr.Checkbox(label="Use Random Seed", value=True),
    gr.Number(label="Seed", value=0, precision=0),
]
# The advanced settings live in an accordion that starts collapsed.
_advanced_accordion = gr.Accordion(label="Advanced settings", open=False)
# Outputs mirror generate_image's return value: (images, seed).
_result_outputs = [
    gr.Gallery(label="Result", elem_id="gallery", show_label=False),
    gr.Number(label="Seed Used"),
]

iface = gr.Interface(
    fn=generate_image,
    inputs=_prompt_inputs,
    additional_inputs=_advanced_inputs,
    additional_inputs_accordion=_advanced_accordion,
    outputs=_result_outputs,
    title="Kolors",
    description=description,
    theme='bethecloud/storj_theme',
)

# Start the app with debug mode enabled.
iface.launch(debug=True)