Spaces: Running on Zero
Commit · 497e461
1 Parent(s): 8b1006a
feat: replace new model and delete unnecessary loading
app.py CHANGED
@@ -47,15 +47,14 @@ MARKDOWN = \
 </h2> \

 <div style="display: flex; flex-wrap: wrap; justify-content: center; gap: 2rem; margin-bottom: 1rem;">
-<!-- First row of buttons -->
 <a href="https://arxiv.org/abs/2505.21491" target="_blank"
 style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; /* light gray background */ color: #333; /* dark text */ text-decoration: none; border-radius: 9999px; font-weight: 500; transition: background-color 0.3s;">
-<span style="margin-right: 0.5rem;">📄</span>
+<span style="margin-right: 0.5rem;">📄</span>
 <span>Paper</span>
 </a>
 <a href="https://github.com/UVA-Computer-Vision-Lab/FrameINO" target="_blank"
 style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500; transition: background-color 0.3s;">
-<span style="margin-right: 0.5rem;">💻</span>
+<span style="margin-right: 0.5rem;">💻</span>
 <span>GitHub</span>
 </a>
 <a href="https://uva-computer-vision-lab.github.io/Frame-In-N-Out" target="_blank"
@@ -87,7 +86,7 @@ MARKDOWN = \
 ❗️❗️❗️Instruction Steps:<br>
 1️⃣ Upload your first frame image. Set the size you want to resize to for <b>Resized Height for Input Image</b> and <b>Resized Width for Input Image</b>. <br>
 2️⃣ Set your <b>canvas top left</b> and <b>bottom right expansion</b>. The combined height and width should be a multiple of 32. <br>
-
+Recommend <b>Canvas HEIGHT = 704</b> and <b>Canvas WIDTH = 1280</b> for the best performance (the pre-training resolution). <br>
 3️⃣ Click <b>Build the Canvas</b>. <br>
 4️⃣ Provide the trajectory of the main object in the canvas by clicking on the <b>Expanded Canvas</b>. <br>
 5️⃣ Provide the ID reference image and its trajectory (optional). Also, write a detailed <b>text prompt</b>. <br>
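Step 2️⃣ constrains the expanded canvas: its height and width must each be a multiple of 32, and 704 × 1280 is recommended because it matches the training resolution. A minimal sketch of rounding an expansion up to satisfy that constraint; the helper name and the example values are illustrative and not part of app.py:

def round_up_to_multiple(value: int, base: int = 32) -> int:
    # Round up so the expanded canvas satisfies the model's size constraint.
    return ((value + base - 1) // base) * base

# e.g. a 700 x 1270 canvas would be padded up to the recommended 704 x 1280
canvas_height = round_up_to_multiple(700)    # -> 704
canvas_width  = round_up_to_multiple(1270)   # -> 1280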
@@ -122,48 +121,20 @@ if not os.path.exists("__assets__"): # Check if the assets images exi



-
-######################################################## CogVideoX #################################################################
-
-# Path Setting
-model_code_name = "CogVideox"
-base_model_id = "zai-org/CogVideoX-5b-I2V"
-transformer_ckpt_path = "uva-cv-lab/FrameINO_CogVideoX_Stage2_MotionINO_v1.0"
-
-# Load Model
-transformer = CogVideoXTransformer3DModel.from_pretrained(transformer_ckpt_path, torch_dtype=torch.float16)
-text_encoder = T5EncoderModel.from_pretrained(base_model_id, subfolder="text_encoder", torch_dtype=torch.float16)
-vae = AutoencoderKLCogVideoX.from_pretrained(base_model_id, subfolder="vae", torch_dtype=torch.float16)
-
-# Create pipeline and run inference
-pipe = CogVideoXImageToVideoPipeline.from_pretrained(
-    base_model_id,
-    text_encoder = text_encoder,
-    transformer = transformer,
-    vae = vae,
-    torch_dtype = torch.float16,
-)
-pipe.enable_model_cpu_offload()
-
-#####################################################################################################################################
-
-
-
-
 ######################################################## Wan2.2 5B #################################################################

 # Path Setting
 model_code_name = "Wan"
 base_model_id = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
-transformer_ckpt_path = "uva-cv-lab/FrameINO_Wan2.2_5B_Stage2_MotionINO_v1.
+transformer_ckpt_path = "uva-cv-lab/FrameINO_Wan2.2_5B_Stage2_MotionINO_v1.6"


-# Load
+# Load Model
 print("Loading the model!")
 transformer = WanTransformer3DModel.from_pretrained(transformer_ckpt_path, torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(base_model_id, subfolder="vae", torch_dtype=torch.float32)

-# Create the
+# Create the Pipeline
 print("Loading the pipeline!")
 pipe = WanImageToVideoPipeline.from_pretrained(base_model_id, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
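After this edit, app.py assembles a WanImageToVideoPipeline from the Wan2.2 TI2V 5B base model with the FrameINO Stage-2 transformer checkpoint swapped in. A rough sketch of how such a pipeline might be invoked once loaded; the image path is a placeholder, the argument values are illustrative, and FrameINO's trajectory/canvas conditioning (handled elsewhere in the app) is omitted:

import torch
from diffusers.utils import export_to_video, load_image

# Hypothetical inference call on the `pipe` assembled above.
first_frame = load_image("__assets__/example_first_frame.png")  # placeholder path
result = pipe(
    image=first_frame,
    prompt="A detailed text prompt describing the scene and motion",
    height=704,                 # recommended canvas height
    width=1280,                 # recommended canvas width
    num_inference_steps=50,
    guidance_scale=5.0,
    generator=torch.Generator("cuda").manual_seed(42),
)
export_to_video(result.frames[0], "output.mp4", fps=16)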
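One side effect of the swap: the deleted CogVideoX block kept VRAM in check with pipe.enable_model_cpu_offload(), while the new Wan2.2 path moves the whole pipeline onto the GPU with pipe.to("cuda"). If the 5B pipeline were ever short on GPU memory, the same diffusers offloading call should apply to it as well; whether the Space needs that trade-off is an assumption:

# Instead of keeping the whole pipeline resident on the GPU ...
# pipe.to("cuda")

# ... let diffusers move each sub-model to the GPU only while it runs.
# Lower peak VRAM at the cost of slower inference.
pipe.enable_model_cpu_offload()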