HikariDawn committed
Commit 497e461 · 1 Parent(s): 8b1006a

feat: replace new model and delete unnecessary loading

Files changed (1)
  1. app.py +6 -35
app.py CHANGED
@@ -47,15 +47,14 @@ MARKDOWN = \
     </h2> \
 
     <div style="display: flex; flex-wrap: wrap; justify-content: center; gap: 2rem; margin-bottom: 1rem;">
-        <!-- first row of buttons -->
         <a href="https://arxiv.org/abs/2505.21491" target="_blank"
            style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; /* light gray background */ color: #333; /* dark text */ text-decoration: none; border-radius: 9999px; font-weight: 500; transition: background-color 0.3s;">
-            <span style="margin-right: 0.5rem;">📄</span> <!-- document icon -->
+            <span style="margin-right: 0.5rem;">📄</span>
             <span>Paper</span>
         </a>
         <a href="https://github.com/UVA-Computer-Vision-Lab/FrameINO" target="_blank"
            style="display: inline-flex; align-items: center; padding: 0.5rem 1rem; background-color: #f0f0f0; color: #333; text-decoration: none; border-radius: 9999px; font-weight: 500; transition: background-color 0.3s;">
-            <span style="margin-right: 0.5rem;">💻</span> <!-- computer icon -->
+            <span style="margin-right: 0.5rem;">💻</span>
             <span>GitHub</span>
         </a>
         <a href="https://uva-computer-vision-lab.github.io/Frame-In-N-Out" target="_blank"
@@ -87,7 +86,7 @@ MARKDOWN = \
     ❗️❗️❗️Instruction Steps:<br>
     1️⃣ Upload your first frame image. Set the size you want to resize to for <b>Resized Height for Input Image</b> and <b>Resized Width for Input Image</b>. <br>
     2️⃣ Set your <b>canvas top left</b> and <b>bottom right expansion</b>. The combined height and width should be a multiple of 32. <br>
-    PLEASE ENSURE that <b>Canvas HEIGHT = 704</b> and <b>Canvas WIDTH = 1280</b> for the best performance (current training resolution). <br>
+    Recommended: <b>Canvas HEIGHT = 704</b> and <b>Canvas WIDTH = 1280</b> for the best performance (the model's training resolution). <br>
     3️⃣ Click <b>Build the Canvas</b>. <br>
     4️⃣ Provide the trajectory of the main object in the canvas by clicking on the <b>Expanded Canvas</b>. <br>
     5️⃣ Provide the ID reference image and its trajectory (optional). Also, write a detailed <b>text prompt</b>. <br>
@@ -122,48 +121,20 @@ if not os.path.exists("__assets__"): # Check if the assets images exi
 
 
 
-
-######################################################## CogVideoX #################################################################
-
-# Path Setting
-model_code_name = "CogVideox"
-base_model_id = "zai-org/CogVideoX-5b-I2V"
-transformer_ckpt_path = "uva-cv-lab/FrameINO_CogVideoX_Stage2_MotionINO_v1.0"
-
-# Load Model
-transformer = CogVideoXTransformer3DModel.from_pretrained(transformer_ckpt_path, torch_dtype=torch.float16)
-text_encoder = T5EncoderModel.from_pretrained(base_model_id, subfolder="text_encoder", torch_dtype=torch.float16)
-vae = AutoencoderKLCogVideoX.from_pretrained(base_model_id, subfolder="vae", torch_dtype=torch.float16)
-
-# Create pipeline and run inference
-pipe = CogVideoXImageToVideoPipeline.from_pretrained(
-    base_model_id,
-    text_encoder=text_encoder,
-    transformer=transformer,
-    vae=vae,
-    torch_dtype=torch.float16,
-)
-pipe.enable_model_cpu_offload()
-
-#####################################################################################################################################
-
-
-
-
 ######################################################## Wan2.2 5B #################################################################
 
 # Path Setting
 model_code_name = "Wan"
 base_model_id = "Wan-AI/Wan2.2-TI2V-5B-Diffusers"
-transformer_ckpt_path = "uva-cv-lab/FrameINO_Wan2.2_5B_Stage2_MotionINO_v1.5"
+transformer_ckpt_path = "uva-cv-lab/FrameINO_Wan2.2_5B_Stage2_MotionINO_v1.6"
 
 
-# Load model
+# Load Model
 print("Loading the model!")
 transformer = WanTransformer3DModel.from_pretrained(transformer_ckpt_path, torch_dtype=torch.float16)
 vae = AutoencoderKLWan.from_pretrained(base_model_id, subfolder="vae", torch_dtype=torch.float32)
 
-# Create the pipeline
+# Create the Pipeline
 print("Loading the pipeline!")
 pipe = WanImageToVideoPipeline.from_pretrained(base_model_id, transformer=transformer, vae=vae, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
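
The diff ends at pipeline construction; the demo's actual generation call (with trajectory and ID-reference conditioning) lives further down in app.py and is not part of this commit. For orientation only, here is a minimal sketch of driving the assembled pipe through the stock diffusers image-to-video API. The input path, prompt, and sampling parameters below are illustrative assumptions, not values from this repository.

from diffusers.utils import export_to_video, load_image

# `pipe` is the WanImageToVideoPipeline assembled above (Wan2.2 TI2V 5B base
# with the FrameINO transformer). All concrete values below are placeholders.
first_frame = load_image("first_frame.png")  # assumed input image path

result = pipe(
    image=first_frame,
    prompt="A detailed text prompt describing the scene and motion.",  # assumed
    height=704,               # recommended canvas height (training resolution)
    width=1280,               # recommended canvas width (training resolution)
    num_frames=81,            # assumed; should match the model's training setup
    guidance_scale=5.0,       # assumed
    num_inference_steps=50,   # assumed
)
export_to_video(result.frames[0], "output.mp4", fps=16)  # fps is an assumption

One design note visible in the diff itself: pipe.to("cuda") keeps the whole pipeline resident on the GPU, whereas the deleted CogVideoX path used enable_model_cpu_offload(), which trades inference speed for lower peak VRAM.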