Spaces: Running on Zero
ss
Browse files
- app.py +3 -3
- dkt/pipelines/pipeline.py +3 -10
- examples/18.mp4 +2 -2
- examples/27.mp4 +2 -2
- examples/28.mp4 +2 -2
- examples/5eaeaff52b23787a3dc3c610655a49d2.mp4 +2 -2
- examples/73fc0b2a3af3474de27c7da0bfbf5faa.mp4 +2 -2
- examples/9f2909760aff526070f169620ff38290.mp4 +2 -2
- examples/IMG_5703.mp4 +3 -0
- examples/episode_48-camera_third_view.mp4 +2 -2
- examples/extra_5.mp4 +2 -2
- examples/extra_9.mp4 +2 -2
- examples/input_20251128_121408.mp4 +2 -2
- examples/input_20251128_122722.mp4 +2 -2
- examples/input_20251202_031811.mp4 +2 -2
- examples/input_20251202_032007.mp4 +2 -2
- examples/teaser_1.mp4 +2 -2
- examples/teaser_25.mp4 +2 -2
- examples/teaser_3.mp4 +2 -2
- examples/teaser_7.mp4 +2 -2
- tools/common_utils.py +6 -0
app.py
CHANGED
|
@@ -40,11 +40,10 @@ example_inputs = [
|
|
| 40 |
"examples/1.mp4",
|
| 41 |
"examples/7.mp4",
|
| 42 |
"examples/8.mp4",
|
| 43 |
-
"examples/36.mp4",
|
| 44 |
"examples/39.mp4",
|
| 45 |
"examples/10.mp4",
|
| 46 |
"examples/30.mp4",
|
| 47 |
-
|
| 48 |
"examples/35.mp4",
|
| 49 |
"examples/40.mp4",
|
| 50 |
"examples/2.mp4",
|
|
@@ -67,6 +66,7 @@ example_inputs = [
|
|
| 67 |
"examples/input_20251202_031811.mp4",
|
| 68 |
"examples/input_20251202_032007.mp4",
|
| 69 |
"examples/teaser_1.mp4",
|
|
|
|
| 70 |
"examples/teaser_3.mp4",
|
| 71 |
"examples/teaser_7.mp4",
|
| 72 |
"examples/teaser_25.mp4",
|
|
@@ -442,7 +442,7 @@ with gr.Blocks(css=css, title="DKT", head=head_html) as demo:
|
|
| 442 |
output_point_map0, output_point_map1, output_point_map2, output_point_map3
|
| 443 |
],
|
| 444 |
fn=on_example_submit,
|
| 445 |
-
examples_per_page=
|
| 446 |
cache_examples=False
|
| 447 |
)
|
| 448 |
|
|
|
|
| 40 |
"examples/1.mp4",
|
| 41 |
"examples/7.mp4",
|
| 42 |
"examples/8.mp4",
|
|
|
|
| 43 |
"examples/39.mp4",
|
| 44 |
"examples/10.mp4",
|
| 45 |
"examples/30.mp4",
|
| 46 |
+
|
| 47 |
"examples/35.mp4",
|
| 48 |
"examples/40.mp4",
|
| 49 |
"examples/2.mp4",
|
|
|
|
| 66 |
"examples/input_20251202_031811.mp4",
|
| 67 |
"examples/input_20251202_032007.mp4",
|
| 68 |
"examples/teaser_1.mp4",
|
| 69 |
+
"examples/3.mp4",
|
| 70 |
"examples/teaser_3.mp4",
|
| 71 |
"examples/teaser_7.mp4",
|
| 72 |
"examples/teaser_25.mp4",
|
|
|
|
| 442 |
output_point_map0, output_point_map1, output_point_map2, output_point_map3
|
| 443 |
],
|
| 444 |
fn=on_example_submit,
|
| 445 |
+
examples_per_page=36,
|
| 446 |
cache_examples=False
|
| 447 |
)
|
| 448 |
|
dkt/pipelines/pipeline.py
CHANGED
|
@@ -29,7 +29,10 @@ from ..lora import GeneralLoRALoader
|
|
| 29 |
|
| 30 |
from loguru import logger
|
| 31 |
|
|
|
|
|
|
|
| 32 |
import spaces
|
|
|
|
| 33 |
|
| 34 |
class BasePipeline(torch.nn.Module):
|
| 35 |
|
|
@@ -976,23 +979,13 @@ class DKTPipeline:
|
|
| 976 |
def moge_infer(self, input_image):
|
| 977 |
|
| 978 |
device = torch.device("cuda")
|
| 979 |
-
|
| 980 |
|
| 981 |
self.moge_pipe = self.moge_pipe.to(device)
|
| 982 |
self.moge_pipe.eval()
|
| 983 |
|
| 984 |
-
|
| 985 |
# 5. 放到 GPU
|
| 986 |
input_image = input_image.to(device=device, dtype=torch.float32)
|
| 987 |
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
|
| 991 |
-
# 🔴 必须补 batch 维度
|
| 992 |
-
if input_image.dim() == 3:
|
| 993 |
-
input_image = input_image.unsqueeze(0) # (1, 3, H, W)
|
| 994 |
-
|
| 995 |
-
|
| 996 |
model_device = next(self.moge_pipe.parameters()).device
|
| 997 |
print(f'input_image device: {input_image.device}, moge_pipe device: {model_device}, input shape: {input_image.shape}, input dtype: {input_image.dtype}')
|
| 998 |
|
|
|
|
| 29 |
|
| 30 |
from loguru import logger
|
| 31 |
|
| 32 |
+
|
| 33 |
+
|
| 34 |
import spaces
|
| 35 |
+
|
| 36 |
|
| 37 |
class BasePipeline(torch.nn.Module):
|
| 38 |
|
|
|
|
| 979 |
def moge_infer(self, input_image):
|
| 980 |
|
| 981 |
device = torch.device("cuda")
|
|
|
|
| 982 |
|
| 983 |
self.moge_pipe = self.moge_pipe.to(device)
|
| 984 |
self.moge_pipe.eval()
|
| 985 |
|
|
|
|
| 986 |
# 5. 放到 GPU
|
| 987 |
input_image = input_image.to(device=device, dtype=torch.float32)
|
| 988 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 989 |
model_device = next(self.moge_pipe.parameters()).device
|
| 990 |
print(f'input_image device: {input_image.device}, moge_pipe device: {model_device}, input shape: {input_image.shape}, input dtype: {input_image.dtype}')
|
| 991 |
|
examples/18.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38a34cc7a7bd060e1cb891d35457b37e2ad91e8fac457273367500b65a8e1eb8
|
| 3 |
+
size 1091805
|
examples/27.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6959f0dc0f5fde449cab87a0db04f2a14c23bc3df414b9e0474bd45bf901fbb
|
| 3 |
+
size 893079
|
examples/28.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd389e1fa12d73f8ded9181f9aa2b2586f8e150e855e96bf1df3c6420062bd12
|
| 3 |
+
size 605351
|
examples/5eaeaff52b23787a3dc3c610655a49d2.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f34029851307dea7724214a5ce5f9e0cc1f5283618d535dfd927ae4d5bc936e
|
| 3 |
+
size 1371356
|
examples/73fc0b2a3af3474de27c7da0bfbf5faa.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7619a66b098c36cf098e0d613b1132a4bc7a649bf32e0558574c6045dbf833b
|
| 3 |
+
size 1837604
|
examples/9f2909760aff526070f169620ff38290.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0f40eaaeefd6d4dff72cb944708e0e830ced4c3f97b4e8fb7ff7ce8b3f59c83
|
| 3 |
+
size 2111360
|
examples/IMG_5703.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd7701a09bb7e874a081a04e5d06dd2a4f06bfd0849e91c617d3623bdb8069f1
|
| 3 |
+
size 1897742
|
examples/episode_48-camera_third_view.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59efd48910999b4ed0c356438c676647965c9a2e4dde78e70041ed150fab3e57
|
| 3 |
+
size 776108
|
examples/extra_5.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87105db3c75e0579118a52798a27d0373810dc2fb0690be6126d5e1d18ab9ed1
|
| 3 |
+
size 1088857
|
examples/extra_9.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67ba8aef9ac1d4370f70896399a7dc093b0bc9aff9cd4e7e504dbe7eaf3d3016
|
| 3 |
+
size 547381
|
examples/input_20251128_121408.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b23bfaf76a6d3a9bcb8124c6f9ace836885e4fafcad110cc7f9095752f96b324
|
| 3 |
+
size 2722066
|
examples/input_20251128_122722.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5531cbf5890dfca572fcc1eb28a859f4cd09ae2e822f9bb579fe80eeca2d6962
|
| 3 |
+
size 692739
|
examples/input_20251202_031811.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5530e4322fefacb7d75245fcc13ec64085181e66c546438cfc935fe5ba5fecd2
|
| 3 |
+
size 545173
|
examples/input_20251202_032007.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd9cdf1fd5ee8820d74882405c88cb16860c2c1a1d07662538390a175b886080
|
| 3 |
+
size 561135
|
examples/teaser_1.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f454ee9e5d7d35ac17ce515c05c76feca434d38d0fc3c674aaf4e28ced95869
|
| 3 |
+
size 881391
|
examples/teaser_25.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5115a950b878631f8aefb00bd7f39b6f73ed3a636976df462b190d34df41ade4
|
| 3 |
+
size 1087457
|
examples/teaser_3.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52c3f306ee1ac2db9c7284908e2798d41e7c47d6796fdb17c7b429a8e37298bb
|
| 3 |
+
size 2713463
|
examples/teaser_7.mp4
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59bd55c5dfcf8d362af1c0942c0dd5c3d11c4c8b3e3832b390275c4f3cb6f44c
|
| 3 |
+
size 1021290
|
tools/common_utils.py
CHANGED
|
@@ -12,6 +12,12 @@ def save_video(frames, save_path, fps, quality=9, ffmpeg_params=None):
|
|
| 12 |
frames[0].save(save_path.replace('.mp4', '.png'))
|
| 13 |
return
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
writer = imageio.get_writer(save_path, fps=fps, quality=quality, ffmpeg_params=ffmpeg_params)
|
| 16 |
for frame in tqdm(frames, desc="Saving video"):
|
| 17 |
frame = np.array(frame)
|
|
|
|
| 12 |
frames[0].save(save_path.replace('.mp4', '.png'))
|
| 13 |
return
|
| 14 |
|
| 15 |
+
# Ensure browser-compatible pixel format (yuv420p) to avoid Gradio conversion warning
|
| 16 |
+
if ffmpeg_params is None:
|
| 17 |
+
ffmpeg_params = ['-pix_fmt', 'yuv420p']
|
| 18 |
+
elif '-pix_fmt' not in ffmpeg_params:
|
| 19 |
+
ffmpeg_params = list(ffmpeg_params) + ['-pix_fmt', 'yuv420p']
|
| 20 |
+
|
| 21 |
writer = imageio.get_writer(save_path, fps=fps, quality=quality, ffmpeg_params=ffmpeg_params)
|
| 22 |
for frame in tqdm(frames, desc="Saving video"):
|
| 23 |
frame = np.array(frame)
|