# Copyright (c) 2023-2024, Zexin He
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import torch

"""
Shape conventions used throughout this module:
R: (N, 3, 3)
T: (N, 3)
E: (N, 4, 4)
vector: (N, 3)
"""


def compose_extrinsic_R_T(R: torch.Tensor, T: torch.Tensor):
    """
    Compose the standard form extrinsic matrix from R and T.
    Batched I/O.
    """
    RT = torch.cat((R, T.unsqueeze(-1)), dim=-1)
    return compose_extrinsic_RT(RT)


def compose_extrinsic_RT(RT: torch.Tensor):
    """
    Compose the standard form extrinsic matrix from RT.
    Batched I/O.
    """
    return torch.cat([
        RT,
        torch.tensor([[[0, 0, 0, 1]]], dtype=RT.dtype, device=RT.device).repeat(RT.shape[0], 1, 1),
    ], dim=1)


def decompose_extrinsic_R_T(E: torch.Tensor):
    """
    Decompose the standard extrinsic matrix into R and T.
    Batched I/O.
    """
    RT = decompose_extrinsic_RT(E)
    return RT[:, :, :3], RT[:, :, 3]


def decompose_extrinsic_RT(E: torch.Tensor):
    """
    Decompose the standard extrinsic matrix into RT.
    Batched I/O.
    """
    return E[:, :3, :]
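

# A minimal usage sketch of the compose/decompose round trip above. This helper is
# hypothetical (for illustration only, not part of the original module); the random
# RT tensor is a placeholder standing in for real (N, 3, 4) extrinsics.
def _example_extrinsic_round_trip():
    RT = torch.randn(2, 3, 4)
    E = compose_extrinsic_RT(RT)       # (2, 4, 4), last row is [0, 0, 0, 1]
    R, T = decompose_extrinsic_R_T(E)  # (2, 3, 3) and (2, 3)
    assert torch.allclose(compose_extrinsic_R_T(R, T), E)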


def camera_normalization_objaverse(normed_dist_to_center, poses: torch.Tensor, ret_transform: bool = False):
    """
    Normalize a batch of (N, 3, 4) poses so that the first (pivotal) view is mapped to a
    canonical camera at `normed_dist_to_center` from the origin; with 'auto', the pivotal
    camera's own distance to the origin is kept. Optionally return the (4, 4) transform.
    """
    assert normed_dist_to_center is not None
    pivotal_pose = compose_extrinsic_RT(poses[:1])
    dist_to_center = pivotal_pose[:, :3, 3].norm(dim=-1, keepdim=True).item() \
        if normed_dist_to_center == 'auto' else normed_dist_to_center

    # compute camera norm (new version)
    canonical_camera_extrinsics = torch.tensor([[
        [1, 0, 0, 0],
        [0, 0, -1, -dist_to_center],
        [0, 1, 0, 0],
        [0, 0, 0, 1],
    ]], dtype=torch.float32)
    pivotal_pose_inv = torch.inverse(pivotal_pose)
    camera_norm_matrix = torch.bmm(canonical_camera_extrinsics, pivotal_pose_inv)

    # normalize all views
    poses = compose_extrinsic_RT(poses)
    poses = torch.bmm(camera_norm_matrix.repeat(poses.shape[0], 1, 1), poses)
    poses = decompose_extrinsic_RT(poses)

    if ret_transform:
        return poses, camera_norm_matrix.squeeze(dim=0)
    return poses
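

# A minimal usage sketch of normalizing a batch of poses against the first view. This
# helper is hypothetical (for illustration only); it borrows surrounding_views_linspace,
# defined later in this module, purely to produce plausible example poses.
def _example_camera_normalization():
    poses = surrounding_views_linspace(n_views=4)  # (4, 3, 4), resolved at call time
    normed, transform = camera_normalization_objaverse('auto', poses, ret_transform=True)
    assert normed.shape == (4, 3, 4) and transform.shape == (4, 4)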


def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
    """
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    Return batched fx, fy, cx, cy, normalized by image width and height.
    """
    fx, fy = intrinsics[:, 0, 0], intrinsics[:, 0, 1]
    cx, cy = intrinsics[:, 1, 0], intrinsics[:, 1, 1]
    width, height = intrinsics[:, 2, 0], intrinsics[:, 2, 1]
    fx, fy = fx / width, fy / height
    cx, cy = cx / width, cy / height
    return fx, fy, cx, cy


def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    """
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    return torch.cat([
        RT.reshape(-1, 12),
        fx.unsqueeze(-1), fy.unsqueeze(-1), cx.unsqueeze(-1), cy.unsqueeze(-1),
    ], dim=-1)


def build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    """
    E = compose_extrinsic_RT(RT)
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
    I = torch.stack([
        torch.stack([fx, torch.zeros_like(fx), cx], dim=-1),
        torch.stack([torch.zeros_like(fy), fy, cy], dim=-1),
        torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(RT.shape[0], 1),
    ], dim=1)
    return torch.cat([
        E.reshape(-1, 16),
        I.reshape(-1, 9),
    ], dim=-1)
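

# A minimal usage sketch of packing extrinsics and intrinsics into flat per-camera
# feature vectors (12 + 4 = 16 dims for the "principle" form, 16 + 9 = 25 dims for the
# "standard" form). This helper is hypothetical; the pixel-space intrinsics below are
# placeholder values for a 512x512 image.
def _example_camera_features():
    RT = torch.randn(2, 3, 4)
    intrinsics = torch.tensor([[[384., 384.], [256., 256.], [512., 512.]]]).repeat(2, 1, 1)
    assert build_camera_principle(RT, intrinsics).shape == (2, 16)
    assert build_camera_standard(RT, intrinsics).shape == (2, 25)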


def center_looking_at_camera_pose(
    camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None,
    device: torch.device = torch.device('cpu'),
):
    """
    camera_position: (M, 3)
    look_at: (3)
    up_world: (3)
    return: (M, 3, 4)
    """
    # by default, look at the origin with world up along +z
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32, device=device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)
    look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
    up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)

    # build an orthonormal camera frame: z points from the look-at target to the camera
    z_axis = camera_position - look_at
    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
    x_axis = torch.cross(up_world, z_axis, dim=-1)
    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
    y_axis = torch.cross(z_axis, x_axis, dim=-1)
    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    return extrinsics
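

# A minimal usage sketch of a single look-at pose. This helper is hypothetical; a camera
# on the +x axis looking at the origin should end up with its position in the last
# column of the returned (M, 3, 4) matrix.
def _example_look_at():
    position = torch.tensor([[2.0, 0.0, 0.0]])
    pose = center_looking_at_camera_pose(position)
    assert pose.shape == (1, 3, 4)
    assert torch.allclose(pose[0, :, 3], position[0])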


def surrounding_views_linspace(n_views: int, radius: float = 2.0, height: float = 0.8, device: torch.device = torch.device('cpu')):
    """
    n_views: number of surrounding views
    radius: camera distance to the center
    height: height of the camera above the z=0 plane
    return: (M, 3, 4)
    """
    assert n_views > 0
    assert radius > 0

    # sweep a full turn starting from -pi/2; cameras sit on a horizontal circle of the
    # sphere with the given radius (assumes radius > |height| so the square root is real)
    theta = torch.linspace(-torch.pi / 2, 3 * torch.pi / 2, n_views, device=device)
    projected_radius = math.sqrt(radius ** 2 - height ** 2)
    x = torch.cos(theta) * projected_radius
    y = torch.sin(theta) * projected_radius
    z = torch.full((n_views,), height, device=device)
    camera_positions = torch.stack([x, y, z], dim=1)

    extrinsics = center_looking_at_camera_pose(camera_positions, device=device)
    return extrinsics
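

# A minimal usage sketch of sampling an orbit of cameras. This helper is hypothetical;
# every returned camera position should lie at `radius` from the origin.
def _example_orbit():
    extrinsics = surrounding_views_linspace(n_views=8, radius=2.0, height=0.8)
    assert extrinsics.shape == (8, 3, 4)
    assert torch.allclose(extrinsics[:, :, 3].norm(dim=-1), torch.full((8,), 2.0))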


def create_intrinsics(
    f: float,
    c: float = None, cx: float = None, cy: float = None,
    w: float = 1., h: float = 1.,
    dtype: torch.dtype = torch.float32,
    device: torch.device = torch.device('cpu'),
):
    """
    return: (3, 2), normalized intrinsics [[fx, fy], [cx, cy], [width, height]]
    """
    fx = fy = f
    if c is not None:
        assert cx is None and cy is None, "c and cx/cy cannot be used together"
        cx = cy = c
    else:
        assert cx is not None and cy is not None, "cx/cy must be provided when c is not provided"
    fx, fy, cx, cy, w, h = fx / w, fy / h, cx / w, cy / h, 1., 1.
    intrinsics = torch.tensor([
        [fx, fy],
        [cx, cy],
        [w, h],
    ], dtype=dtype, device=device)
    return intrinsics
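

# A minimal usage sketch tying the helpers together: build flat camera features for an
# orbit of views. This helper is hypothetical; the focal length and principal point are
# arbitrary placeholder values in normalized (width = height = 1) units.
def _example_full_pipeline():
    extrinsics = surrounding_views_linspace(n_views=4)                           # (4, 3, 4)
    intrinsics = create_intrinsics(f=0.75, c=0.5).unsqueeze(0).repeat(4, 1, 1)   # (4, 3, 2)
    cameras = build_camera_principle(extrinsics, intrinsics)                     # (4, 16)
    return cameras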