Stable-X committed on
Commit d0afedf · verified · 1 Parent(s): 0886e8b

Delete mast3r

mast3r/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/catmlp_dpt_head.py DELETED
@@ -1,123 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # MASt3R heads
6
- # --------------------------------------------------------
7
- import torch
8
- import torch.nn.functional as F
9
-
10
- import mast3r.utils.path_to_dust3r # noqa
11
- from dust3r.heads.postprocess import reg_dense_depth, reg_dense_conf # noqa
12
- from dust3r.heads.dpt_head import PixelwiseTaskWithDPT # noqa
13
- import dust3r.utils.path_to_croco # noqa
14
- from models.blocks import Mlp # noqa
15
-
16
-
17
- def reg_desc(desc, mode):
18
- if 'norm' in mode:
19
- desc = desc / desc.norm(dim=-1, keepdim=True)
20
- else:
21
- raise ValueError(f"Unknown desc mode {mode}")
22
- return desc
23
-
24
-
25
- def postprocess(out, depth_mode, conf_mode, desc_dim=None, desc_mode='norm', two_confs=False, desc_conf_mode=None):
26
- if desc_conf_mode is None:
27
- desc_conf_mode = conf_mode
28
- fmap = out.permute(0, 2, 3, 1) # B,H,W,D
29
- res = dict(pts3d=reg_dense_depth(fmap[..., 0:3], mode=depth_mode))
30
- if conf_mode is not None:
31
- res['conf'] = reg_dense_conf(fmap[..., 3], mode=conf_mode)
32
- if desc_dim is not None:
33
- start = 3 + int(conf_mode is not None)
34
- res['desc'] = reg_desc(fmap[..., start:start + desc_dim], mode=desc_mode)
35
- if two_confs:
36
- res['desc_conf'] = reg_dense_conf(fmap[..., start + desc_dim], mode=desc_conf_mode)
37
- else:
38
- res['desc_conf'] = res['conf'].clone()
39
- return res
40
-
41
-
42
- class Cat_MLP_LocalFeatures_DPT_Pts3d(PixelwiseTaskWithDPT):
43
- """ Mixture between MLP and DPT head that outputs 3d points and local features (with MLP).
44
- The input for both heads is a concatenation of Encoder and Decoder outputs
45
- """
46
-
47
- def __init__(self, net, has_conf=False, local_feat_dim=16, hidden_dim_factor=4., hooks_idx=None, dim_tokens=None,
48
- num_channels=1, postprocess=None, feature_dim=256, last_dim=32, depth_mode=None, conf_mode=None, head_type="regression", **kwargs):
49
- super().__init__(num_channels=num_channels, feature_dim=feature_dim, last_dim=last_dim, hooks_idx=hooks_idx,
50
- dim_tokens=dim_tokens, depth_mode=depth_mode, postprocess=postprocess, conf_mode=conf_mode, head_type=head_type)
51
- self.local_feat_dim = local_feat_dim
52
-
53
- patch_size = net.patch_embed.patch_size
54
- if isinstance(patch_size, tuple):
55
- assert len(patch_size) == 2 and isinstance(patch_size[0], int) and isinstance(
56
- patch_size[1], int), "What is your patchsize format? Expected a single int or a tuple of two ints."
57
- assert patch_size[0] == patch_size[1], "Error, non square patches not managed"
58
- patch_size = patch_size[0]
59
- self.patch_size = patch_size
60
-
61
- self.desc_mode = net.desc_mode
62
- self.has_conf = has_conf
63
- self.two_confs = net.two_confs # independent confs for 3D regr and descs
64
- self.desc_conf_mode = net.desc_conf_mode
65
- idim = net.enc_embed_dim + net.dec_embed_dim
66
-
67
- self.head_local_features = Mlp(in_features=idim,
68
- hidden_features=int(hidden_dim_factor * idim),
69
- out_features=(self.local_feat_dim + self.two_confs) * self.patch_size**2)
70
-
71
- def forward(self, decout, img_shape):
72
- # pass through the heads
73
- pts3d = self.dpt(decout, image_size=(img_shape[0], img_shape[1]))
74
-
75
- # recover encoder and decoder outputs
76
- enc_output, dec_output = decout[0], decout[-1]
77
- cat_output = torch.cat([enc_output, dec_output], dim=-1) # concatenate
78
- H, W = img_shape
79
- B, S, D = cat_output.shape
80
-
81
- # extract local_features
82
- local_features = self.head_local_features(cat_output) # B,S,D
83
- local_features = local_features.transpose(-1, -2).view(B, -1, H // self.patch_size, W // self.patch_size)
84
- local_features = F.pixel_shuffle(local_features, self.patch_size) # B,d,H,W
85
-
86
- # post process 3D pts, descriptors and confidences
87
- out = torch.cat([pts3d, local_features], dim=1)
88
- if self.postprocess:
89
- out = self.postprocess(out,
90
- depth_mode=self.depth_mode,
91
- conf_mode=self.conf_mode,
92
- desc_dim=self.local_feat_dim,
93
- desc_mode=self.desc_mode,
94
- two_confs=self.two_confs,
95
- desc_conf_mode=self.desc_conf_mode)
96
- return out
97
-
98
-
99
- def mast3r_head_factory(head_type, output_mode, net, has_conf=False):
100
- """" build a prediction head for the decoder
101
- """
102
- if head_type == 'catmlp+dpt' and output_mode.startswith('pts3d+desc'):
103
- local_feat_dim = int(output_mode[10:])
104
- assert net.dec_depth > 9
105
- l2 = net.dec_depth
106
- feature_dim = 256
107
- last_dim = feature_dim // 2
108
- out_nchan = 3
109
- ed = net.enc_embed_dim
110
- dd = net.dec_embed_dim
111
- return Cat_MLP_LocalFeatures_DPT_Pts3d(net, local_feat_dim=local_feat_dim, has_conf=has_conf,
112
- num_channels=out_nchan + has_conf,
113
- feature_dim=feature_dim,
114
- last_dim=last_dim,
115
- hooks_idx=[0, l2 * 2 // 4, l2 * 3 // 4, l2],
116
- dim_tokens=[ed, dd, dd, dd],
117
- postprocess=postprocess,
118
- depth_mode=net.depth_mode,
119
- conf_mode=net.conf_mode,
120
- head_type='regression')
121
- else:
122
- raise NotImplementedError(
123
- f"unexpected {head_type=} and {output_mode=}")
 
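The factory above builds a head whose raw output packs 3D points, confidences, and local descriptors into one channel dimension, which `postprocess` then slices apart. Below is a minimal, self-contained sketch of that channel layout; the dust3r regressors (`reg_dense_depth`, `reg_dense_conf`) are replaced by plain tensor ops and all sizes are illustrative.

import torch
import torch.nn.functional as F

# layout consumed by postprocess(): [pts3d (3) | conf (1) | desc (D) | desc_conf (1)]
B, H, W, D = 2, 32, 32, 24            # D = local_feat_dim, e.g. output_mode 'pts3d+desc24'
out = torch.randn(B, 3 + 1 + D + 1, H, W)

fmap = out.permute(0, 2, 3, 1)        # B,H,W,C as in postprocess()
pts3d = fmap[..., 0:3]                # would go through reg_dense_depth in the real head
conf = fmap[..., 3]                   # would go through reg_dense_conf
start = 3 + 1
desc = F.normalize(fmap[..., start:start + D], dim=-1)   # stand-in for reg_desc 'norm' mode
desc_conf = fmap[..., start + D]      # second confidence map when two_confs=True

print(pts3d.shape, conf.shape, desc.shape, desc_conf.shape)
# torch.Size([2, 32, 32, 3]) torch.Size([2, 32, 32]) torch.Size([2, 32, 32, 24]) torch.Size([2, 32, 32])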
 
 
mast3r/cloud_opt/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/cloud_opt/sparse_ga.py DELETED
@@ -1,1001 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # MASt3R Sparse Global Alignment
6
- # --------------------------------------------------------
7
- from tqdm import tqdm
8
- import roma
9
- import torch
10
- import torch.nn as nn
11
- import torch.nn.functional as F
12
- import numpy as np
13
- import os
14
- from collections import namedtuple
15
- from functools import lru_cache
16
- from scipy import sparse as sp
17
-
18
- from mast3r.utils.misc import mkdir_for, hash_md5
19
- from mast3r.cloud_opt.utils.losses import gamma_loss
20
- from mast3r.cloud_opt.utils.schedules import linear_schedule, cosine_schedule
21
- from mast3r.fast_nn import fast_reciprocal_NNs, merge_corres
22
-
23
- import mast3r.utils.path_to_dust3r # noqa
24
- from dust3r.utils.geometry import inv, geotrf # noqa
25
- from dust3r.utils.device import to_cpu, to_numpy, todevice # noqa
26
- from dust3r.post_process import estimate_focal_knowing_depth # noqa
27
- from dust3r.optim_factory import adjust_learning_rate_by_lr # noqa
28
- from dust3r.cloud_opt.base_opt import clean_pointcloud
29
- from dust3r.viz import SceneViz
30
-
31
-
32
- class SparseGA():
33
- def __init__(self, img_paths, pairs_in, res_fine, anchors, canonical_paths=None):
34
- def fetch_img(im):
35
- def torgb(x): return (x[0].permute(1, 2, 0).numpy() * .5 + .5).clip(min=0., max=1.)
36
- for im1, im2 in pairs_in:
37
- if im1['instance'] == im:
38
- return torgb(im1['img'])
39
- if im2['instance'] == im:
40
- return torgb(im2['img'])
41
- self.canonical_paths = canonical_paths
42
- self.img_paths = img_paths
43
- self.imgs = [fetch_img(img) for img in img_paths]
44
- self.intrinsics = res_fine['intrinsics']
45
- self.cam2w = res_fine['cam2w']
46
- self.depthmaps = res_fine['depthmaps']
47
- self.pts3d = res_fine['pts3d']
48
- self.pts3d_colors = []
49
- self.working_device = self.cam2w.device
50
- for i in range(len(self.imgs)):
51
- im = self.imgs[i]
52
- x, y = anchors[i][0][..., :2].detach().cpu().numpy().T
53
- self.pts3d_colors.append(im[y, x])
54
- assert self.pts3d_colors[-1].shape == self.pts3d[i].shape
55
- self.n_imgs = len(self.imgs)
56
-
57
- def get_focals(self):
58
- return torch.tensor([ff[0, 0] for ff in self.intrinsics]).to(self.working_device)
59
-
60
- def get_principal_points(self):
61
- return torch.stack([ff[:2, -1] for ff in self.intrinsics]).to(self.working_device)
62
-
63
- def get_im_poses(self):
64
- return self.cam2w
65
-
66
- def get_sparse_pts3d(self):
67
- return self.pts3d
68
-
69
- def get_dense_pts3d(self, clean_depth=True, subsample=8):
70
- assert self.canonical_paths, 'cache_path is required for dense 3d points'
71
- device = self.cam2w.device
72
- confs = []
73
- base_focals = []
74
- anchors = {}
75
- for i, canon_path in enumerate(self.canonical_paths):
76
- (canon, canon2, conf), focal = torch.load(canon_path, map_location=device)
77
- confs.append(conf)
78
- base_focals.append(focal)
79
-
80
- H, W = conf.shape
81
- pixels = torch.from_numpy(np.mgrid[:W, :H].T.reshape(-1, 2)).float().to(device)
82
- idxs, offsets = anchor_depth_offsets(canon2, {i: (pixels, None)}, subsample=subsample)
83
- anchors[i] = (pixels, idxs[i], offsets[i])
84
-
85
- # densify sparse depthmaps
86
- pts3d, depthmaps = make_pts3d(anchors, self.intrinsics, self.cam2w, [
87
- d.ravel() for d in self.depthmaps], base_focals=base_focals, ret_depth=True)
88
-
89
- if clean_depth:
90
- confs = clean_pointcloud(confs, self.intrinsics, inv(self.cam2w), depthmaps, pts3d)
91
-
92
- return pts3d, depthmaps, confs
93
-
94
- def get_pts3d_colors(self):
95
- return self.pts3d_colors
96
-
97
- def get_depthmaps(self):
98
- return self.depthmaps
99
-
100
- def get_masks(self):
101
- return [slice(None, None) for _ in range(len(self.imgs))]
102
-
103
- def show(self, show_cams=True):
104
- pts3d, _, confs = self.get_dense_pts3d()
105
- show_reconstruction(self.imgs, self.intrinsics if show_cams else None, self.cam2w,
106
- [p.clip(min=-50, max=50) for p in pts3d],
107
- masks=[c > 1 for c in confs])
108
-
109
-
110
- def convert_dust3r_pairs_naming(imgs, pairs_in):
111
- for pair_id in range(len(pairs_in)):
112
- for i in range(2):
113
- pairs_in[pair_id][i]['instance'] = imgs[pairs_in[pair_id][i]['idx']]
114
- return pairs_in
115
-
116
-
117
- def sparse_global_alignment(imgs, pairs_in, cache_path, model, subsample=8, desc_conf='desc_conf',
118
- device='cuda', dtype=torch.float32, **kw):
119
- """ Sparse alignment with MASt3R
120
- imgs: list of image paths
121
- cache_path: path where to dump temporary files (str)
122
-
123
- lr1, niter1: learning rate and #iterations for coarse global alignment (3D matching)
124
- lr2, niter2: learning rate and #iterations for refinement (2D reproj error)
125
-
126
- lora_depth: smart dimensionality reduction with depthmaps
127
- """
128
- # Convert pair naming convention from dust3r to mast3r
129
- pairs_in = convert_dust3r_pairs_naming(imgs, pairs_in)
130
- # forward pass
131
- pairs, cache_path = forward_mast3r(pairs_in, model,
132
- cache_path=cache_path, subsample=subsample,
133
- desc_conf=desc_conf, device=device)
134
-
135
- # extract canonical pointmaps
136
- tmp_pairs, pairwise_scores, canonical_views, canonical_paths, preds_21 = \
137
- prepare_canonical_data(imgs, pairs, subsample, cache_path=cache_path, mode='avg-angle', device=device)
138
-
139
- # compute minimal spanning tree
140
- mst = compute_min_spanning_tree(pairwise_scores)
141
-
142
- # remove all edges not in the spanning tree?
143
- # min_spanning_tree = {(imgs[i],imgs[j]) for i,j in mst[1]}
144
- # tmp_pairs = {(a,b):v for (a,b),v in tmp_pairs.items() if {(a,b),(b,a)} & min_spanning_tree}
145
-
146
- # smartly combine all useful data
147
- imsizes, pps, base_focals, core_depth, anchors, corres, corres2d = \
148
- condense_data(imgs, tmp_pairs, canonical_views, dtype)
149
-
150
- imgs, res_coarse, res_fine = sparse_scene_optimizer(
151
- imgs, subsample, imsizes, pps, base_focals, core_depth, anchors, corres, corres2d, preds_21, canonical_paths,
152
- mst, cache_path=cache_path, device=device, dtype=dtype, **kw)
153
- return SparseGA(imgs, pairs_in, res_fine or res_coarse, anchors, canonical_paths)
154
-
155
-
156
- def sparse_scene_optimizer(imgs, subsample, imsizes, pps, base_focals, core_depth, anchors, corres, corres2d,
157
- preds_21, canonical_paths, mst, cache_path,
158
- lr1=0.2, niter1=500, loss1=gamma_loss(1.1),
159
- lr2=0.02, niter2=500, loss2=gamma_loss(0.4),
160
- lossd=gamma_loss(1.1),
161
- opt_pp=True, opt_depth=True,
162
- schedule=cosine_schedule, depth_mode='add', exp_depth=False,
163
- lora_depth=False, # dict(k=96, gamma=15, min_norm=.5),
164
- init={}, device='cuda', dtype=torch.float32,
165
- matching_conf_thr=4., loss_dust3r_w=0.01,
166
- verbose=True, dbg=()):
167
-
168
- # extrinsic parameters
169
- vec0001 = torch.tensor((0, 0, 0, 1), dtype=dtype, device=device)
170
- quats = [nn.Parameter(vec0001.clone()) for _ in range(len(imgs))]
171
- trans = [nn.Parameter(torch.zeros(3, device=device, dtype=dtype)) for _ in range(len(imgs))]
172
-
173
- # initialize
174
- ones = torch.ones((len(imgs), 1), device=device, dtype=dtype)
175
- median_depths = torch.ones(len(imgs), device=device, dtype=dtype)
176
- for img in imgs:
177
- idx = imgs.index(img)
178
- init_values = init.setdefault(img, {})
179
- if verbose and init_values:
180
- print(f' >> initializing img=...{img[-25:]} [{idx}] for {set(init_values)}')
181
-
182
- K = init_values.get('intrinsics')
183
- if K is not None:
184
- K = K.detach()
185
- focal = K[:2, :2].diag().mean()
186
- pp = K[:2, 2]
187
- base_focals[idx] = focal
188
- pps[idx] = pp
189
- pps[idx] /= imsizes[idx] # default principal_point would be (0.5, 0.5)
190
-
191
- depth = init_values.get('depthmap')
192
- if depth is not None:
193
- core_depth[idx] = depth.detach()
194
-
195
- median_depths[idx] = med_depth = core_depth[idx].median()
196
- core_depth[idx] /= med_depth
197
-
198
- cam2w = init_values.get('cam2w')
199
- if cam2w is not None:
200
- rot = cam2w[:3, :3].detach()
201
- cam_center = cam2w[:3, 3].detach()
202
- quats[idx].data[:] = roma.rotmat_to_unitquat(rot)
203
- trans_offset = med_depth * torch.cat((imsizes[idx] / base_focals[idx] * (0.5 - pps[idx]), ones[:1, 0]))
204
- trans[idx].data[:] = cam_center + rot @ trans_offset
205
- del rot
206
- assert False, 'inverse kinematic chain not yet implemented'
207
-
208
- # intrinsics parameters
209
- pps = [nn.Parameter(pp.to(dtype)) for pp in pps]
210
- diags = imsizes.float().norm(dim=1)
211
- min_focals = 0.25 * diags # diag = 1.2~1.4*max(W,H) => beta >= 1/(2*1.2*tan(fov/2)) ~= 0.26
212
- max_focals = 10 * diags
213
- log_focals = [nn.Parameter(f.view(1).log().to(dtype)) for f in base_focals]
214
- assert len(mst[1]) == len(pps) - 1
215
-
216
- def make_K_cam_depth(log_focals, pps, trans, quats, log_sizes, core_depth):
217
- # make intrinsics
218
- focals = torch.cat(log_focals).exp().clip(min=min_focals, max=max_focals)
219
- pps = torch.stack(pps)
220
- K = torch.eye(3, dtype=dtype, device=device)[None].expand(len(imgs), 3, 3).clone()
221
- K[:, 0, 0] = K[:, 1, 1] = focals
222
- K[:, 0:2, 2] = pps * imsizes
223
- if trans is None:
224
- return K
225
-
226
- # security! optimization is always trying to crush the scale down
227
- sizes = torch.cat(log_sizes).exp()
228
- global_scaling = 1 / sizes.min()
229
-
230
- # compute distance of camera to focal plane
231
- # tan(fov) = W/2 / focal
232
- z_cameras = sizes * median_depths * focals / base_focals
233
-
234
- # make extrinsic
235
- rel_cam2cam = torch.eye(4, dtype=dtype, device=device)[None].expand(len(imgs), 4, 4).clone()
236
- rel_cam2cam[:, :3, :3] = roma.unitquat_to_rotmat(F.normalize(torch.stack(quats), dim=1))
237
- rel_cam2cam[:, :3, 3] = torch.stack(trans)
238
-
239
- # cameras are defined as a kinematic chain
240
- tmp_cam2w = [None] * len(K)
241
- tmp_cam2w[mst[0]] = rel_cam2cam[mst[0]]
242
- for i, j in mst[1]:
243
- # i is the cam_i_to_world reference, j is the relative pose = cam_j_to_cam_i
244
- tmp_cam2w[j] = tmp_cam2w[i] @ rel_cam2cam[j]
245
- tmp_cam2w = torch.stack(tmp_cam2w)
246
-
247
- # smart reparameterization of cameras
248
- trans_offset = z_cameras.unsqueeze(1) * torch.cat((imsizes / focals.unsqueeze(1) * (0.5 - pps), ones), dim=-1)
249
- new_trans = global_scaling * (tmp_cam2w[:, :3, 3:4] - tmp_cam2w[:, :3, :3] @ trans_offset.unsqueeze(-1))
250
- cam2w = torch.cat((torch.cat((tmp_cam2w[:, :3, :3], new_trans), dim=2),
251
- vec0001.view(1, 1, 4).expand(len(K), 1, 4)), dim=1)
252
-
253
- depthmaps = []
254
- for i in range(len(imgs)):
255
- core_depth_img = core_depth[i]
256
- if exp_depth:
257
- core_depth_img = core_depth_img.exp()
258
- if lora_depth: # compute core_depth as a low-rank decomposition of 3d points
259
- core_depth_img = lora_depth_proj[i] @ core_depth_img
260
- if depth_mode == 'add':
261
- core_depth_img = z_cameras[i] + (core_depth_img - 1) * (median_depths[i] * sizes[i])
262
- elif depth_mode == 'mul':
263
- core_depth_img = z_cameras[i] * core_depth_img
264
- else:
265
- raise ValueError(f'Bad {depth_mode=}')
266
- depthmaps.append(global_scaling * core_depth_img)
267
-
268
- return K, (inv(cam2w), cam2w), depthmaps
269
-
270
- K = make_K_cam_depth(log_focals, pps, None, None, None, None)
271
- print('init focals =', to_numpy(K[:, 0, 0]))
272
-
273
- # spectral low-rank projection of depthmaps
274
- if lora_depth:
275
- core_depth, lora_depth_proj = spectral_projection_of_depthmaps(
276
- imgs, K, core_depth, subsample, cache_path=cache_path, **lora_depth)
277
- if exp_depth:
278
- core_depth = [d.clip(min=1e-4).log() for d in core_depth]
279
- core_depth = [nn.Parameter(d.ravel().to(dtype)) for d in core_depth]
280
- log_sizes = [nn.Parameter(torch.zeros(1, dtype=dtype, device=device)) for _ in range(len(imgs))]
281
-
282
- # Fetch img slices
283
- _, confs_sum, imgs_slices = corres
284
-
285
- # Define which pairs are fine to use with matching
286
- def matching_check(x): return x.max() > matching_conf_thr
287
- is_matching_ok = {}
288
- for s in imgs_slices:
289
- is_matching_ok[s.img1, s.img2] = matching_check(s.confs)
290
-
291
- # Subsample preds_21
292
- subsamp_preds_21 = {}
293
- for imk, imv in preds_21.items():
294
- subsamp_preds_21[imk] = {}
295
- for im2k, (pred, conf) in preds_21[imk].items():
296
- subpred = pred[::subsample, ::subsample].reshape(-1, 3) # original subsample
297
- subconf = conf[::subsample, ::subsample].ravel() # for both ptmaps and confs
298
- idxs = anchors[imgs.index(im2k)][1]
299
- subsamp_preds_21[imk][im2k] = (subpred[idxs], subconf[idxs]) # anchors subsample
300
-
301
- def loss_dust3r(cam2w, pts3d, pix_loss):
302
- # In case no correspondences could be established, fall back to the DUSt3R GA regression loss formulation (sparsified)
303
- loss = 0.
304
- cf_sum = 0.
305
- for s in imgs_slices:
306
- if not is_matching_ok[s.img1, s.img2]:
307
- # fallback to dust3r regression
308
- tgt_pts, tgt_confs = subsamp_preds_21[imgs[s.img2]][imgs[s.img1]]
309
- tgt_pts = geotrf(cam2w[s.img2], tgt_pts)
310
- cf_sum += tgt_confs.sum()
311
- loss += tgt_confs @ pix_loss(pts3d[s.img1], tgt_pts)
312
- return loss / cf_sum if cf_sum != 0. else 0.
313
-
314
- def loss_3d(K, w2cam, pts3d, pix_loss):
315
- # For each correspondence, we have two 3D points (one for each image of the pair).
316
- # For each 3D point, we have 2 reproj errors
317
- if any(v.get('freeze') for v in init.values()):
318
- pts3d_1 = []
319
- pts3d_2 = []
320
- confs = []
321
- for s in imgs_slices:
322
- if init[imgs[s.img1]].get('freeze') and init[imgs[s.img2]].get('freeze'):
323
- continue
324
- if is_matching_ok[s.img1, s.img2]:
325
- pts3d_1.append(pts3d[s.img1][s.slice1])
326
- pts3d_2.append(pts3d[s.img2][s.slice2])
327
- confs.append(s.confs)
328
- else:
329
- pts3d_1 = [pts3d[s.img1][s.slice1] for s in imgs_slices if is_matching_ok[s.img1, s.img2]]
330
- pts3d_2 = [pts3d[s.img2][s.slice2] for s in imgs_slices if is_matching_ok[s.img1, s.img2]]
331
- confs = [s.confs for s in imgs_slices if is_matching_ok[s.img1, s.img2]]
332
-
333
- if pts3d_1 != []:
334
- confs = torch.cat(confs)
335
- pts3d_1 = torch.cat(pts3d_1)
336
- pts3d_2 = torch.cat(pts3d_2)
337
- loss = confs @ pix_loss(pts3d_1, pts3d_2)
338
- cf_sum = confs.sum()
339
- else:
340
- loss = 0.
341
- cf_sum = 1.
342
-
343
- return loss / cf_sum
344
-
345
- def loss_2d(K, w2cam, pts3d, pix_loss):
346
- # For each correspondence, we have two 3D points (one for each image of the pair).
347
- # For each 3D point, we have 2 reproj errors
348
- proj_matrix = K @ w2cam[:, :3]
349
- loss = npix = 0
350
- for img1, pix1, confs, cf_sum, imgs_slices in corres2d:
351
- if init[imgs[img1]].get('freeze', 0) >= 1:
352
- continue # no need
353
- pts3d_in_img1 = [pts3d[img2][slice2] for img2, slice2 in imgs_slices if is_matching_ok[img1, img2]]
354
- pix1_filtered = []
355
- confs_filtered = []
356
- curstep = 0
357
- for img2, slice2 in imgs_slices:
358
- if is_matching_ok[img1, img2]:
359
- tslice = slice(curstep, curstep + slice2.stop - slice2.start, slice2.step)
360
- pix1_filtered.append(pix1[tslice])
361
- confs_filtered.append(confs[tslice])
362
- curstep += slice2.stop - slice2.start
363
- if pts3d_in_img1 != []:
364
- pts3d_in_img1 = torch.cat(pts3d_in_img1)
365
- pix1_filtered = torch.cat(pix1_filtered)
366
- confs_filtered = torch.cat(confs_filtered)
367
- loss += confs_filtered @ pix_loss(pix1_filtered, reproj2d(proj_matrix[img1], pts3d_in_img1))
368
- npix += confs_filtered.sum()
369
- return loss / npix if npix != 0 else 0.
370
-
371
- def optimize_loop(loss_func, lr_base, niter, pix_loss, lr_end=0):
372
- # create optimizer
373
- params = pps + log_focals + quats + trans + log_sizes + core_depth
374
- optimizer = torch.optim.Adam(params, lr=1, weight_decay=0, betas=(0.9, 0.9))
375
- ploss = pix_loss if 'meta' in repr(pix_loss) else (lambda a: pix_loss)
376
-
377
- with tqdm(total=niter) as bar:
378
- for iter in range(niter or 1):
379
- K, (w2cam, cam2w), depthmaps = make_K_cam_depth(log_focals, pps, trans, quats, log_sizes, core_depth)
380
- pts3d = make_pts3d(anchors, K, cam2w, depthmaps, base_focals=base_focals)
381
- if niter == 0:
382
- break
383
-
384
- alpha = (iter / niter)
385
- lr = schedule(alpha, lr_base, lr_end)
386
- adjust_learning_rate_by_lr(optimizer, lr)
387
- pix_loss = ploss(1 - alpha)
388
- optimizer.zero_grad()
389
- loss = loss_func(K, w2cam, pts3d, pix_loss) + loss_dust3r_w * loss_dust3r(cam2w, pts3d, lossd)
390
- loss.backward()
391
- optimizer.step()
392
-
393
- # make sure the pose remains well optimizable
394
- for i in range(len(imgs)):
395
- quats[i].data[:] /= quats[i].data.norm()
396
-
397
- loss = float(loss)
398
- if loss != loss:
399
- break # NaN loss
400
- bar.set_postfix_str(f'{lr=:.4f}, {loss=:.3f}')
401
- bar.update(1)
402
-
403
- if niter:
404
- print(f'>> final loss = {loss}')
405
- return dict(intrinsics=K.detach(), cam2w=cam2w.detach(),
406
- depthmaps=[d.detach() for d in depthmaps], pts3d=[p.detach() for p in pts3d])
407
-
408
- # at start, don't optimize 3d points
409
- for i, img in enumerate(imgs):
410
- trainable = not (init[img].get('freeze'))
411
- pps[i].requires_grad_(False)
412
- log_focals[i].requires_grad_(False)
413
- quats[i].requires_grad_(trainable)
414
- trans[i].requires_grad_(trainable)
415
- log_sizes[i].requires_grad_(trainable)
416
- core_depth[i].requires_grad_(False)
417
-
418
- res_coarse = optimize_loop(loss_3d, lr_base=lr1, niter=niter1, pix_loss=loss1)
419
-
420
- res_fine = None
421
- if niter2:
422
- # now we can optimize 3d points
423
- for i, img in enumerate(imgs):
424
- if init[img].get('freeze', 0) >= 1:
425
- continue
426
- pps[i].requires_grad_(bool(opt_pp))
427
- log_focals[i].requires_grad_(True)
428
- core_depth[i].requires_grad_(opt_depth)
429
-
430
- # refinement with 2d reproj
431
- res_fine = optimize_loop(loss_2d, lr_base=lr2, niter=niter2, pix_loss=loss2)
432
-
433
- return imgs, res_coarse, res_fine
434
-
435
-
436
- @lru_cache
437
- def mask110(device, dtype):
438
- return torch.tensor((1, 1, 0), device=device, dtype=dtype)
439
-
440
-
441
- def proj3d(inv_K, pixels, z):
442
- if pixels.shape[-1] == 2:
443
- pixels = torch.cat((pixels, torch.ones_like(pixels[..., :1])), dim=-1)
444
- return z.unsqueeze(-1) * (pixels * inv_K.diag() + inv_K[:, 2] * mask110(z.device, z.dtype))
445
-
446
-
447
- def make_pts3d(anchors, K, cam2w, depthmaps, base_focals=None, ret_depth=False):
448
- focals = K[:, 0, 0]
449
- invK = inv(K)
450
- all_pts3d = []
451
- depth_out = []
452
-
453
- for img, (pixels, idxs, offsets) in anchors.items():
454
- # from depthmaps to 3d points
455
- if base_focals is None:
456
- pass
457
- else:
458
- # compensate for focal
459
- # depth + depth * (offset - 1) * base_focal / focal
460
- # = depth * (1 + (offset - 1) * (base_focal / focal))
461
- offsets = 1 + (offsets - 1) * (base_focals[img] / focals[img])
462
-
463
- pts3d = proj3d(invK[img], pixels, depthmaps[img][idxs] * offsets)
464
- if ret_depth:
465
- depth_out.append(pts3d[..., 2]) # before camera rotation
466
-
467
- # rotate to world coordinate
468
- pts3d = geotrf(cam2w[img], pts3d)
469
- all_pts3d.append(pts3d)
470
-
471
- if ret_depth:
472
- return all_pts3d, depth_out
473
- return all_pts3d
474
-
475
-
476
- def make_dense_pts3d(intrinsics, cam2w, depthmaps, canonical_paths, subsample, device='cuda'):
477
- base_focals = []
478
- anchors = {}
479
- confs = []
480
- for i, canon_path in enumerate(canonical_paths):
481
- (canon, canon2, conf), focal = torch.load(canon_path, map_location=device)
482
- confs.append(conf)
483
- base_focals.append(focal)
484
- H, W = conf.shape
485
- pixels = torch.from_numpy(np.mgrid[:W, :H].T.reshape(-1, 2)).float().to(device)
486
- idxs, offsets = anchor_depth_offsets(canon2, {i: (pixels, None)}, subsample=subsample)
487
- anchors[i] = (pixels, idxs[i], offsets[i])
488
-
489
- # densify sparse depthmaps
490
- pts3d, depthmaps_out = make_pts3d(anchors, intrinsics, cam2w, [
491
- d.ravel() for d in depthmaps], base_focals=base_focals, ret_depth=True)
492
-
493
- return pts3d, depthmaps_out, confs
494
-
495
-
496
- @torch.no_grad()
497
- def forward_mast3r(pairs, model, cache_path, desc_conf='desc_conf',
498
- device='cuda', subsample=8, **matching_kw):
499
- res_paths = {}
500
-
501
- for img1, img2 in tqdm(pairs):
502
- idx1 = hash_md5(img1['instance'])
503
- idx2 = hash_md5(img2['instance'])
504
-
505
- path1 = cache_path + f'/forward/{idx1}/{idx2}.pth'
506
- path2 = cache_path + f'/forward/{idx2}/{idx1}.pth'
507
- path_corres = cache_path + f'/corres_conf={desc_conf}_{subsample=}/{idx1}-{idx2}.pth'
508
- path_corres2 = cache_path + f'/corres_conf={desc_conf}_{subsample=}/{idx2}-{idx1}.pth'
509
-
510
- if os.path.isfile(path_corres2) and not os.path.isfile(path_corres):
511
- score, (xy1, xy2, confs) = torch.load(path_corres2)
512
- torch.save((score, (xy2, xy1, confs)), path_corres)
513
-
514
- if not all(os.path.isfile(p) for p in (path1, path2, path_corres)):
515
- if model is None:
516
- continue
517
- res = symmetric_inference(model, img1, img2, device=device)
518
- X11, X21, X22, X12 = [r['pts3d'][0] for r in res]
519
- C11, C21, C22, C12 = [r['conf'][0] for r in res]
520
- descs = [r['desc'][0] for r in res]
521
- qonfs = [r[desc_conf][0] for r in res]
522
-
523
- # save
524
- torch.save(to_cpu((X11, C11, X21, C21)), mkdir_for(path1))
525
- torch.save(to_cpu((X22, C22, X12, C12)), mkdir_for(path2))
526
-
527
- # perform reciprocal matching
528
- corres = extract_correspondences(descs, qonfs, device=device, subsample=subsample)
529
-
530
- conf_score = (C11.mean() * C12.mean() * C21.mean() * C22.mean()).sqrt().sqrt()
531
- matching_score = (float(conf_score), float(corres[2].sum()), len(corres[2]))
532
- if cache_path is not None:
533
- torch.save((matching_score, corres), mkdir_for(path_corres))
534
-
535
- res_paths[img1['instance'], img2['instance']] = (path1, path2), path_corres
536
-
537
- del model
538
- torch.cuda.empty_cache()
539
-
540
- return res_paths, cache_path
541
-
542
-
543
- def symmetric_inference(model, img1, img2, device):
544
- shape1 = torch.from_numpy(img1['true_shape']).to(device, non_blocking=True)
545
- shape2 = torch.from_numpy(img2['true_shape']).to(device, non_blocking=True)
546
- img1 = img1['img'].to(device, non_blocking=True)
547
- img2 = img2['img'].to(device, non_blocking=True)
548
-
549
- # compute encoder only once
550
- feat1, feat2, pos1, pos2 = model._encode_image_pairs(img1, img2, shape1, shape2)
551
-
552
- def decoder(feat1, feat2, pos1, pos2, shape1, shape2):
553
- dec1, dec2 = model._decoder(feat1, pos1, feat2, pos2)
554
- with torch.cuda.amp.autocast(enabled=False):
555
- res1 = model._downstream_head(1, [tok.float() for tok in dec1], shape1)
556
- res2 = model._downstream_head(2, [tok.float() for tok in dec2], shape2)
557
- return res1, res2
558
-
559
- # decoder 1-2
560
- res11, res21 = decoder(feat1, feat2, pos1, pos2, shape1, shape2)
561
- # decoder 2-1
562
- res22, res12 = decoder(feat2, feat1, pos2, pos1, shape2, shape1)
563
-
564
- return (res11, res21, res22, res12)
565
-
566
-
567
- def extract_correspondences(feats, qonfs, subsample=8, device=None, ptmap_key='pred_desc'):
568
- feat11, feat21, feat22, feat12 = feats
569
- qonf11, qonf21, qonf22, qonf12 = qonfs
570
- assert feat11.shape[:2] == feat12.shape[:2] == qonf11.shape == qonf12.shape
571
- assert feat21.shape[:2] == feat22.shape[:2] == qonf21.shape == qonf22.shape
572
-
573
- if '3d' in ptmap_key:
574
- opt = dict(device='cpu', workers=32)
575
- else:
576
- opt = dict(device=device, dist='dot', block_size=2**13)
577
-
578
- # matching the two pairs
579
- idx1 = []
580
- idx2 = []
581
- qonf1 = []
582
- qonf2 = []
583
- # TODO add non symmetric / pixel_tol options
584
- for A, B, QA, QB in [(feat11, feat21, qonf11.cpu(), qonf21.cpu()),
585
- (feat12, feat22, qonf12.cpu(), qonf22.cpu())]:
586
- nn1to2 = fast_reciprocal_NNs(A, B, subsample_or_initxy1=subsample, ret_xy=False, **opt)
587
- nn2to1 = fast_reciprocal_NNs(B, A, subsample_or_initxy1=subsample, ret_xy=False, **opt)
588
-
589
- idx1.append(np.r_[nn1to2[0], nn2to1[1]])
590
- idx2.append(np.r_[nn1to2[1], nn2to1[0]])
591
- qonf1.append(QA.ravel()[idx1[-1]])
592
- qonf2.append(QB.ravel()[idx2[-1]])
593
-
594
- # merge corres from opposite pairs
595
- H1, W1 = feat11.shape[:2]
596
- H2, W2 = feat22.shape[:2]
597
- cat = np.concatenate
598
-
599
- xy1, xy2, idx = merge_corres(cat(idx1), cat(idx2), (H1, W1), (H2, W2), ret_xy=True, ret_index=True)
600
- corres = (xy1.copy(), xy2.copy(), np.sqrt(cat(qonf1)[idx] * cat(qonf2)[idx]))
601
-
602
- return todevice(corres, device)
603
-
604
-
605
- @torch.no_grad()
606
- def prepare_canonical_data(imgs, tmp_pairs, subsample, order_imgs=False, min_conf_thr=0,
607
- cache_path=None, device='cuda', **kw):
608
- canonical_views = {}
609
- pairwise_scores = torch.zeros((len(imgs), len(imgs)), device=device)
610
- canonical_paths = []
611
- preds_21 = {}
612
-
613
- for img in tqdm(imgs):
614
- if cache_path:
615
- cache = os.path.join(cache_path, 'canon_views', hash_md5(img) + f'_{subsample=}_{kw=}.pth')
616
- canonical_paths.append(cache)
617
- try:
618
- (canon, canon2, cconf), focal = torch.load(cache, map_location=device)
619
- except IOError:
620
- # cache does not exist yet, we create it!
621
- canon = focal = None
622
-
623
- # collect all pred1
624
- n_pairs = sum((img in pair) for pair in tmp_pairs)
625
-
626
- ptmaps11 = None
627
- pixels = {}
628
- n = 0
629
- for (img1, img2), ((path1, path2), path_corres) in tmp_pairs.items():
630
- score = None
631
- if img == img1:
632
- X, C, X2, C2 = torch.load(path1, map_location=device)
633
- score, (xy1, xy2, confs) = load_corres(path_corres, device, min_conf_thr)
634
- pixels[img2] = xy1, confs
635
- if img not in preds_21:
636
- preds_21[img] = {}
637
- preds_21[img][img2] = X2, C2
638
-
639
- if img == img2:
640
- X, C, X2, C2 = torch.load(path2, map_location=device)
641
- score, (xy1, xy2, confs) = load_corres(path_corres, device, min_conf_thr)
642
- pixels[img1] = xy2, confs
643
- if img not in preds_21:
644
- preds_21[img] = {}
645
- preds_21[img][img1] = X2, C2
646
-
647
- if score is not None:
648
- i, j = imgs.index(img1), imgs.index(img2)
649
- # score = score[0]
650
- # score = np.log1p(score[2])
651
- score = score[2]
652
- pairwise_scores[i, j] = score
653
- pairwise_scores[j, i] = score
654
-
655
- if canon is not None:
656
- continue
657
- if ptmaps11 is None:
658
- H, W = C.shape
659
- ptmaps11 = torch.empty((n_pairs, H, W, 3), device=device)
660
- confs11 = torch.empty((n_pairs, H, W), device=device)
661
-
662
- ptmaps11[n] = X
663
- confs11[n] = C
664
- n += 1
665
-
666
- if canon is None:
667
- canon, canon2, cconf = canonical_view(ptmaps11, confs11, subsample, **kw)
668
- del ptmaps11
669
- del confs11
670
-
671
- # compute focals
672
- H, W = canon.shape[:2]
673
- pp = torch.tensor([W / 2, H / 2], device=device)
674
- if focal is None:
675
- focal = estimate_focal_knowing_depth(canon[None], pp, focal_mode='weiszfeld', min_focal=0.5, max_focal=3.5)
676
- if cache:
677
- torch.save(to_cpu(((canon, canon2, cconf), focal)), mkdir_for(cache))
678
-
679
- # extract depth offsets with correspondences
680
- core_depth = canon[subsample // 2::subsample, subsample // 2::subsample, 2]
681
- idxs, offsets = anchor_depth_offsets(canon2, pixels, subsample=subsample)
682
-
683
- canonical_views[img] = (pp, (H, W), focal.view(1), core_depth, pixels, idxs, offsets)
684
-
685
- return tmp_pairs, pairwise_scores, canonical_views, canonical_paths, preds_21
686
-
687
-
688
- def load_corres(path_corres, device, min_conf_thr):
689
- score, (xy1, xy2, confs) = torch.load(path_corres, map_location=device)
690
- valid = confs > min_conf_thr if min_conf_thr else slice(None)
691
- # valid = (xy1 > 0).all(dim=1) & (xy2 > 0).all(dim=1) & (xy1 < 512).all(dim=1) & (xy2 < 512).all(dim=1)
692
- # print(f'keeping {valid.sum()} / {len(valid)} correspondences')
693
- return score, (xy1[valid], xy2[valid], confs[valid])
694
-
695
-
696
- PairOfSlices = namedtuple(
697
- 'ImgPair', 'img1, slice1, pix1, anchor_idxs1, img2, slice2, pix2, anchor_idxs2, confs, confs_sum')
698
-
699
-
700
- def condense_data(imgs, tmp_paths, canonical_views, dtype=torch.float32):
701
- # aggregate all data properly
702
- set_imgs = set(imgs)
703
-
704
- principal_points = []
705
- shapes = []
706
- focals = []
707
- core_depth = []
708
- img_anchors = {}
709
- tmp_pixels = {}
710
-
711
- for idx1, img1 in enumerate(imgs):
712
- # load stuff
713
- pp, shape, focal, anchors, pixels_confs, idxs, offsets = canonical_views[img1]
714
-
715
- principal_points.append(pp)
716
- shapes.append(shape)
717
- focals.append(focal)
718
- core_depth.append(anchors)
719
-
720
- img_uv1 = []
721
- img_idxs = []
722
- img_offs = []
723
- cur_n = [0]
724
-
725
- for img2, (pixels, match_confs) in pixels_confs.items():
726
- if img2 not in set_imgs:
727
- continue
728
- assert len(pixels) == len(idxs[img2]) == len(offsets[img2])
729
- img_uv1.append(torch.cat((pixels, torch.ones_like(pixels[:, :1])), dim=-1))
730
- img_idxs.append(idxs[img2])
731
- img_offs.append(offsets[img2])
732
- cur_n.append(cur_n[-1] + len(pixels))
733
- # store the position of 3d points
734
- tmp_pixels[img1, img2] = pixels.to(dtype), match_confs.to(dtype), slice(*cur_n[-2:])
735
- img_anchors[idx1] = (torch.cat(img_uv1), torch.cat(img_idxs), torch.cat(img_offs))
736
-
737
- all_confs = []
738
- imgs_slices = []
739
- corres2d = {img: [] for img in range(len(imgs))}
740
-
741
- for img1, img2 in tmp_paths:
742
- try:
743
- pix1, confs1, slice1 = tmp_pixels[img1, img2]
744
- pix2, confs2, slice2 = tmp_pixels[img2, img1]
745
- except KeyError:
746
- continue
747
- img1 = imgs.index(img1)
748
- img2 = imgs.index(img2)
749
- confs = (confs1 * confs2).sqrt()
750
-
751
- # prepare for loss_3d
752
- all_confs.append(confs)
753
- anchor_idxs1 = canonical_views[imgs[img1]][5][imgs[img2]]
754
- anchor_idxs2 = canonical_views[imgs[img2]][5][imgs[img1]]
755
- imgs_slices.append(PairOfSlices(img1, slice1, pix1, anchor_idxs1,
756
- img2, slice2, pix2, anchor_idxs2,
757
- confs, float(confs.sum())))
758
-
759
- # prepare for loss_2d
760
- corres2d[img1].append((pix1, confs, img2, slice2))
761
- corres2d[img2].append((pix2, confs, img1, slice1))
762
-
763
- all_confs = torch.cat(all_confs)
764
- corres = (all_confs, float(all_confs.sum()), imgs_slices)
765
-
766
- def aggreg_matches(img1, list_matches):
767
- pix1, confs, img2, slice2 = zip(*list_matches)
768
- all_pix1 = torch.cat(pix1).to(dtype)
769
- all_confs = torch.cat(confs).to(dtype)
770
- return img1, all_pix1, all_confs, float(all_confs.sum()), [(j, sl2) for j, sl2 in zip(img2, slice2)]
771
- corres2d = [aggreg_matches(img, m) for img, m in corres2d.items()]
772
-
773
- imsizes = torch.tensor([(W, H) for H, W in shapes], device=pp.device) # (W,H)
774
- principal_points = torch.stack(principal_points)
775
- focals = torch.cat(focals)
776
- return imsizes, principal_points, focals, core_depth, img_anchors, corres, corres2d
777
-
778
-
779
- def canonical_view(ptmaps11, confs11, subsample, mode='avg-angle'):
780
- assert len(ptmaps11) == len(confs11) > 0, 'not a single view1 for img={i}'
781
-
782
- # canonical pointmap is just a weighted average
783
- confs11 = confs11.unsqueeze(-1) - 0.999
784
- canon = (confs11 * ptmaps11).sum(0) / confs11.sum(0)
785
-
786
- canon_depth = ptmaps11[..., 2].unsqueeze(1)
787
- S = slice(subsample // 2, None, subsample)
788
- center_depth = canon_depth[:, :, S, S]
789
- assert (center_depth > 0).all()
790
- stacked_depth = F.pixel_unshuffle(canon_depth, subsample)
791
- stacked_confs = F.pixel_unshuffle(confs11[:, None, :, :, 0], subsample)
792
-
793
- if mode == 'avg-reldepth':
794
- rel_depth = stacked_depth / center_depth
795
- stacked_canon = (stacked_confs * rel_depth).sum(dim=0) / stacked_confs.sum(dim=0)
796
- canon2 = F.pixel_shuffle(stacked_canon.unsqueeze(0), subsample).squeeze()
797
-
798
- elif mode == 'avg-angle':
799
- xy = ptmaps11[..., 0:2].permute(0, 3, 1, 2)
800
- stacked_xy = F.pixel_unshuffle(xy, subsample)
801
- B, _, H, W = stacked_xy.shape
802
- stacked_radius = (stacked_xy.view(B, 2, -1, H, W) - xy[:, :, None, S, S]).norm(dim=1)
803
- stacked_radius.clip_(min=1e-8)
804
-
805
- stacked_angle = torch.arctan((stacked_depth - center_depth) / stacked_radius)
806
- avg_angle = (stacked_confs * stacked_angle).sum(dim=0) / stacked_confs.sum(dim=0)
807
-
808
- # back to depth
809
- stacked_depth = stacked_radius.mean(dim=0) * torch.tan(avg_angle)
810
-
811
- canon2 = F.pixel_shuffle((1 + stacked_depth / canon[S, S, 2]).unsqueeze(0), subsample).squeeze()
812
- else:
813
- raise ValueError(f'bad {mode=}')
814
-
815
- confs = (confs11.square().sum(dim=0) / confs11.sum(dim=0)).squeeze()
816
- return canon, canon2, confs
817
-
818
-
819
- def anchor_depth_offsets(canon_depth, pixels, subsample=8):
820
- device = canon_depth.device
821
-
822
- # create a 2D grid of anchor 3D points
823
- H1, W1 = canon_depth.shape
824
- yx = np.mgrid[subsample // 2:H1:subsample, subsample // 2:W1:subsample]
825
- H2, W2 = yx.shape[1:]
826
- cy, cx = yx.reshape(2, -1)
827
- core_depth = canon_depth[cy, cx]
828
- assert (core_depth > 0).all()
829
-
830
- # slave 3d points (attached to core 3d points)
831
- core_idxs = {} # core_idxs[img2] = {corr_idx:core_idx}
832
- core_offs = {} # core_offs[img2] = {corr_idx:3d_offset}
833
-
834
- for img2, (xy1, _confs) in pixels.items():
835
- px, py = xy1.long().T
836
-
837
- # find nearest anchor == block quantization
838
- core_idx = (py // subsample) * W2 + (px // subsample)
839
- core_idxs[img2] = core_idx.to(device)
840
-
841
- # compute relative depth offsets w.r.t. anchors
842
- ref_z = core_depth[core_idx]
843
- pts_z = canon_depth[py, px]
844
- offset = pts_z / ref_z
845
- core_offs[img2] = offset.detach().to(device)
846
-
847
- return core_idxs, core_offs
848
-
849
-
850
- def spectral_clustering(graph, k=None, normalized_cuts=False):
851
- graph.fill_diagonal_(0)
852
-
853
- # graph laplacian
854
- degrees = graph.sum(dim=-1)
855
- laplacian = torch.diag(degrees) - graph
856
- if normalized_cuts:
857
- i_inv = torch.diag(degrees.sqrt().reciprocal())
858
- laplacian = i_inv @ laplacian @ i_inv
859
-
860
- # compute eigenvectors!
861
- eigval, eigvec = torch.linalg.eigh(laplacian)
862
- return eigval[:k], eigvec[:, :k]
863
-
864
-
865
- def sim_func(p1, p2, gamma):
866
- diff = (p1 - p2).norm(dim=-1)
867
- avg_depth = (p1[:, :, 2] + p2[:, :, 2])
868
- rel_distance = diff / avg_depth
869
- sim = torch.exp(-gamma * rel_distance.square())
870
- return sim
871
-
872
-
873
- def backproj(K, depthmap, subsample):
874
- H, W = depthmap.shape
875
- uv = np.mgrid[subsample // 2:subsample * W:subsample, subsample // 2:subsample * H:subsample].T.reshape(H, W, 2)
876
- xyz = depthmap.unsqueeze(-1) * geotrf(inv(K), todevice(uv, K.device), ncol=3)
877
- return xyz
878
-
879
-
880
- def spectral_projection_depth(K, depthmap, subsample, k=64, cache_path='',
881
- normalized_cuts=True, gamma=7, min_norm=5):
882
- try:
883
- if cache_path:
884
- cache_path = cache_path + f'_{k=}_norm={normalized_cuts}_{gamma=}.pth'
885
- lora_proj = torch.load(cache_path, map_location=K.device)
886
-
887
- except IOError:
888
- # reconstruct 3d points in camera coordinates
889
- xyz = backproj(K, depthmap, subsample)
890
-
891
- # compute all distances
892
- xyz = xyz.reshape(-1, 3)
893
- graph = sim_func(xyz[:, None], xyz[None, :], gamma=gamma)
894
- _, lora_proj = spectral_clustering(graph, k, normalized_cuts=normalized_cuts)
895
-
896
- if cache_path:
897
- torch.save(lora_proj.cpu(), mkdir_for(cache_path))
898
-
899
- lora_proj, coeffs = lora_encode_normed(lora_proj, depthmap.ravel(), min_norm=min_norm)
900
-
901
- # depthmap ~= lora_proj @ coeffs
902
- return coeffs, lora_proj
903
-
904
-
905
- def lora_encode_normed(lora_proj, x, min_norm, global_norm=False):
906
- # encode the pointmap
907
- coeffs = torch.linalg.pinv(lora_proj) @ x
908
-
909
- # rectify the norm of basis vector to be ~ equal
910
- if coeffs.ndim == 1:
911
- coeffs = coeffs[:, None]
912
- if global_norm:
913
- lora_proj *= coeffs[1:].norm() * min_norm / coeffs.shape[1]
914
- elif min_norm:
915
- lora_proj *= coeffs.norm(dim=1).clip(min=min_norm)
916
- # can have rounding errors here!
917
- coeffs = (torch.linalg.pinv(lora_proj.double()) @ x.double()).float()
918
-
919
- return lora_proj.detach(), coeffs.detach()
920
-
921
-
922
- @torch.no_grad()
923
- def spectral_projection_of_depthmaps(imgs, intrinsics, depthmaps, subsample, cache_path=None, **kw):
924
- # recover 3d points
925
- core_depth = []
926
- lora_proj = []
927
-
928
- for i, img in enumerate(tqdm(imgs)):
929
- cache = os.path.join(cache_path, 'lora_depth', hash_md5(img)) if cache_path else None
930
- depth, proj = spectral_projection_depth(intrinsics[i], depthmaps[i], subsample,
931
- cache_path=cache, **kw)
932
- core_depth.append(depth)
933
- lora_proj.append(proj)
934
-
935
- return core_depth, lora_proj
936
-
937
-
938
- def reproj2d(Trf, pts3d):
939
- res = (pts3d @ Trf[:3, :3].transpose(-1, -2)) + Trf[:3, 3]
940
- clipped_z = res[:, 2:3].clip(min=1e-3) # make sure we don't have nans!
941
- uv = res[:, 0:2] / clipped_z
942
- return uv.clip(min=-1000, max=2000)
943
-
944
-
945
- def bfs(tree, start_node):
946
- order, predecessors = sp.csgraph.breadth_first_order(tree, start_node, directed=False)
947
- ranks = np.arange(len(order))
948
- ranks[order] = ranks.copy()
949
- return ranks, predecessors
950
-
951
-
952
- def compute_min_spanning_tree(pws):
953
- sparse_graph = sp.dok_array(pws.shape)
954
- for i, j in pws.nonzero().cpu().tolist():
955
- sparse_graph[i, j] = -float(pws[i, j])
956
- msp = sp.csgraph.minimum_spanning_tree(sparse_graph)
957
-
958
- # now reorder the oriented edges, starting from the central point
959
- ranks1, _ = bfs(msp, 0)
960
- ranks2, _ = bfs(msp, ranks1.argmax())
961
- ranks1, _ = bfs(msp, ranks2.argmax())
962
- # this is the point farthest from any leaf
963
- root = np.minimum(ranks1, ranks2).argmax()
964
-
965
- # find the ordered list of edges that describe the tree
966
- order, predecessors = sp.csgraph.breadth_first_order(msp, root, directed=False)
967
- order = order[1:]  # the root does not have a predecessor
968
- edges = [(predecessors[i], i) for i in order]
969
-
970
- return root, edges
971
-
972
-
973
- def show_reconstruction(shapes_or_imgs, K, cam2w, pts3d, gt_cam2w=None, gt_K=None, cam_size=None, masks=None, **kw):
974
- viz = SceneViz()
975
-
976
- cc = cam2w[:, :3, 3]
977
- cs = cam_size or float(torch.cdist(cc, cc).fill_diagonal_(np.inf).min(dim=0).values.median())
978
- colors = 64 + np.random.randint(255 - 64, size=(len(cam2w), 3))
979
-
980
- if isinstance(shapes_or_imgs, np.ndarray) and shapes_or_imgs.ndim == 2:
981
- cam_kws = dict(imsizes=shapes_or_imgs[:, ::-1], cam_size=cs)
982
- else:
983
- imgs = shapes_or_imgs
984
- cam_kws = dict(images=imgs, cam_size=cs)
985
- if K is not None:
986
- viz.add_cameras(to_numpy(cam2w), to_numpy(K), colors=colors, **cam_kws)
987
-
988
- if gt_cam2w is not None:
989
- if gt_K is None:
990
- gt_K = K
991
- viz.add_cameras(to_numpy(gt_cam2w), to_numpy(gt_K), colors=colors, marker='o', **cam_kws)
992
-
993
- if pts3d is not None:
994
- for i, p in enumerate(pts3d):
995
- if not len(p):
996
- continue
997
- if masks is None:
998
- viz.add_pointcloud(to_numpy(p), color=tuple(colors[i].tolist()))
999
- else:
1000
- viz.add_pointcloud(to_numpy(p), mask=masks[i], color=imgs[i])
1001
- viz.show(**kw)
 
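For reference, a toy reconstruction (random poses, not the library code) of the "kinematic chain" parameterization used by `make_K_cam_depth` above: each image stores a pose relative to its parent in the minimum spanning tree, and absolute cam2world matrices are recovered by composing the relative poses outwards from the root.

import torch

def random_pose():
    # hypothetical test data: a random proper rotation (via QR) plus a random translation
    q, _ = torch.linalg.qr(torch.randn(3, 3))
    T = torch.eye(4)
    T[:3, :3] = q * torch.sign(torch.det(q))   # flip the sign if needed so det = +1
    T[:3, 3] = torch.randn(3)
    return T

n_imgs = 4
root, edges = 0, [(0, 1), (1, 2), (0, 3)]      # (parent, child) edges in BFS order, as from compute_min_spanning_tree
rel_cam2cam = [random_pose() for _ in range(n_imgs)]   # rel_cam2cam[j]: cam_j -> cam_parent(j)

cam2w = [None] * n_imgs
cam2w[root] = rel_cam2cam[root]                # the root pose is used as-is
for i, j in edges:                             # parent i is always resolved before child j
    cam2w[j] = cam2w[i] @ rel_cam2cam[j]
cam2w = torch.stack(cam2w)
print(cam2w.shape)                             # torch.Size([4, 4, 4])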
 
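Likewise, a self-contained sketch (random data, illustrative names) of the anchor/offset encoding computed by `anchor_depth_offsets` above: every correspondence pixel is snapped to the nearest cell of a coarse anchor grid (block quantization), and its depth is stored as a ratio to the anchor depth, so optimizing only the sparse anchor depths still moves every correspondence.

import torch

subsample, H1, W1 = 8, 64, 96
canon_depth = torch.rand(H1, W1) + 0.5                  # strictly positive depths
W2 = W1 // subsample                                    # width of the anchor grid

# anchor depths sampled at the centre of each subsample x subsample block
core_depth = canon_depth[subsample // 2::subsample, subsample // 2::subsample].reshape(-1)

xy1 = torch.randint(0, min(H1, W1), (100, 2))           # some correspondence pixels (x, y)
px, py = xy1.long().T

core_idx = (py // subsample) * W2 + (px // subsample)   # nearest anchor == block quantization
offset = canon_depth[py, px] / core_depth[core_idx]     # relative depth w.r.t. the anchor

# depth is recovered exactly from anchor depth * offset
assert torch.allclose(core_depth[core_idx] * offset, canon_depth[py, px])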
 
mast3r/cloud_opt/triangulation.py DELETED
@@ -1,80 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # Matches Triangulation Utils
6
- # --------------------------------------------------------
7
-
8
- import numpy as np
9
- import torch
10
-
11
- # Batched Matches Triangulation
12
- def batched_triangulate(pts2d, # [B, Ncams, Npts, 2]
13
- proj_mats): # [B, Ncams, 3, 4] I@E projection matrix
14
- B, Ncams, Npts, two = pts2d.shape
15
- assert two==2
16
- assert proj_mats.shape == (B, Ncams, 3, 4)
17
- # P - xP
18
- x = proj_mats[...,0,:][...,None,:] - torch.einsum('bij,bik->bijk', pts2d[...,0], proj_mats[...,2,:]) # [B, Ncams, Npts, 4]
19
- y = proj_mats[...,1,:][...,None,:] - torch.einsum('bij,bik->bijk', pts2d[...,1], proj_mats[...,2,:]) # [B, Ncams, Npts, 4]
20
- eq = torch.cat([x, y], dim=1).transpose(1, 2) # [B, Npts, 2xNcams, 4]
21
- return torch.linalg.lstsq(eq[...,:3], -eq[...,3]).solution
22
-
23
- def matches_to_depths(intrinsics, # input camera intrinsics [B, Ncams, 3, 3]
24
- extrinsics, # input camera extrinsics [B, Ncams, 3, 4]
25
- matches, # input correspondences [B, Ncams, Npts, 2]
26
- batchsize=16, # bs for batched processing
27
- min_num_valids_ratio=.3 # at least this ratio of image pairs need to predict a match for a given pixel of img1
28
- ):
29
- B, Nv, H, W, five = matches.shape
30
- min_num_valids = np.floor(Nv*min_num_valids_ratio)
31
- out_aggregated_points, out_depths, out_confs = [], [], []
32
- for b in range(B//batchsize+1): # batched processing
33
- start, stop = b*batchsize,min(B,(b+1)*batchsize)
34
- sub_batch=slice(start,stop)
35
- sub_batchsize = stop-start
36
- if sub_batchsize==0:continue
37
- points1, points2, confs = matches[sub_batch, ..., :2], matches[sub_batch, ..., 2:4], matches[sub_batch, ..., -1]
38
- allpoints = torch.cat([points1.view([sub_batchsize*Nv,1,H*W,2]), points2.view([sub_batchsize*Nv,1,H*W,2])],dim=1) # [BxNv, 2, HxW, 2]
39
-
40
- allcam_Ps = intrinsics[sub_batch] @ extrinsics[sub_batch,:,:3,:]
41
- cam_Ps1, cam_Ps2 = allcam_Ps[:,[0]].repeat([1,Nv,1,1]), allcam_Ps[:,1:] # [B, Nv, 3, 4]
42
- formatted_camPs = torch.cat([cam_Ps1.reshape([sub_batchsize*Nv,1,3,4]), cam_Ps2.reshape([sub_batchsize*Nv,1,3,4])],dim=1) # [BxNv, 2, 3, 4]
43
-
44
- # Triangulate matches to 3D
45
- points_3d_world = batched_triangulate(allpoints, formatted_camPs) # [BxNv, HxW, three]
46
-
47
- # Aggregate pairwise predictions
48
- points_3d_world = points_3d_world.view([sub_batchsize,Nv,H,W,3])
49
- valids = points_3d_world.isfinite()
50
- valids_sum = valids.sum(dim=-1)
51
- validsuni=valids_sum.unique()
52
- assert torch.all(torch.logical_or(validsuni == 0 , validsuni == 3)), "Error, can only be nan for none or all XYZ values, not a subset"
53
- confs[valids_sum==0] = 0.
54
- points_3d_world = points_3d_world*confs[...,None]
55
-
56
- # Take care of NaNs
57
- normalization = confs.sum(dim=1)[:,None].repeat(1,Nv,1,1)
58
- normalization[normalization <= 1e-5] = 1.
59
- points_3d_world[valids] /= normalization[valids_sum==3][:,None].repeat(1,3).view(-1)
60
- points_3d_world[~valids] = 0.
61
- aggregated_points = points_3d_world.sum(dim=1) # weighted average (by confidence value) ignoring nans
62
-
63
- # Reset invalid values to nans, with a min visibility threshold
64
- aggregated_points[valids_sum.sum(dim=1)/3 <= min_num_valids] = torch.nan
65
-
66
- # From 3D to depths
67
- refcamE = extrinsics[sub_batch, 0]
68
- points_3d_camera = (refcamE[:,:3, :3] @ aggregated_points.view(sub_batchsize,-1,3).transpose(-2,-1) + refcamE[:,:3,[3]]).transpose(-2,-1) # [B,HxW,3]
69
- depths = points_3d_camera.view(sub_batchsize,H,W,3)[..., 2] # [B,H,W]
70
-
71
- # Cat results
72
- out_aggregated_points.append(aggregated_points.cpu())
73
- out_depths.append(depths.cpu())
74
- out_confs.append(confs.sum(dim=1).cpu())
75
-
76
- out_aggregated_points = torch.cat(out_aggregated_points,dim=0)
77
- out_depths = torch.cat(out_depths,dim=0)
78
- out_confs = torch.cat(out_confs,dim=0)
79
-
80
- return out_aggregated_points, out_depths, out_confs
 
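A small toy check (assumed setup, not part of the deleted file) of the DLT system that `batched_triangulate` solves: for a camera with 3x4 projection matrix P and observation (u, v), the rows P[0] - u*P[2] and P[1] - v*P[2] are linear in the 3D point; stacking them over cameras and solving in the least-squares sense recovers the point.

import torch

X = torch.tensor([0.3, -0.2, 4.0])                       # ground-truth 3D point
K = torch.tensor([[100., 0., 64.], [0., 100., 48.], [0., 0., 1.]])
E1 = torch.eye(3, 4)                                     # camera 1 at the world origin
E2 = torch.eye(3, 4)
E2[0, 3] = -1.0                                          # camera 2 shifted along x
projs = torch.stack([K @ E1, K @ E2])                    # [Ncams, 3, 4], I @ E as in the code above

uvw = projs @ torch.cat([X, torch.ones(1)])              # project to homogeneous pixel coords
uv = uvw[:, :2] / uvw[:, 2:3]                            # observed (u, v) per camera

rows = []                                                # P - x*P rows, as in batched_triangulate
for P, (u, v) in zip(projs, uv):
    rows.append(P[0] - u * P[2])
    rows.append(P[1] - v * P[2])
eq = torch.stack(rows)                                   # [2*Ncams, 4]
X_hat = torch.linalg.lstsq(eq[:, :3], -eq[:, 3:]).solution[:, 0]
print(X_hat)                                             # ~ tensor([ 0.3000, -0.2000,  4.0000])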
 
 
mast3r/cloud_opt/tsdf_optimizer.py DELETED
@@ -1,273 +0,0 @@
1
- import torch
2
- from torch import nn
3
- import numpy as np
4
- from tqdm import tqdm
5
- from matplotlib import pyplot as pl
6
-
7
- import mast3r.utils.path_to_dust3r # noqa
8
- from dust3r.utils.geometry import depthmap_to_pts3d, geotrf, inv
9
- from dust3r.cloud_opt.base_opt import clean_pointcloud
10
-
11
-
12
- class TSDFPostProcess:
13
- """ Optimizes a signed distance-function to improve depthmaps.
14
- """
15
-
16
- def __init__(self, optimizer, subsample=8, TSDF_thresh=0., TSDF_batchsize=int(1e7)):
17
- self.TSDF_thresh = TSDF_thresh # None -> no TSDF
18
- self.TSDF_batchsize = TSDF_batchsize
19
- self.optimizer = optimizer
20
-
21
- pts3d, depthmaps, confs = optimizer.get_dense_pts3d(clean_depth=False, subsample=subsample)
22
- pts3d, depthmaps = self._TSDF_postprocess_or_not(pts3d, depthmaps, confs)
23
- self.pts3d = pts3d
24
- self.depthmaps = depthmaps
25
- self.confs = confs
26
-
27
- def _get_depthmaps(self, TSDF_filtering_thresh=None):
28
- if TSDF_filtering_thresh:
29
- self._refine_depths_with_TSDF(self.optimizer, TSDF_filtering_thresh) # compute refined depths if needed
30
- dms = self.TSDF_im_depthmaps if TSDF_filtering_thresh else self.im_depthmaps
31
- return [d.exp() for d in dms]
32
-
33
- @torch.no_grad()
34
- def _refine_depths_with_TSDF(self, TSDF_filtering_thresh, niter=1, nsamples=1000):
35
- """
36
- Leverage TSDF to post-process estimated depths
37
- for each pixel, find zero level of TSDF along ray (or closest to 0)
38
- """
39
- print("Post-Processing Depths with TSDF fusion.")
40
- self.TSDF_im_depthmaps = []
41
- alldepths, allposes, allfocals, allpps, allimshapes = self._get_depthmaps(), self.optimizer.get_im_poses(
42
- ), self.optimizer.get_focals(), self.optimizer.get_principal_points(), self.imshapes
43
- for vi in tqdm(range(self.optimizer.n_imgs)):
44
- dm, pose, focal, pp, imshape = alldepths[vi], allposes[vi], allfocals[vi], allpps[vi], allimshapes[vi]
45
- minvals = torch.full(dm.shape, 1e20)
46
-
47
- for it in range(niter):
48
- H, W = dm.shape
49
- curthresh = (niter - it) * TSDF_filtering_thresh
50
- dm_offsets = (torch.randn(H, W, nsamples).to(dm) - 1.) * \
51
- curthresh # decreasing search std along with iterations
52
- newdm = dm[..., None] + dm_offsets # [H,W,Nsamp]
53
- curproj = self._backproj_pts3d(in_depths=[newdm], in_im_poses=pose[None], in_focals=focal[None], in_pps=pp[None], in_imshapes=[
54
- imshape])[0] # [H,W,Nsamp,3]
55
- # Batched TSDF eval
56
- curproj = curproj.view(-1, 3)
57
- tsdf_vals = []
58
- valids = []
59
- for batch in range(0, len(curproj), self.TSDF_batchsize):
60
- values, valid = self._TSDF_query(
61
- curproj[batch:min(batch + self.TSDF_batchsize, len(curproj))], curthresh)
62
- tsdf_vals.append(values)
63
- valids.append(valid)
64
- tsdf_vals = torch.cat(tsdf_vals, dim=0)
65
- valids = torch.cat(valids, dim=0)
66
-
67
- tsdf_vals = tsdf_vals.view([H, W, nsamples])
68
- valids = valids.view([H, W, nsamples])
69
-
70
- # keep depth value that got us the closest to 0
71
- tsdf_vals[~valids] = torch.inf # ignore invalid values
72
- tsdf_vals = tsdf_vals.abs()
73
- mins = torch.argmin(tsdf_vals, dim=-1, keepdim=True)
74
- # when all samples live on a very flat zone, do nothing
75
- allbad = (tsdf_vals == curthresh).sum(dim=-1) == nsamples
76
- dm[~allbad] = torch.gather(newdm, -1, mins)[..., 0][~allbad]
77
-
78
- # Save refined depth map
79
- self.TSDF_im_depthmaps.append(dm.log())
80
-
81
- def _TSDF_query(self, qpoints, TSDF_filtering_thresh, weighted=True):
82
- """
83
- TSDF query call: returns the weighted TSDF value for each query point [N, 3]
84
- """
85
- N, three = qpoints.shape
86
- assert three == 3
87
- qpoints = qpoints[None].repeat(self.optimizer.n_imgs, 1, 1) # [B,N,3]
88
- # get projection coordinates and depths onto images
89
- coords_and_depth = self._proj_pts3d(pts3d=qpoints, cam2worlds=self.optimizer.get_im_poses(
90
- ), focals=self.optimizer.get_focals(), pps=self.optimizer.get_principal_points())
91
- image_coords = coords_and_depth[..., :2].round().to(int) # for now, there's no interpolation...
92
- proj_depths = coords_and_depth[..., -1]
93
- # recover depth values after scene optim
94
- pred_depths, pred_confs, valids = self._get_pixel_depths(image_coords)
95
- # Gather TSDF scores
96
- all_SDF_scores = pred_depths - proj_depths # SDF
97
- unseen = all_SDF_scores < -TSDF_filtering_thresh # handle visibility
98
- # all_TSDF_scores = all_SDF_scores.clip(-TSDF_filtering_thresh,TSDF_filtering_thresh) # SDF -> TSDF
99
- all_TSDF_scores = all_SDF_scores.clip(-TSDF_filtering_thresh, 1e20) # SDF -> TSDF
100
- # Gather TSDF confidences and ignore points that are unseen, either OOB during reproj or too far behind seen depth
101
- all_TSDF_weights = (~unseen).float() * valids.float()
102
- if weighted:
103
- all_TSDF_weights = pred_confs.exp() * all_TSDF_weights
104
- # Aggregate all votes, ignoring zeros
105
- TSDF_weights = all_TSDF_weights.sum(dim=0)
106
- valids = TSDF_weights != 0.
107
- TSDF_wsum = (all_TSDF_weights * all_TSDF_scores).sum(dim=0)
108
- TSDF_wsum[valids] /= TSDF_weights[valids]
109
- return TSDF_wsum, valids
110
-
111
- def _get_pixel_depths(self, image_coords, TSDF_filtering_thresh=None, with_normals_conf=False):
112
- """ Recover depth value for each input pixel coordinate, along with OOB validity mask
113
- """
114
- B, N, two = image_coords.shape
115
- assert B == self.optimizer.n_imgs and two == 2
116
- depths = torch.zeros([B, N], device=image_coords.device)
117
- valids = torch.zeros([B, N], dtype=bool, device=image_coords.device)
118
- confs = torch.zeros([B, N], device=image_coords.device)
119
- curconfs = self._get_confs_with_normals() if with_normals_conf else self.im_conf
120
- for ni, (imc, depth, conf) in enumerate(zip(image_coords, self._get_depthmaps(TSDF_filtering_thresh), curconfs)):
121
- H, W = depth.shape
122
- valids[ni] = torch.logical_and(0 <= imc[:, 1], imc[:, 1] <
123
- H) & torch.logical_and(0 <= imc[:, 0], imc[:, 0] < W)
124
- imc[~valids[ni]] = 0
125
- depths[ni] = depth[imc[:, 1], imc[:, 0]]
126
- confs[ni] = conf.cuda()[imc[:, 1], imc[:, 0]]
127
- return depths, confs, valids
128
-
129
- def _get_confs_with_normals(self):
130
- outconfs = []
131
- # Confidence based on depth gradient
132
-
133
- class Sobel(nn.Module):
134
- def __init__(self):
135
- super().__init__()
136
- self.filter = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3, stride=1, padding=1, bias=False)
137
- Gx = torch.tensor([[2.0, 0.0, -2.0], [4.0, 0.0, -4.0], [2.0, 0.0, -2.0]])
138
- Gy = torch.tensor([[2.0, 4.0, 2.0], [0.0, 0.0, 0.0], [-2.0, -4.0, -2.0]])
139
- G = torch.cat([Gx.unsqueeze(0), Gy.unsqueeze(0)], 0)
140
- G = G.unsqueeze(1)
141
- self.filter.weight = nn.Parameter(G, requires_grad=False)
142
-
143
- def forward(self, img):
144
- x = self.filter(img)
145
- x = torch.mul(x, x)
146
- x = torch.sum(x, dim=1, keepdim=True)
147
- x = torch.sqrt(x)
148
- return x
149
-
150
- grad_op = Sobel().to(self.im_depthmaps[0].device)
151
- for conf, depth in zip(self.im_conf, self.im_depthmaps):
152
- grad_confs = (1. - grad_op(depth[None, None])[0, 0]).clip(0)
153
- if not 'dbg show':
154
- pl.imshow(grad_confs.cpu())
155
- pl.show()
156
- outconfs.append(conf * grad_confs.to(conf))
157
- return outconfs
158
-
159
- def _proj_pts3d(self, pts3d, cam2worlds, focals, pps):
160
- """
161
- Projection operation: from 3D points to 2D coordinates + depths
162
- """
163
- B = pts3d.shape[0]
164
- assert pts3d.shape[0] == cam2worlds.shape[0]
165
- # prepare extrinsics
166
- R, t = cam2worlds[:, :3, :3], cam2worlds[:, :3, -1]
167
- Rinv = R.transpose(-2, -1)
168
- tinv = -Rinv @ t[..., None]
169
-
170
- # prepare intrinsics
171
- intrinsics = torch.eye(3).to(cam2worlds)[None].repeat(focals.shape[0], 1, 1)
172
- if len(focals.shape) == 1:
173
- focals = torch.stack([focals, focals], dim=-1)
174
- intrinsics[:, 0, 0] = focals[:, 0]
175
- intrinsics[:, 1, 1] = focals[:, 1]
176
- intrinsics[:, :2, -1] = pps
177
- # Project
178
- projpts = intrinsics @ (Rinv @ pts3d.transpose(-2, -1) + tinv) # I(RX+t) : [B,3,N]
179
- projpts = projpts.transpose(-2, -1) # [B,N,3]
180
- projpts[..., :2] /= projpts[..., [-1]] # [B,N,3] (X/Z , Y/Z, Z)
181
- return projpts
182
-
183
- def _backproj_pts3d(self, in_depths=None, in_im_poses=None,
184
- in_focals=None, in_pps=None, in_imshapes=None):
185
- """
186
- Backprojection operation: from image depths to 3D points
187
- """
188
- # Get depths and projection params if not provided
189
- focals = self.optimizer.get_focals() if in_focals is None else in_focals
190
- im_poses = self.optimizer.get_im_poses() if in_im_poses is None else in_im_poses
191
- depth = self._get_depthmaps() if in_depths is None else in_depths
192
- pp = self.optimizer.get_principal_points() if in_pps is None else in_pps
193
- imshapes = self.imshapes if in_imshapes is None else in_imshapes
194
- def focal_ex(i): return focals[i][..., None, None].expand(1, *focals[i].shape, *imshapes[i])
195
- dm_to_3d = [depthmap_to_pts3d(depth[i][None], focal_ex(i), pp=pp[[i]]) for i in range(im_poses.shape[0])]
196
-
197
- def autoprocess(x):
198
- x = x[0]
199
- return x.transpose(-2, -1) if len(x.shape) == 4 else x
200
- return [geotrf(pose, autoprocess(pt)) for pose, pt in zip(im_poses, dm_to_3d)]
201
-
202
- def _pts3d_to_depth(self, pts3d, cam2worlds, focals, pps):
203
- """
204
- Projection operation: from 3D points to 2D coordinates + depths
205
- """
206
- B = pts3d.shape[0]
207
- assert pts3d.shape[0] == cam2worlds.shape[0]
208
- # prepare extrinsics
209
- R, t = cam2worlds[:, :3, :3], cam2worlds[:, :3, -1]
210
- Rinv = R.transpose(-2, -1)
211
- tinv = -Rinv @ t[..., None]
212
-
213
- # prepare intrinsics
214
- intrinsics = torch.eye(3).to(cam2worlds)[None].repeat(self.optimizer.n_imgs, 1, 1)
215
- if len(focals.shape) == 1:
216
- focals = torch.stack([focals, focals], dim=-1)
217
- intrinsics[:, 0, 0] = focals[:, 0]
218
- intrinsics[:, 1, 1] = focals[:, 1]
219
- intrinsics[:, :2, -1] = pps
220
- # Project
221
- projpts = intrinsics @ (Rinv @ pts3d.transpose(-2, -1) + tinv) # I(RX+t) : [B,3,N]
222
- projpts = projpts.transpose(-2, -1) # [B,N,3]
223
- projpts[..., :2] /= projpts[..., [-1]] # [B,N,3] (X/Z , Y/Z, Z)
224
- return projpts
225
-
226
- def _depth_to_pts3d(self, in_depths=None, in_im_poses=None, in_focals=None, in_pps=None, in_imshapes=None):
227
- """
228
- Backprojection operation: from image depths to 3D points
229
- """
230
- # Get depths and projection params if not provided
231
- focals = self.optimizer.get_focals() if in_focals is None else in_focals
232
- im_poses = self.optimizer.get_im_poses() if in_im_poses is None else in_im_poses
233
- depth = self._get_depthmaps() if in_depths is None else in_depths
234
- pp = self.optimizer.get_principal_points() if in_pps is None else in_pps
235
- imshapes = self.imshapes if in_imshapes is None else in_imshapes
236
-
237
- def focal_ex(i): return focals[i][..., None, None].expand(1, *focals[i].shape, *imshapes[i])
238
-
239
- dm_to_3d = [depthmap_to_pts3d(depth[i][None], focal_ex(i), pp=pp[i:i + 1]) for i in range(im_poses.shape[0])]
240
-
241
- def autoprocess(x):
242
- x = x[0]
243
- H, W, three = x.shape[:3]
244
- return x.transpose(-2, -1) if len(x.shape) == 4 else x
245
- return [geotrf(pp, autoprocess(pt)) for pp, pt in zip(im_poses, dm_to_3d)]
246
-
247
- def _get_pts3d(self, TSDF_filtering_thresh=None, **kw):
248
- """
249
- return 3D points (possibly filtering depths with TSDF)
250
- """
251
- return self._backproj_pts3d(in_depths=self._get_depthmaps(TSDF_filtering_thresh=TSDF_filtering_thresh), **kw)
252
-
253
- def _TSDF_postprocess_or_not(self, pts3d, depthmaps, confs, niter=1):
254
- # Setup inner variables
255
- self.imshapes = [im.shape[:2] for im in self.optimizer.imgs]
256
- self.im_depthmaps = [dd.log().view(imshape) for dd, imshape in zip(depthmaps, self.imshapes)]
257
- self.im_conf = confs
258
-
259
- if self.TSDF_thresh > 0.:
260
- # Create or update self.TSDF_im_depthmaps that contain logdepths filtered with TSDF
261
- self._refine_depths_with_TSDF(self.TSDF_thresh, niter=niter)
262
- depthmaps = [dd.exp() for dd in self.TSDF_im_depthmaps]
263
- # Turn them into 3D points
264
- pts3d = self._backproj_pts3d(in_depths=depthmaps)
265
- depthmaps = [dd.flatten() for dd in depthmaps]
266
- pts3d = [pp.view(-1, 3) for pp in pts3d]
267
- return pts3d, depthmaps
268
-
269
- def get_dense_pts3d(self, clean_depth=True):
270
- if clean_depth:
271
- confs = clean_pointcloud(self.confs, self.optimizer.intrinsics, inv(self.optimizer.cam2w),
272
- self.depthmaps, self.pts3d)
273
- return self.pts3d, self.depthmaps, confs
 
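Note: the weighted vote computed in _TSDF_query above reduces, per query point, to a confidence-weighted average of truncated signed distances over the views that actually see the point. A minimal standalone sketch of that aggregation on made-up numbers (not part of the deleted file):

import torch

thresh = 0.1                                     # TSDF truncation threshold
pred_depths = torch.tensor([2.00, 2.05, 0.50])   # depth predicted at the reprojected pixel, per view
proj_depths = torch.tensor([1.98, 2.10, 2.00])   # depth of the query point along each view's ray
confs = torch.tensor([3.0, 2.0, 1.0])            # per-pixel confidences

sdf = pred_depths - proj_depths                  # signed distance to the observed surface
unseen = sdf < -thresh                           # query point is far behind the surface seen in that view
tsdf = sdf.clip(-thresh, float('inf'))           # truncate on the negative side only, as in the code above
weights = (~unseen).float() * confs.exp()        # drop unseen views, weight the rest by confidence
value = (weights * tsdf).sum() / weights.sum()   # > 0: in front of the surface, < 0: behind it
print(float(value))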
 
 
mast3r/cloud_opt/utils/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/cloud_opt/utils/losses.py DELETED
@@ -1,32 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # losses for sparse ga
6
- # --------------------------------------------------------
7
- import torch
8
- import numpy as np
9
-
10
-
11
- def l05_loss(x, y):
12
- return torch.linalg.norm(x - y, dim=-1).sqrt()
13
-
14
-
15
- def l1_loss(x, y):
16
- return torch.linalg.norm(x - y, dim=-1)
17
-
18
-
19
- def gamma_loss(gamma, mul=1, offset=None, clip=np.inf):
20
- if offset is None:
21
- if gamma == 1:
22
- return l1_loss
23
- # d(x**p)/dx = 1 ==> p * x**(p-1) == 1 ==> x = (1/p)**(1/(p-1))
24
- offset = (1 / gamma)**(1 / (gamma - 1))
25
-
26
- def loss_func(x, y):
27
- return (mul * l1_loss(x, y).clip(max=clip) + offset) ** gamma - offset ** gamma
28
- return loss_func
29
-
30
-
31
- def meta_gamma_loss():
32
- return lambda alpha: gamma_loss(alpha)
 
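Note: the offset used by gamma_loss above is chosen so that the loss has unit slope at zero error, i.e. it matches l1_loss locally while growing sub-linearly for large residuals. A quick standalone numerical check (a sketch, not code from the repository):

import torch

def gamma_loss_scalar(d, gamma):
    # offset solves gamma * offset**(gamma - 1) == 1, the derivation quoted in the comment above
    offset = (1 / gamma) ** (1 / (gamma - 1))
    return (d + offset) ** gamma - offset ** gamma

d = torch.tensor(1e-4, requires_grad=True)
gamma_loss_scalar(d, gamma=0.5).backward()
print(d.grad)   # ~1.0: same slope as l1_loss near zero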
 
 
mast3r/cloud_opt/utils/schedules.py DELETED
@@ -1,17 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # lr schedules for sparse ga
6
- # --------------------------------------------------------
7
- import numpy as np
8
-
9
-
10
- def linear_schedule(alpha, lr_base, lr_end=0):
11
- lr = (1 - alpha) * lr_base + alpha * lr_end
12
- return lr
13
-
14
-
15
- def cosine_schedule(alpha, lr_base, lr_end=0):
16
- lr = lr_end + (lr_base - lr_end) * (1 + np.cos(alpha * np.pi)) / 2
17
- return lr
 
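Note: both schedules above map a progress value alpha in [0, 1] from lr_base down to lr_end; the cosine variant is flatter near the endpoints and steeper in the middle. A standalone sanity check with assumed values lr_base=1e-2 and lr_end=1e-5:

import numpy as np

lr_base, lr_end = 1e-2, 1e-5
for alpha in np.linspace(0, 1, 5):
    lin = (1 - alpha) * lr_base + alpha * lr_end                          # linear_schedule
    cos = lr_end + (lr_base - lr_end) * (1 + np.cos(alpha * np.pi)) / 2   # cosine_schedule
    print(f"alpha={alpha:.2f}  linear={lin:.2e}  cosine={cos:.2e}")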
 
 
mast3r/colmap/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/colmap/database.py DELETED
@@ -1,383 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # MASt3R to colmap export functions
6
- # --------------------------------------------------------
7
- import os
8
- import torch
9
- import copy
10
- import numpy as np
11
- import torchvision
12
- import numpy as np
13
- from tqdm import tqdm
14
- from scipy.cluster.hierarchy import DisjointSet
15
- from scipy.spatial.transform import Rotation as R
16
-
17
- from mast3r.utils.misc import hash_md5
18
-
19
- from mast3r.fast_nn import extract_correspondences_nonsym, bruteforce_reciprocal_nns
20
-
21
- import mast3r.utils.path_to_dust3r # noqa
22
- from dust3r.utils.geometry import find_reciprocal_matches, xy_grid # noqa
23
-
24
-
25
- def convert_im_matches_pairs(img0, img1, image_to_colmap, im_keypoints, matches_im0, matches_im1, viz):
26
- if viz:
27
- from matplotlib import pyplot as pl
28
-
29
- image_mean = torch.as_tensor(
30
- [0.5, 0.5, 0.5], device='cpu').reshape(1, 3, 1, 1)
31
- image_std = torch.as_tensor(
32
- [0.5, 0.5, 0.5], device='cpu').reshape(1, 3, 1, 1)
33
- rgb0 = img0['img'] * image_std + image_mean
34
- rgb0 = torchvision.transforms.functional.to_pil_image(rgb0[0])
35
- rgb0 = np.array(rgb0)
36
-
37
- rgb1 = img1['img'] * image_std + image_mean
38
- rgb1 = torchvision.transforms.functional.to_pil_image(rgb1[0])
39
- rgb1 = np.array(rgb1)
40
-
41
- imgs = [rgb0, rgb1]
42
- # visualize a few matches
43
- n_viz = 100
44
- num_matches = matches_im0.shape[0]
45
- match_idx_to_viz = np.round(np.linspace(
46
- 0, num_matches - 1, n_viz)).astype(int)
47
- viz_matches_im0, viz_matches_im1 = matches_im0[match_idx_to_viz], matches_im1[match_idx_to_viz]
48
-
49
- H0, W0, H1, W1 = *imgs[0].shape[:2], *imgs[1].shape[:2]
50
- rgb0 = np.pad(imgs[0], ((0, max(H1 - H0, 0)),
51
- (0, 0), (0, 0)), 'constant', constant_values=0)
52
- rgb1 = np.pad(imgs[1], ((0, max(H0 - H1, 0)),
53
- (0, 0), (0, 0)), 'constant', constant_values=0)
54
- img = np.concatenate((rgb0, rgb1), axis=1)
55
- pl.figure()
56
- pl.imshow(img)
57
- cmap = pl.get_cmap('jet')
58
- for ii in range(n_viz):
59
- (x0, y0), (x1,
60
- y1) = viz_matches_im0[ii].T, viz_matches_im1[ii].T
61
- pl.plot([x0, x1 + W0], [y0, y1], '-+', color=cmap(ii /
62
- (n_viz - 1)), scalex=False, scaley=False)
63
- pl.show(block=True)
64
-
65
- matches = [matches_im0.astype(np.float64), matches_im1.astype(np.float64)]
66
- imgs = [img0, img1]
67
- imidx0 = img0['idx']
68
- imidx1 = img1['idx']
69
- ravel_matches = []
70
- for j in range(2):
71
- H, W = imgs[j]['true_shape'][0]
72
- with np.errstate(invalid='ignore'):
73
- qx, qy = matches[j].round().astype(np.int32).T
74
- ravel_matches_j = qx.clip(min=0, max=W - 1, out=qx) + W * qy.clip(min=0, max=H - 1, out=qy)
75
- ravel_matches.append(ravel_matches_j)
76
- imidxj = imgs[j]['idx']
77
- for m in ravel_matches_j:
78
- if m not in im_keypoints[imidxj]:
79
- im_keypoints[imidxj][m] = 0
80
- im_keypoints[imidxj][m] += 1
81
- imid0 = copy.deepcopy(image_to_colmap[imidx0]['colmap_imid'])
82
- imid1 = copy.deepcopy(image_to_colmap[imidx1]['colmap_imid'])
83
- if imid0 > imid1:
84
- colmap_matches = np.stack([ravel_matches[1], ravel_matches[0]], axis=-1)
85
- imid0, imid1 = imid1, imid0
86
- imidx0, imidx1 = imidx1, imidx0
87
- else:
88
- colmap_matches = np.stack([ravel_matches[0], ravel_matches[1]], axis=-1)
89
- colmap_matches = np.unique(colmap_matches, axis=0)
90
- return imidx0, imidx1, colmap_matches
91
-
92
-
93
- def get_im_matches(pred1, pred2, pairs, image_to_colmap, im_keypoints, conf_thr,
94
- is_sparse=True, subsample=8, pixel_tol=0, viz=False, device='cuda'):
95
- im_matches = {}
96
- for i in range(len(pred1['pts3d'])):
97
- imidx0 = pairs[i][0]['idx']
98
- imidx1 = pairs[i][1]['idx']
99
- if 'desc' in pred1: # mast3r
100
- descs = [pred1['desc'][i], pred2['desc'][i]]
101
- confidences = [pred1['desc_conf'][i], pred2['desc_conf'][i]]
102
- desc_dim = descs[0].shape[-1]
103
-
104
- if is_sparse:
105
- corres = extract_correspondences_nonsym(descs[0], descs[1], confidences[0], confidences[1],
106
- device=device, subsample=subsample, pixel_tol=pixel_tol)
107
- conf = corres[2]
108
- mask = conf >= conf_thr
109
- matches_im0 = corres[0][mask].cpu().numpy()
110
- matches_im1 = corres[1][mask].cpu().numpy()
111
- else:
112
- confidence_masks = [confidences[0] >=
113
- conf_thr, confidences[1] >= conf_thr]
114
- pts2d_list, desc_list = [], []
115
- for j in range(2):
116
- conf_j = confidence_masks[j].cpu().numpy().flatten()
117
- true_shape_j = pairs[i][j]['true_shape'][0]
118
- pts2d_j = xy_grid(
119
- true_shape_j[1], true_shape_j[0]).reshape(-1, 2)[conf_j]
120
- desc_j = descs[j].detach().cpu(
121
- ).numpy().reshape(-1, desc_dim)[conf_j]
122
- pts2d_list.append(pts2d_j)
123
- desc_list.append(desc_j)
124
- if len(desc_list[0]) == 0 or len(desc_list[1]) == 0:
125
- continue
126
-
127
- nn0, nn1 = bruteforce_reciprocal_nns(desc_list[0], desc_list[1],
128
- device=device, dist='dot', block_size=2**13)
129
- reciprocal_in_P0 = (nn1[nn0] == np.arange(len(nn0)))
130
-
131
- matches_im1 = pts2d_list[1][nn0][reciprocal_in_P0]
132
- matches_im0 = pts2d_list[0][reciprocal_in_P0]
133
- else:
134
- pts3d = [pred1['pts3d'][i], pred2['pts3d_in_other_view'][i]]
135
- confidences = [pred1['conf'][i], pred2['conf'][i]]
136
-
137
- if is_sparse:
138
- corres = extract_correspondences_nonsym(pts3d[0], pts3d[1], confidences[0], confidences[1],
139
- device=device, subsample=subsample, pixel_tol=pixel_tol,
140
- ptmap_key='3d')
141
- conf = corres[2]
142
- mask = conf >= conf_thr
143
- matches_im0 = corres[0][mask].cpu().numpy()
144
- matches_im1 = corres[1][mask].cpu().numpy()
145
- else:
146
- confidence_masks = [confidences[0] >=
147
- conf_thr, confidences[1] >= conf_thr]
148
- # find 2D-2D matches between the two images
149
- pts2d_list, pts3d_list = [], []
150
- for j in range(2):
151
- conf_j = confidence_masks[j].cpu().numpy().flatten()
152
- true_shape_j = pairs[i][j]['true_shape'][0]
153
- pts2d_j = xy_grid(true_shape_j[1], true_shape_j[0]).reshape(-1, 2)[conf_j]
154
- pts3d_j = pts3d[j].detach().cpu().numpy().reshape(-1, 3)[conf_j]
155
- pts2d_list.append(pts2d_j)
156
- pts3d_list.append(pts3d_j)
157
-
158
- PQ, PM = pts3d_list[0], pts3d_list[1]
159
- if len(PQ) == 0 or len(PM) == 0:
160
- continue
161
- reciprocal_in_PM, nnM_in_PQ, num_matches = find_reciprocal_matches(
162
- PQ, PM)
163
-
164
- matches_im1 = pts2d_list[1][reciprocal_in_PM]
165
- matches_im0 = pts2d_list[0][nnM_in_PQ][reciprocal_in_PM]
166
-
167
- if len(matches_im0) == 0:
168
- continue
169
- imidx0, imidx1, colmap_matches = convert_im_matches_pairs(pairs[i][0], pairs[i][1],
170
- image_to_colmap, im_keypoints,
171
- matches_im0, matches_im1, viz)
172
- im_matches[(imidx0, imidx1)] = colmap_matches
173
- return im_matches
174
-
175
-
176
- def get_im_matches_from_cache(pairs, cache_path, desc_conf, subsample,
177
- image_to_colmap, im_keypoints, conf_thr,
178
- viz=False, device='cuda'):
179
- im_matches = {}
180
- for i in range(len(pairs)):
181
- imidx0 = pairs[i][0]['idx']
182
- imidx1 = pairs[i][1]['idx']
183
-
184
- corres_idx1 = hash_md5(pairs[i][0]['instance'])
185
- corres_idx2 = hash_md5(pairs[i][1]['instance'])
186
-
187
- path_corres = cache_path + f'/corres_conf={desc_conf}_{subsample=}/{corres_idx1}-{corres_idx2}.pth'
188
- if os.path.isfile(path_corres):
189
- score, (xy1, xy2, confs) = torch.load(path_corres, map_location=device)
190
- else:
191
- path_corres = cache_path + f'/corres_conf={desc_conf}_{subsample=}/{corres_idx2}-{corres_idx1}.pth'
192
- score, (xy2, xy1, confs) = torch.load(path_corres, map_location=device)
193
- mask = confs >= conf_thr
194
- matches_im0 = xy1[mask].cpu().numpy()
195
- matches_im1 = xy2[mask].cpu().numpy()
196
-
197
- if len(matches_im0) == 0:
198
- continue
199
- imidx0, imidx1, colmap_matches = convert_im_matches_pairs(pairs[i][0], pairs[i][1],
200
- image_to_colmap, im_keypoints,
201
- matches_im0, matches_im1, viz)
202
- im_matches[(imidx0, imidx1)] = colmap_matches
203
- return im_matches
204
-
205
-
206
- def export_images(db, images, image_paths, focals, ga_world_to_cam, camera_model):
207
- # add cameras/images to the db
208
- # with the output of ga as prior
209
- image_to_colmap = {}
210
- im_keypoints = {}
211
- for idx in range(len(image_paths)):
212
- im_keypoints[idx] = {}
213
- H, W = images[idx]["orig_shape"]
214
- if focals is None:
215
- focal_x = focal_y = 1.2 * max(W, H)
216
- prior_focal_length = False
217
- cx = W / 2.0
218
- cy = H / 2.0
219
- elif isinstance(focals[idx], np.ndarray) and len(focals[idx].shape) == 2:
220
- # intrinsics
221
- focal_x = focals[idx][0, 0]
222
- focal_y = focals[idx][1, 1]
223
- cx = focals[idx][0, 2] * images[idx]["to_orig"][0, 0]
224
- cy = focals[idx][1, 2] * images[idx]["to_orig"][1, 1]
225
- prior_focal_length = True
226
- else:
227
- focal_x = focal_y = float(focals[idx])
228
- prior_focal_length = True
229
- cx = W / 2.0
230
- cy = H / 2.0
231
- focal_x = focal_x * images[idx]["to_orig"][0, 0]
232
- focal_y = focal_y * images[idx]["to_orig"][1, 1]
233
-
234
- if camera_model == "SIMPLE_PINHOLE":
235
- model_id = 0
236
- focal = (focal_x + focal_y) / 2.0
237
- params = np.asarray([focal, cx, cy], np.float64)
238
- elif camera_model == "PINHOLE":
239
- model_id = 1
240
- params = np.asarray([focal_x, focal_y, cx, cy], np.float64)
241
- elif camera_model == "SIMPLE_RADIAL":
242
- model_id = 2
243
- focal = (focal_x + focal_y) / 2.0
244
- params = np.asarray([focal, cx, cy, 0.0], np.float64)
245
- elif camera_model == "OPENCV":
246
- model_id = 4
247
- params = np.asarray([focal_x, focal_y, cx, cy, 0.0, 0.0, 0.0, 0.0], np.float64)
248
- else:
249
- raise ValueError(f"invalid camera model {camera_model}")
250
-
251
- H, W = int(H), int(W)
252
- # register the camera with the selected model
253
- camid = db.add_camera(
254
- model_id, W, H, params, prior_focal_length=prior_focal_length)
255
- if ga_world_to_cam is None:
256
- prior_t = np.zeros(3)
257
- prior_q = np.zeros(4)
258
- else:
259
- q = R.from_matrix(ga_world_to_cam[idx][:3, :3]).as_quat()
260
- prior_t = ga_world_to_cam[idx][:3, 3]
261
- prior_q = np.array([q[-1], q[0], q[1], q[2]])
262
- imid = db.add_image(
263
- image_paths[idx], camid, prior_q=prior_q, prior_t=prior_t)
264
- image_to_colmap[idx] = {
265
- 'colmap_imid': imid,
266
- 'colmap_camid': camid
267
- }
268
- return image_to_colmap, im_keypoints
269
-
270
-
271
- def export_matches(db, images, image_to_colmap, im_keypoints, im_matches, min_len_track, skip_geometric_verification):
272
- colmap_image_pairs = []
273
- # 2D-2D matches are quite dense
274
- # we want to remove the very small tracks
275
- # and export only keypoints for which we have values
276
- # build tracks
277
- print("building tracks")
278
- keypoints_to_track_id = {}
279
- track_id_to_kpt_list = []
280
- to_merge = []
281
- for (imidx0, imidx1), colmap_matches in tqdm(im_matches.items()):
282
- if imidx0 not in keypoints_to_track_id:
283
- keypoints_to_track_id[imidx0] = {}
284
- if imidx1 not in keypoints_to_track_id:
285
- keypoints_to_track_id[imidx1] = {}
286
-
287
- for m in colmap_matches:
288
- if m[0] not in keypoints_to_track_id[imidx0] and m[1] not in keypoints_to_track_id[imidx1]:
289
- # new pair of kpts never seen before
290
- track_idx = len(track_id_to_kpt_list)
291
- keypoints_to_track_id[imidx0][m[0]] = track_idx
292
- keypoints_to_track_id[imidx1][m[1]] = track_idx
293
- track_id_to_kpt_list.append(
294
- [(imidx0, m[0]), (imidx1, m[1])])
295
- elif m[1] not in keypoints_to_track_id[imidx1]:
296
- # 0 has a track, not 1
297
- track_idx = keypoints_to_track_id[imidx0][m[0]]
298
- keypoints_to_track_id[imidx1][m[1]] = track_idx
299
- track_id_to_kpt_list[track_idx].append((imidx1, m[1]))
300
- elif m[0] not in keypoints_to_track_id[imidx0]:
301
- # 1 has a track, not 0
302
- track_idx = keypoints_to_track_id[imidx1][m[1]]
303
- keypoints_to_track_id[imidx0][m[0]] = track_idx
304
- track_id_to_kpt_list[track_idx].append((imidx0, m[0]))
305
- else:
306
- # both have tracks, merge them
307
- track_idx0 = keypoints_to_track_id[imidx0][m[0]]
308
- track_idx1 = keypoints_to_track_id[imidx1][m[1]]
309
- if track_idx0 != track_idx1:
310
- # let's deal with them later
311
- to_merge.append((track_idx0, track_idx1))
312
-
313
- # regroup merge targets
314
- print("merging tracks")
315
- unique = np.unique(to_merge)
316
- tree = DisjointSet(unique)
317
- for track_idx0, track_idx1 in tqdm(to_merge):
318
- tree.merge(track_idx0, track_idx1)
319
-
320
- subsets = tree.subsets()
321
- print("applying merge")
322
- for setvals in tqdm(subsets):
323
- new_trackid = len(track_id_to_kpt_list)
324
- kpt_list = []
325
- for track_idx in setvals:
326
- kpt_list.extend(track_id_to_kpt_list[track_idx])
327
- for imidx, kpid in track_id_to_kpt_list[track_idx]:
328
- keypoints_to_track_id[imidx][kpid] = new_trackid
329
- track_id_to_kpt_list.append(kpt_list)
330
-
331
- # binc = np.bincount([len(v) for v in track_id_to_kpt_list])
332
- # nonzero = np.nonzero(binc)
333
- # nonzerobinc = binc[nonzero[0]]
334
- # print(nonzero[0].tolist())
335
- # print(nonzerobinc)
336
- num_valid_tracks = sum(
337
- [1 for v in track_id_to_kpt_list if len(v) >= min_len_track])
338
-
339
- keypoints_to_idx = {}
340
- print(f"squashing keypoints - {num_valid_tracks} valid tracks")
341
- for imidx, keypoints_imid in tqdm(im_keypoints.items()):
342
- imid = image_to_colmap[imidx]['colmap_imid']
343
- keypoints_kept = []
344
- keypoints_to_idx[imidx] = {}
345
- for kp in keypoints_imid.keys():
346
- if kp not in keypoints_to_track_id[imidx]:
347
- continue
348
- track_idx = keypoints_to_track_id[imidx][kp]
349
- track_length = len(track_id_to_kpt_list[track_idx])
350
- if track_length < min_len_track:
351
- continue
352
- keypoints_to_idx[imidx][kp] = len(keypoints_kept)
353
- keypoints_kept.append(kp)
354
- if len(keypoints_kept) == 0:
355
- continue
356
- keypoints_kept = np.array(keypoints_kept)
357
- keypoints_kept = np.unravel_index(keypoints_kept, images[imidx]['true_shape'][0])[
358
- 0].base[:, ::-1].copy().astype(np.float32)
359
- # rescale coordinates
360
- keypoints_kept[:, 0] += 0.5
361
- keypoints_kept[:, 1] += 0.5
362
- keypoints_kept = geotrf(images[imidx]['to_orig'], keypoints_kept, norm=True)
363
-
364
- H, W = images[imidx]['orig_shape']
365
- keypoints_kept[:, 0] = keypoints_kept[:, 0].clip(min=0, max=W - 0.01)
366
- keypoints_kept[:, 1] = keypoints_kept[:, 1].clip(min=0, max=H - 0.01)
367
-
368
- db.add_keypoints(imid, keypoints_kept)
369
-
370
- print("exporting im_matches")
371
- for (imidx0, imidx1), colmap_matches in im_matches.items():
372
- imid0, imid1 = image_to_colmap[imidx0]['colmap_imid'], image_to_colmap[imidx1]['colmap_imid']
373
- assert imid0 < imid1
374
- final_matches = np.array([[keypoints_to_idx[imidx0][m[0]], keypoints_to_idx[imidx1][m[1]]]
375
- for m in colmap_matches
376
- if m[0] in keypoints_to_idx[imidx0] and m[1] in keypoints_to_idx[imidx1]])
377
- if len(final_matches) > 0:
378
- colmap_image_pairs.append(
379
- (images[imidx0]['instance'], images[imidx1]['instance']))
380
- db.add_matches(imid0, imid1, final_matches)
381
- if skip_geometric_verification:
382
- db.add_two_view_geometry(imid0, imid1, final_matches)
383
- return colmap_image_pairs
 
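Note: the track-merging step in export_matches above relies on scipy's DisjointSet: whenever a match links two keypoints that were already assigned to different tracks, the two track ids are unioned and the resulting subsets are squashed into new tracks. A toy standalone illustration with made-up track ids:

from scipy.cluster.hierarchy import DisjointSet

to_merge = [(0, 2), (2, 5), (3, 4)]                      # pairs of track ids flagged for merging
tree = DisjointSet(sorted({i for pair in to_merge for i in pair}))
for a, b in to_merge:
    tree.merge(a, b)
print([sorted(s) for s in tree.subsets()])               # two groups: [0, 2, 5] and [3, 4]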
 
 
mast3r/datasets/__init__.py DELETED
@@ -1,62 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
-
4
- from .base.mast3r_base_stereo_view_dataset import MASt3RBaseStereoViewDataset
5
-
6
- import mast3r.utils.path_to_dust3r # noqa
7
- from dust3r.datasets.arkitscenes import ARKitScenes as DUSt3R_ARKitScenes # noqa
8
- from dust3r.datasets.blendedmvs import BlendedMVS as DUSt3R_BlendedMVS # noqa
9
- from dust3r.datasets.co3d import Co3d as DUSt3R_Co3d # noqa
10
- from dust3r.datasets.megadepth import MegaDepth as DUSt3R_MegaDepth # noqa
11
- from dust3r.datasets.scannetpp import ScanNetpp as DUSt3R_ScanNetpp # noqa
12
- from dust3r.datasets.staticthings3d import StaticThings3D as DUSt3R_StaticThings3D # noqa
13
- from dust3r.datasets.waymo import Waymo as DUSt3R_Waymo # noqa
14
- from dust3r.datasets.wildrgbd import WildRGBD as DUSt3R_WildRGBD # noqa
15
-
16
-
17
- class ARKitScenes(DUSt3R_ARKitScenes, MASt3RBaseStereoViewDataset):
18
- def __init__(self, *args, split, ROOT, **kwargs):
19
- super().__init__(*args, split=split, ROOT=ROOT, **kwargs)
20
- self.is_metric_scale = True
21
-
22
-
23
- class BlendedMVS(DUSt3R_BlendedMVS, MASt3RBaseStereoViewDataset):
24
- def __init__(self, *args, ROOT, split=None, **kwargs):
25
- super().__init__(*args, ROOT=ROOT, split=split, **kwargs)
26
- self.is_metric_scale = False
27
-
28
-
29
- class Co3d(DUSt3R_Co3d, MASt3RBaseStereoViewDataset):
30
- def __init__(self, mask_bg=True, *args, ROOT, **kwargs):
31
- super().__init__(mask_bg, *args, ROOT=ROOT, **kwargs)
32
- self.is_metric_scale = False
33
-
34
-
35
- class MegaDepth(DUSt3R_MegaDepth, MASt3RBaseStereoViewDataset):
36
- def __init__(self, *args, split, ROOT, **kwargs):
37
- super().__init__(*args, split=split, ROOT=ROOT, **kwargs)
38
- self.is_metric_scale = False
39
-
40
-
41
- class ScanNetpp(DUSt3R_ScanNetpp, MASt3RBaseStereoViewDataset):
42
- def __init__(self, *args, ROOT, **kwargs):
43
- super().__init__(*args, ROOT=ROOT, **kwargs)
44
- self.is_metric_scale = True
45
-
46
-
47
- class StaticThings3D(DUSt3R_StaticThings3D, MASt3RBaseStereoViewDataset):
48
- def __init__(self, ROOT, *args, mask_bg='rand', **kwargs):
49
- super().__init__(ROOT, *args, mask_bg=mask_bg, **kwargs)
50
- self.is_metric_scale = False
51
-
52
-
53
- class Waymo(DUSt3R_Waymo, MASt3RBaseStereoViewDataset):
54
- def __init__(self, *args, ROOT, **kwargs):
55
- super().__init__(*args, ROOT=ROOT, **kwargs)
56
- self.is_metric_scale = True
57
-
58
-
59
- class WildRGBD(DUSt3R_WildRGBD, MASt3RBaseStereoViewDataset):
60
- def __init__(self, mask_bg=True, *args, ROOT, **kwargs):
61
- super().__init__(mask_bg, *args, ROOT=ROOT, **kwargs)
62
- self.is_metric_scale = True
 
 
 
mast3r/datasets/base/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/datasets/base/mast3r_base_stereo_view_dataset.py DELETED
@@ -1,355 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # base class for implementing datasets
6
- # --------------------------------------------------------
7
- import PIL.Image
8
- import PIL.Image as Image
9
- import numpy as np
10
- import torch
11
- import copy
12
-
13
- from mast3r.datasets.utils.cropping import (extract_correspondences_from_pts3d,
14
- gen_random_crops, in2d_rect, crop_to_homography)
15
-
16
- import mast3r.utils.path_to_dust3r # noqa
17
- from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset, view_name, is_good_type # noqa
18
- from dust3r.datasets.utils.transforms import ImgNorm
19
- from dust3r.utils.geometry import depthmap_to_absolute_camera_coordinates, geotrf, depthmap_to_camera_coordinates
20
- import dust3r.datasets.utils.cropping as cropping
21
-
22
-
23
- class MASt3RBaseStereoViewDataset(BaseStereoViewDataset):
24
- def __init__(self, *, # only keyword arguments
25
- split=None,
26
- resolution=None, # square_size or (width, height) or list of [(width,height), ...]
27
- transform=ImgNorm,
28
- aug_crop=False,
29
- aug_swap=False,
30
- aug_monocular=False,
31
- aug_portrait_or_landscape=True, # automatic choice between landscape/portrait when possible
32
- aug_rot90=False,
33
- n_corres=0,
34
- nneg=0,
35
- n_tentative_crops=4,
36
- seed=None):
37
- super().__init__(split=split, resolution=resolution, transform=transform, aug_crop=aug_crop, seed=seed)
38
- self.is_metric_scale = False # by default a dataset is not metric scale, subclasses can override this
39
-
40
- self.aug_swap = aug_swap
41
- self.aug_monocular = aug_monocular
42
- self.aug_portrait_or_landscape = aug_portrait_or_landscape
43
- self.aug_rot90 = aug_rot90
44
-
45
- self.n_corres = n_corres
46
- self.nneg = nneg
47
- assert self.n_corres == 'all' or isinstance(self.n_corres, int) or (isinstance(self.n_corres, list) and len(
48
- self.n_corres) == self.num_views), f"Error, n_corres should either be 'all', a single integer or a list of length {self.num_views}"
49
- assert self.nneg == 0 or self.n_corres != 'all'
50
- self.n_tentative_crops = n_tentative_crops
51
-
52
- def _swap_view_aug(self, views):
53
- if self._rng.random() < 0.5:
54
- views.reverse()
55
-
56
- def _crop_resize_if_necessary(self, image, depthmap, intrinsics, resolution, rng=None, info=None):
57
- """ This function:
58
- - first downsizes the image with LANCZOS interpolation,
59
- which is better than bilinear interpolation in
60
- """
61
- if not isinstance(image, PIL.Image.Image):
62
- image = PIL.Image.fromarray(image)
63
-
64
- # transpose the resolution if necessary
65
- W, H = image.size # new size
66
- assert resolution[0] >= resolution[1]
67
- if H > 1.1 * W:
68
- # image is portrait mode
69
- resolution = resolution[::-1]
70
- elif 0.9 < H / W < 1.1 and resolution[0] != resolution[1]:
71
- # image is square, so we choose (portrait, landscape) randomly
72
- if rng.integers(2) and self.aug_portrait_or_landscape:
73
- resolution = resolution[::-1]
74
-
75
- # high-quality Lanczos down-scaling
76
- target_resolution = np.array(resolution)
77
- image, depthmap, intrinsics = cropping.rescale_image_depthmap(image, depthmap, intrinsics, target_resolution)
78
-
79
- # actual cropping (if necessary) with bilinear interpolation
80
- offset_factor = 0.5
81
- intrinsics2 = cropping.camera_matrix_of_crop(intrinsics, image.size, resolution, offset_factor=offset_factor)
82
- crop_bbox = cropping.bbox_from_intrinsics_in_out(intrinsics, intrinsics2, resolution)
83
- image, depthmap, intrinsics2 = cropping.crop_image_depthmap(image, depthmap, intrinsics, crop_bbox)
84
-
85
- return image, depthmap, intrinsics2
86
-
87
- def generate_crops_from_pair(self, view1, view2, resolution, aug_crop_arg, n_crops=4, rng=np.random):
88
- views = [view1, view2]
89
-
90
- if aug_crop_arg is False:
91
- # compatibility
92
- for i in range(2):
93
- view = views[i]
94
- view['img'], view['depthmap'], view['camera_intrinsics'] = self._crop_resize_if_necessary(view['img'],
95
- view['depthmap'],
96
- view['camera_intrinsics'],
97
- resolution,
98
- rng=rng)
99
- view['pts3d'], view['valid_mask'] = depthmap_to_absolute_camera_coordinates(view['depthmap'],
100
- view['camera_intrinsics'],
101
- view['camera_pose'])
102
- return
103
-
104
- # extract correspondences
105
- corres = extract_correspondences_from_pts3d(*views, target_n_corres=None, rng=rng)
106
-
107
- # generate 4 random crops in each view
108
- view_crops = []
109
- crops_resolution = []
110
- corres_msks = []
111
- for i in range(2):
112
-
113
- if aug_crop_arg == 'auto':
114
- S = min(views[i]['img'].size)
115
- R = min(resolution)
116
- aug_crop = S * (S - R) // R
117
- aug_crop = max(.1 * S, aug_crop) # for cropping: augment scale of at least 10%, and more if possible
118
- else:
119
- aug_crop = aug_crop_arg
120
-
121
- # transpose the target resolution if necessary
122
- assert resolution[0] >= resolution[1]
123
- W, H = imsize = views[i]['img'].size
124
- crop_resolution = resolution
125
- if H > 1.1 * W:
126
- # image is portrait mode
127
- crop_resolution = resolution[::-1]
128
- elif 0.9 < H / W < 1.1 and resolution[0] != resolution[1]:
129
- # image is square, so we choose (portrait, landscape) randomly
130
- if rng.integers(2):
131
- crop_resolution = resolution[::-1]
132
-
133
- crops = gen_random_crops(imsize, n_crops, crop_resolution, aug_crop=aug_crop, rng=rng)
134
- view_crops.append(crops)
135
- crops_resolution.append(crop_resolution)
136
-
137
- # compute correspondences
138
- corres_msks.append(in2d_rect(corres[i], crops))
139
-
140
- # compute IoU for each
141
- intersection = np.float32(corres_msks[0]).T @ np.float32(corres_msks[1])
142
- # select best pair of crops
143
- best = np.unravel_index(intersection.argmax(), (n_crops, n_crops))
144
- crops = [view_crops[i][c] for i, c in enumerate(best)]
145
-
146
- # crop with the homography
147
- for i in range(2):
148
- view = views[i]
149
- imsize, K_new, R, H = crop_to_homography(view['camera_intrinsics'], crops[i], crops_resolution[i])
150
- # imsize, K_new, H = upscale_homography(imsize, resolution, K_new, H)
151
-
152
- # update camera params
153
- K_old = view['camera_intrinsics']
154
- view['camera_intrinsics'] = K_new
155
- view['camera_pose'] = view['camera_pose'].copy()
156
- view['camera_pose'][:3, :3] = view['camera_pose'][:3, :3] @ R
157
-
158
- # apply homography to image and depthmap
159
- homo8 = (H / H[2, 2]).ravel().tolist()[:8]
160
- view['img'] = view['img'].transform(imsize, Image.Transform.PERSPECTIVE,
161
- homo8,
162
- resample=Image.Resampling.BICUBIC)
163
-
164
- depthmap2 = depthmap_to_camera_coordinates(view['depthmap'], K_old)[0] @ R[:, 2]
165
- view['depthmap'] = np.array(Image.fromarray(depthmap2).transform(
166
- imsize, Image.Transform.PERSPECTIVE, homo8))
167
-
168
- if 'track_labels' in view:
169
- # convert from uint64 --> uint32, because PIL.Image cannot handle uint64
170
- mapping, track_labels = np.unique(view['track_labels'], return_inverse=True)
171
- track_labels = track_labels.astype(np.uint32).reshape(view['track_labels'].shape)
172
-
173
- # homography transformation
174
- res = np.array(Image.fromarray(track_labels).transform(imsize, Image.Transform.PERSPECTIVE, homo8))
175
- view['track_labels'] = mapping[res] # mapping back to uint64
176
-
177
- # recompute 3d points from scratch
178
- view['pts3d'], view['valid_mask'] = depthmap_to_absolute_camera_coordinates(view['depthmap'],
179
- view['camera_intrinsics'],
180
- view['camera_pose'])
181
-
182
- def __getitem__(self, idx):
183
- if isinstance(idx, tuple):
184
- # the idx is specifying the aspect-ratio
185
- idx, ar_idx = idx
186
- else:
187
- assert len(self._resolutions) == 1
188
- ar_idx = 0
189
-
190
- # set-up the rng
191
- if self.seed: # reseed for each __getitem__
192
- self._rng = np.random.default_rng(seed=self.seed + idx)
193
- elif not hasattr(self, '_rng'):
194
- seed = torch.initial_seed() # this is different for each dataloader process
195
- self._rng = np.random.default_rng(seed=seed)
196
-
197
- # over-loaded code
198
- resolution = self._resolutions[ar_idx] # DO NOT CHANGE THIS (compatible with BatchedRandomSampler)
199
- views = self._get_views(idx, resolution, self._rng)
200
- assert len(views) == self.num_views
201
-
202
- for v, view in enumerate(views):
203
- assert 'pts3d' not in view, f"pts3d should not be there, they will be computed afterwards based on intrinsics+depthmap for view {view_name(view)}"
204
- view['idx'] = (idx, ar_idx, v)
205
- view['is_metric_scale'] = self.is_metric_scale
206
-
207
- assert 'camera_intrinsics' in view
208
- if 'camera_pose' not in view:
209
- view['camera_pose'] = np.full((4, 4), np.nan, dtype=np.float32)
210
- else:
211
- assert np.isfinite(view['camera_pose']).all(), f'NaN in camera pose for view {view_name(view)}'
212
- assert 'pts3d' not in view
213
- assert 'valid_mask' not in view
214
- assert np.isfinite(view['depthmap']).all(), f'NaN in depthmap for view {view_name(view)}'
215
-
216
- pts3d, valid_mask = depthmap_to_absolute_camera_coordinates(**view)
217
-
218
- view['pts3d'] = pts3d
219
- view['valid_mask'] = valid_mask & np.isfinite(pts3d).all(axis=-1)
220
-
221
- self.generate_crops_from_pair(views[0], views[1], resolution=resolution,
222
- aug_crop_arg=self.aug_crop,
223
- n_crops=self.n_tentative_crops,
224
- rng=self._rng)
225
- for v, view in enumerate(views):
226
- # encode the image
227
- width, height = view['img'].size
228
- view['true_shape'] = np.int32((height, width))
229
- view['img'] = self.transform(view['img'])
230
- # Pixels for which depth is fundamentally undefined
231
- view['sky_mask'] = (view['depthmap'] < 0)
232
-
233
- if self.aug_swap:
234
- self._swap_view_aug(views)
235
-
236
- if self.aug_monocular:
237
- if self._rng.random() < self.aug_monocular:
238
- views = [copy.deepcopy(views[0]) for _ in range(len(views))]
239
-
240
- # automatic extraction of correspondences from pts3d + pose
241
- if self.n_corres > 0 and ('corres' not in view):
242
- corres1, corres2, valid = extract_correspondences_from_pts3d(*views, self.n_corres,
243
- self._rng, nneg=self.nneg)
244
- views[0]['corres'] = corres1
245
- views[1]['corres'] = corres2
246
- views[0]['valid_corres'] = valid
247
- views[1]['valid_corres'] = valid
248
-
249
- if self.aug_rot90 is False:
250
- pass
251
- elif self.aug_rot90 == 'same':
252
- rotate_90(views, k=self._rng.choice(4))
253
- elif self.aug_rot90 == 'diff':
254
- rotate_90(views[:1], k=self._rng.choice(4))
255
- rotate_90(views[1:], k=self._rng.choice(4))
256
- else:
257
- raise ValueError(f'Bad value for {self.aug_rot90=}')
258
-
259
- # check data-types metric_scale
260
- for v, view in enumerate(views):
261
- if 'corres' not in view:
262
- view['corres'] = np.full((self.n_corres, 2), np.nan, dtype=np.float32)
263
-
264
- # check all datatypes
265
- for key, val in view.items():
266
- res, err_msg = is_good_type(key, val)
267
- assert res, f"{err_msg} with {key}={val} for view {view_name(view)}"
268
- K = view['camera_intrinsics']
269
-
270
- # check shapes
271
- assert view['depthmap'].shape == view['img'].shape[1:]
272
- assert view['depthmap'].shape == view['pts3d'].shape[:2]
273
- assert view['depthmap'].shape == view['valid_mask'].shape
274
-
275
- # last thing done!
276
- for view in views:
277
- # transpose to make sure all views are the same size
278
- transpose_to_landscape(view)
279
- # this allows checking whether the RNG is in the same state each time
280
- view['rng'] = int.from_bytes(self._rng.bytes(4), 'big')
281
-
282
- return views
283
-
284
-
285
- def transpose_to_landscape(view, revert=False):
286
- height, width = view['true_shape']
287
-
288
- if width < height:
289
- if revert:
290
- height, width = width, height
291
-
292
- # rectify portrait to landscape
293
- assert view['img'].shape == (3, height, width)
294
- view['img'] = view['img'].swapaxes(1, 2)
295
-
296
- assert view['valid_mask'].shape == (height, width)
297
- view['valid_mask'] = view['valid_mask'].swapaxes(0, 1)
298
-
299
- assert view['sky_mask'].shape == (height, width)
300
- view['sky_mask'] = view['sky_mask'].swapaxes(0, 1)
301
-
302
- assert view['depthmap'].shape == (height, width)
303
- view['depthmap'] = view['depthmap'].swapaxes(0, 1)
304
-
305
- assert view['pts3d'].shape == (height, width, 3)
306
- view['pts3d'] = view['pts3d'].swapaxes(0, 1)
307
-
308
- # transpose x and y pixels
309
- view['camera_intrinsics'] = view['camera_intrinsics'][[1, 0, 2]]
310
-
311
- # transpose correspondences x and y
312
- view['corres'] = view['corres'][:, [1, 0]]
313
-
314
-
315
- def rotate_90(views, k=1):
316
- from scipy.spatial.transform import Rotation
317
- # print('rotation =', k)
318
-
319
- RT = np.eye(4, dtype=np.float32)
320
- RT[:3, :3] = Rotation.from_euler('z', 90 * k, degrees=True).as_matrix()
321
-
322
- for view in views:
323
- view['img'] = torch.rot90(view['img'], k=k, dims=(-2, -1)) # WARNING!! dims=(-1,-2) != dims=(-2,-1)
324
- view['depthmap'] = np.rot90(view['depthmap'], k=k).copy()
325
- view['camera_pose'] = view['camera_pose'] @ RT
326
-
327
- RT2 = np.eye(3, dtype=np.float32)
328
- RT2[:2, :2] = RT[:2, :2] * ((1, -1), (-1, 1))
329
- H, W = view['depthmap'].shape
330
- if k % 4 == 0:
331
- pass
332
- elif k % 4 == 1:
333
- # top-left (0,0) pixel becomes (0,H-1)
334
- RT2[:2, 2] = (0, H - 1)
335
- elif k % 4 == 2:
336
- # top-left (0,0) pixel becomes (W-1,H-1)
337
- RT2[:2, 2] = (W - 1, H - 1)
338
- elif k % 4 == 3:
339
- # top-left (0,0) pixel becomes (W-1,0)
340
- RT2[:2, 2] = (W - 1, 0)
341
- else:
342
- raise ValueError(f'Bad value for {k=}')
343
-
344
- view['camera_intrinsics'][:2, 2] = geotrf(RT2, view['camera_intrinsics'][:2, 2])
345
- if k % 2 == 1:
346
- K = view['camera_intrinsics']
347
- np.fill_diagonal(K, K.diagonal()[[1, 0, 2]])
348
-
349
- pts3d, valid_mask = depthmap_to_absolute_camera_coordinates(**view)
350
- view['pts3d'] = pts3d
351
- view['valid_mask'] = np.rot90(view['valid_mask'], k=k).copy()
352
- view['sky_mask'] = np.rot90(view['sky_mask'], k=k).copy()
353
-
354
- view['corres'] = geotrf(RT2, view['corres']).round().astype(view['corres'].dtype)
355
- view['true_shape'] = np.int32((H, W))
 
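Note: in rotate_90 above, RT2 re-expresses pixel coordinates in the rotated image; for k=1 the original top-left pixel lands at (0, H-1), where H is the height measured after rotation. A standalone numeric check on a 2x3 toy depthmap (not part of the deleted file):

import numpy as np

depth = np.arange(6, dtype=np.float32).reshape(2, 3)   # original shape (H0=2, W0=3)
rot = np.rot90(depth, k=1)                             # rotated shape (3, 2)
H, W = rot.shape
RT2 = np.array([[0., 1., 0.],
                [-1., 0., H - 1],
                [0., 0., 1.]])                         # same 2D map as the k=1 branch above
x, y = 0, 0                                            # original top-left pixel
new_xy = RT2[:2, :2] @ (x, y) + RT2[:2, 2]
assert rot[int(new_xy[1]), int(new_xy[0])] == depth[y, x]
print(new_xy)                                          # -> [0. 2.], i.e. (0, H-1)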
 
 
mast3r/datasets/utils/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/datasets/utils/cropping.py DELETED
@@ -1,219 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # cropping/match extraction
6
- # --------------------------------------------------------
7
- import numpy as np
8
- import mast3r.utils.path_to_dust3r # noqa
9
- from dust3r.utils.device import to_numpy
10
- from dust3r.utils.geometry import inv, geotrf
11
-
12
-
13
- def reciprocal_1d(corres_1_to_2, corres_2_to_1, ret_recip=False):
14
- is_reciprocal1 = (corres_2_to_1[corres_1_to_2] == np.arange(len(corres_1_to_2)))
15
- pos1 = is_reciprocal1.nonzero()[0]
16
- pos2 = corres_1_to_2[pos1]
17
- if ret_recip:
18
- return is_reciprocal1, pos1, pos2
19
- return pos1, pos2
20
-
21
-
22
- def extract_correspondences_from_pts3d(view1, view2, target_n_corres, rng=np.random, ret_xy=True, nneg=0):
23
- view1, view2 = to_numpy((view1, view2))
24
- # project pixels from image1 --> 3d points --> image2 pixels
25
- shape1, corres1_to_2 = reproject_view(view1['pts3d'], view2)
26
- shape2, corres2_to_1 = reproject_view(view2['pts3d'], view1)
27
-
28
- # compute reciprocal correspondences:
29
- # pos1 == valid pixels (correspondences) in image1
30
- is_reciprocal1, pos1, pos2 = reciprocal_1d(corres1_to_2, corres2_to_1, ret_recip=True)
31
- is_reciprocal2 = (corres1_to_2[corres2_to_1] == np.arange(len(corres2_to_1)))
32
-
33
- if target_n_corres is None:
34
- if ret_xy:
35
- pos1 = unravel_xy(pos1, shape1)
36
- pos2 = unravel_xy(pos2, shape2)
37
- return pos1, pos2
38
-
39
- available_negatives = min((~is_reciprocal1).sum(), (~is_reciprocal2).sum())
40
- target_n_positives = int(target_n_corres * (1 - nneg))
41
- n_positives = min(len(pos1), target_n_positives)
42
- n_negatives = min(target_n_corres - n_positives, available_negatives)
43
-
44
- if n_negatives + n_positives != target_n_corres:
45
- # should be really rare => when there are not enough negatives
46
- # in that case, break nneg and add a few more positives ?
47
- n_positives = target_n_corres - n_negatives
48
- assert n_positives <= len(pos1)
49
-
50
- assert n_positives <= len(pos1)
51
- assert n_positives <= len(pos2)
52
- assert n_negatives <= (~is_reciprocal1).sum()
53
- assert n_negatives <= (~is_reciprocal2).sum()
54
- assert n_positives + n_negatives == target_n_corres
55
-
56
- valid = np.ones(n_positives, dtype=bool)
57
- if n_positives < len(pos1):
58
- # random sub-sampling of valid correspondences
59
- perm = rng.permutation(len(pos1))[:n_positives]
60
- pos1 = pos1[perm]
61
- pos2 = pos2[perm]
62
-
63
- if n_negatives > 0:
64
- # add false correspondences if not enough
65
- def norm(p): return p / p.sum()
66
- pos1 = np.r_[pos1, rng.choice(shape1[0] * shape1[1], size=n_negatives, replace=False, p=norm(~is_reciprocal1))]
67
- pos2 = np.r_[pos2, rng.choice(shape2[0] * shape2[1], size=n_negatives, replace=False, p=norm(~is_reciprocal2))]
68
- valid = np.r_[valid, np.zeros(n_negatives, dtype=bool)]
69
-
70
- # convert (x+W*y) back to 2d (x,y) coordinates
71
- if ret_xy:
72
- pos1 = unravel_xy(pos1, shape1)
73
- pos2 = unravel_xy(pos2, shape2)
74
- return pos1, pos2, valid
75
-
76
-
77
- def reproject_view(pts3d, view2):
78
- shape = view2['pts3d'].shape[:2]
79
- return reproject(pts3d, view2['camera_intrinsics'], inv(view2['camera_pose']), shape)
80
-
81
-
82
- def reproject(pts3d, K, world2cam, shape):
83
- H, W, THREE = pts3d.shape
84
- assert THREE == 3
85
-
86
- # reproject in camera2 space
87
- with np.errstate(divide='ignore', invalid='ignore'):
88
- pos = geotrf(K @ world2cam[:3], pts3d, norm=1, ncol=2)
89
-
90
- # quantize to pixel positions
91
- return (H, W), ravel_xy(pos, shape)
92
-
93
-
94
- def ravel_xy(pos, shape):
95
- H, W = shape
96
- with np.errstate(invalid='ignore'):
97
- qx, qy = pos.reshape(-1, 2).round().astype(np.int32).T
98
- quantized_pos = qx.clip(min=0, max=W - 1, out=qx) + W * qy.clip(min=0, max=H - 1, out=qy)
99
- return quantized_pos
100
-
101
-
102
- def unravel_xy(pos, shape):
103
- # convert (x+W*y) back to 2d (x,y) coordinates
104
- return np.unravel_index(pos, shape)[0].base[:, ::-1].copy()
105
-
106
-
107
- def _rotation_origin_to_pt(target):
108
- """ Align the origin (0,0,1) with the target point (x,y,1) in projective space.
109
- Method: rotate z to put target on (x'+,0,1), then rotate on Y to get (0,0,1) and un-rotate z.
110
- """
111
- from scipy.spatial.transform import Rotation
112
- x, y = target
113
- rot_z = np.arctan2(y, x)
114
- rot_y = np.arctan(np.linalg.norm(target))
115
- R = Rotation.from_euler('ZYZ', [rot_z, rot_y, -rot_z]).as_matrix()
116
- return R
117
-
118
-
119
- def _dotmv(Trf, pts, ncol=None, norm=False):
120
- assert Trf.ndim >= 2
121
- ncol = ncol or pts.shape[-1]
122
-
123
- # adapt shape if necessary
124
- output_reshape = pts.shape[:-1]
125
- if Trf.ndim >= 3:
126
- n = Trf.ndim - 2
127
- assert Trf.shape[:n] == pts.shape[:n], 'batch size does not match'
128
- Trf = Trf.reshape(-1, Trf.shape[-2], Trf.shape[-1])
129
-
130
- if pts.ndim > Trf.ndim:
131
- # Trf == (B,d,d) & pts == (B,H,W,d) --> (B, H*W, d)
132
- pts = pts.reshape(Trf.shape[0], -1, pts.shape[-1])
133
- elif pts.ndim == 2:
134
- # Trf == (B,d,d) & pts == (B,d) --> (B, 1, d)
135
- pts = pts[:, None, :]
136
-
137
- if pts.shape[-1] + 1 == Trf.shape[-1]:
138
- Trf = Trf.swapaxes(-1, -2) # transpose Trf
139
- pts = pts @ Trf[..., :-1, :] + Trf[..., -1:, :]
140
-
141
- elif pts.shape[-1] == Trf.shape[-1]:
142
- Trf = Trf.swapaxes(-1, -2) # transpose Trf
143
- pts = pts @ Trf
144
- else:
145
- pts = Trf @ pts.T
146
- if pts.ndim >= 2:
147
- pts = pts.swapaxes(-1, -2)
148
-
149
- if norm:
150
- pts = pts / pts[..., -1:] # DONT DO /= BECAUSE OF WEIRD PYTORCH BUG
151
- if norm != 1:
152
- pts *= norm
153
-
154
- res = pts[..., :ncol].reshape(*output_reshape, ncol)
155
- return res
156
-
157
-
158
- def crop_to_homography(K, crop, target_size=None):
159
- """ Given an image and its intrinsics,
160
- we want to replicate a rectangular crop with a homography,
161
- so that the principal point of the new 'crop' is centered.
162
- """
163
- # build intrinsics for the crop
164
- crop = np.round(crop)
165
- crop_size = crop[2:] - crop[:2]
166
- K2 = K.copy() # same focal
167
- K2[:2, 2] = crop_size / 2 # new principal point is perfectly centered
168
-
169
- # find which corner is the most far-away from current principal point
170
- # so that the final homography does not go over the image borders
171
- corners = crop.reshape(-1, 2)
172
- corner_idx = np.abs(corners - K[:2, 2]).argmax(0)
173
- corner = corners[corner_idx, [0, 1]]
174
- # align with the corresponding corner from the target view
175
- corner2 = np.c_[[0, 0], crop_size][[0, 1], corner_idx]
176
-
177
- old_pt = _dotmv(np.linalg.inv(K), corner, norm=1)
178
- new_pt = _dotmv(np.linalg.inv(K2), corner2, norm=1)
179
- R = _rotation_origin_to_pt(old_pt) @ np.linalg.inv(_rotation_origin_to_pt(new_pt))
180
-
181
- if target_size is not None:
182
- imsize = target_size
183
- target_size = np.asarray(target_size)
184
- scaling = min(target_size / crop_size)
185
- K2[:2] *= scaling
186
- K2[:2, 2] = target_size / 2
187
- else:
188
- imsize = tuple(np.int32(crop_size).tolist())
189
-
190
- return imsize, K2, R, K @ R @ np.linalg.inv(K2)
191
-
192
-
193
- def gen_random_crops(imsize, n_crops, resolution, aug_crop, rng=np.random):
194
- """ Generate random crops of size=resolution,
195
- for an input image upscaled to (imsize + randint(0 , aug_crop))
196
- """
197
- resolution_crop = np.array(resolution) * min(np.array(imsize) / resolution)
198
-
199
- # (virtually) upscale the input image
200
- # scaling = rng.uniform(1, 1+(aug_crop+1)/min(imsize))
201
- scaling = np.exp(rng.uniform(0, np.log(1 + aug_crop / min(imsize))))
202
- imsize2 = np.int32(np.array(imsize) * scaling)
203
-
204
- # generate some random crops
205
- topleft = rng.random((n_crops, 2)) * (imsize2 - resolution_crop)
206
- crops = np.c_[topleft, topleft + resolution_crop]
207
- # print(f"{scaling=}, {topleft=}")
208
- # reduce the resolution to come back to original size
209
- crops /= scaling
210
- return crops
211
-
212
-
213
- def in2d_rect(corres, crops):
214
- # corres = (N,2)
215
- # crops = (M,4)
216
- # output = (N, M)
217
- is_sup = (corres[:, None] >= crops[None, :, 0:2])
218
- is_inf = (corres[:, None] < crops[None, :, 2:4])
219
- return (is_sup & is_inf).all(axis=-1)
 
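Note: in2d_rect above broadcasts an (N, 2) array of pixel coordinates against an (M, 4) array of crops given as (x0, y0, x1, y1) and returns an (N, M) boolean containment mask. A standalone check with made-up numbers:

import numpy as np

corres = np.array([[10, 10], [100, 50]])     # two candidate correspondences
crops = np.array([[0, 0, 64, 64],            # crop 0 contains (10, 10) only
                  [50, 0, 150, 100]])        # crop 1 contains (100, 50) only
is_sup = corres[:, None] >= crops[None, :, 0:2]
is_inf = corres[:, None] < crops[None, :, 2:4]
print((is_sup & is_inf).all(axis=-1))
# [[ True False]
#  [False  True]]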
 
 
mast3r/fast_nn.py DELETED
@@ -1,221 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # MASt3R Fast Nearest Neighbor
6
- # --------------------------------------------------------
7
- import torch
8
- import numpy as np
9
- import math
10
- from scipy.spatial import KDTree
11
-
12
- import mast3r.utils.path_to_dust3r # noqa
13
- from ..dust3r.dust3r.utils.device import to_numpy, todevice # noqa
14
-
15
-
16
- @torch.no_grad()
17
- def bruteforce_reciprocal_nns(A, B, device='cuda', block_size=None, dist='l2'):
18
- if isinstance(A, np.ndarray):
19
- A = torch.from_numpy(A).to(device)
20
- if isinstance(B, np.ndarray):
21
- B = torch.from_numpy(B).to(device)
22
-
23
- A = A.to(device)
24
- B = B.to(device)
25
-
26
- if dist == 'l2':
27
- dist_func = torch.cdist
28
- argmin = torch.min
29
- elif dist == 'dot':
30
- def dist_func(A, B):
31
- return A @ B.T
32
-
33
- def argmin(X, dim):
34
- sim, nn = torch.max(X, dim=dim)
35
- return sim.neg_(), nn
36
- else:
37
- raise ValueError(f'Unknown {dist=}')
38
-
39
- if block_size is None or len(A) * len(B) <= block_size**2:
40
- dists = dist_func(A, B)
41
- _, nn_A = argmin(dists, dim=1)
42
- _, nn_B = argmin(dists, dim=0)
43
- else:
44
- dis_A = torch.full((A.shape[0],), float('inf'), device=device, dtype=A.dtype)
45
- dis_B = torch.full((B.shape[0],), float('inf'), device=device, dtype=B.dtype)
46
- nn_A = torch.full((A.shape[0],), -1, device=device, dtype=torch.int64)
47
- nn_B = torch.full((B.shape[0],), -1, device=device, dtype=torch.int64)
48
- number_of_iteration_A = math.ceil(A.shape[0] / block_size)
49
- number_of_iteration_B = math.ceil(B.shape[0] / block_size)
50
-
51
- for i in range(number_of_iteration_A):
52
- A_i = A[i * block_size:(i + 1) * block_size]
53
- for j in range(number_of_iteration_B):
54
- B_j = B[j * block_size:(j + 1) * block_size]
55
- dists_blk = dist_func(A_i, B_j) # A, B, 1
56
- # dists_blk = dists[i * block_size:(i+1)*block_size, j * block_size:(j+1)*block_size]
57
- min_A_i, argmin_A_i = argmin(dists_blk, dim=1)
58
- min_B_j, argmin_B_j = argmin(dists_blk, dim=0)
59
-
60
- col_mask = min_A_i < dis_A[i * block_size:(i + 1) * block_size]
61
- line_mask = min_B_j < dis_B[j * block_size:(j + 1) * block_size]
62
-
63
- dis_A[i * block_size:(i + 1) * block_size][col_mask] = min_A_i[col_mask]
64
- dis_B[j * block_size:(j + 1) * block_size][line_mask] = min_B_j[line_mask]
65
-
66
- nn_A[i * block_size:(i + 1) * block_size][col_mask] = argmin_A_i[col_mask] + (j * block_size)
67
- nn_B[j * block_size:(j + 1) * block_size][line_mask] = argmin_B_j[line_mask] + (i * block_size)
68
- nn_A = nn_A.cpu().numpy()
69
- nn_B = nn_B.cpu().numpy()
70
- return nn_A, nn_B
71
-
72
-
73
- class cdistMatcher:
74
- def __init__(self, db_pts, device='cuda'):
75
- self.db_pts = db_pts.to(device)
76
- self.device = device
77
-
78
- def query(self, queries, k=1, **kw):
79
- assert k == 1
80
- if queries.numel() == 0:
81
- return None, []
82
- nnA, nnB = bruteforce_reciprocal_nns(queries, self.db_pts, device=self.device, **kw)
83
- dis = None
84
- return dis, nnA
85
-
86
-
87
- def merge_corres(idx1, idx2, shape1=None, shape2=None, ret_xy=True, ret_index=False):
88
- assert idx1.dtype == idx2.dtype == np.int32
89
-
90
- # unique and sort along idx1
91
- corres = np.unique(np.c_[idx2, idx1].view(np.int64), return_index=ret_index)
92
- if ret_index:
93
- corres, indices = corres
94
- xy2, xy1 = corres[:, None].view(np.int32).T
95
-
96
- if ret_xy:
97
- assert shape1 and shape2
98
- xy1 = np.unravel_index(xy1, shape1)
99
- xy2 = np.unravel_index(xy2, shape2)
100
- if ret_xy != 'y_x':
101
- xy1 = xy1[0].base[:, ::-1]
102
- xy2 = xy2[0].base[:, ::-1]
103
-
104
- if ret_index:
105
- return xy1, xy2, indices
106
- return xy1, xy2
107
-
108
-
109
- def fast_reciprocal_NNs(pts1, pts2, subsample_or_initxy1=8, ret_xy=True, pixel_tol=0, ret_basin=False,
110
- device='cuda', **matcher_kw):
111
- H1, W1, DIM1 = pts1.shape
112
- H2, W2, DIM2 = pts2.shape
113
- assert DIM1 == DIM2
114
-
115
- pts1 = pts1.reshape(-1, DIM1)
116
- pts2 = pts2.reshape(-1, DIM2)
117
-
118
- if isinstance(subsample_or_initxy1, int) and pixel_tol == 0:
119
- S = subsample_or_initxy1
120
- y1, x1 = np.mgrid[S // 2:H1:S, S // 2:W1:S].reshape(2, -1)
121
- max_iter = 10
122
- else:
123
- x1, y1 = subsample_or_initxy1
124
- if isinstance(x1, torch.Tensor):
125
- x1 = x1.cpu().numpy()
126
- if isinstance(y1, torch.Tensor):
127
- y1 = y1.cpu().numpy()
128
- max_iter = 1
129
-
130
- xy1 = np.int32(np.unique(x1 + W1 * y1)) # make sure there's no doublons
131
- xy2 = np.full_like(xy1, -1)
132
- old_xy1 = xy1.copy()
133
- old_xy2 = xy2.copy()
134
-
135
- if (isinstance(device, str) and device.startswith('cuda')) or (isinstance(device, torch.device) and device.type.startswith('cuda')):
136
- pts1 = pts1.to(device)
137
- pts2 = pts2.to(device)
138
- tree1 = cdistMatcher(pts1, device=device)
139
- tree2 = cdistMatcher(pts2, device=device)
140
- else:
141
- pts1, pts2 = to_numpy((pts1, pts2))
142
- tree1 = KDTree(pts1)
143
- tree2 = KDTree(pts2)
144
-
145
- notyet = np.ones(len(xy1), dtype=bool)
146
- basin = np.full((H1 * W1 + 1,), -1, dtype=np.int32) if ret_basin else None
147
-
148
- niter = 0
149
- # n_notyet = [len(notyet)]
150
- while notyet.any():
151
- _, xy2[notyet] = to_numpy(tree2.query(pts1[xy1[notyet]], **matcher_kw))
152
- if not ret_basin:
153
- notyet &= (old_xy2 != xy2) # remove points that have converged
154
-
155
- _, xy1[notyet] = to_numpy(tree1.query(pts2[xy2[notyet]], **matcher_kw))
156
- if ret_basin:
157
- basin[old_xy1[notyet]] = xy1[notyet]
158
- notyet &= (old_xy1 != xy1) # remove points that have converged
159
-
160
- # n_notyet.append(notyet.sum())
161
- niter += 1
162
- if niter >= max_iter:
163
- break
164
-
165
- old_xy2[:] = xy2
166
- old_xy1[:] = xy1
167
-
168
- # print('notyet_stats:', ' '.join(map(str, (n_notyet+[0]*10)[:max_iter])))
169
-
170
- if pixel_tol > 0:
171
- # in case we only want to match some specific points
172
- # and still have some way of checking reciprocity
173
- old_yx1 = np.unravel_index(old_xy1, (H1, W1))[0].base
174
- new_yx1 = np.unravel_index(xy1, (H1, W1))[0].base
175
- dis = np.linalg.norm(old_yx1 - new_yx1, axis=-1)
176
- converged = dis < pixel_tol
177
- if not isinstance(subsample_or_initxy1, int):
178
- xy1 = old_xy1 # replace new points by old ones
179
- else:
180
- converged = ~notyet # converged correspondences
181
-
182
- # keep only unique correspondences, and sort on xy1
183
- xy1, xy2 = merge_corres(xy1[converged], xy2[converged], (H1, W1), (H2, W2), ret_xy=ret_xy)
184
- if ret_basin:
185
- return xy1, xy2, basin
186
- return xy1, xy2
187
-
188
-
189
- def extract_correspondences_nonsym(A, B, confA, confB, subsample=8, device=None, ptmap_key='pred_desc', pixel_tol=0):
190
- if '3d' in ptmap_key:
191
- opt = dict(device='cpu', workers=32)
192
- else:
193
- opt = dict(device=device, dist='dot', block_size=2**13)
194
-
195
- # matching the two pairs
196
- idx1 = []
197
- idx2 = []
198
- # merge corres from opposite pairs
199
- HA, WA = A.shape[:2]
200
- HB, WB = B.shape[:2]
201
- if pixel_tol == 0:
202
- nn1to2 = fast_reciprocal_NNs(A, B, subsample_or_initxy1=subsample, ret_xy=False, **opt)
203
- nn2to1 = fast_reciprocal_NNs(B, A, subsample_or_initxy1=subsample, ret_xy=False, **opt)
204
- else:
205
- S = subsample
206
- yA, xA = np.mgrid[S // 2:HA:S, S // 2:WA:S].reshape(2, -1)
207
- yB, xB = np.mgrid[S // 2:HB:S, S // 2:WB:S].reshape(2, -1)
208
-
209
- nn1to2 = fast_reciprocal_NNs(A, B, subsample_or_initxy1=(xA, yA), ret_xy=False, pixel_tol=pixel_tol, **opt)
210
- nn2to1 = fast_reciprocal_NNs(B, A, subsample_or_initxy1=(xB, yB), ret_xy=False, pixel_tol=pixel_tol, **opt)
211
-
212
- idx1 = np.r_[nn1to2[0], nn2to1[1]]
213
- idx2 = np.r_[nn1to2[1], nn2to1[0]]
214
-
215
- c1 = confA.ravel()[idx1]
216
- c2 = confB.ravel()[idx2]
217
-
218
- xy1, xy2, idx = merge_corres(idx1, idx2, (HA, WA), (HB, WB), ret_xy=True, ret_index=True)
219
- conf = np.minimum(c1[idx], c2[idx])
220
- corres = (xy1.copy(), xy2.copy(), conf)
221
- return todevice(corres, device)
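A minimal sketch of how the deleted `fast_reciprocal_NNs` was typically called on two dense descriptor maps; the tensor shapes and keyword arguments follow the signatures above, and the random descriptors are placeholders.

import torch
from mast3r.fast_nn import fast_reciprocal_NNs  # import path as it existed before this deletion

descA = torch.randn(384, 512, 24)  # per-pixel descriptors, shape (H, W, D)
descB = torch.randn(384, 512, 24)

# seed one query every 8 pixels, then iterate reciprocal matching with dot-product similarity on GPU
xyA, xyB = fast_reciprocal_NNs(descA, descB, subsample_or_initxy1=8,
                               device='cuda', dist='dot', block_size=2**13)
# xyA, xyB: (N, 2) arrays of mutually nearest (x, y) pixel correspondences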
 
 
 
mast3r/losses.py DELETED
@@ -1,514 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # Implementation of MASt3R training losses
6
- # --------------------------------------------------------
7
- import torch
8
- import torch.nn as nn
9
- import numpy as np
10
- from sklearn.metrics import average_precision_score
11
-
12
- import mast3r.utils.path_to_dust3r # noqa
13
- from dust3r.losses import BaseCriterion, Criterion, MultiLoss, Sum, ConfLoss
14
- from dust3r.losses import Regr3D as Regr3D_dust3r
15
- from dust3r.utils.geometry import (geotrf, inv, normalize_pointcloud)
16
- from dust3r.inference import get_pred_pts3d
17
- from dust3r.utils.geometry import get_joint_pointcloud_depth, get_joint_pointcloud_center_scale
18
-
19
-
20
- def apply_log_to_norm(xyz):
21
- d = xyz.norm(dim=-1, keepdim=True)
22
- xyz = xyz / d.clip(min=1e-8)
23
- xyz = xyz * torch.log1p(d)
24
- return xyz
25
-
26
-
27
- class Regr3D (Regr3D_dust3r):
28
- def __init__(self, criterion, norm_mode='avg_dis', gt_scale=False, opt_fit_gt=False,
29
- sky_loss_value=2, max_metric_scale=False, loss_in_log=False):
30
- self.loss_in_log = loss_in_log
31
- if norm_mode.startswith('?'):
32
- # do no norm pts from metric scale datasets
33
- self.norm_all = False
34
- self.norm_mode = norm_mode[1:]
35
- else:
36
- self.norm_all = True
37
- self.norm_mode = norm_mode
38
- super().__init__(criterion, self.norm_mode, gt_scale)
39
-
40
- self.sky_loss_value = sky_loss_value
41
- self.max_metric_scale = max_metric_scale
42
-
43
- def get_all_pts3d(self, gt1, gt2, pred1, pred2, dist_clip=None):
44
- # everything is normalized w.r.t. camera of view1
45
- in_camera1 = inv(gt1['camera_pose'])
46
- gt_pts1 = geotrf(in_camera1, gt1['pts3d']) # B,H,W,3
47
- gt_pts2 = geotrf(in_camera1, gt2['pts3d']) # B,H,W,3
48
-
49
- valid1 = gt1['valid_mask'].clone()
50
- valid2 = gt2['valid_mask'].clone()
51
-
52
- if dist_clip is not None:
53
- # points that are too far-away == invalid
54
- dis1 = gt_pts1.norm(dim=-1) # (B, H, W)
55
- dis2 = gt_pts2.norm(dim=-1) # (B, H, W)
56
- valid1 = valid1 & (dis1 <= dist_clip)
57
- valid2 = valid2 & (dis2 <= dist_clip)
58
-
59
- if self.loss_in_log == 'before':
60
- # this only make sense when depth_mode == 'linear'
61
- gt_pts1 = apply_log_to_norm(gt_pts1)
62
- gt_pts2 = apply_log_to_norm(gt_pts2)
63
-
64
- pr_pts1 = get_pred_pts3d(gt1, pred1, use_pose=False).clone()
65
- pr_pts2 = get_pred_pts3d(gt2, pred2, use_pose=True).clone()
66
-
67
- if not self.norm_all:
68
- if self.max_metric_scale:
69
- B = valid1.shape[0]
70
- # valid1: B, H, W
71
- # torch.linalg.norm(gt_pts1, dim=-1) -> B, H, W
72
- # dist1_to_cam1 -> reshape to B, H*W
73
- dist1_to_cam1 = torch.where(valid1, torch.linalg.norm(gt_pts1, dim=-1), 0).view(B, -1)
74
- dist2_to_cam1 = torch.where(valid2, torch.linalg.norm(gt_pts2, dim=-1), 0).view(B, -1)
75
-
76
- # is_metric_scale: B
77
- # dist1_to_cam1.max(dim=-1).values -> B
78
- gt1['is_metric_scale'] = gt1['is_metric_scale'] \
79
- & (dist1_to_cam1.max(dim=-1).values < self.max_metric_scale) \
80
- & (dist2_to_cam1.max(dim=-1).values < self.max_metric_scale)
81
- gt2['is_metric_scale'] = gt1['is_metric_scale']
82
-
83
- mask = ~gt1['is_metric_scale']
84
- else:
85
- mask = torch.ones_like(gt1['is_metric_scale'])
86
- # normalize 3d points
87
- if self.norm_mode and mask.any():
88
- pr_pts1[mask], pr_pts2[mask] = normalize_pointcloud(pr_pts1[mask], pr_pts2[mask], self.norm_mode,
89
- valid1[mask], valid2[mask])
90
-
91
- if self.norm_mode and not self.gt_scale:
92
- gt_pts1, gt_pts2, norm_factor = normalize_pointcloud(gt_pts1, gt_pts2, self.norm_mode,
93
- valid1, valid2, ret_factor=True)
94
- # apply the same normalization to prediction
95
- pr_pts1[~mask] = pr_pts1[~mask] / norm_factor[~mask]
96
- pr_pts2[~mask] = pr_pts2[~mask] / norm_factor[~mask]
97
-
98
- # return sky segmentation, making sure they don't include any labelled 3d points
99
- sky1 = gt1['sky_mask'] & (~valid1)
100
- sky2 = gt2['sky_mask'] & (~valid2)
101
- return gt_pts1, gt_pts2, pr_pts1, pr_pts2, valid1, valid2, sky1, sky2, {}
102
-
103
- def compute_loss(self, gt1, gt2, pred1, pred2, **kw):
104
- gt_pts1, gt_pts2, pred_pts1, pred_pts2, mask1, mask2, sky1, sky2, monitoring = \
105
- self.get_all_pts3d(gt1, gt2, pred1, pred2, **kw)
106
-
107
- if self.sky_loss_value > 0:
108
- assert self.criterion.reduction == 'none', 'sky_loss_value should be 0 if no conf loss'
109
- # add the sky pixel as "valid" pixels...
110
- mask1 = mask1 | sky1
111
- mask2 = mask2 | sky2
112
-
113
- # loss on img1 side
114
- pred_pts1 = pred_pts1[mask1]
115
- gt_pts1 = gt_pts1[mask1]
116
- if self.loss_in_log and self.loss_in_log != 'before':
117
- # this only make sense when depth_mode == 'exp'
118
- pred_pts1 = apply_log_to_norm(pred_pts1)
119
- gt_pts1 = apply_log_to_norm(gt_pts1)
120
- l1 = self.criterion(pred_pts1, gt_pts1)
121
-
122
- # loss on gt2 side
123
- pred_pts2 = pred_pts2[mask2]
124
- gt_pts2 = gt_pts2[mask2]
125
- if self.loss_in_log and self.loss_in_log != 'before':
126
- pred_pts2 = apply_log_to_norm(pred_pts2)
127
- gt_pts2 = apply_log_to_norm(gt_pts2)
128
- l2 = self.criterion(pred_pts2, gt_pts2)
129
-
130
- if self.sky_loss_value > 0:
131
- assert self.criterion.reduction == 'none', 'sky_loss_value should be 0 if no conf loss'
132
- # ... but force the loss to be high there
133
- l1 = torch.where(sky1[mask1], self.sky_loss_value, l1)
134
- l2 = torch.where(sky2[mask2], self.sky_loss_value, l2)
135
- self_name = type(self).__name__
136
- details = {self_name + '_pts3d_1': float(l1.mean()), self_name + '_pts3d_2': float(l2.mean())}
137
- return Sum((l1, mask1), (l2, mask2)), (details | monitoring)
138
-
139
-
140
- class Regr3D_ShiftInv (Regr3D):
141
- """ Same than Regr3D but invariant to depth shift.
142
- """
143
-
144
- def get_all_pts3d(self, gt1, gt2, pred1, pred2):
145
- # compute unnormalized points
146
- gt_pts1, gt_pts2, pred_pts1, pred_pts2, mask1, mask2, sky1, sky2, monitoring = \
147
- super().get_all_pts3d(gt1, gt2, pred1, pred2)
148
-
149
- # compute median depth
150
- gt_z1, gt_z2 = gt_pts1[..., 2], gt_pts2[..., 2]
151
- pred_z1, pred_z2 = pred_pts1[..., 2], pred_pts2[..., 2]
152
- gt_shift_z = get_joint_pointcloud_depth(gt_z1, gt_z2, mask1, mask2)[:, None, None]
153
- pred_shift_z = get_joint_pointcloud_depth(pred_z1, pred_z2, mask1, mask2)[:, None, None]
154
-
155
- # subtract the median depth
156
- gt_z1 -= gt_shift_z
157
- gt_z2 -= gt_shift_z
158
- pred_z1 -= pred_shift_z
159
- pred_z2 -= pred_shift_z
160
-
161
- # monitoring = dict(monitoring, gt_shift_z=gt_shift_z.mean().detach(), pred_shift_z=pred_shift_z.mean().detach())
162
- return gt_pts1, gt_pts2, pred_pts1, pred_pts2, mask1, mask2, sky1, sky2, monitoring
163
-
164
-
165
- class Regr3D_ScaleInv (Regr3D):
166
- """ Same than Regr3D but invariant to depth scale.
167
- if gt_scale == True: enforce the prediction to take the same scale than GT
168
- """
169
-
170
- def get_all_pts3d(self, gt1, gt2, pred1, pred2):
171
- # compute depth-normalized points
172
- gt_pts1, gt_pts2, pred_pts1, pred_pts2, mask1, mask2, sky1, sky2, monitoring = \
173
- super().get_all_pts3d(gt1, gt2, pred1, pred2)
174
-
175
- # measure scene scale
176
- _, gt_scale = get_joint_pointcloud_center_scale(gt_pts1, gt_pts2, mask1, mask2)
177
- _, pred_scale = get_joint_pointcloud_center_scale(pred_pts1, pred_pts2, mask1, mask2)
178
-
179
- # prevent predictions to be in a ridiculous range
180
- pred_scale = pred_scale.clip(min=1e-3, max=1e3)
181
-
182
- # subtract the median depth
183
- if self.gt_scale:
184
- pred_pts1 *= gt_scale / pred_scale
185
- pred_pts2 *= gt_scale / pred_scale
186
- # monitoring = dict(monitoring, pred_scale=(pred_scale/gt_scale).mean())
187
- else:
188
- gt_pts1 /= gt_scale
189
- gt_pts2 /= gt_scale
190
- pred_pts1 /= pred_scale
191
- pred_pts2 /= pred_scale
192
- # monitoring = dict(monitoring, gt_scale=gt_scale.mean(), pred_scale=pred_scale.mean().detach())
193
-
194
- return gt_pts1, gt_pts2, pred_pts1, pred_pts2, mask1, mask2, sky1, sky2, monitoring
195
-
196
-
197
- class Regr3D_ScaleShiftInv (Regr3D_ScaleInv, Regr3D_ShiftInv):
198
- # calls Regr3D_ShiftInv first, then Regr3D_ScaleInv
199
- pass
200
-
201
-
202
- def get_similarities(desc1, desc2, euc=False):
203
- if euc: # euclidean distance in same range than similarities
204
- dists = (desc1[:, :, None] - desc2[:, None]).norm(dim=-1)
205
- sim = 1 / (1 + dists)
206
- else:
207
- # Compute similarities
208
- sim = desc1 @ desc2.transpose(-2, -1)
209
- return sim
210
-
211
-
212
- class MatchingCriterion(BaseCriterion):
213
- def __init__(self, reduction='mean', fp=torch.float32):
214
- super().__init__(reduction)
215
- self.fp = fp
216
-
217
- def forward(self, a, b, valid_matches=None, euc=False):
218
- assert a.ndim >= 2 and 1 <= a.shape[-1], f'Bad shape = {a.shape}'
219
- dist = self.loss(a.to(self.fp), b.to(self.fp), valid_matches, euc=euc)
220
- # one dimension less or reduction to single value
221
- assert (valid_matches is None and dist.ndim == a.ndim -
222
- 1) or self.reduction in ['mean', 'sum', '1-mean', 'none']
223
- if self.reduction == 'none':
224
- return dist
225
- if self.reduction == 'sum':
226
- return dist.sum()
227
- if self.reduction == 'mean':
228
- return dist.mean() if dist.numel() > 0 else dist.new_zeros(())
229
- if self.reduction == '1-mean':
230
- return 1. - dist.mean() if dist.numel() > 0 else dist.new_ones(())
231
- raise ValueError(f'bad {self.reduction=} mode')
232
-
233
- def loss(self, a, b, valid_matches=None):
234
- raise NotImplementedError
235
-
236
-
237
- class InfoNCE(MatchingCriterion):
238
- def __init__(self, temperature=0.07, eps=1e-8, mode='all', **kwargs):
239
- super().__init__(**kwargs)
240
- self.temperature = temperature
241
- self.eps = eps
242
- assert mode in ['all', 'proper', 'dual']
243
- self.mode = mode
244
-
245
- def loss(self, desc1, desc2, valid_matches=None, euc=False):
246
- # valid positives are along diagonals
247
- B, N, D = desc1.shape
248
- B2, N2, D2 = desc2.shape
249
- assert B == B2 and D == D2
250
- if valid_matches is None:
251
- valid_matches = torch.ones([B, N], dtype=bool)
252
- # torch.all(valid_matches.sum(dim=-1) > 0) some pairs have no matches????
253
- assert valid_matches.shape == torch.Size([B, N]) and valid_matches.sum() > 0
254
-
255
- # Tempered similarities
256
- sim = get_similarities(desc1, desc2, euc) / self.temperature
257
- sim[sim.isnan()] = -torch.inf # ignore nans
258
- # Softmax of positives with temperature
259
- sim = sim.exp_() # save peak memory
260
- positives = sim.diagonal(dim1=-2, dim2=-1)
261
-
262
- # Loss
263
- if self.mode == 'all': # Previous InfoNCE
264
- loss = -torch.log((positives / sim.sum(dim=-1).sum(dim=-1, keepdim=True)).clip(self.eps))
265
- elif self.mode == 'proper': # Proper InfoNCE
266
- loss = -(torch.log((positives / sim.sum(dim=-2)).clip(self.eps)) +
267
- torch.log((positives / sim.sum(dim=-1)).clip(self.eps)))
268
- elif self.mode == 'dual': # Dual Softmax
269
- loss = -(torch.log((positives**2 / sim.sum(dim=-1) / sim.sum(dim=-2)).clip(self.eps)))
270
- else:
271
- raise ValueError("This should not happen...")
272
- return loss[valid_matches]
273
-
274
-
275
- class APLoss (MatchingCriterion):
276
- """ AP loss.
277
-
278
- Input: (N, M) values in [min, max]
279
- label: (N, M) values in {0, 1}
280
-
281
- Returns: 1 - mAP (mean AP for each n in {1..N})
282
- Note: typically, this is what you wanna minimize
283
- """
284
-
285
- def __init__(self, nq='torch', min=0, max=1, euc=False, **kw):
286
- super().__init__(**kw)
287
- # Exact/True AP loss (not differentiable)
288
- if nq == 0:
289
- nq = 'sklearn' # special case
290
- try:
291
- self.compute_AP = eval('self.compute_true_AP_' + nq)
292
- except:
293
- raise ValueError("Unknown mode %s for AP loss" % nq)
294
-
295
- @staticmethod
296
- def compute_true_AP_sklearn(scores, labels):
297
- def compute_AP(label, score):
298
- return average_precision_score(label, score)
299
-
300
- aps = scores.new_zeros((scores.shape[0], scores.shape[1]))
301
- label_np = labels.cpu().numpy().astype(bool)
302
- scores_np = scores.cpu().numpy()
303
- for bi in range(scores_np.shape[0]):
304
- for i in range(scores_np.shape[1]):
305
- labels = label_np[bi, i, :]
306
- if labels.sum() < 1:
307
- continue
308
- aps[bi, i] = compute_AP(labels, scores_np[bi, i, :])
309
- return aps
310
-
311
- @staticmethod
312
- def compute_true_AP_torch(scores, labels):
313
- assert scores.shape == labels.shape
314
- B, N, M = labels.shape
315
- dev = labels.device
316
- with torch.no_grad():
317
- # sort scores
318
- _, order = scores.sort(dim=-1, descending=True)
319
- # sort labels accordingly
320
- labels = labels[torch.arange(B, device=dev)[:, None, None].expand(order.shape),
321
- torch.arange(N, device=dev)[None, :, None].expand(order.shape),
322
- order]
323
- # compute number of positives per query
324
- npos = labels.sum(dim=-1)
325
- assert torch.all(torch.isclose(npos, npos[0, 0])
326
- ), "only implemented for constant number of positives per query"
327
- npos = int(npos[0, 0])
328
- # compute precision at each recall point
329
- posrank = labels.nonzero()[:, -1].view(B, N, npos)
330
- recall = torch.arange(1, 1 + npos, dtype=torch.float32, device=dev)[None, None, :].expand(B, N, npos)
331
- precision = recall / (1 + posrank).float()
332
- # average precision values at all recall points
333
- aps = precision.mean(dim=-1)
334
-
335
- return aps
336
-
337
- def loss(self, desc1, desc2, valid_matches=None, euc=False): # if matches is None, positives are the diagonal
338
- B, N1, D = desc1.shape
339
- B2, N2, D2 = desc2.shape
340
- assert B == B2 and D == D2
341
-
342
- scores = get_similarities(desc1, desc2, euc)
343
-
344
- labels = torch.zeros([B, N1, N2], dtype=scores.dtype, device=scores.device)
345
-
346
- # allow all diagonal positives and only mask afterwards
347
- labels.diagonal(dim1=-2, dim2=-1)[...] = 1.
348
- apscore = self.compute_AP(scores, labels)
349
- if valid_matches is not None:
350
- apscore = apscore[valid_matches]
351
- return apscore
352
-
353
-
354
- class MatchingLoss (Criterion, MultiLoss):
355
- """
356
- Matching loss per image
357
- only compare pixels inside an image but not in the whole batch as what would be done usually
358
- """
359
-
360
- def __init__(self, criterion, withconf=False, use_pts3d=False, negatives_padding=0, blocksize=4096):
361
- super().__init__(criterion)
362
- self.negatives_padding = negatives_padding
363
- self.use_pts3d = use_pts3d
364
- self.blocksize = blocksize
365
- self.withconf = withconf
366
-
367
- def add_negatives(self, outdesc2, desc2, batchid, x2, y2):
368
- if self.negatives_padding:
369
- B, H, W, D = desc2.shape
370
- negatives = torch.ones([B, H, W], device=desc2.device, dtype=bool)
371
- negatives[batchid, y2, x2] = False
372
- sel = negatives & (negatives.view([B, -1]).cumsum(dim=-1).view(B, H, W)
373
- <= self.negatives_padding) # take the N-first negatives
374
- outdesc2 = torch.cat([outdesc2, desc2[sel].view([B, -1, D])], dim=1)
375
- return outdesc2
376
-
377
- def get_confs(self, pred1, pred2, sel1, sel2):
378
- if self.withconf:
379
- if self.use_pts3d:
380
- outconfs1 = pred1['conf'][sel1]
381
- outconfs2 = pred2['conf'][sel2]
382
- else:
383
- outconfs1 = pred1['desc_conf'][sel1]
384
- outconfs2 = pred2['desc_conf'][sel2]
385
- else:
386
- outconfs1 = outconfs2 = None
387
- return outconfs1, outconfs2
388
-
389
- def get_descs(self, pred1, pred2):
390
- if self.use_pts3d:
391
- desc1, desc2 = pred1['pts3d'], pred2['pts3d_in_other_view']
392
- else:
393
- desc1, desc2 = pred1['desc'], pred2['desc']
394
- return desc1, desc2
395
-
396
- def get_matching_descs(self, gt1, gt2, pred1, pred2, **kw):
397
- outdesc1 = outdesc2 = outconfs1 = outconfs2 = None
398
- # Recover descs, GT corres and valid mask
399
- desc1, desc2 = self.get_descs(pred1, pred2)
400
-
401
- (x1, y1), (x2, y2) = gt1['corres'].unbind(-1), gt2['corres'].unbind(-1)
402
- valid_matches = gt1['valid_corres']
403
-
404
- # Select descs that have GT matches
405
- B, N = x1.shape
406
- batchid = torch.arange(B)[:, None].repeat(1, N) # B, N
407
- outdesc1, outdesc2 = desc1[batchid, y1, x1], desc2[batchid, y2, x2] # B, N, D
408
-
409
- # Padd with unused negatives
410
- outdesc2 = self.add_negatives(outdesc2, desc2, batchid, x2, y2)
411
-
412
- # Gather confs if needed
413
- sel1 = batchid, y1, x1
414
- sel2 = batchid, y2, x2
415
- outconfs1, outconfs2 = self.get_confs(pred1, pred2, sel1, sel2)
416
-
417
- return outdesc1, outdesc2, outconfs1, outconfs2, valid_matches, {'use_euclidean_dist': self.use_pts3d}
418
-
419
- def blockwise_criterion(self, descs1, descs2, confs1, confs2, valid_matches, euc, rng=np.random, shuffle=True):
420
- loss = None
421
- details = {}
422
- B, N, D = descs1.shape
423
-
424
- if N <= self.blocksize: # Blocks are larger than provided descs, compute regular loss
425
- loss = self.criterion(descs1, descs2, valid_matches, euc=euc)
426
- else: # Compute criterion on the blockdiagonal only, after shuffling
427
- # Shuffle if necessary
428
- matches_perm = slice(None)
429
- if shuffle:
430
- matches_perm = np.stack([rng.choice(range(N), size=N, replace=False) for _ in range(B)])
431
- batchid = torch.tile(torch.arange(B), (N, 1)).T
432
- matches_perm = batchid, matches_perm
433
-
434
- descs1 = descs1[matches_perm]
435
- descs2 = descs2[matches_perm]
436
- valid_matches = valid_matches[matches_perm]
437
-
438
- assert N % self.blocksize == 0, "Error, can't chunk block-diagonal, please check blocksize"
439
- n_chunks = N // self.blocksize
440
- descs1 = descs1.reshape([B * n_chunks, self.blocksize, D]) # [B*(N//blocksize), blocksize, D]
441
- descs2 = descs2.reshape([B * n_chunks, self.blocksize, D]) # [B*(N//blocksize), blocksize, D]
442
- valid_matches = valid_matches.view([B * n_chunks, self.blocksize])
443
- loss = self.criterion(descs1, descs2, valid_matches, euc=euc)
444
- if self.withconf:
445
- confs1, confs2 = map(lambda x: x[matches_perm], (confs1, confs2)) # apply perm to confidences if needed
446
-
447
- if self.withconf:
448
- # split confidences between positives/negatives for loss computation
449
- details['conf_pos'] = map(lambda x: x[valid_matches.view(B, -1)], (confs1, confs2))
450
- details['conf_neg'] = map(lambda x: x[~valid_matches.view(B, -1)], (confs1, confs2))
451
- details['Conf1_std'] = confs1.std()
452
- details['Conf2_std'] = confs2.std()
453
-
454
- return loss, details
455
-
456
- def compute_loss(self, gt1, gt2, pred1, pred2, **kw):
457
- # Gather preds and GT
458
- descs1, descs2, confs1, confs2, valid_matches, monitoring = self.get_matching_descs(
459
- gt1, gt2, pred1, pred2, **kw)
460
-
461
- # loss on matches
462
- loss, details = self.blockwise_criterion(descs1, descs2, confs1, confs2,
463
- valid_matches, euc=monitoring.pop('use_euclidean_dist', False))
464
-
465
- details[type(self).__name__] = float(loss.mean())
466
- return loss, (details | monitoring)
467
-
468
-
469
- class ConfMatchingLoss(ConfLoss):
470
- """ Weight matching by learned confidence. Same as ConfLoss but for a matching criterion
471
- Assuming the input matching_loss is a match-level loss.
472
- """
473
-
474
- def __init__(self, pixel_loss, alpha=1., confmode='prod', neg_conf_loss_quantile=False):
475
- super().__init__(pixel_loss, alpha)
476
- self.pixel_loss.withconf = True
477
- self.confmode = confmode
478
- self.neg_conf_loss_quantile = neg_conf_loss_quantile
479
-
480
- def aggregate_confs(self, confs1, confs2): # get the confidences resulting from the two view predictions
481
- if self.confmode == 'prod':
482
- confs = confs1 * confs2 if confs1 is not None and confs2 is not None else 1.
483
- elif self.confmode == 'mean':
484
- confs = .5 * (confs1 + confs2) if confs1 is not None and confs2 is not None else 1.
485
- else:
486
- raise ValueError(f"Unknown conf mode {self.confmode}")
487
- return confs
488
-
489
- def compute_loss(self, gt1, gt2, pred1, pred2, **kw):
490
- # compute per-pixel loss
491
- loss, details = self.pixel_loss(gt1, gt2, pred1, pred2, **kw)
492
- # Recover confidences for positive and negative samples
493
- conf1_pos, conf2_pos = details.pop('conf_pos')
494
- conf1_neg, conf2_neg = details.pop('conf_neg')
495
- conf_pos = self.aggregate_confs(conf1_pos, conf2_pos)
496
-
497
- # weight Matching loss by confidence on positives
498
- conf_pos, log_conf_pos = self.get_conf_log(conf_pos)
499
- conf_loss = loss * conf_pos - self.alpha * log_conf_pos
500
- # average + nan protection (in case of no valid pixels at all)
501
- conf_loss = conf_loss.mean() if conf_loss.numel() > 0 else 0
502
- # Add negative confs loss to give some supervision signal to confidences for pixels that are not matched in GT
503
- if self.neg_conf_loss_quantile:
504
- conf_neg = torch.cat([conf1_neg, conf2_neg])
505
- conf_neg, log_conf_neg = self.get_conf_log(conf_neg)
506
-
507
- # recover quantile that will be used for negatives loss value assignment
508
- neg_loss_value = torch.quantile(loss, self.neg_conf_loss_quantile).detach()
509
- neg_loss = neg_loss_value * conf_neg - self.alpha * log_conf_neg
510
-
511
- neg_loss = neg_loss.mean() if neg_loss.numel() > 0 else 0
512
- conf_loss = conf_loss + neg_loss
513
-
514
- return conf_loss, dict(matching_conf_loss=float(conf_loss), **details)
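A sketch of how the deleted losses compose into a training criterion. `L21` and `ConfLoss` come from dust3r, as in the imports above; the weights, temperature and blocksize are illustrative values, not the settings of any released checkpoint.

from dust3r.losses import L21, ConfLoss
from mast3r.losses import Regr3D, MatchingLoss, InfoNCE, ConfMatchingLoss

# confidence-weighted 3D regression ('?' prefix skips normalization for metric-scale data)
regression = ConfLoss(Regr3D(L21, norm_mode='?avg_dis'), alpha=0.2)

# confidence-weighted descriptor matching with a per-image InfoNCE criterion
matching = ConfMatchingLoss(
    MatchingLoss(InfoNCE(mode='proper', temperature=0.05), blocksize=4096),
    alpha=0.)

train_criterion = regression + 0.075 * matching  # MultiLoss supports + and scalar *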
 
 
 
mast3r/model.py DELETED
@@ -1,68 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # MASt3R model class
6
- # --------------------------------------------------------
7
- import torch
8
- import torch.nn.functional as F
9
- import os
10
-
11
- from mast3r.catmlp_dpt_head import mast3r_head_factory
12
-
13
- import mast3r.utils.path_to_dust3r # noqa
14
- from ..dust3r.dust3r.model import AsymmetricCroCo3DStereo # noqa
15
- from ..dust3r.dust3r.utils.misc import transpose_to_landscape # noqa
16
-
17
-
18
- inf = float('inf')
19
-
20
-
21
- def load_model(model_path, device, verbose=True):
22
- if verbose:
23
- print('... loading model from', model_path)
24
- ckpt = torch.load(model_path, map_location='cpu')
25
- args = ckpt['args'].model.replace("ManyAR_PatchEmbed", "PatchEmbedDust3R")
26
- if 'landscape_only' not in args:
27
- args = args[:-1] + ', landscape_only=False)'
28
- else:
29
- args = args.replace(" ", "").replace('landscape_only=True', 'landscape_only=False')
30
- assert "landscape_only=False" in args
31
- if verbose:
32
- print(f"instantiating : {args}")
33
- net = eval(args)
34
- s = net.load_state_dict(ckpt['model'], strict=False)
35
- if verbose:
36
- print(s)
37
- return net.to(device)
38
-
39
-
40
- class AsymmetricMASt3R(AsymmetricCroCo3DStereo):
41
- def __init__(self, desc_mode=('norm'), two_confs=False, desc_conf_mode=None, **kwargs):
42
- self.desc_mode = desc_mode
43
- self.two_confs = two_confs
44
- self.desc_conf_mode = desc_conf_mode
45
- super().__init__(**kwargs)
46
-
47
- @classmethod
48
- def from_pretrained(cls, pretrained_model_name_or_path, **kw):
49
- if os.path.isfile(pretrained_model_name_or_path):
50
- return load_model(pretrained_model_name_or_path, device='cpu')
51
- else:
52
- return super(AsymmetricMASt3R, cls).from_pretrained(pretrained_model_name_or_path, **kw)
53
-
54
- def set_downstream_head(self, output_mode, head_type, landscape_only, depth_mode, conf_mode, patch_size, img_size, **kw):
55
- # assert img_size[0] % patch_size == 0 and img_size[
56
- # 1] % patch_size == 0, f'{img_size=} must be multiple of {patch_size=}'
57
- self.output_mode = output_mode
58
- self.head_type = head_type
59
- self.depth_mode = depth_mode
60
- self.conf_mode = conf_mode
61
- if self.desc_conf_mode is None:
62
- self.desc_conf_mode = conf_mode
63
- # allocate heads
64
- self.downstream_head1 = mast3r_head_factory(head_type, output_mode, self, has_conf=bool(conf_mode))
65
- self.downstream_head2 = mast3r_head_factory(head_type, output_mode, self, has_conf=bool(conf_mode))
66
- # magic wrapper
67
- self.head1 = transpose_to_landscape(self.downstream_head1, activate=landscape_only)
68
- self.head2 = transpose_to_landscape(self.downstream_head2, activate=landscape_only)
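A minimal sketch of instantiating the class deleted above; the Hugging Face model id is illustrative (a local .pth checkpoint path would instead be routed through `load_model`).

import torch
from mast3r.model import AsymmetricMASt3R  # import path as it existed before this deletion

# hypothetical model id; any local checkpoint file goes through load_model() instead
model = AsymmetricMASt3R.from_pretrained("naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric")
model = model.to('cuda' if torch.cuda.is_available() else 'cpu').eval()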
 
 
 
mast3r/utils/__init__.py DELETED
@@ -1,2 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
 
 
 
mast3r/utils/coarse_to_fine.py DELETED
@@ -1,214 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # coarse to fine utilities
6
- # --------------------------------------------------------
7
- import numpy as np
8
-
9
-
10
- def crop_tag(cell):
11
- return f'[{cell[1]}:{cell[3]},{cell[0]}:{cell[2]}]'
12
-
13
-
14
- def crop_slice(cell):
15
- return slice(cell[1], cell[3]), slice(cell[0], cell[2])
16
-
17
-
18
- def _start_pos(total_size, win_size, overlap):
19
- # we must have AT LEAST overlap between segments
20
- # first segment starts at 0, last segment starts at total_size-win_size
21
- assert 0 <= overlap < 1
22
- assert total_size >= win_size
23
- spacing = win_size * (1 - overlap)
24
- last_pt = total_size - win_size
25
- n_windows = 2 + int((last_pt - 1) // spacing)
26
- return np.linspace(0, last_pt, n_windows).round().astype(int)
27
-
28
-
29
- def multiple_of_16(x):
30
- return (x // 16) * 16
31
-
32
-
33
- def _make_overlapping_grid(H, W, size, overlap):
34
- H_win = multiple_of_16(H * size // max(H, W))
35
- W_win = multiple_of_16(W * size // max(H, W))
36
- x = _start_pos(W, W_win, overlap)
37
- y = _start_pos(H, H_win, overlap)
38
- grid = np.stack(np.meshgrid(x, y, indexing='xy'), axis=-1)
39
- grid = np.concatenate((grid, grid + (W_win, H_win)), axis=-1)
40
- return grid.reshape(-1, 4)
41
-
42
-
43
- def _cell_size(cell2):
44
- width, height = cell2[:, 2] - cell2[:, 0], cell2[:, 3] - cell2[:, 1]
45
- assert width.min() >= 0
46
- assert height.min() >= 0
47
- return width, height
48
-
49
-
50
- def _norm_windows(cell2, H2, W2, forced_resolution=None):
51
- # make sure the window aspect ratio is 3/4, or the output resolution is forced_resolution if defined
52
- outcell = cell2.copy()
53
- width, height = _cell_size(cell2)
54
- width2, height2 = width.clip(max=W2), height.clip(max=H2)
55
- if forced_resolution is None:
56
- width2[width < height] = (height2[width < height] * 3.01 / 4).clip(max=W2)
57
- height2[width >= height] = (width2[width >= height] * 3.01 / 4).clip(max=H2)
58
- else:
59
- forced_H, forced_W = forced_resolution
60
- width2[:] = forced_W
61
- height2[:] = forced_H
62
-
63
- half = (width2 - width) / 2
64
- outcell[:, 0] -= half
65
- outcell[:, 2] += half
66
- half = (height2 - height) / 2
67
- outcell[:, 1] -= half
68
- outcell[:, 3] += half
69
-
70
- # proj to integers
71
- outcell = np.floor(outcell).astype(int)
72
- # Take care of flooring errors
73
- tmpw, tmph = _cell_size(outcell)
74
- outcell[:, 0] += tmpw.astype(tmpw.dtype) - width2.astype(tmpw.dtype)
75
- outcell[:, 1] += tmph.astype(tmpw.dtype) - height2.astype(tmpw.dtype)
76
-
77
- # make sure 0 <= x < W2 and 0 <= y < H2
78
- outcell[:, 0::2] -= outcell[:, [0]].clip(max=0)
79
- outcell[:, 1::2] -= outcell[:, [1]].clip(max=0)
80
- outcell[:, 0::2] -= outcell[:, [2]].clip(min=W2) - W2
81
- outcell[:, 1::2] -= outcell[:, [3]].clip(min=H2) - H2
82
-
83
- width, height = _cell_size(outcell)
84
- assert np.all(width == width2.astype(width.dtype)) and np.all(
85
- height == height2.astype(height.dtype)), "Error, output is not of the expected shape."
86
- assert np.all(width <= W2)
87
- assert np.all(height <= H2)
88
- return outcell
89
-
90
-
91
- def _weight_pixels(cell, pix, assigned, gauss_var=2):
92
- center = cell.reshape(-1, 2, 2).mean(axis=1)
93
- width, height = _cell_size(cell)
94
-
95
- # square distance between each cell center and each point
96
- dist = (center[:, None] - pix[None]) / np.c_[width, height][:, None]
97
- dist2 = np.square(dist).sum(axis=-1)
98
-
99
- assert assigned.shape == dist2.shape
100
- res = np.where(assigned, np.exp(-gauss_var * dist2), 0)
101
- return res
102
-
103
-
104
- def pos2d_in_rect(p1, cell1):
105
- x, y = p1.T
106
- l, t, r, b = cell1
107
- assigned = (l <= x) & (x < r) & (t <= y) & (y < b)
108
- return assigned
109
-
110
-
111
- def _score_cell(cell1, H2, W2, p1, p2, min_corres=10, forced_resolution=None):
112
- assert p1.shape == p2.shape
113
-
114
- # compute keypoint assignment
115
- assigned = pos2d_in_rect(p1, cell1[None].T)
116
- assert assigned.shape == (len(cell1), len(p1))
117
-
118
- # remove cells without correspondences
119
- valid_cells = assigned.sum(axis=1) >= min_corres
120
- cell1 = cell1[valid_cells]
121
- assigned = assigned[valid_cells]
122
- if not valid_cells.any():
123
- return cell1, cell1, assigned
124
-
125
- # fill-in the assigned points in both image
126
- assigned_p1 = np.empty((len(cell1), len(p1), 2), dtype=np.float32)
127
- assigned_p2 = np.empty((len(cell1), len(p2), 2), dtype=np.float32)
128
- assigned_p1[:] = p1[None]
129
- assigned_p2[:] = p2[None]
130
- assigned_p1[~assigned] = np.nan
131
- assigned_p2[~assigned] = np.nan
132
-
133
- # find the median center and scale of assigned points in each cell
134
- # cell_center1 = np.nanmean(assigned_p1, axis=1)
135
- cell_center2 = np.nanmean(assigned_p2, axis=1)
136
- im1_q25, im1_q75 = np.nanquantile(assigned_p1, (0.1, 0.9), axis=1)
137
- im2_q25, im2_q75 = np.nanquantile(assigned_p2, (0.1, 0.9), axis=1)
138
-
139
- robust_std1 = (im1_q75 - im1_q25).clip(20.)
140
- robust_std2 = (im2_q75 - im2_q25).clip(20.)
141
-
142
- cell_size1 = (cell1[:, 2:4] - cell1[:, 0:2])
143
- cell_size2 = cell_size1 * robust_std2 / robust_std1
144
- cell2 = np.c_[cell_center2 - cell_size2 / 2, cell_center2 + cell_size2 / 2]
145
-
146
- # make sure cell bounds are valid
147
- cell2 = _norm_windows(cell2, H2, W2, forced_resolution=forced_resolution)
148
-
149
- # compute correspondence weights
150
- corres_weights = _weight_pixels(cell1, p1, assigned) * _weight_pixels(cell2, p2, assigned)
151
-
152
- # return a list of window pairs and assigned correspondences
153
- return cell1, cell2, corres_weights
154
-
155
-
156
- def greedy_selection(corres_weights, target=0.9):
157
- # corres_weight = (n_cell_pair, n_corres) matrix.
158
- # If corres_weight[c,p]>0, means that correspondence p is visible in cell pair p
159
- assert 0 < target <= 1
160
- corres_weights = corres_weights.copy()
161
-
162
- total = corres_weights.max(axis=0).sum()
163
- target *= total
164
-
165
- # init = empty
166
- res = []
167
- cur = np.zeros(corres_weights.shape[1]) # current selection
168
-
169
- while cur.sum() < target:
170
- # pick the nex best cell pair
171
- best = corres_weights.sum(axis=1).argmax()
172
- res.append(best)
173
-
174
- # update current
175
- cur += corres_weights[best]
176
- # print('appending', best, 'with score', corres_weights[best].sum(), '-->', cur.sum())
177
-
178
- # remove from all other views
179
- corres_weights = (corres_weights - corres_weights[best]).clip(min=0)
180
-
181
- return res
182
-
183
-
184
- def select_pairs_of_crops(img_q, img_b, pos2d_in_query, pos2d_in_ref, maxdim=512, overlap=.5, forced_resolution=None):
185
- # prepare the overlapping cells
186
- grid_q = _make_overlapping_grid(*img_q.shape[:2], maxdim, overlap)
187
- grid_b = _make_overlapping_grid(*img_b.shape[:2], maxdim, overlap)
188
-
189
- assert forced_resolution is None or len(forced_resolution) == 2
190
- if isinstance(forced_resolution[0], int) or not len(forced_resolution[0]) == 2:
191
- forced_resolution1 = forced_resolution2 = forced_resolution
192
- else:
193
- assert len(forced_resolution[1]) == 2
194
- forced_resolution1 = forced_resolution[0]
195
- forced_resolution2 = forced_resolution[1]
196
-
197
- # Make sure crops respect constraints
198
- grid_q = _norm_windows(grid_q.astype(float), *img_q.shape[:2], forced_resolution=forced_resolution1)
199
- grid_b = _norm_windows(grid_b.astype(float), *img_b.shape[:2], forced_resolution=forced_resolution2)
200
-
201
- # score cells
202
- pairs_q = _score_cell(grid_q, *img_b.shape[:2], pos2d_in_query, pos2d_in_ref, forced_resolution=forced_resolution2)
203
- pairs_b = _score_cell(grid_b, *img_q.shape[:2], pos2d_in_ref, pos2d_in_query, forced_resolution=forced_resolution1)
204
- pairs_b = pairs_b[1], pairs_b[0], pairs_b[2] # cellq, cellb, corres_weights
205
-
206
- # greedy selection until all correspondences are generated
207
- cell1, cell2, corres_weights = map(np.concatenate, zip(pairs_q, pairs_b))
208
- if len(corres_weights) == 0:
209
- return # tolerated for empty generators
210
- order = greedy_selection(corres_weights, target=0.9)
211
-
212
- for i in order:
213
- def pair_tag(qi, bi): return (str(qi) + crop_tag(cell1[i]), str(bi) + crop_tag(cell2[i]))
214
- yield cell1[i], cell2[i], pair_tag
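A sketch of iterating the deleted `select_pairs_of_crops` generator to obtain fine-level crop pairs from coarse correspondences; the images and correspondences are placeholders, and the forced resolution is an arbitrary example.

import numpy as np
from mast3r.utils.coarse_to_fine import select_pairs_of_crops, crop_slice

imgA = np.zeros((768, 1024, 3), dtype=np.uint8)   # query / reference images (placeholders)
imgB = np.zeros((768, 1024, 3), dtype=np.uint8)
xyA = np.random.rand(500, 2) * (1024, 768)        # coarse 2D correspondences, one (N, 2) array per image
xyB = np.random.rand(500, 2) * (1024, 768)

for cellA, cellB, pair_tag in select_pairs_of_crops(imgA, imgB, xyA, xyB,
                                                    maxdim=512, overlap=0.5,
                                                    forced_resolution=(384, 512)):
    cropA = imgA[crop_slice(cellA)]   # matching windows to re-run inference on at higher resolution
    cropB = imgB[crop_slice(cellB)]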
 
 
 
mast3r/utils/collate.py DELETED
@@ -1,62 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # Collate extensions
6
- # --------------------------------------------------------
7
-
8
- import torch
9
- import collections
10
- from torch.utils.data._utils.collate import default_collate_fn_map, default_collate_err_msg_format
11
- from typing import Callable, Dict, Optional, Tuple, Type, Union, List
12
-
13
-
14
- def cat_collate_tensor_fn(batch, *, collate_fn_map):
15
- return torch.cat(batch, dim=0)
16
-
17
-
18
- def cat_collate_list_fn(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None):
19
- return [item for bb in batch for item in bb] # concatenate all lists
20
-
21
-
22
- cat_collate_fn_map = default_collate_fn_map.copy()
23
- cat_collate_fn_map[torch.Tensor] = cat_collate_tensor_fn
24
- cat_collate_fn_map[List] = cat_collate_list_fn
25
- cat_collate_fn_map[type(None)] = lambda _, **kw: None # When some Nones, simply return a single None
26
-
27
-
28
- def cat_collate(batch, *, collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]], Callable]] = None):
29
- r"""Custom collate function that concatenates stuff instead of stacking them, and handles NoneTypes """
30
- elem = batch[0]
31
- elem_type = type(elem)
32
-
33
- if collate_fn_map is not None:
34
- if elem_type in collate_fn_map:
35
- return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
36
-
37
- for collate_type in collate_fn_map:
38
- if isinstance(elem, collate_type):
39
- return collate_fn_map[collate_type](batch, collate_fn_map=collate_fn_map)
40
-
41
- if isinstance(elem, collections.abc.Mapping):
42
- try:
43
- return elem_type({key: cat_collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem})
44
- except TypeError:
45
- # The mapping type may not support `__init__(iterable)`.
46
- return {key: cat_collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}
47
- elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple
48
- return elem_type(*(cat_collate(samples, collate_fn_map=collate_fn_map) for samples in zip(*batch)))
49
- elif isinstance(elem, collections.abc.Sequence):
50
- transposed = list(zip(*batch)) # It may be accessed twice, so we use a list.
51
-
52
- if isinstance(elem, tuple):
53
- # Backwards compatibility.
54
- return [cat_collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
55
- else:
56
- try:
57
- return elem_type([cat_collate(samples, collate_fn_map=collate_fn_map) for samples in transposed])
58
- except TypeError:
59
- # The sequence type may not support `__init__(iterable)` (e.g., `range`).
60
- return [cat_collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
61
-
62
- raise TypeError(default_collate_err_msg_format.format(elem_type))
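A sketch of plugging the deleted `cat_collate` into a PyTorch DataLoader; `pair_dataset` is a placeholder for any dataset whose items already carry a leading batch-like dimension.

from functools import partial
from torch.utils.data import DataLoader
from mast3r.utils.collate import cat_collate, cat_collate_fn_map

# items are concatenated along dim 0 (and lists flattened) instead of being stacked;
# pair_dataset is a hypothetical dataset object
loader = DataLoader(pair_dataset, batch_size=4,
                    collate_fn=partial(cat_collate, collate_fn_map=cat_collate_fn_map))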
 
 
 
mast3r/utils/misc.py DELETED
@@ -1,17 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # utilitary functions for MASt3R
6
- # --------------------------------------------------------
7
- import os
8
- import hashlib
9
-
10
-
11
- def mkdir_for(f):
12
- os.makedirs(os.path.dirname(f), exist_ok=True)
13
- return f
14
-
15
-
16
- def hash_md5(s):
17
- return hashlib.md5(s.encode('utf-8')).hexdigest()
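The two deleted helpers in use; the paths and strings are placeholders.

from mast3r.utils.misc import mkdir_for, hash_md5

out_path = mkdir_for('cache/corres/scene0/pair_000.npy')  # creates cache/corres/scene0/ and returns the path
cache_key = hash_md5('imgA.jpg-imgB.jpg')                 # stable md5 key, e.g. for naming cached matches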
 
 
 
mast3r/utils/path_to_dust3r.py DELETED
@@ -1,19 +0,0 @@
1
- # Copyright (C) 2024-present Naver Corporation. All rights reserved.
2
- # Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
3
- #
4
- # --------------------------------------------------------
5
- # dust3r submodule import
6
- # --------------------------------------------------------
7
-
8
- import sys
9
- import os.path as path
10
- HERE_PATH = path.normpath(path.dirname(__file__))
11
- DUSt3R_REPO_PATH = path.normpath(path.join(HERE_PATH, '../../dust3r'))
12
- DUSt3R_LIB_PATH = path.join(DUSt3R_REPO_PATH, 'dust3r')
13
- # check the presence of models directory in repo to be sure its cloned
14
- if path.isdir(DUSt3R_LIB_PATH):
15
- # workaround for sibling import
16
- sys.path.insert(0, DUSt3R_REPO_PATH)
17
- else:
18
- raise ImportError(f"dust3r is not initialized, could not find: {DUSt3R_LIB_PATH}.\n "
19
- "Did you forget to run 'git submodule update --init --recursive' ?")
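The module's only job is the sys.path side effect, which is why the other deleted files import it bare; a minimal sketch of that pattern.

import mast3r.utils.path_to_dust3r  # noqa: prepends the dust3r submodule to sys.path
from dust3r.utils.device import to_numpy  # dust3r imports resolve only after the line above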