Squash Public Dataset Commit History
Browse files
Co-authored-by: meta-bot <meta-bot@users.noreply.huggingface.co>
- .gitattributes +35 -0
- README.md +39 -0
- config.json +188 -0
- model.safetensors +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- model_hub_mixin
|
| 4 |
+
- pytorch_model_hub_mixin
|
| 5 |
+
- computer-vision
|
| 6 |
+
- 3d-reconstruction
|
| 7 |
+
- multi-view-stereo
|
| 8 |
+
- depth-estimation
|
| 9 |
+
- camera-pose
|
| 10 |
+
- covisibility
|
| 11 |
+
- mapanything
|
| 12 |
+
license: cc-by-nc-4.0
|
| 13 |
+
language:
|
| 14 |
+
- en
|
| 15 |
+
pipeline_tag: image-to-3d
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
## Overview
|
| 19 |
+
|
| 20 |
+
MapAnything is a simple, end-to-end trained transformer model that directly regresses the factored metric 3D geometry of a scene from various input modalities. A single feed-forward model supports more than 12 different 3D reconstruction tasks, including multi-image structure-from-motion (SfM), multi-view stereo, monocular metric depth estimation, registration, depth completion, and more.
|
| 21 |
+
|
| 22 |
+
This is the CC-BY-NC-4.0 variant of the model.
|
| 23 |
+
|
| 24 |
+
## Quick Start
|
| 25 |
+
|
| 26 |
+
Please refer to our [GitHub repository](https://github.com/facebookresearch/map-anything) to get started.
|
| 27 |
+
|
| 28 |
+
## Citation
|
| 29 |
+
|
| 30 |
+
If you find our repository useful, please consider giving it a star ⭐ and citing our paper in your work:
|
| 31 |
+
|
| 32 |
+
```bibtex
|
| 33 |
+
@inproceedings{keetha2025mapanything,
|
| 34 |
+
title={{MapAnything}: Universal Feed-Forward Metric {3D} Reconstruction},
|
| 35 |
+
author={Nikhil Keetha and Norman Müller and Johannes Schönberger and Lorenzo Porzi and Yuchen Zhang and Tobias Fischer and Arno Knapitsch and Duncan Zauss and Ethan Weber and Nelson Antunes and Jonathon Luiten and Manuel Lopez-Antequera and Samuel Rota Bulò and Christian Richardt and Deva Ramanan and Sebastian Scherer and Peter Kontschieder},
|
| 36 |
+
booktitle={arXiv},
|
| 37 |
+
year={2025}
|
| 38 |
+
}
|
| 39 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"encoder_config": {
|
| 3 |
+
"data_norm_type": "dinov2",
|
| 4 |
+
"encoder_str": "dinov2",
|
| 5 |
+
"gradient_checkpointing": true,
|
| 6 |
+
"name": "dinov2_large",
|
| 7 |
+
"size": "large",
|
| 8 |
+
"torch_hub_force_reload": false,
|
| 9 |
+
"uses_torch_hub": true,
|
| 10 |
+
"with_registers": false
|
| 11 |
+
},
|
| 12 |
+
"geometric_input_config": {
|
| 13 |
+
"cam_prob": 1.0,
|
| 14 |
+
"cam_rot_encoder_config": {
|
| 15 |
+
"enc_embed_dim": 1024,
|
| 16 |
+
"encoder_str": "global_rep_encoder",
|
| 17 |
+
"in_chans": 4,
|
| 18 |
+
"name": "cam_rot_quats_encoder"
|
| 19 |
+
},
|
| 20 |
+
"cam_trans_encoder_config": {
|
| 21 |
+
"enc_embed_dim": 1024,
|
| 22 |
+
"encoder_str": "global_rep_encoder",
|
| 23 |
+
"in_chans": 3,
|
| 24 |
+
"name": "cam_trans_encoder"
|
| 25 |
+
},
|
| 26 |
+
"depth_encoder_config": {
|
| 27 |
+
"apply_pe": false,
|
| 28 |
+
"enc_embed_dim": 1024,
|
| 29 |
+
"encoder_str": "dense_rep_encoder",
|
| 30 |
+
"in_chans": 1,
|
| 31 |
+
"name": "depth_encoder",
|
| 32 |
+
"patch_size": 14
|
| 33 |
+
},
|
| 34 |
+
"depth_prob": 1.0,
|
| 35 |
+
"depth_scale_norm_all_prob": 0.0,
|
| 36 |
+
"dropout_prob": 0.0,
|
| 37 |
+
"overall_prob": 1.0,
|
| 38 |
+
"pose_scale_norm_all_prob": 0.0,
|
| 39 |
+
"ray_dirs_encoder_config": {
|
| 40 |
+
"apply_pe": false,
|
| 41 |
+
"enc_embed_dim": 1024,
|
| 42 |
+
"encoder_str": "dense_rep_encoder",
|
| 43 |
+
"in_chans": 3,
|
| 44 |
+
"name": "ray_dirs_encoder",
|
| 45 |
+
"patch_size": 14
|
| 46 |
+
},
|
| 47 |
+
"ray_dirs_prob": 1.0,
|
| 48 |
+
"scale_encoder_config": {
|
| 49 |
+
"enc_embed_dim": 1024,
|
| 50 |
+
"encoder_str": "global_rep_encoder",
|
| 51 |
+
"in_chans": 1,
|
| 52 |
+
"name": "scale_encoder"
|
| 53 |
+
},
|
| 54 |
+
"sparse_depth_prob": 0.0,
|
| 55 |
+
"sparsification_removal_percent": 0.9
|
| 56 |
+
},
|
| 57 |
+
"info_sharing_config": {
|
| 58 |
+
"custom_positional_encoding": null,
|
| 59 |
+
"model_return_type": "intermediate_features",
|
| 60 |
+
"model_type": "alternating_attention",
|
| 61 |
+
"module_args": {
|
| 62 |
+
"custom_positional_encoding": null,
|
| 63 |
+
"depth": 24,
|
| 64 |
+
"distinguish_ref_and_non_ref_views": true,
|
| 65 |
+
"gradient_checkpointing": false,
|
| 66 |
+
"indices": [
|
| 67 |
+
11,
|
| 68 |
+
17
|
| 69 |
+
],
|
| 70 |
+
"input_embed_dim": 1024,
|
| 71 |
+
"name": "aat_24_layers_ifr",
|
| 72 |
+
"norm_intermediate": true,
|
| 73 |
+
"size": "24_layers"
|
| 74 |
+
}
|
| 75 |
+
},
|
| 76 |
+
"load_specific_pretrained_submodules": false,
|
| 77 |
+
"name": "mapanything",
|
| 78 |
+
"pred_head_config": {
|
| 79 |
+
"adaptor_config": {
|
| 80 |
+
"dense_pred_init_dict": {
|
| 81 |
+
"confidence_type": "exp",
|
| 82 |
+
"confidence_vmax": Infinity,
|
| 83 |
+
"confidence_vmin": 1,
|
| 84 |
+
"depth_mode": "exp",
|
| 85 |
+
"depth_vmax": Infinity,
|
| 86 |
+
"depth_vmin": 0,
|
| 87 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 88 |
+
"ray_directions_clamp_min_of_z_dir": false,
|
| 89 |
+
"ray_directions_mode": "linear",
|
| 90 |
+
"ray_directions_normalize_to_unit_image_plane": false,
|
| 91 |
+
"ray_directions_normalize_to_unit_sphere": true,
|
| 92 |
+
"ray_directions_vmax": Infinity,
|
| 93 |
+
"ray_directions_vmin": -Infinity,
|
| 94 |
+
"ray_directions_z_dir_min": -Infinity
|
| 95 |
+
},
|
| 96 |
+
"input_dim": 6,
|
| 97 |
+
"pose_pred_init_dict": {
|
| 98 |
+
"cam_trans_mode": "linear",
|
| 99 |
+
"cam_trans_vmax": Infinity,
|
| 100 |
+
"cam_trans_vmin": -Infinity,
|
| 101 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 102 |
+
"quaternions_mode": "linear",
|
| 103 |
+
"quaternions_normalize": true,
|
| 104 |
+
"quaternions_vmax": Infinity,
|
| 105 |
+
"quaternions_vmin": -Infinity
|
| 106 |
+
},
|
| 107 |
+
"scale_pred_init_dict": {
|
| 108 |
+
"mode": "exp",
|
| 109 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 110 |
+
"vmax": Infinity,
|
| 111 |
+
"vmin": 1e-08
|
| 112 |
+
},
|
| 113 |
+
"scene_rep_dim": 4,
|
| 114 |
+
"scene_rep_type": "raydirs+depth+pose",
|
| 115 |
+
"type": "raydirs+depth+pose+confidence+mask"
|
| 116 |
+
},
|
| 117 |
+
"adaptor_type": "raydirs+depth+pose+confidence+mask",
|
| 118 |
+
"dpt_adaptor": {
|
| 119 |
+
"confidence_type": "exp",
|
| 120 |
+
"confidence_vmax": Infinity,
|
| 121 |
+
"confidence_vmin": 1,
|
| 122 |
+
"depth_mode": "exp",
|
| 123 |
+
"depth_vmax": Infinity,
|
| 124 |
+
"depth_vmin": 0,
|
| 125 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 126 |
+
"ray_directions_clamp_min_of_z_dir": false,
|
| 127 |
+
"ray_directions_mode": "linear",
|
| 128 |
+
"ray_directions_normalize_to_unit_image_plane": false,
|
| 129 |
+
"ray_directions_normalize_to_unit_sphere": true,
|
| 130 |
+
"ray_directions_vmax": Infinity,
|
| 131 |
+
"ray_directions_vmin": -Infinity,
|
| 132 |
+
"ray_directions_z_dir_min": -Infinity
|
| 133 |
+
},
|
| 134 |
+
"feature_head": {
|
| 135 |
+
"checkpoint_gradient": false,
|
| 136 |
+
"feature_dim": 256,
|
| 137 |
+
"hooks": [
|
| 138 |
+
0,
|
| 139 |
+
1,
|
| 140 |
+
2,
|
| 141 |
+
3
|
| 142 |
+
],
|
| 143 |
+
"input_feature_dims": [
|
| 144 |
+
1024,
|
| 145 |
+
768,
|
| 146 |
+
768,
|
| 147 |
+
768
|
| 148 |
+
],
|
| 149 |
+
"patch_size": 14
|
| 150 |
+
},
|
| 151 |
+
"gradient_checkpointing": false,
|
| 152 |
+
"pose_adaptor": {
|
| 153 |
+
"cam_trans_mode": "linear",
|
| 154 |
+
"cam_trans_vmax": Infinity,
|
| 155 |
+
"cam_trans_vmin": -Infinity,
|
| 156 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 157 |
+
"quaternions_mode": "linear",
|
| 158 |
+
"quaternions_normalize": true,
|
| 159 |
+
"quaternions_vmax": Infinity,
|
| 160 |
+
"quaternions_vmin": -Infinity
|
| 161 |
+
},
|
| 162 |
+
"pose_head": {
|
| 163 |
+
"input_feature_dim": 768,
|
| 164 |
+
"num_resconv_block": 2,
|
| 165 |
+
"patch_size": 14,
|
| 166 |
+
"rot_representation_dim": 4
|
| 167 |
+
},
|
| 168 |
+
"regressor_head": {
|
| 169 |
+
"checkpoint_gradient": false,
|
| 170 |
+
"input_feature_dim": 256,
|
| 171 |
+
"output_dim": 6
|
| 172 |
+
},
|
| 173 |
+
"scale_adaptor": {
|
| 174 |
+
"mode": "exp",
|
| 175 |
+
"name": "raydirs+depth+pose+confidence+mask+scale",
|
| 176 |
+
"vmax": Infinity,
|
| 177 |
+
"vmin": 1e-08
|
| 178 |
+
},
|
| 179 |
+
"scale_head": {
|
| 180 |
+
"input_feature_dim": 768,
|
| 181 |
+
"output_dim": 1
|
| 182 |
+
},
|
| 183 |
+
"type": "dpt+pose"
|
| 184 |
+
},
|
| 185 |
+
"pretrained_checkpoint_path": null,
|
| 186 |
+
"specific_pretrained_submodules": [],
|
| 187 |
+
"torch_hub_force_reload": false
|
| 188 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20111747deb2c9a3c02fd3bb91f25ac51be951bdeffb5e89ebd45d6cb268b70e
|
| 3 |
+
size 2253444224
|