NikV09 meta-bot committed on
Commit
6f3a25b
·
verified ·
0 Parent(s):

Squash Public Dataset Commit History

Browse files

Co-authored-by: meta-bot <meta-bot@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +39 -0
  3. config.json +188 -0
  4. model.safetensors +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - model_hub_mixin
4
+ - pytorch_model_hub_mixin
5
+ - computer-vision
6
+ - 3d-reconstruction
7
+ - multi-view-stereo
8
+ - depth-estimation
9
+ - camera-pose
10
+ - covisibility
11
+ - mapanything
12
+ license: cc-by-nc-4.0
13
+ language:
14
+ - en
15
+ pipeline_tag: image-to-3d
16
+ ---
17
+
18
+ ## Overview
19
+
20
+ MapAnything is a simple, end-to-end trained transformer model that directly regresses the factored metric 3D geometry of a scene given various input modalities. A single feed-forward model supports over 12 different 3D reconstruction tasks, including multi-image SfM, multi-view stereo, monocular metric depth estimation, registration, depth completion, and more.
21
+
22
+ This is the CC-BY-NC-4.0 variant of the model.
23
+
24
+ ## Quick Start
25
+
26
+ Please refer to our [GitHub repository](https://github.com/facebookresearch/map-anything).
27
+
28
+ ## Citation
29
+
30
+ If you find our repository useful, please consider giving it a star ⭐ and citing our paper in your work:
31
+
32
+ ```bibtex
33
+ @inproceedings{keetha2025mapanything,
34
+ title={{MapAnything}: Universal Feed-Forward Metric {3D} Reconstruction},
35
+ author={Nikhil Keetha and Norman Müller and Johannes Schönberger and Lorenzo Porzi and Yuchen Zhang and Tobias Fischer and Arno Knapitsch and Duncan Zauss and Ethan Weber and Nelson Antunes and Jonathon Luiten and Manuel Lopez-Antequera and Samuel Rota Bulò and Christian Richardt and Deva Ramanan and Sebastian Scherer and Peter Kontschieder},
36
+ booktitle={arXiv},
37
+ year={2025}
38
+ }
39
+ ```
config.json ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_config": {
3
+ "data_norm_type": "dinov2",
4
+ "encoder_str": "dinov2",
5
+ "gradient_checkpointing": true,
6
+ "name": "dinov2_large",
7
+ "size": "large",
8
+ "torch_hub_force_reload": false,
9
+ "uses_torch_hub": true,
10
+ "with_registers": false
11
+ },
12
+ "geometric_input_config": {
13
+ "cam_prob": 1.0,
14
+ "cam_rot_encoder_config": {
15
+ "enc_embed_dim": 1024,
16
+ "encoder_str": "global_rep_encoder",
17
+ "in_chans": 4,
18
+ "name": "cam_rot_quats_encoder"
19
+ },
20
+ "cam_trans_encoder_config": {
21
+ "enc_embed_dim": 1024,
22
+ "encoder_str": "global_rep_encoder",
23
+ "in_chans": 3,
24
+ "name": "cam_trans_encoder"
25
+ },
26
+ "depth_encoder_config": {
27
+ "apply_pe": false,
28
+ "enc_embed_dim": 1024,
29
+ "encoder_str": "dense_rep_encoder",
30
+ "in_chans": 1,
31
+ "name": "depth_encoder",
32
+ "patch_size": 14
33
+ },
34
+ "depth_prob": 1.0,
35
+ "depth_scale_norm_all_prob": 0.0,
36
+ "dropout_prob": 0.0,
37
+ "overall_prob": 1.0,
38
+ "pose_scale_norm_all_prob": 0.0,
39
+ "ray_dirs_encoder_config": {
40
+ "apply_pe": false,
41
+ "enc_embed_dim": 1024,
42
+ "encoder_str": "dense_rep_encoder",
43
+ "in_chans": 3,
44
+ "name": "ray_dirs_encoder",
45
+ "patch_size": 14
46
+ },
47
+ "ray_dirs_prob": 1.0,
48
+ "scale_encoder_config": {
49
+ "enc_embed_dim": 1024,
50
+ "encoder_str": "global_rep_encoder",
51
+ "in_chans": 1,
52
+ "name": "scale_encoder"
53
+ },
54
+ "sparse_depth_prob": 0.0,
55
+ "sparsification_removal_percent": 0.9
56
+ },
57
+ "info_sharing_config": {
58
+ "custom_positional_encoding": null,
59
+ "model_return_type": "intermediate_features",
60
+ "model_type": "alternating_attention",
61
+ "module_args": {
62
+ "custom_positional_encoding": null,
63
+ "depth": 24,
64
+ "distinguish_ref_and_non_ref_views": true,
65
+ "gradient_checkpointing": false,
66
+ "indices": [
67
+ 11,
68
+ 17
69
+ ],
70
+ "input_embed_dim": 1024,
71
+ "name": "aat_24_layers_ifr",
72
+ "norm_intermediate": true,
73
+ "size": "24_layers"
74
+ }
75
+ },
76
+ "load_specific_pretrained_submodules": false,
77
+ "name": "mapanything",
78
+ "pred_head_config": {
79
+ "adaptor_config": {
80
+ "dense_pred_init_dict": {
81
+ "confidence_type": "exp",
82
+ "confidence_vmax": Infinity,
83
+ "confidence_vmin": 1,
84
+ "depth_mode": "exp",
85
+ "depth_vmax": Infinity,
86
+ "depth_vmin": 0,
87
+ "name": "raydirs+depth+pose+confidence+mask+scale",
88
+ "ray_directions_clamp_min_of_z_dir": false,
89
+ "ray_directions_mode": "linear",
90
+ "ray_directions_normalize_to_unit_image_plane": false,
91
+ "ray_directions_normalize_to_unit_sphere": true,
92
+ "ray_directions_vmax": Infinity,
93
+ "ray_directions_vmin": -Infinity,
94
+ "ray_directions_z_dir_min": -Infinity
95
+ },
96
+ "input_dim": 6,
97
+ "pose_pred_init_dict": {
98
+ "cam_trans_mode": "linear",
99
+ "cam_trans_vmax": Infinity,
100
+ "cam_trans_vmin": -Infinity,
101
+ "name": "raydirs+depth+pose+confidence+mask+scale",
102
+ "quaternions_mode": "linear",
103
+ "quaternions_normalize": true,
104
+ "quaternions_vmax": Infinity,
105
+ "quaternions_vmin": -Infinity
106
+ },
107
+ "scale_pred_init_dict": {
108
+ "mode": "exp",
109
+ "name": "raydirs+depth+pose+confidence+mask+scale",
110
+ "vmax": Infinity,
111
+ "vmin": 1e-08
112
+ },
113
+ "scene_rep_dim": 4,
114
+ "scene_rep_type": "raydirs+depth+pose",
115
+ "type": "raydirs+depth+pose+confidence+mask"
116
+ },
117
+ "adaptor_type": "raydirs+depth+pose+confidence+mask",
118
+ "dpt_adaptor": {
119
+ "confidence_type": "exp",
120
+ "confidence_vmax": Infinity,
121
+ "confidence_vmin": 1,
122
+ "depth_mode": "exp",
123
+ "depth_vmax": Infinity,
124
+ "depth_vmin": 0,
125
+ "name": "raydirs+depth+pose+confidence+mask+scale",
126
+ "ray_directions_clamp_min_of_z_dir": false,
127
+ "ray_directions_mode": "linear",
128
+ "ray_directions_normalize_to_unit_image_plane": false,
129
+ "ray_directions_normalize_to_unit_sphere": true,
130
+ "ray_directions_vmax": Infinity,
131
+ "ray_directions_vmin": -Infinity,
132
+ "ray_directions_z_dir_min": -Infinity
133
+ },
134
+ "feature_head": {
135
+ "checkpoint_gradient": false,
136
+ "feature_dim": 256,
137
+ "hooks": [
138
+ 0,
139
+ 1,
140
+ 2,
141
+ 3
142
+ ],
143
+ "input_feature_dims": [
144
+ 1024,
145
+ 768,
146
+ 768,
147
+ 768
148
+ ],
149
+ "patch_size": 14
150
+ },
151
+ "gradient_checkpointing": false,
152
+ "pose_adaptor": {
153
+ "cam_trans_mode": "linear",
154
+ "cam_trans_vmax": Infinity,
155
+ "cam_trans_vmin": -Infinity,
156
+ "name": "raydirs+depth+pose+confidence+mask+scale",
157
+ "quaternions_mode": "linear",
158
+ "quaternions_normalize": true,
159
+ "quaternions_vmax": Infinity,
160
+ "quaternions_vmin": -Infinity
161
+ },
162
+ "pose_head": {
163
+ "input_feature_dim": 768,
164
+ "num_resconv_block": 2,
165
+ "patch_size": 14,
166
+ "rot_representation_dim": 4
167
+ },
168
+ "regressor_head": {
169
+ "checkpoint_gradient": false,
170
+ "input_feature_dim": 256,
171
+ "output_dim": 6
172
+ },
173
+ "scale_adaptor": {
174
+ "mode": "exp",
175
+ "name": "raydirs+depth+pose+confidence+mask+scale",
176
+ "vmax": Infinity,
177
+ "vmin": 1e-08
178
+ },
179
+ "scale_head": {
180
+ "input_feature_dim": 768,
181
+ "output_dim": 1
182
+ },
183
+ "type": "dpt+pose"
184
+ },
185
+ "pretrained_checkpoint_path": null,
186
+ "specific_pretrained_submodules": [],
187
+ "torch_hub_force_reload": false
188
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20111747deb2c9a3c02fd3bb91f25ac51be951bdeffb5e89ebd45d6cb268b70e
3
+ size 2253444224