{
  "model_type": "dinosaur",
  "initializer": {
    "name": "RandomInit",
    "n_slots": 7,
    "dim": 512
  },
  "encoder": {
    "backbone": {
      "name": "TimmExtractor",
      "model": "vit_large_patch14_reg4_dinov2.lvd142m",
      "features": "vit_block12",
      "frozen": true,
      "pretrained": true,
      "model_kwargs": {
        "dynamic_img_size": true
      }
    },
    "output_transform": {
      "name": "networks.two_layer_mlp",
      "inp_dim": 1024,
      "outp_dim": 512,
      "hidden_dim": 2048,
      "layer_norm": true
    }
  },
  "grouper": {
    "name": "SlotAttention",
    "inp_dim": 512,
    "slot_dim": 512,
    "n_iters": 3,
    "use_mlp": true
  },
  "decoder": {
    "name": "MLPDecoder",
    "inp_dim": 512,
    "outp_dim": 1024,
    "hidden_dims": [2048, 2048, 2048],
    "n_patches": 676
  },
  "aux_outputs": false
}