Upload folder using huggingface_hub
Browse files
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
autoencoder/svg_autoencoder_dinov3s16p_vit-s_epoch40.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2f7bbdfe59e403e54b9043d8e7cf051f575051df2703894832e0ffa13c63a23
|
| 3 |
+
size 858608613
|
autoencoder/svg_autoencoder_dinov3s16p_vit-s_epoch40.yaml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model configuration
|
| 2 |
+
model:
|
| 3 |
+
target: autoencoder.ldm.models.dino_decoder.DinoDecoder
|
| 4 |
+
params:
|
| 5 |
+
ckpt_path: pretrained/autoencoder/svg_autoencoder_dinov3s16p_vit-s_epoch40.ckpt
|
| 6 |
+
is_train: false
|
| 7 |
+
only_decoder: true
|
| 8 |
+
embed_dim: 32
|
| 9 |
+
ddconfig:
|
| 10 |
+
double_z: true
|
| 11 |
+
z_channels: 392
|
| 12 |
+
resolution: 256
|
| 13 |
+
in_channels: 3
|
| 14 |
+
out_ch: 3
|
| 15 |
+
ch: 128
|
| 16 |
+
ch_mult:
|
| 17 |
+
- 1
|
| 18 |
+
- 1
|
| 19 |
+
- 2
|
| 20 |
+
- 2
|
| 21 |
+
- 4
|
| 22 |
+
num_res_blocks: 2
|
| 23 |
+
attn_resolutions:
|
| 24 |
+
- 16
|
| 25 |
+
dropout: 0.0
|
| 26 |
+
dinoconfig: null
|
| 27 |
+
lossconfig: null
|
checkpoints/V1-SVG-XL-7000K-256x256.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b987061498e38fc25d627dc3b419d9137e688a15b9660fba47b008ec61f852fe
|
| 3 |
+
size 2705900247
|
checkpoints/svg_xl_pretrained_config.yaml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
basic:
|
| 2 |
+
exp_name: SVG-XL
|
| 3 |
+
results_dir: exps
|
| 4 |
+
data_path: null
|
| 5 |
+
global_seed: 1234
|
| 6 |
+
epochs: 1000
|
| 7 |
+
log_every: 100
|
| 8 |
+
ckpt_every: 50000
|
| 9 |
+
rf: true
|
| 10 |
+
shift: 0.4
|
| 11 |
+
accum_iter: 1
|
| 12 |
+
clip_grad_norm: null
|
| 13 |
+
image_size: 256
|
| 14 |
+
global_batch_size: 256
|
| 15 |
+
num_workers: 16
|
| 16 |
+
timestep_start: 0
|
| 17 |
+
timestep_end: 1000
|
| 18 |
+
encoder: dinov3_vitsp16
|
| 19 |
+
feature_norm: true
|
| 20 |
+
encoder_config: pretrained/autoencoder/svg_autoencoder_dinov3s16p_vit-s_epoch40.yaml
|
| 21 |
+
|
| 22 |
+
model:
|
| 23 |
+
ckpt: null
|
| 24 |
+
target: models.models_SVG.DiT
|
| 25 |
+
params:
|
| 26 |
+
input_size: 16
|
| 27 |
+
num_classes: 1000
|
| 28 |
+
patch_size: 1
|
| 29 |
+
depth: 28
|
| 30 |
+
hidden_size: 1152
|
| 31 |
+
num_heads: 16
|
| 32 |
+
mlp_ratio: 4
|
| 33 |
+
use_swiglu: false
|
| 34 |
+
in_channels: 392
|
| 35 |
+
qk_norm: true
|
| 36 |
+
class_dropout_prob: 0.1
|
| 37 |
+
optim:
|
| 38 |
+
base_learning_rate: 0.0001
|
| 39 |
+
weight_decay: 0
|
| 40 |
+
betas:
|
| 41 |
+
- 0.9
|
| 42 |
+
- 0.999
|
| 43 |
+
lr_sheduler:
|
| 44 |
+
warmup: null
|
| 45 |
+
train_epoch: null
|