| { | |
| "model_name": "mdx_extra", | |
| "model_class": "BagOfModelsMLX", | |
| "sub_model_class": "HDemucsMLX", | |
| "num_models": 4, | |
| "weights": [ | |
| [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ], | |
| [ | |
| 1.0, | |
| 1.0, | |
| 1.0, | |
| 1.0 | |
| ] | |
| ], | |
| "args": [], | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": true, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": false, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| }, | |
| "mlx_version": "0.30.3", | |
| "tensor_count": 1516, | |
| "model_configs": [ | |
| { | |
| "model_class": "HTDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": true, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": false, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| }, | |
| { | |
| "model_class": "HTDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": false, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": true, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| }, | |
| { | |
| "model_class": "HTDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": false, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": false, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| }, | |
| { | |
| "model_class": "HTDemucsMLX", | |
| "kwargs": { | |
| "sources": [ | |
| "drums", | |
| "bass", | |
| "other", | |
| "vocals" | |
| ], | |
| "audio_channels": 2, | |
| "samplerate": 44100, | |
| "segment": 44, | |
| "channels": 48, | |
| "channels_time": null, | |
| "growth": 2, | |
| "nfft": 4096, | |
| "wiener_iters": 0, | |
| "end_iters": 0, | |
| "wiener_residual": false, | |
| "cac": true, | |
| "depth": 6, | |
| "rewrite": true, | |
| "hybrid": true, | |
| "hybrid_old": false, | |
| "multi_freqs": [], | |
| "multi_freqs_depth": 3, | |
| "freq_emb": 0.2, | |
| "emb_scale": 10, | |
| "emb_smooth": true, | |
| "kernel_size": 8, | |
| "stride": 4, | |
| "time_stride": 2, | |
| "context": 1, | |
| "context_enc": 0, | |
| "norm_starts": 4, | |
| "norm_groups": 4, | |
| "dconv_mode": 1, | |
| "dconv_depth": 2, | |
| "dconv_comp": 4, | |
| "dconv_attn": 4, | |
| "dconv_lstm": 4, | |
| "dconv_init": 0.0001, | |
| "rescale": 0.1 | |
| } | |
| } | |
| ] | |
| } |