Replace demucs-mlx conversion scripts with direct PyTorch exporter

Removes dependency on demucs-mlx re-implementation. The new
export_from_pytorch.py converts all 8 models directly from the
original PyTorch demucs package to safetensors + JSON config.

Files changed (4) hide show

convert_demucs_mlx_checkpoint.py +0 -121
export_all_models.py +0 -206
export_from_pytorch.py +472 -0
export_mdx.py +0 -343

convert_demucs_mlx_checkpoint.py DELETED Viewed

@@ -1,121 +0,0 @@
-#!/usr/bin/env python3
-"""
-Export demucs-mlx pickle checkpoint to flat safetensors + JSON metadata.
-This is a preparation step for native Swift/MLX loading.
-"""
-from __future__ import annotations
-import argparse
-import json
-import os
-import pickle
-from pathlib import Path
-from typing import Any
-from fractions import Fraction
-import mlx.core as mx
-def flatten_tree(node: Any, prefix: str = "") -> dict[str, mx.array]:
-    out: dict[str, mx.array] = {}
-    if isinstance(node, dict):
-        for k, v in node.items():
-            key = f"{prefix}.{k}" if prefix else str(k)
-            out.update(flatten_tree(v, key))
-        return out
-    if isinstance(node, (list, tuple)):
-        for idx, v in enumerate(node):
-            key = f"{prefix}.{idx}" if prefix else str(idx)
-            out.update(flatten_tree(v, key))
-        return out
-    # MLX array leaf
-    if isinstance(node, mx.array):
-        out[prefix] = node
-        return out
-    # Non-array leaf in state tree: ignore.
-    return out
-def to_builtin(obj: Any) -> Any:
-    if isinstance(obj, dict):
-        return {str(k): to_builtin(v) for k, v in obj.items()}
-    if isinstance(obj, (list, tuple)):
-        return [to_builtin(x) for x in obj]
-    if isinstance(obj, Fraction):
-        return f"{obj.numerator}/{obj.denominator}"
-    return obj
-def main() -> None:
-    ap = argparse.ArgumentParser()
-    ap.add_argument(
-        "--checkpoint",
-        default=os.path.expanduser("~/.cache/demucs-mlx/htdemucs_mlx.pkl"),
-        help="Path to demucs-mlx pickle checkpoint",
-    )
-    ap.add_argument(
-        "--out-dir",
-        default="./Models/htdemucs",
-        help="Output directory",
-    )
-    ap.add_argument(
-        "--name",
-        default="htdemucs",
-        help="Output model basename",
-    )
-    args = ap.parse_args()
-    ck_path = Path(args.checkpoint).expanduser().resolve()
-    out_dir = Path(args.out_dir).resolve()
-    out_dir.mkdir(parents=True, exist_ok=True)
-    with ck_path.open("rb") as f:
-        checkpoint = pickle.load(f)
-    if "state" not in checkpoint:
-        raise ValueError(f"No 'state' key in checkpoint: {ck_path}")
-    flat = flatten_tree(checkpoint["state"])
-    if not flat:
-        raise ValueError("No MLX arrays found while flattening state tree")
-    safetensors_path = out_dir / f"{args.name}.safetensors"
-    config_path = out_dir / f"{args.name}_config.json"
-    mx.save_safetensors(str(safetensors_path), flat)
-    metadata = {
-        "model_name": checkpoint.get("model_name"),
-        "model_class": checkpoint.get("model_class"),
-        "sub_model_class": checkpoint.get("sub_model_class"),
-        "num_models": checkpoint.get("num_models"),
-        "weights": checkpoint.get("weights"),
-        "args": to_builtin(checkpoint.get("args", [])),
-        "kwargs": to_builtin(checkpoint.get("kwargs", {})),
-        "mlx_version": checkpoint.get("mlx_version"),
-        "tensor_count": len(flat),
-        "tensors": {
-            k: {
-                "shape": list(v.shape),
-                "dtype": str(v.dtype),
-            }
-            for k, v in flat.items()
-        },
-    }
-    with config_path.open("w") as f:
-        json.dump(metadata, f, indent=2)
-    print(f"wrote {safetensors_path}")
-    print(f"wrote {config_path}")
-    print(f"tensors: {len(flat)}")
-if __name__ == "__main__":
-    main()

export_all_models.py DELETED Viewed

@@ -1,206 +0,0 @@
-#!/usr/bin/env python3
-"""
-Export all available demucs-mlx model checkpoints to safetensors + JSON.
-Usage:
-    python scripts/export_all_models.py [--cache-dir ~/.cache/demucs-mlx] [--out-dir ./Models]
-This script finds all *_mlx.pkl checkpoints in the demucs-mlx cache directory
-and exports each one as:
-    <out-dir>/<model_name>/<model_name>.safetensors
-    <out-dir>/<model_name>/<model_name>_config.json
-If you haven't converted models yet, run demucs-mlx first to generate the
-pickle checkpoints:
-    python -m demucs_mlx --model htdemucs -n test.mp3
-"""
-from __future__ import annotations
-import argparse
-import json
-import os
-import pickle
-import sys
-from pathlib import Path
-from typing import Any
-from fractions import Fraction
-import mlx.core as mx
-# Known model names in demucs-mlx
-ALL_MODELS = [
-    "htdemucs",
-    "htdemucs_ft",
-    "htdemucs_6s",
-    "hdemucs_mmi",
-    "mdx",
-    "mdx_extra",
-    "mdx_q",
-    "mdx_extra_q",
-]
-def flatten_tree(node: Any, prefix: str = "") -> dict[str, mx.array]:
-    out: dict[str, mx.array] = {}
-    if isinstance(node, dict):
-        for k, v in node.items():
-            key = f"{prefix}.{k}" if prefix else str(k)
-            out.update(flatten_tree(v, key))
-        return out
-    if isinstance(node, (list, tuple)):
-        for idx, v in enumerate(node):
-            key = f"{prefix}.{idx}" if prefix else str(idx)
-            out.update(flatten_tree(v, key))
-        return out
-    if isinstance(node, mx.array):
-        out[prefix] = node
-        return out
-    return out
-def to_builtin(obj: Any) -> Any:
-    if isinstance(obj, dict):
-        return {str(k): to_builtin(v) for k, v in obj.items()}
-    if isinstance(obj, (list, tuple)):
-        return [to_builtin(x) for x in obj]
-    if isinstance(obj, Fraction):
-        return f"{obj.numerator}/{obj.denominator}"
-    return obj
-def export_checkpoint(ck_path: Path, out_dir: Path, model_name: str) -> bool:
-    """Export a single checkpoint. Returns True on success."""
-    if not ck_path.exists():
-        return False
-    print(f"\n--- Exporting {model_name} from {ck_path} ---")
-    with ck_path.open("rb") as f:
-        checkpoint = pickle.load(f)
-    if "state" not in checkpoint:
-        print(f"  WARNING: No 'state' key in checkpoint, skipping")
-        return False
-    flat = flatten_tree(checkpoint["state"])
-    if not flat:
-        print(f"  WARNING: No MLX arrays found, skipping")
-        return False
-    model_dir = out_dir / model_name
-    model_dir.mkdir(parents=True, exist_ok=True)
-    safetensors_path = model_dir / f"{model_name}.safetensors"
-    config_path = model_dir / f"{model_name}_config.json"
-    mx.save_safetensors(str(safetensors_path), flat)
-    metadata = {
-        "model_name": checkpoint.get("model_name", model_name),
-        "model_class": checkpoint.get("model_class"),
-        "sub_model_class": checkpoint.get("sub_model_class"),
-        "num_models": checkpoint.get("num_models"),
-        "weights": checkpoint.get("weights"),
-        "args": to_builtin(checkpoint.get("args", [])),
-        "kwargs": to_builtin(checkpoint.get("kwargs", {})),
-        "mlx_version": checkpoint.get("mlx_version"),
-        "tensor_count": len(flat),
-    }
-    # For heterogeneous bags, include per-model class and kwargs
-    per_model_class = checkpoint.get("per_model_class")
-    per_model_kwargs = checkpoint.get("per_model_kwargs")
-    if per_model_class:
-        # Map PyTorch class names to MLX class names
-        class_map = {
-            'Demucs': 'DemucsMLX',
-            'HDemucs': 'HDemucsMLX',
-            'HTDemucs': 'HTDemucsMLX',
-        }
-        metadata["sub_model_classes"] = [class_map.get(c, c) for c in per_model_class]
-    if per_model_kwargs:
-        # Build model_configs array with per-model class + kwargs
-        model_configs = []
-        for i, kw in enumerate(per_model_kwargs):
-            mc = "HTDemucsMLX"
-            if per_model_class and i < len(per_model_class):
-                mc = class_map.get(per_model_class[i], per_model_class[i])
-            model_configs.append({
-                "model_class": mc,
-                "kwargs": to_builtin(kw),
-            })
-        metadata["model_configs"] = model_configs
-    # Remove None values for cleaner JSON
-    metadata = {k: v for k, v in metadata.items() if v is not None}
-    with config_path.open("w") as f:
-        json.dump(metadata, f, indent=2)
-    print(f"  wrote {safetensors_path} ({len(flat)} tensors)")
-    print(f"  wrote {config_path}")
-    mc = metadata.get("model_class", "?")
-    smc = metadata.get("sub_model_class", "")
-    nm = metadata.get("num_models", 1)
-    print(f"  class={mc}, sub_class={smc}, num_models={nm}")
-    return True
-def main() -> None:
-    ap = argparse.ArgumentParser(description="Export all demucs-mlx checkpoints to safetensors")
-    ap.add_argument(
-        "--cache-dir",
-        default=os.path.expanduser("~/.cache/demucs-mlx"),
-        help="demucs-mlx cache directory containing *_mlx.pkl files",
-    )
-    ap.add_argument(
-        "--out-dir",
-        default="./Models",
-        help="Output root directory (model files go into <out-dir>/<model_name>/)",
-    )
-    ap.add_argument(
-        "--models",
-        nargs="*",
-        default=None,
-        help="Specific model names to export (default: all found)",
-    )
-    args = ap.parse_args()
-    cache_dir = Path(args.cache_dir).expanduser().resolve()
-    out_dir = Path(args.out_dir).resolve()
-    if not cache_dir.exists():
-        print(f"Cache directory not found: {cache_dir}")
-        print("Run demucs-mlx first to download and convert models.")
-        sys.exit(1)
-    models_to_export = args.models or ALL_MODELS
-    exported = 0
-    skipped = 0
-    for model_name in models_to_export:
-        ck_path = cache_dir / f"{model_name}_mlx.pkl"
-        if export_checkpoint(ck_path, out_dir, model_name):
-            exported += 1
-        else:
-            skipped += 1
-    # Also check for any *_mlx.pkl files not in our known list
-    if args.models is None:
-        for pkl_file in sorted(cache_dir.glob("*_mlx.pkl")):
-            name = pkl_file.stem.replace("_mlx", "")
-            if name not in ALL_MODELS:
-                print(f"\nFound additional checkpoint: {pkl_file.name}")
-                if export_checkpoint(pkl_file, out_dir, name):
-                    exported += 1
-    print(f"\n=== Done: {exported} exported, {skipped} skipped ===")
-if __name__ == "__main__":
-    main()

export_from_pytorch.py ADDED Viewed

	@@ -0,0 +1,472 @@

+#!/usr/bin/env python3
+"""
+Export Demucs PyTorch models directly to safetensors + JSON config for Swift MLX.
+Converts all 8 pretrained models directly from the original PyTorch demucs package.
+No dependency on demucs-mlx or any other re-implementation.
+Usage:
+    # Export all models
+    python scripts/export_from_pytorch.py --out-dir ~/.cache/demucs-mlx-swift-models
+    # Export specific models
+    python scripts/export_from_pytorch.py --models htdemucs htdemucs_ft --out-dir ./Models
+Requirements:
+    pip install demucs safetensors numpy
+"""
+from __future__ import annotations
+import argparse
+import inspect
+import json
+import re
+import sys
+from fractions import Fraction
+from pathlib import Path
+import numpy as np
+import torch
+# Model names exported by default when --models is not given on the command line
+ALL_MODELS = [
+    "htdemucs",
+    "htdemucs_ft",
+    "htdemucs_6s",
+    "hdemucs_mmi",
+    "mdx",
+    "mdx_extra",
+    "mdx_q",
+    "mdx_extra_q",
+]
+# Map PyTorch class names to MLX class names used by Swift loader
+# (class names missing from this map are passed through unchanged by export_model)
+CLASS_MAP = {
+    "Demucs": "DemucsMLX",
+    "HDemucs": "HDemucsMLX",
+    "HTDemucs": "HTDemucsMLX",
+}
+# Conv-like layer names that get .conv. wrapper in MLX
+# (matched against the last path component of a key in remap_key, Step 6)
+CONV_LAYER_NAMES = {
+    "conv", "conv_tr", "rewrite",
+    "channel_upsampler", "channel_downsampler",
+    "channel_upsampler_t", "channel_downsampler_t",
+}
+# DConv attention sub-module names (LocalState)
+# NOTE(review): not referenced elsewhere in the code shown here; remap_key
+# (Step 2d) spells the same names out in its regex, so keep the two in sync.
+DCONV_ATTN_NAMES = {"content", "key", "query", "proj", "query_decay", "query_freqs"}
+def to_json_serializable(obj):
+    """Recursively convert *obj* into something ``json.dump`` can write.
+    Conversions:
+        Fraction       -> "numerator/denominator" string
+        torch.Tensor   -> Python scalar (single element) or nested list
+        np.ndarray     -> nested list
+        NumPy scalar   -> Python scalar
+        list / tuple   -> list with converted elements
+        dict           -> dict with ``str`` keys and converted values
+    Anything else is returned unchanged.
+    """
+    if isinstance(obj, Fraction):
+        return f"{obj.numerator}/{obj.denominator}"
+    if isinstance(obj, torch.Tensor):
+        return obj.item() if obj.numel() == 1 else obj.tolist()
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+    if isinstance(obj, np.generic):
+        # NumPy scalars such as np.float32 / np.int64 are rejected by the json
+        # encoder; without this branch a ``default=str`` fallback would write
+        # them out as strings instead of numbers.
+        return obj.item()
+    if isinstance(obj, (list, tuple)):
+        return [to_json_serializable(x) for x in obj]
+    if isinstance(obj, dict):
+        return {str(k): to_json_serializable(v) for k, v in obj.items()}
+    return obj
+def transpose_conv_weights(key: str, value: np.ndarray, is_conv_transpose: bool = False) -> np.ndarray:
+    """Reorder the axes of a PyTorch convolution weight into the MLX layout.
+    Only entries whose key ends in ".weight" and whose array has 3 or 4
+    dimensions are touched; everything else is returned as-is.
+    Axis permutations applied:
+        Conv1d          (out, in, k)    → (out, k, in)      axes (0, 2, 1)
+        Conv2d          (out, in, h, w) → (out, h, w, in)   axes (0, 2, 3, 1)
+        ConvTranspose1d (in, out, k)    → (out, k, in)      axes (1, 2, 0)
+        ConvTranspose2d (in, out, h, w) → (out, h, w, in)   axes (1, 2, 3, 0)
+    Args:
+        key: state dict key of the tensor
+        value: the tensor as a numpy array
+        is_conv_transpose: True when the weight belongs to a ConvTranspose module
+    Returns:
+        The permuted array, or ``value`` itself when no rule applies.
+    """
+    if not key.endswith(".weight"):
+        return value
+    rank = len(value.shape)
+    if rank == 3:
+        axes = (1, 2, 0) if is_conv_transpose else (0, 2, 1)
+    elif rank == 4:
+        axes = (1, 2, 3, 0) if is_conv_transpose else (0, 2, 3, 1)
+    else:
+        # 1-D / 2-D (norm, linear, LSTM) and 5-D+ weights pass through untouched
+        return value
+    return np.transpose(value, axes)
+def remap_key(
+    key: str,
+    value: np.ndarray,
+    model_type: str = "HTDemucs",
+    dconv_conv_slots: set | None = None,
+    seq_conv_slots: set | None = None,
+) -> list[tuple[str, np.ndarray]]:
+    """Remap one PyTorch state dict entry to the MLX key convention.
+    The rules below are tried in order. Step 1 only rewrites ``key``; every
+    later step returns as soon as it matches, and a key that matches no rule
+    is returned unchanged.
+    Args:
+        key: PyTorch state dict key
+        value: numpy array (already transposed for conv weights)
+        model_type: PyTorch class name ("Demucs", "HDemucs", "HTDemucs")
+        dconv_conv_slots: set of (block_prefix, slot_str) pairs naming DConv
+            slots that hold a conv weight; used to decide whether the slot's
+            bias goes under ``.conv.``. ``None`` is treated as an empty set.
+        seq_conv_slots: set of (enc_dec, layer, slot) for Demucs v1/v2
+            Sequential Conv slots. ``None`` is treated as an empty set.
+    Returns:
+        A list of (new_key, value) pairs. Attention ``in_proj_*`` entries are
+        split into three pairs (query/key/value); every other rule yields
+        exactly one pair. LSTM ``bias_ih`` and ``bias_hh`` map to the same
+        target key, so duplicates are expected and must be merged by the caller.
+    """
+    dconv_conv_slots = dconv_conv_slots or set()
+    seq_conv_slots = seq_conv_slots or set()
+    # =========================================================================
+    # Step 1: Demucs v1/v2 Sequential insertion
+    # encoder.{i}.{j}.rest → encoder.{i}.layers.{j}.rest
+    # decoder.{i}.{j}.rest → decoder.{i}.layers.{j}.rest
+    # (rewrites `key` only; the later steps see the rewritten key)
+    # =========================================================================
+    if model_type == "Demucs":
+        m = re.match(r"(encoder|decoder)\.(\d+)\.(\d+)(\..*)?$", key)
+        if m:
+            enc_dec, layer, slot, rest = m.groups()
+            rest = rest or ""
+            key = f"{enc_dec}.{layer}.layers.{slot}{rest}"
+    # =========================================================================
+    # Step 1.5: Demucs v1/v2 Sequential Conv/Norm slot wrapping
+    # encoder.{i}.layers.{j}.weight → encoder.{i}.layers.{j}.conv.weight (if Conv slot)
+    # Non-conv slots are returned with the key as rewritten by Step 1.
+    # =========================================================================
+    if model_type == "Demucs":
+        m = re.match(r"(encoder|decoder)\.(\d+)\.layers\.(\d+)\.(weight|bias)$", key)
+        if m:
+            enc_dec, layer, slot, param = m.groups()
+            if (enc_dec, layer, slot) in seq_conv_slots:
+                return [(f"{enc_dec}.{layer}.layers.{slot}.conv.{param}", value)]
+            else:
+                return [(f"{enc_dec}.{layer}.layers.{slot}.{param}", value)]
+    # =========================================================================
+    # Step 2: DConv internal slot handling
+    # Matches: *.layers.{block_idx}.{slot_idx}.{rest}
+    # Both HDemucs (.dconv.layers.) and Demucs v1/v2 (.layers.{N}.layers.) end
+    # with this pattern after Step 1.
+    # Every branch below inserts ".layers." between the block prefix and the slot.
+    # =========================================================================
+    m = re.match(r"(.+\.layers\.\d+)\.(\d+)\.(.+)$", key)
+    if m:
+        block_prefix = m.group(1)
+        slot = m.group(2)
+        rest = m.group(3)
+        # --- 2a. Simple weight/bias/scale ---
+        if rest in ("weight", "bias", "scale"):
+            if rest == "weight" and len(value.shape) >= 2:
+                # Weight with 2 or more dims is treated as a conv weight → add .conv.
+                return [(f"{block_prefix}.layers.{slot}.conv.{rest}", value)]
+            elif rest == "weight":
+                # 1D weight = GroupNorm → no wrapper
+                return [(f"{block_prefix}.layers.{slot}.{rest}", value)]
+            elif rest == "bias":
+                # The bias array alone does not say whether its slot is a conv
+                # or a norm, so the pre-scanned slot set decides.
+                if (block_prefix, slot) in dconv_conv_slots:
+                    return [(f"{block_prefix}.layers.{slot}.conv.{rest}", value)]
+                else:
+                    return [(f"{block_prefix}.layers.{slot}.{rest}", value)]
+            else:  # scale
+                return [(f"{block_prefix}.layers.{slot}.{rest}", value)]
+        # --- 2b. LSTM weights/biases ---
+        m_lstm = re.match(r"lstm\.(weight|bias)_(ih|hh)_l(\d+)(_reverse)?$", rest)
+        if m_lstm:
+            wb, ih_hh, layer_idx, reverse = m_lstm.groups()
+            direction = "backward_lstms" if reverse else "forward_lstms"
+            if wb == "weight":
+                param = "Wx" if ih_hh == "ih" else "Wh"
+                return [(f"{block_prefix}.layers.{slot}.{direction}.{layer_idx}.{param}", value)]
+            else:  # bias — both bias_ih and bias_hh map to same key; caller merges
+                return [(f"{block_prefix}.layers.{slot}.{direction}.{layer_idx}.bias", value)]
+        # --- 2c. LSTM linear ---
+        m_linear = re.match(r"linear\.(weight|bias)$", rest)
+        if m_linear:
+            param = m_linear.group(1)
+            return [(f"{block_prefix}.layers.{slot}.linear.{param}", value)]
+        # --- 2d. Attention sub-modules (LocalState) ---
+        m_attn = re.match(r"(content|key|query|proj|query_decay|query_freqs)\.(weight|bias)$", rest)
+        if m_attn:
+            attn_name, param = m_attn.groups()
+            # These are all Conv1d modules → add .conv. wrapper
+            return [(f"{block_prefix}.layers.{slot}.{attn_name}.conv.{param}", value)]
+        # --- 2e. Fallback for unknown compound keys ---
+        return [(f"{block_prefix}.layers.{slot}.{rest}", value)]
+    # =========================================================================
+    # Step 3: MultiheadAttention in_proj split (HTDemucs transformer)
+    # The packed in_proj tensor is cut into three equal parts along axis 0,
+    # emitted in the order query, key, value.
+    # =========================================================================
+    m = re.match(r"(.+)\.(self_attn|cross_attn)\.in_proj_(weight|bias)$", key)
+    if m:
+        prefix, attn_type, param = m.group(1), m.group(2), m.group(3)
+        mlx_attn = "attn" if attn_type == "self_attn" else "cross_attn"
+        dim = value.shape[0] // 3
+        q, k_val, v = value[:dim], value[dim : 2 * dim], value[2 * dim :]
+        return [
+            (f"{prefix}.{mlx_attn}.query_proj.{param}", q),
+            (f"{prefix}.{mlx_attn}.key_proj.{param}", k_val),
+            (f"{prefix}.{mlx_attn}.value_proj.{param}", v),
+        ]
+    # self_attn.out_proj → attn.out_proj
+    # (cross_attn.out_proj keeps its name and falls through to the later steps)
+    m = re.match(r"(.+)\.self_attn\.out_proj\.(weight|bias)$", key)
+    if m:
+        prefix, param = m.group(1), m.group(2)
+        return [(f"{prefix}.attn.out_proj.{param}", value)]
+    # =========================================================================
+    # Step 4: norm_out wrapping → norm_out.gn
+    # =========================================================================
+    m = re.match(r"(.+)\.norm_out\.(weight|bias)$", key)
+    if m:
+        prefix, param = m.group(1), m.group(2)
+        return [(f"{prefix}.norm_out.gn.{param}", value)]
+    # =========================================================================
+    # Step 5: Bottleneck LSTM (Demucs v1/v2 and HDemucs)
+    # lstm.lstm.weight_ih_l0 → lstm.forward_lstms.0.Wx
+    # (the inner ".lstm" component is dropped; a "_reverse" suffix selects
+    # backward_lstms instead of forward_lstms)
+    # =========================================================================
+    m = re.match(r"(.+)\.lstm\.(weight|bias)_(ih|hh)_l(\d+)(_reverse)?$", key)
+    if m:
+        prefix = m.group(1)
+        wb = m.group(2)
+        ih_hh = m.group(3)
+        layer_idx = m.group(4)
+        reverse = m.group(5)
+        direction = "backward_lstms" if reverse else "forward_lstms"
+        if wb == "weight":
+            param = "Wx" if ih_hh == "ih" else "Wh"
+            return [(f"{prefix}.{direction}.{layer_idx}.{param}", value)]
+        else:  # bias — merge handled by caller
+            return [(f"{prefix}.{direction}.{layer_idx}.bias", value)]
+    # =========================================================================
+    # Step 6: Conv/ConvTranspose/Rewrite named layers → add .conv. wrapper
+    # Applies when the last path component before the parameter name is one of
+    # CONV_LAYER_NAMES and the parameter is weight or bias.
+    # =========================================================================
+    parts = key.rsplit(".", 1)
+    if len(parts) == 2:
+        path, param = parts
+        path_parts = path.split(".")
+        last_name = path_parts[-1]
+        if last_name in CONV_LAYER_NAMES and param in ("weight", "bias"):
+            return [(f"{path}.conv.{param}", value)]
+    # =========================================================================
+    # Default: no change
+    # =========================================================================
+    return [(key, value)]
+def convert_sub_model(model, prefix: str) -> dict[str, np.ndarray]:
+    """Convert a single sub-model's state dict to MLX-compatible numpy arrays.
+    Every tensor is moved to CPU and cast to float32, conv weights are
+    transposed to the MLX layout, and keys are renamed with ``remap_key``.
+    Args:
+        model: a PyTorch module (Demucs, HDemucs or HTDemucs)
+        prefix: string prepended to every output key (e.g. "model_0." inside a
+            bag, "" for a single model)
+    Returns:
+        Mapping from MLX key to numpy array.
+    Raises:
+        ValueError: if two state-dict entries are remapped to the same key and
+            that key is not an LSTM bias. Only LSTM ``bias_ih`` / ``bias_hh``
+            are meant to be merged; any other collision would silently sum
+            unrelated tensors and corrupt the export.
+    """
+    cls_name = type(model).__name__
+    # The only target keys that are expected to be produced twice
+    # (bias_ih + bias_hh → one bias per LSTM layer and direction).
+    lstm_bias_key = re.compile(r"(?:^|\.)(?:forward|backward)_lstms\.\d+\.bias$")
+    # --- Pre-scan: identify ConvTranspose modules by type ---
+    conv_tr_paths = set()
+    for name, module in model.named_modules():
+        if isinstance(module, (torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d)):
+            conv_tr_paths.add(name)
+    # --- Collect state dict as numpy ---
+    state_items = []
+    for key, tensor in model.state_dict().items():
+        arr = tensor.detach().cpu().float().numpy()
+        state_items.append((key, arr))
+    # --- Pre-scan: identify DConv Conv slots (weights with ndim >= 2) ---
+    # Pattern: *.layers.{block}.{slot}.weight
+    # For Demucs v1/v2, apply Sequential insertion first so lookups match remap_key
+    dconv_conv_slots: set[tuple[str, str]] = set()
+    for key, arr in state_items:
+        scan_key = key
+        if cls_name == "Demucs":
+            m = re.match(r"(encoder|decoder)\.(\d+)\.(\d+)(\..*)?$", scan_key)
+            if m:
+                enc_dec, layer, slot, rest = m.groups()
+                rest = rest or ""
+                scan_key = f"{enc_dec}.{layer}.layers.{slot}{rest}"
+        m = re.match(r"(.+\.layers\.\d+)\.(\d+)\.weight$", scan_key)
+        if m and len(arr.shape) >= 2:
+            dconv_conv_slots.add((m.group(1), m.group(2)))
+    # --- Pre-scan: Demucs v1/v2 Sequential Conv slots ---
+    seq_conv_slots: set[tuple[str, str, str]] = set()
+    if cls_name == "Demucs":
+        for key, arr in state_items:
+            m = re.match(r"(encoder|decoder)\.(\d+)\.(\d+)\.weight$", key)
+            if m and len(arr.shape) >= 2:
+                seq_conv_slots.add((m.group(1), m.group(2), m.group(3)))
+    # --- Convert ---
+    weights: dict[str, np.ndarray] = {}
+    for key, arr in state_items:
+        # Determine if this belongs to a ConvTranspose module
+        is_conv_tr = any(key.startswith(p + ".") for p in conv_tr_paths)
+        # Transpose conv weights
+        arr = transpose_conv_weights(key, arr, is_conv_transpose=is_conv_tr)
+        # Remap key
+        remapped = remap_key(key, arr, cls_name, dconv_conv_slots, seq_conv_slots)
+        for new_key, new_val in remapped:
+            full_key = f"{prefix}{new_key}"
+            if full_key not in weights:
+                weights[full_key] = new_val
+            elif lstm_bias_key.search(full_key):
+                # LSTM bias merge: bias_ih + bias_hh → bias (additive)
+                weights[full_key] = weights[full_key] + new_val
+            else:
+                raise ValueError(
+                    f"Key collision while remapping {key!r}: target {full_key!r} "
+                    "already exists and is not an LSTM bias"
+                )
+    return weights
+def extract_kwargs(model) -> dict:
+    """Return the model's constructor kwargs in JSON-serializable form.
+    When the model carries ``_init_args_kwargs``, its kwargs part is used and
+    only values of plain types (int, float, str, bool, list, tuple, None,
+    Fraction) are kept. Otherwise the ``__init__`` signature is inspected and
+    every parameter name that also exists as an attribute on the model is read.
+    """
+    if not hasattr(model, "_init_args_kwargs"):
+        # Fallback: mirror __init__ parameter names onto instance attributes
+        param_names = inspect.signature(type(model).__init__).parameters
+        return {
+            name: to_json_serializable(getattr(model, name))
+            for name in param_names
+            if name != "self" and hasattr(model, name)
+        }
+    plain_types = (int, float, str, bool, list, tuple, type(None), Fraction)
+    _, init_kwargs = model._init_args_kwargs
+    result = {}
+    for k, v in init_kwargs.items():
+        if isinstance(v, plain_types):
+            result[k] = to_json_serializable(v)
+    return result
+def export_model(model_name: str, out_dir: Path) -> bool:
+    """Load one pretrained model (or bag) and write its safetensors + config JSON.
+    Output goes to ``<out_dir>/<model_name>/<model_name>.safetensors`` and
+    ``<out_dir>/<model_name>/<model_name>_config.json``.
+    Args:
+        model_name: name passed to ``demucs.pretrained.get_model``
+        out_dir: root output directory
+    Returns:
+        True when both files were written, False when the model failed to load.
+    """
+    from demucs.pretrained import get_model
+    from demucs.apply import BagOfModels
+    print(f"\n--- Exporting {model_name} ---")
+    try:
+        model = get_model(model_name)
+    except Exception as e:
+        print(f"  Failed to load model: {e}")
+        return False
+    is_bag = isinstance(model, BagOfModels)
+    if not is_bag:
+        sub_models = [model]
+        bag_weights = None
+    else:
+        sub_models = list(model.models)
+        raw_weights = model.weights
+        bag_weights = raw_weights.tolist() if hasattr(raw_weights, "tolist") else list(raw_weights)
+    num_models = len(sub_models)
+    summary = "Bag of " + str(num_models) + " models" if is_bag else "Single model"
+    print(f"  {summary}")
+    # Convert every sub-model, gathering tensors and per-model metadata
+    all_weights: dict[str, np.ndarray] = {}
+    model_classes: list[str] = []
+    model_configs: list[dict] = []
+    for i, sub in enumerate(sub_models):
+        cls_name = type(sub).__name__
+        mlx_cls = CLASS_MAP.get(cls_name, cls_name)
+        print(f"  Model {i}: {cls_name} → {mlx_cls}")
+        model_classes.append(mlx_cls)
+        # Tensors of bag members are namespaced as model_<i>.*
+        key_prefix = f"model_{i}." if is_bag else ""
+        all_weights.update(convert_sub_model(sub, key_prefix))
+        model_configs.append({"model_class": mlx_cls, "kwargs": extract_kwargs(sub)})
+    # Assemble the config; key insertion order is the order written to the JSON file
+    config: dict = {
+        "model_name": model_name,
+        "tensor_count": len(all_weights),
+    }
+    if not is_bag:
+        config["model_class"] = model_classes[0]
+        config["kwargs"] = model_configs[0]["kwargs"]
+    else:
+        config["model_class"] = "BagOfModelsMLX"
+        config["num_models"] = num_models
+        config["weights"] = bag_weights
+        config["sub_model_classes"] = model_classes
+        # Homogeneous bag: also expose the single shared class name for compat
+        if len(set(model_classes)) == 1:
+            config["sub_model_class"] = model_classes[0]
+        config["model_configs"] = model_configs
+        # Single-model bags (common case) get their kwargs mirrored at top level
+        if num_models == 1:
+            config["kwargs"] = model_configs[0]["kwargs"]
+    # Write both files under <out_dir>/<model_name>/
+    model_dir = out_dir / model_name
+    model_dir.mkdir(parents=True, exist_ok=True)
+    safetensors_path = model_dir / f"{model_name}.safetensors"
+    config_path = model_dir / f"{model_name}_config.json"
+    # Prefer the safetensors library; fall back to mlx when it is not installed
+    try:
+        from safetensors.numpy import save_file
+        save_file(all_weights, str(safetensors_path))
+    except ImportError:
+        import mlx.core as mx
+        mx.save_safetensors(
+            str(safetensors_path),
+            {k: mx.array(v) for k, v in all_weights.items()},
+        )
+    with config_path.open("w") as f:
+        json.dump(config, f, indent=2, default=str)
+    size_mb = safetensors_path.stat().st_size / (1024 * 1024)
+    print(f"  Wrote {safetensors_path} ({len(all_weights)} tensors, {size_mb:.0f} MB)")
+    print(f"  Wrote {config_path}")
+    return True
+def main():
+    """Parse the command line, export each requested model, and report totals.
+    Exits with status 1 when at least one model failed to export.
+    """
+    parser = argparse.ArgumentParser(
+        description="Export Demucs PyTorch models to safetensors for Swift MLX"
+    )
+    parser.add_argument(
+        "--models",
+        nargs="*",
+        default=None,
+        help=f"Models to export (default: all). Choices: {', '.join(ALL_MODELS)}",
+    )
+    parser.add_argument(
+        "--out-dir",
+        default="./Models",
+        help="Output root directory (files go into <out-dir>/<model_name>/)",
+    )
+    args = parser.parse_args()
+    # An omitted or empty --models list means "export everything"
+    models = args.models or ALL_MODELS
+    out_dir = Path(args.out_dir).resolve()
+    outcomes = [bool(export_model(name, out_dir)) for name in models]
+    exported = sum(outcomes)
+    failed = len(outcomes) - exported
+    print(f"\n=== Done: {exported} exported, {failed} failed ===")
+    if failed:
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

export_mdx.py DELETED Viewed

@@ -1,343 +0,0 @@
-#!/usr/bin/env python3
-"""
-Export mdx/mdx_extra models (heterogeneous bags of Demucs + HDemucs) to safetensors.
-These models contain a mix of Demucs (v1/v2) and HDemucs (v3) sub-models in a
-single BagOfModels. The Python MLX converter has a bug that prevents it from
-handling these models, so we do a direct PyTorch → safetensors conversion.
-Usage:
-    python scripts/export_mdx.py --model mdx --out-dir .scratch/models
-"""
-from __future__ import annotations
-import argparse
-import json
-import os
-import sys
-from pathlib import Path
-import torch
-import numpy as np
-def flatten_state_dict(state_dict: dict, prefix: str = "") -> dict:
-    """Collapse a nested mapping of tensors into one flat dict of numpy arrays.
-
-    Args:
-        state_dict: Mapping whose values are ``torch.Tensor`` objects or
-            further nested dicts; values of any other type are dropped.
-        prefix: Text prepended verbatim to every key at this level. Recursive
-            calls pass the parent key followed by a dot.
-
-    Returns:
-        A single-level dict mapping each joined key to the tensor's data as a
-        numpy array (detached and moved to CPU first).
-    """
-    collected = {}
-    for name, entry in state_dict.items():
-        qualified = name if not prefix else f"{prefix}{name}"
-        if isinstance(entry, dict):
-            # Descend with a trailing dot so child keys come out dot-separated.
-            nested = flatten_state_dict(entry, f"{qualified}.")
-            collected.update(nested)
-            continue
-        if isinstance(entry, torch.Tensor):
-            collected[qualified] = entry.detach().cpu().numpy()
-    return collected
-def convert_torch_to_mlx_keys(state_dict: dict, model_type: str) -> dict:
-    """Copy a flat numpy state dict, permuting the axes of conv-shaped weights.
-
-    Keys are passed through unchanged. Only entries whose key ends in
-    ``.weight`` are touched, and the permutation is chosen purely by rank:
-    - 3-D: axes (0, 2, 1), e.g. Conv1d (out, in, k) → (out, k, in)
-    - 4-D: axes (0, 2, 3, 1), e.g. Conv2d (out, in, h, w) → (out, h, w, in)
-    All other arrays are stored as-is.
-
-    NOTE(review): transposed-conv weights receive the same permutation as
-    regular conv weights. PyTorch stores those as (in, out, k), so the result
-    is (in, k, out) rather than (out, k, in) — confirm the loader expects this.
-
-    Args:
-        state_dict: Flat mapping of key → value; values that are not
-            ``np.ndarray`` are dropped from the result.
-        model_type: Accepted for interface compatibility; not read here.
-
-    Returns:
-        A new dict with the same keys and (possibly transposed) arrays.
-    """
-    axis_order_by_rank = {3: (0, 2, 1), 4: (0, 2, 3, 1)}
-    result = {}
-    for name, array in state_dict.items():
-        if not isinstance(array, np.ndarray):
-            continue
-        # Only `.weight` entries are candidates; rank then selects the axes.
-        axes = axis_order_by_rank.get(array.ndim) if name.endswith('.weight') else None
-        result[name] = array if axes is None else np.transpose(array, axes)
-    return result
-def remap_demucs_keys(state_dict: dict) -> dict:
-    """Rename Demucs v1/v2 PyTorch state-dict keys to the MLX module layout.
-
-    Keys of the form ``encoder.{i}.{j}...`` or ``decoder.{i}.{j}...`` gain a
-    ``layers`` segment (``encoder.{i}.layers.{j}...``). On top of that:
-    - encoder, j == 0, weight/bias: an extra ``.conv`` segment is inserted
-      (the MLX side wraps the conv in a module exposing ``.conv``).
-    - encoder, remainder starting with ``layers.``: delegated to
-      ``remap_dconv_key``; if that returns nothing the plain form is used.
-    - decoder, weight/bias whose value is 3-D: an extra ``.conv`` segment is
-      inserted; other decoder entries keep the plain form.
-    Every other key (including ``lstm.*`` and keys with fewer than three
-    components) is copied under its original name.
-
-    Args:
-        state_dict: Flat mapping of dotted key → array.
-
-    Returns:
-        A new dict with remapped keys and the same value objects.
-
-    Raises:
-        ValueError: If the third component of an encoder/decoder key is not
-            an integer.
-    """
-    remapped = {}
-    for key, value in state_dict.items():
-        parts = key.split('.')
-        section = parts[0]
-        if len(parts) < 3 or section not in ('encoder', 'decoder'):
-            # LSTM and any other top-level entries pass through untouched.
-            remapped[key] = value
-            continue
-        block_idx = parts[1]
-        layer_idx = int(parts[2])
-        rest = '.'.join(parts[3:])
-        base = f"{section}.{block_idx}.layers.{layer_idx}"
-        is_param = rest.startswith(('weight', 'bias'))
-        # Default: keep the remainder as-is under the new `layers` segment.
-        target = f"{base}.{rest}"
-        if section == 'encoder':
-            if layer_idx == 0 and is_param:
-                # First sequential slot is the Conv1d → wrapped conv module.
-                target = f"{base}.conv.{rest}"
-            elif rest.startswith('layers.'):
-                # DConv internals; fall back to the plain form when unmapped.
-                target = remap_dconv_key(base, rest, value) or target
-        elif is_param and len(value.shape) == 3:
-            # Decoder: a 3-D weight/bias is treated as a conv parameter;
-            # lower-rank parameters (e.g. GroupNorm) keep the plain form.
-            target = f"{base}.conv.{rest}"
-        remapped[target] = value
-    return remapped
-def remap_dconv_key(prefix: str, rest: str, value: np.ndarray) -> str | None:
-    """Translate one DConv parameter key from PyTorch to MLX naming.
-
-    ``rest`` is expected to look like ``layers.{block}.{seq}.{param...}``.
-    The sequential index ``seq`` decides the output form:
-    - 0 or 3 (conv slots): ``{prefix}.layers.{block}.layers.{seq}.conv.{param}``
-    - 1 or 4 (norm slots): ``{prefix}.layers.{block}.layers.{seq}.{param}``
-    - 6 (layer scale): same as the norm form, but only for the ``scale`` param
-    Any other index (e.g. the parameter-free slots 2 and 5) has no mapping.
-
-    Args:
-        prefix: Already-remapped key of the module that owns the DConv.
-        rest: Remainder of the original key, starting at ``layers.``.
-        value: The parameter array; not inspected here.
-
-    Returns:
-        The remapped key, or ``None`` when ``rest`` has fewer than four
-        components or the slot/parameter combination has no mapping.
-
-    Raises:
-        ValueError: If the sequential index component is not an integer.
-    """
-    pieces = rest.split('.')
-    if len(pieces) < 4:
-        return None
-    block_idx = pieces[1]
-    seq_idx = int(pieces[2])
-    param_rest = '.'.join(pieces[3:])
-    slot = f"{prefix}.layers.{block_idx}.layers.{seq_idx}"
-    if seq_idx in (0, 3):
-        # Conv slots carry their parameters under a nested `.conv` module.
-        return f"{slot}.conv.{param_rest}"
-    if seq_idx in (1, 4) or (seq_idx == 6 and param_rest == 'scale'):
-        # Norm slots and the layer-scale slot expose parameters directly.
-        return f"{slot}.{param_rest}"
-    return None
-def export_model(model_name: str, out_dir: Path) -> bool:
-    """Export a pretrained bag of models to safetensors + config JSON.
-
-    Loads ``model_name`` via ``demucs.pretrained.get_model``, gathers the
-    state dict of every sub-model under a ``model_{i}.`` key prefix, and
-    writes ``<out_dir>/<model_name>/<model_name>.safetensors`` plus
-    ``<model_name>_config.json``.
-
-    Args:
-        model_name: Name passed to ``get_model``; also used for the output
-            directory and file names.
-        out_dir: Root directory under which the per-model directory is created.
-
-    Returns:
-        ``False`` if loading fails or the loaded object is not a
-        ``BagOfModels``; ``True`` after both files have been written.
-    """
-    from demucs.pretrained import get_model
-    print(f"\n--- Exporting {model_name} ---")
-    try:
-        bag = get_model(model_name)
-    except Exception as e:
-        # Any load failure is reported and turned into a False result.
-        print(f"  Failed to load model: {e}")
-        return False
-    from demucs.apply import BagOfModels
-    if not isinstance(bag, BagOfModels):
-        print(f"  Expected BagOfModels, got {type(bag).__name__}")
-        return False
-    num_models = len(bag.models)
-    print(f"  Bag of {num_models} models")
-    # Collect all weights with model_X prefix
-    all_weights = {}
-    model_classes = []
-    model_kwargs_list = []
-    for i, sub_model in enumerate(bag.models):
-        cls_name = type(sub_model).__name__
-        print(f"  Model {i}: {cls_name}")
-        model_classes.append(cls_name)
-        # Get state dict
-        sd = sub_model.state_dict()
-        flat = {}
-        for key, tensor in sd.items():
-            arr = tensor.detach().cpu().numpy()
-            # Transpose conv weights: every 3-D / 4-D `.weight` gets the same
-            # axis permutation, chosen by rank only.
-            # NOTE(review): PyTorch transposed-conv weights are stored
-            # (in, out, ...); confirm this permutation suits them as well.
-            if key.endswith('.weight'):
-                if len(arr.shape) == 3:
-                    arr = np.transpose(arr, (0, 2, 1))
-                elif len(arr.shape) == 4:
-                    arr = np.transpose(arr, (0, 2, 3, 1))
-            flat[f"model_{i}.{key}"] = arr
-        all_weights.update(flat)
-        # Extract kwargs: recover constructor arguments by reading same-named
-        # attributes off the instance; parameters without a matching
-        # attribute are simply omitted.
-        import inspect
-        init_sig = inspect.signature(type(sub_model).__init__)
-        kwargs = {}
-        for param_name in init_sig.parameters:
-            if param_name == 'self':
-                continue
-            if hasattr(sub_model, param_name):
-                val = getattr(sub_model, param_name)
-                if isinstance(val, torch.Tensor):
-                    val = val.item()
-                elif isinstance(val, (list, tuple)):
-                    val = list(val)
-                kwargs[param_name] = val
-        model_kwargs_list.append(kwargs)
-    # Save safetensors
-    model_dir = out_dir / model_name
-    model_dir.mkdir(parents=True, exist_ok=True)
-    safetensors_path = model_dir / f"{model_name}.safetensors"
-    config_path = model_dir / f"{model_name}_config.json"
-    # Convert numpy arrays to mlx arrays and save
-    try:
-        import mlx.core as mx
-        mlx_weights = {k: mx.array(v) for k, v in all_weights.items()}
-        mx.save_safetensors(str(safetensors_path), mlx_weights)
-    except ImportError:
-        # Fallback: use safetensors library directly
-        from safetensors.numpy import save_file
-        save_file(all_weights, str(safetensors_path))
-    # Build config
-    # Map PyTorch class names to MLX class names
-    class_map = {
-        'Demucs': 'DemucsMLX',
-        'HDemucs': 'HDemucsMLX',
-        'HTDemucs': 'HTDemucsMLX',
-    }
-    # Get weights (the bag's own `weights` attribute, converted to a list)
-    weights = None
-    if bag.weights is not None:
-        weights = bag.weights.tolist() if hasattr(bag.weights, 'tolist') else list(bag.weights)
-    config = {
-        "model_name": model_name,
-        "model_class": "BagOfModelsMLX",
-        "num_models": num_models,
-        "weights": weights,
-        "sub_model_classes": [class_map.get(c, c) for c in model_classes],
-        "model_configs": [],
-        "tensor_count": len(all_weights),
-    }
-    # If all models are the same class, also set sub_model_class for compatibility
-    unique_classes = set(config["sub_model_classes"])
-    if len(unique_classes) == 1:
-        config["sub_model_class"] = unique_classes.pop()
-    # Add per-model configs
-    for i, (cls, kwargs) in enumerate(zip(model_classes, model_kwargs_list)):
-        model_config = {
-            "model_class": class_map.get(cls, cls),
-            "kwargs": {},
-        }
-        # Convert kwargs to JSON-serializable: only int/float/str/bool/list
-        # and None values are kept; anything else is silently dropped.
-        for k, v in kwargs.items():
-            if isinstance(v, (int, float, str, bool, list)):
-                model_config["kwargs"][k] = v
-            elif v is None:
-                model_config["kwargs"][k] = None
-        config["model_configs"].append(model_config)
-    with config_path.open("w") as f:
-        # default=str stringifies any remaining non-JSON value (e.g. inside lists).
-        json.dump(config, f, indent=2, default=str)
-    print(f"  Wrote {safetensors_path} ({len(all_weights)} tensors)")
-    print(f"  Wrote {config_path}")
-    return True
-def main():
-    """Command-line entry point: export a single mdx-style model.
-
-    Parses ``--model`` and ``--out-dir``, runs ``export_model`` once, and
-    exits with status 1 when the export reports failure so that shell
-    callers and CI can detect it.
-    """
-    ap = argparse.ArgumentParser(description="Export mdx/mdx_extra models")
-    ap.add_argument("--model", default="mdx", help="Model name")
-    ap.add_argument("--out-dir", default=".scratch/models", help="Output directory")
-    args = ap.parse_args()
-    out_dir = Path(args.out_dir).resolve()
-    # export_model returns False on failure; surface that as a non-zero exit
-    # instead of silently finishing with status 0.
-    if not export_model(args.model, out_dir):
-        sys.exit(1)
-# Run the exporter only when this file is executed as a script, not on import.
-if __name__ == "__main__":
-    main()