codemichaeld committed
Commit 7776d1d · verified · 1 Parent(s): 2e940ab

Update app.py

Files changed (1): app.py +94 -579
app.py CHANGED
@@ -21,140 +21,6 @@ try:
 except ImportError:
     MODELScope_AVAILABLE = False
 
-def low_rank_decomposition(weight, rank=64):
-    """
-    Correct LoRA decomposition supporting 2D and 4D tensors.
-    Returns (lora_A, lora_B) such that weight ≈ lora_B @ lora_A for 2D,
-    or appropriate conv form for 4D.
-    """
-    original_shape = weight.shape
-    original_dtype = weight.dtype
-    try:
-        if weight.ndim == 2:
-            actual_rank = min(rank, min(weight.shape) // 2)
-            if actual_rank < 4:
-                return None, None
-            U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
-            S_sqrt = torch.sqrt(S[:actual_rank])
-            # Standard LoRA factorization: W ≈ W_B @ W_A
-            W_A = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous()  # [rank, in_features]
-            W_B = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous()  # [out_features, rank]
-            return W_A.to(original_dtype), W_B.to(original_dtype)
-        elif weight.ndim == 4:
-            out_ch, in_ch, k_h, k_w = weight.shape
-            if k_h * k_w <= 9:  # small conv kernels (e.g., 3x3)
-                # Reshape to 2D: [out_ch, in_ch * k_h * k_w]
-                weight_2d = weight.view(out_ch, -1)
-                actual_rank = min(rank, min(weight_2d.shape) // 2)
-                if actual_rank < 4:
-                    return None, None
-                U, S, Vh = torch.linalg.svd(weight_2d.float(), full_matrices=False)
-                S_sqrt = torch.sqrt(S[:actual_rank])
-                W_A_2d = (Vh[:actual_rank, :] * S_sqrt.unsqueeze(1)).contiguous()
-                W_B_2d = (U[:, :actual_rank] * S_sqrt.unsqueeze(0)).contiguous()
-                # Reshape back to conv format
-                W_A = W_A_2d.view(actual_rank, in_ch, k_h, k_w).contiguous()
-                W_B = W_B_2d.view(out_ch, actual_rank, 1, 1).contiguous()
-                return W_A.to(original_dtype), W_B.to(original_dtype)
-        return None, None
-    except Exception as e:
-        print(f"Decomposition error for {original_shape}: {e}")
-        traceback.print_exc()
-        return None, None
-
-def extract_correction_factors(original_weight, fp8_weight):
-    """Extract per-channel/tensor correction factors (difference method)."""
-    with torch.no_grad():
-        # Convert to float32 for precision
-        orig = original_weight.float()
-        quant = fp8_weight.float()
-
-        # Compute error (what needs to be added to FP8 to recover original)
-        error = orig - quant
-
-        # Skip if error is negligible
-        error_norm = torch.norm(error)
-        orig_norm = torch.norm(orig)
-        if orig_norm > 1e-6 and error_norm / orig_norm < 0.01:
-            return None
-
-        # For 4D tensors (common in VAE, CNNs)
-        if orig.ndim == 4:
-            # Channel dimension is typically dimension 0 (output channels)
-            channel_dim = 0
-            # Compute mean error per output channel
-            channel_mean = error.mean(dim=tuple(i for i in range(1, orig.ndim)), keepdim=True)
-            return channel_mean.to(original_weight.dtype)
-
-        # For 2D tensors (linear layers)
-        elif orig.ndim == 2:
-            # Compute mean error per output row
-            row_mean = error.mean(dim=1, keepdim=True)
-            return row_mean.to(original_weight.dtype)
-
-        # For 1D tensors (bias, batchnorm)
-        else:
-            return error.mean().to(original_weight.dtype)
-
-def get_tensor_info(tensor):
-    """Get detailed tensor information for pattern matching."""
-    shape = list(tensor.shape)
-    dim = tensor.dim()
-    numel = tensor.numel()
-    dtype = str(tensor.dtype)
-
-    # Determine tensor type based on shape
-    tensor_type = "other"
-    if dim == 4 and shape[2] == shape[3]:  # Convolutional layer with square kernel
-        tensor_type = "conv"
-    elif dim == 2:
-        if shape[0] > shape[1] * 4:  # More likely to be output projection
-            tensor_type = "output_proj"
-        elif shape[1] > shape[0] * 4:  # More likely to be input projection
-            tensor_type = "input_proj"
-        else:
-            tensor_type = "linear"
-    elif dim == 1:
-        tensor_type = "bias"
-
-    return {
-        "shape": shape,
-        "dim": dim,
-        "numel": numel,
-        "type": tensor_type,
-        "dtype": dtype
-    }
-
-def matches_pattern(key, tensor_info, pattern):
-    """Check if a tensor matches a pattern definition."""
-    key_lower = key.lower()
-
-    # Match by key name pattern
-    if "key_pattern" in pattern:
-        key_pattern = pattern["key_pattern"].lower()
-        if key_pattern != "all" and key_pattern not in key_lower:
-            return False
-
-    # Match by tensor dimension
-    if "dim" in pattern and tensor_info["dim"] != pattern["dim"]:
-        return False
-
-    # Match by tensor type
-    if "type" in pattern and tensor_info["type"] != pattern["type"]:
-        return False
-
-    # Match by minimum tensor size
-    if "min_size" in pattern and tensor_info["numel"] < pattern["min_size"]:
-        return False
-
-    # Match by shape constraints
-    if "shape_contains" in pattern:
-        shape_contains = pattern["shape_contains"]
-        if not any(shape_contains == dim for dim in tensor_info["shape"]):
-            return False
-
-    return True
-
 def load_model_files(model_paths, model_format="safetensors", progress_callback=None):
     """
     Load model weights from one or more files, supporting sharded safetensors and other formats.
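
The deleted `low_rank_decomposition` helper factored each 2D weight with a truncated SVD so that `weight ≈ W_B @ W_A`, splitting `sqrt(S)` across both factors. A minimal standalone sketch of that contract on a toy tensor (illustrative only, not code from app.py):

```python
import torch

weight = torch.randn(256, 512)  # toy 2D weight, [out_features, in_features]
rank = 64
U, S, Vh = torch.linalg.svd(weight.float(), full_matrices=False)
S_sqrt = torch.sqrt(S[:rank])
W_A = Vh[:rank, :] * S_sqrt.unsqueeze(1)  # [rank, in_features]
W_B = U[:, :rank] * S_sqrt.unsqueeze(0)   # [out_features, rank]
approx = W_B @ W_A
rel_err = torch.norm(weight - approx) / torch.norm(weight)
print(f"relative error at rank {rank}: {rel_err:.4f}")  # shrinks as rank grows
```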
@@ -279,10 +145,10 @@ def extract_base_name_from_sharded_files(model_paths):
 
     return base_name
 
-def convert_model_to_fp8_with_recovery(model_paths, output_dir, fp8_format, recovery_rules,
-                                       model_format="safetensors", progress=gr.Progress()):
-    """Convert model to FP8 with customizable per-tensor recovery strategies."""
-    progress(0.05, desc=f"Starting FP8 conversion with precision recovery for {model_format}...")
     try:
         metadata = read_model_metadata(model_paths, model_format)
         progress(0.1, desc="Loaded metadata.")
@@ -300,121 +166,63 @@ def convert_model_to_fp8_with_recovery(model_paths, output_dir, fp8_format, reco
 
         # Initialize outputs
         sd_fp8 = {}
-        recovery_weights = {}
-        stats = {
-            "total_layers": len(state_dict),
-            "processed_layers": 0,
-            "skipped_layers": [],
-            "recovery_counts": {"lora": 0, "diff": 0},
-            "rule_matches": {i: 0 for i in range(len(recovery_rules))}
         }
 
         # Process each tensor
         total = len(state_dict)
         for i, key in enumerate(state_dict):
-            progress(0.3 + 0.5 * (i / total), desc=f"Processing {i+1}/{total}: {key.split('.')[-1]}")
             weight = state_dict[key]
-            tensor_info = get_tensor_info(weight)
 
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                 fp8_weight = weight.to(fp8_dtype)
                 sd_fp8[key] = fp8_weight
             else:
                 sd_fp8[key] = weight
-                stats["skipped_layers"].append(f"{key}: non-float dtype")
-                continue
-
-            # Find matching rule for this tensor
-            recovery_applied = False
-            matched_rule_index = -1
-
-            for rule_idx, rule in enumerate(recovery_rules):
-                if matches_pattern(key, tensor_info, rule):
-                    matched_rule_index = rule_idx
-                    recovery_method = rule["method"]
-
-                    try:
-                        if recovery_method == "lora" and weight.ndim == 2:
-                            # LoRA recovery for 2D tensors only
-                            rank = rule.get("rank", 64)
-                            # Adjust rank for smaller matrices
-                            adjusted_rank = min(rank, min(weight.shape) // 2)
-                            if adjusted_rank >= 4:
-                                A, B = low_rank_decomposition(weight, rank=adjusted_rank)
-                                if A is not None and B is not None:
-                                    recovery_weights[f"lora_A.{key}"] = A
-                                    recovery_weights[f"lora_B.{key}"] = B
-                                    stats["processed_layers"] += 1
-                                    stats["recovery_counts"]["lora"] += 1
-                                    stats["rule_matches"][rule_idx] += 1
-                                    recovery_applied = True
-                                    break
-
-                        elif recovery_method == "diff":
-                            # Difference/correction recovery for any tensor type
-                            corr = extract_correction_factors(weight, fp8_weight)
-                            if corr is not None:
-                                recovery_weights[f"diff.{key}"] = corr
-                                stats["processed_layers"] += 1
-                                stats["recovery_counts"]["diff"] += 1
-                                stats["rule_matches"][rule_idx] += 1
-                                recovery_applied = True
-                                break
-
-                        # If method is "none" or recovery failed, continue to next rule
-                        if recovery_method == "none":
-                            break
-
-                    except Exception as e:
-                        stats["skipped_layers"].append(f"{key}: error with rule {rule_idx} - {str(e)}")
-
-            if not recovery_applied:
-                reason = "no matching rule" if matched_rule_index == -1 else f"recovery failed with rule {matched_rule_index}"
-                stats["skipped_layers"].append(f"{key}: {reason}")
 
         # Extract base name for output files
         base_name = extract_base_name_from_sharded_files(model_paths)
 
         # Save FP8 model
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
-        save_file(sd_fp8, fp8_path, metadata={"format": model_format, "fp8_format": fp8_format, **metadata})
-
-        # Save recovery weights if any were generated
-        recovery_path = None
-        if recovery_weights:
-            recovery_path = os.path.join(output_dir, f"{base_name}-recovery.safetensors")
-            recovery_metadata = {
-                "format": model_format,
-                "fp8_format": fp8_format,
-                "recovery_rules": json.dumps(recovery_rules),
-                "stats": json.dumps(stats)
-            }
-            save_file(recovery_weights, recovery_path, metadata=recovery_metadata)
 
-        progress(0.9, desc="Saved FP8 and recovery files.")
 
         # Generate stats message
-        stats_msg = f"FP8 ({fp8_format}) conversion complete with precision recovery:\n"
-        stats_msg += f"- Total layers: {stats['total_layers']}\n"
-        stats_msg += f"- Layers with recovery: {stats['processed_layers']}\n"
-        stats_msg += f"  - LoRA recovery: {stats['recovery_counts']['lora']}\n"
-        stats_msg += f"  - Difference recovery: {stats['recovery_counts']['diff']}\n"
-
-        # Show rule effectiveness
-        stats_msg += "\nRule effectiveness:\n"
-        for rule_idx, rule in enumerate(recovery_rules):
-            matches = stats["rule_matches"][rule_idx]
-            if matches > 0:
-                method = rule["method"]
-                pattern = rule.get("key_pattern", "no pattern")
-                rank_info = f" (rank {rule.get('rank', 'N/A')})" if method == "lora" else ""
-                stats_msg += f"- Rule {rule_idx}: {matches} layers matched pattern '{pattern}' with {method}{rank_info}\n"
-
-        if not recovery_weights:
-            stats_msg += "\n⚠️ No recovery weights were generated. All layers use pure FP8."
-
-        progress(1.0, desc="✅ FP8 conversion with precision recovery complete!")
-        return True, stats_msg, stats, fp8_path, recovery_path
 
     except Exception as e:
         traceback.print_exc()
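
The loop above resolved one recovery method per tensor with first-match-wins semantics, which is why the rule presets below always end in a catch-all. A condensed, self-contained sketch of that dispatch (simplified from the deleted `matches_pattern`; the names and example keys are illustrative):

```python
def pick_rule(key, ndim, numel, rules):
    """Return the first matching rule, mirroring the deleted matching checks."""
    for rule in rules:
        pat = rule.get("key_pattern", "all").lower()
        if pat != "all" and pat not in key.lower():
            continue
        if "dim" in rule and rule["dim"] != ndim:
            continue
        if "min_size" in rule and numel < rule["min_size"]:
            continue
        return rule
    return None

rules = [
    {"key_pattern": "attn", "dim": 2, "min_size": 10000, "method": "lora", "rank": 128},
    {"key_pattern": "all", "method": "none"},
]
print(pick_rule("unet.attn1.to_q.weight", 2, 320 * 320, rules)["method"])  # -> "lora"
print(pick_rule("unet.conv_in.weight", 4, 4 * 320 * 9, rules)["method"])   # -> "none"
```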
@@ -625,167 +433,12 @@ def upload_to_target(target_type, new_repo_id, output_dir, fp8_format, hf_token=
     else:
         raise ValueError("Unknown target")
 
-def generate_default_rules(architecture="auto"):
-    """Generate default recovery rules based on architecture."""
-    if architecture == "vae":
-        return """[
-    {
-        "key_pattern": "vae",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "encoder",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "decoder",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "all",
-        "method": "none"
-    }
-]"""
-    elif architecture == "text_encoder":
-        return """[
-    {
-        "key_pattern": "text",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 64
-    },
-    {
-        "key_pattern": "emb",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 64
-    },
-    {
-        "key_pattern": "attn",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 128
-    },
-    {
-        "key_pattern": "all",
-        "method": "none"
-    }
-]"""
-    elif architecture == "unet_transformer":
-        return """[
-    {
-        "key_pattern": "attn",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 128
-    },
-    {
-        "key_pattern": "transformer",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 96
-    },
-    {
-        "key_pattern": "all",
-        "method": "none"
-    }
-]"""
-    elif architecture == "unet_conv":
-        return """[
-    {
-        "key_pattern": "conv",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "resnet",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "down",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "up",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "all",
-        "method": "none"
-    }
-]"""
-    else:  # "all" or "auto"
-        return """[
-    {
-        "key_pattern": "vae",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "encoder",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "decoder",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "text",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 64
-    },
-    {
-        "key_pattern": "emb",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 64
-    },
-    {
-        "key_pattern": "attn",
-        "dim": 2,
-        "min_size": 10000,
-        "method": "lora",
-        "rank": 128
-    },
-    {
-        "key_pattern": "conv",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "resnet",
-        "dim": 4,
-        "method": "diff"
-    },
-    {
-        "key_pattern": "all",
-        "method": "none"
-    }
-]"""
-
 def process_and_upload_fp8(
     source_type,
     repo_url,
     filename_pattern,
     model_format,
     fp8_format,
-    recovery_rules_json,
     target_type,
     new_repo_id,
     hf_token,
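
A detail worth noting when reading those presets: they are JSON *strings* (they populated a Gradio Textbox), and the handler parsed them back into rule dicts with `json.loads`, as the next hunk shows. A quick sketch of that round trip (illustrative values):

```python
import json

rules_text = """[
    {"key_pattern": "vae", "dim": 4, "method": "diff"},
    {"key_pattern": "all", "method": "none"}
]"""
rules = json.loads(rules_text)  # back to a list of rule dicts
assert rules[0]["method"] == "diff" and rules[-1]["key_pattern"] == "all"
```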
@@ -800,20 +453,6 @@ def process_and_upload_fp8(
     if target_type == "huggingface" and not hf_token:
         return None, "❌ Hugging Face token required for target.", "", ""
 
-    # Parse recovery rules
-    try:
-        recovery_rules = json.loads(recovery_rules_json)
-    except json.JSONDecodeError:
-        return None, "❌ Invalid recovery rules JSON.", "", ""
-
-    # Validate rules
-    valid_methods = ["none", "lora", "diff"]
-    for rule in recovery_rules:
-        if "method" not in rule or rule["method"] not in valid_methods:
-            return None, f"❌ Invalid method in rule. Use 'none', 'lora', or 'diff'", "", ""
-        if rule["method"] == "lora" and "rank" not in rule:
-            return None, "❌ LoRA method requires 'rank' parameter", "", ""
-
     temp_dir = None
     output_dir = tempfile.mkdtemp()
     try:
@@ -822,9 +461,9 @@
             source_type, repo_url, filename_pattern, model_format, hf_token, progress
         )
 
-        progress(0.8, desc="Converting to FP8 with precision recovery...")
-        success, msg, stats, fp8_path, recovery_path = convert_model_to_fp8_with_recovery(
-            model_paths, output_dir, fp8_format, recovery_rules, model_format, progress
         )
 
         if not success:
@@ -845,68 +484,40 @@ def process_and_upload_fp8(
             original_filename += f" matching '{filename_pattern}'"
 
         fp8_filename = os.path.basename(fp8_path)
-        recovery_filename = os.path.basename(recovery_path) if recovery_path else ""
 
         readme = f"""---
 library_name: diffusers
 tags:
 - fp8
 - safetensors
-- precision-recovery
-- mixed-method
 - converted-by-gradio
 ---
-# FP8 Model with Per-Tensor Precision Recovery
 - **Source**: `{repo_url}`
 - **Original File(s)**: `{original_filename}`
 - **Original Format**: `{model_format}`
 - **FP8 Format**: `{fp8_format.upper()}`
 - **FP8 File**: `{fp8_filename}`
-- **Recovery File**: `{recovery_filename if recovery_filename else "None"}`
-## Recovery Rules Used
-```json
-{json.dumps(recovery_rules, indent=2)}
-```
-## Usage (Inference)
 ```python
 from safetensors.torch import load_file
 import torch
 
 # Load FP8 model
 fp8_state = load_file("{fp8_filename}")
-# Load recovery weights if available
-recovery_state = load_file("{recovery_filename}") if "{recovery_filename}" and os.path.exists("{recovery_filename}") else {{}}
-# Reconstruct high-precision weights
-reconstructed = {{}}
-for key in fp8_state:
-    fp8_weight = fp8_state[key].to(torch.float32)  # Convert to float32 for computation
-
-    # Apply LoRA recovery if available
-    lora_a_key = f"lora_A.{{key}}"
-    lora_b_key = f"lora_B.{{key}}"
-    if lora_a_key in recovery_state and lora_b_key in recovery_state:
-        A = recovery_state[lora_a_key].to(torch.float32)
-        B = recovery_state[lora_b_key].to(torch.float32)
-        # Reconstruct the low-rank approximation
-        lora_weight = B @ A
-        fp8_weight = fp8_weight + lora_weight
-
-    # Apply difference recovery if available
-    diff_key = f"diff.{{key}}"
-    if diff_key in recovery_state:
-        diff = recovery_state[diff_key].to(torch.float32)
-        fp8_weight = fp8_weight + diff
-
-    reconstructed[key] = fp8_weight
-# Use reconstructed weights in your model
-model.load_state_dict(reconstructed)
 ```
-> **Note**: For best results, use the same recovery configuration during inference as was used during extraction.
 
 > Requires PyTorch ≥ 2.1 for FP8 support.
 
 ## Statistics
-- **Total layers**: {stats['total_layers']}
-- **Layers with recovery**: {stats['processed_layers']}
-  - LoRA recovery: {stats['recovery_counts']['lora']}
-  - Difference recovery: {stats['recovery_counts']['diff']}
 """
 
         with open(os.path.join(output_dir, "README.md"), "w") as f:
@@ -924,23 +535,17 @@ model.load_state_dict(reconstructed)
         progress(1.0, desc="✅ Done!")
 
         # Generate result HTML
-        recovery_links = []
-        if recovery_path:
-            recovery_links.append(f"- **Recovery weights**: `{recovery_filename}`")
-
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
-Includes:
 - FP8 model: `{fp8_filename}`
-- {chr(10).join(recovery_links)}
 """
 
-        recovery_details = f"Recovery file: {recovery_filename}" if recovery_filename else "No recovery weights generated"
         return (gr.HTML(result_html),
-                "✅ FP8 conversion with precision recovery successful!",
                 msg,
-                recovery_details)
 
     except Exception as e:
         traceback.print_exc()
@@ -951,9 +556,9 @@ Includes:
         shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
 
-with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery") as demo:
-    gr.Markdown("# 🔄 Advanced FP8 Quantizer with Per-Tensor Precision Recovery")
-    gr.Markdown("Convert model files (safetensors, pth, ckpt) → **FP8** + **customizable precision recovery**. Supports any number of sharded files.")
 
     with gr.Row():
         with gr.Column():
@@ -975,70 +580,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
     with gr.Accordion("FP8 Settings", open=True):
         fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
 
-    with gr.Accordion("Per-Tensor Recovery Rules", open=True):
-        gr.Markdown("""
-        ### Configure recovery strategy for each tensor pattern
-
-        Format: JSON array of rule objects:
-        ```json
-        [
-            {
-                "key_pattern": "vae",
-                "dim": 4,
-                "method": "diff"
-            },
-            {
-                "key_pattern": "attn",
-                "dim": 2,
-                "min_size": 10000,
-                "method": "lora",
-                "rank": 64
-            },
-            {
-                "key_pattern": "all",
-                "method": "none"
-            }
-        ]
-        ```
-
-        ### Rule Fields (all optional except "method"):
-        - `key_pattern`: Substring to match in weight keys (case-insensitive). Use "all" to match everything.
-        - `dim`: Tensor dimension (e.g., 2 for linear layers, 4 for convolutions)
-        - `type`: Tensor type ("conv", "linear", "bias", "input_proj", "output_proj")
-        - `min_size`: Minimum number of elements in tensor
-        - `shape_contains`: Specific dimension size that must be present in shape
-        - `method`: "none" (pure FP8), "lora" (low-rank adaptation), or "diff" (difference/correction)
-        - `rank`: Required for "lora" method (higher = better quality but larger file)
-
-        **Rules are applied in order** - first match wins. Always end with a catch-all rule.
-        """)
-
-    recovery_rules_json = gr.Textbox(
-        value=generate_default_rules("all"),
-        lines=15,
-        label="Recovery Rules (JSON)",
-        interactive=True
-    )
-
-    architecture_preset = gr.Dropdown(
-        choices=[
-            ("Auto-detect architecture", "auto"),
-            ("VAE (Difference method)", "vae"),
-            ("Text Encoder (LoRA)", "text_encoder"),
-            ("UNet Transformers (LoRA)", "unet_transformer"),
-            ("UNet Convolutions (Difference)", "unet_conv"),
-            ("All Components (Mixed)", "all")
-        ],
-        value="auto",
-        label="Architecture Preset"
-    )
-
-    architecture_preset.change(
-        fn=generate_default_rules,
-        inputs=architecture_preset,
-        outputs=recovery_rules_json
-    )
-
     with gr.Accordion("Authentication", open=False):
         hf_token = gr.Textbox(label="Hugging Face Token", type="password")
         modelscope_token = gr.Textbox(label="ModelScope Token (optional)", type="password", visible=MODELScope_AVAILABLE)
@@ -1050,7 +591,7 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
 
     status_output = gr.Markdown()
     detailed_log = gr.Textbox(label="Processing Log", interactive=False, lines=10)
-    recovery_summary = gr.Textbox(label="Recovery Files Generated", interactive=False, lines=3)
 
     convert_btn = gr.Button("🚀 Convert & Upload", variant="primary")
     repo_link_output = gr.HTML()
@@ -1063,7 +604,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
             filename_pattern,
             model_format,
             fp8_format,
-            recovery_rules_json,
             target_type,
             new_repo_id,
             hf_token,
@@ -1082,7 +622,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
                 "auto",
                 "safetensors",
                 "e4m3fn",
-                generate_default_rules("vae"),
                 "huggingface"
             ],
             [
@@ -1091,7 +630,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
                 "auto",
                 "safetensors",
                 "e5m2",
-                generate_default_rules("text_encoder"),
                 "huggingface"
             ],
            [
@@ -1100,7 +638,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
                 "auto",
                 "safetensors",
                 "e5m2",
-                generate_default_rules("unet_transformer"),
                 "huggingface"
             ],
             [
@@ -1109,7 +646,6 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
                 "model-*.safetensors",
                 "safetensors",
                 "e5m2",
-                generate_default_rules("all"),
                 "huggingface"
             ],
             [
@@ -1118,70 +654,49 @@ with gr.Blocks(title="Advanced FP8 Quantizer with Per-Tensor Precision Recovery"
                 "sd-v1-4.ckpt",
                 "ckpt",
                 "e5m2",
-                generate_default_rules("all"),
                 "huggingface"
             ]
         ],
-        inputs=[source_type, repo_url, filename_pattern, model_format, fp8_format, recovery_rules_json, target_type],
         label="Example Conversions",
         cache_examples=False
     )
 
     gr.Markdown("""
-## 💡 Tensor Pattern Matching Guide
-
-This tool uses **advanced tensor pattern matching** to determine which recovery method to apply to each layer:
 
-### **Key Patterns**
-- Match by substring in weight key name
-- Case-insensitive matching
-- Special keyword "all" matches everything
 
-### **Tensor Properties**
-- **Dimension (dim)**: Use `dim: 2` for linear layers, `dim: 4` for convolutions
-- **Type**: Automatic classification based on shape:
-  - `conv`: 4D tensors with equal spatial dimensions
-  - `linear`: 2D tensors without extreme aspect ratio
-  - `input_proj`: 2D tensors with much larger second dimension
-  - `output_proj`: 2D tensors with much larger first dimension
-  - `bias`: 1D tensors
 
-### **Size Constraints**
-- **min_size**: Only apply to tensors with at least N elements
-- **shape_contains**: Match tensors containing a specific dimension size
-
-### **Rule Processing**
-- Rules are evaluated **in order**
-- First matching rule wins
-- Always include a catch-all rule at the end
-
-> **Pro Tip for VAE**: Use `"dim": 4` combined with `"key_pattern": "vae"` to reliably target VAE convolutional layers with difference recovery.
-
-## 📁 File Format Support
-
-This tool supports multiple model formats:
-
-- **Safetensors**: Modern, secure format for storing tensors. Supports sharded files (e.g., `model-00001-of-00005.safetensors`).
-- **PTH/PT**: PyTorch checkpoint files. Can contain state dicts or full model objects.
-- **CKPT**: Checkpoint files, commonly used for stable diffusion models.
-
-### Shard Support:
 - **Unlimited Shards**: Supports any number of sharded files (2, 5, 10, 20+)
 - **Auto-Detection**: Automatically finds all shards when using "auto" pattern
-- **Parallel Downloads**: Downloads multiple shards simultaneously for faster processing
-- **Memory Efficient**: Processes shards one at a time to manage memory usage
-- **Progress Tracking**: Shows detailed progress for each shard download and processing
-
-### Filename Patterns:
-- **Auto-detection**: Use "auto" to automatically find all sharded safetensors files
-- **Wildcard patterns**: Use `model-*.safetensors` to match sharded files
-- **Specific file**: Use exact filename for single files
-
-For models with many shards (e.g., 5+ files), the tool will:
-1. Automatically detect all shards
-2. Download them in parallel (up to 4 simultaneous downloads)
-3. Load them sequentially to manage memory
-4. Merge them into a single FP8 model
     """)
 
-demo.launch()
 
 
     return base_name
 
+def convert_model_to_fp8(model_paths, output_dir, fp8_format,
+                         model_format="safetensors", progress=gr.Progress()):
+    """Simple and fast FP8 conversion without recovery strategies."""
+    progress(0.05, desc=f"Starting FP8 conversion for {model_format}...")
     try:
         metadata = read_model_metadata(model_paths, model_format)
         progress(0.1, desc="Loaded metadata.")
 
         # Initialize outputs
         sd_fp8 = {}
+        conversion_stats = {
+            "total_tensors": len(state_dict),
+            "converted_tensors": 0,
+            "skipped_tensors": 0,
+            "skipped_reasons": []
         }
 
         # Process each tensor
         total = len(state_dict)
         for i, key in enumerate(state_dict):
+            if i % 100 == 0:  # Update progress every 100 tensors for speed
+                progress(0.3 + 0.6 * (i / total), desc=f"Converting {i}/{total} tensors...")
+
             weight = state_dict[key]
 
+            # Convert only float tensors to FP8
             if weight.dtype in [torch.float16, torch.float32, torch.bfloat16]:
                 fp8_weight = weight.to(fp8_dtype)
                 sd_fp8[key] = fp8_weight
+                conversion_stats["converted_tensors"] += 1
             else:
+                # Keep non-float tensors as-is (e.g., ints, bools)
                 sd_fp8[key] = weight
+                conversion_stats["skipped_tensors"] += 1
+                conversion_stats["skipped_reasons"].append(f"{key}: {weight.dtype}")
 
         # Extract base name for output files
         base_name = extract_base_name_from_sharded_files(model_paths)
 
         # Save FP8 model
         fp8_path = os.path.join(output_dir, f"{base_name}-fp8-{fp8_format}.safetensors")
+        save_file(sd_fp8, fp8_path, metadata={
+            "format": model_format,
+            "fp8_format": fp8_format,
+            "original_files": str(len(model_paths)),
+            "conversion_stats": json.dumps(conversion_stats),
+            **metadata
+        })
 
+        progress(0.95, desc="Saved FP8 file.")
 
         # Generate stats message
+        stats_msg = f"✅ FP8 ({fp8_format}) conversion complete!\n"
+        stats_msg += f"- Total tensors: {conversion_stats['total_tensors']}\n"
+        stats_msg += f"- Converted to FP8: {conversion_stats['converted_tensors']}\n"
+        stats_msg += f"- Skipped (non-float): {conversion_stats['skipped_tensors']}\n"
+        stats_msg += f"- Output file: {os.path.basename(fp8_path)}\n"
+
+        if conversion_stats["skipped_tensors"] > 0:
+            stats_msg += "\n⚠️ Some tensors were skipped (non-float types):\n"
+            for i, reason in enumerate(conversion_stats["skipped_reasons"][:5]):  # Show first 5
+                stats_msg += f"  - {reason}\n"
+            if len(conversion_stats["skipped_reasons"]) > 5:
+                stats_msg += f"  - ... and {len(conversion_stats['skipped_reasons']) - 5} more\n"
+
+        progress(1.0, desc="✅ FP8 conversion complete!")
+        return True, stats_msg, conversion_stats, fp8_path, None
 
     except Exception as e:
         traceback.print_exc()
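
The new function relies on `fp8_dtype`, which is resolved outside this excerpt. A plausible sketch of that mapping, assuming PyTorch ≥ 2.1 and that app.py derives it from the `fp8_format` radio value (the helper name here is hypothetical):

```python
import torch

def resolve_fp8_dtype(fp8_format):
    """Map the UI's format string to a torch float8 dtype (PyTorch >= 2.1)."""
    if fp8_format == "e4m3fn":
        return torch.float8_e4m3fn  # more mantissa bits, narrower range
    if fp8_format == "e5m2":
        return torch.float8_e5m2    # fewer mantissa bits, wider range
    raise ValueError(f"Unknown FP8 format: {fp8_format}")

w = torch.randn(8, 8, dtype=torch.float16)
w_fp8 = w.to(resolve_fp8_dtype("e5m2"))  # the same cast the loop above performs
print(w_fp8.dtype)  # torch.float8_e5m2
```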
 
     else:
         raise ValueError("Unknown target")
 
 def process_and_upload_fp8(
     source_type,
     repo_url,
     filename_pattern,
     model_format,
     fp8_format,
     target_type,
     new_repo_id,
     hf_token,
 
     if target_type == "huggingface" and not hf_token:
         return None, "❌ Hugging Face token required for target.", "", ""
 
     temp_dir = None
     output_dir = tempfile.mkdtemp()
     try:
 
             source_type, repo_url, filename_pattern, model_format, hf_token, progress
         )
 
+        progress(0.8, desc="Converting to FP8...")
+        success, msg, stats, fp8_path, _ = convert_model_to_fp8(
+            model_paths, output_dir, fp8_format, model_format, progress
         )
 
         if not success:
             original_filename += f" matching '{filename_pattern}'"
 
         fp8_filename = os.path.basename(fp8_path)
 
         readme = f"""---
 library_name: diffusers
 tags:
 - fp8
 - safetensors
 - converted-by-gradio
 ---
+# FP8 Model Conversion
 - **Source**: `{repo_url}`
 - **Original File(s)**: `{original_filename}`
 - **Original Format**: `{model_format}`
 - **FP8 Format**: `{fp8_format.upper()}`
 - **FP8 File**: `{fp8_filename}`
+
+## Usage
 ```python
 from safetensors.torch import load_file
 import torch
+
 # Load FP8 model
 fp8_state = load_file("{fp8_filename}")
+
+# Convert tensors back to float32 for computation (auto-converted by PyTorch)
+model.load_state_dict(fp8_state)
 ```
+
+> **Note**: FP8 tensors are automatically converted to float32 when loaded in PyTorch.
 > Requires PyTorch ≥ 2.1 for FP8 support.
+
 ## Statistics
+- **Total tensors**: {stats['total_tensors']}
+- **Converted to FP8**: {stats['converted_tensors']}
+- **Skipped (non-float)**: {stats['skipped_tensors']}
 """
 
         with open(os.path.join(output_dir, "README.md"), "w") as f:
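
One caveat on the README snippet in this hunk: `load_file` returns tensors still in float8; the upcast only happens when `load_state_dict` copies them into higher-precision parameters. A sketch of doing the cast explicitly instead (the filename is hypothetical):

```python
import torch
from safetensors.torch import load_file

fp8_state = load_file("model-fp8-e5m2.safetensors")  # hypothetical output name
fp32_state = {
    k: v.to(torch.float32) if v.dtype in (torch.float8_e4m3fn, torch.float8_e5m2) else v
    for k, v in fp8_state.items()
}
# model.load_state_dict(fp32_state)
```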
 
         progress(1.0, desc="✅ Done!")
 
         # Generate result HTML
         result_html = f"""
 ✅ Success!
 Model uploaded to: <a href="{repo_url_final}" target="_blank">{new_repo_id}</a>
 - FP8 model: `{fp8_filename}`
+- Converted {stats['converted_tensors']} tensors to {fp8_format.upper()}
 """
 
         return (gr.HTML(result_html),
+                "✅ FP8 conversion successful!",
                 msg,
+                "")
 
     except Exception as e:
         traceback.print_exc()
 
         shutil.rmtree(temp_dir, ignore_errors=True)
         shutil.rmtree(output_dir, ignore_errors=True)
 
+with gr.Blocks(title="Fast FP8 Model Converter") as demo:
+    gr.Markdown("# ⚡ Fast FP8 Model Converter")
+    gr.Markdown("Convert model files (safetensors, pth, ckpt) → **FP8**. Supports sharded files with auto-discovery. Simple and fast!")
 
     with gr.Row():
         with gr.Column():
 
     with gr.Accordion("FP8 Settings", open=True):
         fp8_format = gr.Radio(["e4m3fn", "e5m2"], value="e5m2", label="FP8 Format")
 
     with gr.Accordion("Authentication", open=False):
         hf_token = gr.Textbox(label="Hugging Face Token", type="password")
         modelscope_token = gr.Textbox(label="ModelScope Token (optional)", type="password", visible=MODELScope_AVAILABLE)
 
     status_output = gr.Markdown()
     detailed_log = gr.Textbox(label="Processing Log", interactive=False, lines=10)
+    recovery_summary = gr.Textbox(label="Additional Info", interactive=False, lines=3)
 
     convert_btn = gr.Button("🚀 Convert & Upload", variant="primary")
     repo_link_output = gr.HTML()
 
             filename_pattern,
             model_format,
             fp8_format,
             target_type,
             new_repo_id,
             hf_token,
 
                 "auto",
                 "safetensors",
                 "e4m3fn",
                 "huggingface"
             ],
             [
                 "auto",
                 "safetensors",
                 "e5m2",
                 "huggingface"
             ],
             [
                 "auto",
                 "safetensors",
                 "e5m2",
                 "huggingface"
             ],
             [
                 "model-*.safetensors",
                 "safetensors",
                 "e5m2",
                 "huggingface"
             ],
             [
                 "sd-v1-4.ckpt",
                 "ckpt",
                 "e5m2",
                 "huggingface"
             ]
         ],
+        inputs=[source_type, repo_url, filename_pattern, model_format, fp8_format, target_type],
         label="Example Conversions",
         cache_examples=False
     )
 
     gr.Markdown("""
+## 📁 Fast FP8 Conversion Tool
+
+This tool provides **fast and simple FP8 conversion** for various model formats:
+
+### **Supported Formats:**
+- **Safetensors**: Modern, secure format. Supports sharded files (e.g., `model-00001-of-00005.safetensors`)
+- **PTH/PT**: PyTorch checkpoint files
+- **CKPT**: Checkpoint files (commonly used for stable diffusion models)
+
+### **Shard Support:**
 - **Unlimited Shards**: Supports any number of sharded files (2, 5, 10, 20+)
 - **Auto-Detection**: Automatically finds all shards when using "auto" pattern
+- **Parallel Downloads**: Downloads multiple shards simultaneously (up to 4 at once)
+- **Memory Efficient**: Processes files efficiently to manage memory
+
+### **Performance Features:**
+- **Fast Conversion**: Simple dtype conversion without complex recovery strategies
+- **Batch Processing**: Processes tensors in batches for better performance
+- **Progress Tracking**: Shows detailed progress for each step
+
+### **How It Works:**
+1. **Discovery**: Automatically detects sharded files or uses your specified pattern
+2. **Download**: Downloads files in parallel for maximum speed
+3. **Conversion**: Converts float tensors to FP8, leaves other types unchanged
+4. **Upload**: Uploads the converted model to your target repository
+
+### **Usage Tips:**
+- Use "auto" pattern to automatically detect all sharded safetensors files
+- Use `model-*.safetensors` to match specific shard patterns
+- For single files, just enter the filename (e.g., `model.safetensors`)
+- FP8 conversion reduces model size by ~4x compared to FP32
+- FP8 tensors are automatically converted to float32 when loaded in PyTorch
+
+> **Note**: This is a simple conversion tool. For precision recovery options, use the advanced version.
     """)
 
+demo.launch()
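
Since the fast path drops the recovery machinery entirely, the quantization error is whatever the FP8 round trip costs. A short sketch for estimating that per format on a random tensor (requires PyTorch ≥ 2.1; illustrative, not part of the commit):

```python
import torch

w = torch.randn(1024, 1024, dtype=torch.float32)
for fmt, dtype in [("e4m3fn", torch.float8_e4m3fn), ("e5m2", torch.float8_e5m2)]:
    roundtrip = w.to(dtype).to(torch.float32)       # quantize, then upcast back
    rel_err = torch.norm(w - roundtrip) / torch.norm(w)
    print(f"{fmt}: relative error = {rel_err.item():.3%}")
```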