Sync clean ModelOpt weights from BBuf/flux1-dev-modelopt-fp8-sglang-transformer

Browse files

Files changed (5) hide show

README.md +42 -0
config.json +57 -0
diffusion_pytorch_model-00001-of-00002.safetensors +3 -0
diffusion_pytorch_model-00002-of-00002.safetensors +3 -0
diffusion_pytorch_model.safetensors.index.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,42 @@

+---
+language:
+- en
+library_name: sglang
+base_model: black-forest-labs/FLUX.1-dev
+tags:
+- sglang
+- diffusion
+- modelopt
+- fp8
+- transformer
+---
+# flux1-dev-modelopt-fp8-sglang-transformer
+This repository contains the SGLang-native ModelOpt FP8 transformer override for `black-forest-labs/FLUX.1-dev`.
+It is intended to be used with SGLang Diffusion through `--transformer-path` while keeping the base model separate:
+```bash
+sglang generate \
+  --model-path black-forest-labs/FLUX.1-dev \
+  --transformer-path lmsys/flux1-dev-modelopt-fp8-sglang-transformer \
+  --prompt "A cinematic scene with detailed lighting" \
+  --save-output
+```
+## Contents
+Apart from this README, the repository is intentionally minimal and contains only:
+- `config.json`
+- `*.safetensors` weight shard files
+- `*.safetensors.index.json` when the checkpoint is sharded
+
+Validation images, benchmark outputs, profiler traces, and conversion scratch artifacts are not stored in this model repository.
+## Notes
+- The quantization config is stored in `config.json` under the `quantization_config` key, with `quant_method=modelopt` and `quant_algo=FP8`.
+- Use this checkpoint with an SGLang version whose diffusion runtime includes ModelOpt FP8 support for the FLUX.1 model family.
+- The original base model license and usage terms still apply.

config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "_class_name": "FluxTransformer2DModel",
+    "_diffusers_version": "0.37.0",
+    "_name_or_path": "/tmp/flux1_fp8_run/base_model/FLUX.1-dev/transformer",
+    "attention_head_dim": 128,
+    "axes_dims_rope": [
+        16,
+        56,
+        56
+    ],
+    "guidance_embeds": true,
+    "in_channels": 64,
+    "joint_attention_dim": 4096,
+    "num_attention_heads": 24,
+    "num_layers": 19,
+    "num_single_layers": 38,
+    "out_channels": null,
+    "patch_size": 1,
+    "pooled_projection_dim": 768,
+    "quantization_config": {
+        "config_groups": {
+            "group_0": {
+                "input_activations": {
+                    "dynamic": false,
+                    "num_bits": 8,
+                    "type": "float"
+                },
+                "weights": {
+                    "dynamic": false,
+                    "num_bits": 8,
+                    "type": "float"
+                },
+                "targets": [
+                    "Linear"
+                ]
+            }
+        },
+        "ignore": [
+            "context_embedder",
+            "norm_out*",
+            "proj_out",
+            "time_text_embed*",
+            "x_embedder"
+        ],
+        "quant_algo": "FP8",
+        "kv_cache_scheme": {
+            "dynamic": false,
+            "num_bits": 8,
+            "type": "float"
+        },
+        "producer": {
+            "name": "modelopt",
+            "version": "0.43.0rc2.dev66+gf7557221e.d20260407"
+        },
+        "quant_method": "modelopt"
+    }
+}

diffusion_pytorch_model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b31bb763e18f4b8ccb4b6c7169f01cb7f85df77662b984619adc1b3e4f7e6fb
+size 15704547776

diffusion_pytorch_model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:471b52d758ee44f49706778e4a008bc9919cd74274e499ef0ac997178cb1d5da
+size 2360676752

diffusion_pytorch_model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff