MickJ committed on
Commit
358ff23
·
verified ·
1 Parent(s): cbc3f21

Sync clean ModelOpt weights from BBuf/flux1-dev-modelopt-fp8-sglang-transformer

Browse files
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: sglang
5
+ base_model: black-forest-labs/FLUX.1-dev
6
+ tags:
7
+ - sglang
8
+ - diffusion
9
+ - modelopt
10
+ - fp8
11
+ - transformer
12
+ ---
13
+
14
+ # flux1-dev-modelopt-fp8-sglang-transformer
15
+
16
+ This repository contains the SGLang-native ModelOpt FP8 transformer override for `black-forest-labs/FLUX.1-dev`.
17
+
18
+ It is intended to be used with SGLang Diffusion through `--transformer-path` while keeping the base model separate:
19
+
20
+ ```bash
21
+ sglang generate \
22
+ --model-path black-forest-labs/FLUX.1-dev \
23
+ --transformer-path lmsys/flux1-dev-modelopt-fp8-sglang-transformer \
24
+ --prompt "A cinematic scene with detailed lighting" \
25
+ --save-output
26
+ ```
27
+
28
+ ## Contents
29
+
30
+ The repository is intentionally minimal and contains only:
31
+
32
+ - `config.json`
33
+ - `*.safetensors` weight shard files
34
+ - `*.safetensors.index.json` when the checkpoint is sharded
35
+
36
+ Validation images, benchmark outputs, profiler traces, and conversion scratch artifacts are not stored in this model repository.
37
+
38
+ ## Notes
39
+
40
+ - Quantization config is stored in `config.json` with `quant_method=modelopt` and `quant_algo=FP8`.
41
+ - Use this checkpoint with an SGLang version whose diffusion runtime includes ModelOpt FP8 support for the FLUX.1 model family.
42
+ - The original base model license and usage terms still apply.
config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FluxTransformer2DModel",
3
+ "_diffusers_version": "0.37.0",
4
+ "_name_or_path": "/tmp/flux1_fp8_run/base_model/FLUX.1-dev/transformer",
5
+ "attention_head_dim": 128,
6
+ "axes_dims_rope": [
7
+ 16,
8
+ 56,
9
+ 56
10
+ ],
11
+ "guidance_embeds": true,
12
+ "in_channels": 64,
13
+ "joint_attention_dim": 4096,
14
+ "num_attention_heads": 24,
15
+ "num_layers": 19,
16
+ "num_single_layers": 38,
17
+ "out_channels": null,
18
+ "patch_size": 1,
19
+ "pooled_projection_dim": 768,
20
+ "quantization_config": {
21
+ "config_groups": {
22
+ "group_0": {
23
+ "input_activations": {
24
+ "dynamic": false,
25
+ "num_bits": 8,
26
+ "type": "float"
27
+ },
28
+ "weights": {
29
+ "dynamic": false,
30
+ "num_bits": 8,
31
+ "type": "float"
32
+ },
33
+ "targets": [
34
+ "Linear"
35
+ ]
36
+ }
37
+ },
38
+ "ignore": [
39
+ "context_embedder",
40
+ "norm_out*",
41
+ "proj_out",
42
+ "time_text_embed*",
43
+ "x_embedder"
44
+ ],
45
+ "quant_algo": "FP8",
46
+ "kv_cache_scheme": {
47
+ "dynamic": false,
48
+ "num_bits": 8,
49
+ "type": "float"
50
+ },
51
+ "producer": {
52
+ "name": "modelopt",
53
+ "version": "0.43.0rc2.dev66+gf7557221e.d20260407"
54
+ },
55
+ "quant_method": "modelopt"
56
+ }
57
+ }
diffusion_pytorch_model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b31bb763e18f4b8ccb4b6c7169f01cb7f85df77662b984619adc1b3e4f7e6fb
3
+ size 15704547776
diffusion_pytorch_model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471b52d758ee44f49706778e4a008bc9919cd74274e499ef0ac997178cb1d5da
3
+ size 2360676752
diffusion_pytorch_model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff