Zihan Min committed on
Commit
8704f55
·
1 Parent(s): fb1e189

upload 0.6+0.5 fuser

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. qwen3_0.6b+qwen2.5_0.5b_Fuser/config.json +57 -0
  2. aggregator_config.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/aggregator_config.json +0 -0
  3. projector_0.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_0.json +0 -0
  4. projector_0.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_0.pt +0 -0
  5. projector_1.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_1.json +0 -0
  6. projector_1.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_1.pt +0 -0
  7. projector_10.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_10.json +0 -0
  8. projector_10.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_10.pt +0 -0
  9. projector_11.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_11.json +0 -0
  10. projector_11.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_11.pt +0 -0
  11. projector_12.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_12.json +0 -0
  12. projector_12.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_12.pt +0 -0
  13. projector_13.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_13.json +0 -0
  14. projector_13.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_13.pt +0 -0
  15. projector_14.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_14.json +0 -0
  16. projector_14.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_14.pt +0 -0
  17. projector_15.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_15.json +0 -0
  18. projector_15.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_15.pt +0 -0
  19. projector_16.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_16.json +0 -0
  20. projector_16.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_16.pt +0 -0
  21. projector_17.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_17.json +0 -0
  22. projector_17.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_17.pt +0 -0
  23. projector_18.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_18.json +0 -0
  24. projector_18.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_18.pt +0 -0
  25. projector_19.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_19.json +0 -0
  26. projector_19.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_19.pt +0 -0
  27. projector_2.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_2.json +0 -0
  28. projector_2.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_2.pt +0 -0
  29. projector_20.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_20.json +0 -0
  30. projector_20.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_20.pt +0 -0
  31. projector_21.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_21.json +0 -0
  32. projector_21.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_21.pt +0 -0
  33. projector_22.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_22.json +0 -0
  34. projector_22.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_22.pt +0 -0
  35. projector_23.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_23.json +0 -0
  36. projector_23.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_23.pt +0 -0
  37. projector_24.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_24.json +0 -0
  38. projector_24.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_24.pt +0 -0
  39. projector_25.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_25.json +0 -0
  40. projector_25.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_25.pt +0 -0
  41. projector_26.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_26.json +0 -0
  42. projector_26.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_26.pt +0 -0
  43. projector_27.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_27.json +0 -0
  44. projector_27.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_27.pt +0 -0
  45. projector_3.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_3.json +0 -0
  46. projector_3.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_3.pt +0 -0
  47. projector_4.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_4.json +0 -0
  48. projector_4.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_4.pt +0 -0
  49. projector_5.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_5.json +0 -0
  50. projector_5.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_5.pt +0 -0
qwen3_0.6b+qwen2.5_0.5b_Fuser/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "base_model": "Qwen/Qwen3-0.6B",
4
+ "teacher_model": "Qwen/Qwen2.5-0.5B-Instruct",
5
+ "include_response": false,
6
+ "is_do_alignment": false,
7
+ "alignment_strategy": "first",
8
+ "projector": {
9
+ "type": "C2CProjector",
10
+ "params": {
11
+ "hidden_dim": 1024,
12
+ "intermediate_dim": 1024,
13
+ "num_layers": 3,
14
+ "dropout": 0.1,
15
+ "initial_temperature": 1.0,
16
+ "final_temperature": 0.001,
17
+ "anneal_steps": 1929
18
+ }
19
+ },
20
+ "mapping": "last_aligned"
21
+ },
22
+ "training": {
23
+ "learning_rate": 1e-4,
24
+ "weight_decay": 0.01,
25
+ "num_epochs": 1,
26
+ "max_length": 2048,
27
+ "device": "cuda",
28
+ "scheduler_type": "linear",
29
+ "warmup_ratio": 0.1,
30
+ "max_grad_norm": 1.0,
31
+ "gradient_accumulation_steps": 8,
32
+ "per_device_train_batch_size": 4,
33
+ "num_processes": 8,
34
+ "freeze": ["teacher","base"],
35
+ "seed": 42
36
+ },
37
+ "output": {
38
+ "output_dir": "local/checkpoints/0.6+0.5B_C2C_general_again_test",
39
+ "save_steps": 500,
40
+ "eval_steps": 100,
41
+ "wandb_config": {
42
+ "project": "Rosetta",
43
+ "mode": "offline",
44
+ "entity": "nics-efc",
45
+ "run_name": "0.6B+0.5B_C2C_general_OpenHermes_500k"
46
+ }
47
+ },
48
+ "data": {
49
+ "type": "OpenHermesChatDataset",
50
+ "kwargs": {
51
+ "split": "train",
52
+ "max_word_count": 2048,
53
+ "num_samples": 500000
54
+ },
55
+ "train_ratio": 0.99
56
+ }
57
+ }
aggregator_config.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/aggregator_config.json RENAMED
File without changes
projector_0.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_0.json RENAMED
File without changes
projector_0.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_0.pt RENAMED
File without changes
projector_1.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_1.json RENAMED
File without changes
projector_1.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_1.pt RENAMED
File without changes
projector_10.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_10.json RENAMED
File without changes
projector_10.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_10.pt RENAMED
File without changes
projector_11.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_11.json RENAMED
File without changes
projector_11.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_11.pt RENAMED
File without changes
projector_12.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_12.json RENAMED
File without changes
projector_12.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_12.pt RENAMED
File without changes
projector_13.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_13.json RENAMED
File without changes
projector_13.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_13.pt RENAMED
File without changes
projector_14.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_14.json RENAMED
File without changes
projector_14.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_14.pt RENAMED
File without changes
projector_15.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_15.json RENAMED
File without changes
projector_15.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_15.pt RENAMED
File without changes
projector_16.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_16.json RENAMED
File without changes
projector_16.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_16.pt RENAMED
File without changes
projector_17.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_17.json RENAMED
File without changes
projector_17.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_17.pt RENAMED
File without changes
projector_18.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_18.json RENAMED
File without changes
projector_18.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_18.pt RENAMED
File without changes
projector_19.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_19.json RENAMED
File without changes
projector_19.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_19.pt RENAMED
File without changes
projector_2.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_2.json RENAMED
File without changes
projector_2.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_2.pt RENAMED
File without changes
projector_20.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_20.json RENAMED
File without changes
projector_20.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_20.pt RENAMED
File without changes
projector_21.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_21.json RENAMED
File without changes
projector_21.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_21.pt RENAMED
File without changes
projector_22.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_22.json RENAMED
File without changes
projector_22.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_22.pt RENAMED
File without changes
projector_23.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_23.json RENAMED
File without changes
projector_23.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_23.pt RENAMED
File without changes
projector_24.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_24.json RENAMED
File without changes
projector_24.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_24.pt RENAMED
File without changes
projector_25.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_25.json RENAMED
File without changes
projector_25.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_25.pt RENAMED
File without changes
projector_26.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_26.json RENAMED
File without changes
projector_26.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_26.pt RENAMED
File without changes
projector_27.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_27.json RENAMED
File without changes
projector_27.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_27.pt RENAMED
File without changes
projector_3.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_3.json RENAMED
File without changes
projector_3.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_3.pt RENAMED
File without changes
projector_4.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_4.json RENAMED
File without changes
projector_4.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_4.pt RENAMED
File without changes
projector_5.json → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_5.json RENAMED
File without changes
projector_5.pt → qwen3_0.6b+qwen2.5_0.5b_Fuser/final/projector_5.pt RENAMED
File without changes