create_copies_for_each_mode

#3
by nithinraok - opened
Files changed (1) hide show
  1. app.py +11 -6
app.py CHANGED
@@ -4,7 +4,6 @@ import llava
4
  from peft import PeftModel
5
  import os
6
  from huggingface_hub import snapshot_download
7
- import copy
8
  import spaces
9
  # ---------------------------------
10
  # SINGLE-TURN MODEL SETUP
@@ -12,14 +11,16 @@ import spaces
12
 
13
  MODEL_BASE_SINGLE = snapshot_download(repo_id="nvidia/audio-flamingo-3")
14
  MODEL_BASE_THINK = os.path.join(MODEL_BASE_SINGLE, 'stage35')
 
 
15
 
16
  # model_single = llava.load(MODEL_BASE_SINGLE, model_base=None, devices=[0])
17
  model_single = llava.load(MODEL_BASE_SINGLE, model_base=None)
18
- model_single = model_single.to("cuda")
19
- model_single_copy = copy.deepcopy(model_single) # keep a copy of the original model for non-thinking mode
20
 
21
  generation_config_single = model_single.default_generation_config
22
 
 
23
  non_lora_trainables = torch.load(
24
  os.path.join(MODEL_BASE_THINK, "non_lora_trainables.bin"),
25
  map_location="cpu",
@@ -27,9 +28,13 @@ non_lora_trainables = torch.load(
27
  non_lora_trainables = {
28
  (k[6:] if k.startswith("model.") else k): v for k, v in non_lora_trainables.items()
29
  }
30
- model_single.load_state_dict(non_lora_trainables, strict=False)
 
 
 
 
31
  model_think = PeftModel.from_pretrained(
32
- model_single,
33
  MODEL_BASE_THINK,
34
  device_map="auto",
35
  torch_dtype=torch.float16,
@@ -51,7 +56,7 @@ def single_turn_infer(audio_file, prompt_text):
51
  try:
52
  sound = llava.Sound(audio_file)
53
  full_prompt = f"<sound>\n{prompt_text}"
54
- response = model_single_copy.generate_content([sound, full_prompt], generation_config=generation_config_single)
55
  return response
56
  except Exception as e:
57
  return f"❌ Error: {str(e)}"
 
4
  from peft import PeftModel
5
  import os
6
  from huggingface_hub import snapshot_download
 
7
  import spaces
8
  # ---------------------------------
9
  # SINGLE-TURN MODEL SETUP
 
11
 
12
  MODEL_BASE_SINGLE = snapshot_download(repo_id="nvidia/audio-flamingo-3")
13
  MODEL_BASE_THINK = os.path.join(MODEL_BASE_SINGLE, 'stage35')
14
+ device = "cuda" if torch.cuda.is_available() else "cpu"
15
+
16
 
17
  # model_single = llava.load(MODEL_BASE_SINGLE, model_base=None, devices=[0])
18
  model_single = llava.load(MODEL_BASE_SINGLE, model_base=None)
19
+ model_single = model_single.to(device)
 
20
 
21
  generation_config_single = model_single.default_generation_config
22
 
23
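+ # Load the non-LoRA trainable weights (non_lora_trainables.bin) for the thinking model; the LoRA adapters are attached further down via PeftModel.from_pretrained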
24
  non_lora_trainables = torch.load(
25
  os.path.join(MODEL_BASE_THINK, "non_lora_trainables.bin"),
26
  map_location="cpu",
 
28
  non_lora_trainables = {
29
  (k[6:] if k.startswith("model.") else k): v for k, v in non_lora_trainables.items()
30
  }
31
+
32
+ # Load model_think as a separate instance for thinking mode
33
+ model_think = llava.load(MODEL_BASE_SINGLE, model_base=None)
34
+ model_think = model_think.to(device)
35
+ model_think.load_state_dict(non_lora_trainables, strict=False)
36
  model_think = PeftModel.from_pretrained(
37
+ model_think,
38
  MODEL_BASE_THINK,
39
  device_map="auto",
40
  torch_dtype=torch.float16,
 
56
  try:
57
  sound = llava.Sound(audio_file)
58
  full_prompt = f"<sound>\n{prompt_text}"
59
+ response = model_single.generate_content([sound, full_prompt], generation_config=generation_config_single)
60
  return response
61
  except Exception as e:
62
  return f"❌ Error: {str(e)}"