Commit · 02721f3
Parent(s): cb7a4c9
Fix for returning GPU tensors to the main function after embedding with ZeroGPU. The representation model is now run under the ZeroGPU spaces decorator.
Files changed:
- funcs/embeddings.py +13 -4
- funcs/representation_model.py +4 -2
funcs/embeddings.py
CHANGED

@@ -3,16 +3,12 @@ import time
 import numpy as np
 import os
 
-
 from sentence_transformers import SentenceTransformer
 from sklearn.pipeline import make_pipeline
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from funcs.helper_functions import GPU_SPACE_DURATION
 
-
-
-
 # If you want to disable cuda for testing purposes
 #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 
@@ -121,9 +117,22 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
         embeddings_out = np.round(embeddings_out, 3)
         embeddings_out *= 100
 
+        # Move model to CPU before returning to avoid CUDA initialization in main process
+        if high_quality_mode_opt == "Yes" and hasattr(embedding_model, 'to'):
+            try:
+                embedding_model = embedding_model.to('cpu')
+            except:
+                pass # If moving to CPU fails, continue anyway
+
         return embeddings_out, embedding_model
 
     else:
         print("Found pre-loaded embeddings.")
+
+        # Ensure embeddings are on CPU even when loaded from file
+        if hasattr(embeddings_out, 'cpu'):
+            embeddings_out = embeddings_out.cpu().numpy()
+        elif not isinstance(embeddings_out, np.ndarray):
+            embeddings_out = np.array(embeddings_out)
 
         return embeddings_out, embedding_model
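Why this matters: under Hugging Face ZeroGPU, a GPU is attached only for the duration of a @spaces.GPU-decorated call. Any CUDA tensor or GPU-resident model returned from that call lands in a main process that has no CUDA context, which is the failure this commit fixes. Below is a minimal sketch of the pattern the diff applies; the spaces package and SentenceTransformer API are real, but the GPU_SPACE_DURATION value and the model name are illustrative stand-ins, not this app's actual configuration.

import numpy as np
import spaces
import torch
from sentence_transformers import SentenceTransformer

GPU_SPACE_DURATION = 60  # illustrative; the app imports this from funcs.helper_functions

@spaces.GPU(duration=GPU_SPACE_DURATION)
def embed_docs(docs: list) -> np.ndarray:
    model = SentenceTransformer("BAAI/bge-small-en-v1.5")  # illustrative model choice
    embeddings = model.encode(docs, convert_to_tensor=True)  # CUDA tensor inside the call
    # Hand back a CPU numpy array: once this function returns, ZeroGPU detaches
    # the GPU, and a live CUDA tensor would be unusable in the main process.
    if isinstance(embeddings, torch.Tensor):
        embeddings = embeddings.cpu().numpy()
    return embeddings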
funcs/representation_model.py
CHANGED

@@ -1,4 +1,5 @@
 import os
+import spaces
 from bertopic.representation import LlamaCPP
 
 from pydantic import BaseModel
@@ -9,7 +10,7 @@ from gradio import Warning
 from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, BaseRepresentation
 from funcs.embeddings import torch_device
 from funcs.prompts import phi3_prompt, phi3_start
-from funcs.helper_functions import get_or_create_env_var
+from funcs.helper_functions import get_or_create_env_var, GPU_SPACE_DURATION
 
 chosen_prompt = phi3_prompt #open_hermes_prompt # stablelm_prompt
 chosen_start_tag = phi3_start #open_hermes_start # stablelm_start
@@ -38,7 +39,7 @@ print(f'The value of USE_GPU is {USE_GPU}')
 if USE_GPU == "1":
     print("Using GPU for representation functions")
     torch_device = "gpu"
-    print("Cuda version installed is: ", version.cuda)
+    #print("Cuda version installed is: ", version.cuda)
     high_quality_mode = "Yes"
     os.system("nvidia-smi")
 else:
@@ -156,6 +157,7 @@ def find_model_file(hf_model_name: str, hf_model_file: str, search_folder: str,
 
     return found_file
 
+@spaces.GPU(duration=GPU_SPACE_DURATION)
 def create_representation_model(representation_type: str, llm_config: dict, hf_model_name: str, hf_model_file: str, chosen_start_tag: str, low_resource_mode: bool) -> dict:
     """
     Creates a representation model based on the specified type and configuration.
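This second file applies the same ZeroGPU discipline to the representation model: create_representation_model is now decorated with @spaces.GPU(duration=GPU_SPACE_DURATION), so the LLM-backed representation work runs while a GPU is actually attached. As an illustration of the decorator's semantics (the function name and duration below are hypothetical, not from this repo):

import spaces
import torch

@spaces.GPU(duration=120)  # hypothetical: request a ZeroGPU slot for at most 120 seconds
def report_device() -> str:
    # On ZeroGPU hardware a CUDA device is visible only inside decorated calls;
    # in the main process torch.cuda.is_available() stays False.
    return "cuda" if torch.cuda.is_available() else "cpu"

The spaces package documents the decorator as having no effect outside a Space, so the same code should degrade gracefully to CPU when run locally.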
|