Commit · 02721f3
Parent(s): cb7a4c9
Fix for returning GPU tensors to the main function after embedding with ZeroGPU. The representation model is now run under the ZeroGPU spaces decorator.
Files changed:
- funcs/embeddings.py +13 -4
- funcs/representation_model.py +4 -2
funcs/embeddings.py
CHANGED

@@ -3,16 +3,12 @@ import time
 import numpy as np
 import os
 
-
 from sentence_transformers import SentenceTransformer
 from sklearn.pipeline import make_pipeline
 from sklearn.decomposition import TruncatedSVD
 from sklearn.feature_extraction.text import TfidfVectorizer
 from funcs.helper_functions import GPU_SPACE_DURATION
 
-
-
-
 # If you want to disable cuda for testing purposes
 #os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
 
@@ -121,9 +117,22 @@ def make_or_load_embeddings(docs: list, file_list: list, embeddings_out: np.ndar
         embeddings_out = np.round(embeddings_out, 3)
         embeddings_out *= 100
 
+        # Move model to CPU before returning to avoid CUDA initialization in main process
+        if high_quality_mode_opt == "Yes" and hasattr(embedding_model, 'to'):
+            try:
+                embedding_model = embedding_model.to('cpu')
+            except:
+                pass # If moving to CPU fails, continue anyway
+
         return embeddings_out, embedding_model
 
     else:
         print("Found pre-loaded embeddings.")
+
+        # Ensure embeddings are on CPU even when loaded from file
+        if hasattr(embeddings_out, 'cpu'):
+            embeddings_out = embeddings_out.cpu().numpy()
+        elif not isinstance(embeddings_out, np.ndarray):
+            embeddings_out = np.array(embeddings_out)
 
         return embeddings_out, embedding_model
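Why this matters: under Hugging Face ZeroGPU, a GPU is attached only for the duration of a @spaces.GPU-decorated call. Any CUDA tensor or GPU-resident model returned from that call lands in a main process that has no CUDA context, which is the failure this commit fixes. Below is a minimal sketch of the pattern the diff applies; the spaces package and SentenceTransformer API are real, but the GPU_SPACE_DURATION value and the model name are illustrative stand-ins, not this app's actual configuration.

import numpy as np
import spaces
import torch
from sentence_transformers import SentenceTransformer

GPU_SPACE_DURATION = 60  # illustrative; the app imports this from funcs.helper_functions

@spaces.GPU(duration=GPU_SPACE_DURATION)
def embed_docs(docs: list) -> np.ndarray:
    model = SentenceTransformer("BAAI/bge-small-en-v1.5")  # illustrative model choice
    embeddings = model.encode(docs, convert_to_tensor=True)  # CUDA tensor inside the call
    # Hand back a CPU numpy array: once this function returns, ZeroGPU detaches
    # the GPU, and a live CUDA tensor would be unusable in the main process.
    if isinstance(embeddings, torch.Tensor):
        embeddings = embeddings.cpu().numpy()
    return embeddings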
funcs/representation_model.py
CHANGED

@@ -1,4 +1,5 @@
 import os
+import spaces
 from bertopic.representation import LlamaCPP
 
 from pydantic import BaseModel
@@ -9,7 +10,7 @@ from gradio import Warning
 from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, BaseRepresentation
 from funcs.embeddings import torch_device
 from funcs.prompts import phi3_prompt, phi3_start
-from funcs.helper_functions import get_or_create_env_var
+from funcs.helper_functions import get_or_create_env_var, GPU_SPACE_DURATION
 
 chosen_prompt = phi3_prompt #open_hermes_prompt # stablelm_prompt
 chosen_start_tag = phi3_start #open_hermes_start # stablelm_start
@@ -38,7 +39,7 @@ print(f'The value of USE_GPU is {USE_GPU}')
 if USE_GPU == "1":
     print("Using GPU for representation functions")
     torch_device = "gpu"
-    print("Cuda version installed is: ", version.cuda)
+    #print("Cuda version installed is: ", version.cuda)
     high_quality_mode = "Yes"
     os.system("nvidia-smi")
 else:
@@ -156,6 +157,7 @@ def find_model_file(hf_model_name: str, hf_model_file: str, search_folder: str,
 
     return found_file
 
+@spaces.GPU(duration=GPU_SPACE_DURATION)
 def create_representation_model(representation_type: str, llm_config: dict, hf_model_name: str, hf_model_file: str, chosen_start_tag: str, low_resource_mode: bool) -> dict:
     """
     Creates a representation model based on the specified type and configuration.
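This second file applies the same ZeroGPU discipline to the representation model: create_representation_model is now decorated with @spaces.GPU(duration=GPU_SPACE_DURATION), so the LLM-backed representation work runs while a GPU is actually attached. As an illustration of the decorator's semantics (the function name and duration below are hypothetical, not from this repo):

import spaces
import torch

@spaces.GPU(duration=120)  # hypothetical: request a ZeroGPU slot for at most 120 seconds
def report_device() -> str:
    # On ZeroGPU hardware a CUDA device is visible only inside decorated calls;
    # in the main process torch.cuda.is_available() stays False.
    return "cuda" if torch.cuda.is_available() else "cpu"

The spaces package documents the decorator as having no effect outside a Space, so the same code should degrade gracefully to CPU when run locally.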
|