Spaces:

Hovhannes
/

E5QWEN_movie_rec

Running

App Files Files Community

manuschyan.h committed on Aug 1

Commit

b73366b

1 Parent(s): 981dd27

adding app

Browse files

Files changed (2) hide show

app.py +28 -60
movie_recommender.py +192 -0

app.py CHANGED Viewed

@@ -1,64 +1,32 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+from movie_recommender import MovieRecommender
# Module-level recommender instance, created once at import time; the
# request handler defined in this file reads it on each call.
recommender: MovieRecommender = MovieRecommender()
def get_recommendations(vibe_query):
    """Return movie recommendations for a free-text vibe description.

    Args:
        vibe_query: Text submitted by the user. May be ``None``, empty, or
            made up only of whitespace when nothing useful was typed.

    Returns:
        Whatever ``recommender.recommend`` returns for the trimmed query, or
        a short message asking for input when the query is blank.
    """
    # A query of only spaces/newlines is as empty as "" — trimming first
    # avoids running a search and a model call on it.
    cleaned_query = vibe_query.strip() if isinstance(vibe_query, str) else vibe_query
    if not cleaned_query:
        return "Please enter a vibe or movie description."
    return recommender.recommend(cleaned_query)
# Input widget: a five-line textbox with a sample query as placeholder text.
_vibe_input = gr.Textbox(
    lines=5,
    label="Describe the vibe of the movie you want to watch",
    placeholder="e.g., A scifi horror movie about a group of people who are trapped in a building and have to fight off an alien.",
)

# Example queries offered by the interface (one single-input row each).
_example_queries = [
    ["A heartwarming story about a talking animal who goes on an adventure."],
    ["A dark and gritty detective noir set in 1940s Los Angeles."],
    ["A mind-bending psychological thriller with an unreliable narrator."],
]

# Wire the textbox to get_recommendations; the result is shown as plain text.
iface = gr.Interface(
    fn=get_recommendations,
    inputs=_vibe_input,
    outputs="text",
    title="🎬 Movie Recommender",
    description="Describe the kind of movie you're in the mood for, and I'll give you some recommendations based on the vibe.",
    examples=_example_queries,
)

# Launch the app only when this file is run directly, not when imported.
if __name__ == "__main__":
    iface.launch()

movie_recommender.py ADDED Viewed

	@@ -0,0 +1,192 @@

+from langchain_ollama.llms import OllamaLLM
+from langchain.agents import AgentType
+from langchain.agents import initialize_agent
+from langchain.agents import Tool
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModel
+import os
+import glob
+from tqdm import tqdm
+import json
+import faiss
+import numpy as np
def safe_get(data, *keys, default=None):
    """Walk a nested structure key by key, returning ``default`` on any miss.

    Args:
        data: The outermost container (typically a dict parsed from JSON).
        *keys: Keys or list indices applied in order, one nesting level each.
        default: Value returned as soon as a step cannot be followed.

    Returns:
        The value found after applying every key, or ``default`` when a key
        is missing, a list index is out of range, or an intermediate value
        is not subscriptable (for example ``None``). With no keys, ``data``
        itself is returned.
    """
    current = data
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # IndexError covers list positions that do not exist, so a short
            # list behaves like a missing dict key instead of raising.
            return default
    return current
def get_movie_text(movie_json):
    """Flatten one movie record into a single line of text for embedding.

    The record is read through ``safe_get``, so missing branches fall back to
    placeholder text instead of raising.

    Args:
        movie_json: One parsed movie record. The fields read are
            ``titleText.text``, ``releaseYear.year``,
            ``plot.plotText.plainText``, ``genres.genres[*].text``,
            ``keywords.edges[*].node.text`` and
            ``ratingsSummary.aggregateRating``.

    Returns:
        A string of the form
        ``"Title: ... Year: ... Genres: ... Plot: ... Keywords: ... Rating: ..."``.
    """
    def _scalar(*keys, default):
        # A field that is present but null comes back as None; use the
        # placeholder rather than embedding the literal word "None".
        value = safe_get(movie_json, *keys, default=default)
        return default if value is None else value

    title = _scalar('titleText', 'text', default='Unknown')
    year = _scalar('releaseYear', 'year', default='Unknown')
    plot = _scalar('plot', 'plotText', 'plainText', default='No plot available')
    rating = _scalar('ratingsSummary', 'aggregateRating', default='N/A')

    # Read each genre name defensively and drop blanks so the joined text
    # has no empty slots.
    genres = safe_get(movie_json, 'genres', 'genres', default=[]) or []
    genre_names = [safe_get(g, 'text', default='') for g in genres]
    genre_names = [str(name) for name in genre_names if name]
    genre_text = ', '.join(genre_names) if genre_names else 'Unknown genre'

    # Only the first ten keyword edges are used. An edge without node/text
    # is skipped instead of aborting the whole record.
    keywords = safe_get(movie_json, 'keywords', 'edges', default=[]) or []
    keyword_names = [safe_get(kw, 'node', 'text', default='') for kw in keywords[:10]]
    keyword_names = [str(name) for name in keyword_names if name]
    keyword_text = ', '.join(keyword_names) if keyword_names else 'No keywords'

    return (
        f"Title: {title}. Year: {year}. Genres: {genre_text}. "
        f"Plot: {plot}. Keywords: {keyword_text}. Rating: {rating}"
    )
class MovieRecommender:
    """Two-stage movie recommender: FAISS similarity search, then LLM ranking.

    Movie records are turned into text, embedded with a SentenceTransformer
    model and stored in a flat L2 FAISS index. The index and the matching
    list of movie texts are cached on disk and reused on later runs.
    ``recommend`` retrieves the nearest movie texts for a query and asks an
    Ollama-served LLM to rank them.
    """

    def __init__(self,
                 model_name='intfloat/multilingual-e5-large-instruct',
                 index_path="movie_index.faiss",
                 texts_path="movie_texts.json",
                 data_path="archive/movie_dataset/movie_dataset/*.json",
                 llm_model="huihui_ai/qwen3-abliterated",
                 max_files=3):
        """Load the models and load or build the search index.

        Args:
            model_name: SentenceTransformer model used for embeddings.
            index_path: File the FAISS index is read from / written to.
            texts_path: JSON file holding the movie texts, in index order.
            data_path: Glob pattern matching the raw movie JSON files.
            llm_model: Name of the Ollama model used for ranking.
            max_files: Maximum number of data files to read when building
                the index; ``None`` reads every matching file. The default
                of 3 keeps the previously hard-coded limit.
        """
        self.model = SentenceTransformer(model_name)
        self.index_path = index_path
        self.texts_path = texts_path
        self.data_path = data_path
        self.max_files = max_files
        self.llm = OllamaLLM(model=llm_model)
        self.all_movies_text = []
        self.faiss_index = None
        self._load_or_build_index()

    def _load_movies(self):
        """Read the raw movie files and fill ``self.all_movies_text``."""
        # glob order depends on the filesystem; sorting makes the subset
        # chosen by max_files the same on every run.
        movie_files = sorted(glob.glob(self.data_path))
        if self.max_files is not None:
            movie_files = movie_files[:self.max_files]
        print(f"Processing {len(movie_files)} movie files...")
        all_movies_data = []
        for file in tqdm(movie_files, desc="Loading movie files"):
            try:
                with open(file, "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                # Best effort: report an unreadable or malformed file and
                # carry on with the rest.
                print(f"Error loading {file}: {e}")
                continue
            if isinstance(data, list):
                all_movies_data.extend(data)
            else:
                # NOTE(review): assumes a non-list payload is one movie
                # record; extending with it would add its keys instead.
                all_movies_data.append(data)
        print(f"Loaded data for {len(all_movies_data)} movies.")
        self.all_movies_text = [
            get_movie_text(movie)
            for movie in tqdm(all_movies_data, desc="Extracting text from movies")
        ]
        print(f"Processed {len(self.all_movies_text)} movies total")

    def _build_index(self, batch_size=10):
        """Embed every movie text, build the FAISS index and save both to disk.

        Args:
            batch_size: Number of texts embedded per ``encode`` call.
        """
        print(f"Embedding movies in batches of {batch_size}...")
        index = None
        for start in tqdm(range(0, len(self.all_movies_text), batch_size),
                          desc="Embedding batches"):
            batch_texts = self.all_movies_text[start:start + batch_size]
            document_embeddings = np.asarray(self.model.encode(batch_texts), dtype=np.float32)
            if index is None:
                # Size the index from the embeddings themselves so a model
                # with a different output width still works.
                index = faiss.IndexFlatL2(document_embeddings.shape[1])
            index.add(document_embeddings)
        if index is None:
            # Nothing to embed: keep the previous behaviour of saving an
            # empty 1024-dimensional index.
            index = faiss.IndexFlatL2(1024)
        self.faiss_index = index
        print(f"FAISS index built with {self.faiss_index.ntotal} vectors.")
        print(f"Saving FAISS index to {self.index_path}")
        faiss.write_index(self.faiss_index, self.index_path)
        with open(self.texts_path, "w", encoding="utf-8") as f:
            json.dump(self.all_movies_text, f)
        print(f"Saved movie texts to {self.texts_path}")

    def _load_or_build_index(self):
        """Load the cached index and texts if both exist, else build them."""
        if os.path.exists(self.index_path) and os.path.exists(self.texts_path):
            print(f"Loading existing FAISS index from {self.index_path}")
            self.faiss_index = faiss.read_index(self.index_path)
            print(f"Loading movie texts from {self.texts_path}")
            with open(self.texts_path, "r", encoding="utf-8") as f:
                self.all_movies_text = json.load(f)
            print(f"Loaded {len(self.all_movies_text)} movie texts.")
            if self.faiss_index.ntotal != len(self.all_movies_text):
                # Index rows and texts are matched by position, so a size
                # mismatch means the two cache files are out of sync.
                print(
                    f"Warning: index holds {self.faiss_index.ntotal} vectors but "
                    f"{len(self.all_movies_text)} texts were loaded."
                )
        else:
            print("Building new FAISS index...")
            self._load_movies()
            self._build_index()

    def search(self, query, k=50):
        """Return the texts of the movies nearest to ``query``.

        Args:
            query: Free-text description to search for.
            k: Maximum number of movies to return.

        Returns:
            A list of at most ``k`` movie texts, in the order the index
            returned them. Empty when the index holds no vectors.

        Raises:
            RuntimeError: If the index has been neither built nor loaded.
        """
        if self.faiss_index is None:
            raise RuntimeError("FAISS index is not built or loaded.")
        # Never ask for more neighbours than the index holds: FAISS pads the
        # result with -1, which Python would read as "the last movie".
        k = min(k, self.faiss_index.ntotal)
        if k <= 0:
            return []
        query_embedding = self.model.encode([query], prompt_name="query")
        _distances, indices = self.faiss_index.search(query_embedding, k)
        text_count = len(self.all_movies_text)
        # Also drop any padding or out-of-range ids in case the index and
        # the text list are out of sync.
        return [
            self.all_movies_text[i]
            for i in map(int, indices[0])
            if 0 <= i < text_count
        ]

    def recommend(self, vibe_query):
        """Retrieve candidate movies for a vibe and have the LLM rank them.

        Args:
            vibe_query: Free-text description of the desired vibe.

        Returns:
            The LLM's response, or a short message when the search produced
            no candidates to rank.
        """
        print(f"Searching for movies with vibe: {vibe_query}")
        candidate_movies = self.search(vibe_query)
        if not candidate_movies:
            # With nothing to rank the model could only invent titles.
            return "No candidate movies are available to recommend from."
        # One movie per line; joining with no separator ran the entries
        # together into a single unreadable string.
        candidate_block = "\n".join(candidate_movies)
        prompt = f"""
You are a movie recommendation expert. I'm looking for movies with this vibe: "{vibe_query}"
Here are {len(candidate_movies)} candidate movies that were found using semantic similarity:
{candidate_block}
Create a vibe profile for each of these movies and then rank the movies based on the vibe profiles matching the requested vibe.
Please rank the TOP 10 movies that best match the requested vibe "{vibe_query}". For each movie, provide:
1. **Movie Title and Year**
2. **Rank Score** (1-10, where 10 is a perfect match)
3. **Reason** (2-3 sentences explaining why this movie matches the vibe)
4. **Description** (brief summary focusing on elements that match the vibe)
5. **Hook** (1-2 hooky sentences that capture the essence of the movie)
Format your response exactly like this:
**TOP 10 MOVIE RECOMMENDATIONS:**
**1. [Movie Title] ([Year])**
- **Rank Score:** [1-10]
- **Reason:** [2-3 sentences explaining the match]
- **Description:** [Brief summary highlighting vibe-matching elements]
- **Hook:** [1-2 hooky sentences that capture the essence of the movie]
**2. [Movie Title] ([Year])**
- **Rank Score:** [1-10]
- **Reason:** [2-3 sentences explaining the match]
- **Description:** [Brief summary highlighting vibe-matching elements]
- **Hook:** [1-2 hooky sentences that capture the essence of the movie]
**3. [Movie Title] ([Year])**
- **Rank Score:** [1-10]
- **Reason:** [2-3 sentences explaining the match]
- **Description:** [Brief summary highlighting vibe-matching elements]
- **Hook:** [1-2 hooky sentences that capture the essence of the movie]
...
**10. [Movie Title] ([Year])**
- **Rank Score:** [1-10]
- **Reason:** [2-3 sentences explaining the match]
- **Description:** [Brief summary highlighting vibe-matching elements]
- **Hook:** [1-2 hooky sentences that capture the essence of the movie]
Focus on how well each movie captures the specific vibe requested, not just general quality.
"""
        return self.llm.invoke(prompt)
if __name__ == "__main__":
    # Manual smoke run: build the recommender, then print the ranking for
    # one sample query.
    demo_recommender = MovieRecommender()
    demo_query = "A scifi horror movie about a group of people who are trapped in a building and have to fight off an alien."
    print(demo_recommender.recommend(demo_query))