"""
affective_embedding.py
----------------------
Fuses text, face, and scene VAD projections into a unified affective embedding
for multimodal fake news / deception detection.

Rules:
- Every post has text and scene VAD
- Some posts may have no face VAD (no face detected in image)
- post_id mapping ensures alignment
"""

import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F


# -------------------------------------------------------
# 🔹 1. Emotion Fusion Layer — combines variable-dim VAD sources
# -------------------------------------------------------
class EmotionFusionLayer(nn.Module):
    """
    Fuses VAD embeddings from text, face, and scene into a unified affective embedding.

    The three inputs are concatenated on the last axis and passed through
    Linear -> LayerNorm -> Tanh twice; the result is L2-normalised on the
    last axis.

    Args:
        input_dim: size of the concatenated (text + face + scene) feature axis
        hidden_dim: width of the intermediate layer
        output_dim: width of the returned embedding
    """

    def __init__(self, input_dim, hidden_dim=256, output_dim=128):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.norm1 = nn.LayerNorm(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.norm2 = nn.LayerNorm(output_dim)
        self.activation = nn.Tanh()

    def forward(self, vad_text, vad_face, vad_scene):
        """
        Args:
            vad_text, vad_face, vad_scene: tensors that agree on every axis
                except the last; their last-axis sizes must sum to `input_dim`.

        Returns:
            Tensor with last-axis size `output_dim`, unit L2 norm on that axis.
        """
        fused = torch.cat([vad_text, vad_face, vad_scene], dim=-1)
        fused = self.activation(self.norm1(self.fc1(fused)))
        affective_embedding = self.activation(self.norm2(self.fc2(fused)))
        return F.normalize(affective_embedding, dim=-1)


# -------------------------------------------------------
# 🔹 2. Utility: Align embeddings by post_id
# -------------------------------------------------------
def align_embeddings_by_post(df_embeddings, post_id_col, embedding_col, post_order, zero_fill_dim=None):
    """
    Align embeddings to posts using post_id mapping.
    If a post_id is missing, fill with zeros of the given dimension.

    When the same post_id occurs in several rows, the last row wins.

    Args:
        df_embeddings: DataFrame with columns [post_id_col, embedding_col]
        post_id_col: name of column for post_id
        embedding_col: name of column containing VAD embeddings
        post_order: list of post_ids in desired order
        zero_fill_dim: int, if embedding missing, fill with zeros of this size

    Returns:
        torch.Tensor (float32) of embeddings aligned to post_order. For an
        empty post_order the result has shape (0, zero_fill_dim or 0).

    Raises:
        ValueError: a post_id has no embedding and zero_fill_dim is None.
    """
    emb_dict = {
        pid: np.asarray(emb)
        for pid, emb in zip(df_embeddings[post_id_col], df_embeddings[embedding_col])
    }

    if len(post_order) == 0:
        # np.stack refuses an empty list; return a well-formed empty tensor instead.
        return torch.zeros((0, zero_fill_dim or 0), dtype=torch.float32)

    aligned_embeddings = []
    for pid in post_order:
        emb = emb_dict.get(pid)
        if emb is None:
            if zero_fill_dim is None:
                raise ValueError(f"Post {pid} missing embedding and zero_fill_dim is not provided")
            emb = np.zeros(zero_fill_dim, dtype=np.float32)
        aligned_embeddings.append(emb)

    return torch.tensor(np.stack(aligned_embeddings), dtype=torch.float32)


# -------------------------------------------------------
# 🔹 3. Affective Embedding Generator
# -------------------------------------------------------
class AffectiveEmbeddingGenerator:
    """
    Loads precomputed VAD projections (text, face, scene), aligns them per post,
    fuses them using EmotionFusionLayer, and outputs affective embeddings.

    Args:
        text_vad_path: file readable by torch.load holding the text VAD tensor
            (rows are assumed to follow the post mapping's order — TODO confirm)
        face_vad_path: pickled DataFrame with columns ['pth', 'image_vad_embedding']
        scene_vad_path: CSV with columns ['image', 'vad_embedding']
        post_to_image_path: CSV with columns ['post_id', 'image_id']
        device: torch device the tensors and the fusion model are moved to
        default_face_dim: width of the all-zero face tensor used when the face
            VAD file cannot be loaded

    Attributes set by __init__: device, post_order, vad_text, vad_face,
    vad_scene, model.
    """

    def __init__(self, text_vad_path, face_vad_path, scene_vad_path, post_to_image_path,
                 device="cpu", default_face_dim=64):
        self.device = device

        # Load post-to-image mapping
        df_post_map = pd.read_csv(post_to_image_path)  # must contain ['post_id','image_id']
        self.post_order = df_post_map['post_id'].tolist()
        n_posts = len(self.post_order)

        # ---------------- Text embeddings ----------------
        # map_location lets a tensor saved from a GPU load on a CPU-only host;
        # it is moved to `device` further down.
        self.vad_text = torch.load(text_vad_path, map_location="cpu").float()
        if len(self.vad_text) > n_posts:
            self.vad_text = self.vad_text[:n_posts]

        # ---------------- Face embeddings (optional) ----------------
        # Deliberately best-effort: any failure degrades to an all-zero face block.
        try:
            self.vad_face = self._load_face_vad(face_vad_path, df_post_map, self.post_order)
            print(f"✅ Face VAD loaded: {self.vad_face.shape}")
        except Exception as e:
            print(f"⚠️ Face VAD unavailable ({e}) — using zeros")
            self.vad_face = torch.zeros(n_posts, default_face_dim)

        # ---------------- Scene embeddings ----------------
        self.vad_scene = self._load_scene_vad(scene_vad_path, df_post_map, self.post_order)

        # Trim face/scene to the number of text rows so all three line up
        n = len(self.vad_text)
        if n < n_posts:
            print(f"⚠️ Text VAD has {n} rows but the mapping has {n_posts} posts — truncating face/scene to {n}")
        self.vad_face = self.vad_face[:n]
        self.vad_scene = self.vad_scene[:n]

        print(f"Aligned shapes — Text: {self.vad_text.shape}, Face: {self.vad_face.shape}, Scene: {self.vad_scene.shape}")

        # Ensure same device
        self.vad_text = self.vad_text.to(device)
        self.vad_face = self.vad_face.to(device)
        self.vad_scene = self.vad_scene.to(device)

        # Initialize fusion model
        input_dim = self.vad_text.shape[1] + self.vad_face.shape[1] + self.vad_scene.shape[1]
        self.model = EmotionFusionLayer(input_dim=input_dim).to(device)

    @staticmethod
    def _parse_embedding(value):
        """Turn a comma-separated string (optionally wrapped in [ ]) into a float array."""
        if isinstance(value, str):
            return np.fromstring(value.strip().strip('[]'), sep=",")
        return value

    @staticmethod
    def _load_face_vad(face_vad_path, df_post_map, post_order):
        """Load the face VAD pickle and align it to post_order (zeros where no face row matches)."""
        # NOTE(security): read_pickle can execute arbitrary code; only use trusted files.
        df_face = pd.read_pickle(face_vad_path)
        face_dim = len(df_face['image_vad_embedding'].iloc[0])

        known_ids = set(df_post_map['image_id'])

        def to_image_id(path):
            filename = path.split('/')[-1]
            if filename in known_ids:
                return filename
            # The scene branch strips '.jpg' to match image_id, so image_id
            # appears to carry no extension; fall back to the bare stem.
            return os.path.splitext(filename)[0]

        df_face['image_filename'] = df_face['pth'].apply(to_image_id)
        # Inner merge: a left merge would insert NaN post_ids for unmatched
        # faces and upcast integer ids to float, corrupting ids above 2**53.
        df_face = df_face.merge(df_post_map, left_on='image_filename', right_on='image_id', how='inner')

        return align_embeddings_by_post(
            df_face,
            post_id_col='post_id',
            embedding_col='image_vad_embedding',
            post_order=post_order,
            zero_fill_dim=face_dim
        )

    @staticmethod
    def _load_scene_vad(scene_vad_path, df_post_map, post_order):
        """Load the scene VAD CSV and align it to post_order (zeros where no scene row matches)."""
        df_scene = pd.read_csv(scene_vad_path)  # contains ['image','vad_embedding']

        # Convert string to array if needed (decided per value, so it works
        # for both object and string column dtypes)
        df_scene['vad_embedding'] = df_scene['vad_embedding'].apply(
            AffectiveEmbeddingGenerator._parse_embedding
        )

        # Infer scene dimension from first row of CSV (before merging)
        scene_dim = len(df_scene['vad_embedding'].iloc[0])

        # Strip .jpg from image column to match image_id format
        df_scene['image'] = df_scene['image'].str.replace('.jpg', '', regex=False)

        # Merge with post mapping: image -> image_id -> post_id.
        # Inner merge keeps only images that belong to a post and preserves the
        # post_id dtype (no NaN-induced float upcast).
        df_scene_valid = df_scene.merge(df_post_map, left_on='image', right_on='image_id', how='inner')

        # Align embeddings, zero-fill if missing
        return align_embeddings_by_post(
            df_scene_valid,
            post_id_col='post_id',
            embedding_col='vad_embedding',
            post_order=post_order,
            zero_fill_dim=scene_dim
        )

    def generate(self, save_path=None):
        """
        Generate affective embeddings and optionally save to disk.

        Args:
            save_path: if truthy, the embeddings are written there with np.save

        Returns:
            torch.Tensor of fused embeddings, one row per aligned post.
        """
        # NOTE: no checkpoint is loaded in this class; self.model carries the
        # weights EmotionFusionLayer was initialised with in __init__.
        with torch.no_grad():
            affective_embedding = self.model(self.vad_text, self.vad_face, self.vad_scene)

        if save_path:
            np.save(save_path, affective_embedding.cpu().numpy())
            print(f"✅ Affective embeddings saved to {save_path}")

        return affective_embedding


# -------------------------------------------------------
# 🔹 4. Example Usage
# -------------------------------------------------------
if __name__ == "__main__":
    generator = AffectiveEmbeddingGenerator(
        text_vad_path="Dataset/twitter/text_vad_embedding.pt",
        face_vad_path="Dataset/affectnet/df_with_image_vad_embedding.pkl",
        scene_vad_path="Dataset/twitter/scene_emotions_vad_proj.csv",
        post_to_image_path="Dataset/twitter/df_train_translated.csv",
        device="cpu"
    )

    affective_embedding = generator.generate(
        save_path="Dataset/affectnet/affective_embedding.npy"
    )

    print("Affective embedding shape:", affective_embedding.shape)
    print("Sample:", affective_embedding[:5])

# Optional sanity check on the saved file (kept disabled):
# import numpy as np
# # Load the saved embeddings
# affective_embeddings = np.load("Dataset/affectnet/affective_embedding.npy")
# print("Shape of embeddings:", affective_embeddings.shape)
# # Sum absolute values across each row (each post)
# zero_mask = np.sum(np.abs(affective_embeddings), axis=1) == 0
# # Count how many embeddings are zero
# num_zero_embeddings = np.sum(zero_mask)
# print(f"Number of posts with all-zero affective embeddings: {num_zero_embeddings} / {affective_embeddings.shape[0]}")