# deceptionxai / affective_embedding.py
# Andrea Sharon Silva
# deployment: add Dockerfile, start.sh, download_models, app.py, fix port to 7860, update requirements for cloud deployment
# 90a1d9d
"""
affective_embedding.py
----------------------
Fuses text, face, and scene VAD projections into a unified affective embedding
for multimodal fake news / deception detection.
Rules:
- Every post has text and scene VAD
- Some posts may have no face VAD (no face detected in image)
- post_id mapping ensures alignment
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
# -------------------------------------------------------
# 🔹 1. Emotion Fusion Layer — combines variable-dim VAD sources
# -------------------------------------------------------
class EmotionFusionLayer(nn.Module):
    """Two-stage MLP that merges per-modality VAD vectors into one embedding.

    The text, face and scene vectors are concatenated along the last axis,
    pushed through two ``Linear -> LayerNorm -> Tanh`` stages, and the result
    is L2-normalised along the last axis.
    """

    def __init__(self, input_dim, hidden_dim=256, output_dim=128):
        """Create the projection layers.

        Args:
            input_dim: Width of the concatenated text + face + scene vector.
            hidden_dim: Width of the intermediate representation.
            output_dim: Width of the returned affective embedding.
        """
        super().__init__()
        # Attribute names are also the state_dict keys, so they stay as-is.
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim)
        self.norm1 = nn.LayerNorm(hidden_dim)
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.norm2 = nn.LayerNorm(output_dim)
        self.activation = nn.Tanh()

    def forward(self, vad_text, vad_face, vad_scene):
        """Fuse the three modality tensors and return unit-norm embeddings.

        Args:
            vad_text: Text VAD tensor.
            vad_face: Face VAD tensor.
            vad_scene: Scene VAD tensor.

        Returns:
            Tensor whose last axis has ``output_dim`` entries and unit L2 norm.
        """
        # All three inputs must agree on every axis except the last one.
        joined = torch.cat((vad_text, vad_face, vad_scene), dim=-1)

        # Stage 1: input_dim -> hidden_dim
        hidden = self.fc1(joined)
        hidden = self.norm1(hidden)
        hidden = self.activation(hidden)

        # Stage 2: hidden_dim -> output_dim
        projected = self.fc2(hidden)
        projected = self.norm2(projected)
        projected = self.activation(projected)

        return F.normalize(projected, dim=-1)
# -------------------------------------------------------
# 🔹 2. Utility: Align embeddings by post_id
# -------------------------------------------------------
def align_embeddings_by_post(df_embeddings, post_id_col, embedding_col, post_order, zero_fill_dim=None):
    """Return one embedding row per entry of ``post_order``, in that order.

    Embeddings are looked up by post id. A post id that has no embedding gets
    a zero vector of length ``zero_fill_dim``. If the same post id occurs
    several times in ``df_embeddings``, the last occurrence wins.

    Args:
        df_embeddings: DataFrame with columns [post_id_col, embedding_col].
        post_id_col: Name of the column holding the post id.
        embedding_col: Name of the column holding the VAD embedding
            (any array-like of numbers).
        post_order: Iterable of post ids in the desired output order.
        zero_fill_dim: Length of the zero vector used for missing post ids.
            When None, a missing post id is an error.

    Returns:
        A float32 ``torch.Tensor`` with ``len(post_order)`` rows. For an empty
        ``post_order`` the shape is ``(0, zero_fill_dim)``, or ``(0, 0)`` when
        ``zero_fill_dim`` is None.

    Raises:
        ValueError: If a post id has no embedding and ``zero_fill_dim`` is
            None, or if the collected embeddings do not share one shape.
    """
    lookup = {
        pid: np.asarray(emb, dtype=np.float32)
        for pid, emb in zip(df_embeddings[post_id_col], df_embeddings[embedding_col])
    }

    post_order = list(post_order)
    if not post_order:
        # np.stack refuses an empty list; hand back an empty batch instead.
        width = 0 if zero_fill_dim is None else zero_fill_dim
        return torch.zeros((0, width), dtype=torch.float32)

    # One shared filler is enough: np.stack copies every row it receives.
    filler = None if zero_fill_dim is None else np.zeros(zero_fill_dim, dtype=np.float32)

    rows = []
    for pid in post_order:
        vector = lookup.get(pid)
        if vector is None:
            if filler is None:
                raise ValueError(f"Post {pid} missing embedding and zero_fill_dim is not provided")
            vector = filler
        rows.append(vector)

    return torch.tensor(np.stack(rows), dtype=torch.float32)
# -------------------------------------------------------
# 🔹 3. Affective Embedding Generator
# -------------------------------------------------------
class AffectiveEmbeddingGenerator:
    """
    Loads precomputed VAD projections (text, face, scene),
    aligns them per post, fuses them using EmotionFusionLayer,
    and outputs affective embeddings.

    Text and scene VAD are required. Face VAD is optional: if it cannot be
    loaded, or a post has no face row, zeros are used for that modality.

    NOTE(review): the fusion layer is created here with freshly initialised
    weights and nothing in this class loads trained parameters, so outputs
    differ between runs unless the caller seeds torch or loads a state dict
    into ``self.model``.
    """

    def __init__(self, text_vad_path, face_vad_path, scene_vad_path,
                 post_to_image_path, device="cpu"):
        """Load, align and stage all three modalities on ``device``.

        Args:
            text_vad_path: ``torch.save``-d tensor with one text VAD row per
                post (presumably in the row order of the post mapping CSV —
                confirm against the producer of this file).
            face_vad_path: Pickled DataFrame with columns 'pth' (image path)
                and 'image_vad_embedding'.
            scene_vad_path: CSV with columns 'image' and 'vad_embedding'.
            post_to_image_path: CSV with columns 'post_id' and 'image_id'.
            device: Torch device for the tensors and the fusion model.
        """
        self.device = device

        # Load post-to-image mapping; must contain ['post_id', 'image_id'].
        df_post_map = pd.read_csv(post_to_image_path)
        self.post_order = df_post_map['post_id'].tolist()

        # ---------------- Text embeddings ----------------
        # map_location="cpu" lets a tensor saved on a GPU load on a CPU-only
        # host; it is moved to `device` further down.
        self.vad_text = torch.load(text_vad_path, map_location="cpu").float()
        if len(self.vad_text) > len(self.post_order):
            self.vad_text = self.vad_text[:len(self.post_order)]

        # ---------------- Face embeddings (optional) ----------------
        try:
            # NOTE(security): unpickling runs arbitrary code; only load
            # pickles from a trusted source.
            df_face = pd.read_pickle(face_vad_path)
            # Read the width before merging so it does not depend on whether
            # any face row matches a post.
            face_dim = len(df_face['image_vad_embedding'].iloc[0])
            known_image_ids = set(df_post_map['image_id'])
            df_face['image_filename'] = df_face['pth'].apply(
                lambda path: self._resolve_image_key(path, known_image_ids)
            )
            # Inner merge: a left merge would add NaN post_ids, which upcasts
            # integer ids to float64 and corrupts ids larger than 2**53.
            df_face = df_face.merge(
                df_post_map, left_on='image_filename', right_on='image_id', how='inner'
            )
            self.vad_face = align_embeddings_by_post(
                df_face,
                post_id_col='post_id',
                embedding_col='image_vad_embedding',
                post_order=self.post_order,
                zero_fill_dim=face_dim
            )
            print(f"✅ Face VAD loaded: {self.vad_face.shape}")
        except Exception as e:
            # Deliberate best-effort: face VAD is optional, so any failure
            # falls back to an all-zero face modality.
            print(f"⚠️ Face VAD unavailable ({e}) — using zeros")
            face_dim = 64
            self.vad_face = torch.zeros(len(self.post_order), face_dim)

        # ---------------- Scene embeddings ----------------
        df_scene = pd.read_csv(scene_vad_path)  # contains ['image', 'vad_embedding']
        # Embeddings read from CSV arrive as comma-separated strings.
        df_scene['vad_embedding'] = df_scene['vad_embedding'].apply(self._parse_vad_string)
        # Infer scene dimension from first row of CSV (before merging)
        scene_dim = len(df_scene['vad_embedding'].iloc[0])
        # Strip .jpg from image column to match image_id format
        df_scene['image'] = df_scene['image'].str.replace('.jpg', '', regex=False)
        # Merge with post mapping: image -> image_id -> post_id. Only scene
        # rows that map to a post are kept; the inner merge also keeps the
        # post_id dtype intact (no NaN-induced float upcast).
        df_scene_valid = df_scene.merge(
            df_post_map, left_on='image', right_on='image_id', how='inner'
        ).dropna(subset=['post_id'])
        # Align embeddings, zero-fill if missing
        self.vad_scene = align_embeddings_by_post(
            df_scene_valid,
            post_id_col='post_id',
            embedding_col='vad_embedding',
            post_order=self.post_order,
            zero_fill_dim=scene_dim
        )

        # Trim face/scene to the number of text rows so all three modalities
        # have the same number of rows before concatenation.
        n = len(self.vad_text)
        self.vad_face = self.vad_face[:n]
        self.vad_scene = self.vad_scene[:n]
        print(f"Aligned shapes — Text: {self.vad_text.shape}, Face: {self.vad_face.shape}, Scene: {self.vad_scene.shape}")

        # Ensure same device
        self.vad_text = self.vad_text.to(device)
        self.vad_face = self.vad_face.to(device)
        self.vad_scene = self.vad_scene.to(device)

        # Initialize fusion model (inference only, hence eval mode)
        input_dim = self.vad_text.shape[1] + self.vad_face.shape[1] + self.vad_scene.shape[1]
        self.model = EmotionFusionLayer(input_dim=input_dim).to(device)
        self.model.eval()

    @staticmethod
    def _resolve_image_key(path, known_image_ids):
        """Return the key used to match an image path against ``image_id``.

        The bare filename is used when it is a known image id. Otherwise the
        filename without its last extension is used if that is a known id.
        The scene branch strips '.jpg' to match ``image_id``, which suggests
        ids carry no extension — TODO confirm against the mapping CSV.
        """
        filename = path.split('/')[-1]
        if filename in known_image_ids:
            return filename
        stem = filename.rsplit('.', 1)[0]
        return stem if stem in known_image_ids else filename

    @staticmethod
    def _parse_vad_string(value):
        """Turn a comma-separated string into a float array; pass others through.

        Checking each value (rather than the column dtype) also covers pandas
        versions that load text columns with a dedicated string dtype.
        Surrounding square brackets, if present, are ignored.
        """
        if isinstance(value, str):
            return np.fromstring(value.strip().strip('[]'), sep=",")
        return value

    def generate(self, save_path=None):
        """Generate affective embeddings and optionally save to disk.

        Args:
            save_path: If truthy, the embeddings are also written there with
                ``np.save``.

        Returns:
            The fused embedding tensor produced by ``self.model``.
        """
        with torch.no_grad():
            affective_embedding = self.model(self.vad_text, self.vad_face, self.vad_scene)
        if save_path:
            np.save(save_path, affective_embedding.cpu().numpy())
            print(f"✅ Affective embeddings saved to {save_path}")
        return affective_embedding
# -------------------------------------------------------
# 🔹 4. Example Usage
# -------------------------------------------------------
if __name__ == "__main__":
    # Demo run: build the generator from the dataset artefacts, write the
    # fused embeddings to disk, then print their shape and the first rows.
    input_paths = {
        "text_vad_path": "Dataset/twitter/text_vad_embedding.pt",
        "face_vad_path": "Dataset/affectnet/df_with_image_vad_embedding.pkl",
        "scene_vad_path": "Dataset/twitter/scene_emotions_vad_proj.csv",
        "post_to_image_path": "Dataset/twitter/df_train_translated.csv",
    }
    output_file = "Dataset/affectnet/affective_embedding.npy"

    generator = AffectiveEmbeddingGenerator(device="cpu", **input_paths)
    affective_embedding = generator.generate(save_path=output_file)

    print("Affective embedding shape:", affective_embedding.shape)
    print("Sample:", affective_embedding[:5])
# import numpy as np
# # Load the saved embeddings
# affective_embeddings = np.load("Dataset/affectnet/affective_embedding.npy")
# print("Shape of embeddings:", affective_embeddings.shape)
# # Sum absolute values across each row (each post)
# zero_mask = np.sum(np.abs(affective_embeddings), axis=1) == 0
# # Count how many embeddings are zero
# num_zero_embeddings = np.sum(zero_mask)
# print(f"Number of posts with all-zero affective embeddings: {num_zero_embeddings} / {affective_embeddings.shape[0]}")