Spaces:

sharonnnnn245
/

deceptionxai

Sleeping

deceptionxai / affective_embedding.py

Andrea Sharon Silva

deployment: add Dockerfile, start.sh, download_models, app.py, fix port to 7860, update requirements for cloud deployment

90a1d9d 3 months ago

raw

history blame contribute delete

8.4 kB

	"""
	affective_embedding.py
	----------------------

	Fuses text, face, and scene VAD projections into a unified affective embedding
	for multimodal fake news / deception detection.

	Rules:
	- Every post has text and scene VAD
	- Some posts may have no face VAD (no face detected in image)
	- post_id mapping ensures alignment
	"""

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import pandas as pd
	import numpy as np

	# -------------------------------------------------------
	# 🔹 1. Emotion Fusion Layer — combines variable-dim VAD sources
	# -------------------------------------------------------
	class EmotionFusionLayer(nn.Module):
	    """
	    Projection head that merges text, face, and scene VAD vectors.

	    The three inputs are concatenated along the last axis, sent through two
	    Linear -> LayerNorm -> Tanh stages, and the result is rescaled with
	    F.normalize along the last axis.
	    """

	    def __init__(self, input_dim, hidden_dim=256, output_dim=128):
	        """
	        Args:
	            input_dim: width of the concatenated text + face + scene vector
	            hidden_dim: width of the intermediate projection
	            output_dim: width of the returned affective embedding
	        """
	        super().__init__()
	        # Attribute names double as state_dict keys, so they are kept stable.
	        self.fc1 = nn.Linear(input_dim, hidden_dim)
	        self.norm1 = nn.LayerNorm(hidden_dim)
	        self.fc2 = nn.Linear(hidden_dim, output_dim)
	        self.norm2 = nn.LayerNorm(output_dim)
	        self.activation = nn.Tanh()

	    def forward(self, vad_text, vad_face, vad_scene):
	        """
	        Args:
	            vad_text, vad_face, vad_scene: tensors that agree on every axis
	                except the last; their last-axis widths must sum to input_dim

	        Returns:
	            tensor whose last axis has output_dim entries, normalised by
	            F.normalize along that axis
	        """
	        hidden = torch.cat((vad_text, vad_face, vad_scene), dim=-1)
	        stages = ((self.fc1, self.norm1), (self.fc2, self.norm2))
	        for linear, layer_norm in stages:
	            hidden = self.activation(layer_norm(linear(hidden)))
	        return F.normalize(hidden, dim=-1)

	# -------------------------------------------------------
	# 🔹 2. Utility: Align embeddings by post_id
	# -------------------------------------------------------
	def align_embeddings_by_post(df_embeddings, post_id_col, embedding_col, post_order, zero_fill_dim=None):
	    """
	    Align embeddings to posts using post_id mapping.
	    If a post_id is missing, fill with zeros of the given dimension.

	    Args:
	        df_embeddings: DataFrame with columns [post_id_col, embedding_col]
	        post_id_col: name of column for post_id
	        embedding_col: name of column containing VAD embeddings
	        post_order: list of post_ids in desired order
	        zero_fill_dim: int, if embedding missing, fill with zeros of this size

	    Returns:
	        torch.float32 tensor with one row per entry of post_order, in that
	        order. An empty post_order yields an empty tensor of shape
	        (0, zero_fill_dim), or (0, 0) when zero_fill_dim is None.

	    Raises:
	        ValueError: a post_id has no embedding and zero_fill_dim is None.
	    """
	    # When a post_id occurs more than once, the last row wins.
	    emb_dict = {
	        pid: np.asarray(emb, dtype=np.float32)
	        for pid, emb in zip(df_embeddings[post_id_col], df_embeddings[embedding_col])
	    }

	    aligned_embeddings = []
	    for pid in post_order:
	        emb = emb_dict.get(pid)
	        if emb is None:
	            if zero_fill_dim is None:
	                raise ValueError(f"Post {pid} missing embedding and zero_fill_dim is not provided")
	            emb = np.zeros(zero_fill_dim, dtype=np.float32)
	        aligned_embeddings.append(emb)

	    if not aligned_embeddings:
	        # np.stack rejects an empty list; hand back an empty batch instead.
	        width = 0 if zero_fill_dim is None else zero_fill_dim
	        return torch.zeros((0, width), dtype=torch.float32)

	    return torch.tensor(np.stack(aligned_embeddings), dtype=torch.float32)

	# -------------------------------------------------------
	# 🔹 3. Affective Embedding Generator
	# -------------------------------------------------------
	class AffectiveEmbeddingGenerator:
	    """
	    Loads precomputed VAD projections (text, face, scene),
	    aligns them per post, fuses them using EmotionFusionLayer,
	    and outputs affective embeddings.

	    No weights are loaded into the fusion layer here, so the output depends
	    on its random initialisation unless the caller loads a state dict into
	    self.model.
	    """

	    # Width of the all-zero face block used when face VAD cannot be loaded.
	    FALLBACK_FACE_DIM = 64

	    def __init__(self, text_vad_path, face_vad_path, scene_vad_path,
	                 post_to_image_path, device="cpu"):
	        """
	        Args:
	            text_vad_path: file read with torch.load; expected to hold a tensor
	            face_vad_path: pickled DataFrame with columns
	                ['pth', 'image_vad_embedding']
	            scene_vad_path: CSV with columns ['image', 'vad_embedding']
	            post_to_image_path: CSV that must contain ['post_id', 'image_id']
	            device: device the tensors and the fusion layer are moved to
	        """
	        self.device = device

	        # Load post-to-image mapping
	        df_post_map = pd.read_csv(post_to_image_path)  # must contain ['post_id','image_id']
	        self.post_order = df_post_map['post_id'].tolist()

	        self.vad_text = self._load_text_vad(text_vad_path)
	        self.vad_face = self._load_face_vad(face_vad_path, df_post_map)
	        self.vad_scene = self._load_scene_vad(scene_vad_path, df_post_map)

	        # Truncate face/scene to the text length so all three have equal rows
	        n = len(self.vad_text)
	        self.vad_face = self.vad_face[:n]
	        self.vad_scene = self.vad_scene[:n]

	        print(f"Aligned shapes — Text: {self.vad_text.shape}, Face: {self.vad_face.shape}, Scene: {self.vad_scene.shape}")

	        # Ensure same device
	        self.vad_text = self.vad_text.to(device)
	        self.vad_face = self.vad_face.to(device)
	        self.vad_scene = self.vad_scene.to(device)

	        # Initialize fusion model
	        input_dim = self.vad_text.shape[1] + self.vad_face.shape[1] + self.vad_scene.shape[1]
	        self.model = EmotionFusionLayer(input_dim=input_dim).to(device)

	    def _load_text_vad(self, text_vad_path):
	        """Load the text VAD tensor on CPU, keeping at most one row per post."""
	        # map_location lets a tensor saved on GPU load on a CPU-only host;
	        # the tensor is moved to the requested device later in __init__.
	        # NOTE(review): torch.load is pickle-based — only load trusted files.
	        vad_text = torch.load(text_vad_path, map_location="cpu").float()
	        return vad_text[:len(self.post_order)]

	    def _load_face_vad(self, face_vad_path, df_post_map):
	        """Load face VAD per post; fall back to zeros when it cannot be loaded."""
	        # Face VAD is optional: any failure below is reported and replaced by
	        # an all-zero block so the rest of the pipeline still runs.
	        try:
	            # NOTE(review): read_pickle executes pickle — only load trusted files.
	            df_face = pd.read_pickle(face_vad_path)
	            # Read the width before merging so it does not depend on matches.
	            face_dim = len(df_face['image_vad_embedding'].iloc[0])
	            # NOTE(review): the filename keeps its extension here, while the
	            # scene branch strips '.jpg' before matching image_id — confirm
	            # which format image_id uses.
	            df_face['image_filename'] = df_face['pth'].apply(lambda x: x.split('/')[-1])
	            # Inner join: a left join adds NaN for unmatched images, which
	            # upcasts an integer post_id column to float64, so large IDs stop
	            # matching the integer IDs in post_order.
	            df_face = df_face.merge(df_post_map, left_on='image_filename', right_on='image_id', how='inner')
	            vad_face = align_embeddings_by_post(
	                df_face,
	                post_id_col='post_id',
	                embedding_col='image_vad_embedding',
	                post_order=self.post_order,
	                zero_fill_dim=face_dim
	            )
	            print(f"✅ Face VAD loaded: {vad_face.shape}")
	        except Exception as e:
	            print(f"⚠️ Face VAD unavailable ({e}) — using zeros")
	            vad_face = torch.zeros(len(self.post_order), self.FALLBACK_FACE_DIM)
	        return vad_face

	    def _load_scene_vad(self, scene_vad_path, df_post_map):
	        """Load scene VAD per post, zero-filling posts without a scene row."""
	        df_scene = pd.read_csv(scene_vad_path)  # contains ['image','vad_embedding']

	        # Convert string to array if needed
	        if df_scene['vad_embedding'].dtype == object:
	            df_scene['vad_embedding'] = df_scene['vad_embedding'].apply(lambda x: np.fromstring(x, sep=","))

	        # Infer scene dimension from first row of CSV (before merging)
	        scene_dim = len(df_scene['vad_embedding'].iloc[0])

	        # Strip .jpg from image column to match image_id format
	        df_scene['image'] = df_scene['image'].str.replace('.jpg', '', regex=False)

	        # Merge with post mapping: image -> image_id -> post_id.
	        # Inner join keeps only images that map to a post and leaves the
	        # post_id dtype untouched (no NaN-driven upcast to float).
	        df_scene = df_scene.merge(df_post_map, left_on='image', right_on='image_id', how='inner')

	        # Align embeddings, zero-fill if missing
	        return align_embeddings_by_post(
	            df_scene,
	            post_id_col='post_id',
	            embedding_col='vad_embedding',
	            post_order=self.post_order,
	            zero_fill_dim=scene_dim
	        )

	    def generate(self, save_path=None):
	        """
	        Generate affective embeddings and optionally save to disk.

	        Args:
	            save_path: if truthy, the embeddings are written there with np.save

	        Returns:
	            tensor returned by the fusion model, computed under torch.no_grad()
	        """
	        with torch.no_grad():
	            affective_embedding = self.model(self.vad_text, self.vad_face, self.vad_scene)

	        if save_path:
	            np.save(save_path, affective_embedding.cpu().numpy())
	            print(f"✅ Affective embeddings saved to {save_path}")

	        return affective_embedding




	# -------------------------------------------------------
	# 🔹 4. Example Usage
	# -------------------------------------------------------
	if __name__ == "__main__":
	    # Input artefacts, keyed by the constructor argument they feed.
	    input_paths = {
	        "text_vad_path": "Dataset/twitter/text_vad_embedding.pt",
	        "face_vad_path": "Dataset/affectnet/df_with_image_vad_embedding.pkl",
	        "scene_vad_path": "Dataset/twitter/scene_emotions_vad_proj.csv",
	        "post_to_image_path": "Dataset/twitter/df_train_translated.csv",
	    }
	    generator = AffectiveEmbeddingGenerator(device="cpu", **input_paths)

	    # Fuse the three modalities and write the result next to the face data.
	    output_path = "Dataset/affectnet/affective_embedding.npy"
	    affective_embedding = generator.generate(save_path=output_path)

	    print("Affective embedding shape:", affective_embedding.shape)
	    print("Sample:", affective_embedding[:5])


	# import numpy as np

	# # Load the saved embeddings
	# affective_embeddings = np.load("Dataset/affectnet/affective_embedding.npy")

	# print("Shape of embeddings:", affective_embeddings.shape)


	# # Sum absolute values across each row (each post)
	# zero_mask = np.sum(np.abs(affective_embeddings), axis=1) == 0

	# # Count how many embeddings are zero
	# num_zero_embeddings = np.sum(zero_mask)
	# print(f"Number of posts with all-zero affective embeddings: {num_zero_embeddings} / {affective_embeddings.shape[0]}")