Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import tempfile | |
| import requests | |
| from moviepy.editor import VideoFileClip | |
| from speechbrain.pretrained import EncoderClassifier | |
| import torchaudio | |
| import torch | |
| # --- Real Accent Analyzer using SpeechBrain embeddings --- | |
class RealAccentAnalyzer:
    """Rank a fixed set of English accents by embedding similarity.

    An audio file is encoded with a pre-trained SpeechBrain speaker-embedding
    model (used here as a proxy for accent) and the resulting vector is
    compared, via cosine similarity, with one reference vector per accent.

    NOTE: the reference vectors are random placeholders, not embeddings of
    real accented speech, so the scores carry no linguistic meaning until
    they are replaced with real references.
    """

    # Accent labels, in the order their placeholder references are generated.
    ACCENTS = ("American", "British", "Indian", "Australian", "Canadian")
    # Width of each placeholder reference vector.
    EMBEDDING_DIM = 192
    # Audio is resampled to this rate before being passed to the encoder.
    TARGET_SAMPLE_RATE = 16000

    def __init__(self):
        """Load the pre-trained encoder and build the reference table."""
        # Pre-trained speaker embedding model (used as a proxy for accent).
        self.classifier = EncoderClassifier.from_hparams(
            source="speechbrain/spkrec-ecapa-voxceleb"
        )
        self.reference_embeddings = self._load_reference_embeddings()

    def _load_reference_embeddings(self):
        """Return a dict mapping accent name to a ``(1, 192)`` placeholder tensor.

        The tensors are drawn from a generator with a fixed seed so that every
        instance (and every process) sees the same references; otherwise the
        same audio would be labelled differently each time the analyzer is
        constructed.
        """
        generator = torch.Generator().manual_seed(0)
        return {
            accent: torch.randn(1, self.EMBEDDING_DIM, generator=generator)
            for accent in self.ACCENTS
        }

    def _extract_embedding(self, audio_path):
        """Encode the audio file at ``audio_path`` into an embedding tensor.

        The waveform is averaged down to one channel and resampled to
        ``TARGET_SAMPLE_RATE`` before encoding. The encoder output is returned
        squeezed and detached (presumably a 1-D vector for a single clip;
        confirm against the model).

        Raises:
            ValueError: if the file decodes to zero samples.
        """
        signal, fs = torchaudio.load(audio_path)
        if signal.numel() == 0:
            raise ValueError("Audio track contains no samples.")
        if signal.shape[0] > 1:
            # Down-mix multi-channel audio to mono.
            signal = torch.mean(signal, dim=0, keepdim=True)
        if fs != self.TARGET_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(
                orig_freq=fs, new_freq=self.TARGET_SAMPLE_RATE
            )
            signal = resampler(signal)
        embedding = self.classifier.encode_batch(signal)
        return embedding.squeeze().detach()

    def _compare_embeddings(self, emb):
        """Return a dict of accent name to cosine similarity with ``emb``.

        Both operands are flattened to 1-D first. Comparing a 1-D embedding
        with a ``(1, N)`` reference along ``dim=0`` broadcasts to ``(1, N)``
        and yields N values instead of one, which makes ``.item()`` raise.
        """
        query = emb.reshape(-1)
        return {
            accent: torch.nn.functional.cosine_similarity(
                query, ref_emb.reshape(-1), dim=0
            ).item()
            for accent, ref_emb in self.reference_embeddings.items()
        }

    def analyze(self, audio_path):
        """Score the audio file against every reference accent.

        Returns:
            dict with keys ``"accent"`` (best-scoring label), ``"score"`` (its
            cosine similarity), ``"explanation"`` (human-readable sentence)
            and ``"all_scores"`` (similarity for every accent).
        """
        emb = self._extract_embedding(audio_path)
        similarities = self._compare_embeddings(emb)
        top_accent = max(similarities, key=similarities.get)
        confidence = similarities[top_accent]
        explanation = f"The speaker most likely has a {top_accent} English accent with similarity score {confidence:.2f}."
        return {
            "accent": top_accent,
            "score": confidence,
            "explanation": explanation,
            "all_scores": similarities,
        }
| # --- Download and Extract Audio --- | |
def download_and_extract_audio(url, *, timeout=30):
    """Download the video at ``url`` and write its audio track to a WAV file.

    URLs containing ``youtube.com`` or ``youtu.be`` are fetched with
    ``pytubefix``; anything else is streamed over HTTP with ``requests``.
    The audio track is then extracted with MoviePy.

    Args:
        url: Address of the video. It is user-supplied and fetched as-is,
            with no allow-listing of hosts.
        timeout: Seconds passed to ``requests.get`` for direct downloads, so
            an unresponsive host cannot hang the request indefinitely.

    Returns:
        Path of ``audio.wav`` inside a fresh temporary directory. The caller
        is responsible for deleting it.

    Raises:
        RuntimeError: if no progressive MP4 stream exists for a YouTube URL,
            or the downloaded video has no audio track.
        requests.HTTPError: if a direct download returns an error status.
    """
    import shutil  # local import: only needed for cleanup on failure

    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")

    try:
        if "youtube.com" in url or "youtu.be" in url:
            from pytubefix import YouTube

            yt = YouTube(url)
            stream = yt.streams.filter(progressive=True, file_extension="mp4").first()
            if not stream:
                raise RuntimeError("No suitable video stream found.")
            stream.download(output_path=temp_dir, filename="video.mp4")
        else:
            # The context manager releases the connection even on failure.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()
                with open(video_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise RuntimeError("The video has no audio track.")
            clip.audio.write_audiofile(audio_path, logger=None)
        finally:
            # Always close the clip so its reader resources are released.
            clip.close()
    except BaseException:
        # Nothing usable was produced; do not leave the directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # The video is no longer needed once the audio has been written.
    try:
        os.remove(video_path)
    except OSError:
        pass
    return audio_path
| # --- Gradio Interface --- | |
# Shared analyzer instance, created on first successful use.
_ANALYZER = None


def _get_analyzer():
    """Return the shared analyzer, constructing it on first use.

    Constructing the analyzer loads a pre-trained model, so it is done once
    rather than on every request. A failed construction is not cached.
    """
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = RealAccentAnalyzer()
    return _ANALYZER


def _discard_audio(audio_path):
    """Best-effort removal of the audio file and its directory, if empty."""
    try:
        os.remove(audio_path)
    except OSError:
        pass
    try:
        # rmdir only succeeds on an empty directory, so unrelated files are
        # never deleted.
        os.rmdir(os.path.dirname(audio_path))
    except OSError:
        pass


def analyze_from_url(url):
    """Gradio handler: analyze the accent of the speaker in the video at ``url``.

    Returns:
        A 3-tuple ``(accent, confidence, explanation)`` of strings. On any
        failure the tuple is ``("Error", "0%", <message>)`` rather than an
        exception, so the UI always has something to display.
    """
    if not url or not url.strip():
        return ("Error", "0%", "Error processing video/audio: no URL provided.")

    audio_path = None
    try:
        audio_path = download_and_extract_audio(url.strip())
        results = _get_analyzer().analyze(audio_path)
        return (
            results["accent"],
            f"{results['score']*100:.1f}%",
            results["explanation"],
        )
    except Exception as e:
        return ("Error", "0%", f"Error processing video/audio: {e}")
    finally:
        # Clean up even when analysis fails, not only on success.
        if audio_path is not None:
            _discard_audio(audio_path)
def _build_interface():
    """Assemble the Gradio UI: one URL text box in, three result text boxes out."""
    url_box = gr.Textbox(label="Enter Public Video URL (YouTube or direct MP4)")
    result_boxes = [
        gr.Textbox(label=caption)
        for caption in ("Detected Accent", "Confidence Score", "Explanation")
    ]
    return gr.Interface(
        fn=analyze_from_url,
        inputs=url_box,
        outputs=result_boxes,
        title="Accent Analyzer (Real Embeddings with SpeechBrain)",
        description="Paste a public video URL. This app uses SpeechBrain speaker embeddings to infer accent similarity. It's experimental!",
    )


# Built at import time so the interface object is available as a module attribute.
iface = _build_interface()

if __name__ == "__main__":
    iface.launch()