"""Enhance audio files using AI-Coustics API.""" # /// script # requires-python = ">=3.12" # dependencies = [ # "librosa", # "requests", # "soundfile", # "tqdm", # ] # /// import json import os import time from pathlib import Path from typing import List, Optional import librosa import requests import soundfile as sf import tqdm class AiCousticsEnhancer: """Client for AI-Coustics audio enhancement API""" def __init__(self, api_key: str): """ Initialize the AI-Coustics API client Args: api_key: Your AI-Coustics API key """ self.api_key = api_key self.base_url = "https://api.ai-coustics.io/v2" self.headers = {"X-API-Key": self.api_key} def upload_audio( self, file_path: str, enhancement_level: int = 100, enhancement_model: str = "LARK_V2", loudness_target: int = -19, true_peak: int = -1, transcode: str = "WAV", ) -> dict: """ Upload an audio file for enhancement Args: file_path: Path to the audio file enhancement_level: Enhancement strength (0-100) enhancement_model: Model to use (LARK_V2 or FINCH_V2) loudness_target: Target loudness in LUFS true_peak: True peak level in dBFS transcode: Output format (WAV, MP3, etc.) Returns: Response dictionary with uid and metadata """ url = f"{self.base_url}/medias" # Prepare the enhancement parameters media_enhancement = { "loudness_target": loudness_target, "true_peak": true_peak, "enhancement_level": enhancement_level, "enhancement_model": enhancement_model, "transcode": transcode, } # Upload file with open(file_path, "rb") as f: files = {"file": f} data = {"media_enhancement": json.dumps(media_enhancement)} response = requests.post(url, headers=self.headers, files=files, data=data) response.raise_for_status() return response.json() def check_status(self, uid: str) -> dict: """ Check the processing status of an uploaded media file Args: uid: Unique identifier returned from upload Returns: Metadata dictionary with current status """ url = f"{self.base_url}/medias/{uid}/metadata" response = requests.get(url, headers=self.headers) response.raise_for_status() return response.json() def wait_for_completion( self, uid: str, poll_interval: int = 2, timeout: int = 300 ) -> dict: """ Poll the API until processing is complete Args: uid: Unique identifier returned from upload poll_interval: Seconds between status checks timeout: Maximum seconds to wait Returns: Final metadata dictionary """ start_time = time.time() while time.time() - start_time < timeout: metadata = self.check_status(uid) status = metadata.get("enhancement_status") print(f"Status: {status}") if status == "COMPLETED": return metadata elif status == "FAILED": raise Exception(f"Enhancement failed: {metadata}") time.sleep(poll_interval) raise TimeoutError(f"Processing did not complete within {timeout} seconds") def download_enhanced(self, uid: str, output_path: str): """ Download the enhanced audio file Args: uid: Unique identifier returned from upload output_path: Path where to save the enhanced file """ url = f"{self.base_url}/medias/{uid}/file" response = requests.get(url, headers=self.headers, stream=True) response.raise_for_status() # Save the file with open(output_path, "wb") as f: for chunk in response.iter_content(chunk_size=8192): f.write(chunk) print(f"Enhanced audio saved to: {output_path}") def get_enhanced_output_path(input_path: Path) -> Path: """Generate output path by adding '_enhanced' suffix to the name.""" return input_path.with_stem(input_path.stem + "_enhanced") def enhance_audio_files( input_paths: List[Path], api_key: str, enhancement_level: int = 90, enhancement_model: str = "LARK_V2", ) -> List[Path]: """ Process a list of audio files through AI-Coustics API. Args: input_paths: List of paths to audio files api_key: Your AI-Coustics API key enhancement_level: Enhancement strength (0-100) enhancement_model: Model to use (LARK_V2 or FINCH_V2) Returns: List of paths to enhanced audio files """ client = AiCousticsEnhancer(api_key) enhanced_files = [] valid_input_paths = [ p for p in input_paths if p.exists() and not get_enhanced_output_path(p).exists() ] for input_path in tqdm.tqdm(valid_input_paths): output_path = get_enhanced_output_path(input_path) if output_path.exists(): print(f"Found {output_path}, skipping") continue output_path.parent.mkdir(parents=True, exist_ok=True) try: print("Uploading to AI-Coustics...") result = client.upload_audio( str(input_path), enhancement_level=enhancement_level, enhancement_model=enhancement_model, ) uid = result["uid"] print(f"Uploaded successfully. UID: {uid}") print("Waiting for enhancement to complete...") client.wait_for_completion(uid) print("Enhancement completed!") client.download_enhanced(uid, str(output_path)) enhanced_files.append(output_path) except Exception as e: print(f"Error processing file {input_path}: {e}") continue print(f"\n{'=' * 50}") print( f"Processing complete! Enhanced {len(enhanced_files)}/{len(input_paths)} files" ) print(f"{'=' * 50}") return enhanced_files def collect_audio_files(paths: List[str]) -> List[Path]: """Collect audio files from a mix of files and directories.""" audio_files: List[Path] = [] for p in paths: path = Path(p).resolve() if path.is_file(): audio_files.append(path) elif path.is_dir(): audio_files.extend(Path(f) for f in librosa.util.find_files(path)) else: print(f"Warning: {p} is not a valid file or directory, skipping") return [f for f in audio_files if "_enhanced" not in f.stem] if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( description="Enhance audio files using AI-Coustics API" ) parser.add_argument( "inputs", nargs="+", help="Input audio files or directories to process", ) parser.add_argument( "--api-key", default=os.environ.get("AICOUSTICS_API_KEY"), help="AI-Coustics API key (default: AICOUSTICS_API_KEY env var)", ) parser.add_argument( "--enhancement-level", type=int, default=100, help="Enhancement strength 0-100 (default: 100)", ) parser.add_argument( "--model", default="LARK_V2", choices=["LARK_V2", "FINCH_V2"], help="Enhancement model (default: LARK_V2)", ) args = parser.parse_args() if not args.api_key: parser.error("API key required via --api-key or AICOUSTICS_API_KEY env var") input_files = collect_audio_files(args.inputs) if not input_files: print("No valid audio files found to process.") exit(1) print(f"Found {len(input_files)} audio files to process") enhanced_files = enhance_audio_files( input_paths=input_files, api_key=args.api_key, enhancement_level=args.enhancement_level, enhancement_model=args.model, ) print("\nEnhanced files:") for file in enhanced_files: print(f" - {file}")