# ================================================================
# Makefile — AI Story Server (Python 3.11)
# ================================================================
# Common usage:
#   make help
#   make install        # CPU-friendly install
#   make install-cuda   # build llama-cpp-python with CUDA/cuBLAS offload
#   make precache       # download models + compute voice latents once
#   make run            # run the Gradio app (prefers GPU if available)
#   make clean          # clean caches (keeps venv)
#   make deepclean      # remove venv + caches
# ---------------------------------------------------------------

# ---- Configurable vars ----
PYTHON ?= python3.11
VENV   ?= .venv
PY     := $(VENV)/bin/python
PIP    := $(VENV)/bin/pip
APP    ?= app.py
PORT   ?= 7860

# Core runtime deps (CPU-safe). Torch is pulled in as a transitive dependency
# where needed; pin torch externally if your environment requires a specific build.
REQS = \
	"numpy<2" \
	"gradio==4.27.0" \
	"python-dotenv" \
	"huggingface_hub" \
	"ffmpeg-python" \
	"nltk" \
	"emoji" \
	"langid" \
	"noisereduce" \
	"TTS" \
	"llama-cpp-python>=0.2.90"

# Dev tools (optional)
DEV_REQS = \
	"ruff" \
	"black" \
	"pip-tools"

# ================================================================
# Meta
# ================================================================
.PHONY: help venv install install-no-llama install-cuda install-dev \
        precache run run-gpu check-ffmpeg check-python lint format \
        freeze deps-update clean deepclean

help:
	@echo "Targets:"
	@echo "  install       - Create venv (Python 3.11) and install CPU-safe deps"
	@echo "  install-cuda  - Build llama-cpp-python with CUDA/cuBLAS offload + install deps"
	@echo "  install-dev   - Install dev tools (ruff, black, pip-tools)"
	@echo "  precache      - Download models & compute voice latents once (no UI)"
	@echo "  run           - Run Gradio app on PORT=$(PORT) (prefers native GPU if present)"
	@echo "  run-gpu       - Run app forcing CUDA_VISIBLE_DEVICES (default 0)"
	@echo "  lint          - Run ruff"
	@echo "  format        - Run black and ruff --fix"
	@echo "  freeze        - Write requirements.txt from current venv"
	@echo "  deps-update   - Upgrade runtime deps"
	@echo "  check-ffmpeg  - Verify ffmpeg is installed"
	@echo "  check-python  - Verify Python 3.11 is available"
	@echo "  clean         - Clear caches/artifacts (keeps venv)"
	@echo "  deepclean     - Remove venv and caches"

# ================================================================
# Environment / setup
# ================================================================
check-python:
	@command -v $(PYTHON) >/dev/null 2>&1 || \
		{ echo "ERROR: $(PYTHON) not found. Please install Python 3.11 and retry."; exit 1; }
	@echo "OK: $(PYTHON) found."

venv: check-python
	$(PYTHON) -m venv $(VENV)
	@echo "Virtual environment created at $(VENV)"

install-no-llama: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install "numpy<2" "gradio==4.27.0" python-dotenv huggingface_hub ffmpeg-python nltk emoji langid noisereduce TTS

# CPU-friendly install of all deps, including llama-cpp-python
install: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install $(REQS)

# CUDA/cuBLAS build for llama-cpp-python (requires the CUDA toolkit and a C/C++ compiler).
# Recent llama-cpp-python releases (including >=0.2.90) select CUDA via -DGGML_CUDA=on;
# older releases used -DLLAMA_CUBLAS=on / LLAMA_CUBLAS=1 instead.
install-cuda: venv
	$(PIP) install --upgrade pip setuptools wheel
	@echo "Building llama-cpp-python with CUDA/cuBLAS…"
	@export CMAKE_ARGS="-DGGML_CUDA=on"; \
		$(PIP) install --no-binary=llama-cpp-python --force-reinstall "llama-cpp-python>=0.2.90"
	# Install the rest of the deps (excluding llama-cpp-python, which was just built)
	$(MAKE) install-no-llama
	@echo "CUDA install complete."
install-dev: venv
	$(PIP) install --upgrade pip
	$(PIP) install $(DEV_REQS)

# ================================================================
# Utility checks
# ================================================================
check-ffmpeg:
	@command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found. Install ffmpeg and retry."; exit 1; }
	@ffmpeg -version | head -n 1

# ================================================================
# Workflow targets
# ================================================================
# Pre-download model assets and compute voice latents (runs the app's own functions).
# Each recipe line runs in its own shell, so a multi-line heredoc would break here
# without .ONESHELL; the calls are issued as a single `python -c` command instead.
precache: install check-ffmpeg
	$(PY) -c "from app import precache_assets, init_models_and_latents; precache_assets(); init_models_and_latents(); print('Precache complete.')"

run: install
	@echo "Starting app on port $(PORT)…"
	PORT=$(PORT) $(PY) $(APP)

# Run, preferring a specific GPU (default GPU 0). The app itself auto-detects CUDA.
run-gpu: install
	@echo "Starting app with CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} on port $(PORT)…"
	CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} PORT=$(PORT) $(PY) $(APP)

# Lint / format
lint: install-dev
	$(VENV)/bin/ruff check .

format: install-dev
	$(VENV)/bin/black .
	$(VENV)/bin/ruff check --fix .

# Freeze dependency snapshot
freeze:
	@echo "Writing requirements.txt from current venv…"
	$(VENV)/bin/pip freeze > requirements.txt
	@echo "requirements.txt updated."

# Upgrade runtime deps (keeps the numpy<2 guard)
deps-update: venv
	$(PIP) install --upgrade pip
	$(PIP) install --upgrade "numpy<2" "gradio==4.27.0" python-dotenv huggingface_hub ffmpeg-python nltk emoji langid noisereduce TTS "llama-cpp-python>=0.2.90"

# ================================================================
# Cleanup
# ================================================================
clean:
	@echo "Cleaning caches…"
	@rm -rf __pycache__ */__pycache__
	@rm -rf .pytest_cache .ruff_cache
	@rm -rf voices/*.tmp
	@rm -rf ~/.cache/huggingface/hub/tmp
	@rm -rf ~/.cache/huggingface/transformers
	@rm -rf ~/.cache/torch
	@rm -rf ~/.cache/pip
	@rm -rf ~/.local/share/tts/tmp
	@echo "Done."

deepclean: clean
	@echo "Removing venv and model caches…"
	@rm -rf $(VENV)
	@rm -rf ~/.local/share/tts
	@rm -rf voices
	@echo "Done."
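
# ----------------------------------------------------------------
# Usage notes (illustrative only; the port and GPU index below are
# hypothetical values, not defaults required by the app):
# every variable declared with ?= above (PYTHON, VENV, APP, PORT)
# can be overridden on the make command line, and run-gpu reads
# CUDA_VISIBLE_DEVICES from the environment:
#   make run PORT=8080                   # serve on a different port
#   CUDA_VISIBLE_DEVICES=1 make run-gpu  # pin the app to GPU 1
# ----------------------------------------------------------------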