text-guided-image-colorization

Running

App Files Files Community

LogicGoInfotechSpaces committed on Nov 14

Commit

779884f

1 Parent(s): 115b125

Add FastAPI version using SDXL + ControlNet for text-guided colorization (from fffiloni)

Browse files

Files changed (3) hide show

Dockerfile +2 -1
app/main_sdxl.py +499 -0
requirements.txt +6 -1

Dockerfile CHANGED Viewed

@@ -63,4 +63,5 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
 ENTRYPOINT ["/entrypoint.sh"]
 # Run the application (port will be set via environment variable)
-CMD ["sh", "-c", "uvicorn app.main_fastai:app --host 0.0.0.0 --port ${PORT:-7860}"]

 ENTRYPOINT ["/entrypoint.sh"]
 # Run the application (port will be set via environment variable)
+# Use SDXL version for text-guided colorization
+CMD ["sh", "-c", "uvicorn app.main_sdxl:app --host 0.0.0.0 --port ${PORT:-7860}"]

app/main_sdxl.py ADDED Viewed

	@@ -0,0 +1,499 @@

+"""
+FastAPI application for Text-Guided Image Colorization using SDXL + ControlNet
+Based on fffiloni/text-guided-image-colorization
+"""
+import os
+import io
+import uuid
+import logging
+from pathlib import Path
+from typing import Optional, Tuple
+from fastapi import FastAPI, UploadFile, File, HTTPException, Depends, Request
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+import firebase_admin
+from firebase_admin import credentials, app_check, auth as firebase_auth
+from PIL import Image
+import torch
+import uvicorn
+import gradio as gr
+# SDXL + ControlNet imports
+from accelerate import Accelerator
+from diffusers import (
+    AutoencoderKL,
+    StableDiffusionXLControlNetPipeline,
+    ControlNetModel,
+    UNet2DConditionModel,
+)
+from transformers import (
+    BlipProcessor, BlipForConditionalGeneration,
+)
+from safetensors.torch import load_file
+from huggingface_hub import hf_hub_download, snapshot_download
+from app.config import settings
+# Configure logging
+# Root logging is set to INFO; each record carries timestamp, logger name and level.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Create writable directories
+# All scratch paths live under /tmp and are created at import time.
+# NOTE(review): hf_cache and matplotlib_config are only created here; nothing in this
+# file points a library at them - presumably environment variables set elsewhere do.
+Path("/tmp/hf_cache").mkdir(parents=True, exist_ok=True)
+Path("/tmp/matplotlib_config").mkdir(parents=True, exist_ok=True)
+Path("/tmp/colorize_uploads").mkdir(parents=True, exist_ok=True)
+Path("/tmp/colorize_results").mkdir(parents=True, exist_ok=True)
+# Initialize FastAPI app
+app = FastAPI(
+    title="Text-Guided Image Colorization API",
+    description="Image colorization using SDXL + ControlNet with automatic captioning",
+    version="1.0.0"
+)
+# CORS middleware
+# NOTE(review): every origin, method and header is allowed while credentials are also
+# enabled - confirm this permissive combination is intended for production.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize Firebase Admin SDK
+# Prefer the service-account file named by FIREBASE_CREDENTIALS_PATH. If it is missing
+# or cannot be used, fall back to initialize_app() without an explicit credential.
+# Initialization is best-effort: every failure is logged and never raised, so the API
+# can still start. Only Exception is caught so SystemExit/KeyboardInterrupt propagate.
+firebase_cred_path = os.getenv("FIREBASE_CREDENTIALS_PATH", "/tmp/firebase-adminsdk.json")
+if os.path.exists(firebase_cred_path):
+    try:
+        cred = credentials.Certificate(firebase_cred_path)
+        firebase_admin.initialize_app(cred)
+        logger.info("Firebase Admin SDK initialized")
+    except Exception as e:
+        logger.warning("Failed to initialize Firebase: %s", str(e))
+        try:
+            firebase_admin.initialize_app()
+        except Exception as fallback_error:
+            logger.warning("Firebase default initialization failed: %s", fallback_error)
+else:
+    logger.warning("Firebase credentials file not found. App Check will be disabled.")
+    try:
+        firebase_admin.initialize_app()
+    except Exception as fallback_error:
+        logger.warning("Firebase default initialization failed: %s", fallback_error)
+# Storage directories
+# Same /tmp paths that are created at import time earlier in this module.
+UPLOAD_DIR = Path("/tmp/colorize_uploads")
+RESULT_DIR = Path("/tmp/colorize_results")
+# Mount static files
+# NOTE(review): a GET /results/{filename} route is declared later in this file; this
+# mount is registered first, so it presumably answers those requests - confirm.
+# NOTE(review): nothing visible in this file writes to UPLOAD_DIR - confirm the
+# /uploads mount is still needed.
+app.mount("/results", StaticFiles(directory=str(RESULT_DIR)), name="results")
+app.mount("/uploads", StaticFiles(directory=str(UPLOAD_DIR)), name="uploads")
+# Global model variables
+# Assigned by startup_event(); they keep these None defaults if model loading fails,
+# in which case model_load_error holds the failure message.
+pipe = None
+caption_model = None
+processor = None
+device = None
+weight_dtype = None
+model_load_error: Optional[str] = None
+# ========== Utility Functions ==========
+def apply_color(image: Image.Image, color_map: Image.Image) -> Image.Image:
+    """Combine the lightness of ``image`` with the chroma of ``color_map``.
+
+    Both inputs are converted to LAB. The L band is taken from ``image`` and the
+    a/b bands from ``color_map``; the merged result is returned as RGB.
+    """
+    # Only the first (L) band of the source image is kept.
+    lightness = image.convert('LAB').split()[0]
+    # The colour reference contributes its a and b bands only.
+    _, chroma_a, chroma_b = color_map.convert('LAB').split()
+    return Image.merge('LAB', (lightness, chroma_a, chroma_b)).convert('RGB')
+def _build_unlikely_words() -> Tuple[str, ...]:
+    """Build the ordered phrase table used by remove_unlikely_words()."""
+    years = [str(i) for i in range(1900, 2000)]
+    # Digit-spaced form of each year, e.g. "1 9 5 0".
+    spaced = [" ".join(y) for y in years]
+    a1 = [f"{y}s" for y in years]
+    a2 = years
+    a3 = [f"year {y}" for y in years]
+    a4 = [f"circa {y}" for y in years]
+    b1 = [f"{s} s" for s in spaced]
+    b2 = spaced
+    b3 = [f"year {s}" for s in spaced]
+    b4 = [f"circa {s}" for s in spaced]
+    manual = [
+        "black and white,", "black and white", "black & white,", "black & white", "circa",
+        "balck and white,", "monochrome,", "black-and-white,", "black-and-white photography,",
+        "black - and - white photography,", "monochrome bw,", "black white,", "black an white,",
+        "grainy footage,", "grainy footage", "grainy photo,", "grainy photo", "b&w photo",
+        "back and white", "back and white,", "monochrome contrast", "monochrome", "grainy",
+        "grainy photograph,", "grainy photograph", "low contrast,", "low contrast", "b & w",
+        "grainy black-and-white photo,", "bw", "bw,", "grainy black-and-white photo",
+        "b & w,", "b&w,", "b&w!,", "b&w", "black - and - white,", "bw photo,", "grainy  photo,",
+        "black-and-white photo,", "black-and-white photo", "black - and - white photography",
+        "b&w photo,", "monochromatic photo,", "grainy monochrome photo,", "monochromatic",
+        "blurry photo,", "blurry,", "blurry photography,", "monochromatic photo",
+        "black - and - white photograph,", "black - and - white photograph", "black on white,",
+        "black on white", "black-and-white", "historical image,", "historical picture,",
+        "historical photo,", "historical photograph,", "archival photo,", "taken in the early",
+        "taken in the late", "taken in the", "historic photograph,", "restored,", "restored",
+        "historical photo", "historical setting,",
+        "historic photo,", "historic", "desaturated!!,", "desaturated!,", "desaturated,", "desaturated",
+        "taken in", "shot on leica", "shot on leica sl2", "sl2",
+        "taken with a leica camera", "leica sl2", "leica", "setting",
+        "overcast day", "overcast weather", "slight overcast", "overcast",
+        "picture taken in", "photo taken in",
+        ", photo", ",  photo", ",   photo", ",    photo", ", photograph",
+        ",,", ",,,", ",,,,", " ,", "  ,", "   ,", "    ,",
+    ]
+    # Order matters: phrases are removed sequentially, so earlier entries win.
+    return tuple(a1 + a2 + a3 + a4 + b1 + b2 + b3 + b4 + manual)
+# Built once at import; the table never changes between calls.
+_UNLIKELY_WORDS = _build_unlikely_words()
+def remove_unlikely_words(prompt: str) -> str:
+    """Remove predefined unlikely phrases from prompt text.
+
+    Year mentions for 1900-1999 (plain, with an "s" suffix, "year"/"circa" prefixed,
+    and digit-spaced variants) and a fixed list of monochrome / archival photography
+    phrases are deleted by plain substring replacement, in table order.
+
+    Args:
+        prompt: Caption text to clean.
+    Returns:
+        The prompt with every listed phrase replaced by an empty string. Surrounding
+        whitespace is left as-is.
+    """
+    for word in _UNLIKELY_WORDS:
+        prompt = prompt.replace(word, "")
+    return prompt
+# ========== Model Loading ==========
+@app.on_event("startup")
+async def startup_event():
+    """Load the SDXL + ControlNet pipeline and the BLIP captioner on startup.
+
+    On success the module globals pipe, caption_model, processor, device and
+    weight_dtype are populated and model_load_error is cleared. Any exception is
+    logged and stored in model_load_error instead of being raised, so the app
+    still starts and /health can report the failure.
+    """
+    global pipe, caption_model, processor, device, weight_dtype, model_load_error
+    try:
+        logger.info("🔄 Loading SDXL + ControlNet colorization models...")
+        # Ensure required directories exist
+        os.makedirs("sdxl_light_caption_output", exist_ok=True)
+        # Download controlnet model snapshot
+        # A download failure is only logged; loading continues and relies on
+        # whatever is already present in the local directory.
+        try:
+            snapshot_download(
+                repo_id='nickpai/sdxl_light_caption_output',
+                local_dir='sdxl_light_caption_output'
+            )
+        except Exception as e:
+            logger.warning(f"Could not download controlnet snapshot: {e}")
+        # Device and precision setup
+        # NOTE(review): fp16 is requested unconditionally - confirm this works on
+        # CPU-only hosts.
+        accelerator = Accelerator(mixed_precision="fp16")
+        weight_dtype = torch.float16 if accelerator.mixed_precision == "fp16" else torch.float32
+        device = accelerator.device
+        logger.info(f"Using device: {device}, dtype: {weight_dtype}")
+        # Pretrained paths
+        base_model_path = settings.BASE_MODEL_ID
+        safetensors_ckpt = settings.LIGHTNING_WEIGHTS
+        controlnet_path = "sdxl_light_caption_output/checkpoint-30000/controlnet"
+        # Load diffusion components
+        logger.info("Loading VAE...")
+        vae = AutoencoderKL.from_pretrained(base_model_path, subfolder="vae")
+        logger.info("Loading UNet...")
+        # The UNet is created from the base model's config and then filled with the
+        # SDXL-Lightning checkpoint fetched from the ByteDance/SDXL-Lightning repo.
+        unet = UNet2DConditionModel.from_config(base_model_path, subfolder="unet")
+        unet.load_state_dict(load_file(hf_hub_download("ByteDance/SDXL-Lightning", safetensors_ckpt)))
+        logger.info("Loading ControlNet...")
+        controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=weight_dtype)
+        logger.info("Creating pipeline...")
+        pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+            base_model_path, vae=vae, unet=unet, controlnet=controlnet
+        )
+        pipe.to(device, dtype=weight_dtype)
+        pipe.safety_checker = None
+        # Load BLIP captioning model
+        logger.info("Loading BLIP captioning model...")
+        # Try large first, fallback to base
+        caption_model_name = "blip-image-captioning-large"
+        try:
+            processor = BlipProcessor.from_pretrained(f"Salesforce/{caption_model_name}")
+            caption_model = BlipForConditionalGeneration.from_pretrained(
+                f"Salesforce/{caption_model_name}", torch_dtype=weight_dtype
+            ).to(device)
+        except Exception as e:
+            logger.warning(f"Failed to load large model, trying base: {e}")
+            caption_model_name = "blip-image-captioning-base"
+            processor = BlipProcessor.from_pretrained(f"Salesforce/{caption_model_name}")
+            caption_model = BlipForConditionalGeneration.from_pretrained(
+                f"Salesforce/{caption_model_name}", torch_dtype=weight_dtype
+            ).to(device)
+        logger.info("✅ All models loaded successfully!")
+        model_load_error = None
+    except Exception as e:
+        error_msg = str(e)
+        logger.error(f"❌ Failed to load models: {error_msg}")
+        model_load_error = error_msg
+        # Don't raise - allow health check to work
+        # NOTE(review): globals assigned before the failing step keep their values,
+        # e.g. pipe may be set while caption_model is still None.
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Drop the model references on shutdown.
+
+    The globals are reset to None rather than deleted: ``del`` on a name declared
+    ``global`` removes the module attribute, so any later read of ``pipe`` or
+    ``caption_model`` (for example from the health check) would raise NameError.
+    """
+    global pipe, caption_model
+    pipe = None
+    caption_model = None
+    logger.info("Application shutdown")
+# ========== Authentication ==========
+def _extract_bearer_token(authorization_header: Optional[str]) -> Optional[str]:
+    """Return the token from a ``Bearer <token>`` Authorization header.
+
+    Uses ``Optional`` like the rest of this module; ``str | None`` in a signature
+    is evaluated at definition time and fails on Python versions before 3.10.
+
+    Args:
+        authorization_header: Raw header value, or None when the header is absent.
+    Returns:
+        The stripped token, or None when the header is missing, uses another
+        scheme, or carries no token.
+    """
+    if not authorization_header:
+        return None
+    # Split on any run of whitespace so extra spaces or tabs after the scheme
+    # are tolerated.
+    parts = authorization_header.split(None, 1)
+    if len(parts) == 2 and parts[0].lower() == "bearer":
+        token = parts[1].strip()
+        return token or None
+    return None
+async def verify_request(request: Request):
+    """Verify Firebase authentication for an incoming request.
+
+    Checks, in order:
+      1. If no Firebase app is initialised, or DISABLE_AUTH is "true", allow.
+      2. If a Bearer ID token is present and verifies, store the decoded claims
+         on request.state.user and allow.
+      3. If settings.ENABLE_APP_CHECK is set, require a valid
+         X-Firebase-AppCheck header; otherwise raise 401.
+      4. Otherwise allow.
+
+    Returns:
+        True when the request is allowed.
+    Raises:
+        HTTPException: 401 when App Check is enabled and its token is missing
+            or invalid.
+    """
+    # NOTE(review): relies on the private firebase_admin._apps registry.
+    if not firebase_admin._apps or os.getenv("DISABLE_AUTH", "false").lower() == "true":
+        return True
+    bearer = _extract_bearer_token(request.headers.get("Authorization"))
+    if bearer:
+        try:
+            decoded = firebase_auth.verify_id_token(bearer)
+            request.state.user = decoded
+            logger.info("Firebase Auth id_token verified for uid: %s", decoded.get("uid"))
+            return True
+        except Exception as e:
+            # An invalid ID token is not fatal here; fall through to App Check.
+            logger.warning("Auth token verification failed: %s", str(e))
+    if settings.ENABLE_APP_CHECK:
+        app_check_token = request.headers.get("X-Firebase-AppCheck")
+        if not app_check_token:
+            raise HTTPException(status_code=401, detail="Missing App Check token")
+        try:
+            app_check_claims = app_check.verify_token(app_check_token)
+            logger.info("App Check token verified for: %s", app_check_claims.get("app_id"))
+            return True
+        except Exception as e:
+            logger.warning("App Check token verification failed: %s", str(e))
+            raise HTTPException(status_code=401, detail="Invalid App Check token")
+    # NOTE(review): with App Check disabled, requests with no bearer token or with an
+    # invalid one reach this point and are allowed - confirm this is intended.
+    return True
+# ========== API Endpoints ==========
+@app.get("/api")
+async def api_info():
+    """Describe the service: its name, version and main route paths."""
+    route_map = {
+        "health": "/health",
+        "colorize": "/colorize",
+        "gradio": "/"
+    }
+    return {
+        "app": "Text-Guided Image Colorization API",
+        "version": "1.0.0",
+        **route_map
+    }
+@app.get("/health")
+async def health_check():
+    """Report service status, whether both models are loaded, and the device.
+
+    A "model_error" entry is added when startup recorded a loading failure.
+    """
+    models_ready = not (pipe is None or caption_model is None)
+    device_name = str(device) if device else None
+    payload = {
+        "status": "healthy",
+        "model_loaded": models_ready,
+        "model_type": "sdxl_controlnet",
+        "device": device_name
+    }
+    if model_load_error:
+        payload["model_error"] = model_load_error
+    return payload
+def colorize_image_sdxl(
+    image: Image.Image,
+    positive_prompt: Optional[str] = None,
+    negative_prompt: Optional[str] = None,
+    seed: int = 123,
+    num_inference_steps: int = 8
+) -> Tuple[Image.Image, str]:
+    """
+    Colorize an image with the loaded SDXL + ControlNet pipeline.
+    The input is reduced to a 512x512 grayscale control image, captioned with
+    BLIP, and the cleaned caption (optionally prefixed by ``positive_prompt``)
+    drives the diffusion run. The generated colours are merged onto the control
+    image and the result is resized back to the input size.
+    Args:
+        image: PIL Image to colorize
+        positive_prompt: Text placed before the caption in the final prompt
+        negative_prompt: Negative prompt; settings.NEGATIVE_PROMPT when empty
+        seed: Seed passed to torch.manual_seed
+        num_inference_steps: Number of inference steps
+    Returns:
+        Tuple of (colorized PIL Image, caption string)
+    Raises:
+        RuntimeError: If the pipeline or caption model is not loaded
+    """
+    if pipe is None or caption_model is None:
+        raise RuntimeError("Models not loaded")
+    torch.manual_seed(seed)
+    target_size = image.size
+    # Grayscale copy expanded back to three channels, at the working resolution.
+    gray_rgb = image.convert("L").convert("RGB").resize((512, 512))
+    # Caption the control image, seeding BLIP with the configured prefix.
+    caption_prefix = settings.CAPTION_PREFIX
+    blip_inputs = processor(gray_rgb, caption_prefix, return_tensors="pt").to(device, dtype=weight_dtype)
+    generated_ids = caption_model.generate(**blip_inputs)
+    raw_caption = processor.decode(generated_ids[0], skip_special_tokens=True)
+    caption = remove_unlikely_words(raw_caption)
+    # The user's text, when given, goes in front of the caption.
+    final_prompt = f"{positive_prompt}, {caption}" if positive_prompt else caption
+    # Diffusion run; the generator is re-seeded with the same seed.
+    output = pipe(
+        prompt=final_prompt,
+        negative_prompt=negative_prompt or settings.NEGATIVE_PROMPT,
+        num_inference_steps=num_inference_steps,
+        generator=torch.manual_seed(seed),
+        image=gray_rgb
+    )
+    recolored = apply_color(gray_rgb, output.images[0])
+    return recolored.resize(target_size), caption
+@app.post("/colorize")
+async def colorize_api(
+    file: UploadFile = File(...),
+    positive_prompt: Optional[str] = None,
+    negative_prompt: Optional[str] = None,
+    seed: int = 123,
+    num_inference_steps: int = 8,
+    verified: bool = Depends(verify_request)
+):
+    """
+    Upload a grayscale image -> returns colorized image.
+    Uses SDXL + ControlNet with automatic captioning.
+    The colorized PNG is written to RESULT_DIR under a fresh UUID and the response
+    carries the result id, the caption and two download paths.
+    Raises:
+        HTTPException: 503 when the models are not loaded; 400 when the upload is
+            not an image, cannot be decoded, or num_inference_steps is below 1;
+            500 for any other failure during colorization.
+    """
+    if pipe is None or caption_model is None:
+        raise HTTPException(status_code=503, detail="Colorization models not loaded")
+    if not file.content_type or not file.content_type.startswith("image/"):
+        raise HTTPException(status_code=400, detail="File must be an image")
+    if num_inference_steps < 1:
+        raise HTTPException(status_code=400, detail="num_inference_steps must be at least 1")
+    try:
+        img_bytes = await file.read()
+        try:
+            image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
+        except OSError as e:
+            # The declared content type is client-supplied; data PIL cannot decode
+            # is a client error, not a server fault.
+            logger.warning("Rejected undecodable upload: %s", str(e))
+            raise HTTPException(status_code=400, detail="File is not a valid image") from e
+        logger.info("Colorizing image with SDXL + ControlNet...")
+        colorized, caption = colorize_image_sdxl(
+            image,
+            positive_prompt=positive_prompt,
+            negative_prompt=negative_prompt,
+            seed=seed,
+            num_inference_steps=num_inference_steps
+        )
+        result_id = str(uuid.uuid4())
+        output_filename = f"{result_id}.png"
+        output_path = RESULT_DIR / output_filename
+        colorized.save(output_path, "PNG")
+        logger.info("Colorized image saved: %s", output_filename)
+        return JSONResponse({
+            "success": True,
+            "result_id": result_id,
+            "caption": caption,
+            "download_url": f"/results/{output_filename}",
+            "api_download": f"/download/{result_id}"
+        })
+    except HTTPException:
+        # Keep deliberate client errors from being rewrapped as 500.
+        raise
+    except Exception as e:
+        # logger.exception keeps the traceback in the log.
+        logger.exception("Error colorizing image: %s", str(e))
+        raise HTTPException(status_code=500, detail=f"Error colorizing image: {str(e)}") from e
+@app.get("/download/{file_id}")
+def download_result(file_id: str, verified: bool = Depends(verify_request)):
+    """Return the stored PNG for ``file_id``, or 404 when no such result exists."""
+    path = RESULT_DIR / f"{file_id}.png"
+    if path.exists():
+        return FileResponse(path, media_type="image/png")
+    raise HTTPException(status_code=404, detail="Result not found")
+@app.get("/results/{filename}")
+def get_result(filename: str):
+    """Public endpoint to access colorized images.
+
+    Args:
+        filename: Name of a file inside RESULT_DIR.
+    Raises:
+        HTTPException: 404 when the name does not refer to a regular file.
+    """
+    path = RESULT_DIR / filename
+    # is_file() rather than exists(): names such as "." or ".." resolve to
+    # directories, which exist but cannot be sent as a file response.
+    if not path.is_file():
+        raise HTTPException(status_code=404, detail="Result not found")
+    return FileResponse(path, media_type="image/png")
+# ========== Gradio Interface (Optional) ==========
+def gradio_colorize(image, positive_prompt=None, negative_prompt=None, seed=123):
+    """Gradio callback: colorize ``image`` and return (image, caption).
+
+    Returns (None, "") when no image is supplied. When the models are not loaded
+    or colorization fails, returns (None, message) instead of raising.
+    """
+    if image is None:
+        return None, ""
+    try:
+        models_missing = pipe is None or caption_model is None
+        if models_missing:
+            return None, "Models not loaded"
+        return colorize_image_sdxl(
+            image,
+            positive_prompt=positive_prompt,
+            negative_prompt=negative_prompt,
+            seed=seed
+        )
+    except Exception as e:
+        # Surface the failure in the caption box rather than breaking the UI.
+        logger.error("Gradio colorization error: %s", str(e))
+        return None, str(e)
+title = "🎨 Text-Guided Image Colorization"
+description = "Upload a grayscale image and generate a color version guided by automatic captioning using SDXL + ControlNet."
+# Gradio UI around gradio_colorize: image + two prompts + seed in, image + caption out.
+iface = gr.Interface(
+    fn=gradio_colorize,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Positive Prompt", placeholder="Enter details to enhance the caption"),
+        gr.Textbox(label="Negative Prompt", value=settings.NEGATIVE_PROMPT),
+        # step=1 keeps seeds integral and makes every value in 0-1000 selectable,
+        # including the default 123, instead of relying on Gradio's default step.
+        gr.Slider(0, 1000, 123, step=1, label="Seed")
+    ],
+    outputs=[
+        gr.Image(type="pil", label="Colorized Image"),
+        gr.Textbox(label="Caption", show_copy_button=True)
+    ],
+    title=title,
+    description=description,
+)
+# Mount Gradio app at root
+# Rebinds `app` to the object returned by gr.mount_gradio_app. This runs after all
+# API routes above have been registered on the FastAPI instance.
+app = gr.mount_gradio_app(app, iface, path="/")
+# ========== Run Server ==========
+# Direct execution only: serve on all interfaces, port taken from PORT (default 7860).
+if __name__ == "__main__":
+    port = int(os.getenv("PORT", "7860"))
+    uvicorn.run(app, host="0.0.0.0", port=port)

requirements.txt CHANGED Viewed

@@ -9,4 +9,9 @@ fastai
 huggingface_hub
 pydantic-settings
 opencv-python
-numpy

 huggingface_hub
 pydantic-settings
 opencv-python
+numpy
+accelerate
+transformers
+diffusers
+safetensors
+ftfy