Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import io | |
| import streamlit as st | |
| from PIL import Image, ImageDraw | |
| from google import genai | |
| from google.genai import types | |
| from pdf2image import convert_from_bytes | |
| # Helper functions | |
# One decimal number: optional sign, digits with an optional fraction (or a
# bare fraction such as ".5"), and an optional exponent. Being strict here
# keeps malformed tokens like "1.2.3" from ever reaching float().
_BOX_NUMBER = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

# Exactly four comma-separated numbers inside one pair of square brackets,
# tolerating whitespace around the brackets and commas.
_BOX_PATTERN = re.compile(
    r'\[\s*' + r'\s*,\s*'.join([f'({_BOX_NUMBER})'] * 4) + r'\s*\]'
)


def parse_list_boxes(text):
    """Extract every four-number bracketed list from a model response.

    The text is scanned for groups of the form ``[a, b, c, d]``; surrounding
    prose, code fences and an enclosing outer list are ignored. Groups that
    contain a malformed number are skipped instead of raising.

    Args:
        text: Raw response text. ``None`` or an empty string is accepted.

    Returns:
        A list of ``[a, b, c, d]`` lists of floats, in order of appearance.
        Empty when nothing matches.
    """
    # A response without text (None / "") simply contains no boxes.
    if not text:
        return []
    return [
        [float(value) for value in match]
        for match in _BOX_PATTERN.findall(text)
    ]
def draw_bounding_boxes(image, boxes, *, scale=1.0, outline="#00FF00", line_width=3):
    """Draw rectangle outlines for ``boxes`` onto ``image`` in place.

    Each box is read as ``[ymin, xmin, ymax, xmax]``. Coordinates are divided
    by ``scale``, clamped to the range 0..1 and then multiplied by the image
    width/height to obtain pixel positions.

    Args:
        image: PIL image to annotate. It is modified and also returned.
        boxes: Iterable of indexable boxes with at least four numbers each.
        scale: Value that corresponds to the full image extent. The default
            of 1.0 expects fractions of the image size; pass e.g. 1000 when
            the coordinates are reported on a 0..1000 grid.
        outline: Outline colour passed to ``ImageDraw.rectangle``.
        line_width: Outline thickness in pixels.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        ValueError: If ``scale`` is not positive.
        IndexError: If a box has fewer than four entries.
    """
    if scale <= 0:
        raise ValueError("scale must be positive")

    draw = ImageDraw.Draw(image)
    width, height = image.size

    for box in boxes:
        y0, x0, y1, x1 = (
            max(0.0, min(1.0, box[i] / scale)) for i in range(4)
        )
        # Order the corners: a box whose min/max are swapped would otherwise
        # be rejected by ImageDraw.rectangle in recent Pillow releases.
        top, bottom = sorted((y0, y1))
        left, right = sorted((x0, x1))
        draw.rectangle(
            [left * width, top * height, right * width, bottom * height],
            outline=outline,
            width=line_width,
        )
    return image
# Streamlit UI
#
# Flow: the user uploads a PDF and names a topic; every page is rendered to
# an image, sent to Gemini once for bounding boxes of that topic and once for
# a free-text description, and the annotated pages are shown in tabs.
#
# NOTE(review): the indentation of this script was reconstructed. The
# "Analysieren" button is placed below the two columns; confirm whether it
# was meant to live inside the left column instead.
MODEL_NAME = "gemini-2.0-flash-exp"
DESCRIPTION_PROMPT = "Beschreibe diesen Dokumentenausschnitt detailliert."

st.title("PDF Themenerkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
    topic_name = st.text_input(
        "Thema zur Erkennung",
        placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'",
    )

# Short-circuiting keeps the button hidden until both inputs are provided.
if uploaded_file and topic_name and st.button("Analysieren"):
    with st.spinner("Analysiere PDF..."):
        try:
            # Convert PDF to images. getvalue() returns the whole upload
            # regardless of the current read position of the buffer.
            pdf_bytes = uploaded_file.getvalue()
            images = convert_from_bytes(pdf_bytes)

            # Initialize client; the API key is read from the KEY env var.
            client = genai.Client(api_key=os.getenv("KEY"))

            # The prompt only depends on the topic, so build it once.
            detection_prompt = (
                f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
                "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
                "als reine Python-Liste ohne weiteren Text. "
                "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
            )

            results = []
            for page_num, image in enumerate(images, start=1):
                # Prepare image: serialize the page as PNG for the API.
                img_byte_arr = io.BytesIO()
                image.save(img_byte_arr, format='PNG')
                image_part = types.Part.from_bytes(
                    data=img_byte_arr.getvalue(),
                    mime_type="image/png"
                )

                # Get topic boxes
                box_response = client.models.generate_content(
                    model=MODEL_NAME,
                    contents=[detection_prompt, image_part]
                )

                # Get description
                desc_response = client.models.generate_content(
                    model=MODEL_NAME,
                    contents=[DESCRIPTION_PROMPT, image_part]
                )

                # Process boxes. A parsing problem only costs this page its
                # boxes; the page is still shown with its description.
                try:
                    boxes = parse_list_boxes(box_response.text)
                except Exception as e:
                    st.error(f"Fehler bei Seite {page_num}: {e}")
                    boxes = []

                # Draw boxes on a copy so the rendered page stays untouched.
                annotated_image = image.copy()
                if boxes:
                    annotated_image = draw_bounding_boxes(annotated_image, boxes)

                results.append({
                    "page": page_num,
                    "image": annotated_image,
                    "description": desc_response.text,
                    "boxes": len(boxes)
                })

            # Display results
            with col2:
                st.write(f"## Ergebnisse ({len(results)} Seiten)")
                # Only build tabs when there is at least one page to show.
                if results:
                    tabs = st.tabs([f"Seite {res['page']}" for res in results])
                    for tab, res in zip(tabs, results):
                        with tab:
                            st.image(
                                res["image"],
                                caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
                                use_container_width=True,
                            )
                            st.write("**Beschreibung:**", res["description"])
        except Exception as e:
            # Top-level boundary: surface any failure (PDF conversion, API
            # call, rendering) in the UI instead of crashing the script run.
            st.error(f"Fehler: {e}")