Spaces:
Running
Running
File size: 4,682 Bytes
import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types
from pdf2image import convert_from_bytes
# Helper functions
def parse_list_boxes(text):
    """Extract bounding boxes from a model response.

    Scans *text* for bracketed groups of four comma-separated, non-negative
    decimal numbers (e.g. ``[0.1, 0.2, 0.3, 0.4]``) and returns them in
    order of appearance. Whitespace, including newlines, is tolerated
    around the numbers so pretty-printed lists are also recognised.

    Args:
        text: Response text to scan. ``None`` or an empty string yields
            an empty result.

    Returns:
        A list of four-element float lists, one per matched group.
    """
    if not text:
        return []
    # A number is digits with an optional fraction ("1", "1.", "0.25") or a
    # bare fraction (".5"). Tokens such as "." or "1.2.3" are rejected here
    # instead of crashing float() later.
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'
    return [
        [float(value) for value in match]
        for match in re.findall(pattern, text)
    ]
def draw_bounding_boxes(image, boxes):
    """Draw bounding boxes onto an image.

    Args:
        image: Image to annotate. It is passed to ``ImageDraw.Draw`` and
            drawn on directly, so pass a copy to keep the original intact.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences holding
            coordinates relative to the image size. Values outside
            ``[0.0, 1.0]`` are clamped to that range.

    Returns:
        The same ``image`` object that was passed in.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        y_a, x_a, y_b, x_b = (max(0.0, min(1.0, value)) for value in box[:4])
        # Pillow rejects a rectangle whose second corner lies before the
        # first, so order each axis in case the model swapped min and max.
        left, right = sorted((x_a, x_b))
        top, bottom = sorted((y_a, y_b))
        draw.rectangle(
            [left * width, top * height, right * width, bottom * height],
            outline="#00FF00",
            width=3,
        )
    return image
# Streamlit UI
# Model used for both the detection and the description request.
GEMINI_MODEL = "gemini-2.0-flash-exp"

st.title("PDF Themenerkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
    topic_name = st.text_input(
        "Thema zur Erkennung",
        placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'",
    )

# The button is only rendered once a file and a topic are present
# (short-circuit evaluation keeps st.button from being called earlier).
if uploaded_file and topic_name and st.button("Analysieren"):
    with st.spinner("Analysiere PDF..."):
        try:
            # Convert PDF to one image per page
            pdf_bytes = uploaded_file.read()
            images = convert_from_bytes(pdf_bytes)

            # Initialize client; the API key is read from the KEY env var
            client = genai.Client(api_key=os.getenv("KEY"))

            # The prompt depends only on the topic, so build it once.
            detection_prompt = (
                f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
                "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
                "als reine Python-Liste ohne weiteren Text. "
                "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
            )

            results = []
            for page_num, image in enumerate(images, start=1):
                # Encode the page as PNG for the API request
                img_byte_arr = io.BytesIO()
                image.save(img_byte_arr, format='PNG')
                image_part = types.Part.from_bytes(
                    data=img_byte_arr.getvalue(),
                    mime_type="image/png"
                )

                # Get topic boxes
                box_response = client.models.generate_content(
                    model=GEMINI_MODEL,
                    contents=[detection_prompt, image_part]
                )

                # Get description
                desc_response = client.models.generate_content(
                    model=GEMINI_MODEL,
                    contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", image_part]
                )

                # Process boxes. Deliberately best-effort: a page whose
                # response cannot be parsed is reported and shown without
                # boxes instead of aborting the whole analysis.
                try:
                    boxes = parse_list_boxes(box_response.text)
                except Exception as e:
                    st.error(f"Fehler bei Seite {page_num}: {str(e)}")
                    boxes = []

                # Draw on a copy so the converted page image stays untouched
                annotated_image = image.copy()
                if boxes:
                    annotated_image = draw_bounding_boxes(annotated_image, boxes)

                results.append({
                    "page": page_num,
                    "image": annotated_image,
                    # The response text can be missing (e.g. an empty or
                    # blocked answer); avoid rendering a literal "None".
                    "description": desc_response.text or "Keine Beschreibung verfügbar.",
                    "boxes": len(boxes)
                })

            # Display results
            with col2:
                st.write(f"## Ergebnisse ({len(results)} Seiten)")
                if not results:
                    # st.tabs needs at least one label, so skip it here.
                    st.warning("Keine Seiten im PDF gefunden.")
                else:
                    tabs = st.tabs([f"Seite {res['page']}" for res in results])
                    for tab, res in zip(tabs, results):
                        with tab:
                            st.image(
                                res["image"],
                                caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
                                use_container_width=True,
                            )
                            st.write("**Beschreibung:**", res["description"])
        except Exception as e:
            # Top-level boundary: surface any failure to the user.
            st.error(f"Fehler: {str(e)}")