File size: 4,682 Bytes
6459986
6c79114
d1dce8a
6c79114
 
 
1d8d466
7e4f227
5f554b3
7e4f227
6c79114
7e4f227
6c79114
 
 
5f554b3
cdb1e78
7e4f227
cdb1e78
 
 
 
 
 
 
 
 
 
 
 
 
 
7e4f227
6c79114
d1dce8a
6c79114
7e4f227
02d80b6
 
 
7e4f227
 
6c79114
7e4f227
80e2b7f
7e4f227
80e2b7f
7e4f227
 
 
 
6c79114
7e4f227
6c79114
 
7e4f227
 
 
 
 
 
 
 
 
b908919
7e4f227
 
 
 
 
 
 
 
 
 
 
b908919
7e4f227
 
 
 
b908919
 
7e4f227
 
 
 
 
 
6c79114
7e4f227
 
b908919
7e4f227
 
 
 
 
 
 
 
 
 
 
 
 
d57a6ad
7e4f227
 
 
 
 
 
 
80e2b7f
6c79114
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types
from pdf2image import convert_from_bytes

# Helper functions
def parse_list_boxes(text):
    """Extract four-number bounding boxes from a model response.

    Scans *text* for bracketed groups of exactly four non-negative decimal
    numbers, e.g. ``[0.1, 0.2, 0.3, 0.4]``, and returns them in order of
    appearance. Groups nested in an outer list are found individually.

    Args:
        text: Raw response text; may be ``None`` or empty.

    Returns:
        A list of ``[a, b, c, d]`` float lists, one per match. Empty when
        nothing matches or *text* is falsy.
    """
    if not text:
        return []

    # A strict number: "12", "12.", "12.5" or ".5". A loose class such as
    # [\d.]+ would also accept "." or "1.2.3", which float() cannot parse.
    number = r'(\d+(?:\.\d*)?|\.\d+)'
    # Tolerate optional whitespace around the brackets and commas.
    pattern = r'\[\s*' + r'\s*,\s*'.join([number] * 4) + r'\s*\]'

    return [[float(value) for value in match]
            for match in re.findall(pattern, text)]

def draw_bounding_boxes(image, boxes):
    """Draw green rectangles for normalized bounding boxes onto *image*.

    Args:
        image: Image object accepted by ``ImageDraw.Draw``; it is modified
            in place.
        boxes: Iterable of ``[ymin, xmin, ymax, xmax]`` sequences whose
            values are fractions of the image height/width. Values outside
            ``[0, 1]`` are clamped to that range.

    Returns:
        The same *image* object that was passed in, now annotated.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size

    for box in boxes:
        # Clamp each coordinate into [0, 1] so the box stays on the image.
        ymin, xmin, ymax, xmax = [
            max(0.0, min(1.0, box[index])) for index in range(4)
        ]

        # Model output may list the corners in swapped order. Recent Pillow
        # releases raise ValueError for x1 < x0 or y1 < y0, so order each
        # axis before drawing instead of letting one bad box abort the run.
        top, bottom = min(ymin, ymax), max(ymin, ymax)
        left, right = min(xmin, xmax), max(xmin, xmax)

        draw.rectangle([
            left * width,
            top * height,
            right * width,
            bottom * height
        ], outline="#00FF00", width=3)
    return image

# Streamlit UI: upload a PDF, ask Gemini for topic regions on every page,
# and show the annotated pages with a description in the second column.
st.title("PDF Themenerkennung mit Gemini")
col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
    topic_name = st.text_input("Thema zur Erkennung", placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'")

    # Short-circuit evaluation keeps the button hidden until both inputs exist.
    if uploaded_file and topic_name and st.button("Analysieren"):
        with st.spinner("Analysiere PDF..."):
            try:
                # Convert the uploaded PDF into one image per page.
                page_images = convert_from_bytes(uploaded_file.read())
                page_results = []

                # The API key is read from the "KEY" environment variable.
                gemini_client = genai.Client(api_key=os.getenv("KEY"))
                model_name = "gemini-2.0-flash-exp"

                # The detection prompt is the same for every page.
                box_prompt = (
                    f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
                    "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
                    "als reine Python-Liste ohne weiteren Text. "
                    "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                )

                for page_no, page_image in enumerate(page_images, start=1):
                    # Serialize the page as PNG bytes for the request.
                    png_buffer = io.BytesIO()
                    page_image.save(png_buffer, format='PNG')
                    page_part = types.Part.from_bytes(
                        data=png_buffer.getvalue(),
                        mime_type="image/png",
                    )

                    # First request: bounding boxes for the requested topic.
                    detection = gemini_client.models.generate_content(
                        model=model_name,
                        contents=[box_prompt, page_part],
                    )

                    # Second request: free-text description of the page.
                    description = gemini_client.models.generate_content(
                        model=model_name,
                        contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", page_part],
                    )

                    # A parsing failure is reported per page and treated as
                    # "no boxes" so the remaining pages are still processed.
                    try:
                        found_boxes = parse_list_boxes(detection.text)
                    except Exception as parse_error:
                        st.error(f"Fehler bei Seite {page_no}: {parse_error!s}")
                        found_boxes = []

                    # Annotate a copy so the converted page stays untouched.
                    annotated = page_image.copy()
                    if found_boxes:
                        annotated = draw_bounding_boxes(annotated, found_boxes)

                    page_results.append({
                        "page": page_no,
                        "image": annotated,
                        "description": description.text,
                        "boxes": len(found_boxes),
                    })

                # Render one tab per page in the right-hand column.
                with col2:
                    st.write(f"## Ergebnisse ({len(page_results)} Seiten)")
                    tab_labels = [f"Seite {entry['page']}" for entry in page_results]
                    page_tabs = st.tabs(tab_labels)

                    for page_tab, entry in zip(page_tabs, page_results):
                        with page_tab:
                            caption_text = f"Seite {entry['page']} - {entry['boxes']} {topic_name} erkannt"
                            st.image(
                                entry["image"],
                                caption=caption_text,
                                use_container_width=True,
                            )
                            st.write("**Beschreibung:**", entry["description"])

            except Exception as run_error:
                st.error(f"Fehler: {run_error!s}")