pdf_gemini / app.py
Sebbe33's picture
Update app.py
7e4f227 verified
raw
history blame
4.68 kB
import os
import re
import io
import streamlit as st
from PIL import Image, ImageDraw
from google import genai
from google.genai import types
from pdf2image import convert_from_bytes
# Helper functions
def parse_list_boxes(text):
    """Extract all ``[ymin, xmin, ymax, xmax]`` bounding boxes from *text*.

    Scans the model's response for bracketed groups of four non-negative
    numbers and returns them as a list of 4-element float lists. Returns an
    empty list when no box-shaped group is present.
    """
    box_pattern = re.compile(
        r'\[([\d\.]+),\s*([\d\.]+),\s*([\d\.]+),\s*([\d\.]+)\]'
    )
    return [
        [float(coord) for coord in match.groups()]
        for match in box_pattern.finditer(text)
    ]
def draw_bounding_boxes(image, boxes):
    """Draw normalized bounding boxes onto *image* and return it.

    Parameters
    ----------
    image : PIL.Image.Image
        The image to annotate; it is modified in place.
    boxes : iterable of [ymin, xmin, ymax, xmax]
        Coordinates normalized to [0, 1] (fractions of the image size).
        Out-of-range values are clamped into [0, 1].

    Returns
    -------
    PIL.Image.Image
        The same image object, with a green rectangle per valid box.
    """
    draw = ImageDraw.Draw(image)
    width, height = image.size
    for box in boxes:
        # Clamp each coordinate into [0, 1] so off-range model output
        # cannot place a rectangle outside the canvas.
        ymin = max(0.0, min(1.0, box[0]))
        xmin = max(0.0, min(1.0, box[1]))
        ymax = max(0.0, min(1.0, box[2]))
        xmax = max(0.0, min(1.0, box[3]))
        # Skip degenerate boxes: Pillow's ImageDraw.rectangle raises
        # ValueError when x1 < x0 or y1 < y0, which would abort
        # annotation of the entire page for one bad model output.
        if xmax < xmin or ymax < ymin:
            continue
        draw.rectangle(
            [xmin * width, ymin * height, xmax * width, ymax * height],
            outline="#00FF00",
            width=3,
        )
    return image
# --- Streamlit UI -----------------------------------------------------------
st.title("PDF Themenerkennung mit Gemini")

col1, col2 = st.columns(2)

with col1:
    uploaded_file = st.file_uploader("PDF hochladen", type=["pdf"])
    topic_name = st.text_input(
        "Thema zur Erkennung",
        placeholder="z.B. 'Überschrift', 'Tabelle', 'Absatz'",
    )

    # The analyze button only appears once both inputs are provided.
    if uploaded_file and topic_name and st.button("Analysieren"):
        with st.spinner("Analysiere PDF..."):
            try:
                # Render every PDF page as a PIL image.
                pdf_bytes = uploaded_file.read()
                pages = convert_from_bytes(pdf_bytes)

                # API key is read from the KEY environment variable.
                client = genai.Client(api_key=os.getenv("KEY"))

                results = []
                for page_index, page_image in enumerate(pages):
                    # Serialize the page as PNG for the Gemini API.
                    png_buffer = io.BytesIO()
                    page_image.save(png_buffer, format='PNG')
                    image_part = types.Part.from_bytes(
                        data=png_buffer.getvalue(),
                        mime_type="image/png",
                    )

                    # First call: bounding boxes for the requested topic.
                    detection_prompt = (
                        f"Identifiziere alle {topic_name} Bereiche in diesem Dokument. "
                        "Gib Bounding Boxes im Format [ymin, xmin, ymax, xmax] "
                        "als reine Python-Liste ohne weiteren Text. "
                        "Beispiel: [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8]]"
                    )
                    box_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=[detection_prompt, image_part],
                    )

                    # Second call: free-form description of the page image.
                    desc_response = client.models.generate_content(
                        model="gemini-2.0-flash-exp",
                        contents=["Beschreibe diesen Dokumentenausschnitt detailliert.", image_part],
                    )

                    # Parse the boxes; fall back to "no boxes" on any error
                    # so one bad response does not kill the whole run.
                    try:
                        boxes = parse_list_boxes(box_response.text)
                    except Exception as e:
                        st.error(f"Fehler bei Seite {page_index+1}: {str(e)}")
                        boxes = []

                    # Annotate a copy so the rendered page stays untouched.
                    annotated = page_image.copy()
                    if boxes:
                        annotated = draw_bounding_boxes(annotated, boxes)

                    results.append({
                        "page": page_index + 1,
                        "image": annotated,
                        "description": desc_response.text,
                        "boxes": len(boxes),
                    })

                # Show the results in the right-hand column, one tab per page.
                with col2:
                    st.write(f"## Ergebnisse ({len(results)} Seiten)")
                    tabs = st.tabs([f"Seite {res['page']}" for res in results])
                    for tab, res in zip(tabs, results):
                        with tab:
                            st.image(
                                res["image"],
                                caption=f"Seite {res['page']} - {res['boxes']} {topic_name} erkannt",
                                use_container_width=True,
                            )
                            st.write("**Beschreibung:**", res["description"])
            except Exception as e:
                # Top-level boundary: surface any failure in the UI.
                st.error(f"Fehler: {str(e)}")