Spaces: Build error

The Space fails to build with the app code below; a likely cause and a config sketch follow after the code.
import cv2
import pytesseract
import gradio as gr
import numpy as np
from PIL import Image


def process_image(input_image):
    try:
        # Convert the input image to a NumPy array if it's a PIL Image
        if isinstance(input_image, Image.Image):
            img = np.array(input_image)
            # PIL images are RGB; OpenCV works in BGR
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        else:
            # If it's a file path, read it directly with OpenCV (already BGR)
            img = cv2.imread(input_image)

        # Check that the image is in the expected format
        if img is None or img.dtype != np.uint8:
            raise ValueError("Could not read the image. Please check the image format.")

        # Binarize with Otsu thresholding, inverted so text is white on black
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

        # Dilate with a rectangular kernel so characters on the same line merge into one blob
        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
        dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)

        # Find text lines as the external contours of the dilated blobs
        text_lines = []
        contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            text_lines.append((y, y + h, x, x + w))

        # Sort text lines top to bottom by their y-coordinates
        text_lines.sort(key=lambda line: line[0])

        # Extract text from each line using Tesseract (English + Khmer)
        recognized_text = []
        for y_min, y_max, x_min, x_max in text_lines:
            cropped_img = img[y_min:y_max, x_min:x_max]
            custom_config = r'-l eng+khm --oem 3 --psm 6'
            extracted_text = pytesseract.image_to_string(cropped_img, config=custom_config)
            recognized_text.append(extracted_text.strip())

        # Combine recognized text into a single string
        full_text = "\n".join(recognized_text)

        # Draw bounding boxes on a copy of the image
        result = img.copy()
        for y_min, y_max, x_min, x_max in text_lines:
            cv2.rectangle(result, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        # Convert back to RGB so Gradio displays the colors correctly
        result_rgb = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
        return full_text, result_rgb
    except Exception as e:
        return "Could not process the image. Error: " + str(e), None


iface = gr.Interface(
    process_image,
    inputs=[gr.Image(type="pil", label="Input Image")],
    outputs=[
        gr.Text(label="Detected Text"),
        gr.Image(type="pil", label="Processed Image"),
    ],
    title="Bank Statement OCR",
    # description="Upload an image containing text to perform OCR and see the detected text and image.",
    flagging_options=["blurry", "incorrect", "other"],
)

iface.launch(debug=True, share=True)
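A common cause of a build error for a Space like this is missing dependencies rather than the Python code itself: pytesseract is only a wrapper and needs the Tesseract binary and the Khmer language data installed on the image, and OpenCV on a headless server is usually installed as opencv-python-headless. The exact cause should be confirmed in the build log, but a minimal sketch of the two config files a Gradio-SDK Space reads (packages.txt for apt packages, requirements.txt for pip packages), assuming Debian package names, looks like this:

packages.txt (system packages installed with apt):
tesseract-ocr
tesseract-ocr-khm

requirements.txt (Python packages installed with pip):
gradio
pytesseract
opencv-python-headless
numpy
Pillow

If the log instead points at a specific Python package, pinning a gradio version that matches the gr.Image / gr.Text API used above is the other thing to check.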