Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import io | |
| import fitz # PyMuPDF | |
def redact_submission_ids(input_pdf):
    """Mask Submission IDs on every page and blank the band above 'Document Details'.

    White rectangles are painted over each occurrence of the Submission ID
    marker (widened to the right to cover the numeric ID that follows it)
    on all pages, and over a full-width band directly above every
    "Document Details" match on the first page.

    NOTE(review): ``draw_rect`` only paints over the text; the underlying
    text objects stay in the PDF and can still be extracted or copied.
    If the IDs must be removed from the file, use PyMuPDF's redaction API
    (``page.add_redact_annot`` + ``page.apply_redactions``) instead.

    Args:
        input_pdf: The uploaded PDF, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            through a ``.name`` attribute (e.g. a temp-file wrapper).
            ``None`` (nothing uploaded) is accepted.

    Returns:
        A ``(output_path, filename)`` tuple: the path of the masked PDF,
        written into a fresh temporary directory under the upload's base
        name, and the path of the input file. Returns ``(None, "")`` when
        ``input_pdf`` is ``None``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    # Nothing uploaded: return empty outputs instead of failing on `.name`.
    if input_pdf is None:
        return None, ""

    # Accept both plain paths and file-like wrappers. A pathlib.Path also has
    # a `.name`, but that is only the base name, so check for paths first.
    if isinstance(input_pdf, (str, os.PathLike)):
        source_path = os.fspath(input_pdf)
    else:
        source_path = input_pdf.name
    filename = source_path

    id_marker = "Submission ID trn:oid:::"
    id_extra_width = 100     # points added right of the marker to cover the ID digits
    header_band_height = 50  # points blanked above "Document Details"
    white = (1, 1, 1)

    doc = fitz.open(source_path)
    try:
        for page_num, page in enumerate(doc):
            # Cover every Submission ID on this page.
            for hit in page.search_for(id_marker):
                cover = fitz.Rect(hit.x0, hit.y0, hit.x1 + id_extra_width, hit.y1)
                page.draw_rect(cover, color=white, fill=white)

            # First page only: blank a full-width band above "Document Details".
            if page_num == 0:
                for hit in page.search_for("Document Details"):
                    band = fitz.Rect(0, hit.y0 - header_band_height, page.rect.x1, hit.y0)
                    page.draw_rect(band, color=white, fill=white)

        # A gr.File output expects a file path rather than an in-memory
        # stream, so write the result to disk. Keeping the upload's base
        # name gives the download a recognisable file name.
        out_dir = tempfile.mkdtemp(prefix="redacted_")
        output_path = os.path.join(
            out_dir, os.path.basename(source_path) or "redacted.pdf"
        )
        doc.save(output_path)
    finally:
        # Always release the document, even if searching or saving fails.
        doc.close()

    return output_path, filename
# Gradio UI: one PDF upload in; the processed PDF and a file-name textbox out.
iface = gr.Interface(
    title="Redact Submission IDs",
    fn=redact_submission_ids,
    inputs=gr.File(label="Upload PDF"),
    outputs=[
        gr.File(label="Download Redacted PDF"),
        gr.Textbox(label="File Name"),
    ],
    live=False,
)

# Start the app only when this file is executed directly as a script.
if __name__ == "__main__":
    iface.launch(debug=True)