Spaces:
Sleeping
Sleeping
Update lib/read_pdf.py
Browse files - lib/read_pdf.py +1 -1
lib/read_pdf.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import pdfplumber
|
| 2 |
import re
|
| 3 |
-
|
| 4 |
# Extract text as paragraph delimiter without tables and graphs
|
| 5 |
def extract_and_format_paragraphs(pdf_path):
|
| 6 |
"""Extract and format paragraphs from a PDF text, applying filters to remove headers, footnotes, and specific sections."""
|
|
|
|
| 1 |
import pdfplumber
|
| 2 |
import re
|
| 3 |
+ import os
|
| 4 |
# Extract text as paragraph delimiter without tables and graphs
|
| 5 |
def extract_and_format_paragraphs(pdf_path):
|
| 6 |
"""Extract and format paragraphs from a PDF text, applying filters to remove headers, footnotes, and specific sections."""
|