Matthew Huntington 6 months ago
commit c59a4dbb7f

1
.gitignore vendored

@ -0,0 +1 @@
*.pdf

@ -0,0 +1,13 @@
import fitz
import re
# Print the numbers found in the text blocks of the first page of test.pdf.
# (The name `index_page` suggests this page is an index / table of contents
# -- TODO confirm.)

# Compiled once up front; the pattern does not change between blocks.
number_pattern = re.compile(r'(\d+)')

# Open the document as a context manager so the underlying file is closed
# even if something below raises.
with fitz.open("test.pdf") as doc:
    index_page = doc[0]
    # Each entry is a tuple; element 4 is used as the block's text below.
    blocks = index_page.get_text("blocks")
    for block in blocks:
        text = block[4].strip()

        # When the whole block is a bare integer, print it as an int.
        # Most blocks are not bare integers, so a failed conversion must not
        # abort the scan: fall through to the regex search instead.
        try:
            whole_value = int(text)
        except ValueError:
            pass
        else:
            print(whole_value)

        # Print the first run of digits appearing anywhere in the block.
        match = number_pattern.search(text)
        if match:
            print(match.group(1))
Loading…
Cancel
Save