From c59a4dbb7fff539e321c39277b5a62364d79a119 Mon Sep 17 00:00:00 2001
From: Matthew Huntington
Date: Wed, 25 Jun 2025 19:50:06 -0400
Subject: [PATCH] init

---
 .gitignore |  1 +
 app.py     | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 app.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a136337
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pdf
diff --git a/app.py b/app.py
new file mode 100644
--- /dev/null
+++ b/app.py
@@ -0,0 +1,34 @@
+"""Print the first number found in each text block of a PDF's first page."""
+import re
+
+import fitz
+
+PDF_PATH = "test.pdf"
+NUMBER_PATTERN = re.compile(r'(\d+)')
+TEXT_BLOCK = 0  # PyMuPDF block_type of a text block; image blocks use 1
+
+
+def first_number(text):
+    """Return the first run of digits in *text*, or None if there is none."""
+    match = NUMBER_PATTERN.search(text)
+    return match.group(1) if match else None
+
+
+def main():
+    """Print the first number of each text block on the PDF's first page."""
+    # The context manager closes the document even if extraction raises.
+    with fitz.open(PDF_PATH) as doc:
+        index_page = doc[0]
+        blocks = index_page.get_text("blocks")
+
+    for block in blocks:
+        # Image blocks hold a metadata string in slot 4; its digits are noise.
+        if block[6] != TEXT_BLOCK:
+            continue
+        number = first_number(block[4].strip())
+        if number is not None:
+            print(number)
+
+
+if __name__ == "__main__":
+    main()