|
|
|
|
@ -0,0 +1,13 @@
|
|
|
|
|
import fitz
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
# Compiled once so the per-block loop does not repeat the pattern lookup.
_NUMBER_RE = re.compile(r'(\d+)')


def extract_number(text: str) -> "str | None":
    """Return the first run of decimal digits in *text*, or ``None``.

    Surrounding whitespace is ignored. The digits are returned as the
    matched string (leading zeros preserved), not converted to ``int``.

    >>> extract_number("Chapter 1 ..... 12")
    '1'
    >>> extract_number("Introduction") is None
    True
    """
    match = _NUMBER_RE.search(text.strip())
    return match.group(1) if match else None


def main(pdf_path="test.pdf"):
    """Print the first number found in each text block of the first page.

    Args:
        pdf_path: Path of the PDF to read; defaults to ``"test.pdf"``.

    Blocks that contain no digits are skipped silently.
    """
    # The context manager closes the document even if extraction fails.
    with fitz.open(pdf_path) as doc:
        index_page = doc[0]
        # Each entry is a tuple; index 4 is read as the block's text.
        blocks = index_page.get_text("blocks")

        for block in blocks:
            # NOTE: the previous unconditional ``int(text)`` raised ValueError
            # for any block that was not purely an integer; the regex search
            # handles mixed text without crashing.
            number = extract_number(block[4])
            if number is not None:
                print(number)


if __name__ == "__main__":
    main()
|