"""Print the numbers found in the text blocks of a PDF's first page.

Opens ``test.pdf``, takes page 0, and walks its text blocks. For each block:

* if the stripped text is itself an integer literal, that integer is printed;
* if the text contains at least one run of decimal digits, the first run is
  printed as a string (leading zeros preserved).

A block that is a bare integer therefore produces two lines of output.
"""

import re

import fitz

# First run of decimal digits anywhere in a block's text.
NUMBER_PATTERN = re.compile(r"(\d+)")

# The context manager closes the document once the blocks have been read;
# the loop below only needs the extracted blocks, not the open document.
with fitz.open("test.pdf") as doc:
    index_page = doc[0]
    blocks = index_page.get_text("blocks")

for block in blocks:
    # Element 4 of each block is used as that block's text.
    text = block[4].strip()

    # A block that is nothing but an integer is printed as an int. For any
    # other text int() raises ValueError, which previously aborted the whole
    # script on the first non-numeric block; such blocks now just skip this
    # step and fall through to the digit search below.
    try:
        print(int(text))
    except ValueError:
        pass

    # Independently of the above, print the first digit run as a string.
    match = NUMBER_PATTERN.search(text)
    if match:
        print(match.group(1))