# Preview the extracted text of the first pages of a PDF.
#
# Opens the PDF at PDF_PATH with PyMuPDF, prints the total page count, then
# for each of the first MAX_PAGES pages that contains non-whitespace text
# prints a page header, up to MAX_CHARS_PER_PAGE characters of the extracted
# text, and a '...' marker.
import fitz
import sys

PDF_PATH = r'C:\git\spark-lesson\reference\sources\cenrs-book.pdf'
MAX_PAGES = 10             # only the first pages are previewed
MAX_CHARS_PER_PAGE = 3000  # cap on the characters printed per page

# The context manager closes the document even if extraction or printing
# raises part-way through.
with fitz.open(PDF_PATH) as doc:
    page_count = len(doc)
    print(f'Pages: {page_count}')

    for i in range(min(page_count, MAX_PAGES)):
        text = doc[i].get_text()
        # Skip pages whose extracted text is empty or whitespace only.
        if text.strip():
            print(f'=== PAGE {i+1} ===')
            print(text[:MAX_CHARS_PER_PAGE])
            # Printed after every shown page, whether or not it was truncated.
            print('...')