import fitz
import sys

# Quick inspection script: report the page count of the thesis PDF and dump
# the beginning of its first pages so the table of contents can be located.
# NOTE(review): the path below is a hard-coded absolute Windows path; the
# script only works on a machine where that file exists.

# Open the document in a ``with`` block so the underlying file handle is
# released even if text extraction or printing raises.
with fitz.open(r'C:\git\spark-lesson\reference\sources\liu-discharge-transitions-thesis.pdf') as doc:
    print(f'Pages: {len(doc)}')

    # Extract first 10 pages to see TOC (fewer if the document is shorter).
    for i in range(min(10, len(doc))):
        text = doc[i].get_text()
        # Skip pages whose extracted text is empty or whitespace-only.
        if text.strip():
            # Page numbers are printed 1-based for readability.
            print(f'--- Page {i+1} ---')
            # Cap the output at 2000 characters per page to keep it skimmable.
            print(text[:2000])