You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

11 lines
295 B

import fitz
import sys
# Preview script: print the page count of a PDF, then the leading text of
# its first few pages, skipping pages whose extracted text is empty or
# whitespace-only.
#
# Usage: pass the PDF path as the first command-line argument; with no
# argument the original hard-coded book path is used.
DEFAULT_PDF_PATH = r'C:\git\spark-lesson\reference\sources\cenrs-book.pdf'
MAX_PAGES = 10     # only the first MAX_PAGES pages are previewed
MAX_CHARS = 3000   # per-page cap on the amount of text printed

pdf_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PDF_PATH

# The context manager closes the document even if text extraction raises;
# the original left the handle open until interpreter exit.
with fitz.open(pdf_path) as doc:
    page_count = len(doc)
    print(f'Pages: {page_count}')
    for index in range(min(page_count, MAX_PAGES)):
        text = doc[index].get_text()
        if not text.strip():
            # No extractable text on this page; nothing to show.
            continue
        print(f'=== PAGE {index + 1} ===')
        print(text[:MAX_CHARS])
        # NOTE(review): the original indentation was lost, so it is assumed
        # this separator belongs with each printed page rather than with
        # every loop iteration -- confirm. It is printed whether or not the
        # text was actually truncated.
        print('...')