You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
12 lines
333 B
12 lines
333 B
import fitz
|
|
import sys
|
|
|
|
# Dump the opening pages of the thesis PDF so its table of contents can be
# inspected by eye. The document is opened as a context manager so the file
# handle is released even if text extraction raises partway through.
with fitz.open(r'C:\git\spark-lesson\reference\sources\liu-discharge-transitions-thesis.pdf') as doc:
    print(f'Pages: {len(doc)}')

    # Extract first 10 pages to see TOC (fewer if the document is shorter)
    for i in range(min(10, len(doc))):
        text = doc[i].get_text()
        # Skip pages whose extracted text is empty or whitespace-only
        if text.strip():
            print(f'--- Page {i+1} ---')
            # Cap each page's output at 2000 characters to keep it readable
            print(text[:2000])
|