Welcome, guest | Sign In | My Account | Store | Cart
import fitz                             # this is PyMuPDF 1.9.0
doc = fitz.open("some.pdf")

# An easy start: create new PDFs of the first and last 10 pages ...
l = list(range(10))                     # first 10 pages
doc.select(l)                           # delete all others
doc.save("some-first-10.pdf", garbage=3)# save and clean new PDF
doc.close()
doc = fitz.open("some.pdf")             # recycle PDF
l = list(range(doc.pageCount-10, doc.pageCount))    # last 10 pages
doc.select(l)                           # delete all others
doc.save("some-last-10.pdf", garbage=3) # save and clean new PDF
doc.close()

# page numbers may occur multiple times and in any order ...
doc = fitz.open("some.pdf")             # recycle PDF
doc.select([1,1,1,3,3,3,5,5,5,0,0,0])   # create crazily tripled pages
doc.save("some-crazy-triples.pdf", garbage=3)   # save that & clean new PDF
doc.close()

# new PDF containing the original 2 times
doc = fitz.open("some.pdf")             # recycle PDF
l = list(range(doc.pageCount))          # list of all pages
l += l                                  # two times that [0,...,n,0,...,n]
doc.select(l)                           # PDF will now contain itself twice ...
doc.save("some-times-2.pdf")            # will hardly be bigger than original!
doc.close()

# delete pages without text (or whatever ...)
doc = fitz.open("some.pdf")             # recycle PDF
l = list(range(doc.pageCount))          # list of all pages
for i in l:
    if not doc.getPageText(i)           # if no text on page number i ...
        l.remove(i)                     # delete that page from list
doc.select(l)                           # select remaining pages from the PDF
doc.save("some-non-empty.pdf", garbage=3)         # save PDF, every page has some text now ...
doc.close()

History