#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 07 06:57:08 2015

@author: Jorj McKie
Copyright (c) 2015 Jorj X. McKie

The license of this program is governed by the GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007. See the "COPYING" file of this repository.

This is an example for using the Python binding PyMuPDF for MuPDF.

This program joins PDF files into one output file. Its features include:
* Selection of page ranges
* Optional rotation in steps of 90 degrees
* Copy any table of contents to the output (default - can be switched off)
* Editable PDF metadata

Dependencies:
wxPython 3.x, PyPDF2, PyMuPDF
"""
import os
import sys
import time

import wx
import wx.grid as gridlib
import wx.lib.gridmovers as gridmovers
import PyPDF2                      # only used for output (make_pdf)
import fitz

# some abbreviations
DefPos = wx.DefaultPosition
DefSize = wx.DefaultSize


class PDFTable(gridlib.PyGridTableBase):
    """Grid table holding one row per input file.

    Each row is a list ``[file, pages, from, to, rotate]``; the rows are kept
    in ``self.data`` in output order.
    """

    def __init__(self):
        gridlib.PyGridTableBase.__init__(self)
        self.colLabels = ['File', 'Pages', 'from', 'to', 'rotate']
        self.dataTypes = [gridlib.GRID_VALUE_STRING,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_CHOICE + ':0, 90, 180, 270',
                          ]
        self.data = []

#==============================================================================
# Methods for the wxPyGridTableBase interface (mostly mandatory)
#==============================================================================
    def GetNumberRows(self):
        """Return the number of rows currently in the table."""
        return len(self.data)

    def GetNumberCols(self):
        """Return the number of columns (one per column label)."""
        return len(self.colLabels)

    def IsEmptyCell(self, row, col):
        """Return True if the cell holds a falsy value or does not exist."""
        try:
            return not self.data[row][col]
        except IndexError:
            return True

    def GetValue(self, row, col):
        """Return the value stored at (row, col)."""
        return self.data[row][col]

    def SetValue(self, row, col, value):
        """Store *value* at (row, col)."""
        self.data[row][col] = value

#==============================================================================
# Provide column header names
#==============================================================================
    def GetColLabelValue(self, col):
        """Return the label of column *col*."""
        return self.colLabels[col]

#==============================================================================
# Provide row header names (just the line numbers in our case)
#==============================================================================
    def GetRowLabelValue(self, row):
        """Return the 1-based row number as the row label."""
        return str(row + 1)

#==============================================================================
# Provide type of a cell value
#==============================================================================
    def GetTypeName(self, row, col):
        """Return the grid value type of column *col* (same for all rows)."""
        return self.dataTypes[col]

#==============================================================================
# Move a row
#==============================================================================
    def MoveRow(self, frm, to):
        """Move row *frm* so that it is placed before row *to*.

        Does nothing when the table is not attached to a grid.
        """
        grid = self.GetView()
        if not grid:
            return

        # Removing the row first shifts all later rows up by one, hence
        # the adjusted insertion index when moving downwards.
        oldData = self.data[frm]
        del self.data[frm]
        if to > frm:
            self.data.insert(to - 1, oldData)
        else:
            self.data.insert(to, oldData)

        # inform the grid about our doing
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_DELETED, frm, 1)
        grid.ProcessTableMessage(msg)
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_INSERTED, to, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

#==============================================================================
# Insert a row
#==============================================================================
    def NewRow(self, zeile):
        """Append the row *zeile* and notify the attached grid.

        Does nothing when the table is not attached to a grid.
        """
        grid = self.GetView()
        if not grid:
            return
        self.data.append(zeile)
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_APPENDED, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

#==============================================================================
# Duplicate a row
#==============================================================================
    def DuplicateRow(self, row):
        """Insert a copy of the first five cells of *row* at position *row*.

        Does nothing when the table is not attached to a grid.
        """
        grid = self.GetView()
        if not grid:
            return
        # copy the cells so the two rows can be edited independently
        zeile = list(self.data[row][:5])
        self.data.insert(row, zeile)
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_INSERTED, row, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

#==============================================================================
# Remove a row
#==============================================================================
    def DeleteRow(self, row):
        """Delete *row* and notify the attached grid.

        Does nothing when the table is not attached to a grid.
        """
        grid = self.GetView()
        if not grid:
            return
        del self.data[row]
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_DELETED, row, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()


#==============================================================================
# Define the grid
#==============================================================================
class MyGrid(gridlib.Grid):
    """Grid showing a PDFTable; rows can be moved, duplicated and deleted
    through their row labels."""

    def __init__(self, parent):
        gridlib.Grid.__init__(self, parent, -1)
        table = PDFTable()             # create PDFTable object

        # Announce our table to the grid and let it manage it ('True')
        self.SetTable(table, True)

        # Cell attribute setting. Every column gets its own attribute
        # object: SetColAttr takes over the reference it is handed, so one
        # object must not be passed to several columns without IncRef().
        for col in (2, 3, 4):
            self.SetColAttr(col, self._aligned_attr(wx.ALIGN_RIGHT))
        # NOTE(review): the table only has columns 0-4, so this call
        # presumably has no visible effect - confirm the intended column.
        self.SetColAttr(5, self._aligned_attr(wx.ALIGN_CENTER))

        # Enable Row moving
        gridmovers.GridRowMover(self)

        # Bind: move a row
        self.Bind(gridmovers.EVT_GRID_ROW_MOVE, self.OnRowMove, self)
        # Bind: delete a row
        self.Bind(gridlib.EVT_GRID_LABEL_RIGHT_DCLICK, self.OnRowDel, self)
        # Bind: duplicate a row
        self.Bind(gridlib.EVT_GRID_LABEL_LEFT_DCLICK, self.OnRowDup, self)

    @staticmethod
    def _aligned_attr(horizontal):
        """Return a new cell attribute: *horizontal* alignment, vertically
        centered."""
        attr = gridlib.GridCellAttr()
        attr.SetAlignment(horizontal, wx.ALIGN_CENTER)
        return attr

#==============================================================================
# Event Method: move a row
#==============================================================================
    def OnRowMove(self, evt):
        """Move the dragged row in the underlying table."""
        frm = evt.GetMoveRow()         # Row being moved
        to = evt.GetBeforeRow()        # Before which row to insert
        self.GetTable().MoveRow(frm, to)

#==============================================================================
# Event Method: delete a row
#==============================================================================
    def OnRowDel(self, evt):
        """Delete the row whose label was right-double-clicked."""
        row = evt.GetRow()
        col = evt.GetCol()
        # Same guard as OnRowDup: a column label reports row == -1, which
        # would otherwise delete the *last* row via data[-1].
        if col < 0 and row >= 0:
            self.GetTable().DeleteRow(row)

#==============================================================================
# Event Method: duplicate a row
#==============================================================================
    def OnRowDup(self, evt):
        """Duplicate the row whose label was double-clicked."""
        row = evt.GetRow()
        col = evt.GetCol()
        if col < 0 and row >= 0:       # else it is not a row duplication!
            self.GetTable().DuplicateRow(row)
        evt.Skip()


#==============================================================================
#
# Define the dialog
#
#==============================================================================
class PDFDialog (wx.Dialog):
    """Main dialog: input file picker, file grid, output parameters and
    OK / Cancel buttons.

    ``self.FileList`` maps each selected file path to its opened
    ``fitz.Document``; ``self.szr02`` is the MyGrid holding the file rows.
    """

    def __init__(self, parent):
        wx.Dialog.__init__(self, parent, id=wx.ID_ANY,
                           title=u"Join PDF files",
                           pos=DefPos,
                           size=wx.Size(900, 710),
                           style=wx.CAPTION |
                                 wx.CLOSE_BOX |
                                 wx.DEFAULT_DIALOG_STYLE |
                                 wx.MAXIMIZE_BOX |
                                 wx.MINIMIZE_BOX |
                                 wx.RESIZE_BORDER)

        self.SetSizeHintsSz(DefSize, DefSize)
        self.FileList = {}

#==============================================================================
# Create Sizer 01 (browse button and explaining text)
#==============================================================================
        szr01 = wx.BoxSizer(wx.HORIZONTAL)

        self.btn_neu = wx.FilePickerCtrl(self, wx.ID_ANY,
                           wx.EmptyString,
                           u"Select a PDF file",
                           u"*.pdf",
                           DefPos, DefSize,
                           wx.FLP_CHANGE_DIR | wx.FLP_FILE_MUST_EXIST |
                           wx.FLP_SMALL,
                           )
        szr01.Add(self.btn_neu, 0, wx.ALIGN_TOP | wx.ALL, 5)

        msg_txt = ("ADD files with this button. Path and total page number "
                   "will be appended to the table below.\n"
                   "DUPLICATE row: double-click its number. "
                   "MOVE row: drag its number with the mouse. "
                   "DELETE row: right-double-click its number.")
        msg = wx.StaticText(self, wx.ID_ANY, msg_txt,
                            DefPos, wx.Size(-1, 50), wx.ALIGN_LEFT)
        msg.Wrap(-1)
        # presumably family SWISS (74), style and weight NORMAL (90) -
        # TODO confirm against the wx font constants
        msg.SetFont(wx.Font(10, 74, 90, 90, False, "Arial"))
        szr01.Add(msg, 0, wx.ALIGN_TOP | wx.ALL, 5)

#==============================================================================
# Create Sizer 02 (contains the grid)
#==============================================================================
        self.szr02 = MyGrid(self)
        self.szr02.AutoSizeColumn(0)
        self.szr02.AutoSizeColumn(1)
        self.szr02.SetColSize(2, 45)
        self.szr02.SetColSize(3, 45)
        self.szr02.SetColSize(4, 45)
        self.szr02.SetRowLabelSize(30)
        # Columns 0 and 1 (file name and page count) are read only. One
        # attribute object per column, since SetColAttr takes over the
        # reference it is given.
        for col in (0, 1):
            attr_ro = gridlib.GridCellAttr()
            attr_ro.SetReadOnly(True)
            self.szr02.SetColAttr(col, attr_ro)

#==============================================================================
# Create Sizer 03 (output parameters)
#==============================================================================
        szr03 = wx.FlexGridSizer(5, 2, 0, 0)   # 5 rows, 2 cols, gap sizes 0
        szr03.SetFlexibleDirection(wx.BOTH)
        szr03.SetNonFlexibleGrowMode(wx.FLEX_GROWMODE_SPECIFIED)

        tx_ausdat = wx.StaticText(self, wx.ID_ANY, u"Output:",
                                  DefPos, DefSize, 0)
        tx_ausdat.Wrap(-1)
        szr03.Add(tx_ausdat, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALL, 5)

        self.btn_aus = wx.FilePickerCtrl(self, wx.ID_ANY,
                           os.path.join(os.path.expanduser('~'),
                                        "joined.pdf"),
                           u"Specify output file",
                           u"*.pdf",
                           DefPos, wx.Size(480, -1),
                           wx.FLP_OVERWRITE_PROMPT |
                           wx.FLP_SAVE | wx.FLP_SMALL |
                           wx.FLP_USE_TEXTCTRL)
        szr03.Add(self.btn_aus, 0, wx.ALL, 5)

        tx_autor = wx.StaticText(self, wx.ID_ANY, u"Author:",
                                 DefPos, DefSize, 0)
        tx_autor.Wrap(-1)
        szr03.Add(tx_autor, 0, wx.ALL, 5)

        # default author: last component of the user's home directory
        self.ausaut = wx.TextCtrl(self, wx.ID_ANY,
                          os.path.basename(os.path.expanduser('~')),
                          DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.ausaut, 0, wx.ALL, 5)

        pdf_titel = wx.StaticText(self, wx.ID_ANY, u"Title:",
                                  DefPos, DefSize, 0)
        pdf_titel.Wrap(-1)
        szr03.Add(pdf_titel, 0, wx.ALL, 5)

        self.austit = wx.TextCtrl(self, wx.ID_ANY,
                          u"Joined PDF files",
                          DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.austit, 0, wx.ALL, 5)

        tx_subject = wx.StaticText(self, wx.ID_ANY, u"Subject:",
                                   DefPos, DefSize, wx.ALIGN_RIGHT)
        tx_subject.Wrap(-1)
        szr03.Add(tx_subject, 0, wx.ALL, 5)

        self.aussub = wx.TextCtrl(self, wx.ID_ANY,
                          u"Joined PDF files",
                          DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.aussub, 0, wx.ALL, 5)

        # blank label: keeps the check box in the second sizer column
        tx_blank = wx.StaticText(self, wx.ID_ANY, u" ",
                                 DefPos, DefSize, wx.ALIGN_RIGHT)
        tx_blank.Wrap(-1)
        szr03.Add(tx_blank, 0, wx.ALL, 5)

        self.noToC = wx.CheckBox(self, wx.ID_ANY,
                         u"check if no table of contents wanted",
                         DefPos, DefSize, wx.ALIGN_LEFT)
        szr03.Add(self.noToC, 0, wx.ALL, 5)

#==============================================================================
# Create Sizer 04 (OK / Cancel buttons)
#==============================================================================
        szr04 = wx.StdDialogButtonSizer()
        szr04OK = wx.Button(self, wx.ID_OK)
        szr04.AddButton(szr04OK)
        szr04Cancel = wx.Button(self, wx.ID_CANCEL)
        szr04.AddButton(szr04Cancel)
        szr04.Realize()

#==============================================================================
# 3 horizontal lines (decoration only)
#==============================================================================
        linie1 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)
        linie2 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)
        linie3 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)

        mainszr = wx.BoxSizer(wx.VERTICAL)
        mainszr.Add(szr01, 0, wx.EXPAND, 5)
        mainszr.Add(linie1, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(self.szr02, 1, wx.EXPAND, 5)
        mainszr.Add(linie2, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(szr03, 0, wx.EXPAND, 5)
        mainszr.Add(linie3, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(szr04, 0, wx.ALIGN_TOP | wx.ALIGN_CENTER_HORIZONTAL, 5)

        self.SetSizer(mainszr)
        self.Layout()

        self.Centre(wx.BOTH)

#==============================================================================
# Define event handlers for the buttons
#==============================================================================
        self.btn_neu.Bind(wx.EVT_FILEPICKER_CHANGED, self.NewFile)
        self.btn_aus.Bind(wx.EVT_FILEPICKER_CHANGED, self.AusgabeDatei)

    def __del__(self):
        pass

#==============================================================================
# "NewFile" - Event Handler for including new files
#==============================================================================
    def NewFile(self, event):
        """Open the picked file and append a row for it to the grid.

        Documents are cached in ``self.FileList`` so a file picked again is
        not re-opened. Files that need a password are rejected with a
        message box and get no row.
        """
        dat = event.GetPath()
        if dat not in self.FileList:
            doc = fitz.Document(dat)
            if doc.needsPass:
                wx.MessageBox("Cannot read encrypted file\n" + dat,
                              "Encrypted File Error")
                event.Skip()
                return
            self.FileList[dat] = doc
        else:
            doc = self.FileList[dat]
        seiten = doc.pageCount
        # row layout: file, page count, from, to, rotation
        zeile = [dat, str(seiten), 1, str(seiten), 0]
        self.szr02.Table.NewRow(zeile)
        self.szr02.AutoSizeColumn(0)
        self.Layout()
        event.Skip()

#==============================================================================
# "AusgabeDatei" - Event Handler for out file
#==============================================================================
    def AusgabeDatei(self, event):
        """Output file changed: nothing to do, just pass the event on."""
        event.Skip()


#==============================================================================
# Helpers for make_pdf
#==============================================================================
def _pdf_timestamp():
    """Return the current local time as a PDF date string.

    The result has the shape ``D:YYYYMMDDHHmmSS+HH'mm'`` where the trailing
    part is the local offset from UTC (daylight saving time included).
    """
    local = time.localtime()
    if local.tm_isdst > 0 and time.daylight:
        offset = -time.altzone         # seconds east of UTC, DST active
    else:
        offset = -time.timezone        # seconds east of UTC
    sign = "+" if offset >= 0 else "-"
    hours, minutes = divmod(abs(offset) // 60, 60)
    return "%s%s%02d'%02d'" % (time.strftime("D:%Y%m%d%H%M%S", local),
                               sign, hours, minutes)


def _latin1(text):
    """Encode *text* as Latin-1; unsupported characters become '?'
    instead of raising UnicodeEncodeError."""
    return text.encode("latin-1", "replace")


def _add_sub_bookmarks(pdf_out, bm_main, bm_lst):
    """Add the entries of *bm_lst* below the file bookmark *bm_main*.

    *bm_lst* holds ``[level, title, page]`` entries with level >= 2.
    """
    parents = {1: bm_main}             # lvl 1 bookmark is infile's title
    for level, title, page in bm_lst:
        # Use the nearest existing shallower level as parent, so a gap in
        # the level numbering cannot raise KeyError.
        parent_level = max(level - 1, 1)
        while parent_level > 1 and parent_level not in parents:
            parent_level -= 1
        bm = pdf_out.addBookmark(_latin1(title), page,
                                 parents[parent_level],
                                 None, False, False, "/Fit")
        # deeper levels belong to the previous branch: forget them
        for stale in [lvl for lvl in parents if lvl > level]:
            del parents[stale]
        parents[level] = bm


#==============================================================================
# Create the joined PDF
#==============================================================================
def make_pdf(dlg):
    """Join the files listed in the dialog's grid into one output PDF.

    For every grid row the selected page range is copied (and optionally
    rotated). Unless the "no table of contents" box is checked, each input
    gets a top-level bookmark with its own table of contents below it.

    Returns the output file name, or None when the grid is empty.
    """
    # no file selected: treat like "Cancel"
    if not len(dlg.szr02.Table.data):
        return None

    cdate = _pdf_timestamp()
    ausgabe = dlg.btn_aus.GetPath()
    pdf_out = PyPDF2.PdfFileWriter()
    aus_nr = 0                         # current page number in output
    pdf_dict = {"/Creator": "PDF-Joiner",
                "/Producer": "PyMuPDF, PyPDF2",
                "/CreationDate": cdate,
                "/ModDate": cdate,
                "/Title": dlg.austit.Value,
                "/Author": dlg.ausaut.Value,
                "/Subject": dlg.aussub.Value}
    pdf_out.addMetadata(pdf_dict)
    want_toc = not dlg.noToC.Value

#==============================================================================
# process one input file
#==============================================================================
    for zeile in dlg.szr02.Table.data:
        dateiname = zeile[0]
        doc = dlg.FileList[dateiname]
        max_seiten = int(zeile[1])

        # user input minus 1, PDF pages count from zero
        # also correct any inconsistent input
        von = int(zeile[2]) - 1
        bis = int(zeile[3]) - 1
        von = min(max(0, von), max_seiten - 1)   # keep "from" inside file
        bis = min(max_seiten - 1, bis)  # "to" must not be > max pages - 1
        bis = max(von, bis)             # "to" cannot be < "from"
        rot = int(zeile[4])             # get rotation angle

        pdfin = PyPDF2.PdfFileReader(dateiname)
        for p in range(von, bis + 1):   # read pages from input file
            pdf_page = pdfin.getPage(p)
            if rot > 0:
                pdf_page.rotateClockwise(rot)    # rotate the page
            pdf_out.addPage(pdf_page)   # output the page

        if not want_toc:                # no ToC wanted
            continue

        # title = "infile [from-to (max.pages)]"
        stem = os.path.splitext(os.path.basename(dateiname))[0]
        bm_main_title = "%s [%s-%s (%s)]" % \
            (_latin1(stem), von + 1, bis + 1, max_seiten)
        bm_main = pdf_out.addBookmark(bm_main_title, aus_nr,
                                      None, None, False, False, "/Fit")

        toc = fitz.GetToC(doc)          # get infile's table of contents
        bm_lst = []                     # prepare the relevant sub-ToC
        for t in toc:
            if von < t[2] <= bis + 1:   # relevant page range only
                bm_lst.append([t[0] + 1,                  # indent + 1 level
                               t[1],                      # the title
                               t[2] + aus_nr - von - 1])  # new page number

        aus_nr += (bis - von + 1)       # increase output counter

        if not bm_lst:                  # do we have a sub-ToC?
            continue                    # no, next infile

        # while indent gap is too large, prepend "filler" bookmarks
        while bm_lst[0][0] > 2:
            filler = [bm_lst[0][0] - 1, "<>", bm_lst[0][2]]
            bm_lst.insert(0, filler)

        # now add infile's bookmarks
        _add_sub_bookmarks(pdf_out, bm_main, bm_lst)

#==============================================================================
# all input files processed
#==============================================================================
    # Open the output only now, so a failure above leaves neither an open
    # handle nor a truncated file behind.
    with open(ausgabe, "wb") as pdf_fle_out:
        pdf_out.write(pdf_fle_out)
    return ausgabe


#==============================================================================
#
# Main program
#
#==============================================================================
if wx.VERSION[0] < 3:
    print("wx Version needs to be at least 3")
    sys.exit(1)

app = wx.App()
this_dir = os.getcwd()

#==============================================================================
# create dialog
#==============================================================================
dlg = PDFDialog(None)

#==============================================================================
# Show dialog and wait ...
#==============================================================================
rc = dlg.ShowModal()

#==============================================================================
# if OK pressed, create output PDF
#==============================================================================
if rc == wx.ID_OK:
    ausgabe = make_pdf(dlg)
dlg.Destroy()
app = None