#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 07 06:57:08 2015
@author: Jorj McKie
Copyright (c) 2015 Jorj X. McKie
The license of this program is governed by the GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007. See the "COPYING" file of this repository.
This is an example for using the Python binding PyMuPDF for MuPDF.
This program joins PDF files into one output file. Its features include:
* Selection of page ranges
* Optional rotation in steps of 90 degrees
* Copy any table of contents to the output (default - can be switched off)
* Editable PDF metadata
Dependencies:
wxPython 3.x, PyPDF2, PyMuPDF
"""
import os, sys
import wx
import wx.grid as gridlib
import wx.lib.gridmovers as gridmovers
import PyPDF2 # only used for output (make_pdf)
import fitz
# some abbreviations
DefPos = wx.DefaultPosition
DefSize = wx.DefaultSize
class PDFTable(gridlib.PyGridTableBase):
    """Table model behind the grid: one row per selected page range.

    Every entry of ``self.data`` is a list with one value per column of
    ``self.colLabels`` (file, page count, first page, last page, rotation).
    The row-changing methods only act while a grid view is attached, and
    they tell that view about every structural change.
    """

    def __init__(self):
        gridlib.PyGridTableBase.__init__(self)
        self.colLabels = ['File', 'Pages', 'from', 'to', 'rotate']
        # one grid data type per column, same order as colLabels
        self.dataTypes = [gridlib.GRID_VALUE_STRING,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_NUMBER,
                          gridlib.GRID_VALUE_CHOICE + ':0, 90, 180, 270',
                          ]
        self.data = []

    #==========================================================================
    # Methods for the wxPyGridTableBase interface (mostly mandatory)
    #==========================================================================
    def GetNumberRows(self):
        """Return the number of rows currently stored."""
        return len(self.data)

    def GetNumberCols(self):
        """Return the number of columns (one per column label)."""
        return len(self.colLabels)

    def IsEmptyCell(self, row, col):
        """Return True if the cell holds a falsy value or does not exist."""
        try:
            return not self.data[row][col]
        except IndexError:
            return True

    def GetValue(self, row, col):
        """Return the stored value of a cell."""
        return self.data[row][col]

    def SetValue(self, row, col, value):
        """Store a new value in a cell."""
        self.data[row][col] = value

    def GetColLabelValue(self, col):
        """Return the header text of a column."""
        return self.colLabels[col]

    def GetRowLabelValue(self, row):
        """Return the row header: the 1-based line number."""
        return str(row + 1)

    def GetTypeName(self, row, col):
        """Return the data type of a cell (depends on the column only)."""
        return self.dataTypes[col]

    #==========================================================================
    # Row manipulation
    #==========================================================================
    def _valid_row(self, row):
        """Return True if ``row`` is the index of an existing data row.

        Negative numbers are rejected on purpose: Python would accept them
        as "count from the end" and the wrong row would be changed.
        """
        return 0 <= row < len(self.data)

    def MoveRow(self, frm, to):
        """Move row ``frm`` so that it ends up before the old row ``to``."""
        grid = self.GetView()
        if not grid or not self._valid_row(frm):
            return
        moved = self.data.pop(frm)
        # removing the row shifted all later rows up by one position
        target = to - 1 if to > frm else to
        self.data.insert(target, moved)
        # inform the grid about our doing
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_DELETED, frm, 1)
        grid.ProcessTableMessage(msg)
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_INSERTED, to, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

    def NewRow(self, zeile):
        """Append the row ``zeile`` (a list with one value per column)."""
        grid = self.GetView()
        if not grid:
            return
        self.data.append(zeile)
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_APPENDED, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

    def DuplicateRow(self, row):
        """Insert an independent copy of row ``row`` at the same position."""
        grid = self.GetView()
        if not grid or not self._valid_row(row):
            return
        # copy the list so that editing one row does not change its twin
        self.data.insert(row, list(self.data[row]))
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_INSERTED, row, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()

    def DeleteRow(self, row):
        """Remove row ``row``; indexes that name no row are ignored."""
        grid = self.GetView()
        if not grid or not self._valid_row(row):
            return
        del self.data[row]
        grid.BeginBatch()
        msg = gridlib.GridTableMessage(
            self, gridlib.GRIDTABLE_NOTIFY_ROWS_DELETED, row, 1)
        grid.ProcessTableMessage(msg)
        grid.EndBatch()
#==============================================================================
# Define the grid
#==============================================================================
class MyGrid(gridlib.Grid):
    """Grid showing the PDFTable, with row move / delete / duplicate support.

    Rows are moved by dragging their label, duplicated by a left
    double-click on the label and deleted by a right double-click on it.
    """

    def __init__(self, parent):
        gridlib.Grid.__init__(self, parent, -1)
        table = PDFTable()                   # create PDFTable object
        # Announce our table to the grid and let it manage it ('True')
        self.SetTable(table, True)

        # Cell alignment. SetColAttr takes ownership of the attribute it is
        # given, so every column receives an attribute object of its own.
        for col in (2, 3, 4):
            self.SetColAttr(col, self._aligned_attr(wx.ALIGN_RIGHT))
        # NOTE(review): the table has columns 0..4 only, so this attribute
        # is never shown - possibly a different column was meant. Confirm.
        self.SetColAttr(5, self._aligned_attr(wx.ALIGN_CENTER))

        # Enable row moving
        gridmovers.GridRowMover(self)
        # Bind: move a row
        self.Bind(gridmovers.EVT_GRID_ROW_MOVE, self.OnRowMove, self)
        # Bind: delete a row
        self.Bind(gridlib.EVT_GRID_LABEL_RIGHT_DCLICK, self.OnRowDel, self)
        # Bind: duplicate a row
        self.Bind(gridlib.EVT_GRID_LABEL_LEFT_DCLICK, self.OnRowDup, self)

    @staticmethod
    def _aligned_attr(horizontal):
        """Return a new cell attribute: given horizontal, centered vertical."""
        attr = gridlib.GridCellAttr()
        attr.SetAlignment(horizontal, wx.ALIGN_CENTER)
        return attr

    def OnRowMove(self, evt):
        """Event method: move a row."""
        frm = evt.GetMoveRow()               # row being moved
        to = evt.GetBeforeRow()              # before which row to insert
        self.GetTable().MoveRow(frm, to)

    def OnRowDel(self, evt):
        """Event method: delete the row whose label was double-clicked.

        Same test as in OnRowDup: only a row label (col < 0, row >= 0)
        triggers the action. Without it, a double-click on a column header
        would hand row -1 to the table, which addresses the last row.
        """
        row = evt.GetRow()
        col = evt.GetCol()
        if col < 0 and row >= 0:
            self.GetTable().DeleteRow(row)
        else:
            evt.Skip()                       # not ours, let others handle it

    def OnRowDup(self, evt):
        """Event method: duplicate the row whose label was double-clicked."""
        row = evt.GetRow()
        col = evt.GetCol()
        if col < 0 and row >= 0:             # else it is not a row duplication!
            self.GetTable().DuplicateRow(row)
        evt.Skip()
#==============================================================================
#
# Define the dialog
#
#==============================================================================
class PDFDialog (wx.Dialog):
    """Dialog collecting the input files and the output parameters.

    Attributes read by the code that creates the output file:
      FileList - dict mapping a file path to its opened fitz document
      szr02    - the MyGrid control (its table rows are the page ranges)
      btn_aus  - file picker holding the output path
      ausaut, austit, aussub - text controls for author, title, subject
      noToC    - check box, checked if no table of contents is wanted
    """

    def __init__(self, parent):
        wx.Dialog.__init__(self, parent, id=wx.ID_ANY,
                           title=u"Join PDF files",
                           pos=DefPos,
                           size=wx.Size(900, 710),
                           style=wx.CAPTION |
                                 wx.CLOSE_BOX |
                                 wx.DEFAULT_DIALOG_STYLE |
                                 wx.MAXIMIZE_BOX |
                                 wx.MINIMIZE_BOX |
                                 wx.RESIZE_BORDER)
        self.SetSizeHintsSz(DefSize, DefSize)
        self.FileList = {}

        # The controls are created in this fixed order: browse line, grid,
        # output parameters, buttons, decoration lines.
        szr01 = self._build_header()
        self._build_grid()
        szr03 = self._build_output_panel()
        szr04 = self._build_buttons()

        # 3 horizontal lines (decoration only)
        linie1 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)
        linie2 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)
        linie3 = wx.StaticLine(self, wx.ID_ANY,
                               DefPos, DefSize, wx.LI_HORIZONTAL)

        mainszr = wx.BoxSizer(wx.VERTICAL)
        mainszr.Add(szr01, 0, wx.EXPAND, 5)
        mainszr.Add(linie1, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(self.szr02, 1, wx.EXPAND, 5)
        mainszr.Add(linie2, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(szr03, 0, wx.EXPAND, 5)
        mainszr.Add(linie3, 0, wx.EXPAND | wx.ALL, 5)
        mainszr.Add(szr04, 0, wx.ALIGN_TOP | wx.ALIGN_CENTER_HORIZONTAL, 5)
        self.SetSizer(mainszr)
        self.Layout()
        self.Centre(wx.BOTH)

        # Define event handlers for the buttons
        self.btn_neu.Bind(wx.EVT_FILEPICKER_CHANGED, self.NewFile)
        self.btn_aus.Bind(wx.EVT_FILEPICKER_CHANGED, self.AusgabeDatei)

    #==========================================================================
    # Sizer 01 (browse button and explaining text)
    #==========================================================================
    def _build_header(self):
        """Create the input file picker plus help text; return their sizer."""
        szr01 = wx.BoxSizer(wx.HORIZONTAL)
        self.btn_neu = wx.FilePickerCtrl(
            self, wx.ID_ANY,
            wx.EmptyString,
            u"Select a PDF file",
            u"*.pdf",
            DefPos, DefSize,
            wx.FLP_CHANGE_DIR | wx.FLP_FILE_MUST_EXIST | wx.FLP_SMALL,
            )
        szr01.Add(self.btn_neu, 0, wx.ALIGN_TOP | wx.ALL, 5)
        msg_txt = ("ADD files with this button. Path and total page number "
                   "will be appended to the table below.\n"
                   "DUPLICATE row: double-click its number. "
                   "MOVE row: drag its number with the mouse. "
                   "DELETE row: right-double-click its number.")
        msg = wx.StaticText(self, wx.ID_ANY, msg_txt,
                            DefPos, wx.Size(-1, 50), wx.ALIGN_LEFT)
        msg.Wrap(-1)
        # NOTE(review): 74 / 90 / 90 are presumably the numeric values of
        # wx.SWISS / wx.NORMAL / wx.NORMAL - confirm before replacing them.
        msg.SetFont(wx.Font(10, 74, 90, 90, False, "Arial"))
        szr01.Add(msg, 0, wx.ALIGN_TOP | wx.ALL, 5)
        return szr01

    #==========================================================================
    # Sizer 02 (contains the grid)
    #==========================================================================
    def _build_grid(self):
        """Create the grid, store it as self.szr02 and set up its columns."""
        self.szr02 = grid = MyGrid(self)
        grid.AutoSizeColumn(0)
        grid.AutoSizeColumn(1)
        grid.SetColSize(2, 45)
        grid.SetColSize(3, 45)
        grid.SetColSize(4, 45)
        grid.SetRowLabelSize(30)
        # The first two columns (file and page count) are read only.
        # SetColAttr takes ownership of the attribute it is given, so each
        # column gets an attribute object of its own.
        for col in (0, 1):
            attr_ro = gridlib.GridCellAttr()
            attr_ro.SetReadOnly(True)
            grid.SetColAttr(col, attr_ro)

    #==========================================================================
    # Sizer 03 (output parameters)
    #==========================================================================
    def _build_output_panel(self):
        """Create the output file / metadata controls; return their sizer."""
        szr03 = wx.FlexGridSizer(5, 2, 0, 0)   # 5 rows, 2 cols, gap sizes 0
        szr03.SetFlexibleDirection(wx.BOTH)
        szr03.SetNonFlexibleGrowMode(wx.FLEX_GROWMODE_SPECIFIED)

        tx_ausdat = wx.StaticText(self, wx.ID_ANY, u"Output:",
                                  DefPos, DefSize, 0)
        tx_ausdat.Wrap(-1)
        szr03.Add(tx_ausdat, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALL, 5)
        self.btn_aus = wx.FilePickerCtrl(
            self, wx.ID_ANY,
            os.path.join(os.path.expanduser('~'), "joined.pdf"),
            u"Specify output file",
            u"*.pdf",
            DefPos, wx.Size(480, -1),
            wx.FLP_OVERWRITE_PROMPT |
            wx.FLP_SAVE | wx.FLP_SMALL |
            wx.FLP_USE_TEXTCTRL)
        szr03.Add(self.btn_aus, 0, wx.ALL, 5)

        tx_autor = wx.StaticText(self, wx.ID_ANY, u"Author:",
                                 DefPos, DefSize, 0)
        tx_autor.Wrap(-1)
        szr03.Add(tx_autor, 0, wx.ALL, 5)
        # default author: last component of the user's home directory
        self.ausaut = wx.TextCtrl(self, wx.ID_ANY,
                                  os.path.basename(os.path.expanduser('~')),
                                  DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.ausaut, 0, wx.ALL, 5)

        pdf_titel = wx.StaticText(self, wx.ID_ANY, u"Title:",
                                  DefPos, DefSize, 0)
        pdf_titel.Wrap(-1)
        szr03.Add(pdf_titel, 0, wx.ALL, 5)
        self.austit = wx.TextCtrl(self, wx.ID_ANY,
                                  u"Joined PDF files",
                                  DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.austit, 0, wx.ALL, 5)

        tx_subject = wx.StaticText(self, wx.ID_ANY, u"Subject:",
                                   DefPos, DefSize, wx.ALIGN_RIGHT)
        tx_subject.Wrap(-1)
        szr03.Add(tx_subject, 0, wx.ALL, 5)
        self.aussub = wx.TextCtrl(self, wx.ID_ANY,
                                  u"Joined PDF files",
                                  DefPos, wx.Size(480, -1), 0)
        szr03.Add(self.aussub, 0, wx.ALL, 5)

        # blank label: fills the left cell next to the check box
        tx_blank = wx.StaticText(self, wx.ID_ANY, u" ",
                                 DefPos, DefSize, wx.ALIGN_RIGHT)
        tx_blank.Wrap(-1)
        szr03.Add(tx_blank, 0, wx.ALL, 5)
        self.noToC = wx.CheckBox(self, wx.ID_ANY,
                                 u"check if no table of contents wanted",
                                 DefPos, DefSize, wx.ALIGN_LEFT)
        szr03.Add(self.noToC, 0, wx.ALL, 5)
        return szr03

    #==========================================================================
    # Sizer 04 (OK / Cancel buttons)
    #==========================================================================
    def _build_buttons(self):
        """Create the OK and Cancel buttons; return their sizer."""
        szr04 = wx.StdDialogButtonSizer()
        szr04OK = wx.Button(self, wx.ID_OK)
        szr04.AddButton(szr04OK)
        szr04Cancel = wx.Button(self, wx.ID_CANCEL)
        szr04.AddButton(szr04Cancel)
        szr04.Realize()
        return szr04

    #==========================================================================
    # "NewFile" - Event Handler for including new files
    #==========================================================================
    def NewFile(self, event):
        """Open the picked file (once) and append a row covering all pages.

        A file that cannot be opened or that needs a password is reported
        in a message box and no row is added.
        """
        dat = event.GetPath()
        doc = self.FileList.get(dat)
        if doc is None:                      # first time we see this file
            try:
                doc = fitz.Document(dat)
            except Exception:
                # boundary of the GUI event loop: tell the user instead of
                # letting the error escape from the event handler
                wx.MessageBox("Cannot open file\n" + dat,
                              "File Error")
                event.Skip()
                return
            if doc.needsPass:
                wx.MessageBox("Cannot read encrypted file\n" + dat,
                              "Encrypted File Error")
                event.Skip()
                return
            self.FileList[dat] = doc
        seiten = doc.pageCount
        # file, page count, from, to, rotate
        zeile = [dat, str(seiten), 1, str(seiten), 0]
        self.szr02.Table.NewRow(zeile)
        self.szr02.AutoSizeColumn(0)
        self.Layout()
        event.Skip()

    #==========================================================================
    # "AusgabeDatei" - Event Handler for out file
    #==========================================================================
    def AusgabeDatei(self, event):
        """Nothing to do on a changed output file; pass the event on."""
        event.Skip()
#==============================================================================
# Create the joined PDF
#==============================================================================
def make_pdf(dlg):
    """Join the page ranges listed in the dialog's table into one PDF.

    Reads from ``dlg``: the table rows (``szr02.Table.data``), the opened
    documents (``FileList``), the output path (``btn_aus``), the metadata
    fields (``austit``, ``ausaut``, ``aussub``) and the ``noToC`` check box.

    Returns the output file name, or None if the table is empty (treated
    like "Cancel"; nothing is written in that case).

    The output file is opened only after all pages and bookmarks have been
    collected, so an error while reading the inputs does not leave a
    truncated file behind, and the output path is not truncated before the
    input files have been opened.
    """
    rows = dlg.szr02.Table.data
    if not rows:                             # no files there
        return None

    cdate = _pdf_timestamp()
    ausgabe = dlg.btn_aus.GetPath()
    pdf_out = PyPDF2.PdfFileWriter()
    pdf_out.addMetadata({"/Creator": "PDF-Joiner",
                         "/Producer": "PyMuPDF, PyPDF2",
                         "/CreationDate": cdate,
                         "/ModDate": cdate,
                         "/Title": dlg.austit.Value,
                         "/Author": dlg.ausaut.Value,
                         "/Subject": dlg.aussub.Value})
    want_toc = not dlg.noToC.Value
    aus_nr = 0                               # current page number in output

    # process one input file per table row
    for zeile in rows:
        dateiname = zeile[0]
        doc = dlg.FileList[dateiname]
        max_seiten = int(zeile[1])
        if max_seiten < 1:                   # nothing to copy from this file
            continue
        # user input minus 1, PDF pages count from zero;
        # also correct any inconsistent input
        von = int(zeile[2]) - 1
        bis = int(zeile[3]) - 1
        von = min(max(0, von), max_seiten - 1)   # 0 <= "from" <= last page
        bis = min(max_seiten - 1, bis)       # "to" must not be > last page
        bis = max(von, bis)                  # "to" cannot be < "from"
        rot = int(zeile[4])                  # get rotation angle

        pdfin = PyPDF2.PdfFileReader(dateiname)
        for p in range(von, bis + 1):        # read pages from input file
            pdf_page = pdfin.getPage(p)
            if rot > 0:
                pdf_page.rotateClockwise(rot)    # rotate the page
            pdf_out.addPage(pdf_page)        # output the page

        first_out = aus_nr                   # output page of this row's start
        aus_nr += bis - von + 1              # increase output counter
        if want_toc:
            _add_bookmarks(pdf_out, doc, dateiname,
                           von, bis, max_seiten, first_out)

    # all input files processed
    with open(ausgabe, "wb") as pdf_fle_out:
        pdf_out.write(pdf_fle_out)
    return ausgabe


def _add_bookmarks(pdf_out, doc, dateiname, von, bis, max_seiten, first_out):
    """Add a level-1 bookmark for one input range plus its sub-ToC.

    ``von`` / ``bis`` are the 0-based first and last copied page of the
    input, ``first_out`` is the 0-based output page the range starts at.
    """
    # title = "infile [from-to (max.pages)]"; characters outside Latin-1
    # are replaced instead of aborting the whole run with an encoding error
    stem = os.path.splitext(os.path.basename(dateiname))[0]
    bm_main_title = "%s [%s-%s (%s)]" % (
        stem.encode("latin-1", "replace"), von + 1, bis + 1, max_seiten)
    bm_main = pdf_out.addBookmark(bm_main_title, first_out,
                                  None, None, False, False, "/Fit")
    # level -> most recent bookmark on that level; starts afresh for every
    # file so that no bookmark of a previous file can become a parent
    parents = {1: bm_main}                   # lvl 1 bookmark is infile's title

    # the relevant sub-ToC: entries pointing into the copied page range
    bm_lst = [[t[0] + 1,                     # indent increased 1 level
               t[1],                         # the title
               t[2] + first_out - von - 1]   # new page number
              for t in fitz.GetToC(doc)      # infile's table of contents
              if von < t[2] <= bis + 1]
    if not bm_lst:                           # no sub-ToC for this range
        return
    # while indent gap is too large, prepend "filler" bookmarks to bm_lst
    while bm_lst[0][0] > 2:
        bm_lst.insert(0, [bm_lst[0][0] - 1, "<>", bm_lst[0][2]])

    # now add infile's bookmarks
    for level, title, page in bm_lst:
        # use the nearest existing ancestor if a level is skipped
        parent_level = max(level - 1, 1)
        while parent_level not in parents:
            parent_level -= 1
        bm = pdf_out.addBookmark(title.encode("latin-1", "replace"), page,
                                 parents[parent_level],
                                 None, False, False, "/Fit")
        parents[level] = bm
        # deeper levels belonged to the previous branch: forget them
        for stale in [lvl for lvl in parents if lvl > level]:
            del parents[stale]


def _pdf_timestamp():
    """Return the local time as PDF date string ``D:YYYYMMDDHHmmSSOHH'mm'``.

    The UTC offset is taken from the local timezone settings instead of
    being a fixed value.
    """
    import time                              # only needed here
    now = time.localtime()
    if time.daylight and now.tm_isdst > 0:
        offset = -time.altzone               # seconds east of UTC, DST active
    else:
        offset = -time.timezone              # seconds east of UTC
    sign = "+" if offset >= 0 else "-"
    hours, minutes = divmod(abs(offset) // 60, 60)
    return "%s%s%02d'%02d'" % (time.strftime("D:%Y%m%d%H%M%S", now),
                               sign, hours, minutes)
#==============================================================================
#
# Main program
#
#==============================================================================
# sys.exit with a text writes that text to stderr and ends with status 1
if wx.VERSION[0] < 3:
    sys.exit("wx Version needs to be at least 3")
app = wx.App()
# NOTE(review): this_dir is not used in the code visible here; kept because
# the input file picker may change the working directory (FLP_CHANGE_DIR).
this_dir = os.getcwd()
#==============================================================================
# create dialog
#==============================================================================
dlg = PDFDialog(None)
try:
    #==========================================================================
    # Show dialog and wait ...
    #==========================================================================
    rc = dlg.ShowModal()
    #==========================================================================
    # if OK pressed, create output PDF
    #==========================================================================
    if rc == wx.ID_OK:
        ausgabe = make_pdf(dlg)
finally:
    # release the dialog even if creating the output PDF failed
    dlg.Destroy()
app = None