Welcome, guest | Sign In | My Account | Store | Cart
import sys
import pyPdf

def getPDFContent(path):
    content
= ""
   
# Load PDF into pyPDF
    pdf
= pyPdf.PdfFileReader(file(path, "rb"))
   
# Iterate pages
   
for i in range(0, pdf.getNumPages()):
       
# Extract text from page and add to content
        content
+= pdf.getPage(i).extractText() + " \n"
   
# Collapse whitespace
    content
= u" ".join(content.replace(u"\xa0", u" ").strip().split())
   
return content

f
= open(sys.argv[1]+'.txt','w+')
f
.write(getPDFContent(sys.argv[1]))
f
.close()
#print getPDFContent(sys.argv[1]).encode("ascii", "xmlcharrefreplace")

History