Image extractor « Python recipes

Extract images (JPEG/GIF) from screensaver files, Webshots collection files, PowerPoint presentations, Microsoft Word documents, etc.

      import sys, os, string

#right now only GIF/JPEG images are recognised and written

#one row per recognised signature:
#  (image type, signature text, offset of the signature from the
#   first byte of the image)
_imgformats=(
    ('JPG', 'JFIF', 6),
    ('GIF', 'GIF87a', 0),
    ('GIF', 'GIF89a', 0),
)

#parallel lists derived from the table above; index i of each list
#describes the same signature
imgtypes, imgsigs, imgsigoffs = map(list, zip(*_imgformats))

#our marker array, main() appends (file position, image type) tuples
imgmarker=[]

def _as_bytes(sig):
    """Return sig as a byte string so it can be searched for in binary data."""
    if isinstance(sig, bytes):
        return sig
    return sig.encode('latin-1')


def _find_markers(data):
    """Return (image position, image type) for every signature hit in data.

    Every signature in imgsigs is searched for over the whole of data; the
    result is sorted by position so it matches the order of the images in
    the file.
    """
    markers = []
    for imgtype, sig, sigoff in zip(imgtypes, imgsigs, imgsigoffs):
        needle = _as_bytes(sig)
        sigpos = data.find(needle)
        while sigpos != -1:
            # the image starts sigoff bytes before its signature; never let
            # a signature close to the start produce a negative position
            markers.append((max(sigpos - sigoff, 0), imgtype))
            sigpos = data.find(needle, sigpos + 1)
    markers.sort()
    return markers


def main():
    """Dump the GIF/JPEG images embedded in the file named by sys.argv[1].

    The input is read into memory in one go, searched for the signatures
    listed in imgsigs, and each image is written to the current directory
    as image<N>.<type>.  The markers found, plus a final end-of-file
    marker, are left in the module-level imgmarker list.

    As in the original recipe, an image runs from the marker two places
    before an end marker up to that end marker (container formats tend to
    yield two signature hits per picture), walking from the end of the file
    backwards.  Consequently a file with a single signature hit produces no
    output.

    Exits with status 1 when no file name was given, 2 when the file does
    not exist and 3 when it cannot be opened.
    """
    if len(sys.argv) < 2:
        print('Usage: picdumper <file>\n')
        sys.exit(1)

    filename = os.path.abspath(sys.argv[1])

    if not os.path.isfile(filename):
        print('Error: No such file  %s' % filename)
        sys.exit(2)

    # open file in binary mode
    try:
        infile = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open file to read ! %s' % filename)
        sys.exit(3)
    try:
        data = infile.read()
    finally:
        infile.close()

    # Searching the in-memory data replaces the byte-by-byte scan, which
    # seeked back too far after a short read near end-of-file and could
    # re-scan the tail of the file forever.
    del imgmarker[:]
    imgmarker.extend(_find_markers(data))

    lastmatch = ""
    if imgmarker:
        lastmatch = imgmarker[-1][1]
    # closing marker: end of file
    imgmarker.append((len(data), lastmatch))

    print(imgmarker)

    imgcount = 0
    written = 0

    # most collections store images in reverse order of appending, so walk
    # the markers backwards.  Stopping at index 2 keeps x - 2 from wrapping
    # around to the end-of-file marker.
    for x in range(len(imgmarker) - 1, 1, -1):
        imgcount += 1
        endpos = imgmarker[x][0]
        # the data starts at marker x - 2, so that marker's type names it
        startpos, imgtype = imgmarker[x - 2]

        imgname = "image" + str(imgcount) + '.' + imgtype.lower()
        try:
            ofile = open(imgname, 'wb')
        except (IOError, OSError):
            print('Could not open file  %s  for writing...\n' % imgname)
            continue

        try:
            print('Dumping image file  %s ...\n' % imgname)
            ofile.write(data[startpos:endpos])
        finally:
            ofile.close()
        written += 1

    print('Dumped  %d  images\n' % written)
	
#run the extractor only when this file is executed as a script,
#not when it is imported as a module
if __name__=="__main__":
	main()

      

A Python script to dump GIF/JPEG images out of other files such as PowerPoint presentations, Webshots collections, screensavers, etc. Sometimes I feel the need to do this, especially with PowerPoint files, to get a better look at some of the embedded images. I don't know why anyone else would want to do it, but I felt that this was probably useful. :-)

Tags: graphics

2 comments

anaxagramma 13 years ago # | flag

I've created a faster, enhanced version with PNG and multiple JPG support. I'll try to post it as a comment in 2 parts. Part 1:

import sys, os, time
## v0.2.1
#v0.2.1 (by naxa):
#besides GIF and JFIF JPEG, PNG and two more JPEG flavours
#(EXIF, SPIFF) are recognised as well

## extra
# number of bytes main() asks for per read() call on the input file
BUFFER_LENGTH = 1024
# Byte sequence an image starts with -> image type label.  main() lower-cases
# the label to form the output file extension, so both GIF variants map to
# "GIF" (the former "GIF87a"/"GIF89a" labels produced .gif87a/.gif89a files).
Types = {
    "\x47\x49\x46\x38\x37\x61": "GIF", #GIF87a type gif
    "\x47\x49\x46\x38\x39\x61": "GIF", #GIF89a type gif
    "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A": "PNG",
    "\xFF\xD8\xFF\xE0": "JPG", #JFIF jpeg
    "\xFF\xD8\xFF\xE1": "JPG", #EXIF jpeg
    "\xFF\xD8\xFF\xE8": "JPG" #SPIFF jpeg
    }
# parallel lists: index i of each describes the same signature
imgsigs = list(Types)
imgtypes = [Types[f] for f in imgsigs]
# every signature sits at the very first byte of its image
imgsigoffs = [0]*len(imgtypes)
## end of extra
# (file position, image type) tuples appended by main()
imgmarker=[]

def main():
    """Command-line entry point: set up the scan of the file in sys.argv[1].

    This first half validates the argument, resolves the input file name
    (with a Windows-specific detour), derives the prefix used for the output
    file names, opens the input in binary mode and reads the first
    BUFFER_LENGTH bytes.  Exits with status 1 when no argument is given,
    2 when the file does not exist and 3 when it (or, on Windows, its
    directory) cannot be read.
    """
    if len(sys.argv) < 2:
        print 'Usage: picdumper <file>\n'
        sys.exit(1)
    #
    if os.name == 'nt':
        # some hacks for windows CLI unicode problems
        filename = unicode(sys.argv[1], "mbcs") # better on recent windows'
        dirname = unicode( os.path.split( os.path.abspath(filename) )[0] )
        if not os.path.isdir(dirname): #couldn't find an easy solution yet for misencoded directory names
            print "Can't read directory, please try to navigate there manually\nand start me from the inside. Directory not found:\n%s"%dirname
            # NOTE(review): bare exit() is the helper the site module installs;
            # every other exit in this function uses sys.exit() -- confirm
            exit(3)
        # map the mbcs-encoded form of every directory entry back to the
        # name object os.listdir() returned for it
        file_list = os.listdir(unicode(dirname))
        file_map = dict( [ (f.encode("mbcs"), f) for f in file_list ] )
        # if two files are the same in 'mbcs' encoding, cannot help it yet
        # NOTE(review): the keys are bare entry names, so this lookup can
        # presumably only succeed when the argument has no directory part
        if filename in file_map:
            filename = file_map[filename]
        print "Found", repr(filename)
        filename = os.path.abspath(filename)
    else: filename = os.path.abspath(sys.argv[1])

    # output names start with the input's base name (extension stripped)
    # followed by a separator chosen to match whether that name has spaces
    img_filename_start = os.path.splitext( os.path.split(filename)[-1] )[0]
    if " " in img_filename_start: img_filename_start += " - image "
    else: img_filename_start += "__image"

    if not os.path.isfile(filename):
        print 'Error: No such file ', filename
        sys.exit(2)
    #
    #open file in binary mode
    try:
        infile = open(filename, 'rb')
    #dont bother about specific exceptions
    except Exception, e:
        print 'Could not open file to read !', filename
        print e
        sys.exit(3)
    #
    # NOTE(review): open() either returns a file object or raises, so this
    # branch looks unreachable
    if infile is None:
        print 'Error opening file ', filename
        sys.exit(3)
    #
    # the file is read BUFFER_LENGTH bytes at a time instead of byte by byte
    # c = infile.read(1)
    c = infile.read(BUFFER_LENGTH)
    # starts at 0 alongside the first chunk held in c
    current_offset = 0
    # starts empty; no signature has been matched yet
    lastmatch=""
    print "parsing file"

anaxagramma 13 years ago # | flag

Part 2:

    while c != '':
        #look for image sig
        for x,sig in enumerate(imgsigs):
            #find if c is first character of imgsig
            sig_inner_pos = 0
            while c[sig_inner_pos:].count(sig):
                pos = c.find(sig)
                sigpos=current_offset + pos #fpos=int(infile.tell())
                sig_inner_pos += pos + len(sig)
                imgpos=sigpos - imgsigoffs[x]

                #write position and image type to marker
                imgmarker.append((imgpos, imgtypes[x]))
                print "found %s.,"%len(imgmarker), imgtypes[x], repr(sig), "at", imgpos
                lastmatch=imgtypes[x]
            #
        #
        #read next char
        c=infile.read(BUFFER_LENGTH)
        current_offset += len(c)
    #
    posn=int(infile.tell())
    imgmarker.append((posn, lastmatch))

    print imgmarker
    #write images
    #rewind file
    infile.seek(0)
    imgcount=0
    #most collections store image in reverse
    #order that was appended
    x=len(imgmarker)-1
    decimal_digits = len(str(x))
    # while x>=1:
    while x>=2: #last one is dummy
        imgcount += 1
        imginfo=imgmarker[x]

        imgposn=imginfo[0]
        imgtype=imginfo[1]
        #this is the tricky part, to get the correct image
        #we need the file posn before previous one!, that
        #is we need to jump a position. Otherwise all images
        #will be junk or of small resolution.
        imginfoprev=imgmarker[x-2]
        imgposnprev=imginfoprev[0]
        #get length in chars
        imglen= imgposn - imgposnprev
        #seek to file position
        infile.seek(imgposnprev)
        #read so many chars
        data=infile.read(imglen)
        #create file name
        imgname=img_filename_start + "%0*d"%(decimal_digits, imgcount) + '.' + str.lower(imgtype)
        try:
            ofile=open(imgname, 'wb')
        except:
            print 'Could not open file ', imgname, ' for writing...\n'
            continue
        #
        if ofile is None:
            print 'Error while trying to create file ', imgname, '!\n'
            continue
        else:
            print 'Dumping image file', repr(imgname), '...'
            ofile.write(data)
            ofile.close()
        #previous marker
        x-=1
    #
    print 'Dumped', imgcount, 'images'
#

if __name__=="__main__":
    x = time.clock()
    main()
    print "Completed in %.5f sec"%(time.clock()-x) 
#

◄	Python recipes (4591)	►
◄	Anand's recipes (38)	►

Image extractor (Python recipe) by Anand
ActiveState Code (http://code.activestate.com/recipes/189862/)

2 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Image extractor (Python recipe) by Anand ActiveState Code (http://code.activestate.com/recipes/189862/)

2 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Image extractor (Python recipe) by Anand
ActiveState Code (http://code.activestate.com/recipes/189862/)