Welcome, guest | Sign In | My Account | Store | Cart

Extract images (JPEG/GIF) from screensaver files, Webshots collection files, PowerPoint presentations, Microsoft Word documents, etc.

Python, 144 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import sys, os, string

#right now we write only GIF/JPEG

#image types (lower-cased to form the output file extension)
imgtypes = ['JPG', 'GIF', 'GIF']

#signature found near the beginning of each image; byte strings, because
#they are compared against data read from a file opened in binary mode
imgsigs = [b'JFIF', b'GIF87a', b'GIF89a']

#offset of each signature from the beginning of its image
imgsigoffs = [6, 0, 0]

#our marker array: (file position, image type) tuples, refilled by main()
imgmarker = []

#number of bytes read from the input file per scan step
_CHUNK_SIZE = 64 * 1024


def _find_markers(infile):
    """Scan infile for image signatures and return the marker list.

    infile must be opened in binary mode and positioned at its start.
    The result is a list of (image start position, image type) tuples in
    file order, followed by one closing marker holding the end-of-file
    position and the type of the last match ("" if nothing matched).
    """
    # A signature may straddle two reads, so the last few bytes of every
    # window are carried over and scanned again with the next chunk.
    overlap = max([len(sig) for sig in imgsigs]) - 1
    found = set()  # a set, because a hit in the carried tail is seen twice
    window = b''
    base = 0  # file offset of window[0]
    while True:
        chunk = infile.read(_CHUNK_SIZE)
        if not chunk:
            break
        window += chunk
        for sig, sigoff, imgtype in zip(imgsigs, imgsigoffs, imgtypes):
            pos = window.find(sig)
            while pos != -1:
                # The image starts sigoff bytes before its signature;
                # never let that land before the start of the file.
                found.add((max(base + pos - sigoff, 0), imgtype))
                pos = window.find(sig, pos + len(sig))
        keep = min(overlap, len(window))
        base += len(window) - keep
        window = window[len(window) - keep:]

    markers = sorted(found)
    lastmatch = markers[-1][1] if markers else ""
    markers.append((int(infile.tell()), lastmatch))
    return markers


def _dump_images(infile, markers):
    """Write image1.<ext>, image2.<ext>, ... and return how many were written.

    Markers are walked from the last one down to index 1.  Following the
    original recipe, the data for marker x starts at marker x-2 (one
    marker is skipped on purpose; the recipe reports that otherwise the
    dumped images are junk or low resolution) and runs up to marker x.
    The start index is clamped to 0 so it never wraps around to the
    closing end-of-file marker.
    """
    dumped = 0
    last = len(markers) - 1
    for x in range(last, 0, -1):
        imgposn = markers[x][0]
        # The extension comes from the marker where the data starts,
        # since that is the signature the written file begins with.
        imgposnprev, imgtype = markers[max(x - 2, 0)]

        infile.seek(imgposnprev)
        data = infile.read(imgposn - imgposnprev)

        imgname = "image" + str(last - x + 1) + '.' + imgtype.lower()
        try:
            ofile = open(imgname, 'wb')
        except (IOError, OSError):
            print('Could not open file  %s  for writing...\n' % imgname)
            continue

        try:
            print('Dumping image file  %s ...\n' % imgname)
            ofile.write(data)
        finally:
            ofile.close()
        dumped += 1
    return dumped


def main():
    """Dump the GIF/JPEG images embedded in the file named by sys.argv[1].

    Images are written to the current directory as image<N>.<ext>.  The
    module-level imgmarker list is refilled with the markers found.
    Exits with status 1 when no file argument is given, 2 when the
    argument is not an existing file and 3 when the file cannot be opened.
    """
    if len(sys.argv) < 2:
        print('Usage: picdumper <file>\n')
        sys.exit(1)

    filename = os.path.abspath(sys.argv[1])

    if not os.path.isfile(filename):
        print('Error: No such file  %s' % filename)
        sys.exit(2)

    #open file in binary mode
    try:
        infile = open(filename, 'rb')
    except (IOError, OSError):
        print('Could not open file to read ! %s' % filename)
        sys.exit(3)

    try:
        # Refill the shared list in place so repeated calls start clean
        # and other references to imgmarker see the new markers.
        del imgmarker[:]
        imgmarker.extend(_find_markers(infile))
        print(imgmarker)

        imgcount = _dump_images(infile, imgmarker)
    finally:
        infile.close()

    print('Dumped  %d  images\n' % imgcount)


if __name__ == "__main__":
    main()

A Python script that dumps GIF/JPEG images out of other files, such as PowerPoint presentations, Webshots collections, and screensavers. Sometimes I feel the need to do this, especially with PowerPoint files, to get a better look at some of the embedded images. I don't know why anyone else would want to do it, but I felt that this was probably useful. :-)

2 comments

anaxagramma 13 years ago  # | flag

I've created a faster, enhanced version with PNG and multiple JPG support. I'll try to post it as a comment in two parts. Part 1:

import sys, os, time
## v0.2.1
#now write only GIF/JPEG
#v0.2.1 (by naxa):
#PNG and several other JPEG formats supported as well

## extra
# How many bytes are pulled from the input file per read.
BUFFER_LENGTH = 1024

# Leading byte signature of each supported format -> image type label.
Types = {
    "\x47\x49\x46\x38\x37\x61": "GIF87a",  # GIF, 87a flavour
    "\x47\x49\x46\x38\x39\x61": "GIF89a",  # GIF, 89a flavour
    "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A": "PNG",
    "\xFF\xD8\xFF\xE0": "JPG",  # JPEG with a JFIF header
    "\xFF\xD8\xFF\xE1": "JPG",  # JPEG with an EXIF header
    "\xFF\xD8\xFF\xE8": "JPG",  # JPEG with a SPIFF header
}

# Parallel tables derived from the mapping: entry i of each describes the
# same signature.
imgsigs = Types.keys()
imgtypes = list(Types.values())
# Every signature here sits at the very start of its image.
imgsigoffs = [0] * len(imgsigs)
## end of extra

# (position, type) markers collected while scanning.
imgmarker = []

def main():
    """Dump the images embedded in the file named by sys.argv[1].

    This first part validates the command line, resolves the input path
    (with a Windows-specific lookup for non-ASCII names), derives the
    prefix used for the output image names, opens the input file in
    binary mode and reads the first buffer.  The scan and dump steps
    follow in the second part of the function.

    Exits with status 1 when no argument is given, 2 when the path is
    not a file and 3 when the directory or the file cannot be read.
    """
    if len(sys.argv) < 2:
        print 'Usage: picdumper <file>\n'
        sys.exit(1)
    #
    if os.name == 'nt':
        # some hacks for windows CLI unicode problems
        filename = unicode(sys.argv[1], "mbcs") # better on recent windows'
        dirname = unicode( os.path.split( os.path.abspath(filename) )[0] )
        if not os.path.isdir(dirname): #couldn't find an easy solution yet for misencoded directory names
            print "Can't read directory, please try to navigate there manually\nand start me from the inside. Directory not found:\n%s"%dirname
            # NOTE(review): this is the bare exit(), not sys.exit() as used
            # elsewhere in this function.
            exit(3)
        # Listing a unicode directory name yields unicode entries; map each
        # entry's mbcs-encoded form back to its unicode name.
        file_list = os.listdir(unicode(dirname))
        file_map = dict( [ (f.encode("mbcs"), f) for f in file_list ] )
        # if two files are the same in 'mbcs' encoding, cannot help it yet
        # NOTE(review): the keys are bare directory entries, so this lookup
        # presumably only hits when the argument has no directory part -
        # confirm.
        if filename in file_map:
            filename = file_map[filename]
        print "Found", repr(filename)
        filename = os.path.abspath(filename)
    else: filename = os.path.abspath(sys.argv[1])

    # Output names start with the input file's base name (extension
    # dropped); the separator style follows whether that name has spaces.
    img_filename_start = os.path.splitext( os.path.split(filename)[-1] )[0]
    if " " in img_filename_start: img_filename_start += " - image "
    else: img_filename_start += "__image"

    if not os.path.isfile(filename):
        print 'Error: No such file ', filename
        sys.exit(2)
    #
    #open file in binary mode
    try:
        infile = open(filename, 'rb')
    #dont bother about specific exceptions
    except Exception, e:
        print 'Could not open file to read !', filename
        print e
        sys.exit(3)
    #
    # NOTE(review): open() either returns a file object or raises, so this
    # branch looks unreachable.
    if infile is None:
        print 'Error opening file ', filename
        sys.exit(3)
    #
    # c = infile.read(1)
    # Read in BUFFER_LENGTH-sized pieces instead of one byte at a time;
    # current_offset is the file position of the first byte held in c.
    c = infile.read(BUFFER_LENGTH)
    current_offset = 0
    lastmatch=""
    print "parsing file"
anaxagramma 13 years ago  # | flag

Part 2:

    while c != '':
        #look for image sig
        for x,sig in enumerate(imgsigs):
            #find if c is first character of imgsig
            sig_inner_pos = 0
            while c[sig_inner_pos:].count(sig):
                pos = c.find(sig)
                sigpos=current_offset + pos #fpos=int(infile.tell())
                sig_inner_pos += pos + len(sig)
                imgpos=sigpos - imgsigoffs[x]

                #write position and image type to marker
                imgmarker.append((imgpos, imgtypes[x]))
                print "found %s.,"%len(imgmarker), imgtypes[x], repr(sig), "at", imgpos
                lastmatch=imgtypes[x]
            #
        #
        #read next char
        c=infile.read(BUFFER_LENGTH)
        current_offset += len(c)
    #
    posn=int(infile.tell())
    imgmarker.append((posn, lastmatch))

    print imgmarker
    #write images
    #rewind file
    infile.seek(0)
    imgcount=0
    #most collections store image in reverse
    #order that was appended
    x=len(imgmarker)-1
    decimal_digits = len(str(x))
    # while x>=1:
    while x>=2: #last one is dummy
        imgcount += 1
        imginfo=imgmarker[x]

        imgposn=imginfo[0]
        imgtype=imginfo[1]
        #this is the tricky part, to get the correct image
        #we need the file posn before previous one!, that
        #is we need to jump a position. Otherwise all images
        #will be junk or of small resolution.
        imginfoprev=imgmarker[x-2]
        imgposnprev=imginfoprev[0]
        #get length in chars
        imglen= imgposn - imgposnprev
        #seek to file position
        infile.seek(imgposnprev)
        #read so many chars
        data=infile.read(imglen)
        #create file name
        imgname=img_filename_start + "%0*d"%(decimal_digits, imgcount) + '.' + str.lower(imgtype)
        try:
            ofile=open(imgname, 'wb')
        except:
            print 'Could not open file ', imgname, ' for writing...\n'
            continue
        #
        if ofile is None:
            print 'Error while trying to create file ', imgname, '!\n'
            continue
        else:
            print 'Dumping image file', repr(imgname), '...'
            ofile.write(data)
            ofile.close()
        #previous marker
        x-=1
    #
    print 'Dumped', imgcount, 'images'
#

if __name__=="__main__":
    x = time.clock()
    main()
    print "Completed in %.5f sec"%(time.clock()-x) 
#