Welcome, guest | Sign In | My Account | Store | Cart
```# Ad-hoc algorithm for copy-move forgery detection in images.
# Implemented by - vasiliauskas.agnius@gmail.com
# Robust match algorithm steps:
#  1. Blur image for eliminating image details
#  2. Convert image to degraded palette
#  3. Decompose image into small NxN pixel blocks
#  4. Alphabetically order these blocks by their pixel values
#  5. Extract only these adjacent blocks which have small absolute color difference
#  6. Cluster these blocks into clusters by intersection area among blocks
#  7. Extract only these clusters which are bigger than block size
#  8. Extract only these clusters which have similar cluster, by using some sort of similarity function (in this case Hausdorff distance between clusters)
#  9. Draw discovered similar clusters on image

import sys
from PIL import Image, ImageFilter, ImageDraw
import operator as op
from optparse import OptionParser

def Dist(p1,p2):
"""
Euclidean distance between 2 points
"""
x1, y1 = p1
x2, y2 = p2
return (((x1-x2)*(x1-x2)) + ((y1-y2)*(y1-y2)))**0.5

def intersectarea(p1,p2,size):
"""
Given 2 boxes, this function returns intersection area
"""
x1, y1 = p1
x2, y2 = p2
ix1, iy1 = max(x1,x2), max(y1,y2)
ix2, iy2 = min(x1+size,x2+size), min(y1+size,y2+size)
iarea = abs(ix2-ix1)*abs(iy2-iy1)
if iy2 < iy1 or ix2 < ix1: iarea = 0
return iarea

def Hausdorff_distance(clust1, clust2, forward, dir):
"""
Function measures distance between 2 sets. (Some kind of non-similarity between 2 sets if you like).
It is modified Hausdorff distance, because instead of max distance - average distance is taken.
This is done for function being more error-prone to cluster coordinates.
"""
if forward == None:
return max(Hausdorff_distance(clust1,clust2,True,dir),Hausdorff_distance(clust1,clust2,False,dir))
else:
clstart, clend = (clust1,clust2) if forward else (clust2,clust1)
dx, dy = dir if forward else (-dir[0],-dir[1])
return sum([min([Dist((p1[0]+dx,p1[1]+dy),p2) for p2 in clend]) for p1 in clstart])/len(clstart)

def hassimilarcluster(ind, clusters):
"""
For given cluster tells does it have twin cluster in image or not.
"""
item = op.itemgetter
global opt
found = False
tx = min(clusters[ind],key=item(0))[0]
ty = min(clusters[ind],key=item(1))[1]
for i, cl in enumerate(clusters):
if i != ind:
cx = min(cl,key=item(0))[0]
cy = min(cl,key=item(1))[1]
dx, dy = cx - tx, cy - ty
specdist = Hausdorff_distance(clusters[ind],cl,None,(dx,dy))
if specdist <= int(opt.rgsim):
found = True
break
return found

def blockpoints(pix, coords, size):
"""
Generator of pixel colors of given block.
"""
xs, ys = coords
for x in range(xs,xs+size):
for y in range(ys,ys+size):
yield pix[x,y]

def colortopalette(color, palette):
"""
Convert given color into palette color.
"""
for a,b in palette:
if color >= a and color < b:
return b

def imagetopalette(image, palcolors):
"""
Convert given image into custom palette colors
"""
assert image.mode == 'L', "Only grayscale images supported !"
pal = [(palcolors[i],palcolors[i+1]) for i in range(len(palcolors)-1)]
image.putdata([colortopalette(c,pal) for c in list(image.getdata())])

def getparts(image, block_len):
"""
Decompose given image into small blocks of data.
"""
img = image.convert('L') if image.mode != 'L' else image
w, h = img.size
parts = []
# Bluring image for abandoning image details and noise.
global opt
for n in range(int(opt.imblev)):
img = img.filter(ImageFilter.SMOOTH_MORE)
# Converting image to custom palette
imagetopalette(img, [x for x in range(256) if x%int(opt.impalred) == 0])

for x in range(w-block_len):
for y in range(h-block_len):
data = list(blockpoints(pix, (x,y), block_len)) + [(x,y)]
parts.append(data)
parts = sorted(parts)
return parts

def similarparts(imagparts):
"""
Return only these blocks which are similar by content.
"""
dupl = []
global opt
l = len(imagparts[0])-1

for i in range(len(imagparts)-1):
difs = sum(abs(x-y) for x,y in zip(imagparts[i][:l],imagparts[i+1][:l]))
mean = float(sum(imagparts[i][:l])) / l
dev = float(sum(abs(mean-val) for val in imagparts[i][:l])) / l
if dev/mean >= float(opt.blcoldev):
if difs <= int(opt.blsim):
if imagparts[i] not in dupl:
dupl.append(imagparts[i])
if imagparts[i+1] not in dupl:
dupl.append(imagparts[i+1])

return dupl

def clusterparts(parts, block_len):
"""
Further filtering out non essential blocks.
This is done by clustering blocks at first and after that
filtering out small clusters and clusters which doesn`t have
twin cluster in image.
"""
parts = sorted(parts, key=op.itemgetter(-1))
global opt
clusters = [[parts[0][-1]]]

# assign all parts to clusters
for i in range(1,len(parts)):
x, y = parts[i][-1]

# detect box already in cluster
fc = []
for k,cl in enumerate(clusters):
for xc,yc in cl:
ar = intersectarea((xc,yc),(x,y),block_len)
intrat = float(ar)/(block_len*block_len)
if intrat > float(opt.blint):
if not fc: clusters[k].append((x,y))
fc.append(k)
break

# if this is new cluster
if not fc:
clusters.append([(x,y)])
else:
# re-clustering boxes if in several clusters at once
while len(fc) > 1:
clusters[fc[0]] += clusters[fc[-1]]
del clusters[fc[-1]]
del fc[-1]

item = op.itemgetter
# filter out small clusters
clusters = [clust for clust in clusters if Dist((min(clust,key=item(0))[0],min(clust,key=item(1))[1]), (max(clust,key=item(0))[0],max(clust,key=item(1))[1]))/(block_len*1.4) >= float(opt.rgsize)]

# filter out clusters, which doesn`t have identical twin cluster
clusters = [clust for x,clust in enumerate(clusters) if hassimilarcluster(x,clusters)]

return clusters

def marksimilar(image, clust, size):
"""
Draw discovered similar image regions.
"""
global opt
blocks = []
if clust:
draw = ImageDraw.Draw(image)
for cl in clust:
for x,y in cl:
im = image.crop((x,y,x+size,y+size))
blocks.append((x,y,im))
for bl in blocks:
x,y,im = bl
image.paste(im,(x,y,x+size,y+size))
if int(opt.imauto):
for cl in clust:
cx1 = min([cx for cx,cy in cl])
cy1 = min([cy for cx,cy in cl])
cx2 = max([cx for cx,cy in cl]) + block_len
cy2 = max([cy for cx,cy in cl]) + block_len
draw.rectangle([cx1,cy1,cx2,cy2],outline="magenta")
return image

if __name__ == '__main__':
cmd = OptionParser("usage: %prog image_file [options]")
cmd.add_option('', '--imauto', help='Automatically search identical regions. (default: %default)', default=1)
cmd.add_option('', '--impalred',help='Image palette reduction factor. (default: %default)', default=15)
cmd.add_option('', '--rgsim', help='Region similarity threshold. (default: %default)', default=5)
cmd.add_option('', '--rgsize',help='Region size threshold. (default: %default)', default=1.5)
cmd.add_option('', '--blsim', help='Block similarity threshold. (default: %default)',default=200)
cmd.add_option('', '--blcoldev', help='Block color deviation threshold. (default: %default)', default=0.2)
cmd.add_option('', '--blint', help='Block intersection threshold. (default: %default)', default=0.2)
opt, args = cmd.parse_args()
if not args:
cmd.print_help()
sys.exit()
print 'Analyzing image, please wait... (can take some minutes)'
block_len = 15
im = Image.open(args[0])
lparts = getparts(im, block_len)
dparts = similarparts(lparts)
cparts = clusterparts(dparts, block_len) if int(opt.imauto) else [[elem[-1] for elem in dparts]]
im = marksimilar(im, cparts, block_len)
out = args[0].split('.')[0] + '_analyzed.jpg'
im.save(out)
print 'Done. Found', len(cparts) if int(opt.imauto) else 0, 'identical regions'
print 'Output is saved in file -', out
```

### History

• revision 5 (12 years ago)
• previous revisions are not available