Ad-hoc algorithm for copy-move forgery detection in images. This algorithm is robust so it can detect copy-move forgery in lossy compression formats such as jpeg. Because this algorithm is ad-hoc - it heavily depends on script parameters. So if it don`t finds any copy-move tamperings in image - try to lower essential parameter "block color deviation threshold". Something like (you can also try to change other parameters as well):
%script image_file --blcoldev=0.05
If you want to look at some copy-move forgery detection examples, - you should check this site:
http://coding-experiments.blogspot.com/2009/03/detecting-copy-move-forgery-in-images.html
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 | # Ad-hoc algorithm for copy-move forgery detection in images.
# Implemented by - vasiliauskas.agnius@gmail.com
# Robust match algorithm steps:
# 1. Blur image for eliminating image details
# 2. Convert image to degraded palette
# 3. Decompose image into small NxN pixel blocks
# 4. Alphabetically order these blocks by their pixel values
# 5. Extract only these adjacent blocks which have small absolute color difference
# 6. Cluster these blocks into clusters by intersection area among blocks
# 7. Extract only these clusters which are bigger than block size
# 8. Extract only these clusters which have similar cluster, by using some sort of similarity function (in this case Hausdorff distance between clusters)
# 9. Draw discovered similar clusters on image
import sys
from PIL import Image, ImageFilter, ImageDraw
import operator as op
from optparse import OptionParser
def Dist(p1,p2):
"""
Euclidean distance between 2 points
"""
x1, y1 = p1
x2, y2 = p2
return (((x1-x2)*(x1-x2)) + ((y1-y2)*(y1-y2)))**0.5
def intersectarea(p1,p2,size):
"""
Given 2 boxes, this function returns intersection area
"""
x1, y1 = p1
x2, y2 = p2
ix1, iy1 = max(x1,x2), max(y1,y2)
ix2, iy2 = min(x1+size,x2+size), min(y1+size,y2+size)
iarea = abs(ix2-ix1)*abs(iy2-iy1)
if iy2 < iy1 or ix2 < ix1: iarea = 0
return iarea
def Hausdorff_distance(clust1, clust2, forward, dir):
"""
Function measures distance between 2 sets. (Some kind of non-similarity between 2 sets if you like).
It is modified Hausdorff distance, because instead of max distance - average distance is taken.
This is done for function being more error-prone to cluster coordinates.
"""
if forward == None:
return max(Hausdorff_distance(clust1,clust2,True,dir),Hausdorff_distance(clust1,clust2,False,dir))
else:
clstart, clend = (clust1,clust2) if forward else (clust2,clust1)
dx, dy = dir if forward else (-dir[0],-dir[1])
return sum([min([Dist((p1[0]+dx,p1[1]+dy),p2) for p2 in clend]) for p1 in clstart])/len(clstart)
def hassimilarcluster(ind, clusters):
"""
For given cluster tells does it have twin cluster in image or not.
"""
item = op.itemgetter
global opt
found = False
tx = min(clusters[ind],key=item(0))[0]
ty = min(clusters[ind],key=item(1))[1]
for i, cl in enumerate(clusters):
if i != ind:
cx = min(cl,key=item(0))[0]
cy = min(cl,key=item(1))[1]
dx, dy = cx - tx, cy - ty
specdist = Hausdorff_distance(clusters[ind],cl,None,(dx,dy))
if specdist <= int(opt.rgsim):
found = True
break
return found
def blockpoints(pix, coords, size):
"""
Generator of pixel colors of given block.
"""
xs, ys = coords
for x in range(xs,xs+size):
for y in range(ys,ys+size):
yield pix[x,y]
def colortopalette(color, palette):
"""
Convert given color into palette color.
"""
for a,b in palette:
if color >= a and color < b:
return b
def imagetopalette(image, palcolors):
"""
Convert given image into custom palette colors
"""
assert image.mode == 'L', "Only grayscale images supported !"
pal = [(palcolors[i],palcolors[i+1]) for i in range(len(palcolors)-1)]
image.putdata([colortopalette(c,pal) for c in list(image.getdata())])
def getparts(image, block_len):
"""
Decompose given image into small blocks of data.
"""
img = image.convert('L') if image.mode != 'L' else image
w, h = img.size
parts = []
# Bluring image for abandoning image details and noise.
global opt
for n in range(int(opt.imblev)):
img = img.filter(ImageFilter.SMOOTH_MORE)
# Converting image to custom palette
imagetopalette(img, [x for x in range(256) if x%int(opt.impalred) == 0])
pix = img.load()
for x in range(w-block_len):
for y in range(h-block_len):
data = list(blockpoints(pix, (x,y), block_len)) + [(x,y)]
parts.append(data)
parts = sorted(parts)
return parts
def similarparts(imagparts):
"""
Return only these blocks which are similar by content.
"""
dupl = []
global opt
l = len(imagparts[0])-1
for i in range(len(imagparts)-1):
difs = sum(abs(x-y) for x,y in zip(imagparts[i][:l],imagparts[i+1][:l]))
mean = float(sum(imagparts[i][:l])) / l
dev = float(sum(abs(mean-val) for val in imagparts[i][:l])) / l
if dev/mean >= float(opt.blcoldev):
if difs <= int(opt.blsim):
if imagparts[i] not in dupl:
dupl.append(imagparts[i])
if imagparts[i+1] not in dupl:
dupl.append(imagparts[i+1])
return dupl
def clusterparts(parts, block_len):
"""
Further filtering out non essential blocks.
This is done by clustering blocks at first and after that
filtering out small clusters and clusters which doesn`t have
twin cluster in image.
"""
parts = sorted(parts, key=op.itemgetter(-1))
global opt
clusters = [[parts[0][-1]]]
# assign all parts to clusters
for i in range(1,len(parts)):
x, y = parts[i][-1]
# detect box already in cluster
fc = []
for k,cl in enumerate(clusters):
for xc,yc in cl:
ar = intersectarea((xc,yc),(x,y),block_len)
intrat = float(ar)/(block_len*block_len)
if intrat > float(opt.blint):
if not fc: clusters[k].append((x,y))
fc.append(k)
break
# if this is new cluster
if not fc:
clusters.append([(x,y)])
else:
# re-clustering boxes if in several clusters at once
while len(fc) > 1:
clusters[fc[0]] += clusters[fc[-1]]
del clusters[fc[-1]]
del fc[-1]
item = op.itemgetter
# filter out small clusters
clusters = [clust for clust in clusters if Dist((min(clust,key=item(0))[0],min(clust,key=item(1))[1]), (max(clust,key=item(0))[0],max(clust,key=item(1))[1]))/(block_len*1.4) >= float(opt.rgsize)]
# filter out clusters, which doesn`t have identical twin cluster
clusters = [clust for x,clust in enumerate(clusters) if hassimilarcluster(x,clusters)]
return clusters
def marksimilar(image, clust, size):
"""
Draw discovered similar image regions.
"""
global opt
blocks = []
if clust:
draw = ImageDraw.Draw(image)
mask = Image.new('RGB', (size,size), 'cyan')
for cl in clust:
for x,y in cl:
im = image.crop((x,y,x+size,y+size))
im = Image.blend(im,mask,0.5)
blocks.append((x,y,im))
for bl in blocks:
x,y,im = bl
image.paste(im,(x,y,x+size,y+size))
if int(opt.imauto):
for cl in clust:
cx1 = min([cx for cx,cy in cl])
cy1 = min([cy for cx,cy in cl])
cx2 = max([cx for cx,cy in cl]) + block_len
cy2 = max([cy for cx,cy in cl]) + block_len
draw.rectangle([cx1,cy1,cx2,cy2],outline="magenta")
return image
if __name__ == '__main__':
cmd = OptionParser("usage: %prog image_file [options]")
cmd.add_option('', '--imauto', help='Automatically search identical regions. (default: %default)', default=1)
cmd.add_option('', '--imblev',help='Blur level for degrading image details. (default: %default)', default=8)
cmd.add_option('', '--impalred',help='Image palette reduction factor. (default: %default)', default=15)
cmd.add_option('', '--rgsim', help='Region similarity threshold. (default: %default)', default=5)
cmd.add_option('', '--rgsize',help='Region size threshold. (default: %default)', default=1.5)
cmd.add_option('', '--blsim', help='Block similarity threshold. (default: %default)',default=200)
cmd.add_option('', '--blcoldev', help='Block color deviation threshold. (default: %default)', default=0.2)
cmd.add_option('', '--blint', help='Block intersection threshold. (default: %default)', default=0.2)
opt, args = cmd.parse_args()
if not args:
cmd.print_help()
sys.exit()
print 'Analyzing image, please wait... (can take some minutes)'
block_len = 15
im = Image.open(args[0])
lparts = getparts(im, block_len)
dparts = similarparts(lparts)
cparts = clusterparts(dparts, block_len) if int(opt.imauto) else [[elem[-1] for elem in dparts]]
im = marksimilar(im, cparts, block_len)
out = args[0].split('.')[0] + '_analyzed.jpg'
im.save(out)
print 'Done. Found', len(cparts) if int(opt.imauto) else 0, 'identical regions'
print 'Output is saved in file -', out
|
Keep in mind- this is experimental thing, so use at your own risk :-)