Welcome, guest | Sign In | My Account | Store | Cart
# Ad-hoc algorithm for copy-move forgery detection in images.
# Implemented by - vasiliauskas.agnius@gmail.com
# Robust match algorithm steps:
#  1. Blur image for eliminating image details
#  2. Convert image to degraded palette
#  3. Decompose image into small NxN pixel blocks
#  4. Alphabetically order these blocks by their pixel values
#  5. Extract only these adjacent blocks which have small absolute color difference
#  6. Cluster these blocks into clusters by intersection area among blocks
#  7. Extract only these clusters which are bigger than block size
#  8. Extract only these clusters which have similar cluster, by using some sort of similarity function (in this case Hausdorff distance between clusters)
#  9. Draw discovered similar clusters on image

import sys
from PIL import Image, ImageFilter, ImageDraw
import operator as op
from optparse import OptionParser

def Dist(p1,p2):
 Euclidean distance between 2 points
 x1, y1 = p1
 x2, y2 = p2
 return (((x1-x2)*(x1-x2)) + ((y1-y2)*(y1-y2)))**0.5

def intersectarea(p1,p2,size):
 Given 2 boxes, this function returns intersection area
 x1, y1 = p1
 x2, y2 = p2
 ix1, iy1 = max(x1,x2), max(y1,y2)
 ix2, iy2 = min(x1+size,x2+size), min(y1+size,y2+size)
 iarea = abs(ix2-ix1)*abs(iy2-iy1)
 if iy2 < iy1 or ix2 < ix1: iarea = 0
 return iarea

def Hausdorff_distance(clust1, clust2, forward, dir):
 Function measures distance between 2 sets. (Some kind of non-similarity between 2 sets if you like).
 It is modified Hausdorff distance, because instead of max distance - average distance is taken.
 This is done for function being more error-prone to cluster coordinates.
 if forward == None:
  return max(Hausdorff_distance(clust1,clust2,True,dir),Hausdorff_distance(clust1,clust2,False,dir))
  clstart, clend = (clust1,clust2) if forward else (clust2,clust1)
  dx, dy = dir if forward else (-dir[0],-dir[1])
  return sum([min([Dist((p1[0]+dx,p1[1]+dy),p2) for p2 in clend]) for p1 in clstart])/len(clstart)

def hassimilarcluster(ind, clusters):
 For given cluster tells does it have twin cluster in image or not.
 item = op.itemgetter
 global opt
 found = False
 tx = min(clusters[ind],key=item(0))[0]
 ty = min(clusters[ind],key=item(1))[1]
 for i, cl in enumerate(clusters):
  if i != ind:
   cx = min(cl,key=item(0))[0]
   cy = min(cl,key=item(1))[1]
   dx, dy = cx - tx, cy - ty
   specdist = Hausdorff_distance(clusters[ind],cl,None,(dx,dy))
   if specdist <= int(opt.rgsim):
    found = True
 return found

def blockpoints(pix, coords, size):
 Generator of pixel colors of given block.
 xs, ys = coords
 for x in range(xs,xs+size):
  for y in range(ys,ys+size):
   yield pix[x,y]

def colortopalette(color, palette):
 Convert given color into palette color.
 for a,b in palette:
  if color >= a and color < b:
   return b

def imagetopalette(image, palcolors):
 Convert given image into custom palette colors
 assert image.mode == 'L', "Only grayscale images supported !"
 pal = [(palcolors[i],palcolors[i+1]) for i in range(len(palcolors)-1)]
 image.putdata([colortopalette(c,pal) for c in list(image.getdata())])

def getparts(image, block_len):
 Decompose given image into small blocks of data.
 img = image.convert('L') if image.mode != 'L' else image
 w, h = img.size 
 parts = []
 # Bluring image for abandoning image details and noise.
 global opt
 for n in range(int(opt.imblev)):
  img = img.filter(ImageFilter.SMOOTH_MORE)
 # Converting image to custom palette
 imagetopalette(img, [x for x in range(256) if x%int(opt.impalred) == 0])
 pix = img.load()
 for x in range(w-block_len):
  for y in range(h-block_len):
   data = list(blockpoints(pix, (x,y), block_len)) + [(x,y)]
 parts = sorted(parts)
 return parts

def similarparts(imagparts):
 Return only these blocks which are similar by content.
 dupl = []
 global opt
 l = len(imagparts[0])-1
 for i in range(len(imagparts)-1):  
  difs = sum(abs(x-y) for x,y in zip(imagparts[i][:l],imagparts[i+1][:l]))
  mean = float(sum(imagparts[i][:l])) / l
  dev = float(sum(abs(mean-val) for val in imagparts[i][:l])) / l
  if dev/mean >= float(opt.blcoldev):
   if difs <= int(opt.blsim):
    if imagparts[i] not in dupl:
    if imagparts[i+1] not in dupl:

 return dupl

def clusterparts(parts, block_len):
 Further filtering out non essential blocks.
 This is done by clustering blocks at first and after that
 filtering out small clusters and clusters which doesn`t have
 twin cluster in image.
 parts = sorted(parts, key=op.itemgetter(-1))
 global opt
 clusters = [[parts[0][-1]]]
 # assign all parts to clusters
 for i in range(1,len(parts)):
  x, y = parts[i][-1]
  # detect box already in cluster
  fc = []
  for k,cl in enumerate(clusters):
   for xc,yc in cl:
    ar = intersectarea((xc,yc),(x,y),block_len)
    intrat = float(ar)/(block_len*block_len)
    if intrat > float(opt.blint):
     if not fc: clusters[k].append((x,y))
  # if this is new cluster
  if not fc:
   # re-clustering boxes if in several clusters at once
   while len(fc) > 1:
    clusters[fc[0]] += clusters[fc[-1]]
    del clusters[fc[-1]]
    del fc[-1]
 item = op.itemgetter
 # filter out small clusters
 clusters = [clust for clust in clusters if Dist((min(clust,key=item(0))[0],min(clust,key=item(1))[1]), (max(clust,key=item(0))[0],max(clust,key=item(1))[1]))/(block_len*1.4) >= float(opt.rgsize)]
 # filter out clusters, which doesn`t have identical twin cluster
 clusters = [clust for x,clust in enumerate(clusters) if hassimilarcluster(x,clusters)]
 return clusters

def marksimilar(image, clust, size):
 Draw discovered similar image regions.
 global opt
 blocks = []
 if clust:
  draw = ImageDraw.Draw(image)
  mask = Image.new('RGB', (size,size), 'cyan')
  for cl in clust:
   for x,y in cl:
   	im = image.crop((x,y,x+size,y+size))
   	im = Image.blend(im,mask,0.5)
  for bl in blocks:
  	x,y,im = bl
  if int(opt.imauto):
   for cl in clust:
    cx1 = min([cx for cx,cy in cl])
    cy1 = min([cy for cx,cy in cl])
    cx2 = max([cx for cx,cy in cl]) + block_len
    cy2 = max([cy for cx,cy in cl]) + block_len
 return image

if __name__ == '__main__':
 cmd = OptionParser("usage: %prog image_file [options]")
 cmd.add_option('', '--imauto', help='Automatically search identical regions. (default: %default)', default=1)
 cmd.add_option('', '--imblev',help='Blur level for degrading image details. (default: %default)', default=8)
 cmd.add_option('', '--impalred',help='Image palette reduction factor. (default: %default)', default=15)
 cmd.add_option('', '--rgsim', help='Region similarity threshold. (default: %default)', default=5)
 cmd.add_option('', '--rgsize',help='Region size threshold. (default: %default)', default=1.5)
 cmd.add_option('', '--blsim', help='Block similarity threshold. (default: %default)',default=200)
 cmd.add_option('', '--blcoldev', help='Block color deviation threshold. (default: %default)', default=0.2)
 cmd.add_option('', '--blint', help='Block intersection threshold. (default: %default)', default=0.2)
 opt, args = cmd.parse_args()
 if not args:
 print 'Analyzing image, please wait... (can take some minutes)'
 block_len = 15
 im = Image.open(args[0])
 lparts = getparts(im, block_len)
 dparts = similarparts(lparts)
 cparts = clusterparts(dparts, block_len) if int(opt.imauto) else [[elem[-1] for elem in dparts]]
 im = marksimilar(im, cparts, block_len)
 out = args[0].split('.')[0] + '_analyzed.jpg'
 print 'Done. Found', len(cparts) if int(opt.imauto) else 0, 'identical regions'
 print 'Output is saved in file -', out


  • revision 5 (12 years ago)
  • previous revisions are not available