Welcome, guest | Sign In | My Account | Store | Cart

This script scans directories that were uploaded to CloudFront and builds an index of their files. When you modify some files, the script automatically detects which files were modified since the last update and clears the CloudFront cache only for them.

Usage: script.py data_dir [index_file] [dir_prefix]

data_dir - path to directory with uploaded data

index_file - path to files index

dir_prefix - needed if your data_dir path is different from the URL at CloudFront. For example: your data_dir is '/data' but the URL at CloudFront is http://url.com/social/data/, so dir_prefix will be '/social/data/'.

Python, 167 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/python2.6

import sys
import os
import os.path
import hashlib

from boto.cloudfront import CloudFrontConnection

################################################################################

# Credentials passed to boto's CloudFrontConnection when an invalidation
# request is created.
# NOTE(review): these are hard-coded in the source file; consider loading
# them from the environment or a boto config file instead of committing them.
AWS_ACCESS_KEY          = 'AKIAIPN42DSDDJ3354DSDS'
AWS_SECRET_ACCESS_KEY   = 'iu4d2QoU+kJFSfghfghfghfghfghfghl'
# ID of the CloudFront distribution the invalidation requests are sent to.
AWS_CF_DISTRIBUTION_ID  = 'ERKDWKOK23346LDD2'

# Substrings that exclude a path from processing: any directory path or file
# name that contains one of these (anywhere, not only as a suffix) is skipped.
IGNORE = ['.svn','.php','.py','staticData']

################################################################################

def get_files_from_dir(base_dir, ignore=None):
    """Return the paths of all non-ignored files below ``base_dir``.

    Each returned path is the file's path with the leading ``base_dir``
    removed, so it normally starts with a path separator (for example
    ``'/img/logo.png'``).

    Parameters:
        base_dir: directory to walk recursively.
        ignore: optional iterable of substrings; a directory (relative to
            ``base_dir``) or a file name containing any of them is skipped.
            Defaults to the module-level ``IGNORE`` list.

    Returns:
        A list of relative file paths, in ``os.walk`` order.
    """
    if ignore is None:
        ignore = IGNORE

    def is_ignored(text):
        return any(pattern in text for pattern in ignore)

    # os.walk yields roots that always start with base_dir, so slicing off
    # the prefix is exact. str.replace would also remove later occurrences
    # of base_dir inside the path.
    prefix_len = len(base_dir)
    file_list = []
    for root, sub_folders, files in os.walk(base_dir):
        # Match only the part below base_dir so that a pattern appearing in
        # base_dir itself does not hide the whole tree.
        if is_ignored(root[prefix_len:]):
            # Every descendant path contains the same substring, so prune the
            # walk here instead of visiting and skipping each subdirectory.
            del sub_folders[:]
            continue
        for filename in files:
            # Evaluated per file: one ignored file must not cause the files
            # listed after it in the same directory to be skipped.
            if is_ignored(filename):
                continue
            file_list.append(os.path.join(root, filename)[prefix_len:])
    return file_list
    
def get_modified_files(base_dir, all_files, index, dir_prefix, ignore=None):
    """Find the files whose content differs from the digest stored in ``index``.

    Parameters:
        base_dir: directory the entries of ``all_files`` are relative to.
        all_files: relative file paths; each is appended to ``base_dir`` as
            is, so it is expected to start with a path separator.
        index: dict mapping relative path -> MD5 hex digest from the last run.
        dir_prefix: prefix joined in front of each path for the CloudFront
            side.
        ignore: optional iterable of substrings; paths containing any of them
            are skipped. Defaults to the module-level ``IGNORE`` list.

    Returns:
        A tuple ``(new_files, new_files_raw)`` of two parallel lists: the
        prefixed paths to invalidate and the matching relative paths as they
        appear in ``all_files``. Files missing from ``index`` count as
        modified.
    """
    if ignore is None:
        ignore = IGNORE

    new_files = []
    new_files_raw = []

    for filename in all_files:
        if any(pattern in filename for pattern in ignore):
            continue
        # Plain concatenation on purpose: filename begins with a separator,
        # and os.path.join would discard base_dir for such a component.
        # Binary mode hashes the exact bytes on disk, and the with-block
        # closes the handle promptly.
        with open(base_dir + filename, 'rb') as fp:
            digest = hashlib.md5(fp.read()).hexdigest()
        if index.get(filename) == digest:
            continue
        new_files.append(os.path.join(dir_prefix, filename.strip('/')))
        new_files_raw.append(filename)

    return new_files, new_files_raw
    

def _load_index(index_file):
    """Read ``index_file`` into a dict and refresh its ``.back`` backup copy."""
    data = ''
    if os.path.exists(index_file):
        with open(index_file) as fp:
            data = fp.read()
    # Mode 'w' truncates an existing backup, so no unlink is needed (the
    # unlink used to raise when the backup file was missing).
    with open(index_file + '.back', 'w') as fp:
        fp.write(data)

    index = {}
    for line in data.split('\n'):
        # Split on the last separator: the digest never contains it. Blank
        # or malformed lines are skipped; the affected file is then simply
        # treated as modified.
        path, sep, digest = line.rpartition('\t#\t')
        if not sep:
            continue
        index[path] = digest
    return index


def clear_cloudfront_cache(base_dir,index_file,dir_prefix='',passn=0):
    """Invalidate the CloudFront cache for files changed since the last run.

    Walks ``base_dir``, compares each file's MD5 digest with the one stored
    in ``index_file``, lists the changed, new and deleted files, asks for
    confirmation on stdin and then sends invalidation requests in batches.
    After each accepted batch the index entries of that batch are updated,
    and the index is written back at the end (also when an error occurs).

    Parameters:
        base_dir: directory holding the uploaded data.
        index_file: path of the index file; a copy of its previous content is
            written to ``index_file + '.back'``.
        dir_prefix: prefix joined in front of each relative path to form the
            path sent to CloudFront.
        passn: number of leading batches to skip, for resuming a run that
            stopped after reaching the per-run batch limit.

    Raises:
        SystemExit: when nothing was modified, or with an error message when
            creating an invalidation or updating the index fails.
    """
    # One size for both the invalidated slice and the index update. The
    # original stepped by 800 but sent 900 paths, so batches overlapped.
    batch_size = 800

    base_dir = os.path.abspath(base_dir)
    all_files = get_files_from_dir(base_dir)
    index = _load_index(index_file)

    new_files, new_files_raw = get_modified_files(base_dir, all_files, index, dir_prefix)

    # Indexed files that no longer exist on disk must be invalidated as well.
    present = set(all_files)
    for filename in index:
        if filename in present:
            continue
        if any(pattern in filename for pattern in IGNORE):
            continue
        new_files.append(os.path.join(dir_prefix, filename.strip('/')))
        new_files_raw.append(filename)

    if not new_files:
        print('No files were modified.\n')
        sys.exit()

    for filename in new_files:
        print('Modified: %s' % filename)
    print('\nUploading %s files\n' % len(new_files))

    inp = ''
    while inp not in ('y', 'n'):
        inp = raw_input('Upload changes to CloudFront(y/n): ')
    if inp != 'y':
        return

    try:
        conn = CloudFrontConnection(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
        for k, start in enumerate(range(0, len(new_files), batch_size)):
            if k < passn:
                continue
            end = start + batch_size
            res = conn.create_invalidation_request(AWS_CF_DISTRIBUTION_ID,
                                                   new_files[start:end])
            if res:
                print('\nInvalidation request created')
                for filename in new_files_raw[start:end]:
                    path = base_dir + filename
                    if os.path.exists(path):
                        with open(path, 'rb') as fp:
                            index[filename] = hashlib.md5(fp.read()).hexdigest()
                    else:
                        # Deleted file: drop it from the index instead of
                        # failing to read it and re-invalidating it forever.
                        index.pop(filename, None)
            # At most three batches are sent per run (presumably to stay
            # within CloudFront's limit on in-progress invalidations).
            if k >= passn + 2:
                if end < len(new_files):
                    print('\nToo many files. Repeat update after 15 minutes.')
                break
    except Exception as e:
        # Keep whatever progress was made before reporting the failure.
        save_index(index_file, index)
        sys.exit('\nError: %s' % e)

    save_index(index_file, index)
            
    
def save_index(index_file,index):
    """Write ``index`` to ``index_file``, replacing any previous content.

    Parameters:
        index_file: path of the file to write.
        index: dict mapping relative file path -> MD5 hex digest.

    Each entry becomes one line of the form ``<path>\\t#\\t<md5>``. Entries
    are written sorted by path so the file content is deterministic.
    """
    # Mode 'w' truncates an existing file, so no unlink is needed first. The
    # with-block closes the handle even if a write fails.
    with open(index_file, 'w') as index_fp:
        for filename, md5 in sorted(index.items()):
            index_fp.write('\t#\t'.join([filename, md5]) + '\n')
    

if __name__ == '__main__':
    # Command line: data_dir [index_file] [dir_prefix] [passn]
    # Only data_dir is required; each missing optional argument falls back
    # to its default.
    print('')
    if len(sys.argv) > 1:
        base_dir = sys.argv[1]

        try:
            index_file = sys.argv[2]
        except IndexError:
            index_file = 'cloudfront_cache.ind'

        try:
            dir_prefix = sys.argv[3]
        except IndexError:
            dir_prefix = ''

        # A missing or non-numeric passn means "start from the first batch".
        try:
            passn = int(sys.argv[4])
        except (IndexError, ValueError):
            passn = 0

        clear_cloudfront_cache(base_dir, index_file, dir_prefix, passn)
        print('')
    else:
        print('Usage: %s data_dir [index_file] [dir_prefix] [passn]' % sys.argv[0])