This script scans directories that were uploaded to CloudFront and builds a file index. When you modify files, the script automatically detects which files have changed since the last update and clears the CloudFront cache only for those files.
Usage: script.py data_dir [index_file] [dir_prefix]
data_dir - path to directory with uploaded data
index_file - path to files index
dir_prefix - needed if your data_dir path is different from the URL at CloudFront. For example: your data_dir is '/data' but the URL at CloudFront is http://url.com/social/data/, so dir_prefix will be '/social/data/'
#!/usr/bin/python2.6
import sys
import os
import os.path
import hashlib
from boto.cloudfront import CloudFrontConnection
################################################################################
# AWS credentials and the CloudFront distribution whose cache is invalidated.
# SECURITY NOTE(review): credentials are hard-coded in source; move them to
# environment variables or a config file kept out of version control.
AWS_ACCESS_KEY = 'AKIAIPN42DSDDJ3354DSDS'
AWS_SECRET_ACCESS_KEY = 'iu4d2QoU+kJFSfghfghfghfghfghfghl'
AWS_CF_DISTRIBUTION_ID = 'ERKDWKOK23346LDD2'
# Any directory or file whose path contains one of these substrings is skipped.
IGNORE = ['.svn','.php','.py','staticData']
################################################################################
def get_files_from_dir(base_dir):
    """Recursively list files under base_dir, skipping ignored paths.

    base_dir -- absolute path of the data directory.

    Returns a list of paths relative to base_dir (the base_dir prefix is
    stripped, so entries look like '/css/style.css').  A file is skipped
    when its directory path or its name contains any IGNORE substring.
    """
    file_list = []
    for root, _subdirs, files in os.walk(base_dir):
        # Skip whole directory trees matching an ignored substring.
        if any(pattern in root for pattern in IGNORE):
            continue
        for filename in files:
            # Bug fix: the original never reset its skip flag inside this
            # loop, so one ignored filename caused every later file in the
            # same directory to be skipped as well.
            if any(pattern in filename for pattern in IGNORE):
                continue
            file_list.append(os.path.join(root, filename).replace(base_dir, ''))
    return file_list
def get_modified_files(base_dir, all_files, index, dir_prefix):
    """Return the files that are new or changed versus the stored index.

    base_dir   -- absolute path of the data directory
    all_files  -- paths relative to base_dir (with leading '/'),
                  as produced by get_files_from_dir
    index      -- dict mapping relative path -> md5 hexdigest of the
                  content at the time of the last upload
    dir_prefix -- prefix joined onto each path to form the CloudFront URL

    Returns (new_files, new_files_raw): the prefixed paths to send in the
    invalidation request and the raw relative paths used for re-hashing.
    """
    new_files = []
    new_files_raw = []
    for filename in all_files:
        if any(pattern in filename for pattern in IGNORE):
            continue
        # Read as bytes and close the handle promptly; the original used
        # file() without closing, leaking a descriptor per file.
        with open(base_dir + filename, 'rb') as fp:
            digest = hashlib.md5(fp.read()).hexdigest()
        # Unchanged file: recorded hash matches -> nothing to invalidate.
        if index.get(filename) == digest:
            continue
        new_files.append(os.path.join(dir_prefix, filename.strip('/')))
        new_files_raw.append(filename)
    return new_files, new_files_raw
def clear_cloudfront_cache(base_dir, index_file, dir_prefix='', passn=0):
    """Diff base_dir against the stored index and invalidate changed files.

    base_dir   -- data directory to scan
    index_file -- path of the 'path\\t#\\tmd5' index file; a '.back' copy
                  of the previous index is kept beside it
    dir_prefix -- prefix for CloudFront URLs when it differs from base_dir
    passn      -- number of 800-path batches already sent on a previous
                  run (CloudFront limits concurrent invalidations, so at
                  most 3 batches are sent per pass)

    Prompts interactively before sending invalidation requests, then
    rewrites the index with the hashes of the files actually invalidated.
    """
    base_dir = os.path.abspath(base_dir)
    all_files = get_files_from_dir(base_dir)
    # Load the previous index, keeping a backup copy of it.
    if os.path.exists(index_file):
        with open(index_file) as fp:
            data = fp.read()
        # Bug fix: only remove the old backup when it exists; the original
        # unlinked unconditionally and crashed on the first run with an index.
        if os.path.exists(index_file + '.back'):
            os.unlink(index_file + '.back')
        with open(index_file + '.back', 'w') as fp:
            fp.write(data)
    else:
        data = ''
        with open(index_file + '.back', 'w') as fp:
            fp.write('')
    index = {}
    for line in data.split('\n'):
        if not line:
            continue
        path, md5 = line.split('\t#\t')
        index[path] = md5
    new_files, new_files_raw = get_modified_files(base_dir, all_files, index, dir_prefix)
    # Files still in the index but gone from disk were deleted: invalidate
    # them too so CloudFront stops serving stale copies.
    for filename in list(index.keys()):
        if filename not in all_files:
            if any(pattern in filename for pattern in IGNORE):
                continue
            new_files.append(os.path.join(dir_prefix, filename.strip('/')))
            new_files_raw.append(filename)
    if new_files:
        for filename in new_files:
            print('Modified: %s' % filename)
    else:
        print('No files were modified.\n')
        sys.exit()
    print('\nUploading %s files\n' % len(new_files))
    inp = ''
    while inp not in ('y', 'n'):
        inp = raw_input('Upload changes to CloudFront(y/n): ')
    if inp == 'y':
        batch_size = 800  # paths per invalidation request
        try:
            conn = CloudFrontConnection(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
            k = 0
            for i in range(0, len(new_files), batch_size):
                # Skip batches already sent on a previous pass.
                if k < passn:
                    k += 1
                    continue
                # Bug fix: the original stepped by 800 but sliced i:i+900,
                # invalidating 100 paths twice at every batch boundary.
                res = conn.create_invalidation_request(
                    AWS_CF_DISTRIBUTION_ID, new_files[i:i + batch_size])
                if res:
                    print('\nInvalidation request created')
                    # Record new hashes only for files actually invalidated,
                    # so an interrupted run can resume where it stopped.
                    for filename in new_files_raw[i:i + batch_size]:
                        with open(base_dir + filename, 'rb') as fp:
                            index[filename] = hashlib.md5(fp.read()).hexdigest()
                if k >= (passn + 2):
                    print('\nToo many files. Repeat update after 15 minutes.')
                    break
                k += 1
        except Exception as e:
            # Persist whatever progress was made before bailing out.
            save_index(index_file, index)
            sys.exit('\nError: %s' % e)
    save_index(index_file, index)
def save_index(index_file, index):
    """Write the path->md5 index to index_file, overwriting any old file.

    index_file -- destination path
    index      -- dict mapping relative file path -> md5 hexdigest

    Each entry becomes one line of the form 'path\\t#\\tmd5'.  Opening in
    'w' mode truncates, so the original's unlink-before-write was
    redundant; the with-block also guarantees the handle is closed even
    if a write fails (the original leaked it on error).
    """
    with open(index_file, 'w') as index_fp:
        for filename, md5 in index.items():
            index_fp.write('\t#\t'.join([filename, md5]) + '\n')
if __name__ == '__main__':
    print('')
    if len(sys.argv) > 1:
        base_dir = sys.argv[1]
        # Optional positional arguments with defaults.  The original used
        # bare except:, which also swallowed SystemExit/KeyboardInterrupt.
        index_file = sys.argv[2] if len(sys.argv) > 2 else 'cloudfront_cache.ind'
        dir_prefix = sys.argv[3] if len(sys.argv) > 3 else ''
        try:
            passn = int(sys.argv[4])
        except (IndexError, ValueError):
            passn = 0
        clear_cloudfront_cache(base_dir, index_file, dir_prefix, passn)
        print('')
    else:
        print('Usage: %s data_dir [index_file] [dir_prefix]' % sys.argv[0])
|