Converts the CRUTEM text data files into a convenient Python dictionary. The data files are at: http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip
#!/usr/bin/env python3
# Author: Brian Fiedler 29 June 2016
# Converts the CRUTEM text data files into a convenient Python dictionary.
# Go to http://www.metoffice.gov.uk/hadobs/crutem4/data/download.html and you should see
# a link to http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip
# Download that and unzip it, and set path_crutem below.
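# Then run: python CRUTEM_to_pkl.py        (pickles every station into crutem44_all.pkl)
# And run:  python CRUTEM_to_pkl.py mini   (pickles only the 12-station subset into crutem44_mini.pkl)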
# Example of using the pkl files that were produced:
# inpkl = open('/data/crutem4/crutem44_mini.pkl','rb')
# crutem = pickle.load(inpkl)
# sitename = crutem['723530']['Name']
# monthAvgTemp = crutem['723530']['obs'][2011][6]
# print("July 2011 temperature at",sitename,":",monthAvgTemp)
import glob
import math
import os
import pickle
import sys
# Directory holding the unzipped CRUTEM station files. Configure for your computer.
path_crutem = "/data/crutem4/CRUTEM.4.4.0.0.station_files"
# Passing 'mini' as the first command-line argument selects the small output
# file; the 'mini' in that file name is what triggers the station filter below.
_want_mini = len(sys.argv) > 1 and sys.argv[1] == 'mini'
outpklname = 'crutem44_mini.pkl' if _want_mini else 'crutem44_all.pkl'
# Station files are matched two directory levels below path_crutem.
files = glob.glob(f"{path_crutem}/*/*")
print(files)
files.sort()
##############
def _parse_header_value(text):
    """Convert a header value to int, finite float, or leave it as a string."""
    # isdecimal() (unlike isdigit()) only accepts characters that int() can
    # parse, so e.g. a superscript digit falls through instead of raising.
    if text.isdecimal():
        return int(text)
    try:
        number = float(text)
    except ValueError:
        return text
    # float() also accepts 'nan'/'inf' spellings; a textual field that happens
    # to read "NAN" or "INF" must stay text rather than become a float.
    return number if math.isfinite(number) else text


def crutemread(fn, verbose=False):
    """Read one CRUTEM station data file into a dictionary.

    The file is read as ISO-8859-1 text. Lines before the one starting with
    'Obs:' are treated as ``key= value`` header lines; lines after it are
    treated as yearly observation records. Blank lines are skipped.

    Args:
        fn: path of the station file to read.
        verbose: if true, print each header key and parsed value.

    Returns:
        A dict with one entry per header line plus an ``'obs'`` entry.
        Header keys are the text before the first '=' with each word
        capitalized and the spaces removed (``Start year`` -> ``StartYear``).
        Header values are the text after the last '=', converted to ``int``
        when all digits, else to ``float`` when it parses as a finite number,
        else kept as the stripped string. ``'obs'`` maps the integer year to
        the list of the remaining columns as floats (per the original
        author's note: 12 temperatures followed by 12 code numbers).

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if an observation line holds a non-numeric field.
    """
    h = {'obs': {}}  # 'obs' has integer years as keys
    doingObs = False  # becomes True once the 'Obs:' line has been seen
    # The with-block guarantees the file is closed even if parsing fails.
    with open(fn, 'r', encoding="ISO-8859-1") as inf:
        for line in inf:
            if line[0:4] == 'Obs:':
                doingObs = True
                continue
            fields = line.split()
            if not fields:
                # A blank line carries no data; without this guard it would
                # raise IndexError below or store a junk '' header key.
                continue
            if doingObs:
                # Line begins with a year number followed by the values.
                h['obs'][int(fields[0])] = [float(x) for x in fields[1:]]
                continue
            s = line.strip().split('=')
            key = ''.join(x.capitalize() for x in s[0].split())
            value = _parse_header_value(s[-1].strip())
            if verbose:
                print(key, value)
            h[key] = value
    return h
# When making a miniature pkl file with just 12 sites, only these station
# file names are retained.
minikeep = '724830 725300 722230 225500 724210 756039 719360 702000 723530 040300 725460 014920'.split()
qall = {}  # master dictionary of per-station dictionaries, to be pickled
count = 0
for filename in files:
    # The station file name doubles as the dictionary key. basename() is used
    # instead of splitting on '/' so paths with other separators also work.
    recn = os.path.basename(filename)
    if 'mini' in outpklname and recn not in minikeep:
        continue
    print(filename)
    qall[recn] = crutemread(filename, verbose=False)  # file -> Python dictionary
    count += 1
# The with-block guarantees the pickle file is flushed and closed even if
# dump() raises. Protocol -1 selects the highest pickle protocol available.
with open(outpklname, 'wb') as poufa:
    pickle.dump(qall, poufa, -1)
print("wrote", outpklname, "number of sites=", count)
|
Tags: pickle