#!/usr/bin/env python3
# Author: Brian Fiedler 29 June 2016
# Converts the CRUTEM text data files into a convenient Python dictionary.
# Go to http://www.metoffice.gov.uk/hadobs/crutem4/data/download.html and you should see
# a link to http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip
# Download that and unzip it, and set path_crutem below.
# Then: python CRUTEM_to_pkl.py
# And: python CRUTEM_to_pkl.py mini
# Example of using the pkl files that were produced:
# inpkl = open('/data/crutem4/crutem44_mini.pkl','rb')
# crutem = pickle.load(inpkl)
# sitename = crutem['723530']['Name']
# monthAvgTemp = crutem['723530']['obs'][2011][6]
# print("July 2011 temperature at",sitename,":",monthAvgTemp)
import glob
import os
import pickle
import sys
path_crutem = "/data/crutem4/CRUTEM.4.4.0.0.station_files" # Configure for your computer

# A first command-line argument of exactly 'mini' selects the small output file.
# Note: the 'mini' in that file name is what triggers the site filter further below.
outpklname = (
    'crutem44_mini.pkl'
    if sys.argv[1:2] == ['mini']
    else 'crutem44_all.pkl'
)

# Every entry two directory levels below path_crutem is taken as a station file.
files = glob.glob('/'.join([path_crutem, '*', '*']))
print(files) # the list is shown in the order glob returned it
files.sort() # later processing goes through the files in sorted order
##############
def crutemread(fn,verbose=False):
    '''Read one CRUTEM station data file into a dictionary.

    Lines before the one that starts with 'Obs:' are treated as
    'name= value' parameters. Each becomes an entry whose key is the name
    with every word capitalized and the spaces removed, and whose value is
    an int if the text is all digits (so leading zeros are not kept),
    otherwise a float if it parses as one, otherwise the stripped string.

    Lines after 'Obs:' begin with a year number. They are stored in
    h['obs'], keyed by the integer year, as the list of floats that
    follow the year on that line. Blank lines in this section are skipped.

    fn      -- path of the station file; it is decoded as ISO-8859-1
    verbose -- if True, print each parameter key and value as it is parsed

    Returns the dictionary.
    Raises ValueError if an observation line holds a non-numeric field.
    '''
    h = {} # the data file is converted to a dictionary and stored here
    h['obs'] = {} # will have integer years as keys, to hold list of monthly temperatures
    doingObs = False # this changes to True when a line starting with 'Obs:' is encountered
    # The with-block guarantees the file is closed, even if a line fails to parse.
    with open(fn,'r',encoding="ISO-8859-1") as inf:
        for line in inf:
            if line[0:4]=='Obs:':
                doingObs = True
                continue
            if not doingObs: # parameter values are put in dictionary
                s = line.strip().split('=')
                key = ''.join(x.capitalize() for x in s[0].split())
                v = s[-1].strip() # text after the last '=' (the whole line if there is none)
                if v.isdigit():
                    value = int(v)
                else:
                    try:
                        value = float(v)
                    except ValueError: # not a number: keep the text itself
                        value = v
                if verbose: print(key,value)
                h[key] = value
            else: # process the line that begins with a year number
                s = line.split()
                if not s: # blank line, e.g. at the end of the file: nothing to store
                    continue
                # 12 temperature numbers, and 12 code numbers
                h['obs'][int(s[0])] = [float(x) for x in s[1:]]
    return h
# When making a miniature pkl file with just 12 sites, only files with these names are retained
minikeep = '724830 725300 722230 225500 724210 756039 719360 702000 723530 040300 725460 014920'.split()
qall = {} # will be a master dictionary of dictionaries, to be pickled
count = 0 # number of station files read and stored
for filename in files:
    # os.path.basename honours the platform's path separator; splitting on '/'
    # gave back the whole path on Windows, where glob returns backslashes.
    recn = os.path.basename(filename) # dictionary key is the file name
    if 'mini' in outpklname and recn not in minikeep:
        continue
    print(filename)
    q = crutemread(filename,verbose=False) # The file is put into a Python dictionary
    qall[recn] = q # add dictionary to the master dictionary that will be pickled
    count += 1
# The with-block closes the output file even if pickling raises.
with open(outpklname,'wb') as poufa:
    pickle.dump(qall,poufa,-1) # a negative protocol selects the highest one available
print("wrote",outpklname,"number of sites=",count)
Diff to Previous Revision
--- revision 1 2016-06-29 20:06:02
+++ revision 2 2016-06-29 20:09:44
@@ -25,36 +25,36 @@
files.sort()
##############
def crutemread(fn,verbose=False):
- ''' for reading hadcrut station data files'''
- inf = open(fn,'r',encoding="ISO-8859-1") # open fn for reading
- lines = inf.readlines()#
- h={} # data file will be converted to a dictionary and store here
- doingObs = False # this changes to True when 'Obs.' is encountered in a line
- h['obs']={} # will have integer years as keys, to hold list of monthly temperatures
- for line in lines:
- if line[0:4]=='Obs:':
- doingObs=True
- continue
- if not doingObs: # parameter values are put in dictionary
- s = line.strip().split('=')
- parts = [x.capitalize() for x in s[0].split()]
- key = ''.join(parts)
- v = s[-1].strip()
- if v.isdigit():
- value = int(v)
- else:
- try:
- value = float(v)
- except:
- value = v
- if verbose: print(key,value)
- h[key] = value
- else: #process the line the begins with a year number
- s = line.strip().split()
- key = int(s[0])
- values = [float(x) for x in s[1:]] # 12 temperature numbers, and 12 code numbers
- h['obs'][key] = values
- return h
+ ''' for reading hadcrut station data files'''
+ inf = open(fn,'r',encoding="ISO-8859-1") # open fn for reading
+ lines = inf.readlines()#
+ h={} # data file will be converted to a dictionary and store here
+ doingObs = False # this changes to True when 'Obs.' is encountered in a line
+ h['obs']={} # will have integer years as keys, to hold list of monthly temperatures
+ for line in lines:
+ if line[0:4]=='Obs:':
+ doingObs=True
+ continue
+ if not doingObs: # parameter values are put in dictionary
+ s = line.strip().split('=')
+ parts = [x.capitalize() for x in s[0].split()]
+ key = ''.join(parts)
+ v = s[-1].strip()
+ if v.isdigit():
+ value = int(v)
+ else:
+ try:
+ value = float(v)
+ except:
+ value = v
+ if verbose: print(key,value)
+ h[key] = value
+ else: #process the line the begins with a year number
+ s = line.strip().split()
+ key = int(s[0])
+ values = [float(x) for x in s[1:]] # 12 temperature numbers, and 12 code numbers
+ h['obs'][key] = values
+ return h
# When making a minature pkl file with just 12 sites, only these values for filenames are retained