Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python3
# Author: Brian Fiedler 29 June 2016
# Converts the CRUTEM text data files into a convenient Python dictionary.
# Go to http://www.metoffice.gov.uk/hadobs/crutem4/data/download.html and you should see
# a link to http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip
# Download that and unzip it, and set path_crutem below.
# Then: python CRUTEM_to_pkl.py 
# And: python CRUTEM_to_pkl.py mini
# Example of using the pkl files that were produced:
# inpkl = open('/data/crutem4/crutem44_mini.pkl','rb')
# crutem = pickle.load(inpkl)
# sitename = crutem['723530']['Name'] 
# monthAvgTemp = crutem['723530']['obs'][2011][6] 
# print("July 2011 temperature at",sitename,":",monthAvgTemp)
import glob
import os
import pickle
import sys

# Root directory of the unzipped CRUTEM station files -- configure for your computer.
path_crutem = "/data/crutem4/CRUTEM.4.4.0.0.station_files"

# Passing "mini" as the first command-line argument selects the small output file.
# Note: "mini" in the name triggers the station filter further below.
outpklname = 'crutem44_mini.pkl' if sys.argv[1:2] == ['mini'] else 'crutem44_all.pkl'

# Candidate station files are matched two directory levels below path_crutem.
files = glob.glob('/'.join((path_crutem, '*', '*')))
print(files)  # the listing is shown in glob order, before it is sorted
files.sort()
##############
def crutemread(fn,verbose=False):
    """Read one CRUTEM station data file into a dictionary.

    The file is expected to hold header lines of the form ``Some name= value``,
    then a line starting with ``Obs:``, then one line per year.

    Args:
        fn: path of the station file; it is decoded as ISO-8859-1.
        verbose: when true, print each header key and value as it is parsed.

    Returns:
        A dictionary with one entry per header line. The key is the header
        name with every word capitalized and the spaces removed (``Start year``
        becomes ``StartYear``). The value is an ``int`` when the text consists
        only of decimal digits, otherwise a ``float`` when it parses as one,
        otherwise the stripped string. The extra key ``'obs'`` maps each
        integer year to the list of floats found on that year's line.

    Raises:
        ValueError: if a non-blank line after ``Obs:`` contains a field that
            is not numeric.
    """
    h = {'obs': {}} # 'obs' will have integer years as keys, each holding a list of floats
    doingObs = False # this changes to True when a line starting with 'Obs:' is encountered
    # The with-block closes the file even when parsing raises.
    with open(fn,'r',encoding="ISO-8859-1") as inf:
        for line in inf:
            if line.startswith('Obs:'):
                doingObs = True
                continue
            if not doingObs: # parameter values are put in the dictionary
                s = line.strip().split('=')
                key = ''.join(x.capitalize() for x in s[0].split())
                v = s[-1].strip()
                # isdecimal(), unlike isdigit(), rejects characters such as
                # superscript digits that int() cannot convert.
                if v.isdecimal():
                    value = int(v)
                else:
                    try:
                        value = float(v)
                    except ValueError: # not numeric: keep the text as-is
                        value = v
                if verbose: print(key,value)
                h[key] = value
            else: # process a line that begins with a year number
                s = line.split()
                if not s: # blank line (e.g. trailing newline): nothing to record
                    continue
                # Per the original author's note each line carries 12 temperature
                # numbers and 12 code numbers; the count is not checked here.
                h['obs'][int(s[0])] = [float(x) for x in s[1:]]
    return h


# When making a miniature pkl file with just 12 sites, only files with these names are retained
minikeep = '724830 725300 722230 225500 724210 756039 719360 702000 723530 040300 725460 014920'.split()
qall = {} # will be a master dictionary of dictionaries, to be pickled
count = 0
for filename in files:
    # The last path component is the dictionary key; basename() also copes with
    # platforms whose glob results do not use '/' as the separator.
    recn = os.path.basename(filename)
    if 'mini' in outpklname and recn not in minikeep: continue
    print(filename)
    # The file is put into a Python dictionary and added to the master dictionary
    qall[recn] = crutemread(filename,verbose=False)
    count += 1

# The with-block flushes and closes the output file even if pickling fails.
with open(outpklname,'wb') as poufa:
    pickle.dump(qall,poufa,pickle.HIGHEST_PROTOCOL) # same protocol as the former -1
print("wrote",outpklname,"number of sites=",count)

Diff to Previous Revision

--- revision 1 2016-06-29 20:06:02
+++ revision 2 2016-06-29 20:09:44
@@ -25,36 +25,36 @@
 files.sort()
 ##############
 def crutemread(fn,verbose=False):
-	''' for reading hadcrut station data files'''
-	inf = open(fn,'r',encoding="ISO-8859-1") # open fn for reading
-	lines = inf.readlines()#
-	h={} # data file will be converted to a dictionary and store here
-	doingObs = False # this changes to True when 'Obs.' is encountered in a line
-	h['obs']={} # will have integer years as keys, to hold list of monthly temperatures
-	for line in lines: 
-		if line[0:4]=='Obs:':
-			doingObs=True
-			continue
-		if not doingObs: # parameter values are put in dictionary
-			s = line.strip().split('=') 
-			parts = [x.capitalize() for x in s[0].split()]
-			key = ''.join(parts)
-			v = s[-1].strip()
-			if v.isdigit():
-				value = int(v)
-			else:
-				try:
-					value = float(v)
-				except:
-					value = v
-			if verbose: print(key,value)
-			h[key] = value
-		else: #process the line the begins with a year number
-			s = line.strip().split()
-			key = int(s[0])
-			values = [float(x) for x in s[1:]] # 12 temperature numbers, and 12 code numbers
-			h['obs'][key] = values
-	return h 
+    ''' for reading hadcrut station data files'''
+    inf = open(fn,'r',encoding="ISO-8859-1") # open fn for reading
+    lines = inf.readlines()#
+    h={} # data file will be converted to a dictionary and store here
+    doingObs = False # this changes to True when 'Obs.' is encountered in a line
+    h['obs']={} # will have integer years as keys, to hold list of monthly temperatures
+    for line in lines: 
+        if line[0:4]=='Obs:':
+            doingObs=True
+            continue
+        if not doingObs: # parameter values are put in dictionary
+            s = line.strip().split('=') 
+            parts = [x.capitalize() for x in s[0].split()]
+            key = ''.join(parts)
+            v = s[-1].strip()
+            if v.isdigit():
+                value = int(v)
+            else:
+                try:
+                    value = float(v)
+                except:
+                    value = v
+            if verbose: print(key,value)
+            h[key] = value
+        else: #process the line the begins with a year number
+            s = line.strip().split()
+            key = int(s[0])
+            values = [float(x) for x in s[1:]] # 12 temperature numbers, and 12 code numbers
+            h['obs'][key] = values
+    return h 
 
 
 # When making a minature pkl file with just 12 sites, only these values for filenames are retained

History