Welcome, guest | Sign In | My Account | Store | Cart

Converts the CRUTEM text data files into a convenient Python dictionary. The data files are at: http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip

Python, 76 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
# Author: Brian Fiedler 29 June 2016
# Converts the CRUTEM text data files into a convenient Python dictionary.
# Go to http://www.metoffice.gov.uk/hadobs/crutem4/data/download.html and you should see
# a link to http://www.metoffice.gov.uk/hadobs/crutem4/data/station_files/CRUTEM.4.4.0.0.station_files.zip
# Download that and unzip it, and set path_crutem below.
# Then: python CRUTEM_to_pkl.py 
# And: python CRUTEM_to_pkl.py mini
# Example of using the pkl files that were produced:
# inpkl = open('/data/crutem4/crutem44_mini.pkl','rb')
# crutem = pickle.load(inpkl)
# sitename = crutem['723530']['Name'] 
# monthAvgTemp = crutem['723530']['obs'][2011][6]  # index 6 is July: month indices are 0-based
# print("July 2011 temperature at",sitename,":",monthAvgTemp)
import glob
import os
import pickle
import sys

# Directory that holds the unzipped CRUTEM station files; edit for your machine.
path_crutem = "/data/crutem4/CRUTEM.4.4.0.0.station_files"

# A first command-line argument of exactly 'mini' selects the small output file.
# NOTE: the substring 'mini' in this name is what switches on the site filter
# further down in the script.
outpklname = 'crutem44_mini.pkl' if sys.argv[1:2] == ['mini'] else 'crutem44_all.pkl'

# Station files are matched two levels below path_crutem (<dir>/<file>).
files = glob.glob('/'.join([path_crutem, '*', '*']))
print(files)  # shown in glob order, i.e. before sorting
files.sort()
##############
def _crutem_header_value(text):
    """Convert header text to int or float when it is numeric, else return it as is."""
    # isdecimal() (unlike isdigit()) is False for superscript digits, which
    # int() cannot parse.
    if text.isdecimal():
        return int(text)
    # float() also accepts the words 'nan', 'inf' and 'infinity'. In a header
    # these are text (for example a station name), so anything without a digit
    # is kept as a string.
    if not any(ch.isdigit() for ch in text):
        return text
    try:
        return float(text)
    except ValueError:
        return text


def crutemread(fn,verbose=False):
    """Read one CRUTEM station data file into a dictionary.

    The file is read as header lines of the form ``Some label= value``,
    then a line starting with ``Obs:``, then one line per year holding the
    year followed by whitespace-separated numbers. Blank lines are skipped.

    Parameters:
        fn: path of the station file; it is decoded as ISO-8859-1.
        verbose: if True, print each header key and its converted value.

    Returns:
        A dictionary with one entry per header line. The key is the label
        with every word capitalized and the spaces removed
        (``Start year`` -> ``StartYear``). The value is an ``int`` when the
        text is all digits, a ``float`` when it otherwise parses as a
        number, and the stripped text in every other case. The extra key
        ``'obs'`` maps each integer year to the list of floats found on that
        year's line (per the original author's note: 12 temperature numbers
        followed by 12 code numbers).

    Raises:
        ValueError: if a non-blank line after ``Obs:`` holds a token that is
            not a number.
    """
    h = {'obs': {}}  # 'obs' has integer years as keys
    doingObs = False  # becomes True once the 'Obs:' line has been seen
    with open(fn,'r',encoding="ISO-8859-1") as inf:
        for line in inf:
            if line[0:4]=='Obs:':
                doingObs = True
                continue
            text = line.strip()
            if not text:
                # A blank line carries no data; without this guard it would
                # add an empty header key or fail on the missing year.
                continue
            if doingObs:
                # Line begins with a year number, followed by that year's values.
                s = text.split()
                h['obs'][int(s[0])] = [float(x) for x in s[1:]]
            else:
                # Header line: label before '=', value after the last '='.
                s = text.split('=')
                key = ''.join(x.capitalize() for x in s[0].split())
                value = _crutem_header_value(s[-1].strip())
                if verbose: print(key,value)
                h[key] = value
    return h


# When making a miniature pkl file with just 12 sites, only files with these names are retained
minikeep = '724830 725300 722230 225500 724210 756039 719360 702000 723530 040300 725460 014920'.split()
qall = {} # will be a master dictionary of dictionaries, to be pickled
count = 0
# The filter is switched on by the output file name, not directly by the
# command-line argument; evaluate it once instead of once per file.
mini_only = 'mini' in outpklname
for filename in files:
    # The bare file name is the dictionary key. os.path.basename gives the
    # same result as splitting on '/' for POSIX paths and also copes with
    # other platform separators.
    recn = os.path.basename(filename)
    if mini_only and recn not in minikeep:
        continue
    print(filename)
    qall[recn] = crutemread(filename,verbose=False) # the file as a Python dictionary
    count += 1

# The context manager closes the output file even if pickling fails part-way.
with open(outpklname,'wb') as poufa:
    pickle.dump(qall,poufa,-1) # a negative protocol selects the highest available
print("wrote",outpklname,"number of sites=",count)