Welcome, guest | Sign In | My Account | Store | Cart

A simple three-function utility library for downloading, parsing and capitalizing ISO 3166 country names.

Python, 120 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
"""
A small utility library for processing ISO 3166 country codes.

References

ISO 3166 home page URL :-

    http://www.iso.org/iso/country_codes.htm

ISO 3166-1-alpha-2 text file URL :-

    http://www.iso.org/iso/list-en1-semic-3.txt

"""
import os
import urllib2

#-----------------------------------------------------------------------------
def get_lastest_iso3166(filename=None, url=None):
    """
    Retrieves the latest ISO 3166-1 alpha-2 country code text file from the
    iso.org website.

    Saves URL contents to filename. If filename is not specified, the file is
    saved in the directory containing this module (os.path.dirname(__file__))
    under the basename of the URL reported by the response.

    Assumes the default ISO 3166 URL if not specified.

    Returns the path of the file that was written.

    """
    #   Pick default URL if not specified.
    if url is None:
        url = 'http://www.iso.org/iso/list-en1-semic-3.txt'

    #   Parenthesised single-argument form prints identically on Python 2
    #   and is also valid Python 3 syntax.
    print('downloading %s' % url)

    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    try:
        if filename is None:
            save_path = os.path.dirname(__file__)
            basename = os.path.basename(response.geturl())
            filename = os.path.join(save_path, basename)

        #   Read everything before touching the output file so that a failed
        #   download does not leave a truncated file behind.
        data = response.read()
    finally:
        response.close()

    with open(filename, 'wb') as fh:
        fh.write(data)

    return filename

#-----------------------------------------------------------------------------
def capitalize_country_name(cname):
    """
    Fixes capitalization edge cases in ISO 3166 country names.

    Takes a country name string (typically all upper case) and returns it
    with each word capitalized, apart from a handful of special cases
    (lower case joining words, 'U.S.', "D'Ivoire", 'McDonald', hyphenated and
    parenthesised words).

    Three accented characters are replaced with plain ASCII letters before
    capitalizing.

    """
    #   Replace some non-ASCII character codes with ASCII equivalents.
    #   The replacements are lower case; the capitalize() pass below restores
    #   the case of a leading letter.
    for accented, plain in (("\xc9", 'e'), ("\xd4", 'o'), ("\xc5", 'a')):
        cname = cname.replace(accented, plain)

    tokens = [t.capitalize() for t in cname.split()]
    for (i, t) in enumerate(tokens):
        #   General cases.
        if t.upper().startswith('(U.'):
            #   Parenthesised abbreviation: keep fully upper case.
            tokens[i] = t.upper()
        elif t[0] == '(':
            #   capitalize() leaves the letter after '(' in lower case.
            tokens[i] = '(' + t[1:].capitalize()
        elif '-' in t:
            #   Capitalize each part of a hyphenated word.
            tokens[i] = '-'.join([e.capitalize() for e in t.split('-')])

        #   Some annoying special cases :-)
        #   These test the plain capitalized token and override any general
        #   case applied above.
        if t.lower() in ('of', 'and', 'the', 'former'):
            tokens[i] = t.lower()
        elif t == "D'ivoire":
            tokens[i] = "D'Ivoire"
        elif t == "Mcdonald":
            tokens[i] = "McDonald"
        elif t.upper() == "U.S.":
            tokens[i] = "U.S."

    return ' '.join(tokens)

#-----------------------------------------------------------------------------
def iter_capitalized_iso3166(filename):
    """
    A generator function that parses the free ISO 3166-1 alpha-2 country
    codes text file returning a tuple containing the alpha-2 code and country
    name (in that order).

    Country names are capitalized correctly (taking edge cases into account).

    The first two lines of the file are skipped, as are blank lines. Each
    remaining line is split on its last ';' into name and code; a line with
    no ';' raises ValueError.

    Example :-

    ('AF', 'Afghanistan')
    ('AX', 'Aland Islands')
    ('AL', 'Albania')
    ('DZ', 'Algeria')
    ('AS', 'American Samoa')
    ...

    References

    ISO 3166 home page URL :-

        http://www.iso.org/iso/country_codes.htm

    ISO 3166-1-alpha-2 text file URL :-

        http://www.iso.org/iso/list-en1-semic-3.txt

    """
    #   The with block closes the file once the generator is exhausted or
    #   discarded.
    with open(filename) as fh:
        for line_count, line in enumerate(fh, 1):
            #   Skip the first two lines (presumably the column header and
            #   the separator line that follows it).
            if line_count <= 2:
                continue

            line = line.strip()
            if not line:
                #   Tolerate blank lines instead of failing on the unpack.
                continue

            #   Split on the last ';' so a name containing ';' stays intact.
            cname, cc = line.rsplit(';', 1)
            yield (cc, capitalize_country_name(cname))

#-----------------------------------------------------------------------------
if __name__ == '__main__':
    #   Pass the same explicit path to both steps so the file that is
    #   downloaded is the file that is parsed, whatever directory the script
    #   is run from.
    filename = 'list-en1-semic-3.txt'
    get_lastest_iso3166(filename)
    for record in iter_capitalized_iso3166(filename):
        print('%s - %s' % record)

Does what it says on the tin.

This was knocked together really quickly, but it should save others some time and make country names in your applications look neat and tidy. It handles a bunch of irksome edge cases.