Welcome, guest | Sign In | My Account | Store | Cart

Specification and validator for Chicago Manual of Style page ranges.

Python, 70 lines
 ``` 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71``` ```#!/usr/bin/python2.5 # -*- coding: utf-8 -*- '''An attempt to implement a check on Chicago Manual of Style page ranges. http://www.chicagomanualofstyle.org/ch09/ch09_sec064.html 9.64 Abbreviating, or condensing, inclusive numbers ''' import codecs from glob import glob import re def proper_range(fir, sec): ''' >>> valid = [(3, 10), (71, 72), (96, 117), (100, 104), (1100, 1113), (101, 8), (1103, 4), (321, 28), (212, 302), (1496, 1504), (498, 532)] >>> all([proper_range(*pair) for pair in valid]) True >>> invalid = [(71, 2), (1103, 2), (321, 20), (321, 328)] >>> any([proper_range(*pair) for pair in invalid]) False ''' if fir < 100: if sec < fir: #print "** fir < 100 and didn't use all digits" return False # (12, 3) else: return True # (12, 13) if fir % 100 == 0: if fir > sec: #print "** fir multiple of 100 and didn't use all digits" return False # (100, 4) else: return True # (100, 104) if len(str(fir)) == len(str(sec)) and int(str(fir)[0]) == int(str(sec)[0]): if len(str(fir)) > 3: if all(a != b for a,b in zip(str(fir), str(sec))[1:]): return True # (1496, 1504) else: #print "** not all least significant digits changed in 4+ digit number" return False # (1496, 1506) else: if fir < sec: #print "** used more digits than needed" # (389, 391) return False else: return True # (498, 532) if int(str(fir)[-len(str(sec)):]) > sec: #print "** first is larger than second" return False #(1103, 102) #print "** defaulting to true" return True files = sorted(glob('[!~]*.mdn')) pages_pattern = re.compile(r'\[(\d+-\d+)\]') for file_name in files: source = codecs.open(file_name, "r", "UTF-8", "replace") for line in source: matches = pages_pattern.findall(line) if matches: for page_range in matches: fir, sec = page_range.split('-') if not proper_range(int(fir), int(sec)): print file_name, page_range ```
 Created by Joseph Reagle on Fri, 21 Aug 2009 (MIT)