Specification and validator for Chicago Manual of Style page ranges.
#!/usr/bin/python2.5
# -*- coding: utf-8 -*-
'''An attempt to implement a check on Chicago Manual of Style page ranges.
http://www.chicagomanualofstyle.org/ch09/ch09_sec064.html
9.64 Abbreviating, or condensing, inclusive numbers
'''
import codecs
from glob import glob
import re
def proper_range(fir, sec):
    '''Return True if the page range fir-sec is condensed the CMOS 9.64 way.

    fir is the first page number.  sec is the second number as an integer
    in the form it was written, which may be abbreviated: the range
    "101-8" is checked as proper_range(101, 8).  Both are expected to be
    non-negative integers.

    >>> valid = [(3, 10), (71, 72), (96, 117), (100, 104), (1100, 1113), (101, 8), (1103, 4), (321, 28), (212, 302), (1496, 1504), (498, 532)]
    >>> all([proper_range(*pair) for pair in valid])
    True
    >>> invalid = [(71, 2), (1103, 2), (321, 20), (321, 328)]
    >>> any([proper_range(*pair) for pair in invalid])
    False
    '''
    first_digits = str(fir)
    second_digits = str(sec)

    # First number below 100: the second number must be written in full,
    # so it can never be smaller than the first.
    if fir < 100:
        return sec >= fir  # (12, 13) is fine, (12, 3) is not

    # First number a multiple of 100: again all digits are required.
    if fir % 100 == 0:
        return sec >= fir  # (100, 104) is fine, (100, 4) is not

    # Second number written in full and sharing the leading digit.
    if (len(first_digits) == len(second_digits)
            and first_digits[0] == second_digits[0]):
        if len(first_digits) <= 3:
            # Three-digit numbers in the same hundred should have been
            # abbreviated, e.g. 389-91 rather than 389-391.
            return False
        # Four or more digits: the full form is only right when every
        # digit after the leading one changes (1496-1504, but not
        # 1496-1506), and the range has to ascend.  The strings are
        # sliced before zipping because the result of zip() cannot be
        # subscripted on Python 3.
        trailing_pairs = zip(first_digits[1:], second_digits[1:])
        return fir < sec and all(a != b for a, b in trailing_pairs)

    # Remaining cases (abbreviated second number, or a different leading
    # digit): the written digits must not be smaller than the digits of
    # the first number that they stand in for, e.g. (1103, 2) is wrong.
    replaced = int(first_digits[-len(second_digits):])
    return replaced <= sec
# Scan every *.mdn file in the current directory whose name does not start
# with '~' and report each bracketed page range that fails proper_range().
files = sorted(glob('[!~]*.mdn'))
# Page ranges are written inside square brackets, e.g. [321-28]; the group
# captures the "first-second" text without the brackets.
pages_pattern = re.compile(r'\[(\d+-\d+)\]')
for file_name in files:
    # Undecodable bytes are replaced instead of aborting the scan.
    source = codecs.open(file_name, "r", "UTF-8", "replace")
    try:
        for line in source:
            matches = pages_pattern.findall(line)
            for page_range in matches:
                fir, sec = page_range.split('-')
                if not proper_range(int(fir), int(sec)):
                    # One pre-formatted argument prints the same text under
                    # Python 2 and Python 3.  str() keeps the message a
                    # byte string on Python 2, so a non-ASCII file name is
                    # not implicitly decoded.
                    print('%s %s' % (file_name, str(page_range)))
    finally:
        # Release the file handle even if reading or parsing fails.
        source.close()
|