Welcome, guest | Sign In | My Account | Store | Cart
```python
#!/usr/bin/python2.5
# -*- coding: utf-8 -*-
'''An attempt to implement a check on Chicago Manual of Style page ranges.

http://www.chicagomanualofstyle.org/ch09/ch09_sec064.html
9.64 Abbreviating, or condensing, inclusive numbers
'''

import codecs
from glob import glob
import re

def proper_range(fir, sec):
    '''Return True if the page range ``fir-sec`` is condensed the Chicago way.

    fir -- first page number, as a non-negative int.
    sec -- second number of the range as a non-negative int, exactly as it
           was written; it may be abbreviated, so (321, 28) stands for
           "321-28".

    The rules applied, in order:

    * a second number with as many digits as the first must not be smaller
      than the first (the range would run backwards);
    * a first number below 100, or a multiple of 100, needs the second
      number in full, so the second may not be smaller than the first;
    * same digit count and same leading digit: with four or more digits
      every digit after the first has to change; with three digits the
      range uses more digits than needed ("389-391") unless both numbers
      are equal;
    * otherwise the trailing digits of the first number that the second
      number replaces must not be greater than the second number.

    >>> valid = [(3, 10), (71, 72), (96, 117), (100, 104), (1100, 1113),
    ...          (101, 8), (1103, 4), (321, 28), (212, 302), (1496, 1504),
    ...          (498, 532)]
    >>> all([proper_range(*pair) for pair in valid])
    True

    >>> invalid = [(71, 2), (1103, 2), (321, 20), (321, 328)]
    >>> any([proper_range(*pair) for pair in invalid])
    False

    Ranges written in full that run backwards are rejected:

    >>> proper_range(321, 320)
    False
    >>> proper_range(1496, 1385)
    False

    '''
    fir_digits = str(fir)
    sec_digits = str(sec)
    same_width = len(fir_digits) == len(sec_digits)

    # With equal digit counts the second number is written in full, so a
    # smaller value can only be a backwards range, e.g. (321, 320).
    if same_width and sec < fir:
        return False

    # Below 100 and on multiples of 100 all digits are required, so an
    # abbreviated (and therefore smaller) second number is wrong:
    # (12, 3) and (100, 4) fail, (12, 13) and (100, 104) pass.
    if fir < 100 or fir % 100 == 0:
        return sec >= fir

    if same_width and fir_digits[0] == sec_digits[0]:
        if len(fir_digits) > 3:
            # Slice before zipping: zip() returns an iterator on Python 3
            # and cannot be subscripted there.
            # (1496, 1504) passes, (1496, 1506) keeps its last digit and fails.
            return all(a != b
                       for a, b in zip(fir_digits[1:], sec_digits[1:]))
        # Three digits with the same hundreds digit, e.g. (389, 391): more
        # digits were used than needed. Only fir == sec gets through here,
        # because smaller values of sec were rejected above.
        return fir == sec

    # Abbreviated second number: the tail of the first number that it
    # replaces must not exceed it, e.g. (1103, 102) fails since 103 > 102.
    return int(fir_digits[-len(sec_digits):]) <= sec

# Check every *.mdn file in the current directory whose name does not start
# with '~', and print the file name and the range for each bracketed page
# range that proper_range() rejects.
files = sorted(glob('[!~]*.mdn'))
# A page range is two runs of digits joined by a hyphen inside square
# brackets, e.g. "[321-28]"; only the "321-28" part is captured.
pages_pattern = re.compile(r'\[(\d+-\d+)\]')
for file_name in files:
    # "replace" substitutes undecodable bytes instead of raising.
    source = codecs.open(file_name, "r", "UTF-8", "replace")
    # try/finally rather than `with`, which needs a __future__ import on 2.5.
    try:
        for line in source:
            for page_range in pages_pattern.findall(line):
                fir, sec = page_range.split('-')
                if not proper_range(int(fir), int(sec)):
                    # One formatted string parses on both Python 2 and 3;
                    # str() keeps Python 2 from promoting the format to
                    # unicode and decoding file_name as ASCII.
                    print("%s %s" % (file_name, str(page_range)))
    finally:
        source.close()

```