# Delimiter that terminates every field in the portable-file stream.
DEL = '/'


class PorReader(object):
    """Split a portable-file stream into ``DEL``-terminated fields.

    ``file`` may be a path, which is opened here, or an already opened
    file-like object exposing ``read(size)``.  The stream is read in
    1024-character chunks; ``buffer`` holds the current chunk and ``pos``
    the index of the last delimiter consumed from it (-1 if none yet).
    """

    def __init__(self, file):
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # this check works on both without naming ``unicode`` directly.
        self._owns_file = isinstance(file, (str, type(u"")))
        if self._owns_file:
            file = open(file)
        self.file = file
        self.pos = -1
        self.buffer = ""

    def close(self):
        """Close the underlying file, but only if this reader opened it."""
        if self._owns_file:
            self.file.close()

    def consumeOne(self, skip=False):
        """Advance past the next field and return its text.

        Line breaks are removed from the returned text because they only
        wrap the physical lines of the file and are not part of the data.
        Both CRLF and bare CR/LF are stripped, so streams whose newlines
        were translated on reading are handled too.

        Returns "" when ``skip`` is true (the field is discarded) and once
        the stream is exhausted.
        """
        pieces = []
        end = self.buffer.find(DEL, self.pos + 1)
        while end == -1:
            # The field continues beyond the current chunk: keep what is
            # left of it and fetch the next chunk.
            if not skip:
                pieces.append(self.buffer[self.pos + 1:])
            self.buffer = self.file.read(1024)
            self.pos = -1
            if not self.buffer:
                break  # end of stream; whatever was collected is the field
            end = self.buffer.find(DEL)
        if end != -1:
            if not skip:
                pieces.append(self.buffer[self.pos + 1:end])
            self.pos = end
        if skip:
            return ""
        return "".join(pieces).replace("\r", "").replace("\n", "")

    def consume(self, n=1):
        """Return the next ``n`` fields as a list of strings."""
        return [self.consumeOne() for _ in range(n)]

    def skip(self, n=1):
        """Discard the next ``n`` fields."""
        for _ in range(n):
            self.consumeOne(skip=True)
# Prefix the header field must start with; SPSSFile.init raises
# "Cannot read .por" when the field read after the first five does not.
HEAD = 'SPSS for Microsoft Windows Release 15.04'
# Integer type tags; not referenced anywhere else in the visible code.
FLOAT, STR, INT = 0,1,2
class SPSSVariable(object):
    """Description of one variable (column) of an SPSS file.

    ``valuelabels`` and ``index`` start out as None and are filled in by
    whoever registers the variable.
    """

    def __init__(self, name, label=None, numeric=True, decimals=0):
        self.index = None
        self.valuelabels = None
        self.name = name
        self.label = label
        self.numeric = numeric
        self.decimals = decimals

    def __str__(self):
        """Return the name, the quoted label if any, and a type letter.

        The letter is 'S' for non-numeric variables, 'F' for numeric ones
        with decimals and 'I' for numeric ones without.
        """
        if not self.numeric:
            kind = 'S'
        elif self.decimals:
            kind = 'F'
        else:
            kind = 'I'
        if self.label:
            label_part = ' "%s" ' % self.label
        else:
            label_part = ''
        return "%s%s%s" % (self.name, label_part, kind)
def splitstring(slen=None, s=None, reader=None):
    """Cut a length-prefixed string off the front of ``s``.

    Accepted call shapes:
      * ``splitstring(reader=r)``      - length and text are the next two
        fields read from ``r``;
      * ``splitstring((slen, s))``     - length and text passed as a pair;
      * ``splitstring(slen, s[, r])``  - length and text passed separately.

    ``slen`` may be an integer or its textual encoding, which is decoded
    with ``readnum``.  If ``s`` is shorter than ``slen`` the string itself
    contained the field delimiter, so further fields are pulled from
    ``reader`` and re-joined with "/" until enough text is available.

    Returns ``(string, rest)`` where ``rest`` is whatever followed the
    string inside the last field.

    Raises Exception when ``s`` is too short and no reader was supplied.
    """
    if slen is None:
        slen = reader.consume(2)
    if s is None:
        slen, s = slen
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3; the
    # isinstance check also covers subclasses, unlike ``type(x) == str``.
    if isinstance(slen, (str, type(u""))):
        slen = readnum(slen)
    while slen > len(s):
        if not reader:
            raise Exception(
                "String of length %r expected but only %r characters are "
                "available and no reader was given to continue the field"
                % (slen, len(s)))
        s += "/" + reader.consumeOne()
    return s[:slen], s[slen:]
class SPSSFile(object):
    """Variables and data rows parsed from an SPSS portable (.por) file.

    Attributes:
        variables: SPSSVariable objects in the order they were read.
        vardict:   the same variables keyed by name.
        data:      one tuple per row, ordered like ``variables``; a numeric
                   value written as "*." is stored as None.
    """

    def __init__(self, file):
        self.variables = []
        self.vardict = {}
        self.data = []
        self.init(file)

    def addvar(self, var):
        """Register ``var`` and assign it the next column index."""
        var.index = len(self.variables)
        self.variables.append(var)
        self.vardict[var.name] = var

    def getvar(self, varname):
        """Return the variable called ``varname``; KeyError if unknown."""
        return self.vardict[varname]

    def get(self, var, row):
        """Return the value of ``var`` (a variable or its name) in ``row``."""
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        if isinstance(var, (str, type(u""))):
            var = self.vardict[var]
        return row[var.index]

    def init(self, file):
        """Parse ``file`` (a path or a readable file object) into this object.

        A path is opened here and closed again once parsing ends, whether
        it succeeds or fails; a file object is left open for the caller.

        Raises Exception when the header is not recognised, when a record
        tag is missing or unsupported, or when the data section ends
        without its terminating 'Z'.
        """
        if isinstance(file, (str, type(u""))):
            with open(file) as handle:
                self._parse(PorReader(handle))
        else:
            self._parse(PorReader(file))

    def _parse(self, r):
        """Read the header, then dispatch on record tags until the data ends."""
        r.skip(5)
        header = r.consumeOne()
        if not header.startswith(HEAD):
            raise Exception("Cannot read .por")
        # Decoded only so that a malformed header still raises; the value
        # itself is not used.
        readnum(header[len(HEAD):])
        r.skip(1)
        keep = r.consumeOne()
        while True:
            if not keep:
                raise Exception("Missing record tag in .por file")
            action = keep[0]
            if action == '7':  # variable record
                keep = self._read_variable(r, keep)
            elif action == 'D':  # value labels
                keep = self._read_value_labels(r, keep)
            elif action == 'F':  # data
                self._read_data(r, keep[1:])
                return
            elif action == 'Z':  # end marker
                print("Done!")
                return
            else:
                # Without this branch an unknown tag would spin forever,
                # because nothing would ever advance ``keep``.
                raise Exception("Unsupported record type %r" % (action,))

    def _read_variable(self, r, keep):
        """Read one variable record and register it; return the leftover text."""
        data = r.consume(8)
        # Keep reading fields until the one carrying the label length
        # (it starts with 'C') is the second to last one collected.
        while data[-2][0] != 'C':
            data += r.consume()
        decimals = readnum(data[4])
        numeric = keep[1:] == '0'
        name, _ = splitstring(data[:2])
        labellen, label = data[-2:]
        label, keep = splitstring(labellen[1:], label, r)
        self.addvar(SPSSVariable(name, label, numeric, decimals))
        return keep

    def _read_value_labels(self, r, keep):
        """Read a value-label record and attach it; return the leftover text."""
        numvars = readnum(keep[1:])
        varnames = []
        keep = r.consumeOne()
        for _ in range(numvars):
            name, keep = splitstring(keep, r.consumeOne(), reader=r)
            varnames.append(name)
        numlabels = readnum(keep)
        keep = r.consumeOne()
        labels = {}
        # The first listed variable decides whether values are numbers.
        numeric = self.getvar(varnames[0]).numeric
        for _ in range(numlabels):
            if numeric:
                val = readnum(keep)
                name, keep = splitstring(reader=r)
            else:
                val, keep = splitstring(keep, r.consumeOne(), reader=r)
                name, keep = splitstring(keep, r.consumeOne(), reader=r)
            labels[val] = name
        # All listed variables share the same dictionary object.
        for varname in varnames:
            self.getvar(varname).valuelabels = labels
        return keep

    def _read_data(self, r, keep):
        """Append data rows to ``self.data`` until a field starts with 'Z'."""
        if not self.variables:
            # No columns means no values to read; looping would never end.
            return
        while True:
            row = []
            for var in self.variables:
                if not keep:
                    keep = r.consumeOne()
                    # PorReader leaves its buffer empty once the stream is
                    # exhausted; an empty field at that point means the
                    # file stopped before the 'Z' terminator.
                    if not keep and not r.buffer:
                        raise Exception(
                            "Unexpected end of file in data section")
                if keep.startswith("Z"):
                    return
                if var.numeric:
                    if keep.startswith("*."):
                        row.append(None)
                        keep = keep[2:]
                    else:
                        try:
                            row.append(readnum(keep))
                        except Exception:
                            print(row)
                            print("Exception on %s" % var)
                            raise
                        keep = ""
                else:
                    # Pass the reader so values containing "/" can be
                    # re-joined, as is done for labels.
                    x, keep = splitstring(keep, r.consumeOne(), reader=r)
                    row.append(x)
            self.data.append(tuple(row))
def _codec(str_in, base_from=36, base_to=10):
"""
Base36 Encoder/Decoder
by Mike Crute (mcrute@gmail.com) on August 26, 2008
This code has been placed in the public domain.
"""
ASCII = { "0": 48, "9": 57, "A": 65, "Z": 90 }
# There are 8 characters between 9 and A
from_digits = [chr(x) for x in range(ASCII["0"], ASCII["9"] + 8 + base_from)
if (x >= ASCII["0"] and x <= ASCII["9"]) or
(x >= ASCII["A"] and x <= ASCII["Z"])][:base_from]
to_digits = [chr(x) for x in range(ASCII["0"], ASCII["9"] + 8 + base_to)
if (x >= ASCII["0"] and x <= ASCII["9"]) or
(x >= ASCII["A"] and x <= ASCII["Z"])][:base_to]
x = long(0)
for digit in str(str_in).upper():
x = x * len(from_digits) + from_digits.index(digit)
result = ""
# This is going to assemble our number in reverse order
# so we'll have to fix it before we return it
while x > 0:
result += to_digits[x % len(to_digits)]
x /= len(to_digits)
return result[::-1]
def decode(s):
    """Decode a string of base-30 digits (0-9, A-T) into an integer.

    Leading zeros are ignored and an empty string decodes to 0.

    Raises ValueError for anything that is not a plain base-30 number.
    """
    digits = s.lstrip("0")
    if not digits:
        return 0
    # int() on its own would also accept signs, spaces and underscores.
    if not digits.isalnum():
        raise ValueError("Cannot decode %r: not a base-30 number" % (digits,))
    try:
        return int(digits, 30)
    except ValueError as e:
        raise ValueError("Cannot decode %r: %s" % (digits, e))


def readnum(s):
    """Parse a base-30 number as written in a portable file.

    Accepted form: an optional leading "-", integer digits, an optional
    "." followed by fraction digits, and an optional exponent introduced
    by "+" or "-".  The value is ``mantissa * 30 ** exponent``.

    Returns an int when there is neither a fraction nor a negative
    exponent, otherwise a float.  Raises ValueError on malformed input.
    """
    neg = s.startswith("-")
    if neg:
        s = s[1:]

    exponent = 0
    if "+" in s:
        s, exp_digits = s.split("+")
        exponent = decode(exp_digits)
    elif "-" in s:
        s, exp_digits = s.split("-")
        exponent = -decode(exp_digits)

    if "." in s:
        int_digits, frac_digits = s.split(".")
    else:
        int_digits, frac_digits = s, ""
    result = decode(int_digits)
    for place, digit in enumerate(frac_digits):
        result += decode(digit) / 30. ** (place + 1)

    # Scale the mantissa by the exponent; an exponent of 0 leaves it as is.
    if exponent > 0:
        result = result * 30 ** exponent
    elif exponent < 0:
        result = result / float(30 ** -exponent)
    return -result if neg else result
# Command-line entry point: parse the file named by the first argument and
# print the number of variables and the number of data rows.
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        sys.exit("Usage: %s FILE.por" % sys.argv[0])
    fn = sys.argv[1]
    f = SPSSFile(fn)
    # A single formatted string prints the same on Python 2 and Python 3.
    print("%d %d" % (len(f.variables), len(f.data)))