# Welcome, guest | Sign In | My Account | Store | Cart
DEL = '/'  # delimiter that terminates every field in the stream


class PorReader(object):
    """Read DEL-terminated fields from a text stream, one chunk at a time.

    The stream is pulled in blocks of ``CHUNK_SIZE`` characters; a field may
    therefore span several blocks.  Line breaks are not part of the data and
    are removed from every returned field.

    ``file`` may be a path (opened here in text mode and closed by
    :meth:`close`) or an already opened text-mode file-like object (left
    open; the caller keeps ownership).
    """

    CHUNK_SIZE = 1024

    def __init__(self, file):
        # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3.
        self._owns_file = isinstance(file, (str, type(u"")))
        if self._owns_file:
            file = open(file)
        self.file = file
        self.pos = -1      # index in ``buffer`` of the last delimiter consumed
        self.buffer = ""   # current chunk of the stream

    def close(self):
        """Close the underlying file if this reader opened it."""
        if self._owns_file:
            self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def consumeOne(self, skip=False):
        """Advance past the next delimiter and return the field before it.

        With ``skip=True`` the field is discarded and ``None`` is returned.
        At end of input the remaining text (possibly ``""``) is returned.
        """
        chunks = []
        end = self.buffer.find(DEL, self.pos + 1)
        while end == -1:
            # No delimiter left in this chunk: keep its tail and refill.
            if not skip:
                chunks.append(self.buffer[self.pos + 1:])
            self.buffer = self.file.read(self.CHUNK_SIZE)
            self.pos = -1
            if not self.buffer:
                break  # end of input
            end = self.buffer.find(DEL)
        if end != -1 and not skip:
            chunks.append(self.buffer[self.pos + 1:end])
        self.pos = end
        if skip:
            return None
        # Strip both CRLF and bare LF/CR: text-mode reads may already have
        # translated "\r\n" into "\n".
        return "".join(chunks).replace("\r", "").replace("\n", "")

    def consume(self, n=1):
        """Return the next ``n`` fields as a list."""
        return [self.consumeOne() for _ in range(n)]

    def skip(self, n=1):
        """Discard the next ``n`` fields."""
        for _ in range(n):
            self.consumeOne(skip=True)

# Prefix that the header field must start with; SPSSFile.init rejects the
# file otherwise and parses whatever follows this prefix as a number.
# NOTE(review): the trailing "4" may be a record tag rather than part of the
# product version string -- confirm against the portable-file format.
HEAD = 'SPSS for Microsoft Windows Release 15.04'

# Type codes (not referenced elsewhere in the visible part of this file).
FLOAT, STR, INT = 0, 1, 2

class SPSSVariable(object):
    """Description of one variable (column) of the data set.

    Attributes:
        name: variable name.
        label: optional descriptive label, or None.
        numeric: True for numeric variables, False for string variables.
        decimals: number of decimals; a non-zero value marks the variable
            as floating point in ``str()`` output.
        valuelabels: dict mapping values to labels, or None if none are set.
        index: position of the variable in a data row; None until assigned.
    """

    def __init__(self, name, label=None, numeric=True, decimals=0):
        self.name = name
        self.label = label
        self.numeric = numeric
        self.decimals = decimals
        self.valuelabels = None
        self.index = None

    def __str__(self):
        """Return ``name ["label"] T`` where T is S(tring), I(nt) or F(loat)."""
        if not self.numeric:
            type_code = 'S'
        elif self.decimals:
            type_code = 'F'
        else:
            type_code = 'I'
        label = ' "%s" ' % self.label if self.label else ''
        return "%s%s%s" % (self.name, label, type_code)

def splitstring(slen=None, s=None, reader=None):
    """Cut a length-prefixed string off the front of ``s``.

    Args:
        slen: the string length, either as a number or as encoded text that
            ``readnum`` can parse.  May also be a ``(length, text)`` pair
            when ``s`` is omitted.  If None, the pair is read from
            ``reader`` (two fields).
        s: text that starts with the string and may continue with unrelated
            trailing data.
        reader: optional object with ``consume``/``consumeOne``.  It supplies
            more fields when the string itself contains the ``/`` delimiter
            and was therefore split across fields.

    Returns:
        ``(string, rest)`` where ``string`` is the first ``slen`` characters
        and ``rest`` is whatever followed it in the last field consumed.

    Raises:
        ValueError: ``s`` is shorter than ``slen`` and no reader is available
            to fetch the remainder.
    """
    if slen is None:
        slen = reader.consume(2)
    if s is None:
        slen, s = slen
    if isinstance(slen, str):
        slen = readnum(slen)
    while slen > len(s):
        if not reader:
            raise ValueError(
                "String of length %s is longer than the available data %r "
                "and no reader was given" % (slen, s))
        # The delimiter was part of the string: put it back and continue
        # with the next field.
        s += "/" + reader.consumeOne()
    return s[:slen], s[slen:]

class SPSSFile(object):
    """In-memory contents of a portable (.por) file.

    Attributes:
        variables: list of variable descriptions, in file order.
        vardict: the same variables keyed by name.
        data: list of rows; each row is a tuple with one value per variable
            (None for a missing numeric value).
    """

    def __init__(self, file):
        """Parse ``file`` (a path or an open text-mode file object)."""
        self.variables = []
        self.vardict = {}
        self.data = []
        self.init(file)

    def addvar(self, var):
        """Register ``var`` and assign it the next column index."""
        var.index = len(self.variables)
        self.variables.append(var)
        self.vardict[var.name] = var

    def getvar(self, varname):
        """Return the variable called ``varname`` (KeyError if unknown)."""
        return self.vardict[varname]

    def get(self, var, row):
        """Return the value of ``var`` (a variable or its name) in ``row``."""
        # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3.
        if isinstance(var, (str, type(u""))):
            var = self.vardict[var]
        return row[var.index]

    def init(self, file):
        """Read the header, variables, value labels and data from ``file``.

        A path is opened here and closed again once parsing ends, whether it
        succeeds or fails; a file object passed in is left open.

        Raises:
            Exception: the header is not recognised, the input ends
                prematurely, or an unsupported record type is found.
        """
        opened = None
        if isinstance(file, (str, type(u""))):
            opened = file = open(file)
        try:
            self._parse(PorReader(file))
        finally:
            if opened is not None:
                opened.close()

    def _parse(self, r):
        """Dispatch on the record tag until the data or end record is hit."""
        r.skip(5)
        h = r.consumeOne()
        if not h.startswith(HEAD):
            raise Exception("Cannot read .por")
        # The number following the header prefix is parsed (so a malformed
        # value still fails here) but is not used afterwards.
        readnum(h[len(HEAD):])
        r.skip(1)
        keep = r.consumeOne()
        while True:
            if not keep:
                raise Exception("Cannot read .por: unexpected end of file")
            action = keep[0]
            if action == '7':
                keep = self._read_variable(r, keep)
            elif action == 'D':  # value labels
                keep = self._read_value_labels(r, keep)
            elif action == 'F':  # data
                self._read_data(r, keep[1:])
                return
            elif action == 'Z':  # end of file
                print("Done!")
                return
            else:
                # Without this the loop would spin forever on the same field.
                raise Exception(
                    "Cannot read .por: unsupported record type %r" % action)

    def _read_variable(self, r, keep):
        """Parse one '7' record, add the variable, return the leftover text."""
        data = r.consume(8)
        # Keep reading until the second-to-last field starts with 'C', which
        # introduces the label length.
        while data[-2][0] != 'C':
            data += r.consume()
        decimals = readnum(data[4])
        numeric = keep[1:] == '0'
        name, _ = splitstring(data[:2])
        labellen, label = data[-2:]
        label, keep = splitstring(labellen[1:], label, r)
        self.addvar(SPSSVariable(name, label, numeric, decimals))
        return keep

    def _read_value_labels(self, r, keep):
        """Parse one 'D' record and attach its labels to the named variables.

        Returns the leftover text that starts the next record.
        """
        numvars = readnum(keep[1:])
        varnames = []
        keep = r.consumeOne()
        for _ in range(numvars):
            name, keep = splitstring(keep, r.consumeOne(), reader=r)
            varnames.append(name)
        numlabels = readnum(keep)
        keep = r.consumeOne()
        labels = {}
        # All variables of the record share the type of the first one.
        numeric = self.getvar(varnames[0]).numeric
        for _ in range(numlabels):
            if numeric:
                val = readnum(keep)
                name, keep = splitstring(reader=r)
            else:
                val, keep = splitstring(keep, r.consumeOne(), reader=r)
                name, keep = splitstring(keep, r.consumeOne(), reader=r)
            labels[val] = name
        for varname in varnames:
            self.getvar(varname).valuelabels = labels
        return keep

    def _read_data(self, r, keep):
        """Append rows to ``self.data`` until a field starting with 'Z'.

        ``keep`` is the text left over after the 'F' tag.  A row that is
        still incomplete when 'Z' is reached is dropped.
        """
        while True:
            row = []
            for var in self.variables:
                if not keep:
                    keep = r.consumeOne()
                if keep.startswith("Z"):
                    return
                if var.numeric:
                    if keep.startswith("*."):
                        # Missing value; it has no delimiter of its own, so
                        # the next value follows in the same field.
                        row.append(None)
                        keep = keep[2:]
                    else:
                        try:
                            row.append(readnum(keep))
                        except Exception:
                            print(row)
                            print("Exception on %s" % var)
                            raise
                        keep = ""
                else:
                    # Pass the reader so that string values containing the
                    # delimiter can be reassembled.
                    x, keep = splitstring(keep, r.consumeOne(), reader=r)
                    row.append(x)
            self.data.append(tuple(row))

def _codec(str_in, base_from=36, base_to=10):
   
"""
    Base36 Encoder/Decoder
    by Mike Crute (mcrute@gmail.com) on August 26, 2008
    This code has been placed in the public domain.
    """

    ASCII
= { "0": 48, "9": 57, "A": 65, "Z": 90 }
   
# There are 8 characters between 9 and A
    from_digits
= [chr(x) for x in range(ASCII["0"], ASCII["9"] + 8 + base_from)
                           
if (x >= ASCII["0"] and x <= ASCII["9"]) or
                               
(x >= ASCII["A"] and x <= ASCII["Z"])][:base_from]
    to_digits
= [chr(x) for x in range(ASCII["0"], ASCII["9"] + 8 + base_to)
                           
if (x >= ASCII["0"] and x <= ASCII["9"]) or
                               
(x >= ASCII["A"] and x <= ASCII["Z"])][:base_to]
    x
= long(0)
   
for digit in str(str_in).upper():
        x
= x * len(from_digits) + from_digits.index(digit)
    result
= ""
   
# This is going to assemble our number in reverse order
   
# so we'll have to fix it before we return it
   
while x > 0:
        result
+= to_digits[x % len(to_digits)]
        x
/= len(to_digits)
   
return result[::-1]

def decode(s):
    """Return the integer value of the base-30 digit string ``s``.

    Leading zeros are ignored; an empty or all-zero string yields 0.

    Raises:
        ValueError: ``s`` contains a character that is not a base-30 digit.
    """
    s = s.lstrip("0")
    if not s:
        return 0
    try:
        return int(_codec(s, 30, 10))
    except ValueError as e:
        raise ValueError("Cannot decode %r: %s" % (s, e))


def readnum(s):
    """Parse a base-30 number of the form ``[-]digits[.digits][(+|-)exp]``.

    The mantissa (integer part plus optional fraction) is multiplied by
    ``30 ** exp`` for a ``+`` exponent and divided by it for a ``-``
    exponent.  Plain integers are returned as ``int``; anything with a
    fraction or a negative exponent is returned as ``float``.

    Raises:
        ValueError: a component contains a character that is not a base-30
            digit, or more than one exponent marker is present.
    """
    neg = s.startswith("-")
    if neg:
        s = s[1:]

    # Split off the exponent; "+" is looked for before "-".
    exponent = 0
    for marker, sign in (("+", 1), ("-", -1)):
        if marker in s:
            s, exp = s.split(marker)
            exponent = sign * decode(exp)
            break

    whole, _, fraction = s.partition(".")
    result = decode(whole)
    for j, digit in enumerate(fraction):
        result += decode(digit) / 30.0 ** (j + 1)

    # Scale the mantissa by the exponent.
    if exponent > 0:
        result *= 30 ** exponent
    elif exponent < 0:
        result /= 30.0 ** -exponent
    return result * (-1 if neg else 1)



if __name__ == '__main__':
    # Command-line use: print the number of variables and of data rows
    # found in the file named by the first argument.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE.por" % sys.argv[0])
    f = SPSSFile(sys.argv[1])
    print("%d %d" % (len(f.variables), len(f.data)))

# History