A function to create and load a table in SQLite from a Microsoft Jet table, using DAO. Also recreates indexes. Fetches and loads records in blocks with a default size of 1000 rows.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 | """
jetout.py - Reconstructs a given table from a Jet (MS Access) file in SQLite,
storing created table on disk; uses win32com to get to DAO 3.6 (Office 2000),
and uses sqlite3. To ensure index names are unique to the entire database,
names of recreated indexes are prefixed with the table name.
Date/Time typed columns are reformatted to ISO's yyyy-mm-dd, and may be recalled
as datetime.date types using the built-in DATE converter in pysqlite's
converters dictionary, if the database is opened with the argument
'detect_types=sqlite3.PARSE_DECLTYPES' in the 'connect' statement.
To modify this module to import Date/Time type as datetime.datetime instead
of datetime.date: change 'DATE' to 'TIMESTAMP' for CREATE TABLE, and modify
last line of sepdate function to include 'timepart'.
Usage:
sqlitefromjet("input.mdb", "input table name", "output.db"
[, # of rows to fetch/commit at a time])
"""
def sqlitefromjet(mdb, tbl, sqlite_db, fetchsize=1000):
import win32com
from win32com import client
import sqlite3
from itertools import imap, izip
engine = win32com.client.Dispatch("DAO.DBEngine.36") # Office 2000
db = engine.OpenDatabase(mdb)
table = db.OpenRecordset(tbl)
connection = sqlite3.connect(sqlite_db)
cursor = connection.cursor()
if tbl.find(' ') != -1 and tbl[:1] != '[':
tbl = '[' + tbl + ']' # bracket table names with spaces
# build SQL statement to create table
createStr = 'CREATE TABLE ' + tbl + ' ('
fieldrange = range(table.Fields.Count)
isType = [] # check field types (use to make ISO date from Date/Time)
pkf = False # look for AutoNumber PK field to make INTEGER PRIMARY KEY
havePK = False
for tdef in db.TableDefs:
if tdef.Name == table.Name:
tblDef = tdef # grab TableDef object for its indexes
break
for idx in tblDef.Indexes:
if idx.Primary:
for idxf in idx.Fields:
if pkf:
pkf = False
break
pkf = idxf.Name # grab PK field, if only one
break # found the PK; all done
for field in fieldrange:
createStr = createStr + '[' + table.Fields(field).Name + ']'
ftype = table.Fields(field).Type # get int representing type
if ftype == 4:
if pkf:
if table.Fields(field).Name == pkf:
# if field was AutoNumber PK, will still autoincrement
createStr += ' INTEGER PRIMARY KEY, '
havePK = True
else:
createStr += ' INTEGER, '
isType.append(1)
elif ftype in (2,3):
isType.append(1) # number
createStr += ' INTEGER, '
elif ftype in (5,6,7,20):
if ftype == 5:
isType.append(3) # currency type
else:
isType.append(1) # number
createStr += ' NUMERIC, '
elif ftype == 8:
isType.append(2) # date
createStr += ' DATE, ' # (or TIMESTAMP)
else:
isType.append(0) # text
createStr += ' TEXT, '
if table.Fields(field).Required:
createStr = createStr[:-2] + ' NOT NULL, '
createStr = createStr[:-2] + ');'
cursor.execute(createStr) # create table
stmt = "INSERT INTO " + tbl + " VALUES(" # build INSERT for executemany
for fieldnum in fieldrange:
stmt += "?, "
stmt = stmt[:-2] + ")"
df = [x for x in fieldrange if isType[x] == 2] # date column offsets
cf = [x for x in fieldrange if isType[x] == 3] # currency column offsets
lastSet = False
while not lastSet:
# fetch 'fetchsize' records at a time (default of 1,ooo)
fetched = map(list, table.GetRows(fetchsize)) # in mutable form
fetchnum = len(fetched[0]) # get actual number of rows fetched
print "\tFetched " + "%s" % fetchnum + " rows"
# check if all 'fetchsize' rows fetched...
if fetchnum < fetchsize:
lastSet = True # ...last set (or read error) if not
for x in df:
fetched[x] = imap(sepdate, fetched[x]) # yyyy-mm-dd
for x in cf:
fetched[x] = imap(conv_curr, fetched[x]) # get currency
rows = izip(*fetched) # put [fields][rows] into [rows][fields]
cursor.executemany(stmt, rows) # INSERT one fetch
connection.commit() # commit one transaction per fetch
print "INSERTs complete - creating indexes..."
for idx in tblDef.Indexes:
newName = table.Name + idx.Name
if idx.Unique:
if havePK and idx.Primary:
continue # already have unique index if int PK
else:
createStr = "CREATE UNIQUE INDEX [" + \
newName + "] ON " + tbl + "("
else:
createStr = "CREATE INDEX [" + newName + "] ON " + \
tbl + "("
for idxf in idx.Fields:
createStr += "[" + idxf.Name + "], "
createStr = createStr[:-2] + ");"
cursor.execute(createStr) # add an index
print "ok - all done"
cursor.close()
connection.close()
table.Close()
db.Close()
def sepdate(dt):
"""sepdate('%m/%d/%y %H:%M:%S') -> '%Y-%m-%d'
Return ISO-format yyyy-mm-dd date from Access-formatted Date/Time."""
dt = "%s" % dt
if dt.find(" ") != -1:
datepart, timepart = dt.split(" ")
else:
return None
month, day, year = datepart.split("/")
if int(year) <= 29:
year = '20' + year # make same assumptions about 2-digit
elif int(year) <= 99:
year = '19' + year # years as Access' Short Date format
return year + "-" + month + "-" + day # + " " + timepart # TIMESTAMP
def conv_curr(curr):
"""conv_curr(Access currency-typed field) -> float
Return a float from MS Access currency datatype, which is a fixed-point integer
scaled by 10,000"""
return float(curr[1])/10000 # convert fixed-point int to float
|
To directly load a table from MS Access into SQLite, this might be handy. This solution was chosen as an alternative to exporting and loading CSV files or creating and running SQL scripts.
To get to the TableDef object and its indexing information, this code loops through the TableDefs collection to find the right TableDef. How else might a TableDef be accessed?
To modify this module to import Date/Time type as datetime.datetime instead of datetime.date: change 'DATE' to 'TIMESTAMP' for CREATE TABLE, and modify last line of separatedate function to include 'timepart'.
For information regarding pysqlite type conversion (useful for reading out date or timestamp columns created): http://initd.org/pub/software/pysqlite/doc/usage-guide.html#extensions-and-caveats http://www-eleves-isia.cma.fr/Doc/python-sqlite-1.0.1/doc/rest/manual.txt
Very practical, thank you! But... The
while not lastSet:
in line 88ff logic is broken. Whenever a table contains a number of recordsets that is an integer multiple of "fetchsize" the while loop does not terminate before an exception occurs. What is needed is:
while not table.EOF:
And lastSet and fetchnum are then superfluous.
Please check!
I've tried to convert this script with 2to3.py, but get an UnboundLocalError with tblDef which I can't fix. How do I run this in python 3.4, I am new to the language. Thx