Welcome, guest | Sign In | My Account | Store | Cart

When loading text files into database tables (MSSQL in this example), the source columns often do not match the table definition. This script was written to find the maximum length of each column in a delimited text file, then modify the a table create DDL file to make each character column wide enough, so truncation errors do not occur.

Python, 63 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# SQLColumnWidth.py - M.Keranen (mksql@yahoo.com) - 09/22/2006
# ------------------------------------------------------------
# Find max width of each character column in a delimited text file,
# and modify DDL file to max width of respective source columns.

import sys

if len(sys.argv)<2:
    print "\nUsage: %s datafile tableddl.sql" % (sys.argv[0])
    sys.exit()
else:
    dummy, datafile, sqlfile = sys.argv
    print

f = open(datafile,'r')

cw = {}
c = f.readline()
cw = cw.fromkeys(c.split(','),0)

r = 0
l = f.readline()
while l != '':
    r += 1

    col = l.split('|')
    if len(col) != len(cw): print 'Length error in row %s' % (r)
    i = 0

    for c in cw.iterkeys():
        if len(col[i]) > cw[c]: cw[c] = len(col[i])            
        i += 1
    l = f.readline()

#print cw
f.close()


# Match cw dict to CREATE TABLE statement for VARCHAR columns,
# and replace width where data is present larger than column definition

sql = open(sqlfile,'r').read()
lsql = sql.lower() # Copy of SQL to eliminate case in searches

for c in cw.iterkeys():
    
    dt = lsql.find(c)+len(c)+1
    if lsql[dt:dt+7] == 'varchar' or lsql[dt:dt+4] == 'char':
        lp = dt + lsql[dt:].find('(')+1
        rp = dt + lsql[dt:].find(')')

        # Original values
        ow = int(lsql[lp:rp])
        osql = sql[lsql.find(c):rp+1]
        
        if ow < cw[c]:
            # Update both lower cased SQL and original to maintain positions
            lsql = lsql[:lp] + str(cw[c]) + lsql[rp:]
            sql = sql[:lp] + str(cw[c]) + sql[rp:]
            print "%s ->%s" % (osql, sql[lsql.find(c)+len(c):rp+1])
        
open(sqlfile+'.new','w').write(sql)
print "\n%s written.\n" % (sqlfile+'.new')