Welcome, guest | Sign In | My Account | Store | Cart

This allows you to hold on to your csv in a dict form, do lookups and modifications, and also write it in a preserved order. You can also change which column you want to be your lookup column (making sure that there is a unique id for every row of that column. In my example of usage, it assumes that both classes are contained withing the same file named 'CustomDictReader.py'

Python, 173 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import csv, collections, copy

'''
# CSV TEST FILE 'test.csv'

TBLID,DATETIME,VAL
C1,01:01:2011:00:01:23,5
C2,01:01:2012:00:01:23,8
C3,01:01:2013:00:01:23,4
C4,01:01:2011:01:01:23,9
C5,01:01:2011:02:01:23,1
C6,01:01:2011:03:01:23,5
C7,01:01:2011:00:01:23,6
C8,01:01:2011:00:21:23,8
C9,01:01:2011:12:01:23,1


#usage

>>> import CustomDictReader
>>> import pprint
>>> test = CustomDictReader.CSVRW()
>>> success, thedict = test.createCsvDict('TBLID',',',None,'test.csv')
>>> pprint.pprint(dict(d))
{'C1': OrderedDict([('TBLID', 'C1'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '5')]),
 'C2': OrderedDict([('TBLID', 'C2'), ('DATETIME', '01:01:2012:00:01:23'), ('VAL', '8')]),
 'C3': OrderedDict([('TBLID', 'C3'), ('DATETIME', '01:01:2013:00:01:23'), ('VAL', '4')]),
 'C4': OrderedDict([('TBLID', 'C4'), ('DATETIME', '01:01:2011:01:01:23'), ('VAL', '9')]),
 'C5': OrderedDict([('TBLID', 'C5'), ('DATETIME', '01:01:2011:02:01:23'), ('VAL', '1')]),
 'C6': OrderedDict([('TBLID', 'C6'), ('DATETIME', '01:01:2011:03:01:23'), ('VAL', '5')]),
 'C7': OrderedDict([('TBLID', 'C7'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '6')]),
 'C8': OrderedDict([('TBLID', 'C8'), ('DATETIME', '01:01:2011:00:21:23'), ('VAL', '8')]),
 'C9': OrderedDict([('TBLID', 'C9'), ('DATETIME', '01:01:2011:12:01:23'), ('VAL', '1')])}

'''

class CustomDictReader(csv.DictReader):
    '''
        override the next() function and  use an
        ordered dict in order to preserve writing back
        into the file
    '''

    def __init__(self, f, fieldnames = None, restkey = None, restval = None, dialect ="excel", *args, **kwds):
        csv.DictReader.__init__(self, f, fieldnames = None, restkey = None, restval = None, dialect = "excel", *args, **kwds)

    def next(self):
        if self.line_num == 0:
            # Used only for its side effect.
            self.fieldnames
        row = self.reader.next()
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = self.reader.next()
        d = collections.OrderedDict(zip(self.fieldnames, row))

        lf = len(self.fieldnames)
        lr = len(row)
        if lf < lr:
            d[self.restkey] = row[lf:]
        elif lf > lr:
            for key in self.fieldnames[lr:]:
                d[key] = self.restval
        return d

class CSVRW(object):

    def __init__(self):
        self.file_name = ""
        self.csv_delim = ""
        self.csv_dict  = collections.OrderedDict()

    def setCsvFileName(self, name):
        '''
            @brief stores csv file name
            @param name- the file name
        '''
        self.file_name = name

    def getCsvFileName():
        '''
            @brief getter
            @return returns the file name
        '''
        return self.file_name

    def getCsvDict(self):
        '''
            @brief getter
            @return returns a deep copy of the csv as a dictionary
        '''
        return copy.deepcopy(self.csv_dict)

    def clearCsvDict(self):
        '''
            @brief resets the dictionary
        '''
        self.csv_dict = collections.OrderedDict()

    def updateCsvDict(self, newCsvDict):
        '''
            creates a deep copy of the dict passed in and
            sets it to the member one
        '''
        self.csv_dict = copy.deepcopy(newCsvDict)

    def createCsvDict(self,dictKey, delim, handle = None, name = None, readMode = 'rb', **kwargs):
        '''
            @brief create a dict from a csv file where:
                the top level keys are the first line in the dict, overrideable w/ **kwargs
                each row is a dict
                each row can be accessed by the value stored in the column associated w/ dictKey

                that is to say, if you want to index into your csv file based on the contents of the
                third column, pass the name of that col in as 'dictKey'

            @param dictKey  - row key whose value will act as an index
            @param delim    - csv file deliminator
            @param handle   - file handle (leave as None if you wish to pass in a file name)
            @param name     - file name   (leave as None if you wish to pass in a file handle)
            @param readMode - 'r' || 'rb'
            @param **kwargs - additional args allowed by the csv module
            @return bool    - SUCCESS|FAIL
        '''
        retVal         = (False, None)
        self.csv_delim = delim

        try:
            reader = None
            if isinstance(handle, file):
                self.setCsvFileName(handle.name)
                reader = CustomDictReader(handle, delim, **kwargs)
            else:
                if None == name:
                    name = self.getCsvFileName()
                else:
                    self.setCsvFileName(name)
                reader = CustomDictReader(open(name, readMode), delim, **kwargs)

            for row in reader:
                self.csv_dict[row[dictKey]] = row

            retVal = (True, self.getCsvDict())

        except IOError:
            retVal = (False, 'Error opening file')

        return retVal

    def createCsv(writeMode, outFileName = None, delim = None):
        '''
            @brief create a csv from self.csv_dict
            @param writeMode   - 'w' || 'wb'
            @param outFileName - file name || file handle
            @param delim       - csv deliminator
            @return none
        '''
        if None == outFileName:
            outFileName = self.file_name
        if None == delim:
            delim = self.csv_delim

        with open(outFileName, writeMode) as fout:
            for key in self.csv_dict.values():
                fout.write(delim.join(key.keys()) + '\n')
                break

            for key in self.csv_dict.values():
                fout.write(delim.join(key.values()) + '\n')

2 comments

James Fishwick 7 years, 7 months ago  # | flag

In your useage example, should be: pprint.pprint(dict(d))

Having trouble with exporting, expanding on your example with test.createCsv('wb') I get: NameError: global name 'self' is not defined

Martin Miller 7 years, 7 months ago  # | flag

The CSVRW.createCsv() method definition is missing its initial self argument.