Ever wanted to take a CSV file as input, cut it up and only extract the fields that you want ?
Here's how!
$ cat cars.csv Year,Make,Model,Length 1997,Ford,E350,2.34 2000,Mercury,Cougar,2.38
$ csvcut.py -f 0 -f -1 - < cars.csv Year,Length 1997,2.34 2000,2.38
--JamesMills (prologic)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | #!/usr/bin/env python
"""csvcut
Tool to read and cut up CSV files by fields. Each line of the input file
is read, parsed and broken up into their respective fields. The fields
you want to extract are given by the -f/--field option by specifiying
the field number you'd like. You can specify two or more fields by
using two or more -f/--field options. Entire records/rows can be kipped
by using the -s/--skip option. If no fields are given this acts much
like the cat tool.
"""
__version__ = "0.5"
__author__ = "James Mills"
import sys
import csv
import optparse
USAGE = "%prog [options] <file>"
VERSION = "%prog v" + __version__
def parse_options():
parser = optparse.OptionParser(usage=USAGE, version=VERSION)
parser.add_option("-f", "--field",
action="append", type="int",
default=[], dest="fields",
help="Field no. to cut (multiple allowed)")
parser.add_option("-s", "--skip",
action="append", type="int",
default=[], dest="skip",
help="Specify records to skip (multiple allowed)")
opts, args = parser.parse_args()
if len(args) < 1:
parser.print_help()
raise SystemExit, 1
return opts, args
def generate_rows(f):
sniffer = csv.Sniffer()
dialect = sniffer.sniff(f.readline())
f.seek(0)
reader = csv.reader(f, dialect)
for line in reader:
yield line
def main():
opts, args = parse_options()
filename = args[0]
if filename == "-":
fd = sys.stdin
else:
fd = open(filename, "rU")
rows = generate_rows(fd)
for i, row in enumerate(rows):
if i in opts.skip:
continue
if opts.fields:
print ",".join([row[x] for x in opts.fields])
else:
print ",".join(row)
if __name__ == "__main__":
main()
|