Welcome, guest | Sign In | My Account | Store | Cart

Ever wanted to take a CSV file as input, cut it up and only extract the fields that you want ?

Here's how!

$ cat cars.csv Year,Make,Model,Length 1997,Ford,E350,2.34 2000,Mercury,Cougar,2.38

$ csvcut.py -f 0 -f -1 - < cars.csv Year,Length 1997,2.34 2000,2.38

--JamesMills (prologic)

Python, 76 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python

"""csvcut

Tool to read and cut up CSV files by fields. Each line of the input file
is read, parsed and broken up into their respective fields. The fields
you want to extract are given by the -f/--field option by specifiying
the field number you'd like. You can specify two or more fields by
using two or more -f/--field options. Entire records/rows can be kipped
by using the -s/--skip option. If no fields are given this acts much
like the cat tool.
"""

__version__ = "0.5"
__author__ = "James Mills"

import sys
import csv
import optparse

USAGE = "%prog [options] <file>"
VERSION = "%prog v" + __version__

def parse_options():
    parser = optparse.OptionParser(usage=USAGE, version=VERSION)

    parser.add_option("-f", "--field",
            action="append", type="int",
            default=[], dest="fields",
            help="Field no. to cut (multiple allowed)")

    parser.add_option("-s", "--skip",
            action="append", type="int",
            default=[], dest="skip",
            help="Specify records to skip (multiple allowed)")

    opts, args = parser.parse_args()

    if len(args) < 1:
        parser.print_help()
        raise SystemExit, 1

    return opts, args

def generate_rows(f):
    sniffer = csv.Sniffer()
    dialect = sniffer.sniff(f.readline())
    f.seek(0)

    reader = csv.reader(f, dialect)
    for line in reader:
        yield line

def main():
    opts, args = parse_options()

    filename = args[0]

    if filename == "-":
        fd = sys.stdin
    else:
        fd = open(filename, "rU")

    rows = generate_rows(fd)

    for i, row in enumerate(rows):
        if i in opts.skip:
            continue

        if opts.fields:
            print ",".join([row[x] for x in opts.fields])
        else:
            print ",".join(row)

if __name__ == "__main__":
    main()