Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/env python

import string, sys

text_characters = "".join(map(chr, range(32, 127)) + list("\n\r\t\b"))
_null_trans = string.maketrans("", "")

def istextfile(filename, blocksize = 512):
    return istext(open(filename).read(blocksize))

def istext(s):
    if "\0" in s:
        return 0
    
    if not s:  # Empty files are considered text
        return 1

    # Get the non-text characters (maps a character to itself then
    # use the 'remove' option to get rid of the text characters.)
    t = s.translate(_null_trans, text_characters)

    # If more than 30% non-text characters, then
    # this is considered a binary file
    if len(t)/len(s) > 0.30:
        return 0
    return 1

def main(argv):
    import os, getopt
    try:
        args, dirnames = getopt.getopt(argv[1:], "h", ["help"])
    except getopt.error:
        args = "dummy"
    if args:
        print "Usage: %s <directory> [<directory> ...]" % (argv[0],)
        print " Shows which files in a directory are text and which are binary"
        sys.exit(0)

    table = {0: "binary", 1: "text"}
    if not dirnames:
        dirnames = ["."]
    for dirname in dirnames:
        try:
            filenames = os.listdir(dirname)
        except OSError, err:
            print >>sys.stderr, err
            continue
        for filename in filenames:
            fullname = os.path.join(dirname, filename)
            try:
                print table[istextfile(fullname)], repr(fullname)[1:-1]
            except IOError:  # eg, this is a directory
                pass
    
if __name__ == "__main__":
    main(sys.argv)

History

  • revision 2 (19 years ago)
  • previous revisions are not available