Welcome, guest | Sign In | My Account | Store | Cart
import struct

FLAGS= CONTAINER, SKIPPER, TAGITEM, IGNORE, NOVERN, XTAGITEM= [2**_ for _ in xrange(6)]

# CONTAINER: datum contains other boxes
# SKIPPER: ignore first 4 bytes of datum
# TAGITEM: "official" tag item
# NOVERN: datum is 8 bytes (2 4-bytes BE integers)
# XTAGITEM: datum is a triplet (I believe) of "mean", "name", "data" items
CALLBACK= TAGITEM | XTAGITEM
FLAGS.append(CALLBACK)

TAGTYPES= (
    ('ftyp', 0),
    ('moov', CONTAINER),
    ('mdat', 0),
    ('udta', CONTAINER),
    ('meta', CONTAINER|SKIPPER),
    ('ilst', CONTAINER),
    ('\xa9ART', TAGITEM),
    ('\xa9nam', TAGITEM),
    ('\xa9too', TAGITEM),
    ('\xa9alb', TAGITEM),
    ('\xa9day', TAGITEM),
    ('\xa9gen', TAGITEM),
    ('\xa9wrt', TAGITEM),
    ('trkn', TAGITEM|NOVERN),
    ('\xa9cmt', TAGITEM),
    ('trak', CONTAINER),
    ('----', XTAGITEM),
    ('mdia', CONTAINER),
    ('minf', CONTAINER),
)

flagged= {}
for flag in FLAGS:
    flagged[flag]= frozenset(_[0] for _ in TAGTYPES if _[1] & flag)

def _xtra(s):
    "Convert '----' atom data into dictionaries"
    offset= 0
    result= {}
    while offset < len(s):
        atomsize= struct.unpack("!i", s[offset:offset+4])[0]
        atomtype= s[offset+4:offset+8]
        if atomtype == "data":
            result[atomtype]= s[offset+16:offset+atomsize]
        else:
            result[atomtype]= s[offset+12:offset+atomsize]
        offset+= atomsize
    return result

def _analyse(fp, offset0, offset1):
    "Walk the atom tree in a mp4 file"
    offset= offset0
    while offset < offset1:
        fp.seek(offset)
        atomsize= struct.unpack("!i", fp.read(4))[0]
        atomtype= fp.read(4)
        if atomtype in flagged[CONTAINER]:
            data= ''
            for reply in _analyse(fp, offset+(atomtype in flagged[SKIPPER] and 12 or 8),
                offset+atomsize):
                yield reply
        else:
            fp.seek(offset+8)
            if atomtype in flagged[TAGITEM]:
                data=fp.read(atomsize-8)[16:]
                if atomtype in flagged[NOVERN]:
                    data= struct.unpack("!ii", data)
            elif atomtype in flagged[XTAGITEM]:
                data= _xtra(fp.read(atomsize-8))
            else:
                data= fp.read(min(atomsize-8, 32))
        if not atomtype in flagged[IGNORE]: yield atomtype, atomsize, data
        offset+= atomsize

def mp4_atoms(pathname):
    fp= open(pathname, "rb")
    fp.seek(0,2)
    size=fp.tell()
    for atom in _analyse(fp, 0, size):
        yield atom
    fp.close()

class M4ATags(dict):
    "An example class reading .m4a tags"
    cvt= {
        'trkn': 'Track',
        '\xa9ART': 'Artist',
        '\xa9nam': 'Title',
        '\xa9alb': 'Album',
        '\xa9day': 'Year',
        '\xa9gen': 'Genre',
        '\xa9cmt': 'Comment',
        '\xa9wrt': 'Writer',
        '\xa9too': 'Tool',
    }
    def __init__(self, pathname=None):
        super(dict, self).__init__()
        if pathname is None: return
        for atomtype, atomsize, atomdata in mp4_atoms(pathname):
            self.atom2tag(atomtype, atomdata)

    def atom2tag(self, atomtype, atomdata):
        "Insert items using descriptive key instead of atomtype"
        if atomtype == "----":
            key= atomdata['name'].title()
            value= atomdata['data'].decode("utf-8")
        else:
            try: key= self.cvt[atomtype]
            except KeyError: return
            if atomtype == "trkn":
                value= atomdata[0]
            else:
                try: value= atomdata.decode("utf-8")
                except AttributeError:
                    print `atomtype`, `atomdata`
                    raise
        self[key]= value

if __name__=="__main__":
    import sys, pprint
    r= M4ATag(sys.argv[1]) # pathname of an .mp4/.m4a file as first argument
    pprint.pprint(r)

History