Welcome, guest | Sign In | My Account | Store | Cart
# -*- coding: Windows-1251 -*-
'''
fix_mbox_from.py

  Utility for fixing incorrect 'From' lines in mbox files after a batch
  import of .EML files via Thunderbird's ImportExportTools add-on
  (version 2.3.2.1 and earlier).

  2010-05-01 bug report sent to addon author.

  
mailbox.py (Python 2.4.5) pattern for matching 'From' line:

    _fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \
                       r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$"

Correct 'From':
From - Tue Apr 27 19:42:22 2010

Broken 'From':
From - Sat May 01 2010 15:07:31 GMT+0400 (Russian Daylight Time)
'''
import sys
import re
import os

__author__ = 'Denis Barmenkov <denis.barmenkov@gmail.com>'
__source__ = 'http://code.activestate.com/recipes/577214-fix-mbox-files-after-importing-eml-into-tb-using-i/'

# Matches the broken 'From ' separator line, for example:
#   From - Sat May 01 2010 15:07:31 GMT+0400 (Russian Daylight Time)
# Group 1 is everything up to and including the day of month, group 2 is
# the year and group 3 is the time of day.  The pattern is a byte string
# because the mailbox is processed in binary mode.  Both positive and
# negative GMT offsets are accepted.
bad_pattern_text = br"^(From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d)\s+" \
                   br"(\d\d\d\d)\s+(\d?\d:\d\d(:\d\d)?)\s+" \
                   br"GMT[+-]\d\d\d\d\s+\([^\)]+\)\s*$"

bad_pattern = re.compile(bad_pattern_text)


def _split_eol(rawline):
    """Split a raw line into (content, line_terminator).

    The terminator is b'\\r\\n', b'\\n', or b'' when the line has none
    (possible only for the last line of a file).
    """
    for eol in (b'\r\n', b'\n'):
        if rawline.endswith(eol):
            return rawline[:-len(eol)], eol
    return rawline, b''


def fix_from_line(line):
    """Rewrite a broken 'From ' separator into the standard mbox form.

    line -- one mailbox line as a byte string, without its line terminator.

    Returns a (line, was_fixed) tuple.  When the line matches the broken
    layout, the year is moved behind the time and the GMT offset and time
    zone name are dropped; any other line is returned unchanged.
    """
    m = bad_pattern.match(line)
    if m is None:
        return line, False
    # Reorder "<prefix> <year> <time>" into "<prefix> <time> <year>".
    return b' '.join(m.group(1, 3, 2)), True


def fix_mbox(mbox_fn):
    """Fix all broken 'From ' lines of the mbox file *mbox_fn* in place.

    The result is first written to '<mbox_fn>.temp'.  On success the
    original file is kept as '<mbox_fn>.source' and the temporary file takes
    its place.  Lines that are not broken separators are copied unchanged,
    including their original line terminators.

    Returns the number of lines that were rewritten.
    Raises IOError if the '.source' backup already exists, so that an
    earlier backup is never overwritten.
    """
    temp_fn = mbox_fn + '.temp'
    orig_fn = mbox_fn + '.source'
    # Explicit check instead of 'assert': asserts are removed under -O.
    if os.path.exists(orig_fn):
        raise IOError('Backup file already exists: %s' % orig_fn)

    fix_count = 0
    # Binary mode: no newline translation or decoding is applied to the
    # message data.
    with open(mbox_fn, 'rb') as fsrc:
        try:
            with open(temp_fn, 'wb') as fdest:
                for rawline in fsrc:
                    content, eol = _split_eol(rawline)
                    content, was_fixed = fix_from_line(content)
                    if was_fixed:
                        fix_count += 1
                    fdest.write(content + eol)
        except BaseException:
            # Do not leave a half-written temporary file behind.
            if os.path.exists(temp_fn):
                os.remove(temp_fn)
            raise

    os.rename(mbox_fn, orig_fn)
    os.rename(temp_fn, mbox_fn)
    return fix_count


def main(argv=None):
    """Command-line entry point: fix the mbox file named by argv[1].

    argv defaults to sys.argv.  Exits with a usage message when no file
    name is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('Usage: fix_mbox_from.py MBOX_FILE')

    mbox_fn = argv[1]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('File: %s' % mbox_fn)
    fix_count = fix_mbox(mbox_fn)
    print('')
    print('Fixed %s "From" lines' % fix_count)


if __name__ == '__main__':
    main()

Diff to Previous Revision

--- revision 1 2010-05-02 13:20:19
+++ revision 2 2010-05-02 13:21:00
@@ -24,7 +24,7 @@
 import os
 
 __author__ = 'Denis Barmenkov <denis.barmenkov@gmail.com>'
-__source__ = 'http://code.activestate.com/recipes/'
+__source__ = 'http://code.activestate.com/recipes/577214-fix-mbox-files-after-importing-eml-into-tb-using-i/'
 
 bad_pattern_text = r"^(From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d)\s+" \
                    r"(\d\d\d\d)\s+(\d?\d:\d\d(:\d\d)?)\s+" \

History