#!/usr/bin/env python
"""Convert plain text containing ANSI terminal escape codes into simple HTML.

Run as a script, the file named by the last command-line argument is read and
the resulting HTML document is printed to standard output.
"""
import re

try:
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    from html import escape as _escape
except ImportError:  # Python 2
    from cgi import escape as _escape

# For each supported attribute: the escape sequence that switches it on
# (key True) and the one that switches it off (key False).
colorcodes = {
    'bold': {True: '\033[1m', False: '\033[22m'},
    'cyan': {True: '\033[36m', False: '\033[39m'},
    'blue': {True: '\033[34m', False: '\033[39m'},
    'red': {True: '\033[31m', False: '\033[39m'},
    'magenta': {True: '\033[35m', False: '\033[39m'},
    'green': {True: '\033[32m', False: '\033[39m'},
    'underline': {True: '\033[4m', False: '\033[24m'},
}


def _replace_ansi_pair(name, replacement, text):
    """Substitute every on/off escape pair of *name* in *text*.

    *replacement* is a ``re.sub`` template in which ``\\1`` stands for the
    text enclosed by the pair.  The match is non-greedy and does not cross
    newline characters.
    """
    start = re.escape(colorcodes[name][True])
    end = re.escape(colorcodes[name][False])
    pattern = "(?:%s)(.*?)(?:%s)" % (start, end)
    return re.sub(pattern, replacement, text)


def recolor(color, text):
    """Wrap text enclosed by the escape codes of *color* in a coloured span.

    *color* must be a key of ``colorcodes``; an unknown name raises KeyError.
    """
    template = r'''<span style="color: %s">\1</span>''' % color
    return _replace_ansi_pair(color, template, text)


def bold(text):
    """Wrap text enclosed by the bold on/off escape codes in a bold span."""
    return _replace_ansi_pair(
        'bold', r'<span style="font-weight:bold">\1</span>', text)


def underline(text):
    """Wrap text enclosed by the underline on/off codes in an underline span."""
    return _replace_ansi_pair(
        'underline', r'<span style="text-decoration: underline">\1</span>',
        text)


def removebells(text):
    """Return *text* with every BEL (``\\x07``) character removed."""
    return text.replace('\07', '')


# One character (anything but a newline) followed by backspace + ESC [ K.
_BACKSPACE_ERASE_RE = re.compile(r'.\010\033\[K')


def removebackspaces(text):
    """Apply backspace-erase sequences: drop each one and the character before it.

    Only one match is removed per pass, always the leftmost.  This is
    deliberate: removing a match can bring an earlier character up against the
    next erase sequence, and a single global substitution would resolve such
    chains differently.
    """
    while True:
        text, removed = _BACKSPACE_ERASE_RE.subn('', text, 1)
        if not removed:
            return text


# Alternatives, tried in this order at each position: an HTML-special
# character, leading indentation, a line ending, or an http/ftp URL that is
# preceded by whitespace/line start and followed by whitespace/line end.
re_string = re.compile(
    r'(?P<htmlchars>[<&>])|(?P<space>^[ \t]+)|(?P<lineend>\r\n|\r|\n)|'
    r'(?P<protocal>(^|\s)((http|ftp)://.*?))(\s|$)',
    re.S | re.M | re.I)


def plaintext2html(text, tabstop=4, linkify=False):
    """Return *text* converted to a minimal HTML document.

    ``<``, ``&`` and ``>`` are escaped, line endings become ``<br>``, leading
    spaces/tabs become non-breaking spaces, and the colour, bold and underline
    escape pairs listed in ``colorcodes`` become ``<span>`` elements.

    Parameters:
        text: the plain text to convert.
        tabstop: number of non-breaking spaces emitted per leading tab.
        linkify: when true, matched http/ftp URLs are wrapped in
            ``<a href="...">``; by default they are emitted as escaped text.
    """
    def do_sub(m):
        c = m.groupdict()
        if c['htmlchars']:
            return _escape(c['htmlchars'], False)
        if c['lineend']:
            return '<br>'
        if c['space']:
            # Ordinary spaces collapse in HTML, so indentation needs &nbsp;.
            indent = c['space'].replace('\t', '&nbsp;' * tabstop)
            return indent.replace(' ', '&nbsp;')

        # Otherwise the URL alternative matched.  The named group holds an
        # optional leading whitespace character followed by the URL itself.
        matched = m.group('protocal')
        url = matched.lstrip()
        prefix = matched[:len(matched) - len(url)]
        # The final group is the separator consumed after the URL; it has to
        # be put back or the following text runs into the URL.
        last = m.groups()[-1]
        if last in ['\n', '\r', '\r\n']:
            last = '<br>'
        # The whole URL is consumed by this match, so its special characters
        # never reach the 'htmlchars' branch and must be escaped here.
        shown = _escape(url, False)
        if linkify:
            shown = '<a href="%s">%s</a>' % (_escape(url, True), shown)
        return '%s%s%s' % (prefix, shown, last)

    # Interpret terminal control characters on the raw text: doing it after
    # HTML generation could erase part of an entity or tag.
    text = removebackspaces(removebells(text))
    # The URL alternative consumes only one trailing whitespace character, so
    # a CRLF after a URL would otherwise yield two <br>.
    text = text.replace('\r\n', '\n')

    result = re.sub(re_string, do_sub, text)
    for color in ('cyan', 'blue', 'red', 'magenta', 'green'):
        result = recolor(color, result)
    result = bold(result)
    result = underline(result)
    return '<html><body>%s</body></html>' % result


if __name__ == '__main__':
    import sys
    with open(sys.argv[-1]) as f:
        text = f.read()
    print(plaintext2html(text))