"""
Convert text/enriched MIME type to text/html MIME type.
Based on the program in Appendix B of RFC 1896 -- see
http://www.rfc-editor.org/rfc/rfc1896.txt.
However, it was entirely rewritten for Python and
refactored for flexibility and comprehension. Support
for color and fontfamily was added.
Usage: python enriched2html.py < source.txt > target.html
"""
__author__=["Jack Trainor (jacktrainor@gmail.com)",]
__version__="May 2009"
import array
import sys
import os, os.path
class StdIo(array.array):
    """Character array offering C stdio-style access to a block of text.

    The array holds the characters; ``position`` is the read cursor used
    by getc/ungetc/lookaheadc, while putc/puts always append at the end.
    """

    # Typecode of the underlying character array. 'c' only exists on
    # Python 2; on Python 3 a unicode typecode is needed ('w' where the
    # array module offers it, otherwise 'u').
    if sys.version_info[0] < 3:
        _TYPECODE = 'c'
    elif 'w' in getattr(array, "typecodes", ""):
        _TYPECODE = 'w'
    else:
        _TYPECODE = 'u'

    def __new__(cls, *args, **kwargs):
        # array.array is configured in __new__, which receives the same
        # arguments as __init__; they are ignored here because the text is
        # loaded by __init__ and the typecode is fixed by the class.
        return super(StdIo, cls).__new__(cls, cls._TYPECODE)

    def __init__(self, s=""):
        """Load the text ``s`` and place the read cursor at its start."""
        self.puts(s)
        self.position = 0

    def __repr__(self):
        """Return the whole buffer as a string.

        str() falls back to this method, so ``str(stdio)`` yields the
        buffered text.
        """
        return "".join(self)

    def getc(self):
        """Return the character at the cursor and advance, or None at end."""
        if self.position < len(self):
            c = self[self.position]
            self.position += 1
        else:
            c = None
        return c

    def lookaheadc(self):
        """Return the next character without consuming it (None at end)."""
        c = self.getc()
        self.ungetc(c)
        return c

    def ungetc(self, c):
        """Step the cursor back one character.

        ``c`` is only inspected to tell whether a character was actually
        read: None (end of input) leaves the cursor untouched.
        """
        if c is not None and self.position > 0:
            self.position -= 1

    def putc(self, c):
        """Append the single character ``c`` to the buffer."""
        self.append(c)

    def puts(self, s):
        """Append the string ``s`` to the buffer."""
        if self._TYPECODE == 'c':
            self.fromstring(s)
        else:
            # Unicode arrays take the string one character at a time.
            self.extend(s)

    def get_text_until(self, delimiters=()):
        """Read characters up to, but not including, a delimiter.

        Returns a ``(c, text)`` pair: ``c`` is the delimiter that stopped
        the scan (it has been consumed) or None if the input ran out, and
        ``text`` is everything read before it.
        """
        chars = []
        c = self.getc()
        while c and c not in delimiters:
            chars.append(c)
            c = self.getc()
        return c, "".join(chars)
# Maps a text/enriched command name to the HTML tag used to render it.
# Commands mapped to the empty string have no HTML tag of their own.
COMMAND_MAP = dict.fromkeys(
    (
        "param",
        #____UNIMPLEMENTED____
        "paraindent",
        "indentright",
        "flushleft",
        "flushright",
        "flushboth",
        "bigger",
        "smaller",
        "indent",
    ),
    ""
)
COMMAND_MAP.update({
    "nofill": "pre",
    "bold": "b",
    "italic": "i",
    "underline": "u",
    "fixed": "tt",
    "center": "center",
    "excerpt": "blockquote",
    "color": "font",
    "fontfamily": "font",
})
class TextBlock(object):
    """A block holding one run of text in its ``text`` attribute."""

    def __init__(self):
        # A new block starts out with no text.
        self.text = ""

    def __repr__(self):
        """Show the text wrapped in square brackets."""
        shown = self.text
        return "[%s]" % shown
class CommandBlock(TextBlock):
    """A TextBlock whose text is a command name.

    ``end`` flags a closing command and ``params`` collects the
    command's parameter strings.
    """

    def __init__(self):
        super(CommandBlock, self).__init__()
        self.params = []
        self.end = False

    def __repr__(self):
        """Show the command name and the end flag (as 0 or 1)."""
        fields = (self.text, self.end)
        return "[cmd: %s end: %d]" % fields
def is_text_block(block):
    """Tell whether ``block`` is a TextBlock (or a subclass instance).

    A falsy ``block`` (such as None) is handed back unchanged.
    """
    if not block:
        return block
    return isinstance(block, TextBlock)
def is_command_block(block):
    """Tell whether ``block`` is a CommandBlock.

    A falsy ``block`` (such as None) is handed back unchanged.
    """
    if not block:
        return block
    return isinstance(block, CommandBlock)
def is_param_block(block):
    """Tell whether ``block`` is a command block for the "param" command.

    When ``block`` is not a command block, the falsy result of
    is_command_block is returned as is.
    """
    verdict = is_command_block(block)
    if verdict:
        verdict = block.text == "param"
    return verdict
def is_end_param_block(block):
    """Tell whether ``block`` is a "param" command block with ``end`` set.

    When ``block`` is not a param block, the falsy result of
    is_param_block is returned as is.
    """
    verdict = is_param_block(block)
    if verdict:
        verdict = block.end == True
    return verdict
class Converter(object):
    """Convert text/enriched text to text/html.

    The input is first split into a list of TextBlock and CommandBlock
    objects (read_blocks); that list is then rendered as HTML
    (write_blocks).
    """

    # Characters that have to be written as entities inside HTML text.
    _HTML_ESCAPES = {'<': "&lt;", '>': "&gt;", '&': "&amp;"}

    def __init__(self, text):
        """Remember ``text``; nothing is converted until execute()."""
        self.text = text
        self.input = None
        self.output = None
        self.blocks = []
        self.block_index = 0
        self.no_fill_count = 0

    def execute(self):
        """Convert the text given to the constructor and return the HTML."""
        return self.convert(self.text)

    def convert(self, text):
        """Convert ``text`` and return the HTML, ending with a newline."""
        self.input = StdIo(text)
        self.output = StdIo()
        # Start from a clean state so that a second call does not re-emit
        # the blocks (or inherit the nofill depth) of an earlier one.
        self.blocks = []
        self.no_fill_count = 0
        self.read_blocks()
        #self.debug_blocks()
        self.write_blocks()
        self.output.puts('\n')
        return str(self.output)

    def get_block(self):
        """Return the next block and advance, or None when none are left."""
        if self.block_index < len(self.blocks):
            block = self.blocks[self.block_index]
            self.block_index += 1
        else:
            block = None
        return block

    def unget_block(self, block):
        """Step back one block; a None ``block`` (nothing read) is ignored."""
        if block and self.block_index > 0:
            self.block_index -= 1

    def get_params(self, command_block):
        """Collect the parameters that directly follow ``command_block``.

        Each parameter is the triple <param>, text, </param>. The text of
        every such triple is appended to ``command_block.params``; the
        first three blocks that do not form a triple are pushed back.
        """
        while True:
            block_1 = self.get_block()
            block_2 = self.get_block()
            block_3 = self.get_block()
            if (is_param_block(block_1) and is_text_block(block_2)
                    and is_end_param_block(block_3)):
                command_block.params.append(block_2.text)
            else:
                # Push back in reverse order of reading.
                self.unget_block(block_3)
                self.unget_block(block_2)
                self.unget_block(block_1)
                break

    def write_blocks(self):
        """Render every block, in order, to the output buffer."""
        self.block_index = 0
        block = self.get_block()
        while block:
            if is_command_block(block):
                if not block.end:
                    # An opening command may be followed by parameters.
                    self.get_params(block)
                self.write_command_block(block)
            elif is_text_block(block):
                self.write_text_block(block)
            block = self.get_block()

    @staticmethod
    def _escape_attribute(value):
        """Return ``value`` made safe for a double-quoted HTML attribute."""
        # '&' goes first so the entities added afterwards stay intact.
        value = value.replace('&', "&amp;")
        value = value.replace('<', "&lt;").replace('>', "&gt;")
        return value.replace('"', "&quot;")

    def write_command_block(self, block):
        """Write the HTML tag for a command block.

        Commands without an HTML tag in COMMAND_MAP are written as
        ``<?name>`` / ``</?name>``. "color" and "fontfamily" carry their
        first parameter as the ``color`` / ``face`` attribute; when the
        parameter is missing the bare tag is written. "nofill" commands
        also adjust the nesting depth used by write_text_block.
        """
        command = block.text
        mapped_command = COMMAND_MAP.get(command, "") or "?" + command
        if block.end:
            html_command = "</%s>" % mapped_command
        elif command in ("color", "fontfamily") and block.params:
            attribute = "color" if command == "color" else "face"
            html_command = "<%s %s=\"%s\">" % (
                mapped_command, attribute,
                self._escape_attribute(block.params[0]))
        else:
            html_command = "<%s>" % mapped_command
        if command == "nofill":
            if not block.end:
                self.no_fill_count += 1
            else:
                self.no_fill_count -= 1
        self.output.puts(html_command)

    def write_text_block(self, block):
        """Write a text block with HTML escaping and newline handling.

        '<', '>' and '&' become entities. Outside nofill sections the
        first newline of a run becomes a space and each further newline
        becomes ``<br>``; inside nofill sections newlines are kept.
        """
        filling = self.no_fill_count <= 0
        newline_count = 0
        pieces = []
        for c in block.text:
            if c == '\n' and filling:
                newline_count += 1
                pieces.append(' ' if newline_count == 1 else "<br>")
            else:
                newline_count = 0
                pieces.append(self._HTML_ESCAPES.get(c, c))
        self.output.puts("".join(pieces))

    def debug_blocks(self):
        """Print every block; a debugging aid for the block list."""
        self.block_index = 0
        block = self.get_block()
        while block:
            print(block)
            block = self.get_block()

    def read_command_block(self, c):
        """Read a command up to '>' (the opening '<' is already consumed).

        ``c`` is not used. Returns ``(c, block)`` where ``c`` is the '>'
        that ended the command, or None if the input ran out. A leading
        '/' marks an end command; the command name is lower-cased.
        """
        block = CommandBlock()
        c, text = self.input.get_text_until(">")
        if text and text[0] == '/':
            text = text[1:]
            block.end = True
        block.text = text.lower()
        return c, block

    def read_text_block(self, c):
        """Read text up to the next '<'; ``c`` is the first character.

        Returns ``(c2, block)`` where ``c2`` is '<' or None at end of
        input. A terminating '<' is pushed back for the caller.
        """
        block = TextBlock()
        c2, text = self.input.get_text_until('<')
        block.text = c + text
        if c2 == '<':
            self.input.ungetc(c2)
        return c2, block

    def read_blocks(self):
        """Split the input into text and command blocks (self.blocks)."""
        c = self.input.getc()
        while c:
            if c == '<':
                if self.input.lookaheadc() == '<':
                    # "<<" stands for a literal '<' that begins a text run.
                    c = self.input.getc()
                    c, block = self.read_text_block('<')
                else:
                    c, block = self.read_command_block('<')
            else:
                c, block = self.read_text_block(c)
            self.blocks.append(block)
            c = self.input.getc()
def convert_file(fp_in=None):
    """Read text/enriched text from ``fp_in`` and return it as HTML.

    ``fp_in`` is any object with a ``read()`` method. When omitted,
    sys.stdin is used; it is looked up at call time so that a stdin
    replaced after import is honoured.
    """
    if fp_in is None:
        fp_in = sys.stdin
    text = fp_in.read()
    return Converter(text).execute()
def output_html(html, fp_out=None):
    """Write the string ``html`` to ``fp_out``.

    ``fp_out`` is any object with a ``write()`` method. When omitted,
    sys.stdout is used; it is looked up at call time so that a stdout
    replaced or redirected after import is honoured.
    """
    if fp_out is None:
        fp_out = sys.stdout
    fp_out.write(html)
def main():
    """Convert the text from convert_file() and pass it to output_html()."""
    output_html(convert_file())


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()