Welcome, guest | Sign In | My Account | Store | Cart

Converts a text stream in text/enriched MIME format, read from a file or stdin, to text/html output written to a file or stdout.

Python, 285 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
"""
Convert text/enriched MIME type to text/html MIME type.

Based on the program in Appendix B of RFC 1896 -- see
http://www.rfc-editor.org/rfc/rfc1896.txt. 

However, it was entirely rewritten for Python and 
refactored for flexibility and comprehension. Support 
for color and fontfamily was added.

Usage: python enriched2html.py < source.txt > target.html
"""
__author__=["Jack Trainor (jacktrainor@gmail.com)",]
__version__="May 2009"

import array
import sys
import os, os.path


class StdIo(array.array):
    """Character buffer offering C stdio-style access to a block of text.

    The buffer is an ``array.array`` of single characters with a read
    cursor (``position``).  ``getc``/``ungetc``/``lookaheadc`` move or peek
    at the cursor, while ``putc``/``puts`` append at the end of the buffer.
    """

    # Typecode able to hold one text character on the running interpreter:
    # 'c' where the array module has no ``typecodes`` attribute (Python 2),
    # otherwise 'w' when the interpreter offers it and 'u' as the fallback.
    _TYPECODE = next(
        tc for tc in "cwu" if tc in getattr(array, "typecodes", "c"))

    def __new__(cls, tc='c'):
        # ``tc`` receives whatever first positional argument the caller
        # passes (in practice the initial text consumed by __init__) and is
        # deliberately ignored: the element type is always a character.
        return super(StdIo, cls).__new__(cls, cls._TYPECODE)

    def __init__(self, s=""):
        """Fill the buffer with the characters of ``s``; cursor at start."""
        # extend() accepts any iterable of single characters, so it works
        # unchanged for every character typecode selected above.
        self.extend(s)
        self.position = 0

    def __repr__(self):
        """Return the whole buffer content as a string."""
        return "".join(self)

    def __str__(self):
        """Return the whole buffer content as a string."""
        return self.__repr__()

    def getc(self):
        """Return the character at the cursor and advance, or None at end."""
        if self.position >= len(self):
            return None
        c = self[self.position]
        self.position += 1
        return c

    def lookaheadc(self):
        """Return the character at the cursor without consuming it."""
        if self.position < len(self):
            return self[self.position]
        return None

    def ungetc(self, c):
        """Step the cursor back one character.

        Nothing happens when ``c`` is None (the end-of-input marker returned
        by ``getc``) or when the cursor is already at the start.
        """
        if c is not None and self.position > 0:
            self.position -= 1

    def putc(self, c):
        """Append the single character ``c`` to the buffer."""
        self.append(c)

    def puts(self, s):
        """Append every character of the string ``s`` to the buffer."""
        self.extend(s)

    def get_text_until(self, delimiters=()):
        """Read characters up to the first one found in ``delimiters``.

        Returns a ``(c, text)`` pair: ``text`` holds the characters read
        before the delimiter and ``c`` is the delimiter itself (which has
        been consumed), or None when the input ran out first.
        """
        chars = []
        c = self.getc()
        while c is not None and c not in delimiters:
            chars.append(c)
            c = self.getc()
        return c, "".join(chars)
        
        
# Maps each text/enriched command name to the HTML tag name emitted for it.
# An empty string means no HTML tag is assigned to that command.
COMMAND_MAP = {
    "param": "",
    "nofill": "pre",
    "bold": "b",
    "italic": "i",
    "underline": "u",
    "fixed": "tt",
    "center": "center",
    "excerpt": "blockquote",
    "color": "font",
    "fontfamily": "font",
}

# Commands that are known but not implemented: registered with no tag.
COMMAND_MAP.update(dict.fromkeys(
    (
        "paraindent",
        "indentright",
        "flushleft",
        "flushright",
        "flushboth",
        "bigger",
        "smaller",
        "indent",
    ),
    "",
))

class TextBlock(object):
    """A run of plain text taken from the input."""

    def __init__(self):
        # Starts empty; the text is assigned after construction.
        self.text = ""

    def __repr__(self):
        """Show the text wrapped in square brackets."""
        content = self.text
        return "[%s]" % content
        
class CommandBlock(TextBlock):
    """A formatting command: ``text`` is the command name, ``end`` flags a
    closing command and ``params`` collects its parameter strings."""

    def __init__(self):
        super(CommandBlock, self).__init__()
        self.params = []
        self.end = False

    def __repr__(self):
        """Show the command name and the end flag (as 0 or 1)."""
        fields = (self.text, self.end)
        return "[cmd: %s end: %d]" % fields

                
def is_text_block(block):
    """Tell whether ``block`` is a TextBlock (or a subclass instance).

    A falsy ``block`` is handed back unchanged, so the result is falsy but
    not necessarily the value False.
    """
    if not block:
        return block
    return isinstance(block, TextBlock)

def is_command_block(block):
    """Tell whether ``block`` is a CommandBlock instance.

    A falsy ``block`` is handed back unchanged, so the result is falsy but
    not necessarily the value False.
    """
    if not block:
        return block
    return isinstance(block, CommandBlock)

def is_param_block(block):
    """Tell whether ``block`` is a command block whose command is "param"."""
    verdict = is_command_block(block)
    if verdict:
        # Only command blocks reach the name comparison.
        verdict = (block.text == "param")
    return verdict

def is_end_param_block(block):
    """Tell whether ``block`` is a "param" command block flagged as an end
    command."""
    verdict = is_param_block(block)
    if verdict:
        # Equality comparison kept exactly as in the original predicate.
        verdict = (block.end == True)
    return verdict


class Converter(object):
    """Convert text/enriched text to text/html.

    The input is first split into a flat list of text blocks and command
    blocks (``read_blocks``); that list is then rendered as HTML
    (``write_blocks``).
    """

    # Characters of ordinary text that must be written as HTML entities.
    _TEXT_ENTITIES = {'<': "&lt;", '>': "&gt;", '&': "&amp;"}

    # HTML attribute that receives the first parameter of a command.
    _PARAM_ATTRIBUTES = {"color": "color", "fontfamily": "face"}

    def __init__(self, text):
        self.text = text
        self.input = None
        self.output = None
        self.blocks = []
        self.block_index = 0
        self.no_fill_count = 0

    def execute(self):
        """Convert the text given to the constructor and return the HTML."""
        return self.convert(self.text)

    def convert(self, text):
        """Convert ``text`` from text/enriched to HTML and return the HTML.

        The result always ends with a newline.
        """
        self.input = StdIo(text)
        self.output = StdIo()
        # Reset the parsing state so that a second call does not render the
        # blocks of an earlier call again.
        self.blocks = []
        self.block_index = 0
        self.no_fill_count = 0
        self.read_blocks()
        self.write_blocks()
        self.output.puts('\n')
        return str(self.output)

    def get_block(self):
        """Return the block at the cursor and advance, or None at the end."""
        if self.block_index >= len(self.blocks):
            return None
        block = self.blocks[self.block_index]
        self.block_index += 1
        return block

    def unget_block(self, block):
        """Step the block cursor back by one; a None ``block`` is ignored."""
        if block and self.block_index > 0:
            self.block_index -= 1

    def get_params(self, command_block):
        """Collect the parameters that directly follow ``command_block``.

        Each parameter is a <param>, text, </param> triple of blocks; its
        text is appended to ``command_block.params``.  The first triple that
        does not match is pushed back and ends the scan.
        """
        while True:
            block_1 = self.get_block()
            block_2 = self.get_block()
            block_3 = self.get_block()
            if (is_param_block(block_1) and is_text_block(block_2)
                    and is_end_param_block(block_3)):
                command_block.params.append(block_2.text)
            else:
                self.unget_block(block_3)
                self.unget_block(block_2)
                self.unget_block(block_1)
                break

    def write_blocks(self):
        """Render every parsed block, in order, into ``self.output``."""
        self.block_index = 0
        block = self.get_block()
        while block:
            if is_command_block(block):
                if not block.end:
                    # An opening command owns the parameters that follow it.
                    self.get_params(block)
                self.write_command_block(block)
            elif is_text_block(block):
                self.write_text_block(block)
            block = self.get_block()

    @staticmethod
    def _escape_attribute(value):
        """Return ``value`` made safe for a double-quoted HTML attribute."""
        # '&' must be replaced first so the entities added below stay intact.
        for raw, entity in (('&', "&amp;"), ('<', "&lt;"),
                            ('>', "&gt;"), ('"', "&quot;")):
            value = value.replace(raw, entity)
        return value

    @staticmethod
    def _open_tag(command, tag, params):
        """Return the opening HTML tag for ``command`` rendered as ``tag``.

        "color" and "fontfamily" carry their first parameter as an
        attribute; when the parameter is missing the bare tag is returned
        instead of failing.
        """
        attribute = Converter._PARAM_ATTRIBUTES.get(command)
        if attribute and params:
            value = Converter._escape_attribute(params[0])
            return "<%s %s=\"%s\">" % (tag, attribute, value)
        return "<%s>" % tag

    def write_command_block(self, block):
        """Write the HTML tag for a command block and track nofill depth."""
        command = block.text
        # Commands without an HTML tag are written as "?name" so they stay
        # visible in the markup.
        tag = COMMAND_MAP.get(command, "") or "?" + command

        if block.end:
            html_command = "</%s>" % tag
        else:
            html_command = self._open_tag(command, tag, block.params)

        if command == "nofill":
            if block.end:
                self.no_fill_count -= 1
            else:
                self.no_fill_count += 1

        self.output.puts(html_command)

    @staticmethod
    def _render_text(text, fill):
        """Return ``text`` as HTML.

        ``<``, ``>`` and ``&`` become entities.  When ``fill`` is true, the
        first newline of a run becomes a space and each further newline of
        that run becomes ``<br/>``; otherwise newlines are kept verbatim.
        """
        pieces = []
        newline_run = 0
        for c in text:
            if c == '\n' and fill:
                newline_run += 1
                if newline_run == 1:
                    pieces.append(' ')
                else:
                    pieces.append("<br/>")
            else:
                newline_run = 0
                pieces.append(Converter._TEXT_ENTITIES.get(c, c))
        return "".join(pieces)

    def write_text_block(self, block):
        """Write a text block as HTML, honouring the current nofill depth."""
        fill = self.no_fill_count <= 0
        self.output.puts(self._render_text(block.text, fill))

    def debug_blocks(self):
        """Print every parsed block (debugging aid)."""
        for block in self.blocks:
            print(block)

    def read_command_block(self, c):
        """Read a command up to its closing '>' and return ``(c, block)``.

        The opening '<' has already been consumed by the caller.  A leading
        '/' marks an end command; the command name is lower-cased.  The
        returned ``c`` is the delimiter that stopped the read, or None.
        """
        block = CommandBlock()
        c, text = self.input.get_text_until(">")
        if text and text[0] == '/':
            text = text[1:]
            block.end = True
        block.text = text.lower()
        return c, block

    def read_text_block(self, c):
        """Read text starting with ``c`` up to the next '<'.

        The '<' itself is pushed back for the caller.  Returns
        ``(c2, block)`` where ``c2`` is that '<', or None at end of input.
        """
        block = TextBlock()
        c2, text = self.input.get_text_until('<')
        block.text = c + text
        if c2 == '<':
            self.input.ungetc(c2)
        return c2, block

    def read_blocks(self):
        """Split the input into text and command blocks in ``self.blocks``."""
        c = self.input.getc()
        while c:
            if c == '<' and self.input.lookaheadc() != '<':
                c, block = self.read_command_block(c)
            else:
                if c == '<':
                    # "<<" stands for a literal '<': drop the second one and
                    # start a text block with a single '<'.
                    self.input.getc()
                c, block = self.read_text_block(c)
            self.blocks.append(block)
            c = self.input.getc()

def convert_file(fp_in=None):
    """Read text/enriched text from ``fp_in`` and return it as HTML.

    ``fp_in`` is any object with a ``read()`` method.  When omitted, the
    current ``sys.stdin`` is used; it is looked up at call time so that a
    replaced or redirected ``sys.stdin`` is honoured.
    """
    if fp_in is None:
        fp_in = sys.stdin
    text = fp_in.read()
    return Converter(text).execute()

def output_html(html, fp_out=None):
    """Write the string ``html`` to ``fp_out``.

    ``fp_out`` is any object with a ``write()`` method.  When omitted, the
    current ``sys.stdout`` is used; it is looked up at call time so that a
    replaced or redirected ``sys.stdout`` is honoured.
    """
    if fp_out is None:
        fp_out = sys.stdout
    fp_out.write(html)

def main():
    """Convert the input read by convert_file() and hand the resulting
    HTML to output_html(), both called with their default streams."""
    output_html(convert_file())
    
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

When it was designed, the text/enriched MIME type was intended to be superseded eventually by text/html. Today text/enriched email is rare, but not unknown. Meanwhile, some email clients, such as Gmail, do not render text/enriched, so only the plain text is shown.

I wrote this utility to convert the text/enriched email I was receiving into HTML, so that the highlighting it contained would not be lost.