This class automatically builds a mail message object from a URL or a local HTML file, WITH all images included. It takes care of parsing, downloading and embedding the images, and of replacing their references with "cid: ID-here". The result is a valid MIMEMultipart("related") message object which can be used to send valid HTML mail.
# HtmlMail python class
# Compose HTML mails from URLs or local files with all images included
#
# Author: Catalin Constantin <dazoot@gmail.com>
import sys, os, urllib2, urlparse
from email.MIMEText import MIMEText
from email.MIMEImage import MIMEImage
from email.MIMEMultipart import MIMEMultipart
import email, re
class HtmlMail:
    """Build a ``multipart/related`` mail message from an HTML page.

    The page is read from an ``http://`` / ``https://`` URL or from a local
    file.  Images referenced through ``<img src="...">``,
    ``<input src="...">`` or a ``background="..."`` attribute are read,
    attached to the message, and their references in the HTML are rewritten
    to ``cid:`` ids pointing at the attachments.

    Only double-quoted attribute values are recognised, and a ``>`` inside
    an attribute value that precedes the image attribute hides that tag from
    the patterns.
    """

    def __init__(self, location, encoding="iso-8859-1"):
        """Remember the page location and compile the image patterns.

        location -- URL or local path of the HTML document.
        encoding -- charset passed to the HTML part of the message.
        """
        self.location = location
        lowered = location.lower()
        # https pages must be fetched over the network as well.
        self.is_http = (lowered.startswith("http://")
                        or lowered.startswith("https://"))
        self.encoding = encoding
        # Every pattern captures (tag text before the URL, URL, rest of tag).
        # The negated classes keep a match inside ONE tag: with ".*?" an
        # attribute-less <input> could swallow the following <img> and
        # capture its already rewritten "cid:" reference as a new image.
        self.p1 = re.compile(r'(<img\b[^<>]*?\ssrc=")([^"]*)("[^>]*>)',
                             re.IGNORECASE)
        self.p2 = re.compile(r'(<[^<>]*?\sbackground=")([^"]*)("[^>]*>)',
                             re.IGNORECASE)
        self.p3 = re.compile(r'(<input\b[^<>]*?\ssrc=")([^"]*)("[^>]*>)',
                             re.IGNORECASE)
        self.img_c = 0

    def set_log(self, log):
        """Store *log* on the instance (no method of this class reads it)."""
        self.log = log

    def _slurp(self, handle):
        """Return everything readable from *handle*, then close it."""
        # try/finally instead of "with": the objects returned by
        # urllib2.urlopen are not context managers.
        try:
            return handle.read()
        finally:
            handle.close()

    def _handle_image(self, matchobj):
        """Regex callback: register the matched image, return the new tag.

        Returns the tag text with its URL replaced by ``cid:<id>``.  The
        same URL always maps to the same id, so it is attached only once.
        """
        img = matchobj.group(2)
        lowered = img.lower()
        # Nothing to download for an empty source, an already rewritten
        # reference or inline data: leave the tag exactly as it was.
        if not img or lowered.startswith("cid:") or lowered.startswith("data:"):
            return matchobj.group(0)
        if img not in self.images:
            self.img_c += 1
            self.images[img] = "dazoot-img%d" % self.img_c
        return "%scid:%s%s" % (matchobj.group(1), self.images[img],
                               matchobj.group(3))

    def _parse_images(self):
        """Rewrite image references in ``self.content``.

        Returns the dict mapping each original image URL to its content id.
        """
        self.images = {}
        for pattern in (self.p1, self.p2, self.p3):
            self.content = pattern.sub(self._handle_image, self.content)
        return self.images

    def _read_image(self, imglocation):
        """Return the raw bytes of the image referenced as *imglocation*.

        References are resolved against the page: with ``urljoin`` for a
        remote page, against the HTML file's directory for a local one.
        """
        if self.is_http:
            img_url = urlparse.urljoin(self.location, imglocation)
            return self._slurp(urllib2.urlopen(img_url))
        # os.path.join leaves absolute image paths untouched.
        img_path = os.path.join(os.path.dirname(self.location), imglocation)
        return self._slurp(open(img_path, "rb"))

    def get_msg(self):
        """Read the page and its images and return the assembled message.

        Returns a ``MIMEMultipart("related")`` object whose first part is
        the rewritten HTML, followed by one inline part per distinct image.
        Errors raised while reading the page or an image propagate.
        """
        if self.is_http:
            self.content = self._slurp(urllib2.urlopen(self.location))
        else:
            self.content = self._slurp(open(self.location, "r"))
        msg = MIMEMultipart("related")
        images = self._parse_images()
        msg.attach(MIMEText(self.content, "html", self.encoding))
        for img, cid in images.items():
            img_msg = MIMEImage(self._read_image(img))
            img_type = img_msg.get_content_maintype()
            img_ext = img_msg.get_content_subtype()
            # Drop the generated headers and add them again so that the
            # Content-Type carries a "name" parameter.
            del img_msg["MIME-Version"]
            del img_msg["Content-Type"]
            del img_msg["Content-Transfer-Encoding"]
            img_msg.add_header("Content-Type", "%s/%s; name=\"%s.%s\""
                               % (img_type, img_ext, cid, img_ext))
            img_msg.add_header("Content-Transfer-Encoding", "base64")
            img_msg.add_header("Content-ID", "<%s>" % cid)
            img_msg.add_header("Content-Disposition",
                               "inline; filename=\"%s.%s\"" % (cid, img_ext))
            msg.attach(img_msg)
        return msg
if __name__ == "__main__":
    # test the class here: mail a sample newsletter through the local SMTP
    # server
    import smtplib

    hm = HtmlMail("http://www.egirl.ro/newsletter/december2005_2/")
    msg = hm.get_msg()
    msg["Subject"] = "Egirl Newsletter"
    msg["From"] = "Catalin Constantin <dazoot@gmail.com>"
    msg["To"] = "dazoot@gmail.com"
    s = smtplib.SMTP("localhost")
    try:
        s.sendmail("dazoot@gmail.com", msg["To"], msg.as_string())
    finally:
        # Close the SMTP session even when sending fails.
        s.quit()
|
urllib2.URLError: <urlopen error unknown url type: cid>. very nice example indeed !
one small problem: if you have an input field in your html code before any img tag, the p3 regular expression will try to re-encode an already captured image, resulting in the exception stated above:
urllib2.URLError: <urlopen error unknown url type: cid>
due to the fact that you try to urlopen "cid:dazoot-img" which of course can not be found.
to solve this you have to adapt the regular expression match a little
this is of course the same for self.p1.
for self.p2 I would rather write something like this
self.p2=re.compile("(very nice example indeed !
one small problem, if you have an input field in your html code before any img tags, the p3 regular expression will try to re-encode an already captured image resulting in the above stated exception
urllib2.URLError: <urlopen error unknown url type: cid>
due to the fact that you try to urlopen "cid:dazoot-img" which of course can not be found.
to solve this you have to adapt the regular expression match a little
this if of course the same for self.p1.
for self.p2 I would rather write something like this
self.p2=re.compile("(
what a messy comment ! sorry for my messy comment, I am quite sure I did not post it like that, I hope the webmaster will clean it up (like remove all the duplicate lines!)
the p2 regular expression didn't quite come out the way it should, so here it is again
self.p2=re.compile("(<.*?background=\")([^\"]*)(\".*?>)", re.IGNORECASE|re.DOTALL)
meaning, capture all the text after the sequence 'background="' up to but not including the next quote
(hope these lines will be here only once ;-)
greetz,
Peter