Welcome, guest | Sign In | My Account | Store | Cart

This class automatically builds a mail message object from a URL or a local HTML file, WITH all images included. It takes care of parsing, downloading and embedding the images, and of replacing each image reference with a "cid:<ID>" URL. The result is a valid MIMEMultipart("related") message object which can be used to send valid HTML mail.

Python, 94 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# HtmlMail python class
# Compose HTML mails from URLs or local files with all images included
#
# Author: Catalin Constantin <dazoot@gmail.com>

import sys, os, urllib2, urlparse
from email.MIMEText import MIMEText
from email.MIMEImage import MIMEImage
from email.MIMEMultipart import MIMEMultipart
import email, re

class HtmlMail:
	"""Build a MIME "related" mail message from an HTML page, images embedded.

	The page is read from an http(s) URL or from a local file.  Every image
	referenced through ``<img src="...">``, ``<input src="...">`` or a
	``background="..."`` attribute is fetched, attached to the message and
	its reference in the HTML is rewritten to ``cid:<content id>``.

	Only double-quoted attribute values are recognised.
	"""

	def __init__(self, location, encoding="iso-8859-1"):
		"""Remember where the page lives and prepare the attribute patterns.

		location -- http(s) URL or local path of the HTML page
		encoding -- charset declared on the HTML part of the message
		"""
		self.location=location
		self.is_http=self._is_url(location)
		self.encoding=encoding
		# Defined up front so the attribute exists even if set_log() is never called.
		self.log=None

		# Each pattern is confined to ONE tag ([^>] instead of "."), otherwise a
		# match could start in one tag and end in the src="cid:..." of a later,
		# already rewritten tag.  Groups: (1) tag text up to and including the
		# opening quote, (2) image location, (3) closing quote and rest of tag.
		self.p1=re.compile(r'(<img\b[^>]*?\ssrc=")([^"]*)("[^>]*>)', re.IGNORECASE)
		self.p2=re.compile(r'(<[^>]*?\sbackground=")([^"]*)("[^>]*>)', re.IGNORECASE)
		self.p3=re.compile(r'(<input\b[^>]*?\ssrc=")([^"]*)("[^>]*>)', re.IGNORECASE)

		self.img_c=0

	def set_log(self,log):
		"""Store a log object on the instance (not used by this class itself)."""
		self.log=log

	def _is_url(self, location):
		"""Return True when location starts with an http:// or https:// scheme."""
		lowered=location.lower()
		return lowered.startswith("http://") or lowered.startswith("https://")

	def _handle_image(self, matchobj):
		"""re.sub callback: register the matched image and point the tag at its cid.

		Returns the replacement text for the whole match.
		"""
		img=matchobj.group(2)
		probe=img.strip().lower()
		# Nothing to embed: empty reference, already embedded, or inline data.
		if not probe or probe.startswith("cid:") or probe.startswith("data:"):
			return matchobj.group(0)
		if img not in self.images:
			self.img_c+=1
			self.images[img]="dazoot-img%d" % self.img_c
		return "%scid:%s%s" % (matchobj.group(1), self.images[img], matchobj.group(3))

	def _parse_images(self):
		"""Rewrite self.content in place and return {image location: content id}."""
		self.images={}
		# Restart numbering so repeated get_msg() calls yield the same ids.
		self.img_c=0
		for pattern in (self.p1, self.p2, self.p3):
			self.content=pattern.sub(self._handle_image, self.content)
		return self.images

	def _read_image(self, imglocation):
		"""Return the raw bytes of one image referenced by the page.

		For a remote page the reference is resolved against the page URL.  For
		a local page an absolute http(s) reference is downloaded, anything
		else is read from disk relative to the directory of the HTML file.
		"""
		if self.is_http:
			img_url=urlparse.urljoin(self.location, imglocation)
		elif self._is_url(imglocation):
			img_url=imglocation
		else:
			# os.path.join keeps imglocation untouched when it is absolute.
			path=os.path.join(os.path.dirname(self.location), imglocation)
			with open(path, "rb") as handle:
				return handle.read()

		response=urllib2.urlopen(img_url)
		try:
			return response.read()
		finally:
			response.close()

	def _read_page(self):
		"""Return the HTML source found at self.location."""
		if self.is_http:
			response=urllib2.urlopen(self.location)
			try:
				return response.read()
			finally:
				response.close()
		with open(self.location, "r") as handle:
			return handle.read()

	def get_msg(self):
		"""Fetch the page and its images and return the assembled message.

		Returns a MIMEMultipart("related") object whose first part is the
		rewritten HTML and whose remaining parts are the images, one per
		distinct location.  Subject/From/To headers are left to the caller.
		Errors from opening files/URLs, and the TypeError MIMEImage raises
		when it cannot guess the image subtype, are propagated.
		"""
		self.content=self._read_page()

		msg=MIMEMultipart("related")
		images=self._parse_images()

		msg.attach(MIMEText(self.content, "html", self.encoding))

		for img, cid in images.items():
			img_msg=MIMEImage(self._read_image(img))
			img_type, img_ext=img_msg["Content-Type"].split("/")

			# Drop the generated headers so they can be re-emitted with a
			# name= parameter and without a per-part MIME-Version.
			del img_msg["MIME-Version"]
			del img_msg["Content-Type"]
			del img_msg["Content-Transfer-Encoding"]

			img_msg.add_header("Content-Type", "%s/%s; name=\"%s.%s\"" % (img_type, img_ext, cid, img_ext))
			img_msg.add_header("Content-Transfer-Encoding", "base64")
			img_msg.add_header("Content-ID", "<%s>" % cid)
			img_msg.add_header("Content-Disposition", "inline; filename=\"%s.%s\"" % (cid, img_ext))
			msg.attach(img_msg)

		return msg

if __name__=="__main__":
	# Manual smoke test: build a message from a live newsletter page and hand
	# it to the SMTP server on localhost.  Needs network access and a local MTA.
	import smtplib
	hm=HtmlMail("http://www.egirl.ro/newsletter/december2005_2/")
	msg=hm.get_msg()
	# get_msg() only builds the body parts; addressing headers are set here.
	msg["Subject"]="Egirl Newsletter"
	msg["From"]="Catalin Constantin <dazoot@gmail.com>"
	msg["To"]="dazoot@gmail.com"

	s=smtplib.SMTP("localhost")
	try:
		s.sendmail("dazoot@gmail.com", msg["To"], msg.as_string())
	finally:
		# End the SMTP session even when sending fails.
		s.quit()
	

2 comments

pepe ke 17 years, 2 months ago  # | flag

urllib2.URLError: <urlopen error unknown url type: cid>. very nice example indeed !

one small problem, if you have an input field in your html code before any img tags, the p3 regular expression will try to re-encode an already captured image resulting in the above stated exception

urllib2.URLError: <urlopen error unknown url type: cid>

due to the fact that you try to urlopen "cid:dazoot-img" which of course can not be found.

to solve this you have to adapt the regular expression match a little

    self.p3=re.compile("(<input[^>]+src=\")(.*?)(\".*?>)", re.IGNORECASE|re.DOTALL)

this is of course the same for self.p1.

for self.p2 I would rather write something like this

self.p2=re.compile("(very nice example indeed !

one small problem, if you have an input field in your html code before any img tags, the p3 regular expression will try to re-encode an already captured image resulting in the above stated exception

urllib2.URLError: <urlopen error unknown url type: cid>

due to the fact that you try to urlopen "cid:dazoot-img" which of course can not be found.

to solve this you have to adapt the regular expression match a little

    self.p3=re.compile("(<input[^>]+src=\")(.*?)(\".*?>)", re.IGNORECASE|re.DOTALL)

this is of course the same for self.p1.

for self.p2 I would rather write something like this

self.p2=re.compile("(

pepe ke 17 years, 2 months ago  # | flag

what a messy comment ! sorry for my messy comment, I am quite sure I did not post it like that, I hope the webmaster will clean it up (like remove all the duplicate lines!)

the p2 regular expression didn't quite come out the way it should, so here it is again

self.p2=re.compile("(<.*?background=\")([^\"]*)(\".*?>)", re.IGNORECASE|re.DOTALL)

meaning, capture all the text after the sequence 'background="' up to but not including the next quote

(hope these lines will be here only once ;-)

greetz,

Peter