When you display messages on a web page, you have to sanitize input data coming from users to avoid XSS. Here is a small recipe that uses a special string class to make sure the content stays safe all the way through.
from xml.sax.saxutils import quoteattr
class SafeHTMLMixin(object):
    """Mixin that keeps a ``str``-like class HTML-safe across ``+``, ``*`` and ``%``.

    List it *before* the concrete string type in the bases, e.g.
    ``class SafeHTMLStr(SafeHTMLMixin, str)``: every operator below delegates
    to the next class in the MRO through ``super(SafeHTMLMixin, self)`` and
    wraps the result back into ``self.__class__``.

    Operands that are already ``SafeHTMLMixin`` instances are trusted and
    combined verbatim; any other operand is HTML-escaped first.
    """

    # Characters escaped in addition to the ``&``, ``<`` and ``>`` that
    # ``xml.sax.saxutils`` always handles (OWASP rule #1 also asks for both
    # quote characters and the forward slash).
    _ENTITIES = {'"': '&quot;', '/': '&#x2F;', "'": '&#x27;'}

    def sanitize(self, s):
        """Return *s* escaped for insertion into HTML element content.

        Follows
        https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content

        * a ``list`` or ``tuple`` is sanitized item by item and returned as a
          ``tuple`` (the shape ``%`` formatting expects for several values);
        * a ``dict`` is returned as a new ``dict`` with the same keys and
          sanitized values, so ``%(name)s`` formatting is supported;
        * a ``SafeHTMLMixin`` instance is returned unchanged (already safe);
        * anything else is passed to ``quoteattr`` for escaping.
        """
        if isinstance(s, (list, tuple)):
            return tuple(self.sanitize(x) for x in s)
        if isinstance(s, dict):
            return {key: self.sanitize(value) for key, value in s.items()}
        if isinstance(s, SafeHTMLMixin):
            return s
        # quoteattr() returns the escaped text wrapped in one quote character
        # on each side; the slice strips that wrapper again.
        return quoteattr(s, entities=self._ENTITIES)[1:-1]

    def __add__(self, s):
        """Return ``self + s`` with *s* sanitized, as an instance of our class."""
        return self.__class__(
            super(SafeHTMLMixin, self).__add__(self.sanitize(s)))

    def __radd__(self, s):
        """Return ``s + self`` with *s* sanitized, as an instance of our class."""
        # Wrapping the sanitized left operand first routes the concatenation
        # through __add__, which leaves the (safe) right operand untouched.
        return self.__class__(self.sanitize(s)) + self

    def __mul__(self, i):
        """Return ``self * i``; repeating safe content stays safe."""
        return self.__class__(super(SafeHTMLMixin, self).__mul__(i))

    def __rmul__(self, i):
        """Return ``i * self``; repeating safe content stays safe."""
        return self.__class__(super(SafeHTMLMixin, self).__rmul__(i))

    def __mod__(self, s):
        """Return ``self % s`` with every interpolated value sanitized."""
        return self.__class__(
            super(SafeHTMLMixin, self).__mod__(self.sanitize(s)))

    def __repr__(self):
        """Return ``ClassName(<repr of the underlying string>)``."""
        return '%s(%s)' % (self.__class__.__name__,
                           super(SafeHTMLMixin, self).__repr__())
class SafeHTMLStr(SafeHTMLMixin, str):
    """A string that sanitizes every str concatenated to it (or inserted
    via ``%`` formatting).

    SafeHTMLStr itself is not quoted::

        >>> SafeHTMLStr('<ABC/>') + SafeHTMLStr('<script src="x"/>')
        SafeHTMLStr('<ABC/><script src="x"/>')

    While any string or unicode input is quoted, and the result keeps being
    SafeHTML::

        >>> SafeHTMLStr('<ABC/>') + '<script src="x"/>'
        SafeHTMLStr('<ABC/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> SafeHTMLStr('<ABC/>') + u'<script src="x"/>'
        SafeHTMLStr('<ABC/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> '<script src="x"/>' + SafeHTMLStr('<ABC/>')
        SafeHTMLStr('&lt;script src=&quot;x&quot;&#x2F;&gt;<ABC/>')
        >>> SafeHTMLStr('<ABC/>') * 2
        SafeHTMLStr('<ABC/><ABC/>')
        >>> SafeHTMLStr('<ABC>%s</ABC>') % '<script src="x"/>'
        SafeHTMLStr('<ABC>&lt;script src=&quot;x&quot;&#x2F;&gt;</ABC>')
        >>> SafeHTMLStr('<ABC %s>%s</ABC>') % (
        ...     SafeHTMLStr('spam="foo"'), '<script src="x"/>')
        SafeHTMLStr('<ABC spam="foo">&lt;script src=&quot;x&quot;&#x2F;&gt;</ABC>')
    """
class SafeHTMLUnicode(SafeHTMLMixin, unicode):
    """A unicode string that sanitizes every str concatenated to it
    (or inserted via ``%`` formatting).

    SafeHTMLUnicode itself is not quoted::

        >>> SafeHTMLUnicode(u'<ABC/>') + SafeHTMLUnicode(u'<script src="x"/>')
        SafeHTMLUnicode(u'<ABC/><script src="x"/>')

    While any string or unicode input is quoted, and the result keeps being
    SafeHTML::

        >>> SafeHTMLUnicode(u'<ABC/>') + '<script src="x"/>'
        SafeHTMLUnicode(u'<ABC/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> SafeHTMLUnicode(u'<ABC/>') + u'<script src="x"/>'
        SafeHTMLUnicode(u'<ABC/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> '<script src="x"/>' + SafeHTMLUnicode(u'<ABC/>')
        SafeHTMLUnicode(u'&lt;script src=&quot;x&quot;&#x2F;&gt;<ABC/>')
        >>> SafeHTMLUnicode(u'<ABC/>') * 2
        SafeHTMLUnicode(u'<ABC/><ABC/>')
        >>> SafeHTMLUnicode(u'<ABC>%s</ABC>') % '<script src="x"/>'
        SafeHTMLUnicode(u'<ABC>&lt;script src=&quot;x&quot;&#x2F;&gt;</ABC>')
        >>> SafeHTMLUnicode(u'<ABC %s>%s</ABC>') % (
        ...     SafeHTMLUnicode(u'spam="foo"'), '<script src="x"/>')
        SafeHTMLUnicode(u'<ABC spam="foo">&lt;script src=&quot;x&quot;&#x2F;&gt;</ABC>')
    """
if __name__ == "__main__":
    # Executed as a script: run every doctest embedded in this module.
    from doctest import testmod
    testmod()
|
Any suggestions are welcome.