from xml.sax.saxutils import quoteattr
class SafeHTMLMixin(object):
    """Mixin keeping a string type HTML-safe across ``+``, ``*`` and ``%``.

    Combine it with a built-in string type, mixin first, e.g.
    ``class SafeHTMLStr(SafeHTMLMixin, str)``.  Operands that are already
    instances of this mixin are trusted and used verbatim; every other
    operand is HTML-escaped before being combined, and the result is wrapped
    in ``self.__class__`` again so the "safe" marker is never lost.
    """

    # Replacements applied on top of the ``&``, ``<`` and ``>`` escaping that
    # ``quoteattr`` always performs (OWASP XSS prevention rule #1).
    _HTML_ENTITIES = {'"': '&quot;', '/': '&#x2F;', "'": '&#x27;'}

    def sanitize(self, s):
        """Return ``s`` made safe for insertion into HTML element content.

        Follows
        https://www.owasp.org/index.php/XSS_%28Cross_Site_Scripting%29_Prevention_Cheat_Sheet#RULE_.231_-_HTML_Escape_Before_Inserting_Untrusted_Data_into_HTML_Element_Content

        * ``list`` / ``tuple``: each item is sanitized and a ``tuple`` is
          returned (the form ``%`` formatting expects for several values).
        * ``dict``: a new dict with the same keys and sanitized values is
          returned, so ``'%(name)s'`` style formatting works.
        * ``SafeHTMLMixin`` instance: returned unchanged (already trusted).
        * anything else: passed to ``quoteattr`` and escaped.

        NOTE(review): values that are neither text nor one of the containers
        above (e.g. numbers for ``%d``) are handed to ``quoteattr`` as-is,
        which presumably only accepts text -- confirm before relying on it.
        """
        if isinstance(s, (list, tuple)):
            return tuple(self.sanitize(x) for x in s)
        if isinstance(s, dict):
            # Keys are only looked up by the format string, never inserted.
            return dict((key, self.sanitize(value))
                        for key, value in s.items())
        if isinstance(s, SafeHTMLMixin):
            return s
        # quoteattr wraps its result in one quote character on each side;
        # [1:-1] strips them so only the escaped text remains.
        return quoteattr(s, entities=SafeHTMLMixin._HTML_ENTITIES)[1:-1]

    def __add__(self, s):
        """``self + s``: append the sanitized ``s``; result stays safe."""
        return self.__class__(
            super(SafeHTMLMixin, self).__add__(self.sanitize(s)))

    def __radd__(self, s):
        """``s + self``: prepend the sanitized ``s``; result stays safe."""
        return self.__class__(self.sanitize(s)) + self

    def __mul__(self, i):
        """``self * i``: repetition of safe content is still safe."""
        return self.__class__(super(SafeHTMLMixin, self).__mul__(i))

    def __rmul__(self, i):
        """``i * self``: repetition of safe content is still safe."""
        return self.__class__(super(SafeHTMLMixin, self).__rmul__(i))

    def __mod__(self, s):
        """``self % s``: interpolate sanitized value(s) into the template."""
        return self.__class__(
            super(SafeHTMLMixin, self).__mod__(self.sanitize(s)))

    def __repr__(self):
        """Return ``ClassName(<repr of the underlying string>)``."""
        return '%s(%s)' % (self.__class__.__name__,
                           super(SafeHTMLMixin, self).__repr__())
class SafeHTMLStr(SafeHTMLMixin, str):
    """A ``str`` that sanitizes every string concatenated to it (or inserted
    via ``%`` formatting).

    A ``SafeHTMLStr`` operand is trusted and is not quoted::

        >>> SafeHTMLStr('<br/>') + SafeHTMLStr('<br/>')
        SafeHTMLStr('<br/><br/>')

    Any plain ``str`` or ``unicode`` operand is quoted, and the result keeps
    being a ``SafeHTMLStr``::

        >>> SafeHTMLStr('<br/>') + '<script src="x"/>'
        SafeHTMLStr('<br/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> SafeHTMLStr('<br/>') + u'<script src="x"/>'
        SafeHTMLStr('<br/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> '<script src="x"/>' + SafeHTMLStr('<br/>')
        SafeHTMLStr('&lt;script src=&quot;x&quot;&#x2F;&gt;<br/>')
        >>> SafeHTMLStr('<br/>') * 2
        SafeHTMLStr('<br/><br/>')
        >>> SafeHTMLStr('<a>%s</a>') % '<script src="x"/>'
        SafeHTMLStr('<a>&lt;script src=&quot;x&quot;&#x2F;&gt;</a>')
        >>> SafeHTMLStr('<a %s>%s</a>') % (
        ...     SafeHTMLStr('spam="foo"'), '<script src="x"/>')
        SafeHTMLStr('<a spam="foo">&lt;script src=&quot;x&quot;&#x2F;&gt;</a>')
    """
class SafeHTMLUnicode(SafeHTMLMixin, unicode):
    """A ``unicode`` string that sanitizes every string concatenated to it
    (or inserted via ``%`` formatting).

    A ``SafeHTMLUnicode`` operand is trusted and is not quoted::

        >>> SafeHTMLUnicode(u'<br/>') + SafeHTMLUnicode(u'<br/>')
        SafeHTMLUnicode(u'<br/><br/>')

    Any plain ``str`` or ``unicode`` operand is quoted, and the result keeps
    being a ``SafeHTMLUnicode``::

        >>> SafeHTMLUnicode(u'<br/>') + '<script src="x"/>'
        SafeHTMLUnicode(u'<br/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> SafeHTMLUnicode(u'<br/>') + u'<script src="x"/>'
        SafeHTMLUnicode(u'<br/>&lt;script src=&quot;x&quot;&#x2F;&gt;')
        >>> '<script src="x"/>' + SafeHTMLUnicode(u'<br/>')
        SafeHTMLUnicode(u'&lt;script src=&quot;x&quot;&#x2F;&gt;<br/>')
        >>> SafeHTMLUnicode(u'<br/>') * 2
        SafeHTMLUnicode(u'<br/><br/>')
        >>> SafeHTMLUnicode(u'<a>%s</a>') % '<script src="x"/>'
        SafeHTMLUnicode(u'<a>&lt;script src=&quot;x&quot;&#x2F;&gt;</a>')
        >>> SafeHTMLUnicode(u'<a %s>%s</a>') % (
        ...     SafeHTMLUnicode(u'spam="foo"'), '<script src="x"/>')
        SafeHTMLUnicode(u'<a spam="foo">&lt;script src=&quot;x&quot;&#x2F;&gt;</a>')
    """
if __name__ == "__main__":
    # Executed as a script: run the doctest examples embedded in this
    # module's docstrings.
    import doctest
    doctest.testmod()