Source code for markupsafe

# -*- coding: utf-8 -*-
"""
    markupsafe
    ~~~~~~~~~~

    Implements a Markup string.

    :copyright: (c) 2010 by Armin Ronacher.
    :license: BSD, see LICENSE for more details.
"""
import re
import string
from collections import Mapping
from markupsafe._compat import text_type, string_types, int_types, \
     unichr, iteritems, PY2


__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']


_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
_entity_re = re.compile(r'&([^;]+);')


[docs]class Markup(text_type): r"""Marks a string as being safe for inclusion in HTML/XML output without needing to be escaped. This implements the `__html__` interface a couple of frameworks and web applications use. :class:`Markup` is a direct subclass of `unicode` and provides all the methods of `unicode` just that it escapes arguments passed and always returns `Markup`. The `escape` function returns markup objects so that double escaping can't happen. The constructor of the :class:`Markup` class can be used for three different things: When passed an unicode object it's assumed to be safe, when passed an object with an HTML representation (has an `__html__` method) that representation is used, otherwise the object passed is converted into a unicode string and then assumed to be safe: >>> Markup("Hello <em>World</em>!") Markup(u'Hello <em>World</em>!') >>> class Foo(object): ... def __html__(self): ... return '<a href="#">foo</a>' ... >>> Markup(Foo()) Markup(u'<a href="#">foo</a>') If you want object passed being always treated as unsafe you can use the :meth:`escape` classmethod to create a :class:`Markup` object: >>> Markup.escape("Hello <em>World</em>!") Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!') Operations on a markup string are markup aware which means that all arguments are passed through the :func:`escape` function: >>> em = Markup("<em>%s</em>") >>> em % "foo & bar" Markup(u'<em>foo &amp; bar</em>') >>> strong = Markup("<strong>%(text)s</strong>") >>> strong % {'text': '<blink>hacker here</blink>'} Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>') >>> Markup("<em>Hello</em> ") + "<foo>" Markup(u'<em>Hello</em> &lt;foo&gt;') """ __slots__ = () def __new__(cls, base=u'', encoding=None, errors='strict'): if hasattr(base, '__html__'): base = base.__html__() if encoding is None: return text_type.__new__(cls, base) return text_type.__new__(cls, base, encoding, errors) def __html__(self): return self def __add__(self, other): if isinstance(other, string_types) or hasattr(other, '__html__'): return self.__class__(super(Markup, self).__add__(self.escape(other))) return NotImplemented def __radd__(self, other): if hasattr(other, '__html__') or isinstance(other, string_types): return self.escape(other).__add__(self) return NotImplemented def __mul__(self, num): if isinstance(num, int_types): return self.__class__(text_type.__mul__(self, num)) return NotImplemented __rmul__ = __mul__ def __mod__(self, arg): if isinstance(arg, tuple): arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg) else: arg = _MarkupEscapeHelper(arg, self.escape) return self.__class__(text_type.__mod__(self, arg)) def __repr__(self): return '%s(%s)' % ( self.__class__.__name__, text_type.__repr__(self) )
[docs] def join(self, seq): return self.__class__(text_type.join(self, map(self.escape, seq)))
join.__doc__ = text_type.join.__doc__
[docs] def split(self, *args, **kwargs): return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
split.__doc__ = text_type.split.__doc__
[docs] def rsplit(self, *args, **kwargs): return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
rsplit.__doc__ = text_type.rsplit.__doc__
[docs] def splitlines(self, *args, **kwargs): return list(map(self.__class__, text_type.splitlines( self, *args, **kwargs)))
splitlines.__doc__ = text_type.splitlines.__doc__
[docs] def unescape(self): r"""Unescape markup again into an text_type string. This also resolves known HTML4 and XHTML entities: >>> Markup("Main &raquo; <em>About</em>").unescape() u'Main \xbb <em>About</em>' """ from markupsafe._constants import HTML_ENTITIES def handle_match(m): name = m.group(1) if name in HTML_ENTITIES: return unichr(HTML_ENTITIES[name]) try: if name[:2] in ('#x', '#X'): return unichr(int(name[2:], 16)) elif name.startswith('#'): return unichr(int(name[1:])) except ValueError: pass return u'' return _entity_re.sub(handle_match, text_type(self))
[docs] def striptags(self): r"""Unescape markup into an text_type string and strip all tags. This also resolves known HTML4 and XHTML entities. Whitespace is normalized to one: >>> Markup("Main &raquo; <em>About</em>").striptags() u'Main \xbb About' """ stripped = u' '.join(_striptags_re.sub('', self).split()) return Markup(stripped).unescape()
@classmethod
[docs] def escape(cls, s): """Escape the string. Works like :func:`escape` with the difference that for subclasses of :class:`Markup` this function would return the correct subclass. """ rv = escape(s) if rv.__class__ is not cls: return cls(rv) return rv
def make_simple_escaping_wrapper(name): orig = getattr(text_type, name) def func(self, *args, **kwargs): args = _escape_argspec(list(args), enumerate(args), self.escape) _escape_argspec(kwargs, iteritems(kwargs), self.escape) return self.__class__(orig(self, *args, **kwargs)) func.__name__ = orig.__name__ func.__doc__ = orig.__doc__ return func for method in '__getitem__', 'capitalize', \ 'title', 'lower', 'upper', 'replace', 'ljust', \ 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \ 'translate', 'expandtabs', 'swapcase', 'zfill': locals()[method] = make_simple_escaping_wrapper(method) # new in python 2.5 if hasattr(text_type, 'partition'):
[docs] def partition(self, sep): return tuple(map(self.__class__, text_type.partition(self, self.escape(sep))))
[docs] def rpartition(self, sep): return tuple(map(self.__class__, text_type.rpartition(self, self.escape(sep))))
# new in python 2.6 if hasattr(text_type, 'format'):
[docs] def format(*args, **kwargs): self, args = args[0], args[1:] formatter = EscapeFormatter(self.escape) kwargs = _MagicFormatMapping(args, kwargs) return self.__class__(formatter.vformat(self, args, kwargs))
def __html_format__(self, format_spec): if format_spec: raise ValueError('Unsupported format specification ' 'for Markup.') return self # not in python 3 if hasattr(text_type, '__getslice__'): __getslice__ = make_simple_escaping_wrapper('__getslice__') del method, make_simple_escaping_wrapper
class _MagicFormatMapping(Mapping): """This class implements a dummy wrapper to fix a bug in the Python standard library for string formatting. See http://bugs.python.org/issue13598 for information about why this is necessary. """ def __init__(self, args, kwargs): self._args = args self._kwargs = kwargs self._last_index = 0 def __getitem__(self, key): if key == '': idx = self._last_index self._last_index += 1 try: return self._args[idx] except LookupError: pass key = str(idx) return self._kwargs[key] def __iter__(self): return iter(self._kwargs) def __len__(self): return len(self._kwargs) if hasattr(text_type, 'format'): class EscapeFormatter(string.Formatter): def __init__(self, escape): self.escape = escape def format_field(self, value, format_spec): if hasattr(value, '__html_format__'): rv = value.__html_format__(format_spec) elif hasattr(value, '__html__'): if format_spec: raise ValueError('No format specification allowed ' 'when formatting an object with ' 'its __html__ method.') rv = value.__html__() else: rv = string.Formatter.format_field(self, value, format_spec) return text_type(self.escape(rv)) def _escape_argspec(obj, iterable, escape): """Helper for various string-wrapped functions.""" for key, value in iterable: if hasattr(value, '__html__') or isinstance(value, string_types): obj[key] = escape(value) return obj class _MarkupEscapeHelper(object): """Helper for Markup.__mod__""" def __init__(self, obj, escape): self.obj = obj self.escape = escape __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape) __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj)) __repr__ = lambda s: str(s.escape(repr(s.obj))) __int__ = lambda s: int(s.obj) __float__ = lambda s: float(s.obj) # we have to import it down here as the speedups and native # modules imports the markup type which is define above. try: from markupsafe._speedups import escape, escape_silent, soft_unicode except ImportError: from markupsafe._native import escape, escape_silent, soft_unicode if not PY2: soft_str = soft_unicode __all__.append('soft_str')