An alternative to the builtin repr() that uses the \N{FULL NAME} format instead of \uXXXX for unicode strings.
def urepr(x):
    r"""Return ``repr(x)`` with character escapes rewritten as ``\N{NAME}``.

    Every ``\xNN``, ``\uNNNN`` and ``\UNNNNNNNN`` escape found in ``repr(x)``
    is replaced by the ``\N{...}`` form using the character's Unicode name.
    Escapes for characters that have no name (for example control
    characters) are left as they are.

    The substitution runs over the whole text of ``repr(x)``, so escapes
    inside containers are rewritten as well.

    NOTE(review): escapes in byte-string reprs are rewritten too, although
    ``\N{...}`` is not interpreted inside byte-string literals -- confirm
    whether callers rely on that.
    """
    import re
    import unicodedata

    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3 has no unichr()

    def toname(m):
        # No capture group took part: this is an escaped backslash pair.
        # It is consumed as a unit so that the text following it is never
        # mistaken for the start of a character escape.
        if m.lastindex is None:
            return m.group(0)
        try:
            code = int(m.group(m.lastindex), 16)
            return r"\N{%s}" % unicodedata.name(to_char(code))
        except ValueError:
            # Unnamed character, or a code point this build cannot represent.
            return m.group(0)

    return re.sub(
        r"\\(?:\\|x([0-9a-f]{2})|u([0-9a-f]{4})|U([0-9a-f]{8}))",
        toname,
        repr(x)
    )
def displayhook(x):
    """Print the urepr() form of *x*; do nothing when *x* is None.

    Intended as a replacement for sys.displayhook. Like the default hook,
    it also stores the displayed value in the builtins namespace as ``_`` so
    the interactive "last result" variable keeps working.
    """
    if x is None:
        return
    try:
        import builtins  # Python 3
    except ImportError:
        import __builtin__ as builtins  # Python 2
    # Clear `_` before printing and set it afterwards, in the same order the
    # default display hook uses.
    builtins._ = None
    print(urepr(x))
    builtins._ = x
def install():
    """Make displayhook() the active sys.displayhook."""
    import sys as _sys
    _sys.displayhook = displayhook
def uninstall():
    """Put the interpreter's original display hook back in place."""
    import sys as _sys
    original_hook = _sys.__displayhook__
    _sys.displayhook = original_hook
|
When working in the interactive prompt the terminal or IDE often cannot display unicode characters. This module helps make the representation of unicode strings more readable.
>>> s = unichr(0xA0) + unichr(0x20AC)
>>> s
u'\xa0\u20ac'
>>> import urepr
>>> urepr.install()
>>> s
u'\N{NO-BREAK SPACE}\N{EURO SIGN}'
>>> eval(urepr.urepr(s)) == s
True
Known issues: this is significantly longer than the default representation.