Welcome, guest | Sign In | My Account | Store | Cart
# This recipe refers to:
#
#  http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061

import re
# Matches one character in U+2E80..U+FFFF, the range this recipe treats as
# CJK (double-width) text.
rx = re.compile(u"([\u2e80-\uffff])", re.UNICODE)


def cjkwrap(text, width, encoding="utf8"):
    """Word-wrap *text* containing CJK characters to roughly *width* columns.

    Latin words are only broken at spaces.  Every character matched by
    ``rx`` is treated as a word of its own, so a line may be broken after
    any CJK character, and each such character counts as two columns.

    Args:
        text: The text to wrap.  A byte string is decoded with *encoding*
            and the result is encoded again; already-decoded text is
            processed as-is and returned as text.
        width: A newline is inserted before a word when the length of the
            current line plus the length of the word (up to its first
            embedded newline) is greater than or equal to this value.
        encoding: Codec used to decode/encode byte-string input.

    Returns:
        The wrapped text, of the same kind (bytes or text) as *text*.

    Notes:
        * NUL characters are used internally as markers and are removed
          from the result, including any that were present in the input.
        * A space directly following a CJK character is dropped, because
          no separator is emitted after a CJK character.
        * Newlines already present in the input are kept and reset the
          column count.
    """
    encoded_input = isinstance(text, (bytes, bytearray))
    utext = text.decode(encoding) if encoded_input else text

    # Tag every CJK character with a trailing NUL and a space.  The space
    # makes the character its own "word"; the NUL both marks the word as CJK
    # and inflates its length by one so that it counts as two columns.
    words = rx.sub(r'\1\0 ', utext).split(' ')

    first = words[0]
    pieces = [first]
    # Number of characters (NUL markers included) since the last newline.
    column = len(first) - first.rfind('\n') - 1
    after_cjk = first[-1:] == '\0'

    for word in words[1:]:
        # Only the part of the word before an embedded newline has to fit.
        head = len(word.split('\n', 1)[0])
        if column + head >= width:
            sep = '\n'
        elif after_cjk:
            sep = ''
        else:
            sep = ' '

        piece = sep + word
        pieces.append(piece)

        if '\n' in piece:
            column = len(piece) - piece.rfind('\n') - 1
        else:
            column += len(piece)
        # An empty piece leaves the end of the line untouched.
        if piece:
            after_cjk = piece[-1:] == '\0'

    wrapped = ''.join(pieces).replace('\0', '')
    return wrapped.encode(encoding) if encoded_input else wrapped

# Here is the Chinese test message; it is in Chinese 'gbk' encoding.
# Since this site doesn't support Chinese, I have to use hex format :(
# The literal mixes gbk-encoded Chinese text, ASCII words and embedded
# newlines; the trailing backslashes are line continuations inside the string.
msg='\xce\xd2\xc3\xc7\xd7\xd4\xbc\xba\xbf\xc9\xd2\xd4\xb5\xc4\xa3\xac\
\xb2\xbb\xca\xc7\xc2\xf0? whay \xce\xd2\xc3\xc7\xd5\xfd\xb5\xc4\n \
\xd2\xaa\xc7\xf3\xca\xc7\xca\xb2\xc3\xb4\xa1\xa2how to dothat? no \
problem? !! \
\xb5\xab\xca\xc7\xd6\xd0\xce\xc4\xbe\xcd\xb2\xbb\xcd\xac\xc1\xcb\xa3\
\xac\xd2\xbb\xb8\xf6\xba\xba\xd7\xd6\xb5\xc4\xbf\xd5\xbc\xe4\xd5\xbc\
\xd3\xc3\xb5\xc8\xd3\xda2\xb8\xf6\xd3\xa2\xce\xc4\xa1\xa3\xba\xba\xd7\
\xd6\xb5\xc4\xb7\xd6\xb4\xca\xa3\xac\xca\xc7\xc3\xbb\xd3\xd0\xbf\xd5\
\xb8\xf1\xb5\xc4\xa1\xa3\n\n \
\xbd\xe8\xbc\xf8CJKSplitter\xb5\xc4\xbf\xaa\xb7\xa2\xbe\xad\xd1\xe9\
\xa3\xac\xce\xd2\xd7\xbc\xb1\xb8\xd2\xb2\xd7\xf6\xd2\xbb\xb8\xf6\xd6\
\xd0\xce\xc4\xd5\xdb\xd0\xd0\xcb\xe3\xb7\xa8\xa3\xa8\xd3\xa6\xb8\xc3\
\xd2\xb2\xba\xdc\xc8\xdd\xd2\xd7\xd6\xa7\xb3\xd6\xc8\xd5\xba\xab\xce\
\xc4\xd7\xd6\xa3\xa9\xa3\xac\xb5\xb1\xc8\xbb\xb2\xbb\xbf\xc9\xc4\xdc\
\xd3\xd0\xd3\xa2\xce\xc4\xb5\xc4\xc4\xc7\xc3\xb4\xb8\xdf\xd0\xa7\xc1\
\xcb\xa3\xba\n \
\xd3\xa2\xce\xc4\xca\xc7ascii\xa3\xac\xc3\xbf\xb8\xf6\xd7\xd6\xb7\xfb\
\xd5\xbc\xd3\xc3\xbf\xd5\xbc\xe4\xcf\xe0\xcd\xac\xa3\xac\xd6\xb1\xbd\
\xd3\xca\xb9\xd3\xc3\xbf\xd5\xb8\xf1\xbe\xcd\xbf\xc9\xd2\xd4\xb7\xd6\
\xb4\xca\xa1\xa3\xd2\xf2\xb4\xcb\xd3\xa2\xce\xc4\xb0\xe6\xb1\xbe\xb5\
\xc4\xd5\xdb\xd0\xd0\xcb\xe3\xb7\xa8\xd4\xdapython \
cookbook\xc9\xcf\xd3\xd0\xba\xdc\xbe\xad\xb5\xe4\xb5\xc4\xb8\xdf\xd0\
\xa7\xcb\xe3\xb7\xa8\xa3\xba\n\n one-liner word-wrap function'

# OK, now print the raw message here; you will see some long lines.
# NOTE: `print` is used as a statement (Python 2 syntax).
print msg

# Example: make it fit in 50 columns, decoding/encoding with 'gbk'.
print cjkwrap(msg, 50, 'gbk')

# You will see the correctly wrapped lines ...

History

  • revision 3 (19 years ago)
  • previous revisions are not available