# This recipe is based on:
#
#   http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061

import re
import sys

# Matches a single character in the range U+2E80..U+FFFF (treated as CJK).
rx = re.compile(u"([\u2e80-\uffff])", re.UNICODE)

# Marker appended after every CJK character while wrapping.  It serves two
# purposes: it flags "the previous word was a CJK character" (so no space is
# inserted after it) and, because it is counted by len(), it makes every CJK
# character weigh two columns.  All markers are stripped from the result.
_MARK = u"\0"


def _tail_width(chunk):
    """Return the number of characters after the last newline in *chunk*.

    When *chunk* has no newline, rfind() yields -1 and the whole length is
    returned.
    """
    return len(chunk) - chunk.rfind(u"\n") - 1


def cjkwrap(text, width, encoding="utf8"):
    """Word-wrap *text*, allowing a line break after any CJK character.

    The text is split on single spaces; additionally every character matched
    by ``rx`` becomes a word of its own.  Words are then glued back together:

    * with a newline when the length of the current line plus the length of
      the word (up to its first embedded newline) is ``>= width``;
    * with nothing when the previous word was a CJK character (so a space
      that followed a CJK character in the input is dropped);
    * with a single space otherwise.

    Every CJK character counts as two columns, every other character as one.
    Newlines already present in *text* are kept and restart the column count.
    Any NUL characters in *text* are removed from the result, because NUL is
    used internally as the CJK marker.

    Args:
        text: The text to wrap.  Byte strings (``bytes``/``bytearray``; plain
            ``str`` on Python 2) are decoded with *encoding*; text strings
            are used as they are.
        width: Column limit used in the ``>= width`` break test above.
        encoding: Codec used to decode byte input and to encode the result.

    Returns:
        The wrapped text: encoded with *encoding* when *text* was a byte
        string, otherwise a text string.
    """
    encoded = isinstance(text, (bytes, bytearray))
    if encoded:
        text = text.decode(encoding)

    # "AB" (two CJK chars) becomes "A\0 B\0 ", so that splitting on spaces
    # yields one marked word per CJK character.
    words = rx.sub(lambda m: m.group(1) + _MARK + u" ", text).split(u" ")

    # Track the current column and the last emitted character incrementally
    # instead of re-scanning (and re-copying) the whole output for each word.
    pieces = [words[0]]
    column = _tail_width(words[0])
    last_char = words[0][-1:]
    for word in words[1:]:
        head = word.split(u"\n", 1)[0]
        if column + len(head) >= width:
            sep = u"\n"
        elif last_char == _MARK:
            sep = u""
        else:
            sep = u" "

        chunk = sep + word
        pieces.append(chunk)
        if u"\n" in chunk:
            column = _tail_width(chunk)
        else:
            column += len(chunk)
        if chunk:
            last_char = chunk[-1]

    wrapped = u"".join(pieces).replace(_MARK, u"")
    return wrapped.encode(encoding) if encoded else wrapped


# Here is the Chinese test message, in 'gbk' encoding.
# Since the recipe site doesn't support Chinese, it is written as hex escapes.
# The pieces are split where the original recipe broke its source lines, so a
# two-byte character may straddle two adjacent literals.
msg = (
    b'\xce\xd2\xc3\xc7\xd7\xd4\xbc\xba\xbf\xc9\xd2\xd4\xb5\xc4\xa3\xac'
    b'\xb2\xbb\xca\xc7\xc2\xf0? whay \xce\xd2\xc3\xc7\xd5\xfd\xb5\xc4\n '
    b'\xd2\xaa\xc7\xf3\xca\xc7\xca\xb2\xc3\xb4\xa1\xa2how to dothat? no '
    b'problem? !! '
    b'\xb5\xab\xca\xc7\xd6\xd0\xce\xc4\xbe\xcd\xb2\xbb\xcd\xac\xc1\xcb\xa3'
    b'\xac\xd2\xbb\xb8\xf6\xba\xba\xd7\xd6\xb5\xc4\xbf\xd5\xbc\xe4\xd5\xbc'
    b'\xd3\xc3\xb5\xc8\xd3\xda2\xb8\xf6\xd3\xa2\xce\xc4\xa1\xa3\xba\xba\xd7'
    b'\xd6\xb5\xc4\xb7\xd6\xb4\xca\xa3\xac\xca\xc7\xc3\xbb\xd3\xd0\xbf\xd5'
    b'\xb8\xf1\xb5\xc4\xa1\xa3\n\n '
    b'\xbd\xe8\xbc\xf8CJKSplitter\xb5\xc4\xbf\xaa\xb7\xa2\xbe\xad\xd1\xe9'
    b'\xa3\xac\xce\xd2\xd7\xbc\xb1\xb8\xd2\xb2\xd7\xf6\xd2\xbb\xb8\xf6\xd6'
    b'\xd0\xce\xc4\xd5\xdb\xd0\xd0\xcb\xe3\xb7\xa8\xa3\xa8\xd3\xa6\xb8\xc3'
    b'\xd2\xb2\xba\xdc\xc8\xdd\xd2\xd7\xd6\xa7\xb3\xd6\xc8\xd5\xba\xab\xce'
    b'\xc4\xd7\xd6\xa3\xa9\xa3\xac\xb5\xb1\xc8\xbb\xb2\xbb\xbf\xc9\xc4\xdc'
    b'\xd3\xd0\xd3\xa2\xce\xc4\xb5\xc4\xc4\xc7\xc3\xb4\xb8\xdf\xd0\xa7\xc1'
    b'\xcb\xa3\xba\n '
    b'\xd3\xa2\xce\xc4\xca\xc7ascii\xa3\xac\xc3\xbf\xb8\xf6\xd7\xd6\xb7\xfb'
    b'\xd5\xbc\xd3\xc3\xbf\xd5\xbc\xe4\xcf\xe0\xcd\xac\xa3\xac\xd6\xb1\xbd'
    b'\xd3\xca\xb9\xd3\xc3\xbf\xd5\xb8\xf1\xbe\xcd\xbf\xc9\xd2\xd4\xb7\xd6'
    b'\xb4\xca\xa1\xa3\xd2\xf2\xb4\xcb\xd3\xa2\xce\xc4\xb0\xe6\xb1\xbe\xb5'
    b'\xc4\xd5\xdb\xd0\xd0\xcb\xe3\xb7\xa8\xd4\xdapython '
    b'cookbook\xc9\xcf\xd3\xd0\xba\xdc\xbe\xad\xb5\xe4\xb5\xc4\xb8\xdf\xd0'
    b'\xa7\xcb\xe3\xb7\xa8\xa3\xba\n\n one-liner word-wrap function'
)


def _write_raw(data):
    """Write the byte string *data* plus a newline to stdout, un-re-encoded.

    Python 3 exposes the byte-level stream as ``sys.stdout.buffer``; on
    Python 2 ``sys.stdout`` itself accepts byte strings.
    """
    stream = getattr(sys.stdout, "buffer", sys.stdout)
    stream.write(data + b"\n")


if __name__ == "__main__":
    # Print the raw message first: you will see some long lines.
    _write_raw(msg)

    # Example: make it fit in 50 columns, using the 'gbk' encoding.
    # You will see the correctly wrapped lines.
    _write_raw(cjkwrap(msg, 50, 'gbk'))