'''
Strip diacritical marks from strings made of characters from any latin
alphabet.

Tested on both Python 2.x and Python 3.x
'''

import unicodedata


def remove_diacritic(input):
    '''
    Take a unicode string and return it as a plain string (bytes in
    Python 3) with every diacritical mark removed.

    The text is first put into NFKD form, which separates an accented
    character into its base character plus combining marks. It is then
    encoded to ASCII with errors ignored, so every code point outside
    ASCII (the combining marks included) is dropped from the result.
    '''
    decomposed = unicodedata.normalize('NFKD', input)
    ascii_only = decomposed.encode('ASCII', 'ignore')
    return ascii_only


if __name__ == '__main__':
    import sys

    sample = '\xc0 quelle \xe9cole va-tu?'
    if sys.hexversion < 0x3000000:
        # Python < 3.0.0: the literal is a byte string, so decode it as
        # ISO-8859-1 to obtain the unicode string the function expects.
        result = remove_diacritic(unicode(sample, 'ISO-8859-1'))
    else:
        # Python >= 3.0.0: the function returns bytes; decode them back
        # to text so they compare equal to the expected str below.
        result = remove_diacritic(sample).decode()
    print(sample)
    print(result)
    assert result == 'A quelle ecole va-tu?'