#!/usr/bin/python
# accent2htmlcode.py might be useful to someone out there
# on the other hand, if it causes extensive damage
# to your computer, home and property I am not responsible in
# any way
"""Replace accented and special characters in a text file by HTML entities.

Run as a script, it asks for a file name, reads that file, swaps every
character listed in ``funnychars`` for the entity at the same position in
``htmlcodes`` and writes the converted text to ``result.txt`` in the current
directory. Characters that are not in the table are copied unchanged.
"""

import io

# Named HTML entities, index-aligned with ``funnychars`` below.
# Only ASCII appears here so the script itself needs no encoding declaration.
htmlcodes = [
    '&Aacute;', '&aacute;', '&Agrave;', '&Acirc;', '&agrave;', '&Acirc;',
    '&acirc;', '&Auml;', '&auml;', '&Atilde;', '&atilde;', '&Aring;',
    '&aring;', '&AElig;', '&aelig;', '&Ccedil;', '&ccedil;', '&ETH;',
    '&eth;', '&Eacute;', '&eacute;', '&Egrave;', '&egrave;', '&Ecirc;',
    '&ecirc;', '&Euml;', '&euml;', '&Iacute;', '&iacute;', '&Igrave;',
    '&igrave;', '&Icirc;', '&icirc;', '&Iuml;', '&iuml;', '&Ntilde;',
    '&ntilde;', '&Oacute;', '&oacute;', '&Ograve;', '&ograve;', '&Ocirc;',
    '&ocirc;', '&Ouml;', '&ouml;', '&Otilde;', '&otilde;', '&Oslash;',
    '&oslash;', '&szlig;', '&THORN;', '&thorn;', '&Uacute;', '&uacute;',
    '&Ugrave;', '&ugrave;', '&Ucirc;', '&ucirc;', '&Uuml;', '&uuml;',
    '&Yacute;', '&yacute;', '&yuml;', '&copy;', '&reg;', '&trade;',
    '&euro;', '&cent;', '&pound;', '&lsquo;', '&rsquo;', '&ldquo;',
    '&rdquo;', '&laquo;', '&raquo;', '&mdash;', '&ndash;', '&deg;',
    '&plusmn;', '&frac14;', '&frac12;', '&frac34;', '&times;', '&divide;',
    '&alpha;', '&beta;', '&infin;',
]

# Characters to replace. The ``u`` prefix makes every entry a single
# Unicode character on Python 2 as well; without it the ``\uXXXX`` entries
# are six-character byte strings there and can never match.
# NOTE: u'\xc2' is listed twice (positions 3 and 5). Both positions map to
# the same entity, so the duplicate is harmless; it is kept so the two
# lists stay the same length and order as before.
funnychars = [
    u'\xc1', u'\xe1', u'\xc0', u'\xc2', u'\xe0', u'\xc2',
    u'\xe2', u'\xc4', u'\xe4', u'\xc3', u'\xe3', u'\xc5',
    u'\xe5', u'\xc6', u'\xe6', u'\xc7', u'\xe7', u'\xd0',
    u'\xf0', u'\xc9', u'\xe9', u'\xc8', u'\xe8', u'\xca',
    u'\xea', u'\xcb', u'\xeb', u'\xcd', u'\xed', u'\xcc',
    u'\xec', u'\xce', u'\xee', u'\xcf', u'\xef', u'\xd1',
    u'\xf1', u'\xd3', u'\xf3', u'\xd2', u'\xf2', u'\xd4',
    u'\xf4', u'\xd6', u'\xf6', u'\xd5', u'\xf5', u'\xd8',
    u'\xf8', u'\xdf', u'\xde', u'\xfe', u'\xda', u'\xfa',
    u'\xd9', u'\xf9', u'\xdb', u'\xfb', u'\xdc', u'\xfc',
    u'\xdd', u'\xfd', u'\xff', u'\xa9', u'\xae', u'\u2122',
    u'\u20ac', u'\xa2', u'\xa3', u'\u2018', u'\u2019', u'\u201c',
    u'\u201d', u'\xab', u'\xbb', u'\u2014', u'\u2013', u'\xb0',
    u'\xb1', u'\xbc', u'\xbd', u'\xbe', u'\xd7', u'\xf7',
    u'\u03b1', u'\u03b2', u'\u221e',
]

# One dictionary lookup per character instead of a list scan plus index().
_ENTITY_BY_CHAR = dict(zip(funnychars, htmlcodes))


def accent_to_html(text):
    """Return *text* with every character from ``funnychars`` replaced.

    Args:
        text: Unicode text to convert.

    Returns:
        A new string in which each listed character is replaced by its
        named HTML entity; all other characters are kept as they are.
    """
    return u''.join(_ENTITY_BY_CHAR.get(char, char) for char in text)


def _read_text(path):
    """Read *path* and return ``(text, encoding)``.

    The file is decoded as UTF-8 when its bytes are valid UTF-8. Otherwise
    it is decoded as Latin-1, which accepts any byte sequence and matches
    the single-byte code points in ``funnychars``.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    with io.open(path, 'rb') as source:
        raw = source.read()
    try:
        return raw.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        return raw.decode('latin-1'), 'latin-1'


def main():
    """Ask for a file name, convert that file and write ``result.txt``."""
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3
    filename = ask("Write the full name of the file you wish to fix: \n")
    text, encoding = _read_text(filename)
    # Write with the encoding used for reading so characters that are not
    # in the table reach the output as the same bytes they came in as.
    with io.open('result.txt', 'wb') as resultfile:
        resultfile.write(accent_to_html(text).encode(encoding))


if __name__ == '__main__':
    main()