"""Scramble the interior letters of words while keeping them readable.

Each sufficiently long word gets one random pair of adjacent interior
characters swapped; the first and last characters (and any trailing
punctuation) stay where they are.
"""

import random
import re

try:
    import StringIO  # Python 2
except ImportError:
    # Python 3: io.StringIO offers the same StringIO(text).readlines() API.
    import io as StringIO


class misspell(object):
    """Misspell text by swapping two adjacent interior characters per word."""

    # Punctuation marks that may trail a word and must never be moved.
    PUNCTUATION = ",'.:;!?"

    # A hyphen with non-whitespace on both sides, i.e. one joining two parts
    # of a hyphenated word (lone or leading/trailing hyphens do not match).
    _INNER_HYPHEN = re.compile(r'(?<=\S)-(?=\S)')

    def __init__(self):
        # Regex matching a word that ends with a punctuation mark.  Retained
        # as a public attribute for backward compatibility; misspell() itself
        # strips trailing punctuation directly via PUNCTUATION.
        self.punctuation = re.compile(
            r'\S+[' + re.escape(self.PUNCTUATION) + ']$')

    def misspell(self, text):
        """Return *text* with the words of every line misspelled.

        Numbers, email addresses and URLs are copied unchanged.  Hyphenated
        words are split into independent words (the hyphens become spaces).
        Every emitted word is followed by a single space and every input
        line is terminated with a newline, so runs of whitespace collapse.

        The input lines are also stored on ``self.text``.
        """
        self.text = StringIO.StringIO(text).readlines()
        misspelled = []
        for line in self.text:
            for token in line.split():
                # Check for an email address or URL *before* touching
                # hyphens, otherwise "http://my-site.example" would be torn
                # apart and its tail scrambled.
                if '@' in token or '://' in token:
                    misspelled.append(token + ' ')
                    continue
                # Split at every interior hyphen, not just the last one.
                for word in self._INNER_HYPHEN.sub(' ', token).split():
                    misspelled.append(self._scramble(word) + ' ')
            # end the line
            misspelled.append('\n')
        return ''.join(misspelled)

    def _scramble(self, word):
        """Return *word* with one random adjacent interior pair swapped."""
        # don't misspell a number
        if word.isdigit():
            return word
        # Ignore all trailing punctuation when picking the swap position so
        # that a letter is never exchanged with a punctuation mark.
        core_len = len(word.rstrip(self.PUNCTUATION))
        # At least four characters are needed to have two interior ones.
        if core_len < 4:
            return word
        start = random.randint(1, core_len - 3)
        chars = list(word)
        chars[start], chars[start + 1] = chars[start + 1], chars[start]
        return ''.join(chars)


if __name__ == '__main__':
    # example usage of the misspell class
    message = (
        " According to research at an English University, it doesn't matter"
        " in what order the letters in a word are, the only important thing"
        " is that the first and last letters be in the right places. The"
        " rest can be a total mess and you can still read it without"
        " problem. This is because the human mind does not read every"
        " letter by itself, but the word as a whole."
    )
    msg = misspell()
    print(msg.misspell(message))