# Welcome, guest | Sign In | My Account | Store | Cart
import random
import re
import StringIO

class misspell(object):
    """Scramble the interior letters of words while keeping them readable.

    Every sufficiently long word gets one random pair of adjacent interior
    characters swapped.  The first character, the last character and a single
    trailing punctuation mark are never moved.
    """

    # Mirrors file-style readlines(): each match is one line including its
    # terminating '\n', plus a final unterminated line if there is one.
    _LINES = re.compile(r'[^\n]*\n|[^\n]+')

    # A hyphen with a non-space character on both sides, i.e. one that joins
    # two parts of a compound word (leading/trailing hyphens are left alone).
    _INNER_HYPHEN = re.compile(r'(?<=\S)-(?=\S)')

    def __init__(self):
        # regex matching a word that ends with one punctuation character
        self.punctuation = re.compile(r'\S+[' + re.escape(",'.:;!?") + ']$')

    def _scramble(self, word):
        """Return `word` with one random adjacent interior pair swapped.

        Words with fewer than four characters (not counting one trailing
        punctuation mark) are returned unchanged.
        """
        # A trailing punctuation mark is not part of the word's interior.
        tail = 1 if self.punctuation.match(word) else 0
        letters = len(word) - tail
        if letters < 4:
            return word

        chars = list(word)
        # Pick the left index of the pair so that neither the first nor the
        # last letter of the word is ever touched.
        start = random.randint(1, letters - 3)
        chars[start], chars[start + 1] = chars[start + 1], chars[start]
        return ''.join(chars)

    def misspell(self, text):
        """Return `text` with the interior letters of its words scrambled.

        Whitespace inside a line is normalised: every output word is followed
        by a single space and every input line ends with a newline.  Numbers,
        e-mail addresses and URLs are copied through untouched.  Hyphenated
        words are split into independent words (the hyphens become spaces).

        The split input lines are also stored on `self.text`.
        """
        self.text = self._LINES.findall(text)
        misspelled = []
        for line in self.text:
            for token in line.split():
                # Don't misspell an e-mail address or URL.  This has to be
                # checked before hyphens are split, otherwise an address
                # containing '-' would be broken apart and partly scrambled.
                if '@' in token or '://' in token:
                    misspelled.append(token + ' ')
                    continue

                # split hyphenated words into independent words
                for word in self._INNER_HYPHEN.sub(' ', token).split():
                    # don't misspell a number
                    if word.isdigit():
                        misspelled.append(word + ' ')
                    else:
                        misspelled.append(self._scramble(word) + ' ')

            # end the line
            misspelled.append('\n')

        return ''.join(misspelled)

if __name__ == '__main__':
    # Example usage of the misspell class: scramble a short paragraph and
    # write the result to standard output.
    message = """
    According to research at an English University, it doesn't matter 
    in what order the letters in a word are, the only important thing is 
    that the first and last letters be in the right places. The rest can
    be a total mess and you can still read it without problem. This is
    because the human mind does not read every letter by itself, but 
    the word as a whole."""
 
    msg = misspell()
    # Single-argument call form: prints the same on Python 2 and also
    # parses on Python 3, unlike the bare print statement.
    print(msg.misspell(message))
    

# History
#
#   - revision 2 (18 years ago)
#   - previous revisions are not available