Welcome, guest | Sign In | My Account | Store | Cart

Convert numbers represented the way they are spoken to actual numbers

Python, 109 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
import re
# Lookup table: every single English number word the parser understands.
# The multipliers 'hundred' and 'thousand' are deliberately absent; they are
# handled structurally by spoken_word_to_number().
_known = {
    'zero': 0,
    'one': 1,
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'seven': 7,
    'eight': 8,
    'nine': 9,
    'ten': 10,
    'eleven': 11,
    'twelve': 12,
    'thirteen': 13,
    'fourteen': 14,
    'fifteen': 15,
    'sixteen': 16,
    'seventeen': 17,
    'eighteen': 18,
    'nineteen': 19,
    'twenty': 20,
    'thirty': 30,
    'forty': 40,
    'fifty': 50,
    'sixty': 60,
    'seventy': 70,
    'eighty': 80,
    'ninety': 90,
}


def spoken_word_to_number(n):
    """Convert a number written the way it is spoken into an int.

    The text is lower-cased, split on whitespace and hyphens, and read from
    the last word to the first while tracking which decimal place the next
    word belongs to (unit -> ten -> hundred -> thousand).  This is what lets
    colloquial forms such as 'nineteen eighty nine' or 'two thirty two' work
    without an explicit 'hundred'.

    The filler word 'and' and the sign words 'minus'/'negative' are ignored,
    so the result is never negated.

    Args:
        n: the spoken form, e.g. 'one thousand nine hundred and eighty nine'.

    Returns:
        The integer value of the phrase.

    Raises:
        ValueError: if the text contains no number words, or if 'hundred'
            appears inside the thousands group (e.g. 'one hundred thousand'),
            which this parser does not support.
        KeyError: if a word is not a recognised number word.

    Examples:
        >>> spoken_word_to_number('nine hundred')
        900
        >>> spoken_word_to_number('eleven')
        11
        >>> spoken_word_to_number('thirty-two')
        32
        >>> spoken_word_to_number('two thirty two')
        232
        >>> spoken_word_to_number('nineteen hundred eighty nine')
        1989
        >>> spoken_word_to_number('nineteen eighty nine')
        1989
        >>> spoken_word_to_number('one thousand nine hundred and eighty nine')
        1989
        >>> spoken_word_to_number('nine eighty')
        980
        >>> spoken_word_to_number('nine two')
        92
        >>> spoken_word_to_number('nine thousand nine hundred')
        9900
        >>> spoken_word_to_number('one thousand nine hundred one')
        1901
        >>> spoken_word_to_number('one thousand seven')
        1007
    """
    n = n.lower().strip()
    if n in _known:
        return _known[n]

    # Split on runs of whitespace/hyphens so doubled separators do not yield
    # empty tokens, and drop filler words *before* looking at the first and
    # last word so they cannot hide a leading or trailing multiplier.
    ignored = ('and', 'minus', 'negative')
    words = [w for w in re.split(r'[\s-]+', n) if w and w not in ignored]
    if not words:
        raise ValueError('no number words found in %r' % (n,))

    # A trailing multiplier has no lower places spelled out; pad them with
    # zeros so the right-to-left scan reaches the multiplier in the right
    # state ('nine hundred' -> 'nine hundred zero zero').
    if words[-1] == 'hundred':
        words.extend(['zero', 'zero'])
    elif words[-1] == 'thousand':
        words.extend(['zero', 'zero', 'zero'])
    # A bare leading multiplier means one of them ('hundred five' -> 105).
    if words[0] in ('hundred', 'thousand'):
        words.insert(0, 'one')

    position = 'unit'
    output = 0
    for word in reversed(words):
        if position == 'unit':
            number = _known[word]
            output += number
            # A teen or a multiple of ten already fills the tens place.
            position = 'hundred' if number > 9 else 'ten'
        elif position == 'ten':
            if word == 'thousand':
                # 'one thousand seven': tens and hundreds are both absent.
                position = 'thousand'
            else:
                if word != 'hundred':
                    number = _known[word]
                    # A bare digit here is a spoken tens digit ('nine two').
                    output += (number * 10) if number < 10 else number
                position = 'hundred'
        elif position == 'hundred':
            if word == 'thousand':
                position = 'thousand'
            elif word != 'hundred':
                # With or without an explicit 'hundred', this word counts
                # hundreds ('nineteen eighty nine' -> 19 * 100 + 89).
                output += _known[word] * 100
                position = 'thousand'
            # A literal 'hundred' only labels the place: state is unchanged.
        else:  # position == 'thousand'
            if word == 'hundred':
                raise ValueError(
                    "unsupported phrase %r: 'hundred' cannot appear in the "
                    "thousands group" % (n,))
            if word != 'thousand':
                output += _known[word] * 1000

    return output

To accept user input in a natural format, a program needs to read numbers the way people sometimes write or speak them. I am writing a natural-language-to-FQL converter, so I needed to build this. The lookup table at the top was taken from http://code.activestate.com/recipes/413172-numbers-and-plural-words-as-spoken-english

3 comments

AR 9 years, 11 months ago  # | flag

This code does not accept "one hundred seven" -> 107 or "one thousand seven". Any work around for this?

AR 9 years, 11 months ago  # | flag

Add:

'oh':0 to _known dictionary

And the part with:

    if word != 'hundred':
        number = _known[word]
        if number < 10:
            output += number*10
        else:
            output += number
    #else: nothing special

is modified to:

    if word not in ['hundred','thousand']:
        number = _known[word]
        if number < 10:
            output += number*10
        else:
            output += number
        currentPosition = 'hundred'
    elif word == 'hundred':
        currentPosition = 'hundred'
    elif word == 'thousand':
        currentPosition = 'thousand'

However, there are still some exceptions: Example: "ninety three oh one"

Akshay Nagpal 8 years, 3 months ago  # | flag

I have just released a python module to PyPI called word2number for the exact purpose. https://github.com/akshaynagpal/w2n

Install it using:

pip install word2number

make sure your pip is updated to the latest version.

Usage:

from word2number import w2n

print w2n.word_to_num("two million three thousand nine hundred and eighty four")
2003984