示例#1
0
def extractnumber_en(text, short_scale=True, ordinals=False):
    """
    This function extracts a number from a text string,
    handles pronunciations in long scale and short scale

    https://en.wikipedia.org/wiki/Names_of_large_numbers

    Args:
        text (str): the string to normalize
        short_scale (bool): use short scale if True, long scale if False
        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
    Returns:
        (int) or (float) or False: The extracted number or False if no number
                                   was found

    """

    string_num_en = {
        "half": 0.5,
        "halves": 0.5,
        "couple": 2,
        "hundred": 100,
        "hundreds": 100,
        "thousand": 1000,
        "thousands": 1000,
        "million": 1000000,
        'millions': 1000000}

    string_num_ordinal_en = {}

    for num in NUM_STRING_EN:
        num_string = NUM_STRING_EN[num]
        string_num_en[num_string] = num

    # first, second...
    if ordinals:
        if short_scale:
            for num in SHORT_ORDINAL_STRING_EN:
                num_string = SHORT_ORDINAL_STRING_EN[num]
                string_num_ordinal_en[num_string] = num
                string_num_en[num_string] = num
        else:
            for num in LONG_ORDINAL_STRING_EN:
                num_string = LONG_ORDINAL_STRING_EN[num]
                string_num_ordinal_en[num_string] = num
                string_num_en[num_string] = num

    # negate next number (-2 = 0 - 2)
    negatives = ["negative", "minus"]

    # sum the next number (twenty two = 20 + 2)
    sums = ['twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty',
            'ninety']

    # multiply the previous number (one hundred = 1 * 100)
    multiplies = ["hundred", "thousand", "hundreds", "thousands", "million",
                  "millions"]

    # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
    fraction_marker = [" and "]

    # decimal marker ( 1 point 5 = 1 + 0.5)
    decimal_marker = [" point ", " dot "]

    if short_scale:
        for num in SHORT_SCALE_EN:
            num_string = SHORT_SCALE_EN[num]
            string_num_en[num_string] = num
            string_num_en[num_string + "s"] = num
            multiplies.append(num_string)
            multiplies.append(num_string + "s")
    else:
        for num in LONG_SCALE_EN:
            num_string = LONG_SCALE_EN[num]
            string_num_en[num_string] = num
            string_num_en[num_string + "s"] = num
            multiplies.append(num_string)
            multiplies.append(num_string + "s")

    # 2 and 3/4
    for c in fraction_marker:
        components = text.split(c)

        if len(components) == 2:
            # ensure first is not a fraction and second is a fraction
            num1 = extractnumber_en(components[0])
            num2 = extractnumber_en(components[1])
            if num1 is not None and num2 is not None \
                    and num1 >= 1 and 0 < num2 < 1:
                return num1 + num2

    # 2 point 5
    for c in decimal_marker:
        components = text.split(c)
        if len(components) == 2:
            number = extractnumber_en(components[0])
            decimal = extractnumber_en(components[1])
            if number is not None and decimal is not None:
                # TODO handle number dot number number number
                if "." not in str(decimal):
                    return number + float("0." + str(decimal))

    aWords = text.split()
    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
    val = False
    prev_val = None
    to_sum = []
    for idx, word in enumerate(aWords):

        if not word:
            continue
        prev_word = aWords[idx - 1] if idx > 0 else ""
        next_word = aWords[idx + 1] if idx + 1 < len(aWords) else ""

        # is this word already a number ?
        if is_numeric(word):
            # if word.isdigit():            # doesn't work with decimals
            val = float(word)

        # is this word the name of a number ?
        if word in string_num_en:
            val = string_num_en[word]

        # is the prev word an ordinal number and current word is one?
        # second one, third one
        if ordinals and prev_word in string_num_ordinal_en and val is 1:
            val = prev_val

        # is the prev word a number and should we sum it?
        # twenty two, fifty six
        if prev_word in sums and word in string_num_en:
            if val and val < 10:
                val = prev_val + val

        # is the prev word a number and should we multiply it?
        # twenty hundred, six hundred
        if word in multiplies:
            if not prev_val:
                prev_val = 1
            val = prev_val * val

        # is this a spoken fraction?
        # half cup
        if val is False:
            val = isFractional_en(word, short_scale=short_scale)

        # 2 fifths
        if not ordinals:
            next_value = isFractional_en(next_word, short_scale=short_scale)
            if next_value:
                if not val:
                    val = 1
                val = val * next_value

        # is this a negative number?
        if val and prev_word and prev_word in negatives:
            val = 0 - val

        # let's make sure it isn't a fraction
        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        else:
            prev_val = val

            # handle long numbers
            # six hundred sixty six
            # two million five hundred thousand
            if word in multiplies and next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0

    if val is not None:
        for v in to_sum:
            val = val + v
    return val
示例#2
0
def extractnumber_it(text):
    """
    Questa funzione prepara il testo dato per l'analisi rendendo
    numeri testuali come interi o frazioni.
    In italiano non è un modo abituale ma può essere interessante
    per Mycroft
    E' la versione portoghese riadattata in italiano
     args:
         text (str): la stringa da normalizzare
    Ritorna:
         (int) o (float): il valore del numero estratto

    """
    aWords = text.split()
    count = 0
    result = None
    while count < len(aWords):
        val = 0
        word = aWords[count]
        next_next_word = None
        if count + 1 < len(aWords):
            next_word = aWords[count + 1]
            if count + 2 < len(aWords):
                next_next_word = aWords[count + 2]
        else:
            next_word = None

        # is current word a number?
        if word in it_numbers:
            if word == "mila":
                val = it_numbers[word]
                val = result * val
                result = 0
            else:
                val = it_numbers[word]

        elif word.isdigit():  # doesn't work with decimals
            val = int(word)
        elif is_numeric(word):
            val = float(word)

        elif isFractional_it(word):
            if not result:
                result = 1
            result = result * isFractional_it(word)
            # "un terzo" is 1/3 but "il terzo" is 3
            if aWords[count - 1] == "il":
                result = 1.0 // isFractional_it(word)

            count += 1
            continue

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        if not val:
            # cerca numero composto come ventuno ventitre centoventi"
            val = extractnumber_long_it(word)

        if val:
            if result is None:
                result = 0
            # handle fractions
            # if next_word != "avos":
            result += val
            # else:
            #    result = float(result) / float(val)

        if next_word is None:
            break

        # number word and fraction
        ands = ["e"]
        if next_word in ands:
            zeros = 0
            if result is None:
                count += 1
                continue
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "

            afterAndVal = extractnumber_it(newText[:-1])
            if afterAndVal:
                if result < afterAndVal or result < 20:
                    while afterAndVal > 1:
                        afterAndVal = afterAndVal / 10.0
                    for word in newWords:
                        if word == "zero" or word == "0":
                            zeros += 1
                        else:
                            break
                for _ in range(0, zeros):
                    afterAndVal = afterAndVal / 10.0
                result += afterAndVal
                break
        elif next_next_word is not None:
            if next_next_word in ands:
                newWords = aWords[count + 3:]
                newText = ""
                for word in newWords:
                    newText += word + " "
                afterAndVal = extractnumber_it(newText[:-1])
                if afterAndVal:
                    if result is None:
                        result = 0
                    result += afterAndVal
                    break

        decimals = ["punto", "virgola", ".", ","]
        if next_word in decimals:
            zeros = 0
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "
            for word in newWords:
                if word == "zero" or word == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = str(extractnumber_it(newText[:-1]))
            afterDotVal = zeros * "0" + afterDotVal
            result = float(str(result) + "." + afterDotVal)
            break
        count += 1

    if result is None:
        return False

    # Return the $str with the number related words removed
    # (now empty strings, so strlen == 0)
    # aWords = [word for word in aWords if len(word) > 0]
    # text = ' '.join(aWords)
    if "." in str(result):
        integer, dec = str(result).split(".")
        # cast float to int
        if dec == "0":
            result = int(integer)

    return result
示例#3
0
def extractnumber_sv(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number
    """
    aWords = text.split()
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            val = float(word)
        elif word == "första":
            val = 1
        elif word == "andra":
            val = 2
        elif word == "tredje":
            val = 3
        elif word == "fjärde":
            val = 4
        elif word == "femte":
            val = 5
        elif word == "sjätte":
            val = 6
        elif is_fractional_sv(word):
            val = is_fractional_sv(word)
        else:
            if word == "en":
                val = 1
            if word == "ett":
                val = 1
            elif word == "två":
                val = 2
            elif word == "tre":
                val = 3
            elif word == "fyra":
                val = 4
            elif word == "fem":
                val = 5
            elif word == "sex":
                val = 6
            elif word == "sju":
                val = 7
            elif word == "åtta":
                val = 8
            elif word == "nio":
                val = 9
            elif word == "tio":
                val = 10
            if val:
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = is_fractional_sv(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "och"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'och':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'och':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
示例#4
0
def extractnumber_fr(text):
    """Takes in a string and extracts a number.
    Args:
        text (str): the string to extract a number from
    Returns:
        (str): The number extracted or the original text.
    """
    # normalize text, keep articles for ordinals versus fractionals
    text = normalize_fr(text, False)
    # split words by whitespace
    aWords = text.split()
    count = 0
    result = None
    add = False
    while count < len(aWords):
        val = None
        word = aWords[count]
        wordNext = ""
        wordPrev = ""
        if count < (len(aWords) - 1):
            wordNext = aWords[count + 1]
        if count > 0:
            wordPrev = aWords[count - 1]

        if word in articles_fr:
            count += 1
            continue
        if word in ["et", "plus", "+"]:
            count += 1
            add = True
            continue

        # is current word a numeric number?
        if word.isdigit():
            val = int(word)
            count += 1
        elif is_numeric(word):
            val = float(word)
            count += 1
        elif wordPrev in articles_fr and getOrdinal_fr(word):
            val = getOrdinal_fr(word)
            count += 1
        # is current word the denominator of a fraction?
        elif isFractional_fr(word):
            val = isFractional_fr(word)
            count += 1

        # is current word the numerator of a fraction?
        if val and wordNext:
            valNext = isFractional_fr(wordNext)
            if valNext:
                val = float(val) * valNext
                count += 1

        if not val:
            count += 1
            # is current word a numeric fraction like "2/3"?
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        # is current word followed by a decimal value?
        if wordNext == "virgule":
            zeros = 0
            newWords = aWords[count + 1:]
            # count the number of zeros after the decimal sign
            for word in newWords:
                if word == "zéro" or word == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = None
            # extract the number after the zeros
            if newWords[zeros].isdigit():
                afterDotVal = newWords[zeros]
                countDot = count + zeros + 2
            # if a number was extracted (since comma is also a
            # punctuation sign)
            if afterDotVal:
                count = countDot
                if not val:
                    val = 0
                # add the zeros
                afterDotString = zeros * "0" + afterDotVal
                val = float(str(val) + "." + afterDotString)
        if val:
            if add:
                result += val
                add = False
            else:
                result = val

    # if result == False:
    if not result:
        return normalize_fr(text, True)

    return result
示例#5
0
def extractnumber_da(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number


    undefined articles cannot be suppressed in German:
    'ein Pferd' means 'one horse' and 'a horse'

    """
    aWords = text.split()
    aWords = [word for word in aWords if word not in ["den", "det"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            if word.isdigit():  # doesn't work with decimals
                val = float(word)
        elif isFractional_da(word):
            val = isFractional_da(word)
        elif isOrdinal_da(word):
            val = isOrdinal_da(word)
        else:
            if word in da_numbers:
                val = da_numbers[word]
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_da(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
示例#6
0
def extractnumber_da(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number


    undefined articles cannot be suppressed in German:
    'ein Pferd' means 'one horse' and 'a horse'

    """
    aWords = text.split()
    aWords = [word for word in aWords if
              word not in ["den", "det"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            if word.isdigit():            # doesn't work with decimals
                val = float(word)
        elif isFractional_da(word):
            val = isFractional_da(word)
        elif isOrdinal_da(word):
            val = isOrdinal_da(word)
        else:
            if word in da_numbers:
                val = da_numbers[word]
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_da(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "og"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'og':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
示例#7
0
def extractnumber_it(text):
    """
    Questa funzione prepara il testo dato per l'analisi rendendo
    numeri testuali come interi o frazioni.
    In italiano non è un modo abituale ma può essere interessante
    per Mycroft
    E' la versione portoghese riadattata in italiano
     args:
         text (str): la stringa da normalizzare
    Ritorna:
         (int) o (float): il valore del numero estratto

    """
    aWords = text.split()
    count = 0
    result = None
    while count < len(aWords):
        val = 0
        word = aWords[count]
        next_next_word = None
        if count + 1 < len(aWords):
            next_word = aWords[count + 1]
            if count + 2 < len(aWords):
                next_next_word = aWords[count + 2]
        else:
            next_word = None

        # is current word a number?
        if word in it_numbers:
            if word == "mila":
                val = it_numbers[word]
                val = result * val
                result = 0
            else:
                val = it_numbers[word]

        elif word.isdigit():  # doesn't work with decimals
            val = int(word)
        elif is_numeric(word):
            val = float(word)

        elif isFractional_it(word):
            if not result:
                result = 1
            result = result * isFractional_it(word)
            # "un terzo" is 1/3 but "il terzo" is 3
            if aWords[count - 1] == "il":
                result = 1.0 // isFractional_it(word)

            count += 1
            continue

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        if not val:
            # cerca numero composto come ventuno ventitre centoventi"
            val = extractnumber_long_it(word)

        if val:
            if result is None:
                result = 0
            # handle fractions
            # if next_word != "avos":
            result += val
            # else:
            #    result = float(result) / float(val)

        if next_word is None:
            break

        # number word and fraction
        ands = ["e"]
        if next_word in ands:
            zeros = 0
            if result is None:
                count += 1
                continue
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "

            afterAndVal = extractnumber_it(newText[:-1])
            if afterAndVal:
                if result < afterAndVal or result < 20:
                    while afterAndVal > 1:
                        afterAndVal = afterAndVal / 10.0
                    for word in newWords:
                        if word == "zero" or word == "0":
                            zeros += 1
                        else:
                            break
                for _ in range(0, zeros):
                    afterAndVal = afterAndVal / 10.0
                result += afterAndVal
                break
        elif next_next_word is not None:
            if next_next_word in ands:
                newWords = aWords[count + 3:]
                newText = ""
                for word in newWords:
                    newText += word + " "
                afterAndVal = extractnumber_it(newText[:-1])
                if afterAndVal:
                    if result is None:
                        result = 0
                    result += afterAndVal
                    break

        decimals = ["punto", "virgola", ".", ","]
        if next_word in decimals:
            zeros = 0
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "
            for word in newWords:
                if word == "zero" or word == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = str(extractnumber_it(newText[:-1]))
            afterDotVal = zeros * "0" + afterDotVal
            result = float(str(result) + "." + afterDotVal)
            break
        count += 1

    if result is None:
        return False

    # Return the $str with the number related words removed
    # (now empty strings, so strlen == 0)
    # aWords = [word for word in aWords if len(word) > 0]
    # text = ' '.join(aWords)
    if "." in str(result):
        integer, dec = str(result).split(".")
        # cast float to int
        if dec == "0":
            result = int(integer)

    return result
示例#8
0
def extractnumber_pt(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number

    """
    aWords = text.split()
    count = 0
    result = None
    while count < len(aWords):
        val = 0
        word = aWords[count]
        next_next_word = None
        if count + 1 < len(aWords):
            next_word = aWords[count + 1]
            if count + 2 < len(aWords):
                next_next_word = aWords[count + 2]
        else:
            next_word = None

        # is current word a number?
        if word in pt_numbers:
            val = pt_numbers[word]
        elif word.isdigit():  # doesn't work with decimals
            val = int(word)
        elif is_numeric(word):
            val = float(word)
        elif isFractional_pt(word):
            if not result:
                result = 1
            result = result * isFractional_pt(word)
            count += 1
            continue

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        if val:
            if result is None:
                result = 0
            # handle fractions
            if next_word != "avos":
                result += val
            else:
                result = float(result) / float(val)

        if next_word is None:
            break

        # number word and fraction
        ands = ["e"]
        if next_word in ands:
            zeros = 0
            if result is None:
                count += 1
                continue
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "

            afterAndVal = extractnumber_pt(newText[:-1])
            if afterAndVal:
                if result < afterAndVal or result < 20:
                    while afterAndVal > 1:
                        afterAndVal = afterAndVal / 10.0
                    for word in newWords:
                        if word == "zero" or word == "0":
                            zeros += 1
                        else:
                            break
                for _ in range(0, zeros):
                    afterAndVal = afterAndVal / 10.0
                result += afterAndVal
                break
        elif next_next_word is not None:
            if next_next_word in ands:
                newWords = aWords[count + 3:]
                newText = ""
                for word in newWords:
                    newText += word + " "
                afterAndVal = extractnumber_pt(newText[:-1])
                if afterAndVal:
                    if result is None:
                        result = 0
                    result += afterAndVal
                    break

        decimals = ["ponto", "virgula", u"v�rgula", ".", ","]
        if next_word in decimals:
            zeros = 0
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "
            for word in newWords:
                if word == "zero" or word == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = str(extractnumber_pt(newText[:-1]))
            afterDotVal = zeros * "0" + afterDotVal
            result = float(str(result) + "." + afterDotVal)
            break
        count += 1

    if result is None:
        return False

    # Return the $str with the number related words removed
    # (now empty strings, so strlen == 0)
    # aWords = [word for word in aWords if len(word) > 0]
    # text = ' '.join(aWords)
    if "." in str(result):
        integer, dec = str(result).split(".")
        # cast float to int
        if dec == "0":
            result = int(integer)

    return result
示例#9
0
def extractnumber_es(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number

    """
    aWords = text.split()
    count = 0
    result = None
    while count < len(aWords):
        val = 0
        word = aWords[count]
        next_next_word = None
        if count + 1 < len(aWords):
            next_word = aWords[count + 1]
            if count + 2 < len(aWords):
                next_next_word = aWords[count + 2]
        else:
            next_word = None

        # is current word a number?
        if word in es_numbers:
            val = es_numbers[word]
        elif word.isdigit():  # doesn't work with decimals
            val = int(word)
        elif is_numeric(word):
            val = float(word)
        elif isFractional_es(word):
            if not result:
                result = 1
            result = result * isFractional_es(word)
            count += 1
            continue

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        if val:
            if result is None:
                result = 0
            # handle fractions
            if next_word != "avos":
                result += val
            else:
                result = float(result) / float(val)

        if next_word is None:
            break

        # number word and fraction
        ands = ["e"]
        if next_word in ands:
            zeros = 0
            if result is None:
                count += 1
                continue
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "

            afterAndVal = extractnumber_es(newText[:-1])
            if afterAndVal:
                if result < afterAndVal or result < 20:
                    while afterAndVal > 1:
                        afterAndVal = afterAndVal / 10.0
                    for word in newWords:
                        if word == "cero" or word == "0":
                            zeros += 1
                        else:
                            break
                for _ in range(0, zeros):
                    afterAndVal = afterAndVal / 10.0
                result += afterAndVal
                break
        elif next_next_word is not None:
            if next_next_word in ands:
                newWords = aWords[count + 3:]
                newText = ""
                for word in newWords:
                    newText += word + " "
                afterAndVal = extractnumber_es(newText[:-1])
                if afterAndVal:
                    if result is None:
                        result = 0
                    result += afterAndVal
                    break

        decimals = ["punto", "coma", ".", ","]
        if next_word in decimals:
            zeros = 0
            newWords = aWords[count + 2:]
            newText = ""
            for word in newWords:
                newText += word + " "
            for word in newWords:
                if word == "cero" or word == "0":
                    zeros += 1
                else:
                    break
            afterDotVal = str(extractnumber_es(newText[:-1]))
            afterDotVal = zeros * "0" + afterDotVal
            result = float(str(result) + "." + afterDotVal)
            break
        count += 1

    if result is None:
        return False

    # Return the $str with the number related words removed
    # (now empty strings, so strlen == 0)
    # aWords = [word for word in aWords if len(word) > 0]
    # text = ' '.join(aWords)
    if "." in str(result):
        integer, dec = str(result).split(".")
        # cast float to int
        if dec == "0":
            result = int(integer)

    return result
示例#10
0
文件: parse_en.py 项目: hungnt1/core
def extractnumber_en(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number

    """
    aWords = text.split()
    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # if word.isdigit():            # doesn't work with decimals
            val = float(word)
        elif word == "first":
            val = 1
        elif word == "second":
            val = 2
        elif isFractional_en(word):
            val = isFractional_en(word)
        else:
            if word == "one":
                val = 1
            elif word == "two":
                val = 2
            elif word == "three":
                val = 3
            elif word == "four":
                val = 4
            elif word == "five":
                val = 5
            elif word == "six":
                val = 6
            elif word == "seven":
                val = 7
            elif word == "eight":
                val = 8
            elif word == "nine":
                val = 9
            elif word == "ten":
                val = 10
            if val:
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_en(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        # if val == False:
        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "and"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'and':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'and':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    # if val == False:
    if not val:
        return False

    # Return the string with the number related words removed
    # (now empty strings, so strlen == 0)
    aWords = [word for word in aWords if len(word) > 0]
    text = ' '.join(aWords)

    return val
示例#11
0
def extractnumber_en(text, short_scale=True, ordinals=False):
    """
    This function extracts a number from a text string,
    handles pronunciations in long scale and short scale

    https://en.wikipedia.org/wiki/Names_of_large_numbers

    Args:
        text (str): the string to normalize
        short_scale (bool): use short scale if True, long scale if False
        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
    Returns:
        (int) or (float) or False: The extracted number or False if no number
                                   was found

    """

    string_num_en = {
        "half": 0.5,
        "halves": 0.5,
        "couple": 2,
        "hundred": 100,
        "hundreds": 100,
        "thousand": 1000,
        "thousands": 1000,
        "million": 1000000,
        'millions': 1000000
    }

    string_num_ordinal_en = {}

    for num in NUM_STRING_EN:
        num_string = NUM_STRING_EN[num]
        string_num_en[num_string] = num

    # first, second...
    if ordinals:
        if short_scale:
            for num in SHORT_ORDINAL_STRING_EN:
                num_string = SHORT_ORDINAL_STRING_EN[num]
                string_num_ordinal_en[num_string] = num
                string_num_en[num_string] = num
        else:
            for num in LONG_ORDINAL_STRING_EN:
                num_string = LONG_ORDINAL_STRING_EN[num]
                string_num_ordinal_en[num_string] = num
                string_num_en[num_string] = num

    # negate next number (-2 = 0 - 2)
    negatives = ["negative", "minus"]

    # sum the next number (twenty two = 20 + 2)
    sums = [
        'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty',
        'ninety'
    ]

    # multiply the previous number (one hundred = 1 * 100)
    multiplies = [
        "hundred", "thousand", "hundreds", "thousands", "million", "millions"
    ]

    # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
    fraction_marker = [" and "]

    # decimal marker ( 1 point 5 = 1 + 0.5)
    decimal_marker = [" point ", " dot "]

    if short_scale:
        for num in SHORT_SCALE_EN:
            num_string = SHORT_SCALE_EN[num]
            string_num_en[num_string] = num
            string_num_en[num_string + "s"] = num
            multiplies.append(num_string)
            multiplies.append(num_string + "s")
    else:
        for num in LONG_SCALE_EN:
            num_string = LONG_SCALE_EN[num]
            string_num_en[num_string] = num
            string_num_en[num_string + "s"] = num
            multiplies.append(num_string)
            multiplies.append(num_string + "s")

    # 2 and 3/4
    for c in fraction_marker:
        components = text.split(c)

        if len(components) == 2:
            # ensure first is not a fraction and second is a fraction
            num1 = extractnumber_en(components[0])
            num2 = extractnumber_en(components[1])
            if num1 is not None and num2 is not None \
                    and num1 >= 1 and 0 < num2 < 1:
                return num1 + num2

    # 2 point 5
    for c in decimal_marker:
        components = text.split(c)
        if len(components) == 2:
            number = extractnumber_en(components[0])
            decimal = extractnumber_en(components[1])
            if number is not None and decimal is not None:
                # TODO handle number dot number number number
                if "." not in str(decimal):
                    return number + float("0." + str(decimal))

    aWords = text.split()
    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
    val = False
    prev_val = None
    to_sum = []
    for idx, word in enumerate(aWords):

        if not word:
            continue
        prev_word = aWords[idx - 1] if idx > 0 else ""
        next_word = aWords[idx + 1] if idx + 1 < len(aWords) else ""

        # is this word already a number ?
        if is_numeric(word):
            # if word.isdigit():            # doesn't work with decimals
            val = float(word)

        # is this word the name of a number ?
        if word in string_num_en:
            val = string_num_en[word]

        # is the prev word an ordinal number and current word is one?
        # second one, third one
        if ordinals and prev_word in string_num_ordinal_en and val is 1:
            val = prev_val

        # is the prev word a number and should we sum it?
        # twenty two, fifty six
        if prev_word in sums and word in string_num_en:
            if val and val < 10:
                val = prev_val + val

        # is the prev word a number and should we multiply it?
        # twenty hundred, six hundred
        if word in multiplies:
            if not prev_val:
                prev_val = 1
            val = prev_val * val

        # is this a spoken fraction?
        # half cup
        if val is False:
            val = isFractional_en(word, short_scale=short_scale)

        # 2 fifths
        if not ordinals:
            next_value = isFractional_en(next_word, short_scale=short_scale)
            if next_value:
                if not val:
                    val = 1
                val = val * next_value

        # is this a negative number?
        if val and prev_word and prev_word in negatives:
            val = 0 - val

        # let's make sure it isn't a fraction
        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])

        else:
            prev_val = val

            # handle long numbers
            # six hundred sixty six
            # two million five hundred thousand
            if word in multiplies and next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0

    if val is not None:
        for v in to_sum:
            val = val + v
    return val
示例#12
0
def extractnumber_it(text, short_scale=False, ordinals=False):
    """
    This function extracts a number from a text string,
    handles pronunciations in long scale and short scale

    https://en.wikipedia.org/wiki/Names_of_large_numbers

    Args:
        text (str): the string to normalize
        short_scale (bool): use short scale if True, long scale if False
        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
    Returns:
        (int) or (float) or False: The extracted number or False if no number
                                   was found

    """

    string_num_ordinal_it = {}
    # first, second...
    if ordinals:
        if short_scale:
            for num in SHORT_ORDINAL_STRING_IT:
                num_string = SHORT_ORDINAL_STRING_IT[num]
                string_num_ordinal_it[num_string] = num
                STRING_NUM_ITA[num_string] = num
        else:
            for num in LONG_ORDINAL_STRING_IT:
                num_string = LONG_ORDINAL_STRING_IT[num]
                string_num_ordinal_it[num_string] = num
                STRING_NUM_ITA[num_string] = num

    # negate next number (-2 = 0 - 2)
    negatives = ['meno']  # 'negativo' non è usuale in italiano

    # multiply the previous number (one hundred = 1 * 100)
    multiplies = [
        'decina', 'decine', 'dozzina', 'dozzine', 'centinaia', 'centinaio',
        'migliaia', 'migliaio', 'mila'
    ]

    # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
    fraction_marker = [' e ']

    # decimal marker ( 1 point 5 = 1 + 0.5)
    decimal_marker = [' punto ', ' virgola ']

    if short_scale:
        for num in SHORT_SCALE_IT:
            num_string = SHORT_SCALE_IT[num]
            STRING_NUM_ITA[num_string] = num
            multiplies.append(num_string)
    else:
        for num in LONG_SCALE_IT:
            num_string = LONG_SCALE_IT[num]
            STRING_NUM_ITA[num_string] = num
            multiplies.append(num_string)

    # 2 e 3/4 ed altri casi
    for separator in fraction_marker:
        components = text.split(separator)
        zeros = 0

        if len(components) == 2:
            # count zeros in fraction part
            sub_components = components[1].split(' ')
            for element in sub_components:
                if element == 'zero' or element == '0':
                    zeros += 1
                else:
                    break
            # ensure first is not a fraction and second is a fraction
            num1 = extractnumber_it(components[0])
            num2 = extractnumber_it(components[1])
            if num1 is not None and num2 is not None \
                    and num1 >= 1 and 0 < num2 < 1:
                return num1 + num2
            # sette e quaranta  sette e zero zero due
            elif num1 is not None and num2 is not None \
                    and num1 >= 1 and num2 > 1:
                return num1 + num2 / pow(10, len(str(num2)) + zeros)

    # 2 punto 5
    for separator in decimal_marker:
        zeros = 0
        # count zeros in fraction part
        components = text.split(separator)

        if len(components) == 2:
            sub_components = components[1].split(' ')
            for element in sub_components:
                if element == 'zero' or element == '0':
                    zeros += 1
                else:
                    break

            number = int(extractnumber_it(components[0]))
            decimal = int(extractnumber_it(components[1]))
            if number is not None and decimal is not None:
                if '.' not in str(decimal):
                    return number + decimal / pow(10,
                                                  len(str(decimal)) + zeros)

    all_words = text.split()
    val = False
    prev_val = None
    to_sum = []
    for idx, word in enumerate(all_words):

        if not word:
            continue
        prev_word = all_words[idx - 1] if idx > 0 else ''
        next_word = all_words[idx + 1] if idx + 1 < len(all_words) else ''

        # is this word already a number ?
        if is_numeric(word):
            val = float(word)

        # is this word the name of a number ?
        if word in STRING_NUM_ITA:
            val = STRING_NUM_ITA[word]

        #  tre quarti  un quarto  trenta secondi
        if isFractional_it(word) and prev_val:
            if word[:-1] == 'second' and not ordinals:
                val = prev_val * 2
            else:
                val = prev_val

        # is the prev word a number and should we multiply it?
        # twenty hundred, six hundred
        if word in multiplies:
            if not prev_val:
                prev_val = 1
            val = prev_val * val

        # is this a spoken fraction?
        # mezza tazza
        if val is False:
            val = isFractional_it(word, short_scale=short_scale)

        # 2 quinti
        if not ordinals:
            next_value = isFractional_it(next_word, short_scale=short_scale)
            if next_value:
                if not val:
                    val = 1
                val = val * next_value

        # is this a negative number?
        if val and prev_word and prev_word in negatives:
            val = 0 - val

        if not val:
            val = extractnumber_long_it(word)

        # let's make sure it isn't a fraction
        if not val:
            # look for fractions like '2/3'
            all_pieces = word.split('/')
            if look_for_fractions(all_pieces):
                val = float(all_pieces[0]) / float(all_pieces[1])
        else:
            prev_val = val
            # handle long numbers
            # six hundred sixty six
            # two million five hundred thousand
            if word in multiplies and next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0
            elif extractnumber_long_it(word) > 100 and \
                extractnumber_long_it(next_word) and \
                    next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0

    if val is not None:
        for addend in to_sum:
            val = val + addend
    return val
示例#13
0
def extractnumber_sv(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number
    """
    aWords = text.split()
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            val = float(word)
        elif word == "första":
            val = 1
        elif word == "andra":
            val = 2
        elif word == "tredje":
            val = 3
        elif word == "fjärde":
            val = 4
        elif word == "femte":
            val = 5
        elif word == "sjätte":
            val = 6
        elif is_fractional_sv(word):
            val = is_fractional_sv(word)
        else:
            if word == "en":
                val = 1
            if word == "ett":
                val = 1
            elif word == "två":
                val = 2
            elif word == "tre":
                val = 3
            elif word == "fyra":
                val = 4
            elif word == "fem":
                val = 5
            elif word == "sex":
                val = 6
            elif word == "sju":
                val = 7
            elif word == "åtta":
                val = 8
            elif word == "nio":
                val = 9
            elif word == "tio":
                val = 10
            if val:
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = is_fractional_sv(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "och"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'och':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'och':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    if not val:
        return False

    return val
示例#14
0
def extractnumber_it(text, short_scale=False, ordinals=False):
    """
    This function extracts a number from a text string,
    handles pronunciations in long scale and short scale

    https://en.wikipedia.org/wiki/Names_of_large_numbers

    Args:
        text (str): the string to normalize
        short_scale (bool): use short scale if True, long scale if False
        ordinals (bool): consider ordinal numbers, third=3 instead of 1/3
    Returns:
        (int) or (float) or False: The extracted number or False if no number
                                   was found

    """

    string_num_ordinal_it = {}
    # first, second...
    if ordinals:
        if short_scale:
            for num in SHORT_ORDINAL_STRING_IT:
                num_string = SHORT_ORDINAL_STRING_IT[num]
                string_num_ordinal_it[num_string] = num
                STRING_NUM_ITA[num_string] = num
        else:
            for num in LONG_ORDINAL_STRING_IT:
                num_string = LONG_ORDINAL_STRING_IT[num]
                string_num_ordinal_it[num_string] = num
                STRING_NUM_ITA[num_string] = num

    # negate next number (-2 = 0 - 2)
    negatives = ['meno']  # 'negativo' non è usuale in italiano

    # multiply the previous number (one hundred = 1 * 100)
    multiplies = ['decina', 'decine', 'dozzina', 'dozzine',
                  'centinaia', 'centinaio', 'migliaia', 'migliaio', 'mila']

    # split sentence parse separately and sum ( 2 and a half = 2 + 0.5 )
    fraction_marker = [' e ']

    # decimal marker ( 1 point 5 = 1 + 0.5)
    decimal_marker = [' punto ', ' virgola ']

    if short_scale:
        for num in SHORT_SCALE_IT:
            num_string = SHORT_SCALE_IT[num]
            STRING_NUM_ITA[num_string] = num
            multiplies.append(num_string)
    else:
        for num in LONG_SCALE_IT:
            num_string = LONG_SCALE_IT[num]
            STRING_NUM_ITA[num_string] = num
            multiplies.append(num_string)

    # 2 e 3/4 ed altri casi
    for separator in fraction_marker:
        components = text.split(separator)
        zeros = 0

        if len(components) == 2:
            # count zeros in fraction part
            sub_components = components[1].split(' ')
            for element in sub_components:
                if element == 'zero' or element == '0':
                    zeros += 1
                else:
                    break
            # ensure first is not a fraction and second is a fraction
            num1 = extractnumber_it(components[0])
            num2 = extractnumber_it(components[1])
            if num1 is not None and num2 is not None \
                    and num1 >= 1 and 0 < num2 < 1:
                return num1 + num2
            # sette e quaranta  sette e zero zero due
            elif num1 is not None and num2 is not None \
                    and num1 >= 1 and num2 > 1:
                return num1 + num2 / pow(10, len(str(num2)) + zeros)

    # 2 punto 5
    for separator in decimal_marker:
        zeros = 0
        # count zeros in fraction part
        components = text.split(separator)

        if len(components) == 2:
            sub_components = components[1].split(' ')
            for element in sub_components:
                if element == 'zero' or element == '0':
                    zeros += 1
                else:
                    break

            number = int(extractnumber_it(components[0]))
            decimal = int(extractnumber_it(components[1]))
            if number is not None and decimal is not None:
                if '.' not in str(decimal):
                    return number + decimal / pow(10,
                                                  len(str(decimal)) + zeros)

    all_words = text.split()
    val = False
    prev_val = None
    to_sum = []
    for idx, word in enumerate(all_words):

        if not word:
            continue
        prev_word = all_words[idx - 1] if idx > 0 else ''
        next_word = all_words[idx + 1] if idx + 1 < len(all_words) else ''

        # is this word already a number ?
        if is_numeric(word):
            val = float(word)

        # is this word the name of a number ?
        if word in STRING_NUM_ITA:
            val = STRING_NUM_ITA[word]

        #  tre quarti  un quarto  trenta secondi
        if isFractional_it(word) and prev_val:
            if word[:-1] == 'second' and not ordinals:
                val = prev_val * 2
            else:
                val = prev_val

        # is the prev word a number and should we multiply it?
        # twenty hundred, six hundred
        if word in multiplies:
            if not prev_val:
                prev_val = 1
            val = prev_val * val

        # is this a spoken fraction?
        # mezza tazza
        if val is False:
            val = isFractional_it(word, short_scale=short_scale)

        # 2 quinti
        if not ordinals:
            next_value = isFractional_it(next_word, short_scale=short_scale)
            if next_value:
                if not val:
                    val = 1
                val = val * next_value

        # is this a negative number?
        if val and prev_word and prev_word in negatives:
            val = 0 - val

        if not val:
            val = extractnumber_long_it(word)

        # let's make sure it isn't a fraction
        if not val:
            # look for fractions like '2/3'
            all_pieces = word.split('/')
            if look_for_fractions(all_pieces):
                val = float(all_pieces[0]) / float(all_pieces[1])
        else:
            prev_val = val
            # handle long numbers
            # six hundred sixty six
            # two million five hundred thousand
            if word in multiplies and next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0
            elif extractnumber_long_it(word) > 100 and \
                extractnumber_long_it(next_word) and \
                    next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0

    if val is not None:
        for addend in to_sum:
            val = val + addend
    return val
示例#15
0
def extractnumber_en(text):
    """
    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.
    Args:
        text (str): the string to normalize
    Returns:
        (int) or (float): The value of extracted number

    """
    aWords = text.split()
    aWords = [word for word in aWords if word not in ["the", "a", "an"]]
    and_pass = False
    valPreAnd = False
    val = False
    count = 0
    while count < len(aWords):
        word = aWords[count]
        if is_numeric(word):
            # if word.isdigit():            # doesn't work with decimals
            val = float(word)
        elif word == "first":
            val = 1
        elif word == "second":
            val = 2
        elif isFractional_en(word):
            val = isFractional_en(word)
        else:
            if word == "one":
                val = 1
            elif word == "two":
                val = 2
            elif word == "three":
                val = 3
            elif word == "four":
                val = 4
            elif word == "five":
                val = 5
            elif word == "six":
                val = 6
            elif word == "seven":
                val = 7
            elif word == "eight":
                val = 8
            elif word == "nine":
                val = 9
            elif word == "ten":
                val = 10
            if val:
                if count < (len(aWords) - 1):
                    wordNext = aWords[count + 1]
                else:
                    wordNext = ""
                valNext = isFractional_en(wordNext)

                if valNext:
                    val = val * valNext
                    aWords[count + 1] = ""

        # if val == False:
        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            # if (len(aPieces) == 2 and is_numeric(aPieces[0])
            #   and is_numeric(aPieces[1])):
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
            elif and_pass:
                # added to value, quit here
                val = valPreAnd
                break
            else:
                count += 1
                continue

        aWords[count] = ""

        if and_pass:
            aWords[count - 1] = ''  # remove "and"
            val += valPreAnd
        elif count + 1 < len(aWords) and aWords[count + 1] == 'and':
            and_pass = True
            valPreAnd = val
            val = False
            count += 2
            continue
        elif count + 2 < len(aWords) and aWords[count + 2] == 'and':
            and_pass = True
            valPreAnd = val
            val = False
            count += 3
            continue

        break

    # if val == False:
    if not val:
        return False

    # Return the string with the number related words removed
    # (now empty strings, so strlen == 0)
    aWords = [word for word in aWords if len(word) > 0]
    text = ' '.join(aWords)

    return val
def _extract_whole_number_with_text_en(tokens, short_scale, ordinals):
    """
    Handle numbers not handled by the decimal or fraction functions. This is
    generally whole numbers. Note that phrases such as "one half" will be
    handled by this function, while "one and a half" are handled by the
    fraction function.

    Args:
        tokens [_Token]:
        short_scale boolean:
        ordinals boolean:

    Returns:
        int or float, [_Tokens]
        The value parsed, and tokens that it corresponds to.

    """
    multiplies, string_num_ordinal, string_num_scale = \
        _initialize_number_data(short_scale)

    number_words = []  # type: [_Token]
    val = False
    prev_val = None
    next_val = None
    to_sum = []
    for idx, token in enumerate(tokens):
        current_val = None
        if next_val:
            next_val = None
            continue

        word = token.word
        if word in _ARTICLES or word in _NEGATIVES:
            number_words.append(token)
            continue

        prev_word = tokens[idx - 1].word if idx > 0 else ""
        next_word = tokens[idx + 1].word if idx + 1 < len(tokens) else ""

        if word not in string_num_scale and \
                word not in _STRING_NUM_EN and \
                word not in _SUMS and \
                word not in multiplies and \
                not (ordinals and word in string_num_ordinal) and \
                not is_numeric(word) and \
                not isFractional_en(word, short_scale=short_scale) and \
                not look_for_fractions(word.split('/')):
            words_only = [token.word for token in number_words]
            if number_words and not all(
                [w in _ARTICLES | _NEGATIVES for w in words_only]):
                break
            else:
                number_words = []
                continue
        elif word not in multiplies \
                and prev_word not in multiplies \
                and prev_word not in _SUMS \
                and not (ordinals and prev_word in string_num_ordinal) \
                and prev_word not in _NEGATIVES \
                and prev_word not in _ARTICLES:
            number_words = [token]
        elif prev_word in _SUMS and word in _SUMS:
            number_words = [token]
        else:
            number_words.append(token)

        # is this word already a number ?
        if is_numeric(word):
            if word.isdigit():  # doesn't work with decimals
                val = int(word)
            else:
                val = float(word)
            current_val = val

        # is this word the name of a number ?
        if word in _STRING_NUM_EN:
            val = _STRING_NUM_EN.get(word)
            current_val = val
        elif word in string_num_scale:
            val = string_num_scale.get(word)
            current_val = val
        elif ordinals and word in string_num_ordinal:
            val = string_num_ordinal[word]
            current_val = val

        # is the prev word an ordinal number and current word is one?
        # second one, third one
        if ordinals and prev_word in string_num_ordinal and val is 1:
            val = prev_val

        # is the prev word a number and should we sum it?
        # twenty two, fifty six
        if prev_word in _SUMS and val and val < 10:
            val = prev_val + val

        # is the prev word a number and should we multiply it?
        # twenty hundred, six hundred
        if word in multiplies:
            if not prev_val:
                prev_val = 1
            val = prev_val * val

        # is this a spoken fraction?
        # half cup
        if val is False:
            val = isFractional_en(word, short_scale=short_scale)
            current_val = val

        # 2 fifths
        if not ordinals:
            next_val = isFractional_en(next_word, short_scale=short_scale)
            if next_val:
                if not val:
                    val = 1
                val = val * next_val
                number_words.append(tokens[idx + 1])

        # is this a negative number?
        if val and prev_word and prev_word in _NEGATIVES:
            val = 0 - val

        # let's make sure it isn't a fraction
        if not val:
            # look for fractions like "2/3"
            aPieces = word.split('/')
            if look_for_fractions(aPieces):
                val = float(aPieces[0]) / float(aPieces[1])
                current_val = val

        else:
            if prev_word in _SUMS and word not in _SUMS and current_val >= 10:
                # Backtrack - we've got numbers we can't sum.
                number_words.pop()
                val = prev_val
                break
            prev_val = val

            # handle long numbers
            # six hundred sixty six
            # two million five hundred thousand
            if word in multiplies and next_word not in multiplies:
                to_sum.append(val)
                val = 0
                prev_val = 0

    if val is not None and to_sum:
        val += sum(to_sum)

    return val, number_words