Exemplo n.º 1
0
def post_number(sentence):
    """Rewrite abbreviations that are directly followed by a number.

    The sentence is tokenized with ``utils.tokenize``. Each token whose
    stripped form is a key of the map loaded from ``POSTNUM_ABB_PATH`` and
    whose successor strips to something ``utils.is_number`` accepts is
    passed through ``utils.replace`` together with the mapped value.

    Returns the tokens joined back together with single spaces.
    """
    tokens = utils.tokenize(sentence)

    expansions = mapper.load(POSTNUM_ABB_PATH, first_caps=True)

    # Walk adjacent (token, successor) pairs. Only the left element of a
    # pair is ever rewritten, so later pairs still see original tokens.
    for pos, (current, following) in enumerate(zip(tokens, tokens[1:])):
        key = utils.strip(current)
        if key not in expansions:
            continue
        if not utils.is_number(utils.strip(following)):
            continue
        tokens[pos] = utils.replace(current, str(expansions[key]))

    return " ".join(tokens)
Exemplo n.º 2
0
def full_name(sentence):
    """Shorten first names to an initial when a surname follows.

    For every pair of adjacent tokens, if the stripped right token is in
    the list loaded from ``SURNAME_PATH`` and the stripped left token is in
    the list loaded from ``FIRSTNAME_PATH``, the left token is passed
    through ``utils.replace`` with its first character plus ``"."``.

    Returns the tokens joined back together with single spaces.
    """
    tokens = utils.tokenize(sentence)

    given_names = lister.load(FIRSTNAME_PATH)
    family_names = lister.load(SURNAME_PATH)

    # Only the left token of a pair is rewritten, so the following pair
    # still compares against the untouched right token.
    for pos, (left, right) in enumerate(zip(tokens, tokens[1:])):
        given = utils.strip(left)
        family = utils.strip(right)
        if family not in family_names or given not in given_names:
            continue
        initial = str(given[:1] + ".")
        tokens[pos] = utils.replace(left, initial)

    return " ".join(tokens)
Exemplo n.º 3
0
def features(sentence):
    """Compute simple surface statistics for ``sentence``.

    Returns a 9-tuple, in this order:

    * ``words``   - number of pieces from ``sentence.split(" ")``; note that
      consecutive spaces therefore contribute empty pieces to this count.
    * ``length``  - number of characters in the sentence.
    * ``avoided`` - whitespace-separated tokens whose ``utils.ultraStrip``
      form is in the list loaded from ``AVOID_PATH``.
    * ``clean``   - total length of the ``ultraStrip`` forms of all other
      tokens.
    * ``major``   - how many of those other tokens start with an uppercase
      character after ``utils.strip``.
    * ``quotes``, ``dots``, ``commas``, ``spaces`` - occurrences of ``"``,
      ``.``, ``,`` and the space character.
    """
    avoid = lister.load(AVOID_PATH)
    words = len(sentence.split(" "))
    length = len(sentence)
    clean = 0
    major = 0
    avoided = 0
    for token in sentence.split():
        word = utils.ultraStrip(token)
        if word in avoid:
            avoided += 1
            continue
        clean += len(word)
        # Slice rather than index: if utils.strip leaves nothing behind
        # (presumably possible for a punctuation-only token - confirm
        # against utils.strip), [0] would raise IndexError, whereas
        # "".isupper() is simply False.
        if utils.strip(token)[:1].isupper():
            major += 1

    # str.count on a one-character needle equals a per-character tally.
    quotes = sentence.count("\"")
    dots = sentence.count(".")
    commas = sentence.count(",")
    spaces = sentence.count(" ")

    return words, length, avoided, clean, major, quotes, dots, commas, spaces
Exemplo n.º 4
0
def execute(sentence):
    """Collapse runs of number words in ``sentence`` into digit strings.

    The sentence is tokenized with ``utils.tokenize``. A token whose
    stripped form is a key of the map loaded from ``NUM_PATH`` opens a run;
    the run extends over the following tokens while they are also in the
    map. Map values are digit strings: ``'100'``, ``'1000'`` and
    ``'1000000'`` act as multipliers, every other value is added to the
    group currently being accumulated. The whole run is emitted as one
    token, built by ``utils.replace`` from the last token of the run and
    the computed total. Tokens outside a run are emitted through
    ``utils.replace(token, stripped_token)``.

    A run ends early when a thousand/million multiplier arrives with an
    empty group after the run has started, or right after a token whose
    stripped form differs from the raw token (presumably attached
    punctuation - confirm against ``utils.strip``).

    Returns the emitted tokens joined with single spaces.
    """
    words = utils.tokenize(sentence)
    parsed = []
    last_ok = -1  # index of the last token already folded into a number
    num_map = mapper.load(NUM_PATH)
    for i in range(0, len(words)):
        word = words[i]
        changed = utils.strip(word)
        if changed in num_map and last_ok < i:
            number = 0  # sum of completed thousand/million groups
            buffer = 0  # group accumulated since the last multiplier
            last_ok = i
            for j in range(i, len(words)):
                actual = utils.strip(words[j])
                if actual not in num_map:
                    break

                value = num_map[actual]
                if j == i and value in ('1000000', '1000', '100'):
                    # A run that opens with a bare multiplier has no count
                    # in front of it; treat it as "one" of that unit.
                    # Without this the run evaluated to 0 and the word was
                    # rewritten as "0".
                    buffer = 1

                if value == '1000000':
                    if buffer == 0:
                        break
                    number += 1000000 * buffer
                    buffer = 0
                elif value == '1000':
                    if buffer == 0:
                        break
                    number += 1000 * buffer
                    buffer = 0
                elif value == '100':
                    buffer = buffer * 100
                else:
                    buffer += int(value)
                last_ok = j
                if actual != words[j]:
                    # Stripping changed the token, so the run stops here.
                    break

            number += buffer
            # Build the output from the run's last token.
            parsed.append(utils.replace(words[last_ok], str(number)))
        else:
            # Tokens with index <= last_ok were consumed by a run: skip them.
            if last_ok < i:
                parsed.append(utils.replace(word, changed))
    return " ".join(parsed)
Exemplo n.º 5
0
def basic(sentence):
    """Rewrite every token that has an entry in the abbreviation map.

    The sentence is tokenized with ``utils.tokenize``. Each token whose
    stripped form is a key of the map loaded from ``ABB_PATH`` is passed
    through ``utils.replace`` together with the mapped value.

    Returns the tokens joined back together with single spaces.
    """
    tokens = utils.tokenize(sentence)

    expansions = mapper.load(ABB_PATH)

    for pos, token in enumerate(tokens):
        key = utils.strip(token)
        if key in expansions:
            tokens[pos] = utils.replace(token, str(expansions[key]))

    return " ".join(tokens)