import utils
import mapper
import lister

# POSTNUM_ABB_PATH, FIRSTNAME_PATH, SURNAME_PATH, AVOID_PATH, NUM_PATH and
# ABB_PATH are assumed to be module-level constants defined elsewhere in the
# project.


def post_number(sentence):
    """Rewrite a token via the post-number abbreviation map when it directly precedes a number."""
    words = utils.tokenize(sentence)
    abb_map = mapper.load(POSTNUM_ABB_PATH, first_caps=True)
    for i in range(1, len(words)):
        word = utils.strip(words[i - 1])
        # Rewrite only when the following token is numeric.
        if word in abb_map and utils.is_number(utils.strip(words[i])):
            words[i - 1] = utils.replace(words[i - 1], str(abb_map[word]))
    return " ".join(words)
def full_name(sentence):
    """Shorten a first name to its initial when it is followed by a known surname."""
    words = utils.tokenize(sentence)
    first_list = lister.load(FIRSTNAME_PATH)
    sur_list = lister.load(SURNAME_PATH)
    for i in range(1, len(words)):
        first = utils.strip(words[i - 1])
        last = utils.strip(words[i])
        if last in sur_list and first in first_list:
            # e.g. "John Smith" -> "J. Smith"
            words[i - 1] = utils.replace(words[i - 1], first[:1] + ".")
    return " ".join(words)
def features(sentence):
    """Extract simple surface features from a sentence."""
    avoid = lister.load(AVOID_PATH)
    words = len(sentence.split(" "))
    length = len(sentence)
    clean = 0    # total characters in non-avoided words
    major = 0    # words starting with an uppercase letter
    avoided = 0  # words found in the avoid list
    for token in sentence.split():
        word = utils.ultraStrip(token)
        if word in avoid:
            avoided += 1
            continue
        clean += len(word)
        stripped = utils.strip(token)
        # Guard against tokens that strip down to an empty string,
        # which would otherwise raise an IndexError on [0].
        if stripped and stripped[0].isupper():
            major += 1
    # Simple punctuation and whitespace counts.
    quotes = sentence.count("\"")
    dots = sentence.count(".")
    commas = sentence.count(",")
    spaces = sentence.count(" ")
    return words, length, avoided, clean, major, quotes, dots, commas, spaces
def execute(sentence):
    """Collapse runs of spelled-out number words into a single numeral."""
    words = utils.tokenize(sentence)
    parsed = []
    last_ok = -1  # index of the last word consumed by a number run
    num_map = mapper.load(NUM_PATH)
    for i in range(len(words)):
        word = words[i]
        changed = utils.strip(word)
        if changed in num_map and last_ok < i:
            number = 0  # accumulated total
            buffer = 0  # value of the current sub-thousand group
            last_ok = i
            for j in range(i, len(words)):
                actual = utils.strip(words[j])
                if actual not in num_map:
                    break
                if num_map[actual] == '1000000':
                    # "million" must follow a multiplier, e.g. "two million".
                    if buffer == 0:
                        break
                    number += 1000000 * buffer
                    buffer = 0
                elif num_map[actual] == '1000':
                    # "thousand" must follow a multiplier, e.g. "five thousand".
                    if buffer == 0:
                        break
                    number += 1000 * buffer
                    buffer = 0
                elif num_map[actual] == '100':
                    # "hundred" scales the group built so far.
                    buffer = buffer * 100
                else:
                    buffer += int(num_map[actual])
                last_ok = j
                # Punctuation attached to the word ends the run.
                if actual != words[j]:
                    break
            number += buffer
            parsed.append(utils.replace(words[last_ok], str(number)))
        elif last_ok < i:
            # Not part of a number run; keep the word as-is.
            parsed.append(utils.replace(word, changed))
    return " ".join(parsed)
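# A minimal standalone sketch of the accumulation logic inside execute(),
# using an inline number map instead of mapper.load(NUM_PATH). The inline map
# and the helper name _demo_number_run are assumptions for illustration only;
# the real mapping lives in the NUM_PATH file.
def _demo_number_run(tokens):
    num_map = {"three": "3", "hundred": "100", "twenty": "20",
               "five": "5", "thousand": "1000"}
    number = 0
    buffer = 0
    for token in tokens:
        value = num_map[token]
        if value in ("1000000", "1000"):
            number += int(value) * buffer  # close the current group
            buffer = 0
        elif value == "100":
            buffer *= 100                  # scale the group built so far
        else:
            buffer += int(value)
    return number + buffer

# _demo_number_run("three hundred twenty five thousand".split()) -> 325000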
def basic(sentence):
    """Apply the general abbreviation map to every token."""
    words = utils.tokenize(sentence)
    abb_map = mapper.load(ABB_PATH)
    for i in range(len(words)):
        word = utils.strip(words[i])
        if word in abb_map:
            words[i] = utils.replace(words[i], str(abb_map[word]))
    return " ".join(words)
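# Hedged usage sketch: the chaining order below (numbers first, then names
# and abbreviations) is an assumption; the project may compose these steps
# differently, and the sample sentence is illustrative only.
if __name__ == "__main__":
    sample = "John Smith paid three hundred twenty five thousand dollars"
    out = execute(sample)    # spelled-out number run -> "325000"
    out = full_name(out)     # "John Smith" -> "J. Smith" (if both are listed)
    out = post_number(out)   # rewrite abbreviations that precede numbers
    out = basic(out)         # apply the general abbreviation map
    print(out)
    print(features(sample))  # surface-feature tuple for the raw sentence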