예제 #1
0
def prepare(path_univ_dir):
    """Normalise and de-duplicate every file in ``path_univ_dir`` in place.

    Each line of each file is stripped, has matches of ``stop_word_re``
    removed, has matches of every pattern in ``word_type_re`` replaced by
    that pattern's key, and is then passed through ``word_replace`` with
    ``homo_dict`` and ``syno_dict`` in that order.  Only the first
    occurrence of each resulting line is kept (per file, original order
    preserved) and the file is overwritten with the kept lines.

    Args:
        path_univ_dir: Directory whose entries are all opened as text files.
    """
    for entry_name in os.listdir(path_univ_dir):
        entry_path = os.path.join(path_univ_dir, entry_name)
        seen = set()
        kept = []
        with open(entry_path, 'r') as src:
            for raw_line in src:
                cleaned = re.sub(stop_word_re, '', raw_line.strip())
                for placeholder, pattern in word_type_re.items():
                    cleaned = re.sub(pattern, placeholder, cleaned)
                for mapping in (homo_dict, syno_dict):
                    cleaned = word_replace(cleaned, mapping)
                # Skip lines that became identical to an earlier one.
                if cleaned in seen:
                    continue
                seen.add(cleaned)
                kept.append(cleaned)
        # The file is fully read and closed before being rewritten.
        with open(entry_path, 'w') as dst:
            dst.writelines(item + '\n' for item in kept)
예제 #2
0
def predict(text, name):
    """Return the label of the cached encoding nearest to ``text``.

    The input is stripped, has matches of ``stop_word_re`` removed, has
    matches of every pattern in ``word_type_re`` replaced by that pattern's
    key, and is passed through ``word_replace`` with ``homo_dict`` and then
    ``syno_dict``.  The result is turned into a padded index sequence,
    encoded by the model selected via ``map_item(name, models)`` and
    compared (Euclidean distance) against every row of the cached encodings
    selected via ``map_item(name, caches)``.

    Args:
        text: Raw input sentence.
        name: Key handed to ``map_item`` to pick the model and the cache.

    Returns:
        When the module is run as a script (``__name__ == '__main__'``), a
        ``', '``-joined string of ``"<label> <distance> <text>"`` for the
        (up to) three nearest entries; otherwise only the nearest label.
    """
    text = re.sub(stop_word_re, '', text.strip())
    for word_type, word_re in word_type_re.items():
        text = re.sub(word_re, word_type, text)
    for mapping in (homo_dict, syno_dict):
        text = word_replace(text, mapping)
    cache_sents = map_item(name, caches)
    seq = word2ind.texts_to_sequences([text])[0]
    pad_seq = pad_sequences([seq], maxlen=seq_len)
    model = map_item(name, models)
    encode_seq = model.predict([pad_seq])
    # Broadcasting subtracts the single encoded row from every cached row,
    # so no repeated copy of the encoding has to be materialised.
    diffs = np.asarray(encode_seq) - cache_sents
    dists = np.sqrt(np.sum(np.square(diffs), axis=1))
    min_inds = np.argsort(dists)[:3]
    min_preds = [labels[ind] for ind in min_inds]
    if __name__ != '__main__':
        return min_preds[0]
    # Script mode only: reuse the argsort result instead of sorting the
    # distances a second time.
    min_dists = dists[min_inds]
    min_texts = [texts[ind] for ind in min_inds]
    return ', '.join(
        '{} {:.3f} {}'.format(pred, dist, near_text)
        for pred, dist, near_text in zip(min_preds, min_dists, min_texts)
    )
예제 #3
0
def clean(text):
    """Return a normalised copy of ``text``.

    Matches of ``stop_word_re`` are removed, matches of every pattern in
    ``word_type_re`` are replaced by that pattern's key, and the result is
    passed through ``word_replace`` with ``homo_dict`` and then
    ``syno_dict``.  The input is not stripped here.
    """
    cleaned = re.sub(stop_word_re, '', text)
    for placeholder, pattern in word_type_re.items():
        cleaned = re.sub(pattern, placeholder, cleaned)
    # Order matters: homo_dict is applied before syno_dict.
    for mapping in (homo_dict, syno_dict):
        cleaned = word_replace(cleaned, mapping)
    return cleaned
예제 #4
0
def clean(text):
    """Return a normalised copy of ``text``.

    The input is stripped, matches of ``stop_word_re`` are removed, and the
    result is passed through ``word_replace`` with ``homo_dict`` and then
    ``syno_dict``.
    """
    cleaned = re.sub(stop_word_re, '', text.strip())
    # Order matters: homo_dict is applied before syno_dict.
    for mapping in (homo_dict, syno_dict):
        cleaned = word_replace(cleaned, mapping)
    return cleaned