Exemplo n.º 1
0
def memm_greedy(sentences, max_sentence_len, features_map, counters_dict, clf):
    """Predict tags for all sentences one word position at a time.

    For every position ``0 .. max_sentence_len - 1`` a feature dict is built
    for each sentence (an empty dict when the sentence has no word at that
    position). The whole batch is handed to ``get_prediction_of_all_words``
    and every returned prediction is appended to the prediction list of the
    sentence with the same index. The time spent on each position is printed.

    Args:
        sentences: Sequence of sentences, each an indexable sequence of words.
        max_sentence_len: Number of word positions to process.
        features_map: Forwarded unchanged to ``get_prediction_of_all_words``.
        counters_dict: Passed to ``DictUtils.is_rare`` along with each word.
        clf: Forwarded unchanged to ``get_prediction_of_all_words``.

    Returns:
        A list holding one prediction list per sentence, in input order.
    """
    predictions_per_sentence = [[] for _ in sentences]

    for position in range(max_sentence_len):
        started_at = datetime.now()

        # One feature dict per sentence so the batch lines up with the
        # sentence indices used below.
        batch_features = []
        for sent_idx, tokens in enumerate(sentences):
            if position >= len(tokens):
                # Sentence is shorter than the current position: placeholder.
                batch_features.append({})
                continue
            token = tokens[position]
            # The predictions made so far for this sentence are passed on to
            # the feature extractor.
            batch_features.append(
                FeaturesUtils.get_word_features(
                    position, tokens, predictions_per_sentence[sent_idx],
                    DictUtils.is_rare(counters_dict, token)))

        batch_predictions = get_prediction_of_all_words(
            batch_features, clf, features_map)

        elapsed = datetime.now() - started_at
        print('word i={1} Running Time: {0}'.format(elapsed, position))

        # NOTE(review): predictions returned for placeholder dicts are stored
        # too; presumably callers ignore entries past a sentence's real
        # length -- confirm.
        for sent_idx in range(len(batch_predictions)):
            predictions_per_sentence[sent_idx].append(
                batch_predictions[sent_idx])

    return predictions_per_sentence
Exemplo n.º 2
0
def create_features(words_features_list, words, tags, dict_e):
    """Append one feature dict per word to ``words_features_list``.

    Each dict comes from ``FeaturesUtils.get_word_features`` and is extended
    with a ``'tag'`` entry holding the tag at the same index. Every dict is
    also printed after being appended.

    Args:
        words_features_list: List that receives the feature dicts (mutated
            in place).
        words: Sequence of words.
        tags: Sequence of tags, indexed in parallel with ``words``.
        dict_e: Passed to ``DictUtils.is_rare`` with each ``(word, tag)``
            pair.

    Returns:
        None. Results are delivered through ``words_features_list``.
    """
    for position, token in enumerate(words):
        rare = DictUtils.is_rare(dict_e, (token, tags[position]))
        features = FeaturesUtils.get_word_features(position, words, tags, rare)
        # Store the tag next to the extracted features.
        features['tag'] = tags[position]
        words_features_list.append(features)
        print(features)
Exemplo n.º 3
0
def get_word_features_list(i, words, prev_predictions, prev_prev_predictions,
                           counters_dict):
    """Build features for word ``i`` under every pair of preceding predictions.

    Iterates over all combinations of an entry from ``prev_predictions``
    (outer loop) and an entry from ``prev_prev_predictions`` (inner loop).
    For each combination the tag list from ``get_tag_list`` is passed to
    ``FeaturesUtils.get_word_features``.

    Args:
        i: Index of the word in ``words``.
        words: Sequence of words.
        prev_predictions: Iterable of candidates for the previous prediction.
        prev_prev_predictions: Iterable of candidates for the prediction
            before the previous one.
        counters_dict: Passed to ``DictUtils.is_rare`` along with ``words[i]``.

    Returns:
        A tuple ``(features, pairs)`` of two lists in matching order:
        ``features[k]`` was computed for the
        ``(prev_prediction, prev_prev_prediction)`` tuple ``pairs[k]``.
    """
    features = []
    pairs = []
    for prev_tag in prev_predictions:
        for prev_prev_tag in prev_prev_predictions:
            tag_context = get_tag_list(i, prev_tag, prev_prev_tag)
            # Evaluated per combination so that ``words[i]`` is never touched
            # when there are no combinations.
            rare = DictUtils.is_rare(counters_dict, words[i])
            features.append(
                FeaturesUtils.get_word_features(i, words, tag_context, rare))
            pairs.append((prev_tag, prev_prev_tag))
    return features, pairs