Exemplo n.º 1
0
def predict(nlp, cls, file_path_txt, out_file_path):
    """Classify the candidates of a text file and write the positive ones.

    Features are extracted from ``file_path_txt`` by a ``FeatureBuilder``
    constructed from ``nlp`` and handed to ``cls.predict``.  Every candidate
    whose predicted label equals 1 becomes one line of ``out_file_path``
    made of tab-terminated fields: ``sent<sent_num>``, the first object,
    the literal relation name ``Live_In`` and the second object.

    Args:
        nlp: Object passed unchanged to ``FeatureBuilder``.
        cls: Classifier exposing ``predict(features)``; it must return one
            label per feature entry, in the same order.
        file_path_txt: Path of the input text file to extract features from.
        out_file_path: Path of the output file; it is opened in ``'w'``
            mode, so any existing content is overwritten.

    Returns:
        None.  The result is the file written at ``out_file_path``.
    """
    fb = FeatureBuilder(nlp)
    # Each entry is unpacked below as ((sent_num, obj1, obj2), features).
    # The collection is consumed twice (by the classifier and by the loop),
    # so it is assumed to be re-iterable -- TODO confirm against
    # FeatureBuilder.get_features_of_file.
    features_matrix_str = fb.get_features_of_file(file_path_txt)

    pred_labels = cls.predict(features_matrix_str)

    # The context manager guarantees the file is closed even when unpacking
    # or writing raises part-way through the loop.
    with open(out_file_path, 'w') as out_file:
        for (idxs, _features), label in zip(features_matrix_str, pred_labels):
            if label != 1:
                continue
            sent_num, obj1, obj2 = idxs
            fields = ('sent' + str(sent_num), str(obj1), 'Live_In', str(obj2))
            # Every field, including the last, is followed by a tab.
            out_file.write('\t'.join(fields) + '\t\n')
Exemplo n.º 2
0
def train_classifier(nlp, train_txt_file, train_annotation_file):
    """Train a ``MyClassifier`` on an annotated text file and return it.

    Features are extracted from ``train_txt_file`` with a ``FeatureBuilder``
    built on ``nlp``; gold labels for those features come from a
    ``LabelChecker`` built on the parsed ``train_annotation_file``.  After
    training, the classifier is run on the same training features and two
    accuracy figures are printed to stdout as percentages.

    Args:
        nlp: Object passed unchanged to ``FeatureBuilder``.
        train_txt_file: Path of the training text file.
        train_annotation_file: Path of the annotation file given to
            ``annotation_to_dict``.

    Returns:
        The trained ``MyClassifier`` instance.
    """
    fb = FeatureBuilder(nlp)
    features_matrix = fb.get_features_of_file(train_txt_file)

    annotation_dict, r2i = annotation_to_dict(train_annotation_file)
    lc = LabelChecker(annotation_dict, r2i)

    # The classifier is constructed from the builder's feature index, which
    # is read only after the features above have been extracted.
    cls = MyClassifier(fb.features_to_index)
    gold_labels = lc.get_labels_of(features_matrix)
    cls.train_on(features_matrix, gold_labels)

    # Sanity check on the training data itself (not a held-out set).
    pred_labels = cls.predict(features_matrix)
    acc_all = accuracy_score(gold_labels, pred_labels)
    # NOTE(review): accuracy_of is defined elsewhere; the "filtered" wording
    # suggests it scores only a subset of the labels -- confirm.
    acc_filtered = accuracy_of(gold_labels, pred_labels)

    # Single-argument parenthesised print yields identical output under the
    # Python 2 print statement and the Python 3 print function.
    print('train - accuracy all %0.2f%%' % (acc_all * 100.0))
    print('train - accuracy filtered %0.2f%%' % (acc_filtered * 100.0))

    return cls