def predict(nlp, cls, file_path_txt, out_file_path):
    """Write every candidate pair that `cls` labels 1 as a Live_In relation.

    Features are extracted from `file_path_txt` by a FeatureBuilder built
    around `nlp` and passed to `cls.predict`.  Each feature row is expected
    to unpack as ``((sent_num, obj1, obj2), features)``.  For every row whose
    predicted label equals 1, one line is written to `out_file_path`:

        sent<sent_num> TAB <obj1> TAB Live_In TAB <obj2> TAB NEWLINE

    The output file is created (or truncated) even when no row is labelled 1.
    Nothing is returned.
    """
    fb = FeatureBuilder(nlp)
    features_matrix_str = fb.get_features_of_file(file_path_txt)
    pred_labels = cls.predict(features_matrix_str)

    # `with` guarantees the handle is closed even if a row fails to unpack,
    # convert to str, or write part-way through the loop.
    with open(out_file_path, 'w') as out_file:
        for (idxs, _features), label in zip(features_matrix_str, pred_labels):
            if label == 1:
                sent_num, obj1, obj2 = idxs
                fields = ('sent' + str(sent_num), str(obj1), 'Live_In', str(obj2))
                # The tab before the newline is part of the existing format.
                out_file.write('\t'.join(fields) + '\t\n')
def train_classifier(nlp, train_txt_file, train_annotation_file):
    """Train a MyClassifier on an annotated file and print training accuracy.

    Features are extracted from `train_txt_file` by a FeatureBuilder built
    around `nlp`.  Gold labels for those features come from a LabelChecker
    constructed from the result of ``annotation_to_dict(train_annotation_file)``.
    After training, the classifier is run on the same training features and
    two figures are printed as percentages: ``accuracy_score`` ("all") and
    ``accuracy_of`` ("filtered").

    Returns the trained classifier.
    """
    fb = FeatureBuilder(nlp)
    features_matrix = fb.get_features_of_file(train_txt_file)

    annotation_dict, r2i = annotation_to_dict(train_annotation_file)
    lc = LabelChecker(annotation_dict, r2i)

    # features_to_index is read only after the features have been extracted,
    # exactly as in the original call order.
    cls = MyClassifier(fb.features_to_index)
    gold_labels = lc.get_labels_of(features_matrix)
    cls.train_on(features_matrix, gold_labels)

    pred_labels = cls.predict(features_matrix)
    acc_all = accuracy_score(gold_labels, pred_labels)
    acc_filtered = accuracy_of(gold_labels, pred_labels)

    # Parenthesised single-argument print: same output on Python 2, and no
    # longer a SyntaxError on Python 3.
    print('train - accuracy all %0.2f%%' % (acc_all * 100.0))
    print('train - accuracy filtered %0.2f%%' % (acc_filtered * 100.0))
    return cls
from FeatureBuilder import FeatureBuilder

# Script: run the FeatureBuilder over one CSV file and print the resulting
# feature frame.
#
# Alternative input kept from the original script:
# fb = FeatureBuilder(csv_file_name='~/sdb1/ais/ais_data.csv')
fb = FeatureBuilder(
    csv_file_name='~/sdb1/ais/data/frequencyOfEdgesInData_ais201710_compact_version.csv')
fb.run()

# Parenthesised single-argument print: same output on Python 2, and no longer
# a SyntaxError on Python 3.
print(fb.get_new_feature_df())
from Classifier import Classifier
from FeatureBuilder import FeatureBuilder

# Script: train three one-vs-rest classifiers (company, location, goods).

# The feature pipeline is built and its model loaded before any split is made.
features = FeatureBuilder()
features.load_model()

# Path passed to tpot_classifiers as the last argument for each classifier.
company_classifier_path = './models/one_vs_rest_company'
location_classifier_path = './models/one_vs_rest_location'
goods_classifier_path = './models/one_vs_rest_goods'

# one_vs_rest_generator(i) is unpacked as (X_train, y_train, X_test, y_test).
# Index 0 feeds the company split, 1 the location split and 2 the goods split.
# All three splits are generated before the Classifier is constructed.
(company_X_train, company_y_train,
 company_X_test, company_y_test) = features.one_vs_rest_generator(0)
(location_X_train, location_y_train,
 location_X_test, location_y_test) = features.one_vs_rest_generator(1)
(goods_X_train, goods_y_train,
 goods_X_test, goods_y_test) = features.one_vs_rest_generator(2)

classifier = Classifier(
    features.company_feature_encoder,
    features.location_feature_encoder,
    features.goods_feature_encoder,
)

# One argument set per classifier, run in the order company, location, goods.
_training_jobs = (
    (company_X_train, company_y_train,
     company_X_test, company_y_test, company_classifier_path),
    (location_X_train, location_y_train,
     location_X_test, location_y_test, location_classifier_path),
    (goods_X_train, goods_y_train,
     goods_X_test, goods_y_test, goods_classifier_path),
)
for _job in _training_jobs:
    classifier.tpot_classifiers(*_job)
def build_feature_builders(self):
    """Reset ``self.feature_list`` and fill it with one FeatureBuilder per option.

    Every entry of ``self.config["feature"]`` is turned into
    ``FeatureBuilder(entry, self.config, self.dataloader)``; the builders are
    stored in iteration order.  Nothing is returned.
    """
    # The attribute is replaced with the new (still empty) list up front, so
    # it never keeps builders left over from a previous call.
    builders = []
    self.feature_list = builders
    for option in self.config["feature"]:
        builders.append(FeatureBuilder(option, self.config, self.dataloader))