def get_model_root(self, data_type):
    """Return the root location for this model's output of the given kind.

    The model name is turned into a method name and handed to
    ``io_utils.get_method_root``:

    * ``DataType.Train`` -> ``u'<model_name>_train'``
    * ``DataType.Test``  -> ``<model_name>`` unchanged
    * anything else      -> ``None`` is passed through as the method name

    :param data_type: a ``DataType`` value selecting train or test output.
    :return: whatever ``io_utils.get_method_root`` returns for that name.
    """
    if data_type == DataType.Train:
        return io_utils.get_method_root(
            u'{}_train'.format(self.model_name))

    if data_type == DataType.Test:
        return io_utils.get_method_root(self.model_name)

    # Unknown data type: the lookup is still performed, with no method name.
    return io_utils.get_method_root(None)
# MAIN
#
# Baseline experiment: fit every baseline estimator on the train vectors,
# predict opinions for the test collections, save them, evaluate the saved
# results and write one CSV report per baseline.
synonyms_filepath = io_utils.get_synonyms_filepath()

print("Preparing Train Collection")
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())

print("Preparing Test Collection")
X_test, test_collections = create_test_data(io_utils.get_test_vectors_list())

# Maps the method name (also used for output locations) to its estimator.
BASELINES = {
    'baseline_pos': estimators.baseline_pos,
    'baseline_neg': estimators.baseline_neg,
    'baseline_rand': estimators.baseline_rand,
    'baseline_strat': estimators.baseline_strat,
}

# baseline estimators
for method_name, estimator in BASELINES.items():
    print("Evaluate for baseline estimator: {}".format(method_name))

    # Train, predict and persist the predicted opinions under the method name.
    test_opinions = fit_and_predict(
        method_name,
        estimator,
        X_train,
        y_train,
        X_test,
        test_collections,
        synonyms_filepath)
    io_utils.save_test_opinions(test_opinions, method_name)

    # Evaluate the files that were just saved for this method.
    files_to_compare = io_utils.create_files_to_compare_list(method_name)
    method_root = io_utils.get_method_root(method_name)
    edf = evaluate(
        estimator,
        method_name,
        files_to_compare,
        method_root,
        synonyms_filepath)

    # One evaluation report per baseline.
    report_path = "{}/test_{}.csv".format(
        io_utils.eval_baseline_root(), method_name)
    edf.to_csv(report_path)
# Univariate feature-selection experiment: for every selector in UNIVARIATE,
# reduce the train/test feature matrices, then fit, predict, save and evaluate
# every classifier in CLASSIFIERS on the reduced data.
#
# NOTE(review): relies on `synonyms_filepath`, `UNIVARIATE` and `CLASSIFIERS`
# being defined earlier in the script.
print("Preparing Train Collection")
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())

print("Preparing Test Collection")
# The test collection must come from the *test* vectors; building it from the
# train vectors would evaluate every model on the data it was fitted on.
X_test, test_collections = create_test_data(io_utils.get_test_vectors_list())

# Univariate
for univariate_model_name in UNIVARIATE:
    # Fit the selector on the training data only, then apply the same
    # transformation to both train and test matrices.
    model = UNIVARIATE[univariate_model_name]
    model.fit(X_train, y_train)
    X_train_new = model.transform(X_train)
    X_test_new = model.transform(X_test)

    for method_name in CLASSIFIERS:
        classifier = CLASSIFIERS[method_name]

        # Combined identifier of the (classifier, selector) pair. It is used
        # for every saved artefact below so that the evaluation reads the
        # opinions saved in this iteration and different selectors do not
        # overwrite each other's reports.
        name = "{}_{}".format(method_name, univariate_model_name)
        print("Evaluate for univariate estimator: {}".format(name))

        test_opinions = fit_and_predict(
            name,
            classifier,
            X_train_new,
            y_train,
            X_test_new,
            test_collections,
            synonyms_filepath)
        io_utils.save_test_opinions(test_opinions, name)

        # Feature report built from the selector's support (no fitted
        # classifier is passed, hence None as the first argument).
        idf = create_model_feature_importances(
            None,
            name,
            model.get_support(),
            io_utils.read_feature_names())

        # Evaluate the opinions that were saved above under `name`.
        edf = evaluate(
            classifier,
            name,
            io_utils.create_files_to_compare_list(name),
            io_utils.get_method_root(name),
            synonyms_filepath)

        edf.to_csv("{}/test_{}.csv".format(
            io_utils.eval_univariate_root(), name))
        idf.to_csv("{}/test_{}.features.csv".format(
            io_utils.eval_features_filepath(), name))