Пример #1
0
    def get_model_root(self, data_type):
        """Return the method root that corresponds to ``data_type``.

        The name passed to ``io_utils.get_method_root`` is chosen as follows:

        * ``DataType.Train`` -> ``u'<model_name>_train'``
        * ``DataType.Test``  -> ``self.model_name``
        * anything else      -> ``None``

        :param data_type: value compared against ``DataType.Test`` and
            ``DataType.Train``.
        :return: whatever ``io_utils.get_method_root`` returns for the
            selected name.
        """
        if data_type == DataType.Train:
            root_key = u'{}_train'.format(self.model_name)
        elif data_type == DataType.Test:
            root_key = self.model_name
        else:
            # Unknown data types are forwarded as None, unchanged.
            root_key = None

        return io_utils.get_method_root(root_key)
# MAIN
#
# Fits every baseline estimator on the train collection, predicts opinions
# for the test collection, stores them and writes an evaluation report.
synonyms_filepath = io_utils.get_synonyms_filepath()

# print(...) with a single parenthesised argument emits the same text under
# the Python 2 print statement as the unparenthesised form.
print("Preparing Train Collection")
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())
print("Preparing Test Collection")
X_test, test_collections = create_test_data(io_utils.get_test_vectors_list())

# Maps a method name to the baseline estimator evaluated under that name.
BASELINES = {
    'baseline_pos': estimators.baseline_pos,
    'baseline_neg': estimators.baseline_neg,
    'baseline_rand': estimators.baseline_rand,
    'baseline_strat': estimators.baseline_strat,
}

# baseline estimators
for method_name, estimator in BASELINES.items():
    print("Evaluate for baseline estimator: {}".format(method_name))

    # Predict and persist the test opinions under the method's name.
    test_opinions = fit_and_predict(
        method_name, estimator,
        X_train, y_train,
        X_test, test_collections,
        synonyms_filepath)
    io_utils.save_test_opinions(test_opinions, method_name)

    # Evaluate the estimator against the files stored for this method.
    files_to_compare = io_utils.create_files_to_compare_list(method_name)
    method_root = io_utils.get_method_root(method_name)
    edf = evaluate(estimator, method_name, files_to_compare, method_root,
                   synonyms_filepath)

    # One CSV report per baseline inside the baseline evaluation root.
    report_path = "{}/test_{}.csv".format(io_utils.eval_baseline_root(),
                                          method_name)
    edf.to_csv(report_path)
Пример #3
0
# Evaluates every classifier on features reduced by every univariate model.
# print(...) with a single parenthesised argument emits the same text under
# the Python 2 print statement as the unparenthesised form.
print("Preparing Train Collection")
X_train, y_train = create_train_data(io_utils.get_train_vectors_list())
print("Preparing Test Collection")
# The test collection must be built from the test vectors; building it from
# the train vectors would evaluate the models on the data they were fit on.
X_test, test_collections = create_test_data(io_utils.get_test_vectors_list())

# Univariate
for univariate_model_name in UNIVARIATE:
    model = UNIVARIATE[univariate_model_name]

    # Fit the univariate model on the train data only, then apply the same
    # transformation to both train and test feature matrices.
    model.fit(X_train, y_train)
    X_train_new = model.transform(X_train)
    X_test_new = model.transform(X_test)

    for method_name in CLASSIFIERS:
        classifier = CLASSIFIERS[method_name]

        # Combined "<classifier>_<univariate model>" name. Everything below
        # is stored and looked up under this name, so results of different
        # univariate models do not overwrite each other.
        name = "{}_{}".format(method_name, univariate_model_name)
        print("Evaluate for univariate estimator: {}".format(name))

        test_opinions = fit_and_predict(name, classifier,
                                        X_train_new, y_train, X_test_new,
                                        test_collections, synonyms_filepath)
        io_utils.save_test_opinions(test_opinions, name)

        # model.get_support() is passed together with the feature names to
        # build the feature report (presumably a mask of the kept features;
        # confirm against the UNIVARIATE model types).
        idf = create_model_feature_importances(None, name, model.get_support(),
                                               io_utils.read_feature_names())

        # Evaluate against the opinions saved above, i.e. under `name`.
        edf = evaluate(classifier, name,
                       io_utils.create_files_to_compare_list(name),
                       io_utils.get_method_root(name),
                       synonyms_filepath)

        # One report pair per (classifier, univariate model) combination.
        edf.to_csv("{}/test_{}.csv".format(io_utils.eval_univariate_root(),
                                           name))
        idf.to_csv("{}/test_{}.features.csv".format(
            io_utils.eval_features_filepath(), name))