Пример #1
0
def all_feature_classify(training_file, num):
    """Fit a random forest on sampled training data and print its deviances.

    The training file is read with ``utilities.read_training_file``, the
    result is passed with ``num`` to ``utilities.sample``, and the sampled
    data is split by ``classification.prepare_data`` into a ``*_train`` part
    and a ``*_cv`` part (presumably cross-validation; confirm against that
    helper).  Features are generated for both parts, a classifier is built
    by ``classification.random_forest`` and the binomial deviance of its
    probabilities is printed, first for the train part and then for the cv
    part.

    Args:
        training_file: Forwarded unchanged to
            ``utilities.read_training_file``.
        num: Forwarded unchanged to ``utilities.sample``; presumably the
            number of samples to keep (verify against that helper).

    Returns:
        None.  The two deviance values are written to standard output.
    """
    y, meta_data = utilities.read_training_file(training_file)
    y, meta_data = utilities.sample(y, meta_data, num)

    meta_data_train, y_train, meta_data_cv, y_cv = classification.prepare_data(
        meta_data, y)

    x_train, x_cv = feature_selection.generate_features(
        meta_data_train, y_train, meta_data_cv)

    clf = classification.random_forest(x_train, y_train, x_cv, y_cv)

    # A parenthesised single-argument print produces the same output on
    # Python 2 and Python 3, whereas the bare print statement is a
    # SyntaxError on Python 3.
    print(utilities.binomial_deviance(
        y_train, classification.get_prob(clf, x_train)))
    print(utilities.binomial_deviance(
        y_cv, classification.get_prob(clf, x_cv)))
Пример #2
0
def all_feature_classify(training_file, num):
    """Train on a sample of ``training_file`` and report binomial deviance.

    Steps, in order:

    1. ``utilities.read_training_file`` loads labels and meta data.
    2. ``utilities.sample`` is applied to them with ``num``.
    3. ``classification.prepare_data`` splits the sample into ``*_train``
       and ``*_cv`` parts (presumably a cross-validation hold-out; confirm
       against that helper).
    4. ``feature_selection.generate_features`` builds the feature sets for
       both parts.
    5. ``classification.random_forest`` returns the classifier whose
       probabilities (``classification.get_prob``) are scored with
       ``utilities.binomial_deviance``.

    Args:
        training_file: Passed straight to ``utilities.read_training_file``.
        num: Passed straight to ``utilities.sample``; presumably the sample
            size (verify against that helper).

    Returns:
        None.  Two lines are printed: the deviance on the train part, then
        the deviance on the cv part.
    """
    y, meta_data = utilities.read_training_file(training_file)
    y, meta_data = utilities.sample(y, meta_data, num)

    split = classification.prepare_data(meta_data, y)
    meta_data_train, y_train, meta_data_cv, y_cv = split

    x_train, x_cv = feature_selection.generate_features(
        meta_data_train, y_train, meta_data_cv)

    clf = classification.random_forest(x_train, y_train, x_cv, y_cv)

    # print(...) with one argument is valid and equivalent on Python 2 and
    # Python 3; the original print statement only parses on Python 2.
    train_deviance = utilities.binomial_deviance(
        y_train, classification.get_prob(clf, x_train))
    print(train_deviance)

    cv_deviance = utilities.binomial_deviance(
        y_cv, classification.get_prob(clf, x_cv))
    print(cv_deviance)
Пример #3
0
def spring_brother(training_file, test_file, submission_file):
    """Fit on the full training file and write predictions for the test file.

    Labels and meta data come from ``utilities.read_training_file``; ids and
    test meta data come from ``utilities.read_test_file``.  Features for both
    sets are produced by ``feature_selection.generate_features``, the
    classifier is built by ``classification.random_forest`` (called with
    ``None`` for both of its last two arguments), and the probabilities
    returned by ``classification.get_prob`` for the test features are handed
    to ``utilities.write_submission_file`` together with the ids.

    Args:
        training_file: Forwarded to ``utilities.read_training_file``.
        test_file: Forwarded to ``utilities.read_test_file``.
        submission_file: Forwarded to ``utilities.write_submission_file``.

    Returns:
        None.
    """
    labels, train_meta = utilities.read_training_file(training_file)
    test_ids, test_meta = utilities.read_test_file(test_file)

    features = feature_selection.generate_features(
        train_meta, labels, test_meta)
    train_features, test_features = features

    # No held-out data is supplied here: the last two arguments are None.
    forest = classification.random_forest(train_features, labels, None, None)

    probabilities = classification.get_prob(forest, test_features)
    utilities.write_submission_file(submission_file, test_ids, probabilities)
Пример #4
0
def spring_brother(training_file, test_file, submission_file):
    """Produce a submission file from a training file and a test file.

    The function reads both inputs through ``utilities``, generates features
    with ``feature_selection.generate_features``, builds a classifier with
    ``classification.random_forest`` (its third and fourth arguments are
    ``None``), and writes the test-set probabilities from
    ``classification.get_prob`` via ``utilities.write_submission_file``.

    Args:
        training_file: Argument for ``utilities.read_training_file``.
        test_file: Argument for ``utilities.read_test_file``.
        submission_file: First argument for
            ``utilities.write_submission_file``.

    Returns:
        None.
    """
    targets, meta_train = utilities.read_training_file(training_file)
    row_ids, meta_test = utilities.read_test_file(test_file)

    feats_train, feats_test = feature_selection.generate_features(
        meta_train,
        targets,
        meta_test,
    )

    model = classification.random_forest(feats_train, targets, None, None)

    # Probabilities are computed for the test features and written out
    # alongside the ids read from the test file.
    utilities.write_submission_file(
        submission_file,
        row_ids,
        classification.get_prob(model, feats_test),
    )