示例#1
0
def main():
    """Run NUM_TESTS randomized train/predict/evaluate rounds on the
    eviction dataset, print the averaged metrics, and append one CSV row
    ('nn', loss, accuracy, precision, recall averages) to FILENAME.
    """
    dataset = data.load_eviction(dataset_type=DATASET_TYPE)

    # One list per metric; each round appends one entry to each.
    metrics = {'loss': [], 'acc': [], 'prec': [], 'rec': []}

    with open(FILENAME, 'w+') as f:
        for round_num in range(NUM_TESTS):

            print('*' * 80)
            print('Round %s' % round_num)

            # Re-split each round so every test sees a fresh train/test draw.
            dataset.split()
            train(dataset)
            predictions = predict(dataset)
            loss, acc, prec, rec = evaluate(dataset.y_test, predictions)

            metrics['loss'].append(loss)
            metrics['acc'].append(acc)
            metrics['prec'].append(prec)
            metrics['rec'].append(rec)

        # Average each metric across all rounds.
        loss_avg = np.array(metrics['loss']).mean()
        accs_avg = np.array(metrics['acc']).mean()
        precs_avg = np.array(metrics['prec']).mean()
        recs_avg = np.array(metrics['rec']).mean()

        print('Loss average: %s' % loss_avg)
        print('Accuracy average: %s' % accs_avg)
        print('Precision average: %s' % precs_avg)
        print('Recall average: %s' % recs_avg)

        f.write('%s, %s, %s, %s, %s\n' % ('nn', loss_avg, accs_avg,
                                          precs_avg, recs_avg))
"""Preprocessing script to find most stable features, i.e. those that are
selective across many randomized trials.
"""

import pandas as pd
from sklearn.linear_model import RandomizedLogisticRegression
import data

if __name__ == '__main__':
    # Load the eviction dataset and the pickled mapping from feature codes
    # to human-readable names.
    dataset = data.load_eviction()
    descriptions = pd.read_pickle('data/private/feature_codes_to_names.pck')

    print('Data loaded.')

    # Stability selection: fit logistic regression on many randomized
    # subsamples; scores_ holds, per feature, the fraction of trials in
    # which that feature was selected.
    # NOTE(review): RandomizedLogisticRegression was deprecated in
    # scikit-learn 0.19 and removed in 0.21 -- confirm this script is
    # pinned to an older sklearn release.
    rlogistic = RandomizedLogisticRegression(normalize=True)
    rlogistic.fit(dataset.X_train, dataset.y_train)

    print('Model fitted.')

    # Pair each feature's stability score (rounded to 4 decimals) with its
    # column code; sorted() orders the pairs ascending by score.
    features = sorted(
        zip(map(lambda x: round(x, 4), rlogistic.scores_),
            dataset.X_train.columns))
    print('Number of features:\t\t%s' % len(features))

    # Keep only features selected in at least one randomized trial.
    nonzero_features = [(score, code) for score, code in features if score > 0]
    print('Number of nonzero features:\t%s' % len(nonzero_features))

    # Collect the surviving column codes, printing a separator per feature.
    # NOTE(review): this chunk appears truncated -- columns/descriptions
    # are built but not used in the visible lines; presumably the original
    # continues by printing each feature's description.
    columns = []
    for score, code in nonzero_features:
        columns.append(code)
        print('-' * 80)
示例#3
0
"""Predict eviction response variable.
"""

import data
from predict import utils
from sklearn.svm import OneClassSVM

if __name__ == '__main__':

    # Three views of the eviction data: positive-class only, negative-class
    # only, and the combined set to score.
    dataset_pos = data.load_pos_eviction()
    dataset_neg = data.load_neg_eviction()
    dataset_all = data.load_eviction()

    # nu: The proportion of outliers we expect in our data.
    # The positive-only model treats most of its training data as outliers
    # (nu=0.9); the negative-only model treats most as inliers (nu=0.1).
    model_pos = OneClassSVM(kernel='linear', nu=0.9)
    model_pos.fit(dataset_pos.X_train)

    model_neg = OneClassSVM(kernel='linear', nu=0.1)
    model_neg.fit(dataset_neg.X_train)

    # Score every row of the combined training set with both one-class models.
    predictions_pos = model_pos.predict(dataset_all.X_train)
    predictions_neg = model_neg.predict(dataset_all.X_train)

    # +1 is inlier, -1 is outlier. We want those who are evicted, to be +1
    # and those who are not evicted to be 0.

    # Outliers, those evicted, to be 1.
    predictions_neg = (predictions_neg == -1).astype(int)

    # Inliers, those evicted, to be 1.
    predictions_pos = (predictions_pos == 1).astype(int)
示例#4
0
def handcrafted():
    """Benchmark MODELS on the handcrafted eviction feature set,
    writing results to the HANDCRAFTED results CSV under DIR."""
    out_path = '%s/results_HANDCRAFTED_dataset_OVER.csv' % DIR
    test_runner.predict(
        MODELS,
        data.load_eviction(dataset_type='handcrafted'),
        out_path,
        N_TESTS,
        OVERSAMPLE)
示例#5
0
def rlogistic():
    """Benchmark MODELS on the rlogistic-selected eviction features,
    writing results to the RLOGISTIC results CSV under DIR."""
    results_csv = '%s/results_RLOGISTIC_dataset_OVER.csv' % DIR
    selected = data.load_eviction(dataset_type='rlogistic')
    test_runner.predict(MODELS, selected, results_csv, N_TESTS, OVERSAMPLE)
示例#6
0
def pca():
    """Benchmark MODELS on the PCA-reduced eviction dataset,
    writing results to the PCA results CSV under DIR."""
    reduced = data.load_eviction(dataset_type='pca')
    out_file = '%s/results_PCA_dataset_OVER.csv' % DIR
    test_runner.predict(MODELS, reduced, out_file, N_TESTS, OVERSAMPLE)
示例#7
0
def full():
    """Benchmark MODELS on the full (unreduced) eviction dataset,
    writing results to the FULL results CSV under DIR."""
    test_runner.predict(
        MODELS,
        data.load_eviction(),
        '%s/results_FULL_dataset_OVER.csv' % DIR,
        N_TESTS,
        OVERSAMPLE)