Example No. 1
def MLP_test():
    from preprocess import process_data, partition_data
    print('processing data...')
    X, y = process_data(collapse=False,
                        encode=True,
                        normalize=True,
                        predict_missing=True,
                        k_predict=3)
    [test, validate, train] = partition_data(X, y)

    print('fitting model... ')
    from sklearn.neural_network import MLPClassifier
    model = MLPClassifier(hidden_layer_sizes=(1000, 2000, 1000, 100, 50),
                          verbose=False)
    model.fit(train[0], train[1])

    valid_prob = model.predict_proba(validate[0])

    print(valid_prob[0:5])
    print(validate[1][0:5])

    from cross_entropy import cross_entropy
    print(valid_prob.shape, validate[1].shape)
    print('cross entropy:', cross_entropy(validate[1], valid_prob))

    from risk import empirical_risk
    print('mse:', empirical_risk('mse', valid_prob, validate[1]))

    from sklearn.metrics import accuracy_score
    print('accuracy', accuracy_score(validate[1], model.predict(validate[0])))
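
Here cross_entropy and empirical_risk come from project-local modules that are not shown in this example. As a rough cross-check, a comparable cross-entropy figure can be obtained with scikit-learn's log_loss; this is a sketch, assuming validate[1] holds class labels and valid_prob is the probability matrix returned by predict_proba above:

    from sklearn.metrics import log_loss

    # mean cross-entropy between true labels and predicted class probabilities;
    # labels=model.classes_ keeps the column order aligned with predict_proba
    print('cross entropy (sklearn):',
          log_loss(validate[1], valid_prob, labels=model.classes_))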
Example No. 2
def cross_validate():
    import numpy as np

    # candidate architectures; note the trailing comma so the
    # single-layer entries are tuples rather than bare ints
    net_hidden_layers = [
        (100,),
        (1000,),
        (100, 100),
        (1000, 1000),
        (1000, 100),
        (1000, 100, 100),
        (1000, 1000, 100),
        (1000, 1000, 100, 100),
        (1000, 2000, 100, 500, 100),
        (2000, 1000, 500, 100, 50),
    ]
    models = [FFNN(h) for h in net_hidden_layers]

    from preprocess import process_data, partition_data
    print('processing data...')
    X, y = process_data(collapse=False,
                        encode=True,
                        normalize=True,
                        predict_missing=True,
                        k_predict=3)

    from cross_validation import cross_validation
    r = cross_validation(X, y, models)

    print(r)
    i = np.argmin(r)

    print('best model...', net_hidden_layers[i])
    model = FFNN(net_hidden_layers[i])

    # partition order matches the partitions list: [0] is the 20% validation
    # split, [1] is the 80% training split
    partitioned_data = partition_data(X, y, partitions=[0.2, 0.8])

    train = partitioned_data[1]
    valid = partitioned_data[0]

    model.fit(train[0], train[1])
    p = model.predict(valid[0])

    from evaluate import evaluate
    print(valid[1].shape, p.shape)
    evaluate(valid[1], p)
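
For comparison only, a minimal sketch of the same architecture search done with scikit-learn's GridSearchCV, using MLPClassifier as a stand-in for the project's FFNN class (this is not how the example above selects its model):

    from sklearn.neural_network import MLPClassifier
    from sklearn.model_selection import GridSearchCV

    # 5-fold grid search over the same candidate hidden-layer shapes
    search = GridSearchCV(MLPClassifier(max_iter=500),
                          param_grid={'hidden_layer_sizes': net_hidden_layers},
                          cv=5)
    search.fit(X, y)
    print('best architecture:', search.best_params_['hidden_layer_sizes'])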
Example No. 3
def cross_validation(X, y, models):
    import numpy as np
    import preprocess
    import risk

    k = len(models)

    # split the data into k equal folds; candidate i is validated on fold i
    # and trained on the remaining k - 1 folds
    partitioned_data = preprocess.partition_data(
        X, y, partitions=[1 / k for _ in range(k)])

    # print(partitioned_data)

    r = []

    for i, model in enumerate(models):
        valid = partitioned_data[i]
        # training set = every fold except the held-out fold i
        train_X = []
        train_y = []
        primed = False
        for j in range(k):
            if i == j:
                continue
            if not primed:
                train_X = partitioned_data[j][0]
                train_y = partitioned_data[j][1]
                primed = True
            else:
                train_X = np.append(train_X, partitioned_data[j][0], axis=0)
                train_y = np.append(train_y, partitioned_data[j][1], axis=0)

        try:
            model.fit(train_X, train_y)

            p = model.predict(valid[0])

            r.append(risk.empirical_risk('mse', p, valid[1]))
        except Exception as e:
            print(e)
            r.append(100000)  # large sentinel risk so a failing model is never selected

    return r
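
Note that cross_validation above validates each candidate model on a single, different fold. A fuller comparison would score every model on every fold and average; a minimal sketch with scikit-learn's KFold, assuming X and y are NumPy arrays and each model exposes fit/predict:

def cross_validation_full(X, y, models, k=5):
    import numpy as np
    from sklearn.model_selection import KFold
    from sklearn.metrics import mean_squared_error

    kf = KFold(n_splits=k, shuffle=True, random_state=0)
    scores = []
    for model in models:
        fold_mse = []
        for train_idx, valid_idx in kf.split(X):
            model.fit(X[train_idx], y[train_idx])
            p = model.predict(X[valid_idx])
            fold_mse.append(mean_squared_error(y[valid_idx], p))
        scores.append(np.mean(fold_mse))  # mean validation MSE across folds
    return scores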
Example No. 4
    from evaluate import evaluate
    print(valid[1].shape, p.shape)
    evaluate(valid[1], p)


if __name__ == '__main__':

    from preprocess import process_data, partition_data
    print('processing data...')
    X, y = process_data(collapse=False,
                        encode=True,
                        normalize=True,
                        predict_missing=True,
                        k_predict=3)

    partitioned_data = partition_data(X, y, partitions=[0.2, 0.8])

    train = partitioned_data[1]
    valid = partitioned_data[0]

    #model = FFNN((1000, 100, 100), num_iterations=500)
    model = FFNN((1000, 10000, 1000, 100, 50), num_iterations=500)

    model.fit(train[0], train[1])

    from evaluate import evaluate
    evaluate(train[1], model.predict(train[0]))
    evaluate(valid[1], model.predict(valid[0]))

    from sklearn.metrics import roc_curve
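
The excerpt stops right after importing roc_curve. For reference, a minimal sketch of its typical use, assuming a binary target and a 1-D array valid_scores of positive-class scores for the validation examples (e.g. one column of predict_proba output); valid_scores is illustrative, not defined in the excerpt:

    # fpr/tpr traced over decision thresholds; valid_scores is hypothetical
    fpr, tpr, thresholds = roc_curve(valid[1], valid_scores)
    print('first ROC points:', list(zip(fpr, tpr))[:5])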
Example No. 5
    print('processing data...')
    usecols = ([i for i in range(0, 26)] + [i for i in range(87, 98)]
               + [161, 163] + [i for i in range(219, 228)] + [279])
    X, y = preprocess.process_data(usecols=usecols,
                                   collapse=True, normalize=True, encode=False,
                                   predict_missing=True, k_predict=3)

    print('performing cross-validation...')
    models = [kNN(i) for i in range(1,10)]
    r = cross_validation(X, y, models)

    print(r)
    k = np.argmin(r) + 1  # the models list starts at k = 1

    print('evaluating best model (k:', k, ')...')
    partitioned_data = preprocess.partition_data(X, y, partitions=[0.2,0.8])

    train = partitioned_data[1]
    valid = partitioned_data[0]

    model = kNN(k)

    model.fit(train[0], train[1])
    p = model.predict(valid[0])

    from evaluate import evaluate
    print('evaluating validation and training sets for arrhythmia presence')
    evaluate(valid[1], p)
    evaluate(train[1], model.predict(train[0]))
    # now we have a kNN for whether arrhythmia is present or not. Here's an idea: use a
    # *different* predictor exclusively for the specific classes (see the sketch below)!
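
A minimal sketch of that two-stage idea, with hypothetical X_arrhythmic / y_arrhythmic holding only the arrhythmic training examples and their uncollapsed class labels:

    # stage 1: binary kNN decides whether arrhythmia is present at all
    presence_model = kNN(k)
    presence_model.fit(train[0], train[1])

    # stage 2: a separate classifier, trained only on arrhythmic examples,
    # assigns the specific arrhythmia class where stage 1 predicts presence
    class_model = kNN(k)
    class_model.fit(X_arrhythmic, y_arrhythmic)  # hypothetical training data

    is_present = presence_model.predict(valid[0])
    class_pred = class_model.predict(valid[0][is_present == 1])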
Example No. 6
    # A holds one predicted probability per example, shape (1, m)
    A = sigmoid(np.dot(w.T, X) + b)

    # threshold at 0.5 to turn probabilities into hard 0/1 predictions
    for i in range(A.shape[1]):
        Y_prediction[0, i] = 1 if A[0, i] > 0.5 else 0

    assert(Y_prediction.shape == (1, m))

    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5):
    print(X_train.shape)
    w, b = initialize_with_zeros(X_train.shape[1])

    # gradient descent
    w, b, dw, db, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate)

    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)

    print('train: {} %'.format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print('test: {} %'.format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

if __name__ == "__main__":
    import preprocess
    data = preprocess.process_data()
    [test, train] = preprocess.partition_data(data, [0.2, 0.8])

    model(train[:, 0:-1], train[:, -1], test[:, 0:-1], test[:, -1], 2000, 0.005)
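
sigmoid, initialize_with_zeros, and optimize are not shown in this excerpt. A minimal sketch of standard logistic-regression versions, assuming the feature-major (n_features, m) layout that np.dot(w.T, X) implies (row-major data like that passed in __main__ above would need to be transposed first):

import numpy as np

def sigmoid(z):
    # elementwise logistic function
    return 1 / (1 + np.exp(-z))

def initialize_with_zeros(dim):
    # zero column vector of weights and a zero bias
    return np.zeros((dim, 1)), 0.0

def optimize(w, b, X, Y, num_iterations, learning_rate):
    # plain batch gradient descent on the logistic-regression cross-entropy cost;
    # X: (n_features, m), Y: (1, m)
    m = X.shape[1]
    costs = []
    for i in range(num_iterations):
        A = sigmoid(np.dot(w.T, X) + b)          # (1, m) activations
        cost = -np.mean(Y * np.log(A) + (1 - Y) * np.log(1 - A))
        dw = np.dot(X, (A - Y).T) / m            # gradient w.r.t. w
        db = np.mean(A - Y)                      # gradient w.r.t. b
        w = w - learning_rate * dw
        b = b - learning_rate * db
        if i % 100 == 0:
            costs.append(cost)
    return w, b, dw, db, costs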