Example No. 1
from sklearn.neural_network import MLPClassifier

def neuralNetPredicter(train, target, test):
    # L-BFGS solver with two hidden layers; activation and verbosity are set
    # as attributes, which is equivalent to passing them to the constructor.
    clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                        hidden_layer_sizes=(244, 50), random_state=1)
    clf.activation = 'logistic'
    clf.verbose = True
    clf.fit(train, target)
    predictions = clf.predict_proba(test)  # per-class probabilities
    return predictions
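A minimal call sketch; the synthetic arrays below are illustrative, not from the original source:

import numpy as np

rng = np.random.RandomState(0)
train = rng.rand(100, 10)             # 100 samples, 10 features
target = rng.randint(0, 2, size=100)  # binary labels
test = rng.rand(20, 10)

probs = neuralNetPredicter(train, target, test)
print(probs.shape)  # (20, 2): one probability column per class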
Example No. 2
import warnings

import pytest
import emlearn
from sklearn.neural_network import MLPClassifier

def test_unsupported_activation():
    # Fit a tiny model, then force an activation emlearn cannot convert.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model = MLPClassifier(hidden_layer_sizes=(2, ), max_iter=10)
        model.fit([[1.0]], [True])
    with pytest.raises(Exception) as ex:
        model.activation = 'fake22'
        emlearn.convert(model)
    assert 'Unsupported activation' in str(ex.value)
    assert 'fake22' in str(ex.value)
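For contrast, a hedged sketch of the supported path this test guards: converting a model whose activation emlearn does accept. The tiny dataset is illustrative.

import emlearn
from sklearn.neural_network import MLPClassifier

model = MLPClassifier(hidden_layer_sizes=(2, ), activation='relu', max_iter=10)
model.fit([[0.0], [1.0]], [False, True])
cmodel = emlearn.convert(model)  # succeeds for supported activations such as 'relu'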
import numpy as np
import mltools as ml  # CS178 course library; provides rescale()
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

# X and Y are loaded earlier (elided in the original source).
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.20,
                                                    random_state=10)

XtrS, params = ml.rescale(X_train)
Xvas, _ = ml.rescale(X_test, params)

from imblearn.under_sampling import RandomUnderSampler

rus = RandomUnderSampler(random_state=42)
X_res, y_res = rus.fit_resample(XtrS, y_train)

mlp = MLPClassifier(solver='sgd', max_iter=2000)
mlp.hidden_layer_sizes = (100, 100, 100)
mlp.activation = 'logistic'
mlp.learning_rate_init = 0.1
mlp.learning_rate = 'adaptive'
mlp.verbose = True

mlp.fit(X_res, y_res)

print(mlp.score(Xvas, y_test))

Xte = np.genfromtxt(
    'C:\\Users\\radad\\OneDrive\\Desktop\\cs178\\CS178-Kaggle-Competition\\X_test.txt',
    delimiter=None)
Yte = np.vstack((np.arange(Xte.shape[0]), mlp.predict_proba(Xte)[:, 1])).T
np.savetxt(
    'C:\\Users\\radad\\OneDrive\\Desktop\\cs178\\CS178-Kaggle-Competition\\Y_submit.txt',
    Yte,
    delimiter=',')

# A separate grid-search snippet begins here; its opening lines were lost,
# so the definitions below are reconstructed placeholders.
import time
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

maxIter = 200  # assumed starting value; the original was elided
clf = MLPClassifier(solver='lbfgs')
start_time = time.time()
param_grid = {
    'hidden_layer_sizes': [(50, ), (100, )],  # assumed candidates; originals elided
    'max_iter': [maxIter - 100, maxIter - 50, maxIter, maxIter + 50, maxIter + 100]
}
CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, n_jobs=-1)
CV_rfc.fit(X_train, y_train)
print(CV_rfc.best_params_)
print("--- %s seconds ---" % (time.time() - start_time))

# In[98]:

# Initializing the parameters to refit the classifier with the best grid-search values
hlSize = CV_rfc.best_params_['hidden_layer_sizes']
maxIter = CV_rfc.best_params_['max_iter']
start_time = time.time()
clf = MLPClassifier(solver='lbfgs')
clf.hidden_layer_sizes = (hlSize[0], )
clf.activation = 'relu'
clf.max_iter = maxIter
clf.fit(X_train, y_train)

print("--- %s seconds ---" % (time.time() - start_time))

# In[99]:

y_pred = clf.predict(X_test)
print(y_pred)

# In[101]:

# Import the scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy: how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
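GridSearchCV also records the cross-validated score of the winning configuration; a brief inspection sketch:

print(CV_rfc.best_score_)      # mean cross-validated accuracy of best_params_
print(CV_rfc.best_estimator_)  # the refit estimator, ready to predict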
Example No. 5
import csv
import time

import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

def neural_network(df_images, df_labels, no_of_neurons):
    '''Builds the neural network, delegates the accuracy calculation, and
    plots the loss curve. The helpers calculate_accuracy, classwise_accuracy
    and compute_confusion_matrix are defined elsewhere in the project.'''
    classifier = MLPClassifier(solver="adam",
                               verbose=True,
                               early_stopping=False,
                               max_iter=1000)
    classifier.alpha = 0.05
    classifier.hidden_layer_sizes = (no_of_neurons, )
    classifier.activation = "relu"
    classifier.learning_rate_init = 0.1

    # splitting the dataset
    train_X, test_X, train_y, test_y = train_test_split(
        df_images,
        df_labels,
        test_size=0.2,
        random_state=1,
    )

    # fit the model
    classifier.fit(train_X, train_y)

    # for calculating accuracy manually
    print('\n------------------------accuracies for ' + str(no_of_neurons) +
          ' neurons----------------------------\n')
    train_accuracy, test_accuracy = calculate_accuracy(classifier, test_X,
                                                       test_y, train_X,
                                                       train_y)

    print(
        '\n---------------------Class wise accuracies-------------------------\n'
    )
    classwise_accuracy(classifier, test_X, test_y, train_X, train_y)

    # plotting the loss curve vs iterations
    loss_values = classifier.loss_curve_
    plt.plot(loss_values)
    plt.title('loss vs iterations for : ' +
              str(classifier.learning_rate_init) +
              ' learning rate and no of neurons ' + str(no_of_neurons))
    plt.xlabel('iterations')
    plt.ylabel('loss')
    plt.show()

    # save the model to disk
    # Build a timestamped filename; '/' and ':' are invalid in file names on
    # some platforms, so replace them before saving.
    current_date_time = time.strftime("%d/%m/%Y") + '_' + time.strftime(
        "%H:%M:%S")
    filename = ('model_for_' + str(no_of_neurons) + '_' + current_date_time +
                '.sav').replace('/', '_').replace(':', '-')
    joblib.dump(classifier, filename)  # save the classifier with joblib
    print('\nmodel saved in file ' + filename + '\n')

    # writing on csv file for reporting
    row_to_write = []
    row_to_write.append([
        filename,
        str(no_of_neurons),
        str(train_accuracy),
        str(test_accuracy)
    ])
    with open('report_regul_mnist.csv', 'a') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerows(row_to_write)

    # printing train set classwise accuracy
    # classwise_accuracy(classifier, test_X, test_y, train_X, train_y)

    compute_confusion_matrix(classifier, df_labels, test_X, test_y)
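A hypothetical invocation, assuming df_images and df_labels hold flattened MNIST-style arrays loaded elsewhere; the neuron counts are illustrative:

for n in (50, 100, 200):
    neural_network(df_images, df_labels, no_of_neurons=n)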
Example No. 6
def neuralnet(dataset=None,
              nn_settings=None,
              train=False,
              save_settings=True,
              print_acc=False):
    """
    The following function is a wrapper function that will use a neural
    network in order to best estimate the protein count of the input
    images. The solver used in this neural network is L-BFGS which is a
    quasi-Newton method that is theoretically and experimentally
    verified to have a faster convergence and works well with low-
    dimensional input. L-BFGS does not work well in other settings
    because of its high memory requirement and lack of use of
    minibatches. There are other methods that can be tried (Adam, SGD
    BatchNorm , Adadelta, Adagrad), but L-BFGS was the best choice for
    this application and for the time constraint given in producing
    this package.

    Parameters
    ----------
    dataset : array[object] : dataset to use neural network on
    nn_settings : object : classifier data
    train : bool : decide on whether to train or test
    save_settings : bool : decide on whether to save classifier data
                           to 'data' folder
    print_acc : bool : passes bool to accuracy() to tell function to
                       print accuracy

    Return
    ------
    count : list : The counts of the input dataset


    """
    if dataset is None:
        dataset = []

    if not train:
        save_settings = False

    assert (nn_settings is not None
            or train), "Neural Network should be training " \
                       "if no settings are provided"

    count = []
    acc = []

    if nn_settings is None:
        # using lbfgs (explained in docstring)
        classifier = MLPClassifier(solver="lbfgs")
        # 1 hidden layer with 100 hidden units
        classifier.hidden_layer_sizes = (100, )
        # Using a tanh activation
        classifier.activation = "tanh"

    else:
        classifier = nn_settings

    train_x = []
    train_y = []
    value = []
    lrn_dataset = []
    lrn_cnt = []

    # For each sample, keep every third entry of its height profile.
    for k in range(len(dataset)):
        value.append([])
        for i in range(len(dataset[k].heights) // 3):
            value[k].append(dataset[k].heights[i * 3])

    # Flatten each kept entry to its first component; the sample's
    # count is the training label.
    for k in range(len(dataset)):
        lrn_dataset.append([])
        lrn_cnt.append(dataset[k].count)
        for ht_val in value[k]:
            lrn_dataset[k].append(ht_val[0])
    train_x = lrn_dataset
    train_y = lrn_cnt

    if train:
        classifier.fit(train_x, train_y)

    if save_settings:
        save_objects([classifier], name='classifier_info.dat')

    count = classifier.predict(train_x)
    acc.append(accuracy(train_x, count, classifier, output=print_acc))
    if train:
        print("Training complete")
        return
    return count
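A hedged sketch contrasting the solvers named in the docstring above on a toy problem; the dataset and sizes are illustrative assumptions:

from sklearn.datasets import make_classification
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
for solver in ('lbfgs', 'adam', 'sgd'):
    clf = MLPClassifier(solver=solver, hidden_layer_sizes=(100, ),
                        activation='tanh', max_iter=500, random_state=0)
    clf.fit(X, y)
    print(solver, clf.score(X, y))  # training accuracy per solver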
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from loading_dataset import load_dataset

# Load and flatten the training images.
images_train, labels_train = load_dataset("../training_dataset/", 160)
data = images_train.reshape(len(images_train), -1)

# SGD solver with ReLU activation and an adaptive learning rate.
classifier = MLPClassifier(solver="sgd")
classifier.activation = "relu"
classifier.learning_rate_init = 0.001
classifier.learning_rate = "adaptive"

print(classifier)

classifier.fit(data, labels_train)

images_test, labels_test = load_dataset("../test_dataset/", 160, False)

data_test = images_test.reshape(len(images_test), -1)

expected = labels_test
predicted = classifier.predict(data_test)

accuracy = accuracy_score(expected, predicted)

print("****************** average_score : " + str(accuracy))
Example No. 8
#! /usr/bin/env python3

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neural_network import MLPClassifier
#import snips

X = pd.read_csv('HandWrittenDigit.csv')
y = pd.read_csv('Labels.csv')

classifier = MLPClassifier(solver='sgd')
# Note the trailing 's': assigning to 'hidden_layer_size' would be
# silently ignored by scikit-learn.
classifier.hidden_layer_sizes = (40, )
classifier.activation = 'logistic'
classifier.fit(X, y.values.ravel())  # ravel() avoids the column-vector warning
plt.plot(classifier.loss_curve_)
plt.show()
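After fitting, the estimator exposes convergence diagnostics alongside loss_curve_; a small follow-up sketch:

print('iterations run:', classifier.n_iter_)     # epochs actually performed
print('final training loss:', classifier.loss_)  # last value of the loss curve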