def ex_2_2(input1, target1, input2, target2):
    scores = []
    scores_train = []
    classifiers = []
    for i in range(10):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   solver="adam",
                                   max_iter=1000,
                                   activation="tanh",
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        scores.append(classifier.score(input2, target2[:, 0]))
        scores_train.append(classifier.score(input1, target1[:, 0]))
        classifiers.append(classifier)

    # Confusion matrix for the network with the best test accuracy
    best_classifier = classifiers[np.argmax(scores)]
    conf_mat = confusion_matrix(target2[:, 0], best_classifier.predict(input2))
    print(conf_mat)

    plot_histogram_of_acc(scores_train, scores)

    # Plot the test images that the best network misclassified
    predicted_target = best_classifier.predict(input2)
    misclassified_images = []
    for i in range(len(target2[:, 0])):
        if target2[:, 0][i] != predicted_target[i]:
            misclassified_images.append(input2[i])

    for image in misclassified_images:
        plot_image(image)
Example #2
def ex_2_1(input2, target2):
    '''
    • Write code to train a feed-forward neural network with 1 hidden layer containing 6 hidden units
      for pose recognition. Use dataset2 for training after normalization, ‘adam’ as the training solver and
      train for 200 iterations.
    • Calculate the confusion matrix.
    • Plot the weights between each input neuron and the hidden neurons to visualize what the network
      has learnt in the first layer.
      Note: Use scikit-learn’s confusion_matrix function to calculate the confusion matrix.
      Note: You can use the coefs_ attribute of the model to read the weights. It is a list of length
      n_layers_ − 1 where the ith element in the list represents the weight matrix corresponding to layer i.
      Note: Use plot_hidden_layer_weights in nn_classification_plot.py to plot the hidden weights.
    '''

    # dataset2 = normalize(input2) is already done by main
    x_train = input2
    y_train = target2[:, 1]
    nn = MLPClassifier(solver='adam',
                       activation='tanh',
                       max_iter=200,
                       hidden_layer_sizes=(6, ))
    nn.fit(x_train, y_train)
    cm = confusion_matrix(y_train, nn.predict(x_train))
    plot_hidden_layer_weights(nn.coefs_[0])
    print(cm)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    # parse target2 2nd column
    pose2 = []
    for target in target2:
        pose2.append(target[1])

    mlp = MLPClassifier(activation='tanh', hidden_layer_sizes=(6,))
    print("===========fit started===========")
    mlp.fit(input2, pose2)
    print("===========fit finished===========")
    print("classes_: ", mlp.classes_)
    print("n_layers_: ", mlp.n_layers_)
    plot_hidden_layer_weights(mlp.coefs_[0])

    print("===========predict started===========")
    prediction = mlp.predict(input2)
    print("===========predict finished===========")
    cnf_matrix = confusion_matrix(pose2, prediction)
    print(cnf_matrix)
    return
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    hidden_units = 6
    nn = MLPClassifier(activation='tanh',
                       solver='adam',
                       hidden_layer_sizes=(hidden_units, ),
                       max_iter=200)
    pose = target2[:, 1]
    nn.fit(input2, pose)

    # using index 0 because of just one hidden layer
    hidden_layer_weights = nn.coefs_[0]

    y_pred = nn.predict(input2)
    matrix = confusion_matrix(pose, y_pred)

    print("The Confusion Matrix we obtained: \n" + str(matrix))

    plot_hidden_layer_weights(hidden_layer_weights)
Example #5
def main():
    X = [[0., 1., 0., 1., 0., 0.], [1., 0., 0., 1., 1., 0.]]
    y = [0, 15]
    clf = MLPClassifier(alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    # The first call to partial_fit must be given the full list of classes
    clf.partial_fit(X, y, classes=[0, 15])
    res = clf.predict([[1., 1., 0., 1., 1., 1.], [0., 0., 1., 1., 1., 0.]])
    print("res", res)
Example #6
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    # declaring variables used for MLPClassifier
    hidden_units = 6
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 200

    cf = MLPClassifier(hidden_layer_sizes=(hidden_units, ),
                       solver=solver_mode,
                       activation=activation_mode,
                       max_iter=max_iter)

    # training the classifier
    cf.fit(input2, target2[:, 1])

    # printing the confusion matrix (predicted poses vs. true poses)
    print(confusion_matrix(target2[:, 1], cf.predict(input2)))

    # plotting the hidden layer weights
    plot_hidden_layer_weights(cf.coefs_[0])
    def train(self, labeledDoc):
        """
        Trains the final classification model.
        :param labeledDoc: a labeledDoc object
        :return: True if everything went well, raises an exception on failure
        """
        if self.save_loc is None:
            raise UnboundLocalError("Should have set the save path <setSaveLocation>")

        if self.dependenceModel is None:
            raise UnboundLocalError("Should have set the TextProcessing.Doc2Vec model <setDependenceModel>")

        tags_id = {}
        Y = []
        X = []
        for doc in labeledDoc:
            for tag in doc.tags[1:]:
                if tag not in tags_id:
                    tags_id[tag] = len(tags_id)

        labeledDoc.reloadDoc()
        for doc in labeledDoc:
            tags = doc.tags
            text = doc.words
            auxY = np.zeros(len(tags_id))
            for tag in tags[1:]:
                auxY[tags_id[tag]] = 1.

            Y.append(auxY)
            vecX = self.dependenceModel.predict(text)[0]
            X.append(vecX)


        Y = np.array(Y)
        X = np.array(X)

        clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)
        clf.fit(X, Y)
        print(clf.predict(X))

        joblib.dump(clf, self.save_loc)
        with open(self.save_loc+"_tags_id", "w") as fout:
            fout.write(json.dumps(tags_id))
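
    # A sketch of the matching load step (not part of the original class),
    # assuming the same save_loc convention used by train above.
    def load(self):
        clf = joblib.load(self.save_loc)
        with open(self.save_loc + "_tags_id") as fin:
            tags_id = json.loads(fin.read())
        return clf, tags_id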
def ex_2_1(input2, target2):
    classifier = MLPClassifier(hidden_layer_sizes=(6, ),
                               solver="adam",
                               max_iter=200,
                               activation="tanh")

    classifier.fit(input2, target2[:, 1])
    con_mat = confusion_matrix(target2[:, 1], classifier.predict(input2))
    print(con_mat)
    plot_hidden_layer_weights(classifier.coefs_[0])
Example #9
def neural_net_2(train, test, val, train_out, test_out, val_out, BigSigma_inv):
    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(100, 1),
                        activation='logistic',
                        batch_size=BATCH_HUMAN,
                        shuffle=True,
                        max_iter=5000)

    scaler = StandardScaler()
    scaler.fit(train)
    train1 = scaler.transform(train)
    # apply the same transformation to the test and validation data
    test = scaler.transform(test)
    val = scaler.transform(val)
    train_out = train_out.astype(float)
    clf.fit(X=train1, y=train_out)
    predict_test = clf.predict(test)
    predict_val = clf.predict(val)
    print("TEST ERMS ACCURACY", mean_squared_error(test_out, predict_test),
          acc_manual(test_out, predict_test))
    print("VAL ERMS ACCURACY", mean_squared_error(val_out, predict_val),
          acc_manual(val_out, predict_val))
Example #10
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    # declaring variables used for MLPClassifier
    hidden_units = 20
    solver_mode = 'adam'
    activation_mode = 'tanh'
    max_iter = 1000

    max_accuracy = 0.0

    train_accuracy = []
    test_accuracy = []
    cfn = None

    for m in range(10):
        cf = MLPClassifier(hidden_layer_sizes=(hidden_units, ),
                           activation=activation_mode,
                           solver=solver_mode,
                           random_state=m,
                           max_iter=max_iter)
        cf.fit(input1, target1[:, 0])

        train_accuracy.append(cf.score(input1, target1[:, 0]))

        current_test_accuracy = cf.score(input2, target2[:, 0])

        test_accuracy.append(current_test_accuracy)

        # keep the confusion matrix of the best network on the test set
        if current_test_accuracy > max_accuracy:
            cfn = confusion_matrix(target2[:, 0], cf.predict(input2))
            max_accuracy = current_test_accuracy

    print(cfn)

    # plot the accuracy histograms once all runs are finished
    plot_histogram_of_acc(train_accuracy, test_accuracy)
    # plot_random_images(input2)
Example #11
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    pose = target2[:, 1]
    nn = MLPClassifier(hidden_layer_sizes=(6,), activation='tanh', max_iter=200)
    nn.fit(input2, pose)
    y_pred = nn.predict(input2)
    C = confusion_matrix(pose, y_pred)
    plot_hidden_layer_weights(nn.coefs_[0])
    return C
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    n = 10

    train_acc = np.zeros(n)
    test_acc = np.zeros(n)
    pred_test = np.zeros((n, input2.shape[0]))    # 564 test samples
    coefs = np.zeros((n, input1.shape[1], 20))    # 960 input features, 20 hidden units

    # print(min(target1[:, 0]), max(target1[:, 0]))
    # the dataset contains 20 persons

    for i in range(n):
        classifier = MLPClassifier(hidden_layer_sizes=(20, ),
                                   activation='tanh',
                                   solver='adam',
                                   max_iter=5000,
                                   random_state=i)
        classifier.fit(input1, target1[:, 0])
        pred_test[i] = classifier.predict(input2)
        coefs[i] = classifier.coefs_[0]
        train_acc[i] = classifier.score(input1, target1[:, 0])
        test_acc[i] = classifier.score(input2, target2[:, 0])

    # print the indices of the misclassified test images (network with seed 1)
    error = pred_test[1] - target2[:, 0]
    for j in range(len(error)):
        if error[j] != 0:
            print(j)
    plot_random_images(np.row_stack((input2[175, :], input2[184, :])))
    plot_random_images(np.row_stack((input2[210, :], input2[134, :])))
    plot_random_images(np.row_stack((input2[223, :], input2[177, :])))
    plot_random_images(np.row_stack((input2[179, :], input2[186, :])))

    plot_histogram_of_acc(train_acc, test_acc)

    # best network found with seed i=1
    confmat = confusion_matrix(target2[:, 0], pred_test[1])
    print(confmat)
def ex_2_2(input1, target1, input2, target2):
    """
    Solution for exercise 2.2
    :param input1: The input from dataset1
    :param target1: The target from dataset1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    train = input1
    test = input2
    target_train = target1[:, 0]  # column 0 holds the person identity
    target_test = target2[:, 0]

    n_hidden_neurons = 20

    accu_list_train = np.zeros((10, 1))
    accu_list_test = np.zeros((10, 1))

    # Find the best seed
    for seed in range(10):
        nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                           hidden_layer_sizes=(n_hidden_neurons,), random_state=seed)
        nn.fit(train, target_train)
        accu_list_train[seed] = nn.score(train, target_train)
        accu_list_test[seed] = nn.score(test, target_test)

    print(accu_list_train)
    print(accu_list_test)

    # Retrain the network with the best seed (maximal accuracy on the test set)
    best_seed = np.argmax(accu_list_test)
    best_nn = MLPClassifier(activation='tanh', solver='adam', max_iter=1000,
                            hidden_layer_sizes=(n_hidden_neurons,), random_state=best_seed)
    best_nn.fit(train, target_train)

    # Evaluate the confusion matrix with the best network
    predictions = best_nn.predict(test)
    C = confusion_matrix(target_test, predictions)
    print(C)

    # Plot results
    plot_histogram_of_acc(accu_list_train, accu_list_test)

    # Indices of the misclassified images
    comp_array = target_test - predictions
    misclassified_indices = np.nonzero(comp_array)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """
    n_hidden_neurons = 6
    nn = MLPClassifier(activation='tanh', solver='adam', max_iter=200,
                       hidden_layer_sizes=(n_hidden_neurons,))
    target = target2[:, 1]  # column 1 holds the pose label
    # Train the network
    nn.fit(input2, target)
    predictions = nn.predict(input2)
    C = confusion_matrix(target, predictions)
    hidden_layer_weights = nn.coefs_[0]
    plot_hidden_layer_weights(hidden_layer_weights)
    print(C)
def ex_2_1(input2, target2):
    """
    Solution for exercise 2.1
    :param input2: The input from dataset2
    :param target2: The target from dataset2
    :return:
    """

    classifier = MLPClassifier(hidden_layer_sizes=(6, ),
                               activation='tanh',
                               solver='adam',
                               max_iter=200)
    classifier.fit(input2, target2[:, 1])
    pred2 = classifier.predict(input2)
    confmat = confusion_matrix(target2[:, 1], pred2)
    coefs = classifier.coefs_
    print(confmat)
    plot_hidden_layer_weights(coefs[0])
Example #16
class MLPClassifierImpl():

    def __init__(self, hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10):
        self._hyperparams = {
            'hidden_layer_sizes': hidden_layer_sizes,
            'activation': activation,
            'solver': solver,
            'alpha': alpha,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'learning_rate_init': learning_rate_init,
            'power_t': power_t,
            'max_iter': max_iter,
            'shuffle': shuffle,
            'random_state': random_state,
            'tol': tol,
            'verbose': verbose,
            'warm_start': warm_start,
            'momentum': momentum,
            'nesterovs_momentum': nesterovs_momentum,
            'early_stopping': early_stopping,
            'validation_fraction': validation_fraction,
            'beta_1': beta_1,
            'beta_2': beta_2,
            'epsilon': epsilon,
            'n_iter_no_change': n_iter_no_change}

    def fit(self, X, y=None):
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def predict(self, X):
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        return self._sklearn_model.predict_proba(X)
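
A hypothetical usage sketch of the wrapper above (the toy X/y arrays are made up for illustration):

import numpy as np

X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([0, 1, 1, 0])

model = MLPClassifierImpl(hidden_layer_sizes=(8,), max_iter=2000, random_state=1)
model.fit(X, y)
print(model.predict(X))
print(model.predict_proba(X))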
Example #17
    def compute(self):
        # Iterate Leave-One-Out Index over all vectors
        actual_matrix = self.get_actual_data_matrix()
        for params_list_index in range(len(self._params_list)):
            params = self._params_list[params_list_index]
            current_params_result = self._params_result_list[params_list_index]
            for loo_index in range(self.get_vector_count()):
                # Prepare data and labels for current leave one out
                train_data = [[0] * self.get_actual_feature_count()
                              for _ in range(self.get_vector_count() - 1)]
                train_labels = [0] * (self.get_vector_count() - 1)
                test_data = [[0] * self.get_actual_feature_count()]
                test_labels = [0]
                y1 = 0
                for y in range(self.get_vector_count()):
                    if y != loo_index:
                        for x in range(self.get_actual_feature_count()):
                            train_data[y1][x] = actual_matrix[y][x]
                        train_labels[y1] = self._labels[y]
                        y1 = y1 + 1
                for x in range(self.get_actual_feature_count()):
                    test_data[0][x] = actual_matrix[loo_index][x]
                test_labels[0] = self._labels[loo_index]

                #clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
                clf = MLPClassifier(**params)
                clf.fit(train_data, train_labels)
                res = clf.predict(test_data)
                current_params_result.predicted_labels[loo_index] = res[0]
                #print(repr(self.get_labels()[loo_index])+"\t"+repr(res[0]))

            self._commit_params_computation(params_list_index)
        self._complete_computation()
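
The manual index bookkeeping above can be written more compactly with scikit-learn's LeaveOneOut splitter; a sketch, assuming actual_matrix and labels are NumPy arrays:

import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.neural_network import MLPClassifier

def loo_predictions(actual_matrix, labels, params):
    # one prediction per left-out vector
    predicted = np.empty(len(labels), dtype=labels.dtype)
    for train_idx, test_idx in LeaveOneOut().split(actual_matrix):
        clf = MLPClassifier(**params)
        clf.fit(actual_matrix[train_idx], labels[train_idx])
        predicted[test_idx] = clf.predict(actual_matrix[test_idx])
    return predicted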
Example #18
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
data = iris.data
labels = iris.target

data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.5, random_state=1)

scaler = StandardScaler()
# fit the scaler on the training data only
scaler.fit(data_train)
data_train_std = scaler.transform(data_train)
data_test_std = scaler.transform(data_test)

data_train = data_train_std
data_test = data_test_std

# We add max_iter=1000 because the default max_iter=200
# is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data_train, labels_train)
pred = mlp.predict(data_test)

print()
print('Misclassified samples: %d' % (labels_test != pred).sum())
print('Accuracy: %.2f' % accuracy_score(labels_test, pred))
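
The scale-then-fit steps above are often wrapped in a Pipeline, so the scaler is fit on the training split only and applied automatically at predict time; a brief sketch on the same iris data:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

iris = datasets.load_iris()
raw_train, raw_test, l_train, l_test = train_test_split(
    iris.data, iris.target, test_size=0.5, random_state=1)

pipe = make_pipeline(StandardScaler(),
                     MLPClassifier(random_state=1, max_iter=1000))
pipe.fit(raw_train, l_train)
print('Accuracy: %.2f' % pipe.score(raw_test, l_test))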
Example #19
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
data = iris.data
labels = iris.target

# We add max_iter=1000 because the default max_iter=200
# is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data, labels)

pred = mlp.predict(data)

print()
print('Accuracy: %.2f' % accuracy_score(labels, pred))
Example #20
df = encode_data(df)
df = delete_columns(df)
df, label = seperate_label(df)
df, scaler = scale_columns(df)

pickle.dump(scaler, open('./scaler.model', 'wb'))

x_train, x_test, y_train, y_test = train_test_split(df, label, test_size=.5)

# classifier=tree.DecisionTreeClassifier()
# classifier.fit(x_train,y_train)
# predictions=classifier.predict(x_test)

classifier = MLPClassifier()
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

print("Accuracy:", accuracy_score(y_test, predictions))

pickle.dump(classifier, open("model.model", 'wb'))

print(
    "Training completed. \nModel dumped succesfully..\n  -----------------------"
)

###############Evaluating#################

data = pd.read_csv("ITData_eval-unlabeled.csv")
data.columns = header
df2 = data.drop(['Satisfaction'], axis=1)
df2 = encode_data(df2)
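
The snippet ends here; a sketch of how the evaluation would likely continue, reloading the saved scaler and model (an assumption, not part of the original):

# assumed continuation: score the unlabeled data with the saved artifacts
scaler = pickle.load(open('./scaler.model', 'rb'))
classifier = pickle.load(open('model.model', 'rb'))
eval_predictions = classifier.predict(scaler.transform(df2))
print(eval_predictions)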
Example #21
def classify_mlp(data_path):
    result_path = '%s/mlp_results.txt' % os.path.abspath(
        os.path.join(os.path.dirname(data_path),
                     os.path.join(os.pardir, os.pardir)))
    if os.path.exists(result_path):
        if data_path in open(result_path).read():
            return True
    print(data_path)

    fname = "{}/train_labels.csv".format(data_path)
    if not os.path.exists(fname):
        return True
    tr_labels = np.loadtxt(fname)

    fname = "{}/train_embeddings.csv".format(data_path)
    tr_embeddings = np.loadtxt(fname)

    fname = "{}/val_labels.csv".format(data_path)
    val_labels = np.loadtxt(fname)

    fname = "{}/val_embeddings.csv".format(data_path)
    val_embeddings = np.loadtxt(fname)

    fname = "{}/test_labels.csv".format(data_path)
    te_labels = np.loadtxt(fname)

    fname = "{}/test_embeddings.csv".format(data_path)
    te_embeddings = np.loadtxt(fname)

    clf = MLPClassifier(random_state=2,
                        max_iter=200000000,
                        hidden_layer_sizes=(64, ))
    clf.fit(tr_embeddings, tr_labels)

    tr_score = clf.score(tr_embeddings, tr_labels)
    val_score = clf.score(val_embeddings, val_labels)
    te_score = clf.score(te_embeddings, te_labels)

    tr_predictions = clf.predict(tr_embeddings)
    val_predictions = clf.predict(val_embeddings)
    te_predictions = clf.predict(te_embeddings)

    tr_fscore = f1_score(tr_labels, tr_predictions, average="weighted")
    val_fscore = f1_score(val_labels, val_predictions, average="weighted")
    te_fscore = f1_score(te_labels, te_predictions, average="weighted")
    print("tr_score %s" % tr_score)
    print("val_score %s" % val_score)
    print("te_score %s" % te_score)
    with open(result_path, mode='a') as f:
        f.write(
            'Data Path: %s\tTrain Accuracy:%s\tVal Accuracy:%s\tTest Accuracy:%s\tTrain FScore:%s\tVal FScore:%s\tTest FScore:%s\n'
            % (data_path, tr_score, val_score, te_score, tr_fscore, val_fscore,
               te_fscore))

    conf_mat = confusion_matrix(te_labels, te_predictions)
    labels = sorted(list(set(list(te_labels))))
    plot_confusion_matrix(conf_mat,
                          classes=labels,
                          normalize=True,
                          title='Normalized confusion matrix',
                          output=data_path,
                          path_name='mlp_confusion_matrix',
                          alg='mlp')
Example #22
if __name__ == '__main__':
    np.random.seed(100)

    nn = NeuralNetwork([2, 2, 1])
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])
    nn.fit(X, y, learning_rate=0.1, epochs=1000)

    print("Final prediction")

    for s in X:
        print(s, nn.predict(s))

mlp = MLPClassifier(random_state=1)
mlp.fit(X, y)

data = X
markers = ('s', '*', '^')
colors = ('blue', 'green', 'red')
cmap = ListedColormap(colors)

x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
resolution = 0.01

x, y = np.meshgrid(np.arange(x_min, x_max, resolution),
                   np.arange(y_min, y_max, resolution))
Z = mlp.predict(np.array([x.ravel(), y.ravel()]).T)
Z = Z.reshape(x.shape)
plt.pcolormesh(x, y, Z, cmap=cmap)
plt.xlim(x.min(), x.max())
Example #23
#fit only to the training data
scaler.fit(X)

#now apply the transformations to the data:
x_train_nn = scaler.transform(X)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs',
                   alpha=1e-5,
                   hidden_layer_sizes=(5, 2),
                   random_state=1)
print(nn.fit(x_train_nn, y))
print('Neural network model:')
nn_pred_test = nn.predict(x_test_nn)
#compute confusion matrix
from sklearn import metrics

cnf_matrix = metrics.confusion_matrix(y_test, nn_pred_test)
print(cnf_matrix)

#compute roc cureve
import matplotlib.pyplot as plt
y_pred_proba = nn.predict_proba(x_test_nn)[:, 1]
y_binary = np.where(y_test == 'N', 0, 1)
fpr, tpr, _ = metrics.roc_curve(y_binary, y_pred_proba)
auc = metrics.roc_auc_score(y_binary, y_pred_proba)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
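
The curve is usually finished with a reference diagonal, axis labels, and a legend (a small addition, not in the original):

plt.plot([0, 1], [0, 1], linestyle='--', label='chance')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='lower right')
plt.show()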
Example #24
svd.fit(fea_data_set)
x_new=svd.fit_transform(fea_data_set)
# pca=PCA(n_components=30)
# pca.fit(fea_data_set)
# x_new=pca.transform(fea_data_set)
xtrain,xtest,ytrain,ytest=train_test_split(x_new,label,test_size=0.2)
lg.fit(xtrain,ytrain)
nb.fit(xtrain,ytrain)
forest.fit(xtrain,ytrain)
SVM.fit(xtrain,ytrain)
mlp.fit(xtrain,ytrain)
print("------------")
print(lg.score(xtest, ytest))
print(np.mean((lg.predict(xtest) - ytest) ** 2))
print(lg.score(xtrain, ytrain))
print(np.mean((lg.predict(xtrain) - ytrain) ** 2))
print("------------")
print(nb.score(xtest, ytest))
print(np.mean((nb.predict(xtest) - ytest) ** 2))
print(forest.score(xtest,ytest))
print(np.mean((forest.predict(xtest)-ytest)**2))
print(SVM.score(xtest,ytest))
print(np.mean((SVM.predict(xtest)-ytest)**2))
print(mlp.score(xtest,ytest))
print(np.mean((mlp.predict(xtest)-ytest)**2))
# Trained 4 models, with test-set sizes of 80%, 70%, 50% and 30% respectively
joblib.dump(lg,"lg3.m")
joblib.dump(nb,"nb3.m")
joblib.dump(forest,"rf3.m")
joblib.dump(SVM,"svm3.m")
joblib.dump(mlp,"mlp3.m")
Example #25
def classify(data_path, path=None, counter=None, alg='svm'):
    out = os.path.join(data_path, '%s_%s_%s' % (alg, path, 'confusion.png'))
    if os.path.exists(out):
        return True
    fname = "{}/labels.csv".format(data_path)
    paths = pd.read_csv(fname, header=None).values[:, 1]
    # Keep only the base filename without its extension.
    paths = np.array([os.path.basename(p).split(".")[0] for p in paths])

    fname = "{}/reps.csv".format(data_path)
    rawEmbeddings = pd.read_csv(fname, header=None).values
    # print(rawEmbeddings.shape, paths.shape)
    # KFold from sklearn.model_selection; materialise the splits so that
    # len(folds) works below
    folds = list(KFold(n_splits=10, shuffle=True,
                       random_state=1).split(rawEmbeddings))
    scores = []
    fscores_weighted, fscores_macro, fscores_micro = [], [], []
    for idx, (train, test) in enumerate(folds):
        print(idx, alg)
        if alg == 'knn':
            clf = neighbors.KNeighborsClassifier(1)
        elif alg == 'svm':
            clf = svm.SVC(kernel='linear', C=1, max_iter=200000000)
            # clf = svm.LinearSVC()
            # clf = svm.SVC(kernel="poly", degree=5, C=1, verbose=10)
        elif alg == 'nn':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2,
                                max_iter=200000000,
                                hidden_layer_sizes=(96, 64, 32))
        elif alg == 'nnd':
            # clf = MLPClassifier(random_state=2, max_iter=200000000)
            clf = MLPClassifier(random_state=2, max_iter=200000000)
        elif alg == 'poly':
            clf = svm.SVC(kernel="poly", max_iter=200000000)
        elif alg == 'rf':
            clf = RandomForestClassifier()
        clf.fit(rawEmbeddings[train], paths[train])
        gc.collect()
        score = clf.score(rawEmbeddings[test], paths[test])
        # print score, alg
        scores.append(score)
        prediction = clf.predict(rawEmbeddings[test])
        fscore_weighted = f1_score(paths[test], prediction, average="weighted")
        fscores_weighted.append(fscore_weighted)

        fscore_macro = f1_score(paths[test], prediction, average="macro")
        fscores_macro.append(fscore_macro)

        fscore_micro = f1_score(paths[test], prediction, average="micro")
        fscores_micro.append(fscore_micro)
    accuracy_dir = os.path.abspath(
        os.path.join(data_path, 'accuracies_%s.txt' % alg))

    with open(accuracy_dir, "wb") as file:
        for i in scores:
            file.writelines("%s,%s\n" % (str(i), str(counter)))
    # print "KNN Avg. score %s" % (reduce(operator.add, scores) / len(folds))
    # print "MLP Avg. score %s" % (reduce(operator.add, scores3) / len(folds))
    print "Avg. score %s" % (reduce(operator.add, scores) /
                             len(folds)), data_path
    result_path = "{}/{}_{}.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(
            (reduce(operator.add, scores) / len(folds))), str(counter), alg))
    fscores_weighted_result_path = "{}/{}_{}_fscores_weighted.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_weighted_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(sum(fscores_weighted) / len(folds)),
                                      str(counter), alg))

    fscores_macro_result_path = "{}/{}_{}_fscores_macro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_macro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(sum(fscores_macro) / len(folds)),
                                      str(counter), alg))

    fscores_micro_result_path = "{}/{}_{}_fscores_micro.log".format(
        os.path.abspath(
            os.path.join(os.path.join(data_path, os.pardir), os.pardir)), path,
        alg)
    with open(fscores_micro_result_path, "a") as file:
        file.write("%s,\t%s\t%s\n" % (str(sum(fscores_micro) / len(folds)),
                                      str(counter), alg))
Example #26
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# fit only to the training data
scaler.fit(X_train)

#now apply the transformations to the data:
x_train_nn = scaler.transform(X_train)
x_test_nn = scaler.transform(X_test)

nn = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)
print(nn.fit(x_train_nn,y_train))
print('Neural network model:')
nn_pred_train = nn.predict(x_train_nn)
print('train data model estimation')

#Mean absolute error
print('MAE: ')
print(mean_absolute_error(y_train, nn_pred_train))

# ROOT MEAN SQUARED ERROR
print('RMS: ')
print(sqrt(mean_squared_error(y_train, nn_pred_train)))

#R-squared score of this model
print('R2: ')
print(r2_score(y_train, nn_pred_train))

#MAPE
Example #27
def ex_2_2(input1, target1, input2, target2):
    '''
    • Write code to train a feed-forward neural network with 1 hidden layer containing 20 hidden units
      for recognising the individuals. Use dataset1 for training, ‘adam’ as the training solver and train for
      1000 iterations. Use dataset2 as the test set.
    • Repeat the process 10 times starting from a different initial weight vector and plot the histogram
      for the resulting accuracy on the training and on the test set (the accuracy is the proportion of correctly
      classified samples and it is computed with the method score of the classifier).
    • Use the best network (with maximal accuracy on the test set) to calculate the confusion matrix for
      the test set.
    • Plot a few misclassified images.
    '''
    x_train = input1
    y_train = target1[:, 0]
    x_test = input2
    y_test = target2[:, 0]
    seeds = np.array(range(1, 11))
    train_accs = []
    test_accs = []
    max_acc = -1

    for index_seed, seed in np.ndenumerate(seeds):
        nn = MLPClassifier(solver='adam',
                           activation='tanh',
                           max_iter=1000,
                           hidden_layer_sizes=(20, ),
                           random_state=seed)
        nn.fit(x_train, y_train)
        train_acc = accuracy_score(y_train, nn.predict(x_train))
        train_accs.append(train_acc)
        test_acc = accuracy_score(y_test, nn.predict(x_test))
        test_accs.append(test_acc)
        if test_acc > max_acc:
            max_acc = test_acc
            best_nn = nn

    plot_histogram_of_acc(train_accs, test_accs)

    cm = confusion_matrix(y_test, best_nn.predict(x_test))
    prediction = best_nn.predict(x_test)
    misclassified = np.where(y_test != prediction)
    print(cm)
    # show up to 8 misclassified images with their predicted and true labels
    limit = 8
    for mc_index in misclassified[0][:limit]:
        plt.figure()
        plt.title("Predicted Person " + str(prediction[mc_index]) +
                  ", should be Person " + str(y_test[mc_index]))
        plt.imshow(input2[mc_index].reshape(*IMAGE_DIM).T, cmap=plt.cm.gray)
        plt.xticks(())
        plt.yticks(())
        plt.show()
Example #28
    kf = KFold(10, shuffle=True, random_state=None)

    print(" 10-fold Cross-Validation training and testing \n")

    i = 1

    tableResults = []
    NN = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10,), random_state=1)
    for trainIndex, testIndex in kf.split(x):
        print(" ============== Fold ", i, "============\n")
        trainDocs, testDocs = x[trainIndex], x[testIndex]
        trainCats, testCats = y[trainIndex], y[testIndex]
        NN.fit(trainDocs, trainCats)
        pred = NN.predict(testDocs)
        accuracy = accuracy_score(testCats, pred)
        recall = recall_score(testCats, pred, average='weighted')
        precision = precision_score(testCats, pred, average='weighted')
        f1 = f1_score(testCats, pred, average='weighted')
        tableResults.append({'model': 'NN', 'accuracy': accuracy, 'recall': recall, 'precision': precision, 'f1': f1})
        i+=1
    joblib.dump(NN, 'NN_saved/' + pathname.split('/')[-1]+'.model')
    NN = joblib.load('NN_saved/' + pathname.split('/')[-1]+'.model')
    measures = ['precision', 'recall', 'accuracy', 'f1']
    df = pd.DataFrame(tableResults)
    filt = pd.pivot_table(df, values=measures, index=['model'])
    # write the per-measure averages to disk instead of leaving the file empty
    filt.to_csv('neuralNN' + pathname.replace('/', '_') + '.csv')
    print(" Results")
    print(filt)
Example #29
import numpy as np
from data_utils import *
from sklearn.neural_network import MLPClassifier

data = gather_and_clean_data()

X = data[:, 0:-1]
y = data[:, -1]

MClass = MLPClassifier()
MClass.fit(X, y)
pred = MClass.predict(X)
score = MClass.score(X, y)
print(f"Pred: {pred}")
print(f"Score: {score}")
Example #30
from sklearn.neural_network import MLPClassifier
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Iris

iris = datasets.load_iris()
X = iris.data
Y = iris.target

X_train, X_test, Y_train, Y_test = \
        train_test_split(X, Y, test_size=0.3, random_state=1)

sc = StandardScaler()
sc.fit(X_train)  # fit the scaler on the training data only
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

mlp = MLPClassifier(max_iter=200,
                    random_state=1,
                    hidden_layer_sizes=(200, 100))
mlp.fit(X_train_std, Y_train)

Y_pred = mlp.predict(X_test_std)
print('Misclassified samples: %d' % (Y_test != Y_pred).sum())
print('Accuracy: %.2f' % accuracy_score(Y_test, Y_pred))
# pred_prob = mlp.predict_proba(X_test_std)
# print('Probability: {0}'.format(pred_prob))
Example #31
X_train, X_test, y_train, y_test = train_test_split(examplesMatrix,
                                                    Y_vector,
                                                    test_size=0.2)
print("Training...")

# Commented code for several models:
model = MLPClassifier(hidden_layer_sizes=(128, 64, 32, 16, 8), max_iter=2500)
# model = SVC(gamma='scale', probability = True)
# model = KNeighborsClassifier()
# model = LinearDiscriminantAnalysis()
# model = GaussianNB()
# model = DecisionTreeClassifier()
# model = LogisticRegression()

model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Sanity check that the model is producing reasonable values
print("Max/min of predictions: ")
ymax = max(predictions)
ymin = min(predictions)
print(str(ymax) + "/" + str(ymin))

print("Max/Min of Y_test")
ymax = max(y_test)
ymin = min(y_test)
print(str(ymax) + "/" + str(ymin))

print("Max/Min of Y_train")
ymax = max(y_train)
ymin = min(y_train)