# Imports assumed by the functions below. ANN and AF are this project's own
# modules (neural-net helpers and activation-function classes, respectively);
# the sklearn imports are the assumed sources of the bare shuffle() and
# confusion_matrix() calls.
import os
import pdb
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle

import AF
import ANN


def run(L1, L2, overwrite=False, **kwargs):
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')
    # pdb.set_trace()

    # get dictionaries for activation functions and M
    afs = {
        1: {
            1: AF.ReLU(),
            2: AF.ReLU(),
            3: AF.ReLU(),
            4: AF.ReLU(),
            5: AF.ReLU(),
            6: AF.ReLU(),
            7: AF.ReLU(),
            8: AF.ReLU(),
            9: AF.ReLU(),
            10: AF.softmax()
        }
    }
    Ms = {1: {1: 4, 2: 4, 3: 4, 4: 4, 5: 4, 6: 4, 7: 4, 8: 4, 9: 4}}

    # run cross validation
    results = train_validate_test(train, validate, test, L1, L2, afs, Ms,
                                  **kwargs)

    # flatten out Y and Y_hat
    Y_hat = ANN.collapse_Y(results['Y_hat'])
    Y = ANN.collapse_Y(test['Y'])

    # update results
    results['Y'] = Y
    results['Y_hat'] = Y_hat
    results['CM'] = confusion_matrix(Y, Y_hat)

    # send results to pickle
    pd.Series(results).to_pickle(
        "./results_{:0.4}.pkl".format(results['validate']))

    # output results
    return results
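# The bare read_pickle() calls above (and in the later run() versions) are not
# defined in this section. A minimal sketch consistent with how it is called
# (names passed without the ".pkl" extension), assuming the pickles produced
# by pickle_data_sets() live in the working directory:
def read_pickle(name):
    """Load one of the pickled pandas.Series written by pickle_data_sets()."""
    return pd.read_pickle(f"./{name}.pkl")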
def train_validate_test(train, validate, test, af, M, l1, l2, **kwargs):
    # print message
    print(f"\nperforming train-validate-test for lambda: {l1},{l2}")

    # training results
    bp = back_propagation(train, validate, af, M, l1, l2, **kwargs)
    W = bp['W']
    b = bp['b']
    Z_train = feed_forward(train['PHI'], W, b, af)
    P_hat_train = Z_train[len(af)]
    acc_train = ANN.accuracy(train['Y'], P_hat_train)

    # validate results
    Z_validate = feed_forward(validate['PHI'], W, b, af)
    P_hat_validate = Z_validate[len(af)]
    acc_validate = ANN.accuracy(validate['Y'], P_hat_validate)

    # test results
    Z_test = feed_forward(test['PHI'], W, b, af)
    P_hat_test = Z_test[len(af)]
    acc_test = ANN.accuracy(test['Y'], P_hat_test)

    # collect results
    results = {
        'W': W,
        'b': b,
        'P_hat': P_hat_test,
        'accuracy': {
            'train': acc_train,
            'validate': acc_validate,
            'test': acc_test
        }
    }
    return results
def train_validate_test(train, validate, test, L1, L2, afs, M, **kwargs):
    # instantiate some variables to keep track of the best results
    bestAc = dict(train=0, validate=0, test=0)
    bestResults = None

    # cast a net and find the best outcome
    for l1 in L1:
        for l2 in L2:
            for af in afs.values():
                for m in M.values():
                    # print iteration to track progress
                    print(f"\nworking on {l1},{l2}...")

                    # update kwargs
                    kwargs['lambda1'] = l1
                    kwargs['lambda2'] = l2

                    # instantiate ArtificalNeuralNet instance
                    ann = ANN.ArtificalNeuralNet(test['PHI'], test['Y'], af, m)

                    # solve and run cross validation test for this instance
                    results = ann.train_validate_test(train, validate, test,
                                                      **kwargs)

                    # print iteration results
                    print(f"train accuracy: {results['train']}, "
                          f"validate accuracy: {results['validate']}")

                    # update best values
                    if results['validate'] > bestAc['validate']:
                        bestAc['validate'] = results['validate']
                        bestResults = results
                        print("best value (so far)!")

    # collect results
    results = pd.Series(bestResults)

    # send results to pickle
    results.to_pickle(f"./results_{np.round(bestAc['validate'], 4)}.pkl")

    # output
    return results
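# A hypothetical invocation of the grid-search pipeline above; the lambda
# grids, epochs, and batch size are illustrative only, and the ANN class is
# assumed to accept these kwargs.
L1_grid = [0, 1e-3, 1e-2]  # LASSO lambdas to try (illustrative)
L2_grid = [0, 1e-3, 1e-2]  # Ridge lambdas to try (illustrative)
best = run(L1_grid, L2_grid, overwrite=False, epochs=100, batch_size=100)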
def back_propagation(train, validate, af, M, l1, l2, **kwargs):
    """
    explanation:
        Does batch gradient descent using the AdaGrad adaptive learning rate
    input:
        train: dict - 'PHI' & 'Y'
        validate: dict - 'PHI' & 'Y'
        af: dict - activation function class instances for ALL layers
        M: dict - numbers of nodes for all hidden layers
        l1: float - lambda1 for LASSO regression
        l2: float - lambda2 for Ridge regression
        kwargs:
            save_plot: bool - saves plot if True
            eta0: float - initial 'basic' learning rate
            epsilon: float - very small number so that AdaGrad won't have 0
                in the denominator
            epochs: int - number of epochs for training
            batch_size: int - how many observations to feed at a time
            G0: float/int - what to initialize the AdaGrad accumulators to
    output:
        dict - weights 'W' and bias 'b'
    """
    # NOTE: l1 and l2 are accepted but not yet applied to the updates below.

    # input dimensions
    if train['Y'].shape[0] == train['Y'].size:
        pdb.set_trace()
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]
    N, D = train['PHI'].shape
    L = len(af)  # number of layers (af holds one activation per layer)

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta0 = kwargs['eta0'] if 'eta0' in kwargs else 1e-3
    epsilon = kwargs['epsilon'] if 'epsilon' in kwargs else 1e-8
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else N
    G0 = kwargs['G0'] if 'G0' in kwargs else 1

    # set random W and b of appropriate shapes
    W = {}
    b = {}
    W[1] = np.random.randn(D, M[1])
    b[1] = np.random.randn(M[1])
    for l in range(2, L):
        W[l] = np.random.randn(M[l - 1], M[l])
        b[l] = np.random.randn(M[l])
    W[L] = np.random.randn(M[L - 1], K)
    b[L] = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    J_validate = np.zeros_like(J_train)
    GW, Gb = {}, {}
    for l in af:
        GW[l] = G0
        Gb[l] = G0
    for epoch in range(epochs):
        X, Y = shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z = feed_forward(X_b, W, b, af)

            # set up dZ, dH, dW, and db
            dZ, dH, dW, db = {}, {}, {}, {}

            # start with the output layer
            dH[L] = Z[L] - Y_b
            dW[L] = np.matmul(Z[L - 1].T, dH[L])
            db[L] = dH[L].sum(axis=0)
            GW[L] += dW[L]**2
            Gb[L] += db[L]**2
            W[L] -= eta0 / np.sqrt(GW[L] + epsilon) * dW[L] / batch_size
            b[L] -= eta0 / np.sqrt(Gb[L] + epsilon) * db[L] / batch_size

            # now work back through each layer till the input layer
            for l in np.arange(2, L)[::-1]:
                dZ[l] = np.matmul(dH[l + 1], W[l + 1].T)
                dH[l] = dZ[l] * af[l].df(Z[l])
                dW[l] = np.matmul(Z[l - 1].T, dH[l])
                db[l] = dH[l].sum(axis=0)
                GW[l] += dW[l]**2
                Gb[l] += db[l]**2
                W[l] -= eta0 / np.sqrt(GW[l] + epsilon) * dW[l] / batch_size
                b[l] -= eta0 / np.sqrt(Gb[l] + epsilon) * db[l] / batch_size

            # end with the input layer
            dZ[1] = np.matmul(dH[2], W[2].T)
            dH[1] = dZ[1] * af[1].df(Z[1])
            dW[1] = np.matmul(X_b.T, dH[1])
            db[1] = dH[1].sum(axis=0)
            GW[1] += dW[1]**2
            Gb[1] += db[1]**2
            W[1] -= eta0 / np.sqrt(GW[1] + epsilon) * dW[1] / batch_size
            b[1] -= eta0 / np.sqrt(Gb[1] + epsilon) * db[1] / batch_size

            # feed forward for the whole train and validation sets
            Z_train = feed_forward(train['PHI'], W, b, af)
            Z_validate = feed_forward(validate['PHI'], W, b, af)

            # update train and validation cost functions
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], Z_train[L]) / N
            J_validate[index] = ANN.cross_entropy(
                validate['Y'], Z_validate[L]) / validate['Y'].shape[0]

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta_0$: {eta0}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.plot(J_validate, label="J: Validation")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta0}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # collect results
    results = {
        'W': W,  # weights
        'b': b   # bias
        # 'P_hat': Z[L]  # output predictions
    }

    # output results
    return results
def pickle_data_sets():
    #===========================================================================
    # load kaggle data
    #===========================================================================
    data = pd.read_csv("usps_digit_recognizer.csv")

    # #=======================================================================
    # # load up data https://pjreddie.com/projects/mnist-in-csv/
    # #=======================================================================
    #
    # # load training and test data
    # names = ['label'] + [f"pixel_{x}" for x in range(784)]
    # train = pd.read_csv("mnist_train.csv", names=names)
    # test = pd.read_csv("mnist_test.csv", names=names)
    #
    # # merge sets
    # data = train.append(test, ignore_index=True)

    #===========================================================================
    # design matrix & target matrix
    #===========================================================================
    # extract the label column
    y = data['label'].values
    data.drop(columns=['label'], inplace=True)
    X = data.values
    N, D = X.shape
    K = 10

    # one-hot Y
    Y = ANN.one_hot_encode(y)

    # construct design matrix
    PHI = X / 255
    # PHI = np.column_stack((np.ones((N, 1)), X / 255))

    #===========================================================================
    # shuffle data
    #===========================================================================
    # shuffle PHI and Y
    PHI, Y = ANN.shuffle(PHI, Y)

    # get the numbers of observations for the data sets
    N_train = int(.6 * N)
    N_validate = int(.2 * N)
    N_test = N - N_validate - N_train

    # get the cross validation design matrices
    PHI_train = PHI[:N_train]
    PHI_validate = PHI[N_train:N_train + N_validate]
    PHI_test = PHI[N_train + N_validate:]

    # get the cross validation target arrays
    Y_train = Y[:N_train]
    Y_validate = Y[N_train:N_train + N_validate]
    Y_test = Y[N_train + N_validate:]

    # get pandas.Series of sets
    train = pd.Series(dict(PHI=PHI_train, Y=Y_train))
    validate = pd.Series(dict(PHI=PHI_validate, Y=Y_validate))
    test = pd.Series(dict(PHI=PHI_test, Y=Y_test))

    # send Series to pickle
    print("\npickle-ing train, validation and test sets...")
    train.to_pickle("train.pkl")
    validate.to_pickle("validate.pkl")
    test.to_pickle("test.pkl")
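# ANN.one_hot_encode() is assumed above; a minimal sketch of the behavior the
# caller relies on, assuming integer class labels 0..K-1:
def one_hot_encode(y):
    """Map a length-N vector of integer labels to an (N, K) one-hot matrix."""
    N, K = y.shape[0], y.max() + 1
    Y = np.zeros((N, K))
    Y[np.arange(N), y] = 1
    return Y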
def back_propagation(train, **kwargs):
    # input dimensions
    N, D = train['PHI'].shape
    if train['Y'].shape[0] == train['Y'].size:
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta = kwargs['eta'] if 'eta' in kwargs else 1e-3
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else N

    # set random W and b of appropriate shapes
    # NOTE: M1 (the hidden-layer width) is assumed to be defined at module
    # level; this version does not take it as an argument.
    W1 = np.random.randn(D, M1)
    b1 = np.random.randn(M1)
    # W2 = np.random.randn(M1, M2)
    # b2 = np.random.randn(M2)
    # W3 = np.random.randn(M2, M3)
    # b3 = np.random.randn(M3)
    # W4 = np.random.randn(M3, M4)
    # b4 = np.random.randn(M4)
    # W5 = np.random.randn(M4, M5)
    # b5 = np.random.randn(M5)
    # W6 = np.random.randn(M5, K)
    # b6 = np.random.randn(K)
    W2 = np.random.randn(M1, K)
    b2 = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    for epoch in range(epochs):
        X, Y = ANN.shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            # start timer
            t0 = datetime.now()

            # get the batch data
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z1, P_hat = feed_forward(X_b, W1, b1, W2, b2)
            # Z1, Z2, Z3, Z4, Z5, P_hat = feed_forward(
            #     X_b, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6)

            dH2 = P_hat - Y_b
            dW2 = np.matmul(Z1.T, dH2)
            db2 = dH2.sum(axis=0)
            W2 -= eta * dW2
            b2 -= eta * db2

            dZ1 = np.matmul(dH2, W2.T)
            dH1 = dZ1 * (1 - Z1**2)  # tanh derivative
            dW1 = np.matmul(X_b.T, dH1)
            db1 = dH1.sum(axis=0)
            W1 -= eta * dW1
            b1 -= eta * db1

            # back propagation for the commented-out 6-layer (ReLU) variant:
            # dH6 = P_hat - Y_b
            # dW6 = np.matmul(Z5.T, dH6)
            # db6 = dH6.sum(axis=0)
            # W6 -= eta * dW6
            # b6 -= eta * db6
            #
            # dZ5 = np.matmul(dH6, W6.T)
            # dH5 = dZ5 * (Z5 > 0)
            # dW5 = np.matmul(Z4.T, dH5)
            # db5 = dH5.sum(axis=0)
            # W5 -= eta * dW5
            # b5 -= eta * db5
            #
            # dZ4 = np.matmul(dH5, W5.T)
            # dH4 = dZ4 * (Z4 > 0)
            # dW4 = np.matmul(Z3.T, dH4)
            # db4 = dH4.sum(axis=0)
            # W4 -= eta * dW4
            # b4 -= eta * db4
            #
            # dZ3 = np.matmul(dH4, W4.T)
            # dH3 = dZ3 * (Z3 > 0)
            # dW3 = np.matmul(Z2.T, dH3)
            # db3 = dH3.sum(axis=0)
            # W3 -= eta * dW3
            # b3 -= eta * db3
            #
            # dZ2 = np.matmul(dH3, W3.T)
            # dH2 = dZ2 * (Z2 > 0)
            # dW2 = np.matmul(Z1.T, dH2)
            # db2 = dH2.sum(axis=0)
            # W2 -= eta * dW2
            # b2 -= eta * db2
            #
            # dZ1 = np.matmul(dH2, W2.T)
            # dH1 = dZ1 * (Z1 > 0)
            # dW1 = np.matmul(X_b.T, dH1)
            # db1 = dH1.sum(axis=0)
            # W1 -= eta * dW1
            # b1 -= eta * db1

            # feed forward for the whole training set
            Z1, P_hat = feed_forward(train['PHI'], W1, b1, W2, b2)
            # Z1, Z2, Z3, Z4, Z5, P_hat = feed_forward(
            #     train['PHI'], W1, b1, W2, b2, W3, b3, W4, b4, W5, b5, W6, b6)

            # update training cost function
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], P_hat) / N

            #===================================================================
            # approximate time left till training is done
            #===================================================================
            # find batch time
            tf = (datetime.now() - t0).seconds

            # find number of sub-epochs left
            epochs_left = J_train.shape[0] - index - 1

            # time left till training is done (minutes)
            time_left = tf * epochs_left / 60
            print("Approximately {:0.2f} minutes left.".format(time_left))

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta$: {eta}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # # collect results
    # results = {
    #     'W': (W1, W2, W3, W4, W5, W6),  # weights
    #     'b': (b1, b2, b3, b4, b5, b6)   # bias
    # }

    print("Accuracy: {:0.4f}".format(ANN.accuracy(train['Y'], P_hat)))
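# The legacy back_propagation() above unpacks feed_forward(X, W1, b1, W2, b2)
# into (Z1, P_hat). A sketch consistent with the gradients it computes -- the
# (1 - Z1**2) factor implies a tanh hidden layer, and the (P_hat - Y_b)
# output-layer gradient implies softmax with cross-entropy:
def feed_forward(X, W1, b1, W2, b2):
    """Two-layer forward pass: tanh hidden layer, softmax output."""
    Z1 = np.tanh(np.matmul(X, W1) + b1)
    A2 = np.matmul(Z1, W2) + b2
    expA = np.exp(A2 - A2.max(axis=1, keepdims=True))  # stabilized softmax
    P_hat = expA / expA.sum(axis=1, keepdims=True)
    return Z1, P_hat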
def run(af, M, L1, L2, **kwargs):
    # kwargs
    overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False

    #===========================================================================
    # set up data
    #===========================================================================
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')

    #===========================================================================
    # run cross validation
    #===========================================================================
    results = train_validate_test(train, validate, test, af, M, L1, L2,
                                  **kwargs)

    # update results
    results['CM'] = ANN.confusion_matrix(test['Y'], results['P_hat'])
    results['M'] = M
    results['af'] = af
    results['L'] = len(af)  # number of layers
    results['lambda1'] = L1
    results['lambda2'] = L2
    results['eta'] = kwargs['eta'] if 'eta' in kwargs else 1e-3
    results['epochs'] = kwargs['epochs'] if 'epochs' in kwargs else 1e3
    results['batch_size'] = (kwargs['batch_size']
                             if 'batch_size' in kwargs else 30)

    #===========================================================================
    # send results to pickle
    #===========================================================================
    # get a list of the results in the working directory
    DIR = np.array(os.listdir())
    mask = ['results_' in x for x in DIR]
    DIR = DIR[mask]
    # use if there are results saved already -- ad hoc quick fix
    best = np.array([x[x.find("_") + 1:x.find(".pkl")] for x in DIR],
                    dtype=np.float32).max()

    # send results to pickle
    acc = results['accuracy']['validate']
    if acc > best:
        print("\nfound new best result!")
        pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))
    else:
        print("\nno such luck...")

    # # use if no results have been saved
    # pd.Series(results).to_pickle(
    #     "results_{:0.4}.pkl".format(results['accuracy']['validate']))

    print(results['accuracy'])

    # output results
    return results
def back_propagation(train, validate, af, M, l1, l2, **kwargs):
    # NOTE: l1 and l2 are accepted but not yet applied to the updates below.

    # input dimensions
    if train['Y'].shape[0] == train['Y'].size:
        pdb.set_trace()
        K = len(set(train['Y']))
    else:
        K = train['Y'].shape[1]
    N, D = train['PHI'].shape
    L = len(af)  # number of layers (af holds one activation per layer)

    # kwargs
    save_plot = kwargs['save_plot'] if 'save_plot' in kwargs else True
    eta = kwargs['eta'] if 'eta' in kwargs else 1e-3
    epochs = int(kwargs['epochs'] if 'epochs' in kwargs else 1e3)
    batch_size = kwargs['batch_size'] if 'batch_size' in kwargs else 30

    # set random W and b of appropriate shapes
    W = {}
    b = {}
    W[1] = np.random.randn(D, M[1])
    b[1] = np.random.randn(M[1])
    for l in range(2, L):
        W[l] = np.random.randn(M[l - 1], M[l])
        b[l] = np.random.randn(M[l])
    W[L] = np.random.randn(M[L - 1], K)
    b[L] = np.random.randn(K)

    # set up back propagation
    batches = N // batch_size
    J_train = np.zeros(epochs * batches)
    J_validate = np.zeros_like(J_train)
    for epoch in range(epochs):
        X, Y = shuffle(train['PHI'], train['Y'])
        for batch in range(batches):
            X_b = X[(batch * batch_size):(batch + 1) * batch_size]
            Y_b = Y[(batch * batch_size):(batch + 1) * batch_size]

            # feed forward
            Z = feed_forward(X_b, W, b, af)

            # set up dZ, dH, dW, and db
            dZ, dH, dW, db = {}, {}, {}, {}

            # start with the output layer
            dH[L] = Z[L] - Y_b
            dW[L] = np.matmul(Z[L - 1].T, dH[L])
            db[L] = dH[L].sum(axis=0)
            W[L] -= eta * dW[L] / batch_size
            b[L] -= eta * db[L] / batch_size

            # now work back through each layer till the input layer
            for l in np.arange(2, L)[::-1]:
                dZ[l] = np.matmul(dH[l + 1], W[l + 1].T)
                dH[l] = dZ[l] * af[l].df(Z[l])
                dW[l] = np.matmul(Z[l - 1].T, dH[l])
                db[l] = dH[l].sum(axis=0)
                W[l] -= eta * dW[l] / batch_size
                b[l] -= eta * db[l] / batch_size

            # end with the input layer
            dZ[1] = np.matmul(dH[2], W[2].T)
            dH[1] = dZ[1] * af[1].df(Z[1])
            dW[1] = np.matmul(X_b.T, dH[1])
            db[1] = dH[1].sum(axis=0)
            W[1] -= eta * dW[1] / batch_size
            b[1] -= eta * db[1] / batch_size

            # feed forward for the whole train and validation sets
            Z_train = feed_forward(train['PHI'], W, b, af)
            Z_validate = feed_forward(validate['PHI'], W, b, af)

            # update train and validation cost functions
            index = batch + (epoch * batches)
            J_train[index] = ANN.cross_entropy(train['Y'], Z_train[L]) / N
            J_validate[index] = ANN.cross_entropy(
                validate['Y'], Z_validate[L]) / validate['Y'].shape[0]

    # save figure
    if save_plot:
        fig, ax = plt.subplots()
        fig.suptitle(
            f"$\\eta$: {eta}, epochs: {epochs}, batch size: {batch_size}")
        ax.plot(J_train, label="J: Training")
        ax.plot(J_validate, label="J: Validation")
        ax.set_xlabel("batch + (epoch x batches)")
        ax.set_ylabel("J")
        ax.legend(loc='best')
        if not os.path.isdir("J"):
            os.mkdir("J")
        savename = f"J/J_eta_{eta}_epochs_{epochs}.pdf"
        fig.savefig(savename)
        plt.close(fig)
        print(f"saved {savename}")

    # collect results
    results = {
        'W': W,  # weights
        'b': b   # bias
        # 'P_hat': Z[L]  # output predictions
    }

    # output results
    return results
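# Both dict-based back_propagation() versions call ANN.cross_entropy() and
# divide the result by the number of observations themselves, so the helper
# presumably returns the total (summed) multiclass cross entropy. A minimal
# sketch under that assumption:
def cross_entropy(Y, P_hat, eps=1e-12):
    """Total cross entropy between one-hot Y and predicted probabilities."""
    return -np.sum(Y * np.log(P_hat + eps))  # eps guards against log(0)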
def run(af, M, L1, L2, **kwargs):
    # kwargs
    overwrite = kwargs['overwrite'] if 'overwrite' in kwargs else False

    #===========================================================================
    # set up data
    #===========================================================================
    # check that pickles exist
    if any([not os.path.isfile('train.pkl'), overwrite]):
        pickle_data_sets()

    # load data
    train = read_pickle('train')
    validate = read_pickle('validate')
    test = read_pickle('test')

    #===========================================================================
    # run cross validation
    #===========================================================================
    results = train_validate_test(train, validate, test, af, M, L1, L2,
                                  **kwargs)

    # update results
    try:
        results['CM'] = ANN.confusion_matrix(test['Y'], results['P_hat'])
    except Exception as e:
        print(f"something went wrong with the confusion matrix: {e}")
    results['M'] = M
    results['af'] = af
    results['L'] = len(af)  # number of layers
    results['lambda1'] = L1
    results['lambda2'] = L2
    results['eta'] = kwargs['eta'] if 'eta' in kwargs else 1e-3
    results['epochs'] = kwargs['epochs'] if 'epochs' in kwargs else 1e3
    results['batch_size'] = (kwargs['batch_size']
                             if 'batch_size' in kwargs else 30)

    #===========================================================================
    # send results to pickle
    #===========================================================================
    # validation accuracy from the current run
    acc = results['accuracy']['validate']

    # find any previously pickled best results
    DIR = np.array(os.listdir())
    mask = ['results_' in x for x in DIR]
    DIR = DIR[mask]

    # if there are past results, save the current results only if they beat
    # the best previous result
    if len(DIR) > 0:
        best = np.array([x[x.find("_") + 1:x.find(".pkl")] for x in DIR],
                        dtype=np.float32).max()
        if acc > best:
            print("\nfound new best result!")
            pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))
        else:
            print("\nno such luck...")
    # if there are no past results, just save the current results
    else:
        print("\nfound new best result!")
        pd.Series(results).to_pickle("results_{:0.4}.pkl".format(acc))

    print("Accuracy from this round: {:0.4f}".format(acc))

    # output results
    return results
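# A hypothetical driver for the final run() above: a two-hidden-layer ReLU
# network with a softmax output layer. The widths, lambdas, and training
# kwargs are illustrative only.
if __name__ == '__main__':
    af = {1: AF.ReLU(), 2: AF.ReLU(), 3: AF.softmax()}  # one entry per layer
    M = {1: 300, 2: 100}                                # hidden-layer widths
    results = run(af, M, L1=0, L2=0, eta=1e-3, epochs=100, batch_size=30)
    print(results['accuracy'])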