Example #1
0
def dimensionality_KPCA(instruction, dataset, target="", y=""):
    global currLog
    global counter

    dataReader = DataReader("./data/" + get_last_file()[0])

    if target == "":
        data = dataReader.data_generator()
        data.fillna(0, inplace=True)
        remove = get_similar_column(get_value_instruction(instruction), data)

        y = data[remove]
        del data[remove]
        le = preprocessing.LabelEncoder()
        y = le.fit_transform(y)

    kpca = KernelPCA(n_components=len(dataset.columns), kernel="rbf")
    data_modified = kpca.fit_transform(dataset)

    X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=49)
    X_train_mod, X_test_mod, y_train_mod, y_test_mod = train_test_split(
        data_modified, y, test_size=0.2, random_state=49)

    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)

    clf_mod = tree.DecisionTreeClassifier()
    clf_mod.fit(X_train_mod, y_train_mod)
    acc = []
    acc.append(accuracy_score(clf_mod.predict(X_test_mod), y_test_mod))
    for i, j in product(range(3, 10), ["entropy", "gini"]):
        model = tree.DecisionTreeClassifier(criterion=j, max_depth=i)
        model = model.fit(X_train_mod, y_train_mod)
        acc.append(accuracy_score(model.predict(X_test_mod), y_test_mod))
    del i, j
    data_modified = pd.DataFrame(data_modified)
    # y keeps the original row order, unlike the shuffled y_train/y_test split
    data_modified[target] = y
    # data_modified.to_csv("./data/housingPCA.csv")

    return data_modified, accuracy_score(
        clf.predict(X_test),
        y_test), max(acc), (len(dataset.columns) - len(data_modified.columns))


def booster(dataset, y, obj):
    # obj can be e.g. "reg:linear" or "multi:softmax"

    # y is passed explicitly so the helper does not rely on outer-scope state
    X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=49)
    clf = XGBClassifier(objective=obj,
                        learning_rate=0.1,
                        silent=1,
                        alpha=10)
    clf.fit(X_train, y_train)
    return accuracy_score(clf.predict(X_test), y_test)
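
The comparison above in a standalone form, assuming only scikit-learn and synthetic data: fit one decision tree on the raw features and one on an RBF kernel-PCA projection, then compare held-out accuracy. This is an illustrative sketch, not library code.

from sklearn import tree
from sklearn.datasets import make_classification
from sklearn.decomposition import KernelPCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=8, random_state=49)
X_kpca = KernelPCA(n_components=X.shape[1], kernel="rbf").fit_transform(X)

# identical random_state keeps the two splits row-aligned
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=49)
Xk_tr, Xk_te, _, _ = train_test_split(X_kpca, y, test_size=0.2, random_state=49)

baseline = tree.DecisionTreeClassifier().fit(X_tr, y_tr)
projected = tree.DecisionTreeClassifier().fit(Xk_tr, y_tr)
print(accuracy_score(y_te, baseline.predict(X_te)),
      accuracy_score(y_te, projected.predict(Xk_te)))
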
Example #2
0
def decision_tree(instruction,
                  dataset=None,
                  preprocess=True,
                  mca_threshold=None,
                  test_size=0.2,
                  drop=None):
    logger("Reading in dataset....")

    dataReader = DataReader(dataset)
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)

    data, y, remove, full_pipeline = initial_preprocesser(
        data, instruction, preprocess, mca_threshold)
    logger("->", "Target Column Found: {}".format(remove))

    X_train = data['train']
    y_train = y['train']
    X_test = data['test']
    y_test = y['test']

    # classification_column = get_similar_column(getLabelwithInstruction(instruction), data)

    # Needed to make a custom label encoder due to train test split changes
    # Can still be inverse transformed, just a bit of extra work
    y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
    label_mappings = {}
    for i in range(len(y_vals)):
        label_mappings[y_vals[i]] = i

    # Custom label encoder due to train test split
    y_train = y_train.apply(lambda x: label_mappings[x]).values
    y_test = y_test.apply(lambda x: label_mappings[x]).values
    num_classes = len(y_vals)

    # fitting and storing
    logger("Fitting Decision Tree...")

    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(X_train, y_train)

    score = accuracy_score(clf.predict(X_test), y_test)
    logger("->", "Score found on testing set: {}".format(score))
    print("")
    logger("Stored model under 'decision_tree' key")

    clearLog()

    return {
        'id': generate_id(),
        "model": clf,
        "target": remove,
        "accuracy_score": score,
        "preprocesser": full_pipeline,
        "interpeter": label_mappings,
        "cross_val_score": cross_val_score(clf, X_train, y_train, cv=3)
    }
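
A hedged usage sketch for the function above; the CSV path and instruction string are placeholders, and the keys read back are the ones in the returned dictionary.

# Hypothetical call; "./data/titanic.csv" and the instruction text are placeholders.
result = decision_tree("predict survived", dataset="./data/titanic.csv")
print(result["target"])          # column matched from the instruction
print(result["accuracy_score"])  # accuracy on the held-out split
fitted_tree = result["model"]    # the fitted sklearn DecisionTreeClassifier
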
Example #3
0
def dimensionality_RF(instruction, dataset, target="", y="", n_features=10):
    global currLog
    global counter

    dataReader = DataReader("./data/" + get_last_file()[0])

    if target == "":
        data = dataReader.data_generator()
        data.fillna(0, inplace=True)
        remove = get_similar_column(get_value_instruction(instruction), data)
        data = structured_preprocesser(data)

        y = data[remove]
        del data[remove]
        le = preprocessing.LabelEncoder()
        y = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(dataset,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=49)
    first_classifier = tree.DecisionTreeClassifier()
    first_classifier.fit(X_train, y_train)

    first_classifier_acc = accuracy_score(first_classifier.predict(X_test),
                                          y_test)

    accuracy_scores = [first_classifier_acc]
    columns = []
    datas = []
    datas.append(dataset)
    columns.append([])

    for i, x in product(range(3, 10), range(4, len(dataset.columns))):
        feature_model = RandomForestRegressor(random_state=1, max_depth=i)
        feature_model.fit(X_train, y_train)

        importances = feature_model.feature_importances_
        indices = np.argsort(importances)[-x:]
        columns.append(dataset.columns[indices])

        X_temp_train = X_train[dataset.columns[indices]]
        X_temp_test = X_test[dataset.columns[indices]]

        val = pd.DataFrame(np.r_[X_temp_train, X_temp_test])
        val[target] = np.r_[y_train, y_test]
        datas.append(val)

        vr = tree.DecisionTreeClassifier()
        vr.fit(X_temp_train, y_train)

        accuracy_scores.append(accuracy_score(vr.predict(X_temp_test), y_test))

    the_index = accuracy_scores.index(max(accuracy_scores))

    return datas[the_index], accuracy_scores[0], max(accuracy_scores), list(
        columns[the_index])
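
The selection step above reduces to ranking features by random-forest importances and keeping the top x column indices; a minimal standalone sketch with synthetic data (not library code).

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestRegressor

X, y = make_classification(n_samples=300, n_features=10, random_state=1)
forest = RandomForestRegressor(random_state=1, max_depth=5).fit(X, y)

x = 4  # keep the four most important features
top_indices = np.argsort(forest.feature_importances_)[-x:]
print(top_indices, X[:, top_indices].shape)
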
Example #4
0
def nearest_neighbors(instruction=None,
                      dataset=None,
                      mca_threshold=None,
                      preprocess=True,
                      drop=None,
                      min_neighbors=3,
                      max_neighbors=10):
    logger("Reading in dataset....")
    # Reads in dataset
    # data = pd.read_csv(self.dataset)
    dataReader = DataReader(dataset)
    data = dataReader.data_generator()
    if drop is not None:
        data.drop(drop, axis=1, inplace=True)
    data, y, remove, full_pipeline = initial_preprocesser(
        data, instruction, preprocess, mca_threshold)
    logger("->", "Target Column Found: {}".format(remove))
    X_train = data['train']
    y_train = y['train']
    X_test = data['test']
    y_test = y['test']
    # classification_column = get_similar_column(getLabelwithInstruction(instruction), data)
    # encodes the label dataset into 0's and 1's
    y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
    num_classes = len(y_vals)
    label_mappings = {}
    for i in range(len(y_vals)):
        label_mappings[y_vals[i]] = i
    y_train = y_train.apply(lambda x: label_mappings[x]).values
    y_test = y_test.apply(lambda x: label_mappings[x]).values
    models = []
    scores = []
    logger("Fitting Nearest Neighbor...")
    logger("Identifying optimal number of neighbors...")
    # Tries all neighbor possibilities, based on either defaults or user
    # specified values
    for x in range(min_neighbors, max_neighbors):
        knn = KNeighborsClassifier(n_neighbors=x)
        knn.fit(X_train, y_train)
        models.append(knn)
        scores.append(accuracy_score(knn.predict(X_test), y_test))
    logger("Stored model under 'nearest_neighbors' key")
    # keep the neighbor count with the highest test accuracy
    knn = models[scores.index(max(scores))]
    clearLog()
    return {
        'id': generate_id(),
        "model": knn,
        "accuracy_score": max(scores),
        "preprocesser": full_pipeline,
        "interpreter": label_mappings,
        "target": remove,
        "cross_val_score": cross_val_score(knn, X_train, y_train, cv=3)
    }
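
The same neighbor sweep in isolation on synthetic data, keeping the model with the highest held-out accuracy; purely illustrative.

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=400, random_state=49)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=49)

models, scores = [], []
for k in range(3, 10):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_tr, y_tr)
    models.append(knn)
    scores.append(accuracy_score(y_te, knn.predict(X_te)))

best = models[scores.index(max(scores))]  # highest-scoring neighbor count
print(best.n_neighbors, max(scores))
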
Example #5
0
def regression_ann(
            instruction,
            mca_threshold=None,
            dataset=None,
            drop=None,
            preprocess=True,
            test_size=0.2,
            random_state=49,
            epochs=50,
            generate_plots=True,
            callback_mode='min',
            maximizer="val_loss",
            save_model=True,
            save_path=os.getcwd()):

        global currLog
        logger("reading in dataset...")

        dataReader = DataReader(dataset)
        data = dataReader.data_generator()
        # data = pd.read_csv(self.dataset)

        if drop is not None:
            data.drop(drop, axis=1, inplace=True)

        data, y, target, full_pipeline = initial_preprocesser(data, instruction, preprocess, mca_threshold)
        logger("->", "Target Column Found: {}".format(target))
        X_train = data['train']
        X_test = data['test']


        # Target scaling
        target_scaler = StandardScaler()

        y_train = target_scaler.fit_transform(np.array(y['train']).reshape(-1, 1))
        y_test = target_scaler.transform(np.array(y['test']).reshape(-1,1))

        logger("establishing callback function...")

        models = []
        losses = []
        model_data = []

        # callback function to store lowest loss value
        es = EarlyStopping(
            monitor=maximizer,
            mode=callback_mode,
            verbose=0,
            patience=5)

        i = 0

        # get the first 3 layer model
        model = get_keras_model_reg(data, i)

        logger("training initial model...")
        history = model.fit(
            X_train,
            y_train,
            epochs=epochs,
            validation_data=(
                X_test,
                y_test),
            callbacks=[es],
            verbose=0)
        models.append(history)
        model_data.append(model)

        logger("->", "Initial number of layers " + str(len(model.layers)))

        logger("->", "Training Loss: " + \
               str(history.history['loss'][len(history.history['val_loss']) - 1]), '|')
        logger("->", "Test Loss: " +
               str(history.history['val_loss'][len(history.history['val_loss']) -
                                               1]), '|')
        print("")

        losses.append(history.history[maximizer]
                      [len(history.history[maximizer]) - 1])

        # keeps running model and fit functions until the validation loss stops
        # decreasing
        logger("testing number of layers...")
        print(currLog)
        while all(x > y for x, y in zip(losses, losses[1:])):
            # build a deeper model than the previous iteration
            i += 1
            model = get_keras_model_reg(data, i)
            history = model.fit(
                X_train,
                y_train,
                epochs=epochs,
                validation_data=(
                    X_test,
                    y_test),
                callbacks=[es],
                verbose=0)
            model_data.append(model)
            models.append(history)
            logger("->", "Current number of layers: " + str(len(model.layers)))

            logger("->", "Training Loss: " +
                   str(history.history['loss'][len(history.history['val_loss']) -
                                               1]), '|')
            logger("->", "Test Loss: " +
                   str(history.history['val_loss'][len(history.history['val_loss']) -
                                                   1]), '|')
            print("")
            losses.append(history.history[maximizer]
                          [len(history.history[maximizer]) - 1])

        final_model = model_data[losses.index(min(losses))]
        final_hist = models[losses.index(min(losses))]

        logger('->', "Best number of layers found: " +
               str(len(final_model.layers)))

        logger('->', "Training Loss: " + str(final_hist.history['loss']
                                             [len(final_hist.history['val_loss']) - 1]))
        logger('->', "Test Loss: " + str(final_hist.history['val_loss']
                                         [len(final_hist.history['val_loss']) - 1]))

        # calls function to generate plots in plot generation
        plots = {}
        if generate_plots:
            init_plots, plot_names = generate_regression_plots(
                models[len(models) - 1], data, y)
            for x in range(len(plot_names)):
                plots[str(plot_names[x])] = init_plots[x]

        if save_model:
            save(final_model, save_model)
        # stores values in the client object models dictionary field
        print("")
        logger("Stored model under 'regression_ANN' key")
        return {
            'id': generate_id(),
            'model': final_model,
            "target": target,
            "plots": plots,
            "preprocesser": full_pipeline,
            "interpreter": target_scaler,
            'losses': {
                'training_loss': final_hist.history['loss'],
                'val_loss': final_hist.history['val_loss']}}
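
Because the target is standardized before training, the stored model predicts in scaled units. A hedged sketch of mapping predictions back through the returned scaler; the instruction, CSV path, and preprocessed_rows variable are placeholders.

# Hypothetical usage; undo the StandardScaler that was fit on the target.
out = regression_ann("predict median house value", dataset="./data/housing.csv")
scaled = out["model"].predict(preprocessed_rows)  # preprocessed_rows is illustrative
predictions = out["interpreter"].inverse_transform(scaled.reshape(-1, 1))
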
Example #6
0
def classification_ann(instruction,
            dataset=None,
            mca_threshold=None,
            preprocess=True,
            callback_mode='min',
            drop=None,
            random_state=49,
            test_size=0.2,
            epochs=50,
            generate_plots=True,
            maximizer="val_loss",
            save_model=True,
            save_path=os.getcwd()):

        global currLog
        logger("reading in dataset...")

        dataReader = DataReader(dataset)
        data = dataReader.data_generator()

        if drop is not None:
            data.drop(drop, axis=1, inplace=True)

        data, y, remove, full_pipeline = initial_preprocesser(
            data, instruction, preprocess, mca_threshold)
        logger("->", "Target Column Found: {}".format(remove))

        # Needed to make a custom label encoder due to train test split changes
        # Can still be inverse transformed, just a bit of extra work
        y = pd.concat([y['train'], y['test']], axis=0)

        num_classes = len(np.unique(y))

        X_train = data['train']
        X_test = data['test']

        # ANN needs target one hot encoded for classification
        one_hot_encoder = OneHotEncoder()

        y = pd.DataFrame(one_hot_encoder.fit_transform(np.reshape(y.values, (-1,1))).toarray(),
                         columns=one_hot_encoder.get_feature_names())

        y_train = y.iloc[:len(X_train)]
        y_test = y.iloc[len(X_train):]


        models = []
        losses = []
        accuracies = []
        model_data = []

        logger("establishing callback function...")

        # early stopping callback
        es = EarlyStopping(
            monitor=maximizer,
            mode=callback_mode,
            verbose=0,
            patience=5)

        i = 0
        model = get_keras_model_class(data, i, num_classes)
        logger("training initial model...")
        history = model.fit(
            X_train, y_train, epochs=epochs, validation_data=(
                X_test, y_test), callbacks=[es], verbose=0)

        model_data.append(model)
        models.append(history)
        logger("->", "Initial number of layers " + str(len(model.layers)))

        logger("->", "Training Loss: " + \
               str(history.history['loss'][len(history.history['val_loss']) - 1]), '|')
        logger("->", "Test Loss: " +
               str(history.history['val_loss'][len(history.history['val_loss']) -
                                               1]), '|')
        print("")


        losses.append(history.history[maximizer]
                      [len(history.history[maximizer]) - 1])

        # keeps running model and fit functions until the validation loss stops
        # decreasing
        logger("testing number of layers...")
        while all(x > y for x, y in zip(losses, losses[1:])):
            # build a deeper model than the previous iteration
            i += 1
            model = get_keras_model_class(data, i, num_classes)
            history = model.fit(
                X_train,
                y_train,
                epochs=epochs,
                validation_data=(
                    X_test,
                    y_test),
                callbacks=[es], verbose=0)

            model_data.append(model)
            models.append(history)
            logger("->", "Current number of layers: " + str(len(model.layers)))

            logger("->", "Training Loss: " +
                   str(history.history['loss'][len(history.history['val_loss']) -
                                               1]), '|')
            logger("->", "Test Loss: " +
                   str(history.history['val_loss'][len(history.history['val_loss']) -
                                                   1]), '|')
            print("")

            losses.append(history.history[maximizer]
                          [len(history.history[maximizer]) - 1])
            accuracies.append(history.history['val_accuracy']
                              [len(history.history['val_accuracy']) - 1])

        final_model = model_data[losses.index(min(losses))]
        final_hist = models[losses.index(min(losses))]

        logger('->', "Best number of layers found: " +
               str(len(final_model.layers)))
        logger('->', "Training Accuracy: " + str(final_hist.history['accuracy']
                                                 [len(final_hist.history['val_accuracy']) - 1]))
        logger('->', "Test Accuracy: " + str(final_hist.history['val_accuracy'][
                                                 len(final_hist.history['val_accuracy']) - 1]))

        # generates appropriate classification plots by feeding all information
        plots = {}
        if generate_plots:
            plots = generate_classification_plots(
                models[len(models) - 1], data, y, model, X_test, y_test)

        if save_model:
            save(final_model, save_model)

        print("")
        logger("Stored model under 'classification_ANN' key")

        # stores the values and plots into the object dictionary
        return {
            'id': generate_id(),
            "model": final_model,
            'num_classes': num_classes,
            "plots": plots,
            "target": remove,
            "preprocesser": full_pipeline,
            "interpreter": one_hot_encoder,
            'losses': {
                'training_loss': final_hist.history['loss'],
                'val_loss': final_hist.history['val_loss']},
            'accuracy': {
                'training_accuracy': final_hist.history['accuracy'],
                'validation_accuracy': final_hist.history['val_accuracy']}}
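
A hedged sketch of decoding the network's softmax output back into original labels through the returned OneHotEncoder; the call arguments and preprocessed_rows are placeholders.

import numpy as np

# Hypothetical usage; recover class labels from the probability vectors.
out = classification_ann("predict species", dataset="./data/iris.csv")
probs = out["model"].predict(preprocessed_rows)  # preprocessed_rows is illustrative
labels = out["interpreter"].categories_[0][np.argmax(probs, axis=1)]
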
Example #7
0
def k_means_clustering(dataset=None,
                       preprocess=True,
                       generate_plots=True,
                       drop=None,
                       base_clusters=1):
    logger("Reading dataset...")
    # loads dataset and replaces n/a with zero
    # data = pd.read_csv(self.dataset)

    dataReader = DataReader(dataset)
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)

    dataPandas = data.copy()

    full_pipeline = None
    if preprocess:
        logger("Preprocessing data...")
        data, full_pipeline = clustering_preprocessor(data)
        data = np.array(data)

    modelStorage = []
    inertiaStor = []

    # processes dataset and runs KMeans algorithm on one cluster as
    # baseline
    i = base_clusters
    logger("Creating unsupervised clustering task...")
    kmeans = KMeans(n_clusters=i, random_state=0).fit(data)
    modelStorage.append(kmeans)

    # stores SSE values in an array for later comparison
    inertiaStor.append(kmeans.inertia_)
    i += 1

    logger("Identifying best centroid count and optimizing accuracy")
    # continues to increase cluster size until SSE values don't decrease by
    # 1000 - this value was decided based on precedent
    while (all(earlier >= later
               for earlier, later in zip(inertiaStor, inertiaStor[1:]))):
        kmeans = KMeans(n_clusters=i, random_state=0).fit(data)
        modelStorage.append(kmeans)
        inertiaStor.append(kmeans.inertia_)
        # minimize inertia up to 10000
        i += 1

        # checks to see if it should continue to run; need to improve this
        # algorithm
        if i > 3 and inertiaStor[len(inertiaStor) - 2] - 1000 <= inertiaStor[
                len(inertiaStor) - 1]:
            break
    # generates the clustering plots appropriately
    logger("->", "Optimal number of clusters found: {}".format(i - 1))
    plots = {}
    if generate_plots:
        logger("Generating plots and storing in model")
        init_plots, plot_names = generate_clustering_plots(
            modelStorage[len(modelStorage) - 1], dataPandas, data)

        for x in range(len(plot_names)):
            plots[str(plot_names[x])] = init_plots[x]

    print("")
    logger("Stored model under 'k_means_clustering' key")

    # stores plots and information in the dictionary client model
    clearLog()
    return {
        'id': generate_id(),
        "model": modelStorage[len(modelStorage) - 1],
        "preprocesser": full_pipeline,
        "plots": plots
    }
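
The stopping rule above in a standalone form: keep adding clusters until inertia stops dropping by at least 1000. Synthetic blobs, illustrative only.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=500, centers=4, random_state=0)

inertias, k = [], 1
while True:
    inertias.append(KMeans(n_clusters=k, random_state=0).fit(data).inertia_)
    if len(inertias) > 1 and inertias[-2] - inertias[-1] <= 1000:
        break
    k += 1
print("clusters kept:", k)
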
Example #8
0
def train_svm(instruction,
              dataset=None,
              test_size=0.2,
              kernel='linear',
              preprocess=True,
              mca_threshold=None,
              drop=None,
              cross_val_size=0.3):

    logger("Reading in dataset....")
    # reads dataset and fills n/a values with zeroes
    #data = pd.read_csv(self.dataset)

    dataReader = DataReader(dataset)
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)

    data, y, target, full_pipeline = initial_preprocesser(
        data, instruction, preprocess, mca_threshold)
    logger("->", "Target Column Found: {}".format(target))

    X_train = data['train']
    y_train = y['train']
    X_test = data['test']
    y_test = y['test']

    # classification_column = get_similar_column(getLabelwithInstruction(instruction), data)

    # Needed to make a custom label encoder due to train test split changes
    # Can still be inverse transformed, just a bit of extra work
    y_vals = np.unique(pd.concat([y['train'], y['test']], axis=0))
    num_classes = len(y_vals)
    label_mappings = {}
    for i in range(len(y_vals)):
        label_mappings[y_vals[i]] = i

    y_train = y_train.apply(lambda x: label_mappings[x]).values
    y_test = y_test.apply(lambda x: label_mappings[x]).values

    # Fitting to SVM and storing in the model dictionary
    logger("Fitting Support Vector Machine")
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train)

    score = accuracy_score(clf.predict(X_test), y_test)

    logger("->", "Accuracy found on testing set: {}".format(score))

    logger("Stored model under 'svm' key")
    clearLog()
    return {
        'id': generate_id(),
        "model": clf,
        "accuracy_score": score,
        "target": target,
        "preprocesser": full_pipeline,
        "interpreter": label_mappings,
        "cross_val_score": cross_val_score(clf, X_train, y_train)
    }
Example #9
0
def dimensionality_reduc(instruction,
                         dataset,
                         arr=["RF", "PCA", "KPCA", "ICA"],
                         inplace=False):
    global currLog
    global counter

    dataReader = DataReader(dataset)

    logger("loading dataset...")
    data = dataReader.data_generator()
    data.fillna(0, inplace=True)

    logger("getting most similar column from instruction...")
    target = get_similar_column(get_value_instruction(instruction), data)

    y = data[target]
    del data[target]
    le = preprocessing.LabelEncoder()
    y = le.fit_transform(y)

    data = structured_preprocesser(data)

    perms = []
    overall_storage = []
    finals = []

    logger("generating dimensionality permutations...")
    for i in range(1, len(arr) + 1):
        for elem in list(permutations(arr, i)):
            perms.append(elem)

    logger("running each possible permutation...")
    logger("realigning tensors...")
    for path in perms:
        currSet = data
        for element in path:
            if element == "RF":
                data_mod, beg_acc, final_acc, col_removed = dimensionality_RF(
                    instruction, currSet, target, y)
            elif element == "PCA":
                data_mod, beg_acc, final_acc, col_removed = dimensionality_PCA(
                    instruction, currSet, target, y)
            elif element == "KPCA":
                data_mod, beg_acc, final_acc, col_removed = dimensionality_KPCA(
                    instruction, currSet, target, y)
            elif element == "ICA":
                data_mod, beg_acc, final_acc, col_removed = dimensionality_ICA(
                    instruction, currSet, target, y)
            overall_storage.append(
                list([data_mod, beg_acc, final_acc, col_removed]))
            currSet = data_mod
        finals.append(overall_storage[len(overall_storage) - 1])

    logger("Fetching Best Accuracies...")
    accs = []
    print("")
    print("Baseline Accuracy: " + str(finals[0][1]))
    print("----------------------------")
    for i, element in enumerate(finals):
        print("Permutation --> " + str(perms[i]) + " | Final Accuracy --> " +
              str(element[2]))
        if finals[0][1] < element[2]:
            accs.append(
                list([
                    "Permutation --> " + str(perms[i]) +
                    " | Final Accuracy --> " + str(element[2])
                ]))
    print("")
    print("Best Accuracies")
    print("----------------------------")
    for element in accs:
        print(element)

    if inplace:
        data.to_csv(dataset)
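
The permutation generation above, isolated: every ordering of every non-empty subset of the requested techniques. A tiny illustrative sketch.

from itertools import permutations

arr = ["RF", "PCA", "KPCA", "ICA"]
perms = [p for i in range(1, len(arr) + 1) for p in permutations(arr, i)]
print(len(perms))  # 64 orderings for four techniques
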
Example #10
0
def tune_helper(model_to_tune=None,
                dataset=None,
                models=None,
                max_layers=10,
                min_layers=2,
                min_dense=32,
                max_dense=512,
                executions_per_trial=3,
                max_trials=1,
                activation='relu',
                loss='categorical_crossentropy',
                metrics='accuracy'):
    logger("Getting target model for tuning...")

    # checks to see which requested model is in the self.models

    # processing for regression feed forward NN
    if model_to_tune == 'regression_ANN':
        logger("Tuning model hyperparameters")
        dataReader = DataReader(dataset)
        data = dataReader.data_generator()
        target = models['regression_ANN']['target']
        target_column = data[models['regression_ANN']['target']]
        data = models['regression_ANN']['preprocesser'].transform(
            data.drop(target, axis=1))
        returned_model = tuneReg(data,
                                 target_column,
                                 max_layers=max_layers,
                                 min_layers=min_layers,
                                 min_dense=min_dense,
                                 max_dense=max_dense,
                                 executions_per_trial=executions_per_trial,
                                 max_trials=max_trials)
        models['regression_ANN'] = {'model': returned_model}
        return returned_model

    # processing for classification feed forward NN
    if model_to_tune == "classification_ANN":
        logger("Tuning model hyperparameters")
        dataReader = DataReader(dataset)
        data = dataReader.data_generator()
        target = models['classification_ANN']['target']
        target_column = data[models['classification_ANN']['target']]
        data = models['classification_ANN']['preprocesser'].transform(
            data.drop(target, axis=1))
        returned_model = tuneClass(data,
                                   target_column,
                                   models['classification_ANN']['num_classes'],
                                   max_layers=max_layers,
                                   min_layers=min_layers,
                                   min_dense=min_dense,
                                   max_dense=max_dense,
                                   executions_per_trial=executions_per_trial,
                                   max_trials=max_trials,
                                   activation=activation,
                                   loss=loss,
                                   metrics=metrics)
        models['classification_ANN'] = {'model': returned_model}
        return returned_model
    # processing for convolutional NN
    if model_to_tune == "convolutional_NN":
        logger("Tuning model hyperparameters")
        X = models['convolutional_NN']["X"]
        y = models['convolutional_NN']["y"]
        model = tuneCNN(np.asarray(X), np.asarray(y),
                        models["convolutional_NN"]["num_classes"])
        models["convolutional_NN"]["model"] = model
    return models