Example #1
# NOTE: these imports are an assumption reconstructed from the names used below;
# package-internal helpers (DataReader, logger, clearLog, counter, get_file,
# initial_preprocessor, get_keras_model_reg, generate_regression_plots,
# save, generate_id) are expected to come from the surrounding project.
import os

import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping


def regression_ann(instruction,
                   callback=False,
                   ca_threshold=None,
                   text=None,
                   dataset=None,
                   drop=None,
                   preprocess=True,
                   test_size=0.2,
                   random_state=49,
                   epochs=50,
                   generate_plots=True,
                   callback_mode='min',
                   maximizer="val_loss",
                   save_model=False,
                   save_path=os.getcwd(),
                   add_layer={}):
    '''
    Body of the regression function called by the neural-network query
    when the data is numerical. The keyword arguments control preprocessing,
    tuning, plot generation, and the architecture of the trained network.
    :return: dictionary holding all the information for the finished model
    '''
    if text is None:
        text = []

    if dataset is None:
        dataReader = DataReader(get_file())
    else:
        dataReader = DataReader(dataset)
    logger("Reading in dataset")
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)
    data, y, target, full_pipeline = initial_preprocessor(
        data,
        instruction,
        preprocess,
        ca_threshold,
        text,
        test_size=test_size,
        random_state=random_state)
    logger("->", "Target column found: {}".format(target))

    X_train = data['train']
    X_test = data['test']

    # Target scaling
    target_scaler = StandardScaler()

    y_train = target_scaler.fit_transform(np.array(y['train']).reshape(-1, 1))
    y_test = target_scaler.transform(np.array(y['test']).reshape(-1, 1))

    logger("Establishing callback function")

    models = []
    losses = []
    model_data = []

    # early-stopping callback: halt training when the monitored metric stops improving
    es = EarlyStopping(monitor=maximizer,
                       mode=callback_mode,
                       verbose=0,
                       patience=5)

    callback_value = [es] if callback else None

    i = 0

    # add_layer format: {<object>: list of indices}
    # build the initial three-layer model
    model = get_keras_model_reg(data, i, add_layer)

    logger("Training initial model")
    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        callbacks=callback_value,
                        verbose=0)
    models.append(history)
    model_data.append(model)

    col_name = [[
        "Initial number of layers ", "| Training Loss ", "| Test Loss "
    ]]
    col_width = max(len(word) for row in col_name for word in row) + 2
    for row in col_name:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")
    values = []
    values.append(str(len(model.layers)))
    values.append("| " + str(history.history['loss'][-1]))
    values.append("| " + str(history.history['val_loss'][-1]))
    datax = []
    datax.append(values)
    for row in datax:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")

    losses.append(history.history[maximizer][-1])

    # keeps running model and fit functions until the validation loss stops
    # decreasing
    logger("Testing number of layers")
    col_name = [["Current number of layers", "| Training Loss", "| Test Loss"]]
    col_width = max(len(word) for row in col_name for word in row) + 2
    for row in col_name:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")
    datax = []
    # grow the network while the monitored loss keeps decreasing; the
    # len(losses) <= 2 guard guarantees at least two additional candidates
    while len(losses) <= 2 or losses[-1] < losses[-2]:
        model = get_keras_model_reg(data, i, add_layer)
        history = model.fit(X_train,
                            y_train,
                            callbacks=callback_value,
                            epochs=epochs,
                            validation_data=(X_test, y_test),
                            verbose=0)
        model_data.append(model)
        models.append(history)

        values = []
        datax = []
        values.append(str(len(model.layers)))
        values.append("| " + str(history.history['loss'][-1]))
        values.append("| " + str(history.history['val_loss'][-1]))
        datax.append(values)
        for row in datax:
            print((" " * 2 * counter) + "| " +
                  ("".join(word.ljust(col_width) for word in row)) + " |")
        del values, datax
        losses.append(history.history[maximizer][-1])
        i += 1
    best_index = losses.index(min(losses))
    final_model = model_data[best_index]
    final_hist = models[best_index]
    print("")
    logger('->',
           "Best number of layers found: " + str(len(final_model.layers)))

    logger('->', "Training Loss: " + str(final_hist.history['loss'][-1]))
    logger('->', "Test Loss: " + str(final_hist.history['val_loss'][-1]))

    # calls function to generate plots in plot generation
    plots = {}
    if generate_plots:
        init_plots, plot_names = generate_regression_plots(
            models[-1], data, y)
        for name, plot in zip(plot_names, init_plots):
            plots[str(name)] = plot

    if save_model:
        save(final_model, save_model, save_path)
    # stores values in the client object models dictionary field
    print("")
    logger("Stored model under 'regression_ANN' key")
    clearLog()

    K.clear_session()

    return {
        'id': generate_id(),
        'model': final_model,
        "target": target,
        "num_classes": 1,
        "plots": plots,
        "preprocessor": full_pipeline,
        "interpreter": target_scaler,
        'test_data': {
            'X': X_test,
            'y': y_test
        },
        'losses': {
            'training_loss': final_hist.history['loss'],
            'val_loss': final_hist.history['val_loss']
        }
    }
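
A minimal usage sketch for the function above. The CSV path, the instruction string, and the assumed numeric target column are illustrative only, not part of the original code.

# Hypothetical usage sketch (path and instruction are illustrative):
model_info = regression_ann(
    "predict price",            # natural-language instruction naming the target
    dataset="housing.csv",      # assumed CSV containing a numeric 'price' column
    callback=True,              # enable the EarlyStopping callback defined above
    epochs=25,
    generate_plots=False)
print(model_info['target'], model_info['losses']['val_loss'][-1])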
Example #2
# The tuning helpers (tuneReg, tuneClass, tuneCNN), plot generators,
# DataReader, logger, and clearLog are assumed to come from the surrounding
# package, as in Example #1.
def tune_helper(model_to_tune=None,
                dataset=None,
                models=None,
                max_layers=10,
                min_layers=2,
                min_dense=32,
                max_dense=512,
                executions_per_trial=3,
                max_trials=1,
                activation='relu',
                loss='categorical_crossentropy',
                metrics='accuracy',
                seed=42,
                objective='val_accuracy',
                generate_plots=True,
                directory='my_dir',
                epochs=10,
                step=32,
                patience=1,
                verbose=0,
                test_size=0.2):
    '''
    Helper function that dispatches to the appropriate tuning routine
    based on the model_to_tune argument.
    :param model_to_tune: key of the stored model to tune ('regression_ANN',
        'classification_ANN', or 'convolutional_NN')
    :return: the updated models dictionary
    '''
    print("")
    logger("Getting target model for tuning...")

    # check which requested model is present in the models dictionary

    # processing for the regression feed-forward NN
    if model_to_tune == 'regression_ANN':
        logger("Reading in data")
        logger("Tuning model hyperparameters...")
        dataReader = DataReader(dataset)
        data = dataReader.data_generator()
        target = models['regression_ANN']['target']
        target_column = data[models['regression_ANN']['target']]
        data = models['regression_ANN']['preprocesser'].transform(
            data.drop(target, axis=1))
        returned_model, returned_pms, history, X_test, y_test = tuneReg(
            data.values,
            target_column.values,
            max_layers=max_layers,
            min_layers=min_layers,
            min_dense=min_dense,
            max_dense=max_dense,
            executions_per_trial=executions_per_trial,
            max_trials=max_trials,
            epochs=epochs,
            activation=activation,
            step=step,
            directory=directory,
            verbose=verbose,
            test_size=test_size)
        plots = {}
        logger("->",
               'Best Hyperparameters Found: {}'.format(returned_pms.values))
        if generate_plots:
            logger("Generating updated plots")
            init_plots, plot_names = generate_regression_plots(
                history, data, target_column)
            for name, plot in zip(plot_names, init_plots):
                plots[str(name)] = plot

        models['regression_ANN'] = {
            'id': models['regression_ANN']['id'],
            'model': returned_model,
            'target': target,
            "plots": plots,
            'preprocesser': models['regression_ANN']['preprocesser'],
            'interpreter': models['regression_ANN']['interpreter'],
            'test_data': {
                'X': X_test,
                'y': y_test
            },
            'hyperparameters': returned_pms.values,
            'losses': {
                'training_loss': history.history['loss'],
                'val_loss': history.history['val_loss']
            }
        }
        logger("Re-stored model under 'regression_ANN' key")

    # processing for the classification feed-forward NN
    elif model_to_tune == "classification_ANN":
        logger("Reading in data")
        logger("Tuning model hyperparameters...")
        dataReader = DataReader(dataset)
        data = dataReader.data_generator()
        target = models['classification_ANN']['target']
        target_column = data[models['classification_ANN']['target']]
        data = models['classification_ANN']['preprocesser'].transform(
            data.drop(target, axis=1))
        returned_model, returned_pms, history, X_test, y_test = tuneClass(
            data,
            target_column,
            models['classification_ANN']['num_classes'],
            max_layers=max_layers,
            min_layers=min_layers,
            min_dense=min_dense,
            max_dense=max_dense,
            executions_per_trial=executions_per_trial,
            max_trials=max_trials,
            activation=activation,
            loss=loss,
            directory=directory,
            metrics=metrics,
            epochs=epochs,
            step=step,
            verbose=verbose,
            test_size=test_size)
        plots = {}
        logger("->",
               'Best Hyperparameters Found: {}'.format(returned_pms.values))
        if generate_plots:
            logger("Generating updated plots")
            plots = generate_classification_plots(history, data, target_column,
                                                  returned_model, X_test,
                                                  y_test)

        logger("Re-stored model under 'classification_ANN' key")
        models['classification_ANN'] = {
            'id': models['classification_ANN']['id'],
            'model': returned_model,
            'hyperparameters': returned_pms.values,
            'plots': plots,
            'preprocesser': models['classification_ANN']['preprocesser'],
            'interpreter': models['classification_ANN']['interpreter'],
            'test_data': {
                'X': X_test,
                'y': y_test
            },
            'target': target,
            'losses': {
                'training_loss': history.history['loss'],
                'val_loss': history.history['val_loss']
            },
            'accuracy': {
                'training_accuracy': history.history['accuracy'],
                'validation_accuracy': history.history['val_accuracy']
            }
        }
        logger("Re-stored model under 'classification_ANN' key")

    elif model_to_tune == "convolutional_NN":
        logger("Tuning model hyperparameters...")
        X_train, X_test, height, width, num_classes = get_image_data(models)
        logger('Located image data')
        model, returned_pms, history = tuneCNN(
            X_train,
            X_test,
            height,
            width,
            num_classes,
            executions_per_trial=executions_per_trial,
            max_trials=max_trials,
            seed=seed,
            objective=objective,
            directory=directory,
            patience=patience,
            epochs=epochs,
            verbose=verbose,
            test_size=test_size)
        logger("->", "Optimal image size identified: {}".format(
            (height, width, 3)))
        logger('Packaging HyperModel')
        logger("->",
               'Best Hyperparameters Found: {}'.format(returned_pms.values))
        logger("Re-stored model under 'convolutional_NN' key")

        models['convolutional_NN'] = {
            'id': models['convolutional_NN']['id'],
            'data_type': models['convolutional_NN']['data_type'],
            'data_path': models['convolutional_NN']['data_path'],
            'data': {
                'train': X_train,
                'test': X_test
            },
            'shape': models['convolutional_NN']['shape'],
            'model': model,
            'num_classes': models['convolutional_NN']['num_classes'],
            'data_sizes': models['convolutional_NN']['data_sizes'],
            'losses': {
                'training_loss': history.history['loss'],
                'val_loss': history.history['val_loss']
            },
            'accuracy': {
                'training_accuracy': history.history['accuracy'],
                'validation_accuracy': history.history['val_accuracy']
            }
        }
    clearLog()
    return models
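
A hedged usage sketch chaining tune_helper onto the dictionary returned by regression_ann above. Note that this function reads the preprocessing pipeline under the key 'preprocesser' (matching Example #3's return dict), while Example #1 returns it as 'preprocessor'; the aliasing line below bridges that mismatch. All concrete values are illustrative.

# Hypothetical usage sketch: tune the regression model trained earlier.
model_info['preprocesser'] = model_info['preprocessor']  # bridge the key-name mismatch
models = tune_helper(model_to_tune='regression_ANN',
                     dataset="housing.csv",   # same illustrative CSV as before
                     models={'regression_ANN': model_info},
                     max_trials=3,
                     epochs=5)
print(models['regression_ANN']['hyperparameters'])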
Example #3
def regression_ann(instruction,
                   ca_threshold=None,
                   text=None,
                   dataset=None,
                   drop=None,
                   preprocess=True,
                   test_size=0.2,
                   random_state=49,
                   epochs=50,
                   generate_plots=True,
                   callback_mode='min',
                   maximizer="val_loss",
                   save_model=True,
                   save_path=os.getcwd()):

    '''
    Earlier variant of regression_ann (Example #1): the same layer-growing
    search, without the callback and add_layer options and with a smaller
    return dictionary. The same imports and package-internal helpers
    (plus the currLog/counter logging globals) are assumed.
    '''
    global currLog
    logger("Reading in dataset...")

    dataReader = DataReader(dataset)
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)

    data, y, target, full_pipeline = initial_preprocesser(
        data, instruction, preprocess, ca_threshold, text)
    logger("->", "Target Column Found: {}".format(target))

    X_train = data['train']
    X_test = data['test']

    # Target scaling
    target_scaler = StandardScaler()

    y_train = target_scaler.fit_transform(np.array(y['train']).reshape(-1, 1))
    y_test = target_scaler.transform(np.array(y['test']).reshape(-1, 1))

    logger("Establishing callback function...")

    models = []
    losses = []
    model_data = []

    # early-stopping callback: halt training when the monitored metric stops improving
    es = EarlyStopping(monitor=maximizer,
                       mode=callback_mode,
                       verbose=0,
                       patience=5)

    i = 0

    # build the initial three-layer model
    model = get_keras_model_reg(data, i)

    logger("Training initial model...")
    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        callbacks=[es],
                        verbose=0)
    models.append(history)
    model_data.append(model)

    col_name = [[
        "Initial number of layers ", "| Training Loss ", "| Test Loss "
    ]]
    col_width = max(len(word) for row in col_name for word in row) + 2
    for row in col_name:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")
    values = []
    values.append(str(len(model.layers)))
    values.append("| " + str(history.history['loss'][-1]))
    values.append("| " + str(history.history['val_loss'][-1]))
    datax = []
    datax.append(values)
    for row in datax:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")

    losses.append(history.history[maximizer][-1])

    # keeps running model and fit functions until the validation loss stops
    # decreasing
    logger("Testing number of layers...")
    print(currLog)
    col_name = [["Current number of layers", "| Training Loss", "| Test Loss"]]
    col_width = max(len(word) for row in col_name for word in row) + 2
    for row in col_name:
        print((" " * 2 * counter) + "| " +
              ("".join(word.ljust(col_width) for word in row)) + " |")
    datax = []
    while all(x > y for x, y in zip(losses, losses[1:])):
        model = get_keras_model_reg(data, i)
        history = model.fit(X_train,
                            y_train,
                            epochs=epochs,
                            validation_data=(X_test, y_test),
                            callbacks=[es],  # reuse the early-stopping callback from the initial fit
                            verbose=0)
        model_data.append(model)
        models.append(history)

        values = []
        datax = []
        values.append(str(len(model.layers)))
        values.append("| " + str(history.history['loss'][-1]))
        values.append("| " + str(history.history['val_loss'][-1]))
        datax.append(values)
        for row in datax:
            print((" " * 2 * counter) + "| " +
                  ("".join(word.ljust(col_width) for word in row)) + " |")
        del values, datax
        losses.append(history.history[maximizer][-1])
        i += 1
    best_index = losses.index(min(losses))
    final_model = model_data[best_index]
    final_hist = models[best_index]
    print("")
    logger('->',
           "Best number of layers found: " + str(len(final_model.layers)))

    logger('->', "Training Loss: " + str(final_hist.history['loss'][-1]))
    logger('->', "Test Loss: " + str(final_hist.history['val_loss'][-1]))

    # calls function to generate plots in plot generation
    plots = {}  # defined unconditionally so the return below never raises NameError
    if generate_plots:
        init_plots, plot_names = generate_regression_plots(
            models[-1], data, y)
        for name, plot in zip(plot_names, init_plots):
            plots[str(name)] = plot

    if save_model:
        save(final_model, save_model, save_path)  # pass save_path, which was otherwise unused
    # stores values in the client object models dictionary field
    print("")
    logger("Stored model under 'regression_ANN' key")
    return {
        'id': generate_id(),
        'model': final_model,
        "target": target,
        "plots": plots,
        "preprocesser": full_pipeline,
        "interpreter": target_scaler,
        'losses': {
            'training_loss': final_hist.history['loss'],
            'val_loss': final_hist.history['val_loss']
        }
    }
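
Both versions scale the target with StandardScaler and return the fitted scaler under the 'interpreter' key, so model predictions come back in scaled units. A minimal sketch of recovering original-unit predictions, using Example #1's return dictionary (which includes 'test_data'); the call itself is illustrative:

# Hypothetical sketch: map scaled predictions back to the target's original units.
result = regression_ann("predict price", dataset="housing.csv")
scaled_preds = result['model'].predict(result['test_data']['X'])
original_preds = result['interpreter'].inverse_transform(scaled_preds.reshape(-1, 1))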