Example #1
def temporary_complete_keras(major_model, specific_model, 
                             model_params, test_x):
    """
    This function is designed only to complete Keras parameters. It wraps 
    process_val() to make sure that the non-integer parameters are correctly
    translated prior to being used to build keras models
    """
    # If a keras model, add to model params
    if major_model == "Keras":
        if specific_model in {"OneConv", "TwoConv"}:
            model_params["input_shape"] = test_x.shape[1:]
            model_params = {key: process_val(key, val, test_x.shape)
                            for key, val in model_params.items()}
        else:
            final_x_shape = np.prod(test_x.shape[1:])
            model_params["input_shape"] = (final_x_shape,)
            model_params = {key: process_val(key, val, (len(test_x), final_x_shape))
                            for key, val in model_params.items()}
            
    return model_params
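
A minimal usage sketch (the shapes and raw parameter values below are hypothetical; process_val is assumed to be importable from the surrounding module):

import numpy as np

# Hypothetical encoded design space: 384 samples, 5 positions, 200 latent dims
test_x = np.random.rand(384, 5, 200)
raw_params = {"dropout": 0.2}  # hypothetical raw Keras parameters

# Convolutional models keep the 3D shape; all other Keras models are flattened
conv_params = temporary_complete_keras("Keras", "OneConv",
                                       raw_params.copy(), test_x)
# conv_params["input_shape"] == (5, 200)
dense_params = temporary_complete_keras("Keras", "OneHidden",
                                        raw_params.copy(), test_x)
# dense_params["input_shape"] == (1000,)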
Example #2
def process_best(best_dict, major_model, specific_model, x_shape):
    """
    The best parameters returned from hyperopt.fmin() are not necessarily in
    the correct format for training downstream models. This function converts
    output "best_params" to the correct format.
    
    Parameters
    ----------
    best_dict: dict
        The parameters output by hyperopt.fmin() after hyperparameter optimization
    major_model: str
        Choice of 'Keras', 'XGB', or 'sklearn-regressor'. This argument
        tells MldeModel from which package we will be pulling models. 
    specific_model: str
        This argument tells MldeModel which regressor to use within the package
        defined by major_model.
    x_shape: tuple
        Gives the shape of the input x-values for a model. This is used to
        calculate the appropriate conversions from percentile to integer for
        Keras parameters.
        
    Returns
    -------
    updated_model_params: dict
        best_dict values converted to the appropriate format for training
    """
    # Convert choice indices back to the correct values
    for var_name, var_val in best_dict.items():

        # If the variable name is a choice, convert var_val
        if var_name in categorical_params:
            best_dict[var_name] = categorical_params[var_name][var_val]

    # Redefine the dictionary
    formatted_dict = {
        key: process_val(key, val, x_shape)
        for key, val in best_dict.items()
    }

    # Add input_shape if we are working with Keras
    if major_model == "Keras":
        formatted_dict["input_shape"] = x_shape[1:]

    # Handle the problems with some of the linear models, then return
    return handle_linear_exceptions(formatted_dict, major_model,
                                    specific_model)
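
A hedged usage sketch (the hyperopt output below is hypothetical; categorical_params, process_val, and handle_linear_exceptions are the module-level objects referenced above):

# hyperopt.fmin() reports categorical choices as indices; process_best first
# maps them back through categorical_params, then reformats every value
best = {"dropout": 0.31, "flatten_choice": 1}  # hypothetical fmin() output
model_params = process_best(best, "Keras", "OneConv", (384, 5, 200))
# The flatten_choice index is replaced by categorical_params["flatten_choice"][1],
# every value passes through process_val, and input_shape == (5, 200) is added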
Example #3
def space_to_model_params(space, space_names, major_model, specific_model,
                          x_shape):
    """
    The outputs of hyperopt search spaces are not always in the correct format
    for training a model. This function converts them to the appropriate format
    for training.
    
    Parameters
    ----------
    space: iterable of numeric
        The output parameter values from some hyperopt search space
    space_names: iterable of str
        The variable names of each parameter in space
    major_model: str
        Choice of 'Keras', 'XGB', or 'sklearn-regressor'. This argument
        tells MldeModel from which package we will be pulling models. 
    specific_model: str
        This argument tells MldeModel which regressor to use within the package
        defined by major_model.
    x_shape: tuple
        Gives the shape of the input x-values for a model. This is used to
        calculate the appropriate conversions from percentile to integer for
        Keras parameters.
        
    Returns
    -------
    updated_model_params: dict
        Search space output values converted to the appropriate format for training
    """
    # Package into a dictionary and convert datatypes as appropriate
    model_params = {
        var_name: process_val(var_name, var_val, x_shape)
        for var_name, var_val in zip(space_names, space)
    }

    # Add the input shape on to model_params if this is a keras model
    if major_model == "Keras":
        model_params["input_shape"] = x_shape[1:]

    # Return model params
    return handle_linear_exceptions(model_params, major_model, specific_model)
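
A hedged usage sketch (values and names below are hypothetical; the 2D x_shape follows the convention used in the tests below):

space = (0.25, 0.2)                 # hypothetical search-space output values
space_names = ("size1", "dropout")  # matching variable names, in order
params = space_to_model_params(space, space_names, "Keras",
                               "OneHidden", (384, 1000))
# Percentile-style parameters such as size1 are converted to integer layer
# sizes by process_val, and params["input_shape"] == (1000,)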
Example #4
def test_process_val():
    """
    Things this function confirms:
    1) If the correct shape is passed in to process_val, then the correct changes
    are made to Keras models
    2) Only keras model parameters are affected by process_val
    3) The appropriate built-in flags are thrown if the x_shape isn't what we
    expect for a given parameter name
    """
    # Start by loading the default model parameters and test dependencies
    import pytest
    from itertools import chain
    from Support.Params.Defaults import (default_model_params, cpu_models,
                                         gpu_models)
    from Support.RunMlde.CompleteKerasParams import process_val
        
    # Define two test x_shapes: one 2D and one 3D
    x2d = (384, 1000)
    x3d = (384, 5, 200)
    
    # Define the expected number of latent dims for each input shape
    expected_latent_dims_2d = 1000
    expected_latent_dims_3d = 200
        
    # Now define the expected parameter values based on the Keras defaults
    expectations = {"dropout": 0.2,
                    "size1": 250,
                    "size2": 63,
                    "filter_choice": 3,
                    "n_filters1": 13,
                    "flatten_choice": "Average",
                    "filter_arch": (3, 3),
                    "n_filters2": 2}
            
    # Loop over the different models and test the effects of process_val
    for major_model, specific_model in chain(gpu_models, cpu_models):
        
        # Pull the default model params
        test_params = default_model_params[major_model][specific_model].copy()
        
        # Determine the appropriate shape
        if major_model == "Keras" and specific_model in {"OneConv", "TwoConv"}:
            test_shape = x3d
        else:
            test_shape = x2d
        
        # Run the parameters through process_val
        processed_params = {key: process_val(key, val, test_shape)
                            for key, val in test_params.items()}
        
        # Assert that the parameters match what we expect
        if major_model == "Keras":
            for key, val in processed_params.items():
                assert expectations[key] == val
        else:
            assert all(test_params[key] == val
                       for key, val in processed_params.items())
            
    # Make sure we are calculating the correct number of latent dimensions.
    # Note that key and val still hold the last values from the loop above
    returned_latent_dims = process_val(key, val, x2d, _debug=True)
    assert returned_latent_dims == expected_latent_dims_2d

    returned_n_aas, returned_latent_dims = process_val(key, val,
                                                       x3d, _debug=True)
    assert returned_latent_dims == expected_latent_dims_3d
    assert returned_n_aas == 5
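    # Taken together, the two calls above pin down the apparent _debug
    # contract: a 2D x_shape returns just the latent dimension count
    # (1000 for x2d), while a 3D x_shape returns the pair
    # (n_amino_acids, latent_dims) ((5, 200) for x3d)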
    
    # Add assertions that will catch expected failures
    with pytest.raises(ValueError, match="Input X must be 2 or 3D"):
        process_val(key, val, (2, 3, 4, 2))
        
    with pytest.raises(AssertionError, match="Expect a 3D array for convolutional networks"):
        process_val("filter_choice", 0.25, (2, 4))
    
    with pytest.raises(AssertionError, match="Expect a 3D array for convolutional networks"):
        process_val("filter_arch", 0.25, (2, 4))
        
    with pytest.raises(AssertionError, match="Expect a 2D array for feed forward networks"):
        process_val("size1", 0.25, (2, 4, 6))
Example #5
def test_KerasModel():
    """
    What this function confirms:
    1) Passing the wrong set of parameters into a Keras model (e.g. passing in
    the parameters for OneHidden into OneConv) will result in an error
    2) Attempting to train with more or less points than labels  will result in
    an error
    3) All checks in subclass_pred_checker() are passed by KerasModel()
    4) Training parameters are appropriately handled by KerasModel
    """
    # Associate a class method with each specific model
    class_method_dict = {
        "NoHidden": KerasModel.NoHidden,
        "OneHidden": KerasModel.OneHidden,
        "TwoHidden": KerasModel.TwoHidden,
        "OneConv": KerasModel.OneConv,
        "TwoConv": KerasModel.TwoConv
    }

    # Define training params
    training_params = default_training_params["Keras"].copy()

    # Make sure that passing the parameters in for the wrong model throws
    # an error
    for major_model, specific_model in gpu_models:

        # Pull the parameters
        model_params = default_model_params[major_model][specific_model].copy()

        # Add input_shape
        if specific_model in {"OneConv", "TwoConv"}:
            model_params["input_shape"] = all_x.shape[1:]
            x_train = x_train3d
            x_test = x_test3d
        else:
            model_params["input_shape"] = flat_all_x.shape[1:]
            x_train = x_train2d
            x_test = x_test2d

        # Copy train and test
        local_x_train_copy = x_train.copy()
        local_x_test_copy = x_test.copy()

        # Complete model parameters
        model_params = {
            key: process_val(key, val, x_train.shape)
            for key, val in model_params.items()
        }

        # An error should be raised if we pass the wrong parameters to a model
        for _, other_specific in gpu_models:

            # Continue if this is the same model
            if other_specific == specific_model:
                continue

            # Confirm that we get an error if passing in the wrong set of parameters
            with pytest.raises(
                    AssertionError,
                    match="(Some model_params missing for .+|"
                          "Too many parameters passed .+)"):
                _ = class_method_dict[other_specific](model_params,
                                                      training_params)

        # Build a test model
        test_model = class_method_dict[specific_model](model_params,
                                                       training_params)

        # Training with mismatched x/y lengths (x_train paired with y_test)
        # should run into an error
        with pytest.raises(AssertionError, match="Mismatch in lengths of .+"):
            test_model.train(x_train, y_test, x_test, y_train)

        # Make sure nothing changes
        assert np.array_equal(local_x_train_copy, x_train)
        assert np.array_equal(local_x_test_copy, x_test)
        assert np.array_equal(y_train_copy, y_train)
        assert np.array_equal(y_test_copy, y_test)
        assert np.array_equal(all_x_copy, all_x)
        assert np.array_equal(all_y_copy, all_y)
        assert np.array_equal(train_inds_copy, train_inds)
        assert np.array_equal(test_inds_copy, test_inds)
        assert np.array_equal(flat_all_x_copy, flat_all_x)

        # Test predictions
        subclass_pred_checker(test_model, x_train, y_train, x_test, y_test)

        # Make sure we fail if we try to train multiple times
        with pytest.raises(AssertionError,
                           match="Successive calls to 'train' not supported"):
            test_model.train(x_train, y_train, x_test, y_test)

        # Make sure nothing changes
        assert np.array_equal(local_x_train_copy, x_train)
        assert np.array_equal(local_x_test_copy, x_test)
        assert np.array_equal(y_train_copy, y_train)
        assert np.array_equal(y_test_copy, y_test)
        assert np.array_equal(all_x_copy, all_x)
        assert np.array_equal(all_y_copy, all_y)
        assert np.array_equal(train_inds_copy, train_inds)
        assert np.array_equal(test_inds_copy, test_inds)
        assert np.array_equal(flat_all_x_copy, flat_all_x)

        # Confirm that all properties return something and that they match
        # the underlying model instance variables
        test_early_stop = test_model.early_stopping_epoch
        test_training_params = test_model.training_params
        assert test_early_stop == test_model._early_stopping_epoch
        assert test_training_params == test_model._training_params

        # Make sure we have all expected training params
        assert test_training_params == expected_training_params_keras

        # Make sure that the early stopping epoch is an integer
        assert isinstance(test_early_stop, int)
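
A minimal construction sketch distilled from the test above (the completed parameter values are hypothetical; the class methods, default_training_params, and the train/early_stopping_epoch interface are as exercised in the test):

# Hypothetical completed parameters for a OneHidden model on 2D inputs
model_params = {"input_shape": (1000,), "dropout": 0.2, "size1": 250}
training_params = default_training_params["Keras"].copy()
model = KerasModel.OneHidden(model_params, training_params)

model.train(x_train2d, y_train, x_test2d, y_test)  # a model is trained once
epoch = model.early_stopping_epoch                 # integer set during training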
Example #6
def prep_input_data(parameter_df, x_shape):
    """
    Given the data input to run_mlde_cl, build all args needed for running both
    the default and hyperparameter optimization functions. This means
    instantiating a number of model instances with inbuilt default parameters
    (for passing into run_mlde()) as well as packaging args needed for
    run_hyperopt_mlde()
    
    Parameters
    ----------
    parameter_df: pd.DataFrame
        Dataframe derived from MLDE.Support.Params.MldeParameters.csv, containing
        only those models for which Include is True.
    x_shape: tuple
        Shape of the design space (should be 3D)
        
    Returns
    -------
    mods_for_default: Iterable of MldeModel instances
        MldeModel instances to be passed into run_mlde()
    hyperopt_args: Iterable of tuples
        Arguments to pass into run_hyperopt_mlde()
    """
    # Make sure the shape is 3D
    assert len(x_shape) == 3, "Input shape should be 3D"

    # Create empty lists in which to store the models and hyperopt args
    n_mods = len(parameter_df)
    mods_for_default = [None for _ in range(n_mods)]
    hyperopt_args = [None for _ in range(n_mods)]
    for i, (_, row) in enumerate(parameter_df.iterrows()):

        # Pull info needed to instantiate model
        major_model = row["ModelClass"]
        specific_model = row["SpecificModel"]

        # Define the model and training parameters
        if major_model == "Keras":

            # Pull the appropriate model parameters
            temp_params = default_model_params[major_model][
                specific_model].copy()
            model_params = {}

            # Add input_shape as a parameter
            if specific_model in {"OneConv", "TwoConv"}:
                final_shape = x_shape[1:]
                finalized_x_shape = x_shape
            else:
                final_shape = (np.prod(x_shape[1:]), )
                finalized_x_shape = (x_shape[0], final_shape[0])
            model_params["input_shape"] = final_shape

            # Loop over the model parameters and update appropriately
            for key, val in temp_params.items():

                # Append the new value to model_params
                model_params[key] = process_val(key, val, finalized_x_shape)

        else:
            model_params = default_model_params[major_model][
                specific_model].copy()

        # Instantiate a model with default parameters
        mods_for_default[i] = MldeModel(
            major_model,
            specific_model,
            model_params=model_params,
            training_params=default_training_params[major_model],
            eval_metric=mse)

        # Package args for hyperopt
        hyperopt_args[i] = (major_model, specific_model, row["NHyperopt"])

    # Return the instantiated models and the hyperopt args
    return mods_for_default, hyperopt_args
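
A hedged usage sketch (the dataframe contents are hypothetical; the column names ModelClass, SpecificModel, and NHyperopt are the ones read above):

import pandas as pd

# Hypothetical subset of MldeParameters.csv containing only Include == True rows
parameter_df = pd.DataFrame({
    "ModelClass": ["Keras", "XGB"],
    "SpecificModel": ["OneHidden", "XGBTree"],  # XGBTree is a hypothetical name
    "NHyperopt": [100, 100],
})
mods_for_default, hyperopt_args = prep_input_data(parameter_df, (384, 5, 200))
# mods_for_default holds two MldeModel instances built with default parameters;
# hyperopt_args holds ("Keras", "OneHidden", 100) and ("XGB", "XGBTree", 100)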