def test_create_model_wrapper():
    """
        Args:
        params (Namespace) : Parameters passed to the model pipeline
                     featurizer (Featurization): Object managing the featurization of compounds
                                 ds_client (DatastoreClient): Interface to the file datastore

                                                  Returns:
                                                  model (pipeline.Model): Wrapper for DeepChem, sklearn or other model.

                                                              Raises:
ValueError: Only params.model_type = 'NN' or 'RF' is supported. 

Dependencies:
None

Calls:
DCNNModelWrapper, DCRFModelWrapper
    """
    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    mdl = model_wrapper.create_model_wrapper(inp_params, featurization)
    mdl.setup_model_dirs()
    # testing for correct attribute initialization with model_type == "NN"
    test = []
    test.append(mdl.params.model_type == 'NN')
    test.append(isinstance(mdl.featurization, feat.DynamicFeaturization))
    test.append(mdl.output_dir == inp_params.output_dir)
    test.append(mdl.model_dir == inp_params.output_dir + '/' + 'model')
    test.append(mdl.best_model_dir == inp_params.output_dir + '/' +
                'best_model')
    test.append(mdl.baseline_model_dir == inp_params.output_dir + '/' +
                'baseline_epoch_model')
    test.append(mdl.transformers == [])
    test.append(mdl.transformers_x == [])
    test.append(isinstance(mdl, model_wrapper.DCNNModelWrapper))

    # testing for correct attribute initialization with model_type == "RF"
    temp_params = copy.deepcopy(inp_params)
    temp_params.model_type = 'RF'
    featurization = feat.create_featurization(temp_params)
    mdl_RF = model_wrapper.create_model_wrapper(temp_params, featurization)
    test.append(isinstance(mdl_RF, model_wrapper.DCRFModelWrapper))
    test.append(mdl_RF.params.model_type == 'RF')

    # assertion for all tests
    assert all(test)

    # testing for ValueError when model_type is not in ['NN', 'RF']
    with pytest.raises(ValueError):
        temp_params.model_type = 'wrong'
        model_wrapper.create_model_wrapper(temp_params, featurization)
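
# NOTE: these tests assume a module-level `general_params` dict that
# parse.wrapper() converts to a Namespace. Its exact contents live elsewhere
# in this test module; a minimal sketch (keys inferred from how the tests use
# the parsed params, values hypothetical) might look like:
#
#     general_params = {
#         'dataset_key': 'delaney-processed.csv',  # hypothetical dataset path
#         'featurizer': 'ecfp',
#         'model_type': 'NN',
#         'prediction_type': 'regression',
#         'output_dir': 'pytest_output',           # hypothetical output dir
#     }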
def test_super_transform_dataset():
    """
    Args:
    dataset: The DeepChem DiskDataset to be transformed

    Returns:
    transformed_dataset

    Raises:
    None

    Dependencies:
    model_dataset.create_transformers

    Calls:
    None


    """
    # set up a model wrapper with regression and NN
    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    data_obj_ecfp = model_dataset.create_model_dataset(inp_params,
                                                       featurization,
                                                       ds_client=None)
    df_delaney = data_obj_ecfp.load_full_dataset()
    data_obj_ecfp.get_dataset_tasks(df_delaney)
    data_obj_ecfp.check_task_columns(df_delaney)
    data_obj_ecfp.get_featurized_data()
    mdl = model_wrapper.create_model_wrapper(inp_params,
                                             data_obj_ecfp.featurization)
    mdl.setup_model_dirs()
    mdl.create_transformers(data_obj_ecfp)
    dataset = mdl.transform_dataset(data_obj_ecfp.dataset)

    test = []
    # checking that the dataset is the correct type
    test.append(isinstance(dataset, DD))
    # since this is not descriptor featurization, the X values for the datasets should be the same
    test.append((dataset.X == data_obj_ecfp.dataset.X).all())
    # and the response values should be the same length:
    test.append(len(dataset.y) == len(data_obj_ecfp.dataset.y))
    test.append(len(dataset.y) == len(dataset.ids))
    assert all(test)
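
# For reference, a transformed response array can be mapped back to the
# original scale with DeepChem's undo_transforms helper. A minimal sketch
# (illustrative only; `_undo_response_transform` is not part of the AMPL API):
def _undo_response_transform(dataset, transformers):
    # dataset: a DeepChem DiskDataset; transformers: a list such as mdl.transformers
    return dc.trans.undo_transforms(dataset.y, transformers)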
def test_train_NN_graphconv_scaffold_inputs():
    """

    Args:
    pipeline (ModelPipeline): The ModelPipeline instance for this model run.
    
    Dependencies:
    ModelPipeline creation
    featurization creation
    creation of model_wrapper
    mp.load_featurize_data

    Calls:
    create_perf_data
    perf_data.accumulate_preds
    perf_data.comput_perf_metrics
    data.combined_training-data()
    self._copy_model
    """
    # checking that the layers, dropouts, and learning rate are properly added to the deepchem graphconv model
    general_params['featurizer'] = 'graphconv'
    general_params['layer_sizes'] = '100,100,10'
    general_params['dropouts'] = '0.3,0.3,0.1'
    general_params['uncertainty'] = False
    inp_params = parse.wrapper(general_params)
    mp = MP.ModelPipeline(inp_params)
    mp.featurization = feat.create_featurization(inp_params)
    mp.model_wrapper = model_wrapper.create_model_wrapper(
        inp_params, mp.featurization, mp.ds_client)
    # asserting that the correct model is created with the correct layer sizes, dropouts, model_dir, and mode by default
    test1 = []

    test1.append(mp.model_wrapper.params.layer_sizes == [100, 100, 10])
    test1.append(mp.model_wrapper.params.dropouts == [0.3, 0.3, 0.1])
    # checking that parameters are properly passed to the deepchem model object
    test1.append(isinstance(mp.model_wrapper.model, GraphConvModel))
    test1.append(
        mp.model_wrapper.model.model_dir == mp.model_wrapper.model_dir)
    test1.append(
        [i.out_channel
         for i in mp.model_wrapper.model.model.graph_convs] == [100, 100])
    test1.append(
        [i.rate
         for i in mp.model_wrapper.model.model.dropouts] == [0.3, 0.3, 0.1])
    test1.append(mp.model_wrapper.model.mode == 'regression')
    test1.append(mp.model_wrapper.model.model.dense.units == 10)
    assert all(test1)
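
    # For context, the attribute checks above mirror how DeepChem's
    # GraphConvModel would be built directly from these parameters
    # (a sketch, assuming the DeepChem 2.x keyword names and a single task):
    #
    #     GraphConvModel(n_tasks=1, graph_conv_layers=[100, 100],
    #                    dense_layer_size=10, dropout=[0.3, 0.3, 0.1],
    #                    mode='regression')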

#***********************************************************************************
def test_super_get_train_valid_pred_results():
    """
    Args:
    perf_data: A PerfData object that stores the predicted values and metrics

    Returns:
    dict: A dictionary of the prediction results

    Raises:
    None

    Dependencies:
    create_perf_data

    Calls:
    perf_data.get_prediction_results()
    """
    pass

# should be tested in perf_data.get_prediction_results()
# should still be called to make sure that the function is callable

#***********************************************************************************
def test_super_get_test_perf_data():
    """
    Args:
    model_dir (str): Directory where the saved model is stored
    model_dataset (DiskDataset): Stores the current dataset and related methods

    Returns:
    perf_data: PerfData object containing the predicted values and metrics for the current test dataset

    Raises:
    None

    Dependencies:
    A model must be in model_dir
    model_dataset.test_dset must exist

    Calls:
    create_perf_data
    self.generate_predictions
    perf_data.accumulate_preds
    """
    pass
    # mostly tested in accumulate_preds, but should be tested to ensure that the predictions are properly being called

#***********************************************************************************
def test_super_get_test_pred_results():
    """
    Args:
    model_dir (str): Directory where the saved model is stored
    model_dataset (DiskDataset): Stores the current dataset and related methods

    Returns:
    dict: A dictionary containing the prediction values and metrics for the current dataset.

    Raises:
    None

    Dependencies:
    A model must be in model_dir
    model_dataset.test_dset must exist

    Calls:
    self.get_test_perf_data
    perf_data.get_prediction_results
    """
    pass
    # mostly tested in perf_data.get_prediction_results

#***********************************************************************************
def test_super_get_full_dataset_perf_data():
    """
    Args:
    model_dataset (DiskDataset): Stores the current dataset and related methods

    Returns:
    perf_data: PerfData object containing the predicted values and metrics for the current full dataset

    Raises:
    None

    Dependencies:
    A model must already be trained

    Calls:
    create_perf_data
    self.generate_predictions
    self.accumulate_preds
    """
    pass

#***********************************************************************************
def test_super_get_full_dataset_pred_results():
    """
    Args:
    model_dataset (DiskDataset): Stores the current dataset and related methods

    Returns:
    dict: A dictionary containing predicted values and metrics for the current full dataset

    Raises:
    None

    Dependencies:
    A model must already be trained.

    Calls:
    get_full_dataset_perf_data
    self.get_prediction_results()
    """
    pass
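
# The stubs above document interfaces exercised mainly by the perf_data tests;
# a minimal smoke-test sketch for one of them, assuming a trained wrapper
# `mdl` and a ModelDataset `data_obj` built as in the tests above:
#
#     perf = mdl.get_test_perf_data(mdl.best_model_dir, data_obj)
#     results = mdl.get_test_pred_results(mdl.best_model_dir, data_obj)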
def test_super_create_transformers():
    """
    Args:
    model_dataset: The ModelDataset object that handles the current dataset

    Returns:
    self.transformers
    self.transformers_x
    self.params.transformer_key
    self.params.transformer_oid (if datastore)

    Raises:
    Exception when failing to save to the datastore

    Dependencies:
    create_featurization
    create_model_dataset
    model_dataset.load_full_dataset
    model_dataset.get_dataset_tasks
    model_dataset.check_task_columns
    model_dataset.get_featurized_data
    Requires (self.params.prediction_type == 'regression' and self.params.transformers == True) or len(self.transformers) > 0 

    Calls:
    self.featurization.create_feature_transformer
    dsf.upload_pickle_to_DS

    """
    # set up a model wrapper with regression and NN

    inp_params = parse.wrapper(general_params)
    featurization = feat.create_featurization(inp_params)
    data_obj_ecfp = model_dataset.create_model_dataset(inp_params,
                                                       featurization,
                                                       ds_client=None)
    df_delaney = data_obj_ecfp.load_full_dataset()
    data_obj_ecfp.get_dataset_tasks(df_delaney)
    data_obj_ecfp.check_task_columns(df_delaney)
    data_obj_ecfp.get_featurized_data()
    mdl = model_wrapper.create_model_wrapper(inp_params,
                                             data_obj_ecfp.featurization)
    mdl.setup_model_dirs()

    # testing correct model_wrapper build with regression and NN
    test = []
    test.append(mdl.params.prediction_type == 'regression')
    test.append(mdl.params.model_type == 'NN')
    mdl.create_transformers(data_obj_ecfp)
    test.append(
        isinstance(mdl.transformers[0],
                   dc.trans.transformers.NormalizationTransformer))
    test.append(mdl.transformers_x == [])
    # testing that the transformer is saved to the correct location
    transformer_path = os.path.join(mdl.output_dir, 'transformers.pkl')
    test.append(os.path.isfile(transformer_path))

    # TODO: test proper saving of the transformer to the datastore
    # TODO: test when params.transformers is False

    # checking that a classification wrapper is created with empty transformer lists
    inp_params.prediction_type = 'classification'
    mdl = model_wrapper.create_model_wrapper(inp_params, featurization)
    test.append(mdl.transformers == [])
    test.append(mdl.transformers_x == [])
    assert all(test)
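
# A quick way to sanity-check the pickled transformers written by
# create_transformers(); a sketch only (`_load_saved_transformers` is a
# hypothetical helper, not part of the AMPL API):
def _load_saved_transformers(output_dir):
    import pickle  # stdlib; not necessarily imported elsewhere in this module
    # returns whatever object create_transformers() pickled to transformers.pkl
    with open(os.path.join(output_dir, 'transformers.pkl'), 'rb') as f:
        return pickle.load(f)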