def tests_for_all_extract(self):
        """Run ``hvc.extract`` on each extract test config and check its outputs.

        Every file under ``configs`` that matches
        ``test_data/config.yml/test_extract_*.config.yml`` is handed to
        ``hvc.extract``. For each entry in the parsed config's ``todo_list``
        the most recently modified ``*extract_output*`` directory inside that
        entry's ``output_dir`` is inspected.

        Note: this changes the current working directory and does not
        restore it.
        """
        config_pattern = os.path.join(
            configs,
            os.path.normpath('test_data/config.yml/'
                             'test_extract_*.config.yml'))
        for config_path in glob.glob(config_pattern):
            # earlier iterations leave us inside an output directory,
            # so go back to homedir before running extract again
            if os.getcwd() != homedir:
                os.chdir(homedir)
            hvc.extract(config_path)
            parsed_config = hvc.parse_config(config_path, 'extract')

            for todo in parsed_config['todo_list']:
                # work from inside this todo's output directory
                os.chdir(todo['output_dir'])
                output_dirs = sorted(
                    (candidate
                     for candidate in glob.glob('*extract_output*')
                     if os.path.isdir(candidate)),
                    key=os.path.getmtime)

                # last item after sorting by mtime is the newest output
                os.chdir(output_dirs[-1])
                ftr_dicts = [
                    joblib.load(ftr_path)
                    for ftr_path in glob.glob('features_from*')
                ]

                # 'features' must be in every feature file or in none
                has_features = ['features' in loaded for loaded in ftr_dicts]
                if any(has_features):
                    assert all(has_features)
                    for loaded in ftr_dicts:
                        labels = loaded['labels']
                        if 'features' not in loaded:
                            continue
                        # one row of features per label
                        feature_matrix = loaded['features']
                        assert feature_matrix.shape[0] == len(labels)

                    # the number of columns (i.e. features) has to be the
                    # same in every feature matrix
                    column_counts = [
                        loaded['features'].shape[1] for loaded in ftr_dicts
                    ]
                    assert len(np.unique(column_counts)) == 1

                # same all-or-none rule for neural net inputs
                has_nn_inputs = [
                    'neuralnets_input_dict' in loaded for loaded in ftr_dicts
                ]
                if any(has_nn_inputs):
                    assert all(has_nn_inputs)

                # there should be exactly one summary file per output dir
                summary_files = glob.glob('summary_feature_file_*')
                assert len(summary_files) == 1
                # loading confirms the summary file is readable; its contents
                # are not compared against the feature files here
                summary_dict = joblib.load(summary_files[0])
Пример #2
0
    def _yaml_config_asserts(self,
                             extract_yaml_config_file,
                             tmp_output_dir):
        """Shared assertions for tests that run ``hvc.extract`` from a YAML config.

        Parameters
        ----------
        extract_yaml_config_file
            Config file handed to ``rewrite_config``.
        tmp_output_dir
            Passed to ``rewrite_config``; its string form is the replacement
            value for the config's ``output_dir`` entry.
        """
        # tmp_output_dir has to be written into the yaml file before extracting
        output_dir_swap = ('replace with tmp_output_dir', str(tmp_output_dir))
        rewritten_config = rewrite_config(extract_yaml_config_file,
                                          tmp_output_dir,
                                          {'output_dir': output_dir_swap})

        hvc.extract(rewritten_config)
        parsed_config = hvc.parse_config(rewritten_config, 'extract')

        for todo in parsed_config['todo_list']:
            # work from inside this todo's output directory
            os.chdir(todo['output_dir'])
            output_dirs = sorted(
                (candidate
                 for candidate in glob('*extract_output*')
                 if os.path.isdir(candidate)),
                key=os.path.getmtime)

            # last item after sorting by mtime is the newest output
            os.chdir(output_dirs[-1])
            ftr_dicts = [
                joblib.load(ftr_path) for ftr_path in glob('features_from*')
            ]

            # 'features' must be in every feature file or in none
            has_features = ['features' in loaded for loaded in ftr_dicts]
            if any(has_features):
                assert all(has_features)
                for loaded in ftr_dicts:
                    labels = loaded['labels']
                    if 'features' not in loaded:
                        continue
                    # one row of features per label
                    feature_matrix = loaded['features']
                    assert feature_matrix.shape[0] == len(labels)

                # the number of columns (i.e. features) has to be the
                # same in every feature matrix
                column_counts = [
                    loaded['features'].shape[1] for loaded in ftr_dicts
                ]
                assert len(np.unique(column_counts)) == 1

            # same all-or-none rule for neural net inputs
            has_nn_inputs = [
                'neuralnets_input_dict' in loaded for loaded in ftr_dicts
            ]
            if any(has_nn_inputs):
                assert all(has_nn_inputs)
    def _yaml_config_asserts(self, select_yaml_config_path, tmp_output_dir,
                             feature_file):
        """Shared assertions for tests that run ``hvc.select`` from a YAML config.

        Parameters
        ----------
        select_yaml_config_path
            Config file handed to ``rewrite_config``.
        tmp_output_dir
            Passed to ``rewrite_config``; its string form is the replacement
            value for ``output_dir`` and the root searched for select output.
        feature_file
            Replacement value for the config's ``feature_file`` entry.

        Returns
        -------
        bool
            Always True; a failed check raises AssertionError.
        """
        replacements = {
            'feature_file': ('replace with feature_file', feature_file),
            'output_dir': ('replace with tmp_output_dir', str(tmp_output_dir)),
        }
        rewritten_config = rewrite_config(select_yaml_config_path,
                                          tmp_output_dir,
                                          replace_dict=replacements)

        # snapshot the summary files that exist before running select so
        # the one produced by this run can be told apart afterwards
        summary_pattern = os.path.join(str(tmp_output_dir), 'select_output*',
                                       'summary_model_select_file*')
        seen_before = set(glob(summary_pattern))
        hvc.select(rewritten_config)
        new_summaries = [
            found for found in glob(summary_pattern)
            if found not in seen_before
        ]
        # this run should have produced exactly one summary file
        assert len(new_summaries) == 1

        # every model in the config needs a matching folder
        # next to the summary file
        parsed_config = hvc.parse_config(rewritten_config, 'select')
        run_dir = os.path.dirname(new_summaries[0])
        # first item yielded by os.walk describes run_dir itself;
        # only its immediate sub-directory names are needed
        _, model_dirs, _ = next(os.walk(run_dir))
        expected_dirs = [
            determine_model_output_folder_name(model)
            for model in parsed_config['models']
        ]
        for expected_dir in expected_dirs:
            assert expected_dir in model_dirs

        return True
Пример #4
0
def check_select_output(config_path, output_dir):
    """Check that ``output_dir`` holds the expected results of a select run.

    Parameters
    ----------
    config_path
        Select config file, parsed with ``hvc.parse_config``.
    output_dir
        Directory expected to contain exactly one
        ``summary_model_select_file*`` and one sub-directory for each
        model listed in the config.

    Returns
    -------
    bool
        Always True; a failed check raises AssertionError.
    """
    summary_pattern = os.path.join(str(output_dir),
                                   'summary_model_select_file*')
    summary_files = glob(summary_pattern)
    # there should be exactly one summary file
    assert len(summary_files) == 1

    # every model in the config needs a matching folder in output_dir
    parsed_config = hvc.parse_config(config_path, 'select')
    # first item yielded by os.walk describes output_dir itself;
    # only its immediate sub-directory names are needed
    _, model_dirs, _ = next(os.walk(output_dir))
    expected_dirs = [
        determine_model_output_folder_name(model)
        for model in parsed_config['models']
    ]
    for expected_dir in expected_dirs:
        assert expected_dir in model_dirs

    return True