def test_list_metafeatures(self):
    """Verify that mutating the list returned by
    ``Metafeatures.list_metafeatures()`` does not corrupt later calls,
    i.e. the class hands out a fresh (or protected) list each time.
    """
    returned = Metafeatures.list_metafeatures()
    snapshot = list(returned)
    # Deliberately mutate the returned list, then check a fresh call
    # is unaffected.
    returned.clear()
    if Metafeatures.list_metafeatures() != snapshot:
        # Restore the shared state before failing so other tests
        # in this run are not poisoned.
        returned.extend(snapshot)
        self.fail('Metafeature list has been mutated')
def test_numeric_targets(self):
    """ Test Metafeatures().compute() with numeric targets """
    failures = {}
    this_test_name = inspect.stack()[0][3]
    for filename, dataset in self.datasets.items():
        # Re-type the target column as numeric and substitute a random
        # continuous target of the same length and name.
        col_types = dataset["column_types"].copy()
        col_types[dataset["Y"].name] = consts.NUMERIC
        random_target = pd.Series(
            np.random.rand(dataset["Y"].shape[0]),
            name=dataset["Y"].name,
        )
        computed = Metafeatures().compute(
            X=dataset["X"],
            Y=random_target,
            seed=CORRECTNESS_SEED,
            column_types=col_types,
        )
        # With a numeric target, every target-dependent metafeature is
        # expected to report the NUMERIC_TARGETS sentinel value.
        expected = dataset["known_metafeatures"]
        for mf_id in Metafeatures.list_metafeatures(
                consts.MetafeatureGroup.TARGET_DEPENDENT.value):
            expected[mf_id] = {
                consts.VALUE_KEY: consts.NUMERIC_TARGETS,
                consts.COMPUTE_TIME_KEY: 0.
            }
        checks = [
            (self._check_correctness,
             [computed, expected, filename]),
            (self._check_compare_metafeature_lists,
             [computed, expected, filename]),
        ]
        failures.update(self._perform_checks(checks))
    self._report_test_failures(failures, this_test_name)
def test_exclude_metafeature_groups(self):
    """Verify that excluding a random subset of metafeature groups
    prevents every metafeature in those groups from being computed,
    while the remaining computed values stay correct.
    """
    SUBSET_LENGTH = 3
    failures = {}
    this_test_name = inspect.stack()[0][3]
    for filename, dataset in self.datasets.items():
        all_group_names = [g.value for g in consts.MetafeatureGroup]
        excluded_groups = random.sample(all_group_names, SUBSET_LENGTH)
        computed = Metafeatures().compute(
            X=dataset["X"],
            Y=dataset["Y"],
            column_types=dataset["column_types"],
            seed=CORRECTNESS_SEED,
            exclude_groups=excluded_groups,
        )
        expected = dataset["known_metafeatures"]
        failures.update(self._perform_checks([
            (self._check_correctness, [computed, expected, filename]),
        ]))
        # Every metafeature id belonging to any excluded group must be
        # absent from the computed results.
        excluded_ids = {
            mf_id
            for group in excluded_groups
            for mf_id in Metafeatures.list_metafeatures(group)
        }
        if excluded_ids & set(computed.keys()):
            self.fail('Metafeatures computed an excluded metafeature')
    self._report_test_failures(failures, this_test_name)
def get_list_metafeatures(list_X, list_y, type_metafeatures):
    """Compute one group of metafeatures for each (X, y) dataset pair.

    Parameters
    ----------
    list_X : sequence of array-likes
        Feature matrices, one per dataset.
    list_y : sequence of array-likes
        Targets parallel to ``list_X``; cast to a categorical Series.
    type_metafeatures : str
        Metafeature group name passed to
        ``Metafeatures.list_metafeatures(group=...)``.

    Returns
    -------
    pd.DataFrame
        One row per dataset with NaNs filled with 0, indexed by the
        module-level ``list_files``.
    """
    metafeatures = Metafeatures()
    list_dataset_metafeatures = []
    # Use the real input length for the tqdm progress bar instead of the
    # previous hard-coded dataset count (7084); fall back to an unknown
    # total when list_X is a bare iterator.
    total = len(list_X) if hasattr(list_X, "__len__") else None
    # BUG FIX: the original loop iterated over the undefined name
    # ``list_Y`` (capital Y) instead of the ``list_y`` parameter,
    # which raises NameError unless a same-named global happens to exist.
    for X, y in tqdm(zip(list_X, list_y), total=total):
        mfs = metafeatures.compute(
            pd.DataFrame(X),
            Y=pd.Series(y, dtype="category"),
            metafeature_ids=metafeatures.list_metafeatures(
                group=type_metafeatures),
            exclude=None,
            seed=0,  # fixed seed so repeated runs produce identical values
            timeout=60,  # cap per-dataset compute time (seconds)
        )
        list_dataset_metafeatures.append(
            pd.DataFrame(mfs).reset_index(drop=True))
    df_metafeatures = pd.concat(list_dataset_metafeatures).fillna(0)
    # NOTE(review): ``list_files`` is a free name, presumably a
    # module-level list of dataset filenames parallel to list_X —
    # confirm it stays in sync with the inputs.
    df_metafeatures["index"] = list_files
    df_metafeatures.set_index("index", inplace=True)
    return df_metafeatures