def test_kNearestDatasets(self):
    """Exercise ``KNearestDatasets.kNearestDatasets`` for several values of k.

    A model fitted on the ``krvskp`` and ``labor`` metafeatures (with the
    runs of datasets 233 and 234) is queried with the ``anneal``
    metafeatures for k = 1, 2 and -1, both with and without
    ``return_distance``.  k = 0 and k = -2 are expected to raise
    ``ValueError``.
    """
    model = KNearestDatasets()
    model.fit(
        pd.DataFrame([self.krvskp, self.labor]),
        self.runs.loc[:, [233, 234]],
    )

    # k = 1: only dataset 233 is expected.
    closest = model.kNearestDatasets(self.anneal, 1)
    self.assertEqual([233], closest)
    closest, dist = model.kNearestDatasets(
        self.anneal, 1, return_distance=True)
    self.assertEqual([233], closest)
    np.testing.assert_array_almost_equal([3.8320802803440586], dist)

    # k = 2 and k = -1 are expected to give the same two neighbours.
    for k in (2, -1):
        ids = model.kNearestDatasets(self.anneal, k)
        self.assertEqual([233, 234], ids)
        ids, dist = model.kNearestDatasets(
            self.anneal, k, return_distance=True)
        self.assertEqual([233, 234], ids)
        np.testing.assert_array_almost_equal(
            [3.8320802803440586, 4.367919719655942], dist)

    # k = 0 and k = -2 must be rejected.
    for invalid_k in (0, -2):
        with self.assertRaises(ValueError):
            model.kNearestDatasets(self.anneal, invalid_k)
def test_kNearestDatasets(self):
    """Exercise ``KNearestDatasets.kNearestDatasets`` for several values of k.

    The model is fitted on the ``krvskp`` and ``labor`` metafeatures (with
    the runs of datasets 233 and 234) and queried with the ``anneal``
    metafeatures.  k = 1, 2 and -1 are checked with and without
    ``return_distance``; k = 0 and k = -2 must raise ``ValueError``.
    """
    model = KNearestDatasets()
    model.fit(
        pd.DataFrame([self.krvskp, self.labor]),
        self.runs.loc[:, [233, 234]],
    )

    # k = 1: only dataset 233 is expected.
    closest = model.kNearestDatasets(self.anneal, 1)
    self.assertEqual([233], closest)
    closest, dist = model.kNearestDatasets(
        self.anneal, 1, return_distance=True)
    self.assertEqual([233], closest)
    assert_array_almost_equal([1.82298937], dist)

    # k = 2 and k = -1 are expected to give the same two neighbours.
    for k in (2, -1):
        ids = model.kNearestDatasets(self.anneal, k)
        self.assertEqual([233, 234], ids)
        ids, dist = model.kNearestDatasets(
            self.anneal, k, return_distance=True)
        self.assertEqual([233, 234], ids)
        assert_array_almost_equal([1.822989, 2.267919], dist)

    # k = 0 and k = -2 must be rejected.
    for invalid_k in (0, -2):
        with self.assertRaises(ValueError):
            model.kNearestDatasets(self.anneal, invalid_k)
def test_random_metric(self):
    """Check that 20 queries with a random metric give 20 distinct values.

    The model uses ``get_random_metric(random_state=1)`` as metric.  For each
    query the second field of the first suggestion returned by
    ``kBestSuggestions`` is collected, and all 20 collected values must be
    unique.
    """
    model = KNearestDatasets(metric=get_random_metric(random_state=1))
    model.fit(
        pd.DataFrame([self.krvskp, self.labor]),
        self.runs.loc[:, [233, 234]],
    )

    observed = [
        model.kBestSuggestions(self.anneal, 1)[0][1]
        for _ in range(20)
    ]

    self.assertEqual(len(np.unique(observed)), 20)
def test_random_metric(self):
    """Check that repeated queries with a random metric never repeat a value.

    A model built with ``get_random_metric(random_state=1)`` is asked 20
    times for the single best suggestion for ``anneal``; the second field of
    that suggestion has to be different on every call.
    """
    random_metric = get_random_metric(random_state=1)
    model = KNearestDatasets(metric=random_metric)
    training_metafeatures = pd.DataFrame([self.krvskp, self.labor])
    model.fit(training_metafeatures, self.runs.loc[:, [233, 234]])

    collected = []
    for _ in range(20):
        best = model.kBestSuggestions(self.anneal, 1)[0]
        collected.append(best[1])

    n_distinct = len(np.unique(collected))
    self.assertEqual(n_distinct, 20)
def test_fit_l1_distance(self):
    """Check the state of a default ``KNearestDatasets`` after ``fit``.

    After fitting on the ``anneal``, ``krvskp`` and ``labor`` metafeatures
    with ``self.runs``, the best configuration recorded for datasets 232,
    233 and 234 must be 0, 1 and 2 respectively, and the stored
    metafeatures must equal the frame that was passed in.
    """
    model = KNearestDatasets()
    model.fit(
        pd.DataFrame([self.anneal, self.krvskp, self.labor]),
        self.runs,
    )

    expected_best = ((232, 0), (233, 1), (234, 2))
    for dataset_id, configuration in expected_best:
        self.assertEqual(
            model.best_configuration_per_dataset[dataset_id], configuration)

    # Compare against a freshly built frame rather than the object handed to
    # ``fit``.
    reference = pd.DataFrame([self.anneal, self.krvskp, self.labor])
    unchanged = (model.metafeatures == reference).all().all()
    self.assertTrue(unchanged)
def test_scale(self):
    """Check the first row produced by ``KNearestDatasets._scale``.

    ``_scale`` is called with the ``anneal`` and ``krvskp`` metafeatures as
    frame and ``labor`` as the second argument; the first row of the returned
    frame is compared against fixed expected values.
    """
    # ``pandas.util.testing`` was deprecated in pandas 1.0 and removed in
    # 2.0; ``pandas.testing`` is the public location.  Fall back to the old
    # path only for pandas versions that predate ``pandas.testing``.
    try:
        from pandas.testing import assert_series_equal
    except ImportError:
        from pandas.util.testing import assert_series_equal

    kND = KNearestDatasets()
    metafeatures = pd.DataFrame([self.anneal, self.krvskp])
    # The second return value is not inspected by this test.
    metafeatures, _other = kND._scale(metafeatures, self.labor)

    # Series.equal does not work properly with floats...
    assert_series_equal(
        metafeatures.iloc[0],
        pd.Series({"number_of_instances": 0.267919719656,
                   "number_of_classes": 1,
                   "number_of_features": 1}))
def test_kNearestDatasets(self):
    """Verify neighbours and distances returned by ``kNearestDatasets``.

    After fitting on the ``krvskp`` and ``labor`` metafeatures (runs of
    datasets 233 and 234), ``anneal`` is looked up with k = 1, 2 and -1,
    with and without ``return_distance``.  k = 0 and k = -2 must raise
    ``ValueError``.
    """
    single_id = [233]
    both_ids = [233, 234]

    model = KNearestDatasets()
    training_metafeatures = pd.DataFrame([self.krvskp, self.labor])
    model.fit(training_metafeatures, self.runs.loc[:, [233, 234]])

    # One neighbour.
    found = model.kNearestDatasets(self.anneal, 1)
    self.assertEqual(single_id, found)
    found, dist = model.kNearestDatasets(
        self.anneal, 1, return_distance=True)
    self.assertEqual(single_id, found)
    assert_array_almost_equal([1.82298937], dist)

    # Two neighbours.
    found = model.kNearestDatasets(self.anneal, 2)
    self.assertEqual(both_ids, found)
    found, dist = model.kNearestDatasets(
        self.anneal, 2, return_distance=True)
    self.assertEqual(both_ids, found)
    assert_array_almost_equal([1.822989, 2.267919], dist)

    # k = -1 is expected to return both fitted datasets as well.
    found = model.kNearestDatasets(self.anneal, -1)
    self.assertEqual(both_ids, found)
    found, dist = model.kNearestDatasets(
        self.anneal, -1, return_distance=True)
    self.assertEqual(both_ids, found)
    assert_array_almost_equal([1.822989, 2.267919], dist)

    # Invalid values of k.
    with self.assertRaises(ValueError):
        model.kNearestDatasets(self.anneal, 0)
    with self.assertRaises(ValueError):
        model.kNearestDatasets(self.anneal, -2)
def _learn(self, exclude_double_configurations=True):
    """Return the best suggestions of the nearest-datasets model.

    The metafeatures from ``self._split_metafeature_array()`` are reduced to
    those that are finite for the target dataset, and missing values of the
    other datasets are mean-imputed.  On the first call (``self.kND`` is
    ``None``) a ``KNearestDatasets`` model is fitted on the other datasets
    and their runs from ``self.meta_base`` and stored in ``self.kND``; later
    calls reuse the stored model.

    Parameters
    ----------
    exclude_double_configurations : bool
        Forwarded unchanged to ``KNearestDatasets.kBestSuggestions``.

    Returns
    -------
    The result of ``self.kND.kBestSuggestions`` called with ``k=-1``.
    """
    dataset_metafeatures, all_other_metafeatures = \
        self._split_metafeature_array()

    # Remove metafeatures which could not be calculated for the target
    # dataset
    keep = [
        idx for idx in dataset_metafeatures.index
        if np.isfinite(dataset_metafeatures.loc[idx])
    ]
    dataset_metafeatures = dataset_metafeatures.loc[keep]
    all_other_metafeatures = all_other_metafeatures.loc[:, keep]

    # Do mean imputation of all other metafeatures
    all_other_metafeatures = all_other_metafeatures.fillna(
        all_other_metafeatures.mean())

    if self.kND is None:
        # In case that we learn our distance function, get the parameters
        # for the random forest
        if self.distance_kwargs:
            rf_params = ast.literal_eval(self.distance_kwargs)
        else:
            rf_params = None

        # To keep the distance the same in every iteration, we create a new
        # random state
        random_state = sklearn.utils.check_random_state(self.seed)
        kND = KNearestDatasets(metric=self.distance,
                               random_state=random_state,
                               logger=self.logger,
                               metric_params=rf_params)

        runs = dict()
        # TODO move this code to the metabase
        for task_id in all_other_metafeatures.index:
            try:
                runs[task_id] = self.meta_base.get_runs(task_id)
            except KeyError:
                # TODO should I really except this?
                self.logger.info("Could not find runs for instance %s" % task_id)
                # Pin the dtype: an empty Series without one is float64 on
                # pandas < 2.0 (with a DeprecationWarning on 1.x) but object
                # on pandas >= 2.0.
                runs[task_id] = pd.Series([], name=task_id, dtype=np.float64)
        runs = pd.DataFrame(runs)

        kND.fit(all_other_metafeatures, runs)
        self.kND = kND

    return self.kND.kBestSuggestions(
        dataset_metafeatures,
        k=-1,
        exclude_double_configurations=exclude_double_configurations,
    )
def _learn(self, exclude_double_configurations=True):
    """Return the best suggestions of the nearest-datasets model.

    The metafeatures from ``self._split_metafeature_array()`` are reduced to
    those that are finite for the target dataset, and missing values of the
    other datasets are mean-imputed.  On the first call (``self.kND`` is
    ``None``) a ``KNearestDatasets`` model is fitted on the other datasets
    and their runs from ``self.meta_base`` and stored in ``self.kND``; later
    calls reuse the stored model.

    Parameters
    ----------
    exclude_double_configurations : bool
        Forwarded unchanged to ``KNearestDatasets.kBestSuggestions``.

    Returns
    -------
    The result of ``self.kND.kBestSuggestions`` called with ``k=-1``.
    """
    dataset_metafeatures, all_other_metafeatures = \
        self._split_metafeature_array()

    # Remove metafeatures which could not be calculated for the target
    # dataset
    keep = [
        idx for idx in dataset_metafeatures.index
        if np.isfinite(dataset_metafeatures.loc[idx])
    ]
    dataset_metafeatures = dataset_metafeatures.loc[keep]
    all_other_metafeatures = all_other_metafeatures.loc[:, keep]

    # Do mean imputation of all other metafeatures
    all_other_metafeatures = all_other_metafeatures.fillna(
        all_other_metafeatures.mean())

    if self.kND is None:
        # In case that we learn our distance function, get the parameters
        # for the random forest
        if self.distance_kwargs:
            rf_params = ast.literal_eval(self.distance_kwargs)
        else:
            rf_params = None

        # To keep the distance the same in every iteration, we create a new
        # random state
        random_state = sklearn.utils.check_random_state(self.seed)
        kND = KNearestDatasets(metric=self.distance,
                               random_state=random_state,
                               metric_params=rf_params)

        runs = dict()
        # TODO move this code to the metabase
        for task_id in all_other_metafeatures.index:
            try:
                runs[task_id] = self.meta_base.get_runs(task_id)
            except KeyError:
                # TODO should I really except this?
                self.logger.warning("Could not find runs for instance %s" % task_id)
                # Pin the dtype: an empty Series without one is float64 on
                # pandas < 2.0 (with a DeprecationWarning on 1.x) but object
                # on pandas >= 2.0.
                runs[task_id] = pd.Series([], name=task_id, dtype=np.float64)
        runs = pd.DataFrame(runs)

        kND.fit(all_other_metafeatures, runs)
        self.kND = kND

    return self.kND.kBestSuggestions(
        dataset_metafeatures, k=-1,
        exclude_double_configurations=exclude_double_configurations)
def test_kBestSuggestions(self):
    """Check the suggestions returned by ``kBestSuggestions``.

    A model fitted on the ``krvskp`` and ``labor`` metafeatures (runs of
    datasets 233 and 234) is queried with ``anneal`` for k = 1, 2 and -1 and
    the returned tuples are compared exactly.  k = 0 and k = -2 must raise
    ``ValueError``.
    """
    first = (233, 1.8229893712531495, 1)
    second = (234, 2.2679197196559415, 2)

    model = KNearestDatasets()
    model.fit(
        pd.DataFrame([self.krvskp, self.labor]),
        self.runs.loc[:, [233, 234]],
    )

    suggestions = model.kBestSuggestions(self.anneal, 1)
    self.assertEqual([first], suggestions)

    suggestions = model.kBestSuggestions(self.anneal, 2)
    self.assertEqual([first, second], suggestions)

    suggestions = model.kBestSuggestions(self.anneal, -1)
    self.assertEqual([first, second], suggestions)

    # k = 0 and k = -2 must be rejected.
    with self.assertRaises(ValueError):
        model.kBestSuggestions(self.anneal, 0)
    with self.assertRaises(ValueError):
        model.kBestSuggestions(self.anneal, -2)
def test_kBestSuggestions(self):
    """Check the suggestions returned by ``kBestSuggestions``.

    The model is fitted on the ``krvskp`` and ``labor`` metafeatures (runs
    of datasets 233 and 234).  Querying with ``anneal`` must give one tuple
    for k = 1 and the same two tuples for k = 2 and k = -1; k = 0 and
    k = -2 must raise ``ValueError``.
    """
    model = KNearestDatasets()
    training_metafeatures = pd.DataFrame([self.krvskp, self.labor])
    model.fit(training_metafeatures, self.runs.loc[:, [233, 234]])

    top_one = model.kBestSuggestions(self.anneal, 1)
    self.assertEqual([(233, 1.8229893712531495, 1)], top_one)

    # k = 2 and k = -1 are expected to give the same two suggestions.
    for k in (2, -1):
        top_two = model.kBestSuggestions(self.anneal, k)
        self.assertEqual(
            [(233, 1.8229893712531495, 1), (234, 2.2679197196559415, 2)],
            top_two)

    for invalid_k in (0, -2):
        with self.assertRaises(ValueError):
            model.kBestSuggestions(self.anneal, invalid_k)
def test_kBestSuggestions(self):
    """Check the suggestions returned by ``kBestSuggestions``.

    A model fitted on the ``krvskp`` and ``labor`` metafeatures (runs of
    datasets 233 and 234) is queried with ``anneal`` for k = 1, 2 and -1.
    The results are compared with ``assert_array_almost_equal``; k = 0 and
    k = -2 must raise ``ValueError``.
    """
    first = (233, 3.8320802803440586, 1)
    second = (234, 4.367919719655942, 2)

    model = KNearestDatasets()
    model.fit(
        pd.DataFrame([self.krvskp, self.labor]),
        self.runs.loc[:, [233, 234]],
    )

    suggestions = model.kBestSuggestions(self.anneal, 1)
    np.testing.assert_array_almost_equal([first], suggestions)

    # k = 2 and k = -1 are expected to give the same two suggestions.
    for k in (2, -1):
        suggestions = model.kBestSuggestions(self.anneal, k)
        np.testing.assert_array_almost_equal([first, second], suggestions)

    for invalid_k in (0, -2):
        with self.assertRaises(ValueError):
            model.kBestSuggestions(self.anneal, invalid_k)