import copy

import numpy as np

# The helpers below are assumed to come from the surrounding test module /
# auto-sklearn test utilities: get_dataset, convert_to_bin, Dummy,
# get_configuration_space, HoldoutEvaluator, N_TEST_RUNS, and the constants
# F1_METRIC, ACC_METRIC and MULTILABEL_CLASSIFICATION.


def test_evaluate_multilabel_classification(self):
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Binarize the targets and force the last label on everywhere so the
    # problem is genuinely multilabel.
    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_train[:, -1] = 1
    Y_test = np.array(convert_to_bin(Y_test, 3))
    Y_test[:, -1] = 1

    # Split the test set into a validation part and a test part.
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {
        'metric': F1_METRIC,
        'task': MULTILABEL_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3
    }
    # Only the features of the validation and test splits are handed to the
    # evaluator; it predicts on them rather than scoring against Y_valid.
    D.data = {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_valid': X_valid,
        'X_test': X_test
    }
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    # Sample random configurations and check that each holdout error is a
    # finite, non-negative number.
    err = np.zeros([N_TEST_RUNS])
    for i in range(N_TEST_RUNS):
        print('Evaluate configuration: %d; result:' % i)
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration)
        if not self._fit(evaluator):
            continue
        err[i] = evaluator.predict()
        print(err[i])
        self.assertTrue(np.isfinite(err[i]))
        self.assertGreaterEqual(err[i], 0.0)

    print('Number of times it was worse than random guessing: ' +
          str(np.sum(err > 1)))
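# A minimal sketch of the `Dummy` object used above, assuming it is nothing
# more than a bare attribute container standing in for a real data manager;
# the actual test helper may carry additional behaviour.
class Dummy(object):
    pass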
def get_multilabel_classification_datamanager():
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Shuffle the training set with a fixed seed so the fixture is
    # deterministic.
    indices = list(range(X_train.shape[0]))
    np.random.seed(1)
    np.random.shuffle(indices)
    X_train = X_train[indices]
    Y_train = Y_train[indices]

    # convert_to_bin turns the integer class labels into a 3-column
    # indicator matrix.
    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_test = np.array(convert_to_bin(Y_test, 3))

    # Split the test set into a validation part and a test part.
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {
        'task': MULTILABEL_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3
    }
    D.data = {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_valid': X_valid,
        'Y_valid': Y_valid,
        'X_test': X_test,
        'Y_test': Y_test
    }
    D.feat_type = {
        0: 'numerical',
        1: 'Numerical',
        2: 'numerical',
        3: 'numerical'
    }
    return D
# A second variant of the fixture above; it additionally records the metric
# in D.info and uses a list-style feat_type instead of a dict.
def get_multilabel_classification_datamanager():
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    indices = list(range(X_train.shape[0]))
    np.random.seed(1)
    np.random.shuffle(indices)
    X_train = X_train[indices]
    Y_train = Y_train[indices]

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_test = np.array(convert_to_bin(Y_test, 3))

    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {
        'metric': ACC_METRIC,
        'task': MULTILABEL_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3
    }
    D.data = {
        'X_train': X_train,
        'Y_train': Y_train,
        'X_valid': X_valid,
        'Y_valid': Y_valid,
        'X_test': X_test,
        'Y_test': Y_test
    }
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
    return D
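# A minimal sketch of what `convert_to_bin` is assumed to do, based on how
# the binarized targets are used above (3-column 0/1 indicator matrices):
# turn integer class labels into a one-hot matrix with `num_classes` columns.
# `convert_to_bin_sketch` is a hypothetical stand-in, not the real helper.
def convert_to_bin_sketch(y, num_classes):
    binary = np.zeros((len(y), num_classes), dtype=int)
    for i, label in enumerate(y):
        binary[i, label] = 1  # mark the column of this sample's class
    return binary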