def _train_classifier(self, X, Y, num_splits=5):
    """Train and return one classifier, or None once the ensemble is full.

    # Arguments:
        X (np.ndarray): encoded samples used to train the classifier.
        Y (np.ndarray): encoded evaluations (labels) used to train the
            classifier.
        num_splits (int): number of splits for cross validated training.
            Set to 1 to skip cross validation when a batch does not hold
            enough samples for it.

    # Returns:
        A trained model, or None when `total_classifiers` models have
        already been trained.
    """
    # Guard clause: the ensemble already holds the maximum number of models.
    if len(self.classifiers) >= self.total_classifiers:
        return None

    return xgb_utils.train_single_model(X, Y,
                                        num_splits=num_splits,
                                        n_jobs=self.num_workers)
def test_evaluate_train_evaluate():
    """Ensemble of trained models yields a non-trivial prediction vector."""
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    # fit a small ensemble of models on random draws from the search space
    clfs = []
    num_samples = 16
    for _ in range(3):
        draws = [h.sample() for _ in range(num_samples)]
        targets = [np.sum(draw) for draw in draws]
        enc_x, enc_y = dataset.encode_dataset(draws, targets)
        clfs.append(xgb_utils.train_single_model(enc_x, enc_y))

    # evaluate the ensemble on a fresh, larger batch of samples
    num_samples = 100
    test_draws = [h.sample() for _ in range(num_samples)]
    ex2, _ = dataset.encode_dataset(test_draws, None)

    preds = xgb_utils.evaluate_models(ex2, clfs)
    count = np.sum(preds)
    print(count)

    # one prediction per sample, and at least one positive outcome
    assert preds.shape == (num_samples,)
    assert count > 0
def test_serialization_deserialization():
    """Classifiers round-trip through save/restore; bad paths raise."""
    basepath = 'shac'
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    # fit a small ensemble of models to serialize
    clfs = []
    num_samples = 16
    for _ in range(3):
        draws = [h.sample() for _ in range(num_samples)]
        targets = [np.sum(draw) for draw in draws]
        enc_x, enc_y = dataset.encode_dataset(draws, targets)
        clfs.append(xgb_utils.train_single_model(enc_x, enc_y))

    # save, then confirm the pickle landed at the expected location
    xgb_utils.save_classifiers(clfs, basepath)
    assert os.path.exists(os.path.join(basepath, 'classifiers', 'classifiers.pkl'))

    # restore and confirm the count matches what was saved
    models = xgb_utils.restore_classifiers(basepath)
    assert len(models) == len(clfs)

    # restoring from a nonexistent base path must fail loudly
    with pytest.raises(FileNotFoundError):
        models = xgb_utils.restore_classifiers('none')
def test_evaluate_single_sample():
    """A single encoded sample evaluates to a single prediction."""
    params = get_hyperparameter_list()
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    # fit a small ensemble of models on random draws from the search space
    clfs = []
    num_samples = 16
    for _ in range(3):
        draws = [h.sample() for _ in range(num_samples)]
        targets = [np.sum(draw) for draw in draws]
        enc_x, enc_y = dataset.encode_dataset(draws, targets)
        clfs.append(xgb_utils.train_single_model(enc_x, enc_y))

    # encode exactly one sample and check its shape and prediction
    sample = h.sample()
    ex2, _ = dataset.encode_dataset([sample])
    assert ex2.shape == (1, 3)

    pred = xgb_utils.evaluate_models(ex2, clfs)
    assert pred.shape == (1,)
def test_evaluate_train_evaluate_failure():
    """Training on a degenerate (constant) search space yields no models."""
    # every hyperparameter can take exactly one value, so labels are constant
    params = [hp.DiscreteHyperParameter('h%d' % i, [0]) for i in range(3)]
    h = hp.HyperParameterList(params)
    dataset = data.Dataset(h)

    # attempt to fit an ensemble on the constant data
    clfs = []
    num_samples = 16
    for _ in range(3):
        draws = [h.sample() for _ in range(num_samples)]
        targets = [np.sum(draw) for draw in draws]
        enc_x, enc_y = dataset.encode_dataset(draws, targets)
        clfs.append(xgb_utils.train_single_model(enc_x, enc_y))

    # constant labels cannot produce a usable classifier
    for model in clfs:
        assert model is None