def test_io_datagenerator_wy_unlabeled():
    """Unlabeled batches must yield exactly the rows where y == -1."""
    X = numpy.random.randn(500, 13)
    w = numpy.abs(numpy.random.randn(500))
    # Shift labels down so that roughly one fifth of them become -1.
    y = numpy.random.randint(5, size=500) - 1

    data = DataGenerator(X, w, y)

    X_unlabeled = numpy.concatenate(
        [batch[0] for batch in data.unlabeled_batches()])
    w_unlabeled = numpy.concatenate(
        [batch[1] for batch in data.unlabeled_batches()])

    # Only a strict subset of the rows should come back ...
    assert_true(X.shape[0] > X_unlabeled.shape[0])
    # ... and they should be precisely the ones labeled -1.
    assert_almost_equal(X[y == -1], X_unlabeled)
    assert_almost_equal(w[y == -1], w_unlabeled)
def test_io_fit():
    """Fitting a NaiveBayes from raw arrays or a DataGenerator must agree."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    data_generator = DataGenerator(X, weights, y)

    def fresh_model():
        # Two independent 5-d Gaussians with different initial means.
        d1 = IndependentComponentsDistribution(
            [NormalDistribution(0, 1) for _ in range(5)])
        d2 = IndependentComponentsDistribution(
            [NormalDistribution(1, 1) for _ in range(5)])
        return NaiveBayes([d1, d2])

    nb1 = fresh_model()
    nb1.fit(X, y, weights)

    nb2 = fresh_model()
    nb2.fit(data_generator)

    assert_array_almost_equal(nb1.log_probability(X), nb2.log_probability(X))
def test_io_predict_log_proba():
    """predict_log_proba must be identical across the three input forms.

    Relies on the module-level X and model globals set up by a fixture.
    """
    numpy_generator = DataGenerator(X)
    frame_generator = DataFrameGenerator(pandas.DataFrame(X))

    y_hat_raw = model.predict_log_proba(X)
    y_hat_numpy = model.predict_log_proba(numpy_generator)
    y_hat_frame = model.predict_log_proba(frame_generator)

    assert_array_almost_equal(y_hat_raw, y_hat_numpy)
    assert_array_almost_equal(y_hat_raw, y_hat_frame)
def test_io_log_probability():
    """log_probability must be identical across the three input forms.

    Relies on the module-level X and model globals set up by a fixture.
    """
    numpy_generator = DataGenerator(X)
    frame_generator = DataFrameGenerator(pandas.DataFrame(X))

    logp_raw = model.log_probability(X)
    logp_numpy = model.log_probability(numpy_generator)
    logp_frame = model.log_probability(frame_generator)

    assert_array_almost_equal(logp_raw, logp_numpy)
    assert_array_almost_equal(logp_raw, logp_frame)
def test_numpy_predict_datagenerator():
    """predict via a DataGenerator imputes missing values without mutating input."""
    obs = numpy.array([['A', None, 'B'],
                       ['A', None, 'C'],
                       ['A', 'B', 'C']])

    predictions = monty_network.predict(DataGenerator(obs))

    assert_array_equal(predictions, [['A', 'C', 'B'],
                                     ['A', 'B', 'C'],
                                     ['A', 'B', 'C']])
    # The original observation array must be left untouched.
    assert_array_equal(obs, [['A', None, 'B'],
                             ['A', None, 'C'],
                             ['A', 'B', 'C']])
def test_io_log_probability():
    """GMM log_probability must agree across array and generator inputs."""
    X = numpy.random.randn(100, 5) + 0.5
    numpy_generator = DataGenerator(X)
    frame_generator = DataFrameGenerator(pandas.DataFrame(X))

    d = MultivariateGaussianDistribution
    model = GeneralMixtureModel.from_samples(d, n_components=2, X=X)

    logp_raw = model.log_probability(X)
    assert_array_almost_equal(logp_raw, model.log_probability(numpy_generator))
    assert_array_almost_equal(logp_raw, model.log_probability(frame_generator))
def test_io_predict_log_proba():
    """GMM predict_log_proba must agree across array and generator inputs."""
    X = numpy.random.randn(100, 5) + 0.5
    numpy_generator = DataGenerator(X)
    frame_generator = DataFrameGenerator(pandas.DataFrame(X))

    d = MultivariateGaussianDistribution
    model = GeneralMixtureModel.from_samples(d, n_components=2, X=X)

    y_hat_raw = model.predict_log_proba(X)
    assert_array_almost_equal(y_hat_raw,
                              model.predict_log_proba(numpy_generator))
    assert_array_almost_equal(y_hat_raw,
                              model.predict_log_proba(frame_generator))
def test_io_from_samples():
    """BayesClassifier.from_samples must treat a DataGenerator like arrays."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    # The generator bundles X, weights, and y together.
    data_generator = DataGenerator(X, weights, y)

    d = MultivariateGaussianDistribution
    bc1 = BayesClassifier.from_samples(d, X=X, y=y, weights=weights)
    bc2 = BayesClassifier.from_samples(d, X=data_generator)

    assert_array_almost_equal(bc1.log_probability(X), bc2.log_probability(X))
def test_io_from_samples():
    """NaiveBayes.from_samples must treat a DataGenerator like arrays."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    # The generator bundles X, weights, and y together.
    data_generator = DataGenerator(X, weights, y)

    d = NormalDistribution
    nb1 = NaiveBayes.from_samples(d, X=X, weights=weights, y=y)
    nb2 = NaiveBayes.from_samples(d, X=data_generator)

    assert_array_almost_equal(nb1.log_probability(X), nb2.log_probability(X))
def test_io_from_samples_gmm():
    """GMM.from_samples must treat a DataGenerator like arrays plus weights."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    data_generator = DataGenerator(X, weights)

    d = MultivariateGaussianDistribution
    # Deterministic init ('first-k') and a fixed iteration budget keep the
    # two fits comparable.
    gmm1 = GeneralMixtureModel.from_samples(
        d, n_components=2, X=X, weights=weights,
        max_iterations=5, init='first-k')
    gmm2 = GeneralMixtureModel.from_samples(
        d, n_components=2, X=data_generator,
        max_iterations=5, init='first-k')

    assert_array_almost_equal(gmm1.log_probability(X),
                              gmm2.log_probability(X))
def setup_random_mixed():
    """Build a mixed-dtype dataset and populate the module-level fixtures.

    Sets the globals X (50x3 object array of bool / str / int columns),
    weights, data_generator, and model used by the I/O tests.
    """
    numpy.random.seed(0)

    global X
    # Column order matters: it fixes the sequence of random draws.
    columns = [
        numpy.random.choice([True, False], size=50),
        numpy.random.choice(['A', 'B'], size=50),
        numpy.random.choice(2, size=50),
    ]
    X = numpy.array(columns, dtype=object).T.copy()

    global weights
    weights = numpy.abs(numpy.random.randn(50))

    global data_generator
    data_generator = DataGenerator(X, weights)

    global model
    model = BayesianNetwork.from_samples(X)
def test_io_fit():
    """GMM fit from arrays plus weights must match fit from a DataGenerator."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    data_generator = DataGenerator(X, weights)

    mu1 = numpy.array([0, 0, 0, 0, 0])
    mu2 = numpy.array([1, 1, 1, 1, 1])
    cov = numpy.eye(5)

    def fresh_model():
        # Both fits start from the same pair of unit-covariance Gaussians.
        d1 = MultivariateGaussianDistribution(mu1, cov)
        d2 = MultivariateGaussianDistribution(mu2, cov)
        return GeneralMixtureModel([d1, d2])

    gmm1 = fresh_model()
    gmm1.fit(X, weights, max_iterations=5)

    gmm2 = fresh_model()
    gmm2.fit(data_generator, max_iterations=5)

    assert_array_almost_equal(gmm1.log_probability(X),
                              gmm2.log_probability(X))
def test_io_fit():
    """BayesClassifier fit from arrays must match fit from a DataGenerator."""
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    data_generator = DataGenerator(X, weights, y)

    mu1 = numpy.array([0, 0, 0, 0, 0])
    mu2 = numpy.array([1, 1, 1, 1, 1])
    cov = numpy.eye(5)

    def fresh_model():
        # Both fits start from the same pair of unit-covariance Gaussians.
        d1 = MultivariateGaussianDistribution(mu1, cov)
        d2 = MultivariateGaussianDistribution(mu2, cov)
        return BayesClassifier([d1, d2])

    bc1 = fresh_model()
    bc1.fit(X, y, weights)

    bc2 = fresh_model()
    bc2.fit(data_generator)

    assert_array_almost_equal(bc1.log_probability(X), bc2.log_probability(X))
def test_io_datagenerator_y_batches():
    """Concatenating the label component of batches must reconstruct y.

    Covers the default batch size plus a size that does not divide the
    data evenly (123), the degenerate size (1), and a size larger than
    the dataset itself (506).
    """
    # Fix: the original computed an unused weight vector `w` here; it was
    # dead code (this test only checks the y component) and is removed.
    X = numpy.random.randn(500, 13)
    y = numpy.random.randint(5, size=500)

    # None means "use the generator's default batching".
    for batch_size in (None, 123, 1, 506):
        if batch_size is None:
            data = DataGenerator(X, y=y)
        else:
            data = DataGenerator(X, y=y, batch_size=batch_size)
        y_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(y, y_)
def test_io_datagenerator_w_batches():
    """Concatenating the weight component of batches must reconstruct w.

    Exercises the default batch size, an uneven size (123), the degenerate
    size (1), and a size larger than the dataset (506).
    """
    X = numpy.random.randn(500, 13)
    w = numpy.abs(numpy.random.randn(500))

    for kwargs in ({}, {'batch_size': 123}, {'batch_size': 1},
                   {'batch_size': 506}):
        data = DataGenerator(X, w, **kwargs)
        w_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(w, w_)
def test_io_datagenerator_x_batches():
    """Batches must reconstruct X and produce default all-ones weights.

    Exercises the default batch size, an uneven size (123), the degenerate
    size (1), and a size larger than the dataset (506).
    """
    X = numpy.random.randn(500, 13)
    w = numpy.ones(500)

    for kwargs in ({}, {'batch_size': 123}, {'batch_size': 1},
                   {'batch_size': 506}):
        data = DataGenerator(X, **kwargs)
        X_ = numpy.concatenate([batch[0] for batch in data.batches()])
        w_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(X, X_)
        assert_almost_equal(w, w_)
def test_io_datagenerator_classes():
    """The classes attribute must report the unique labels 0 through 4."""
    X = numpy.random.randn(500, 13)
    # randint(5) over 500 draws makes all five labels virtually certain
    # to appear.
    labels = numpy.random.randint(5, size=500)

    data = DataGenerator(X, y=labels)
    assert_array_equal(data.classes, [0, 1, 2, 3, 4])
def test_io_datagenerator_classes_fail():
    """Accessing classes on an unlabeled generator must raise ValueError."""
    data = DataGenerator(numpy.random.randn(500, 13))
    # Wrap the attribute access in a lambda so assert_raises can invoke it.
    assert_raises(ValueError, lambda d: d.classes, data)
def test_io_datagenerator_shape():
    """The generator's shape must mirror the underlying array's shape."""
    X = numpy.random.randn(500, 13)
    assert_array_equal(DataGenerator(X).shape, X.shape)