Exemplo n.º 1
0
def test_io_datagenerator_wy_unlabeled():
    """Check that ``unlabeled_batches`` yields only the rows labeled ``-1``.

    Labels are drawn from ``{-1, 0, 1, 2, 3}``. The concatenated batches
    must reproduce exactly the samples and weights whose label is ``-1``,
    and must therefore contain fewer rows than the full data set.
    """
    n_samples = 500
    X = numpy.random.randn(n_samples, 13)
    w = numpy.abs(numpy.random.randn(n_samples))
    # Shift the labels down by one so that -1 appears among them.
    y = numpy.random.randint(5, size=n_samples) - 1

    data = DataGenerator(X, w, y)

    def gather(position):
        # Stitch one element of every unlabeled batch back together.
        return numpy.concatenate(
            [batch[position] for batch in data.unlabeled_batches()])

    X_ = gather(0)
    w_ = gather(1)

    unlabeled = y == -1
    assert_true(X.shape[0] > X_.shape[0])
    assert_almost_equal(X[unlabeled], X_)
    assert_almost_equal(w[unlabeled], w_)
Exemplo n.º 2
0
def test_io_fit():
    """Check that ``NaiveBayes.fit`` agrees for array and generator input.

    Two identically initialised models are fitted, one from ``X, y, weights``
    passed directly and one from a ``DataGenerator`` wrapping the same
    data; their log probabilities on ``X`` must match.
    """
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    data_generator = DataGenerator(X, weights, y)

    def fresh_model():
        # Build new distribution objects each time so the two fits
        # do not share any state.
        components = [
            IndependentComponentsDistribution(
                [NormalDistribution(mean, 1) for _ in range(5)])
            for mean in (0, 1)
        ]
        return NaiveBayes(components)

    nb1 = fresh_model()
    nb1.fit(X, y, weights)

    nb2 = fresh_model()
    nb2.fit(data_generator)

    assert_array_almost_equal(nb1.log_probability(X),
                              nb2.log_probability(X))
Exemplo n.º 3
0
def test_io_predict_log_proba():
    """Check ``predict_log_proba`` is the same for every input wrapper.

    Uses the module-level ``X`` and ``model`` (defined outside this
    function) and compares the raw-array result with the results obtained
    through a ``DataGenerator`` and a ``DataFrameGenerator``.
    """
    wrapped_inputs = (DataGenerator(X),
                      DataFrameGenerator(pandas.DataFrame(X)))

    reference = model.predict_log_proba(X)
    # Compute every prediction before asserting, as the comparisons
    # only start once all results are available.
    results = [model.predict_log_proba(wrapped) for wrapped in wrapped_inputs]

    for result in results:
        assert_array_almost_equal(reference, result)
Exemplo n.º 4
0
def test_io_log_probability():
    """Check ``log_probability`` is the same for every input wrapper.

    Uses the module-level ``X`` and ``model`` (defined outside this
    function) and compares the raw-array result with the results obtained
    through a ``DataGenerator`` and a ``DataFrameGenerator``.
    """
    wrapped_inputs = (DataGenerator(X),
                      DataFrameGenerator(pandas.DataFrame(X)))

    reference = model.log_probability(X)
    # Compute every result before asserting, as the comparisons
    # only start once all results are available.
    results = [model.log_probability(wrapped) for wrapped in wrapped_inputs]

    for result in results:
        assert_array_almost_equal(reference, result)
Exemplo n.º 5
0
def test_numpy_predict_datagenerator():
    """Check ``predict`` on a ``DataGenerator`` holding incomplete rows.

    The module-level ``monty_network`` must replace each ``None`` entry
    with the expected value, and the observation array handed to the
    generator must be unchanged afterwards.
    """
    incomplete = [['A', None, 'B'], ['A', None, 'C'], ['A', 'B', 'C']]
    completed = [['A', 'C', 'B'], ['A', 'B', 'C'], ['A', 'B', 'C']]

    obs = numpy.array(incomplete)
    predictions = monty_network.predict(DataGenerator(obs))

    assert_array_equal(predictions, completed)

    # The input array must not have been modified by the prediction.
    assert_array_equal(obs, incomplete)
Exemplo n.º 6
0
def test_io_log_probability():
    """Check mixture-model ``log_probability`` for every input wrapper.

    A two-component ``GeneralMixtureModel`` is learned from random data,
    then scored on the raw array, a ``DataGenerator`` and a
    ``DataFrameGenerator``; all three results must agree.
    """
    X = numpy.random.randn(100, 5) + 0.5
    wrapped_inputs = (DataGenerator(X),
                      DataFrameGenerator(pandas.DataFrame(X)))

    model = GeneralMixtureModel.from_samples(
        MultivariateGaussianDistribution, n_components=2, X=X)

    reference = model.log_probability(X)
    results = [model.log_probability(wrapped) for wrapped in wrapped_inputs]

    for result in results:
        assert_array_almost_equal(reference, result)
Exemplo n.º 7
0
def test_io_predict_log_proba():
    """Check mixture-model ``predict_log_proba`` for every input wrapper.

    A two-component ``GeneralMixtureModel`` is learned from random data,
    then queried with the raw array, a ``DataGenerator`` and a
    ``DataFrameGenerator``; all three results must agree.
    """
    X = numpy.random.randn(100, 5) + 0.5
    wrapped_inputs = (DataGenerator(X),
                      DataFrameGenerator(pandas.DataFrame(X)))

    model = GeneralMixtureModel.from_samples(
        MultivariateGaussianDistribution, n_components=2, X=X)

    reference = model.predict_log_proba(X)
    results = [model.predict_log_proba(wrapped) for wrapped in wrapped_inputs]

    for result in results:
        assert_array_almost_equal(reference, result)
Exemplo n.º 8
0
def test_io_from_samples():
    """Check ``BayesClassifier.from_samples`` for array and generator input.

    One classifier is learned from ``X``, ``y`` and ``weights`` passed
    directly, the other from a ``DataGenerator`` wrapping the same data;
    their log probabilities on ``X`` must match.
    """
    n_samples = 100
    X = numpy.random.randn(n_samples, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(2, size=n_samples)
    data_generator = DataGenerator(X, weights, y)

    distribution = MultivariateGaussianDistribution

    from_arrays = BayesClassifier.from_samples(
        distribution, X=X, y=y, weights=weights)
    from_generator = BayesClassifier.from_samples(
        distribution, X=data_generator)

    assert_array_almost_equal(from_arrays.log_probability(X),
                              from_generator.log_probability(X))
Exemplo n.º 9
0
def test_io_from_samples():
    """Check ``NaiveBayes.from_samples`` for array and generator input.

    One model is learned from ``X``, ``weights`` and ``y`` passed directly,
    the other from a ``DataGenerator`` wrapping the same data; their log
    probabilities on ``X`` must match.
    """
    n_samples = 100
    X = numpy.random.randn(n_samples, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(2, size=n_samples)
    data_generator = DataGenerator(X, weights, y)

    distribution = NormalDistribution

    from_arrays = NaiveBayes.from_samples(
        distribution, X=X, weights=weights, y=y)
    from_generator = NaiveBayes.from_samples(distribution, X=data_generator)

    assert_array_almost_equal(from_arrays.log_probability(X),
                              from_generator.log_probability(X))
Exemplo n.º 10
0
def test_io_from_samples_gmm():
    """Check ``GeneralMixtureModel.from_samples`` for both input forms.

    Both models use the same settings (two components, five iterations,
    ``'first-k'`` initialisation). One receives ``X`` and ``weights``
    directly, the other a ``DataGenerator`` wrapping them; their log
    probabilities on ``X`` must match.
    """
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    data_generator = DataGenerator(X, weights)

    distribution = MultivariateGaussianDistribution
    # Settings shared by both fits so the only difference is the input form.
    settings = dict(n_components=2, max_iterations=5, init='first-k')

    gmm1 = GeneralMixtureModel.from_samples(
        distribution, X=X, weights=weights, **settings)
    gmm2 = GeneralMixtureModel.from_samples(
        distribution, X=data_generator, **settings)

    assert_array_almost_equal(gmm1.log_probability(X),
                              gmm2.log_probability(X))
Exemplo n.º 11
0
def setup_random_mixed():
    """Create the module-level fixtures for the mixed-type tests.

    After seeding NumPy's global random generator with 0, this sets four
    globals: ``X`` (50 rows with a boolean, a string and an integer
    column, stored as objects), ``weights`` (50 non-negative values),
    ``data_generator`` (a ``DataGenerator`` over both) and ``model`` (a
    ``BayesianNetwork`` learned from ``X``).
    """
    global X, weights, data_generator, model

    numpy.random.seed(0)
    n_rows = 50

    # The columns are drawn in this order; each draw advances the seeded
    # generator, so the order determines the fixture values.
    columns = [
        numpy.random.choice([True, False], size=n_rows),
        numpy.random.choice(['A', 'B'], size=n_rows),
        numpy.random.choice(2, size=n_rows),
    ]
    # Stack as objects to keep the mixed types, then put samples on rows.
    X = numpy.array(columns, dtype=object).T.copy()

    weights = numpy.abs(numpy.random.randn(n_rows))
    data_generator = DataGenerator(X, weights)
    model = BayesianNetwork.from_samples(X)
Exemplo n.º 12
0
def test_io_fit():
    """Check that ``GeneralMixtureModel.fit`` agrees for both input forms.

    Two identically initialised two-component mixtures are fitted for five
    iterations, one from ``X`` and ``weights`` passed directly and one
    from a ``DataGenerator`` wrapping them; their log probabilities on
    ``X`` must match.
    """
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    data_generator = DataGenerator(X, weights)

    mu1 = numpy.array([0] * 5)
    mu2 = numpy.array([1] * 5)
    cov = numpy.eye(5)

    def fresh_mixture():
        # New distribution objects for each model so the fits stay separate.
        return GeneralMixtureModel([
            MultivariateGaussianDistribution(mu1, cov),
            MultivariateGaussianDistribution(mu2, cov),
        ])

    gmm1 = fresh_mixture()
    gmm1.fit(X, weights, max_iterations=5)

    gmm2 = fresh_mixture()
    gmm2.fit(data_generator, max_iterations=5)

    assert_array_almost_equal(gmm1.log_probability(X),
                              gmm2.log_probability(X))
Exemplo n.º 13
0
def test_io_fit():
    """Check that ``BayesClassifier.fit`` agrees for both input forms.

    Two identically initialised classifiers are fitted, one from
    ``X, y, weights`` passed directly and one from a ``DataGenerator``
    wrapping the same data; their log probabilities on ``X`` must match.
    """
    X = numpy.random.randn(100, 5) + 0.5
    weights = numpy.abs(numpy.random.randn(100))
    y = numpy.random.randint(2, size=100)
    data_generator = DataGenerator(X, weights, y)

    mu1 = numpy.array([0] * 5)
    mu2 = numpy.array([1] * 5)
    cov = numpy.eye(5)

    def fresh_classifier():
        # New distribution objects for each model so the fits stay separate.
        return BayesClassifier([
            MultivariateGaussianDistribution(mu1, cov),
            MultivariateGaussianDistribution(mu2, cov),
        ])

    bc1 = fresh_classifier()
    bc1.fit(X, y, weights)

    bc2 = fresh_classifier()
    bc2.fit(data_generator)

    assert_array_almost_equal(bc1.log_probability(X),
                              bc2.log_probability(X))
Exemplo n.º 14
0
def test_io_datagenerator_y_batches():
    """Check that labels come back intact from ``DataGenerator.batches``.

    Concatenating element 1 of every batch must reproduce ``y`` when the
    generator is built without a ``batch_size`` and with explicit sizes of
    123 (does not divide 500 evenly), 1 and 506 (larger than the number
    of samples).
    """
    X = numpy.random.randn(500, 13)
    y = numpy.random.randint(5, size=500)

    # The empty dict covers construction without a ``batch_size`` argument.
    configurations = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in configurations:
        data = DataGenerator(X, y=y, **options)
        y_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(y, y_)
Exemplo n.º 15
0
def test_io_datagenerator_w_batches():
    """Check that weights come back intact from ``DataGenerator.batches``.

    Concatenating element 1 of every batch must reproduce ``w`` when the
    generator is built without a ``batch_size`` and with explicit sizes of
    123, 1 and 506.
    """
    X = numpy.random.randn(500, 13)
    w = numpy.abs(numpy.random.randn(500))

    # The empty dict covers construction without a ``batch_size`` argument.
    configurations = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in configurations:
        data = DataGenerator(X, w, **options)
        w_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(w, w_)
Exemplo n.º 16
0
def test_io_datagenerator_x_batches():
    """Check the samples and weights yielded when no weights are given.

    For a generator built from ``X`` alone, concatenating element 0 of
    every batch must reproduce ``X`` and concatenating element 1 must
    equal a vector of ones. This is checked without a ``batch_size`` and
    with explicit sizes of 123, 1 and 506.
    """
    X = numpy.random.randn(500, 13)
    w = numpy.ones(500)

    # The empty dict covers construction without a ``batch_size`` argument.
    configurations = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in configurations:
        data = DataGenerator(X, **options)
        # Each element is gathered in its own pass over the batches.
        X_ = numpy.concatenate([batch[0] for batch in data.batches()])
        w_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(X, X_)
        assert_almost_equal(w, w_)
Exemplo n.º 17
0
def test_io_datagenerator_classes():
    """Check that ``classes`` lists the labels 0 through 4.

    Labels are drawn at random from ``0..4`` for 500 samples.
    """
    n_samples = 500
    features = numpy.random.randn(n_samples, 13)
    labels = numpy.random.randint(5, size=n_samples)

    data = DataGenerator(features, y=labels)

    expected_classes = [0, 1, 2, 3, 4]
    assert_array_equal(data.classes, expected_classes)
Exemplo n.º 18
0
def test_io_datagenerator_classes_fail():
    """Check that reading ``classes`` without labels raises ``ValueError``."""
    features = numpy.random.randn(500, 13)
    unlabeled = DataGenerator(features)

    # ``getattr`` defers the attribute access so ``assert_raises`` can
    # observe the exception.
    assert_raises(ValueError, getattr, unlabeled, 'classes')
Exemplo n.º 19
0
def test_io_datagenerator_shape():
    """Check that ``shape`` matches the shape of the wrapped array."""
    features = numpy.random.randn(500, 13)
    expected_shape = features.shape

    generator = DataGenerator(features)

    assert_array_equal(generator.shape, expected_shape)