Пример #1
0
def test_io_dfgenerator_wy_unlabeled():
    """Check ``unlabeled_batches`` against the rows whose label is -1.

    A generator is built from a frame, a weight vector and labels drawn
    from {-1, 0, 1, 2, 3}.  The concatenated unlabeled batches must be
    smaller than the full frame and must match the rows (and weights)
    selected by ``y == -1``.
    """
    n_samples = 500
    X = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    w = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(5, size=n_samples) - 1

    data = DataFrameGenerator(X, w, y)

    def gather(position):
        # Each call makes a fresh pass over the unlabeled batches.
        return numpy.concatenate(
            [batch[position] for batch in data.unlabeled_batches()]
        )

    X_ = gather(0)
    w_ = gather(1)

    unlabeled = y == -1
    assert_true(X_.shape[0] < X.shape[0])
    assert_almost_equal(X.loc[unlabeled], X_)
    assert_almost_equal(w[unlabeled], w_)
Пример #2
0
def test_io_predict_log_proba():
    """``predict_log_proba`` must agree for raw and generator-wrapped input.

    Uses the module-level ``X`` and ``model``; the prediction on ``X`` is
    the reference that both wrapped forms are compared against.
    """
    # Build both wrappers first, then predict in the order raw, array
    # generator, frame generator.
    inputs = (X, DataGenerator(X), DataFrameGenerator(pandas.DataFrame(X)))
    reference, via_array_gen, via_frame_gen = [
        model.predict_log_proba(item) for item in inputs
    ]

    assert_array_almost_equal(reference, via_array_gen)
    assert_array_almost_equal(reference, via_frame_gen)
Пример #3
0
def test_io_log_probability():
    """``log_probability`` must agree for raw and generator-wrapped input.

    Uses the module-level ``X`` and ``model``; the result on ``X`` is the
    reference that both wrapped forms are compared against.
    """
    # Build both wrappers first, then evaluate in the order raw, array
    # generator, frame generator.
    inputs = (X, DataGenerator(X), DataFrameGenerator(pandas.DataFrame(X)))
    reference, via_array_gen, via_frame_gen = [
        model.log_probability(item) for item in inputs
    ]

    assert_array_almost_equal(reference, via_array_gen)
    assert_array_almost_equal(reference, via_frame_gen)
Пример #4
0
def test_io_dfgenerator_wy_str_labeled():
    """Check ``labeled_batches`` when weights/labels are given as column names.

    The weights and labels are stored in the frame under ``'w'`` and
    ``'y'`` and those names are passed to the generator.  The concatenated
    labeled batches must match the rows of the untouched feature copy (and
    the labels and weights) selected by ``y != -1``.
    """
    n_samples = 500
    frame = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    # Snapshot of the pure feature columns, taken before 'w' and 'y' are added.
    features = frame.copy()
    w = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(5, size=n_samples) - 1

    frame['w'] = w
    frame['y'] = y

    data = DataFrameGenerator(frame, 'w', 'y')

    def gather(position):
        # Each call makes a fresh pass over the labeled batches.
        return numpy.concatenate(
            [batch[position] for batch in data.labeled_batches()]
        )

    X_ = gather(0)
    y_ = gather(1)
    w_ = gather(2)

    labeled = y != -1
    assert_true(X_.shape[0] < frame.shape[0])
    assert_almost_equal(features.loc[labeled], X_)
    assert_almost_equal(y[labeled], y_)
    assert_almost_equal(w[labeled], w_)
Пример #5
0
def test_io_predict_log_proba():
	"""``predict_log_proba`` must agree for raw and generator-wrapped input.

	A two-component mixture of multivariate Gaussians is fit on random
	data; predictions on the raw array are the reference for the
	predictions on the two generator wrappers.
	"""
	X = numpy.random.randn(100, 5) + 0.5
	array_gen = DataGenerator(X)
	frame_gen = DataFrameGenerator(pandas.DataFrame(X))

	model = GeneralMixtureModel.from_samples(
		MultivariateGaussianDistribution, n_components=2, X=X
	)

	# Predict in the order raw, array generator, frame generator.
	reference, via_array_gen, via_frame_gen = [
		model.predict_log_proba(item) for item in (X, array_gen, frame_gen)
	]

	assert_array_almost_equal(reference, via_array_gen)
	assert_array_almost_equal(reference, via_frame_gen)
Пример #6
0
def test_io_log_probability():
	"""``log_probability`` must agree for raw and generator-wrapped input.

	A two-component mixture of multivariate Gaussians is fit on random
	data; log probabilities of the raw array are the reference for the
	values computed from the two generator wrappers.
	"""
	X = numpy.random.randn(100, 5) + 0.5
	array_gen = DataGenerator(X)
	frame_gen = DataFrameGenerator(pandas.DataFrame(X))

	model = GeneralMixtureModel.from_samples(
		MultivariateGaussianDistribution, n_components=2, X=X
	)

	# Evaluate in the order raw, array generator, frame generator.
	reference, via_array_gen, via_frame_gen = [
		model.log_probability(item) for item in (X, array_gen, frame_gen)
	]

	assert_array_almost_equal(reference, via_array_gen)
	assert_array_almost_equal(reference, via_frame_gen)
Пример #7
0
def test_io_dfgenerator_wy_str_batches():
    """Check ``batches`` when weights/labels are given as column names.

    The generator is built once without a ``batch_size`` argument and then
    with batch sizes 123, 1 and 506.  In every case the concatenated
    batches must reproduce the feature values, labels and weights.
    """
    n_samples = 500
    frame = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    # Snapshot of the pure feature columns, taken before 'w' and 'y' are added.
    features = frame.copy()
    w = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(5, size=n_samples)

    frame['w'] = w
    frame['y'] = y

    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )
    for options in option_sets:
        data = DataFrameGenerator(frame, 'w', 'y', **options)
        # Three separate passes over the batches, one per yielded field.
        X_ = numpy.concatenate([batch[0] for batch in data.batches()])
        y_ = numpy.concatenate([batch[1] for batch in data.batches()])
        w_ = numpy.concatenate([batch[2] for batch in data.batches()])
        assert_almost_equal(features.values, X_)
        assert_almost_equal(y, y_)
        assert_almost_equal(w, w_)
Пример #8
0
def test_io_dfgenerator_wy_batches():
    """Check ``batches`` when weights and labels are passed as arrays.

    The generator is built once without a ``batch_size`` argument and then
    with batch sizes 123, 1 and 506.  In every case the concatenated
    batches must reproduce the frame values, labels and weights.
    """
    n_samples = 500
    X = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    w = numpy.abs(numpy.random.randn(n_samples))
    y = numpy.random.randint(5, size=n_samples)

    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )
    for options in option_sets:
        data = DataFrameGenerator(X, w, y, **options)
        # Three separate passes over the batches, one per yielded field.
        X_ = numpy.concatenate([batch[0] for batch in data.batches()])
        y_ = numpy.concatenate([batch[1] for batch in data.batches()])
        w_ = numpy.concatenate([batch[2] for batch in data.batches()])
        assert_almost_equal(X.values, X_)
        assert_almost_equal(y, y_)
        assert_almost_equal(w, w_)
Пример #9
0
def test_io_dfgenerator_x_batches():
    """Check ``batches`` for a generator built from a frame only.

    The generator is built once without a ``batch_size`` argument and then
    with batch sizes 123, 1 and 506.  In every case the concatenated
    batches must reproduce the frame values, and the second yielded field
    must equal a vector of ones.
    """
    n_samples = 500
    X = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    w = numpy.ones(n_samples)

    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )
    for options in option_sets:
        data = DataFrameGenerator(X, **options)
        # Two separate passes over the batches, one per yielded field.
        X_ = numpy.concatenate([batch[0] for batch in data.batches()])
        w_ = numpy.concatenate([batch[1] for batch in data.batches()])
        assert_almost_equal(X.values, X_)
        assert_almost_equal(w, w_)
Пример #10
0
def test_io_dfgenerator_str_classes():
    """``classes`` must be 0..4 when the labels are named by the string ``'y'``.

    The labels are random integers in [0, 5) stored in the frame's ``'y'``
    column, and that column name is passed as ``y``.
    """
    n_samples = 500
    frame = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    frame['y'] = numpy.random.randint(5, size=n_samples)

    generator = DataFrameGenerator(frame, y='y')

    expected_classes = [0, 1, 2, 3, 4]
    assert_array_equal(generator.classes, expected_classes)
Пример #11
0
def test_io_dfgenerator_pandas_classes():
    """``classes`` must be 0..4 when the labels are passed as a pandas Series.

    The labels are random integers in [0, 5) wrapped in a ``pandas.Series``.
    """
    n_samples = 500
    frame = pandas.DataFrame(numpy.random.randn(n_samples, 13))
    labels = pandas.Series(numpy.random.randint(5, size=n_samples))

    generator = DataFrameGenerator(frame, y=labels)

    expected_classes = [0, 1, 2, 3, 4]
    assert_array_equal(generator.classes, expected_classes)
Пример #12
0
def test_io_dfgenerator_classes_fail():
    """Reading ``classes`` on a generator built without labels must raise.

    The generator is given only a frame; accessing its ``classes``
    attribute is expected to raise ``ValueError``.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    generator = DataFrameGenerator(frame)

    # getattr defers the attribute access so assert_raises can observe it.
    assert_raises(ValueError, getattr, generator, 'classes')
Пример #13
0
def test_io_dfgenerator_shape():
    """The generator's ``shape`` must equal the shape of the wrapped frame."""
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    generator = DataFrameGenerator(frame)

    expected_shape = frame.shape
    assert_array_equal(generator.shape, expected_shape)