def test_io_dfgenerator_wy_unlabeled():
    """Check that ``unlabeled_batches`` yields only the rows labelled -1.

    A 500 x 13 random frame is paired with array weights and integer labels
    drawn from -1..3. The concatenated batches must contain fewer rows than
    the input, and must match the rows and weights whose label is -1.
    """
    features = pandas.DataFrame(numpy.random.randn(500, 13))
    weights = numpy.abs(numpy.random.randn(500))
    labels = numpy.random.randint(5, size=500) - 1
    data = DataFrameGenerator(features, weights, labels)

    # Each column is gathered with its own pass over the generator:
    # position 0 is compared against the features, position 1 the weights.
    batch_features, batch_weights = (
        numpy.concatenate(
            [batch[position] for batch in data.unlabeled_batches()]
        )
        for position in range(2)
    )

    unlabeled = labels == -1
    assert_true(features.shape[0] > batch_features.shape[0])
    assert_almost_equal(features.loc[unlabeled], batch_features)
    assert_almost_equal(weights[unlabeled], batch_weights)
def test_io_predict_log_proba():
    """Check ``predict_log_proba`` agrees across raw and wrapped inputs.

    The prediction on ``X`` itself is the reference; the predictions on a
    ``DataGenerator`` and on a ``DataFrameGenerator`` built from the same
    data must match it.
    """
    # NOTE(review): `X` and `model` are not assigned in this function, so they
    # must come from module scope -- confirm where they are set up. A second
    # function with this same name appears later in the file and would rebind
    # the name at import time; verify which definition is meant to run.
    wrapped_inputs = (
        DataGenerator(X),
        DataFrameGenerator(pandas.DataFrame(X)),
    )

    reference = model.predict_log_proba(X)
    # All predictions are computed before any comparison is made.
    wrapped_outputs = [
        model.predict_log_proba(source) for source in wrapped_inputs
    ]

    for output in wrapped_outputs:
        assert_array_almost_equal(reference, output)
def test_io_log_probability():
    """Check ``log_probability`` agrees across raw and wrapped inputs.

    The log probabilities computed on ``X`` itself are the reference; those
    computed on a ``DataGenerator`` and on a ``DataFrameGenerator`` built
    from the same data must match it.
    """
    # NOTE(review): `X` and `model` are not assigned in this function, so they
    # must come from module scope -- confirm where they are set up. A second
    # function with this same name appears later in the file and would rebind
    # the name at import time; verify which definition is meant to run.
    wrapped_inputs = (
        DataGenerator(X),
        DataFrameGenerator(pandas.DataFrame(X)),
    )

    reference = model.log_probability(X)
    # All log probabilities are computed before any comparison is made.
    wrapped_outputs = [
        model.log_probability(source) for source in wrapped_inputs
    ]

    for output in wrapped_outputs:
        assert_array_almost_equal(reference, output)
def test_io_dfgenerator_wy_str_labeled():
    """Check ``labeled_batches`` when weights and labels are column names.

    The weights and labels (drawn from -1..3) are stored in the frame as the
    'w' and 'y' columns and handed to the generator by name. The concatenated
    batches must contain fewer rows than the input, and must match the
    feature rows, labels and weights whose label is not -1.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    # Snapshot of the 13 feature columns taken before 'w' and 'y' are added.
    features_only = frame.copy()
    weights = numpy.abs(numpy.random.randn(500))
    labels = numpy.random.randint(5, size=500) - 1
    frame['w'] = weights
    frame['y'] = labels
    data = DataFrameGenerator(frame, 'w', 'y')

    # Each column is gathered with its own pass over the generator:
    # position 0 -> features, position 1 -> labels, position 2 -> weights.
    batch_features, batch_labels, batch_weights = (
        numpy.concatenate(
            [batch[position] for batch in data.labeled_batches()]
        )
        for position in range(3)
    )

    labeled = labels != -1
    assert_true(frame.shape[0] > batch_features.shape[0])
    assert_almost_equal(features_only.loc[labeled], batch_features)
    assert_almost_equal(labels[labeled], batch_labels)
    assert_almost_equal(weights[labeled], batch_weights)
def test_io_predict_log_proba():
    """Check ``predict_log_proba`` agrees across raw and wrapped inputs.

    A two-component mixture of multivariate Gaussians is fitted to 100 x 5
    random samples. Predictions on the raw array are the reference; the
    predictions on a ``DataGenerator`` and on a ``DataFrameGenerator`` built
    from the same samples must match it.
    """
    # NOTE(review): an earlier function in this file carries the same name;
    # this later definition is the one bound to the name after import.
    samples = numpy.random.randn(100, 5) + 0.5
    wrapped_inputs = (
        DataGenerator(samples),
        DataFrameGenerator(pandas.DataFrame(samples)),
    )
    mixture = GeneralMixtureModel.from_samples(
        MultivariateGaussianDistribution, n_components=2, X=samples
    )

    reference = mixture.predict_log_proba(samples)
    # All predictions are computed before any comparison is made.
    wrapped_outputs = [
        mixture.predict_log_proba(source) for source in wrapped_inputs
    ]

    for output in wrapped_outputs:
        assert_array_almost_equal(reference, output)
def test_io_log_probability():
    """Check ``log_probability`` agrees across raw and wrapped inputs.

    A two-component mixture of multivariate Gaussians is fitted to 100 x 5
    random samples. Log probabilities of the raw array are the reference;
    those of a ``DataGenerator`` and of a ``DataFrameGenerator`` built from
    the same samples must match it.
    """
    # NOTE(review): an earlier function in this file carries the same name;
    # this later definition is the one bound to the name after import.
    samples = numpy.random.randn(100, 5) + 0.5
    wrapped_inputs = (
        DataGenerator(samples),
        DataFrameGenerator(pandas.DataFrame(samples)),
    )
    mixture = GeneralMixtureModel.from_samples(
        MultivariateGaussianDistribution, n_components=2, X=samples
    )

    reference = mixture.log_probability(samples)
    # All log probabilities are computed before any comparison is made.
    wrapped_outputs = [
        mixture.log_probability(source) for source in wrapped_inputs
    ]

    for output in wrapped_outputs:
        assert_array_almost_equal(reference, output)
def test_io_dfgenerator_wy_str_batches():
    """Check ``batches`` with weights and labels passed as column names.

    The weights and labels are stored in the frame as the 'w' and 'y'
    columns. For the default batch size and for batch sizes 123, 1 and 506,
    concatenating the batches must give back the original features, labels
    and weights.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    # Snapshot of the 13 feature columns taken before 'w' and 'y' are added.
    features_only = frame.copy()
    weights = numpy.abs(numpy.random.randn(500))
    labels = numpy.random.randint(5, size=500)
    frame['w'] = weights
    frame['y'] = labels

    # The empty dict exercises the generator's default batch size; 506 is
    # larger than the 500 rows available.
    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in option_sets:
        data = DataFrameGenerator(frame, 'w', 'y', **options)

        # One pass over the generator per column:
        # position 0 -> features, position 1 -> labels, position 2 -> weights.
        batch_features, batch_labels, batch_weights = (
            numpy.concatenate([batch[position] for batch in data.batches()])
            for position in range(3)
        )

        assert_almost_equal(features_only.values, batch_features)
        assert_almost_equal(labels, batch_labels)
        assert_almost_equal(weights, batch_weights)
def test_io_dfgenerator_wy_batches():
    """Check ``batches`` with weights and labels passed as arrays.

    For the default batch size and for batch sizes 123, 1 and 506,
    concatenating the batches must give back the original features, labels
    and weights.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    weights = numpy.abs(numpy.random.randn(500))
    labels = numpy.random.randint(5, size=500)

    # The empty dict exercises the generator's default batch size; 506 is
    # larger than the 500 rows available.
    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in option_sets:
        data = DataFrameGenerator(frame, weights, labels, **options)

        # One pass over the generator per column:
        # position 0 -> features, position 1 -> labels, position 2 -> weights.
        batch_features, batch_labels, batch_weights = (
            numpy.concatenate([batch[position] for batch in data.batches()])
            for position in range(3)
        )

        assert_almost_equal(frame.values, batch_features)
        assert_almost_equal(labels, batch_labels)
        assert_almost_equal(weights, batch_weights)
def test_io_dfgenerator_x_batches():
    """Check ``batches`` when only the feature frame is supplied.

    For the default batch size and for batch sizes 123, 1 and 506,
    concatenating the batches must give back the original features, and the
    second element of each batch must concatenate to an all-ones vector.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    unit_weights = numpy.ones(500)

    # The empty dict exercises the generator's default batch size; 506 is
    # larger than the 500 rows available.
    option_sets = (
        {},
        {'batch_size': 123},
        {'batch_size': 1},
        {'batch_size': 506},
    )

    for options in option_sets:
        data = DataFrameGenerator(frame, **options)

        # One pass over the generator per column:
        # position 0 -> features, position 1 -> weights.
        batch_features, batch_weights = (
            numpy.concatenate([batch[position] for batch in data.batches()])
            for position in range(2)
        )

        assert_almost_equal(frame.values, batch_features)
        assert_almost_equal(unit_weights, batch_weights)
def test_io_dfgenerator_str_classes():
    """Check ``classes`` when the labels are referenced by column name.

    Integer labels drawn from 0..4 are stored in the frame's 'y' column; the
    generator's ``classes`` must equal ``[0, 1, 2, 3, 4]``.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    frame['y'] = numpy.random.randint(5, size=500)

    generator = DataFrameGenerator(frame, y='y')

    expected_classes = [0, 1, 2, 3, 4]
    assert_array_equal(generator.classes, expected_classes)
def test_io_dfgenerator_pandas_classes():
    """Check ``classes`` when the labels are given as a pandas Series.

    Integer labels drawn from 0..4 are wrapped in a ``pandas.Series``; the
    generator's ``classes`` must equal ``[0, 1, 2, 3, 4]``.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    label_series = pandas.Series(numpy.random.randint(5, size=500))

    generator = DataFrameGenerator(frame, y=label_series)

    expected_classes = [0, 1, 2, 3, 4]
    assert_array_equal(generator.classes, expected_classes)
def test_io_dfgenerator_classes_fail():
    """Check that reading ``classes`` raises ValueError without labels.

    The generator is built from the feature frame alone, so no labels are
    supplied; accessing ``classes`` is expected to raise ``ValueError``.
    """
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    generator = DataFrameGenerator(frame)

    # The attribute read is deferred to assert_raises so that the exception
    # is raised inside the assertion helper rather than here.
    assert_raises(ValueError, getattr, generator, 'classes')
def test_io_dfgenerator_shape():
    """Check that the generator's ``shape`` matches the wrapped frame's."""
    frame = pandas.DataFrame(numpy.random.randn(500, 13))
    generator = DataFrameGenerator(frame)

    reported_shape = generator.shape
    expected_shape = frame.shape
    assert_array_equal(reported_shape, expected_shape)