def test_manifold_LLE():
    """Smoke-test the Gaussian-normalisation + LLE pipeline.

    Reads ``dataset_simulation_zero_iterations.csv``, takes the first
    element of the training split, normalises it and feeds it to
    ``get_LLE`` (2 components, 80 neighbours). The test only checks that
    the final result is not ``None``.
    """
    splits = read_dataset(dataset_name='dataset_simulation_zero_iterations.csv')
    # The loader is expected to hand back exactly three splits; only the
    # training one is exercised here.
    training, _validation, _testing = splits
    samples, _targets = training

    normalised = get_gaussian_normalization(samples)
    embedded = get_LLE(normalised, num_components=2, n_neighbors=80)
    assert embedded is not None
def test_dimensionality_reduction_PCA():
    """Smoke-test ``get_pca`` on the training split with two components.

    ``get_pca`` must return a pair; the test passes when neither element
    of that pair is ``None``.
    """
    training, _validation, _testing = read_dataset(
        dataset_name='dataset_simulation_zero_iterations.csv'
    )
    samples, _targets = training

    pca_output = get_pca(samples, num_components=2)
    # Unpacking enforces that exactly two values come back.
    projected, variance_ratio = pca_output

    assert projected is not None
    assert variance_ratio is not None
        print' ... plotting sample {}'.format(idx)
        print vis_mf, chain_input
        lst_output = vis_mf

    with open(os.path.join('trained_models', name_model), 'wb') as f:
        cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)

    return lst_output


if __name__ == '__main__':
    # dataset = 'mnist.pkl.gz'
    # datasets = load_data(dataset)

    from datasets.DatasetManager import read_dataset
    datasets = read_dataset('dataset_simulation_20.csv', shared=True)
    train_set_x, train_set_y = datasets[0]

    n_in = train_set_x.get_value().shape[1]
    n_out = train_set_y.get_value().shape[1]

    x = T.matrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # construct the RBM class
    rbm = RBM(
        input=x,
        n_visible=n_in,
        n_hidden=1000,
        numpy_rng=rng,
def test_solve_missing_values():
    """Smoke-test ``solve_missing_values`` on the training split.

    Passes when the helper returns anything other than ``None``.
    """
    splits = read_dataset(dataset_name='dataset_simulation_zero_iterations.csv')
    training, _validation, _testing = splits
    samples, _targets = training

    repaired = solve_missing_values(samples)
    assert repaired is not None
def test_scaling_linear():
    """Smoke-test ``get_linear_normalization`` on the training split.

    Passes when the helper returns anything other than ``None``.
    """
    training, _validation, _testing = read_dataset(
        dataset_name='dataset_simulation_zero_iterations.csv'
    )
    samples, _targets = training

    scaled = get_linear_normalization(samples)
    assert scaled is not None
def test_dataset_manager():
    """Check that ``read_dataset`` returns a truthy value for both ``shared`` modes.

    The non-shared mode is exercised first, then the shared one.
    """
    for shared_flag in (False, True):
        loaded = read_dataset(dataset_name="dataset_simulation_20.csv", shared=shared_flag)
        assert loaded