def get_crosscat(prng):
    """Build a Product that wraps a single FlexibleRowMixture view.

    The view is given a CRP with output 2 as its row divider and, as its
    base component, a Product of two Normal CGPMs with outputs 0 and 1.
    Every object constructed here receives the same ``prng``.
    """
    # Construction order (CRP, then the Normals) is kept as-is because all
    # objects share one random state.
    row_divider = CRP([2], [], rng=prng)
    base_columns = [
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ]
    components_base = Product(base_columns, rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng,
    )
    return Product(cgpms=[view], rng=prng)
Пример #2
0
def test_simple_product():
    """Check outputs, inputs, simulate and logpdf on a three-member Product."""
    rng = get_prng(2)
    members = [
        Normal([0], [], rng=rng),
        Normal([1], [], rng=rng),
        Categorical([2], [], distargs={'k': 4}, rng=rng),
    ]
    product = Product(members, rng)
    # The product reports the members' outputs in order and no inputs.
    assert product.outputs == [0, 1, 2]
    assert product.inputs == []
    # The query order differs from the output order; only the set of
    # returned keys is compared.
    draw = product.simulate(None, [1, 2, 0])
    assert set(draw.keys()) == {0, 1, 2}
    # The log density of the simulated draw must be strictly negative.
    assert product.logpdf(None, draw) < 0
def test_finite_mixture_two_component__ci_():
    """Run the shared mixture test on a two-component FiniteRowMixture.

    NOTE(review): the ``pytest.config`` global was removed in pytest 5.0,
    so this option lookup only works on older pytest releases -- confirm
    the pinned pytest version.
    """
    prng = get_prng(2)
    integration = pytest.config.getoption('--integration')
    row_divider = Categorical([2], [], distargs={'k': 2}, rng=prng)
    # Two identically specified components, each a Product of Normals
    # over outputs 0 and 1.
    components = []
    for _ in range(2):
        columns = [Normal([0], [], rng=prng), Normal([1], [], rng=prng)]
        components.append(Product(columns, rng=prng))
    mixture = FiniteRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components=components,
        rng=prng,
    )
    run_mixture_test(mixture, integration, prng)
Пример #4
0
def get_crosscat_synthesizer(prng):
    """Return a GibbsCrossCat over a one-view crosscat loaded with data.

    The crosscat is a Product holding a single FlexibleRowMixture (CRP with
    output 2 as row divider, Normals with outputs 0 and 1 as base
    component).  Each row produced by ``make_bivariate_two_clusters(prng)``
    is observed under its enumeration index, with the first two entries of
    the row assigned to outputs 0 and 1.
    """
    row_divider = CRP([2], [], rng=prng)
    components_base = Product(
        [Normal([0], [], rng=prng), Normal([1], [], rng=prng)],
        rng=prng,
    )
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng,
    )
    crosscat = Product(cgpms=[view], rng=prng)
    for rowid, row in enumerate(make_bivariate_two_clusters(prng)):
        observation = {column: row[column] for column in (0, 1)}
        crosscat.observe(rowid, observation)
    return GibbsCrossCat(crosscat)
Пример #5
0
def test_transition_hypers_basic():
    """Check that hyperparameter transitions change hypers and raise the score.

    A FlexibleRowMixture (CRP row divider on output 2, Poisson on output 0
    and Normal on output 1 as base component) observes three values on
    output 1.  ``transition_hypers`` is then run twice on the CGPMs for
    output 1, and the test asserts that the two results are not all equal
    and that ``logpdf_score`` is higher afterwards than before.
    """
    prng = get_prng(2)
    component0 = Product([
        Poisson([0], [], hypers={'m': 100}, rng=prng),
        Normal([1], [], hypers={'m': 100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component0,
        rng=prng)
    # Make normal observations.
    infinite_mixture.observe(0, {1: 100})
    infinite_mixture.observe(1, {1: 300})
    infinite_mixture.observe(2, {1: -300})
    # Fetch log score before inference.
    log_score0 = infinite_mixture.logpdf_score()
    # Run inference on the CGPMs responsible for output 1.
    normal_cgpms = get_cgpms_by_output_index(infinite_mixture, 1)
    grids_normal = transition_hyper_grids(normal_cgpms, 30)
    # `range` instead of the Python-2-only `xrange`, so the test also runs
    # on Python 3; for two iterations the two are interchangeable.
    hypers_normal = [
        transition_hypers(normal_cgpms, grids_normal, prng)
        for _i in range(2)
    ]
    # The two transitions must not both yield the same hypers.
    assert not all(hypers == hypers_normal[0] for hypers in hypers_normal)
    # The score after the transitions must be strictly better.
    log_score1 = infinite_mixture.logpdf_score()
    assert log_score0 < log_score1
Пример #6
0
def test_transition_rows_fixed_mixture():
    """Check that row transitions move rows into their matching components.

    Three components are built with Normal hypers ``m`` of (1000, 0),
    (-1000, 1000) and (0, -100) on outputs (0, 1).  Six rows are observed,
    all with output 2 (the row divider's output) set to 0.  After ten
    sweeps of ``transition_rows`` over all rows, simulating output 2 for
    each row must return components 0, 0, 1, 1, 2, 2 respectively.
    """
    prng = get_prng(2)
    component0 = Product([
        Normal([0], [], hypers={'m': 1000}, rng=prng),
        Normal([1], [], hypers={'m': 0}, rng=prng),
    ], rng=prng)
    component1 = Product([
        Normal([0], [], hypers={'m': -1000}, rng=prng),
        Normal([1], [], hypers={'m': 1000}, rng=prng),
    ], rng=prng)
    component2 = Product([
        Normal([0], [], hypers={'m': 0}, rng=prng),
        Normal([1], [], hypers={'m': -100}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = Categorical([2], [], distargs={'k': 3}, rng=prng)
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components=[component0, component1, component2],
        rng=prng)
    # For component 0.
    finite_mixture.observe(0, {0: 1000, 1: 0, 2: 0})
    finite_mixture.observe(1, {0: 990, 1: -10, 2: 0})
    # For component 1.
    finite_mixture.observe(2, {0: -1000, 1: 1000, 2: 0})
    finite_mixture.observe(3, {0: -990, 1: 990, 2: 0})
    # For component 2.
    finite_mixture.observe(4, {0: 0, 1: -1000, 2: 0})
    finite_mixture.observe(5, {0: 10, 1: -990, 2: 0})
    num_rows = 6
    # Confirm all rows start in component 0, as observed.
    for rowid in range(num_rows):
        assert finite_mixture.simulate(rowid, [2]) == {2: 0}
    # Run transitions.  `range` replaces the Python-2-only `xrange` so the
    # test also runs on Python 3 and matches the inner loop.
    for _i in range(10):
        for rowid in range(num_rows):
            transition_rows(finite_mixture, rowid, prng)
    # Confirm all rows ended up in the correct components.
    expected_components = [0, 0, 1, 1, 2, 2]
    for rowid, component in enumerate(expected_components):
        assert finite_mixture.simulate(rowid, [2]) == {2: component}
def test_crosscat_three_component_cpp__ci_():
    """Run the shared crosscat test using the ``transition_structure_cpp`` kernel.

    The crosscat is a Product of one FlexibleRowMixture whose row divider
    is a CRP with output 1 and whose base component holds a single Normal
    with output 0.

    NOTE(review): the ``pytest.config`` global was removed in pytest 5.0,
    so this option lookup only works on older pytest releases -- confirm
    the pinned pytest version.
    """
    prng = get_prng(12)
    integration = pytest.config.getoption('--integration')
    row_divider = CRP([1], [], rng=prng)
    components_base = Product(cgpms=[Normal([0], [], rng=prng)], rng=prng)
    view = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng,
    )
    crosscat = Product(cgpms=[view], rng=prng)

    def func_inference(crosscat):
        # 1000 structure steps under --integration, a single step otherwise.
        if integration:
            n_step = 1000
        else:
            n_step = 1
        synthesizer = GibbsCrossCat(crosscat)
        synthesizer.transition_structure_cpp(N=n_step)
        synthesizer.transition_hypers_distributions()
        synthesizer.transition_hypers_row_divide()
        return synthesizer

    run_crosscat_test(crosscat, func_inference, integration, prng)
def test_flexible_mixture_two_component__ci_():
    """Run the shared mixture test on a FlexibleRowMixture.

    The mixture uses a CRP with output 2 as row divider and a Product of
    Normals with outputs 0 and 1 as its base component.

    NOTE(review): the ``pytest.config`` global was removed in pytest 5.0,
    so this option lookup only works on older pytest releases -- confirm
    the pinned pytest version.
    """
    prng = get_prng(2)
    integration = pytest.config.getoption('--integration')
    row_divider = CRP([2], [], rng=prng)
    base_columns = [Normal([0], [], rng=prng), Normal([1], [], rng=prng)]
    components_base = Product(base_columns, rng=prng)
    mixture = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng,
    )
    run_mixture_test(mixture, integration, prng)
Пример #9
0
def make_random_crosscat(args):
    """Build a Product of randomly generated views.

    Args:
        args: A 5-tuple ``(outputs, distributions, Cd, Ci, seed)``.  It is
            taken as one positional argument, as in the original
            signature.  ``outputs`` and ``distributions`` are indexed in
            parallel; ``Cd`` and ``Ci`` are forwarded unchanged to
            ``make_random_partition``; ``seed`` is passed to ``get_prng``.

    Returns:
        A Product whose cgpms are one ``make_random_view`` result per block
        of the random partition.

    Raises:
        ValueError: If ``args`` does not unpack into exactly five items.
    """
    # Tuple parameters in the signature are a SyntaxError on Python 3
    # (PEP 3113); unpacking in the body keeps the same calling convention.
    outputs, distributions, Cd, Ci, seed = args
    rng = get_prng(seed)
    alpha = rng.gamma(2, 1)
    N = len(outputs)
    # Each block of the partition is a collection of indices into `outputs`.
    partition = make_random_partition(N, alpha, Cd, Ci, rng)
    views = [
        make_random_view(
            outputs=[outputs[i] for i in block],
            distributions=[distributions[i] for i in block],
            rng=rng)
        for block in partition
    ]
    return Product(cgpms=views, rng=rng)
Пример #10
0
def make_random_view(outputs, distributions, rng):
    """Build a FlexibleRowMixture over the given outputs.

    The CRP row divider gets an output drawn with
    ``rng.randint(2**32 - 1)``.  The base component is a Product of one
    ``make_random_primitive`` result per (output, distribution) pair, taken
    in lockstep from ``outputs`` and ``distributions``.
    """
    row_divider_output = rng.randint(2**32 - 1)
    row_divider = CRP([row_divider_output], [], rng=rng)
    primitives = []
    for output, distribution in zip(outputs, distributions):
        primitives.append(make_random_primitive(output, distribution, rng))
    components_base = Product(primitives, rng=rng)
    return FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=rng,
    )
Пример #11
0
def test_simple_product_finite_array():
    """Exercise a Product of two FiniteArray CGPMs selected by indexers.

    Each FiniteArray holds three Normals over one output (0 or 1) and is
    given an indexer (128 or 129).  The test checks that the indexers show
    up as the product's inputs, that a simulate call raises when an indexer
    it needs is missing, and that with indexers {128: 1, 129: 0} the sample
    and log density match the components at those positions.
    """
    prng = get_prng(2)
    indexer_0 = 128
    indexer_1 = 129

    array0 = [
        Normal([0], [], hypers={'m': mean}, rng=prng)
        for mean in (100, -100, 0)
    ]
    cgpm_array_0 = FiniteArray(cgpms=array0, indexer=indexer_0, rng=prng)
    with pytest.raises(Exception):
        # Missing indexer_0 as a required input.
        cgpm_array_0.simulate(None, [0])

    array1 = [
        Normal([1], [], hypers={'m': mean}, rng=prng)
        for mean in (1000, -1000, 50)
    ]
    cgpm_array_1 = FiniteArray(cgpms=array1, indexer=indexer_1, rng=prng)

    product = Product([cgpm_array_0, cgpm_array_1], prng)
    assert product.outputs == [0, 1]
    assert product.inputs == [indexer_0, indexer_1]

    with pytest.raises(Exception):
        # Missing indexer_0.
        product.simulate(None, [0, 1], inputs={indexer_1: 0})
    with pytest.raises(Exception):
        # Missing indexer_1.
        product.simulate(None, [0, 1], inputs={indexer_0: 1})
    # Should work, since output 1 is not being queried.
    product.simulate(None, [0], inputs={indexer_0: 1})

    # Sampling from correct components: position 1 of array 0 (m=-100)
    # and position 0 of array 1 (m=1000).
    selection = {indexer_0: 1, indexer_1: 0}
    sample = product.simulate(None, [0, 1], inputs=selection)
    assert abs(-100 - sample[0]) < 10
    assert abs(1000 - sample[1]) < 10

    # The product's log density equals the sum of the selected components'.
    logp = product.logpdf(None, sample, inputs=selection)
    expected_logp = (
        array0[1].logpdf(None, {0: sample[0]})
        + array1[0].logpdf(None, {1: sample[1]})
    )
    assert np.allclose(logp, expected_logp)
Пример #12
0
def get_crosscat(prng):
    """Build a Product of three FlexibleRowMixture views.

    View 0: CRP output -1, Normals on outputs 0 and 1.
    View 1: CRP output -2, Poisson on output 2, Normals on outputs 3 and 4.
    View 2: CRP output -3, Categorical (k=4) on output 5.
    Every object constructed here receives the same ``prng``.
    """
    # Objects are created in the original order (per view: CRP first, then
    # its columns) because they all share one random state.
    divider0 = CRP([-1], [], rng=prng)
    base0 = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    view0 = FlexibleRowMixture(
        cgpm_row_divide=divider0, cgpm_components_base=base0, rng=prng)

    divider1 = CRP([-2], [], rng=prng)
    base1 = Product([
        Poisson([2], [], rng=prng),
        Normal([3], [], rng=prng),
        Normal([4], [], rng=prng),
    ], rng=prng)
    view1 = FlexibleRowMixture(
        cgpm_row_divide=divider1, cgpm_components_base=base1, rng=prng)

    divider2 = CRP([-3], [], rng=prng)
    base2 = Product([
        Categorical([5], [], distargs={'k': 4}, rng=prng),
    ], rng=prng)
    view2 = FlexibleRowMixture(
        cgpm_row_divide=divider2, cgpm_components_base=base2, rng=prng)

    return Product([view0, view1, view2], rng=prng)
Пример #13
0
def test_product_mixture_constraints():
    """Check constrained and unconstrained simulation from a finite mixture.

    Three components are built with Normal hypers ``m`` of 1000, -1000 and
    0 on both outputs 0 and 1; the row divider is a Categorical (k=3) on
    output 2.  The same checks are run on the mixture and on a copy rebuilt
    through ``to_metadata`` / ``from_metadata``.
    """
    prng = get_prng(2)
    component0 = Product([
        Normal([0], [], hypers={'m': 1000}, rng=prng),
        Normal([1], [], hypers={'m': 1000}, rng=prng),
    ], rng=prng)
    component1 = Product([
        Normal([0], [], hypers={'m': -1000}, rng=prng),
        Normal([1], [], hypers={'m': -1000}, rng=prng),
    ], rng=prng)
    component2 = Product([
        Normal([0], [], hypers={'m': 0}, rng=prng),
        Normal([1], [], hypers={'m': 0}, rng=prng),
    ], rng=prng)
    cgpm_row_divide = Categorical([2], [], distargs={'k': 3}, rng=prng)
    finite_mixture = FiniteRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components=[component0, component1, component2],
        rng=prng)

    def run_mixture_tests(mixture):
        N = 100
        # Simulate from component 1.
        samples = mixture.simulate(None, [0], constraints={2: 1}, N=N)
        assert len([s for s in samples if -1100 < s[0] < -900]) > int(.9*N)
        # Simulate from random components.  The original range
        # `-900 < s[0] < -1100` could never be satisfied, so the check
        # always passed.  With the range corrected, component 1 must
        # account for fewer than half of the samples.
        # NOTE(review): the bound is .5*N rather than .33*N on the
        # assumption that the component choice is roughly uniform, in which
        # case about a third of the samples land here and .33*N would sit
        # on the expected value -- confirm.
        samples = mixture.simulate(None, [0], N=N)
        assert len([s for s in samples if -1100 < s[0] < -900]) < int(.5*N)
        # Simulate (implicitly) from component 0.
        samples = mixture.simulate(None, [1, 2], constraints={0: 1000}, N=N)
        assert len([s for s in samples if 900 < s[1] < 1100]) > int(.9*N)
        assert len([s for s in samples if s[2] == 0]) == N

    # Run tests on finite_mixture.
    run_mixture_tests(finite_mixture)
    # Run tests after to/from metadata conversion.
    metadata = finite_mixture.to_metadata()
    finite_mixture2 = FiniteRowMixture.from_metadata(metadata, prng)
    run_mixture_tests(finite_mixture2)
Пример #14
0
def test_crosscat_add_remove():
    """Check the output lists produced by ``add_cgpm`` and ``remove_cgpm``.

    A fourth view (CRP output -4, Categorical with k=4 on output 6) is
    added to the crosscat from ``get_crosscat``.  The test then removes by
    output -1 and by output 5, asserting the resulting ``outputs`` list
    after every step.
    """
    prng = get_prng(2)
    crosscat = get_crosscat(prng)
    row_divider = CRP([-4], [], rng=prng)
    components_base = Product(
        [Categorical([6], [], distargs={'k': 4}, rng=prng)],
        rng=prng,
    )
    infinite_mixture4 = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng,
    )
    # Adding the view appends its outputs (-4, 6) after the existing ones.
    crosscat = add_cgpm(crosscat, infinite_mixture4)
    after_add = [-1, 0, 1, -2, 2, 3, 4, -3, 5, -4, 6]
    assert crosscat.outputs == after_add
    # Removing by output -1 leaves none of -1, 0, 1 behind.
    crosscat = remove_cgpm(crosscat, -1)
    after_first_removal = [-2, 2, 3, 4, -3, 5, -4, 6]
    assert crosscat.outputs == after_first_removal
    # Removing by output 5 leaves neither -3 nor 5 behind.
    crosscat = remove_cgpm(crosscat, 5)
    after_second_removal = [-2, 2, 3, 4, -4, 6]
    assert crosscat.outputs == after_second_removal
def test_crosscat_two_component_nominal__ci_():
    """Run Gibbs inference on a two-view crosscat with a nominal column.

    The crosscat starts with output 0 in one view and outputs 1 and 50 in
    another.  Under ``--integration`` the test asserts that inference ends
    with a single view and runs ``check_sampled_data`` on joint and
    conditional samples; otherwise every kernel runs for one step and
    those checks are skipped.

    NOTE(review): the ``pytest.config`` global was removed in pytest 5.0,
    so this option lookup only works on older pytest releases -- confirm
    the pinned pytest version.
    """
    prng = get_prng(10)
    integration = pytest.config.getoption('--integration')

    # Build CGPM with adversarial initialization.
    divider_a = CRP([-1], [], rng=prng)
    base_a = Product([
        Normal([0], [], rng=prng),
    ], rng=prng)
    view_a = FlexibleRowMixture(
        cgpm_row_divide=divider_a, cgpm_components_base=base_a, rng=prng)
    divider_b = CRP([-2], [], rng=prng)
    base_b = Product([
        Normal([1], [], rng=prng),
        Categorical([50], [], distargs={'k': 4}, rng=prng),
    ], rng=prng)
    view_b = FlexibleRowMixture(
        cgpm_row_divide=divider_b, cgpm_components_base=base_b, rng=prng)
    crosscat = Product([view_a, view_b], rng=prng)

    # Fetch data and add a nominal variable: labels 0..3 on consecutive
    # slices of 15 rows.
    data_xy = make_bivariate_two_clusters(prng)
    data_z = np.zeros(len(data_xy))
    for label in range(4):
        data_z[15 * label:15 * (label + 1)] = label
    data = np.column_stack((data_xy, data_z))

    # Observe.
    for rowid, row in enumerate(data):
        crosscat.observe(rowid, {0: row[0], 1: row[1], 50: row[2]})

    # Run inference: all kernels first, then only the hyperparameter ones.
    synthesizer = GibbsCrossCat(crosscat)
    n_full = 50 if integration else 1
    n_hypers = 100 if integration else 1
    synthesizer.transition(N=n_full, progress=False)
    synthesizer.transition(
        N=n_hypers,
        kernels=['hypers_distributions', 'hypers_row_divide'],
        progress=False)

    # Assert views are merged into one.
    if integration:
        assert len(synthesizer.crosscat.cgpms) == 1
    crp_output = synthesizer.crosscat.cgpms[0].cgpm_row_divide.outputs[0]

    # Check joint samples for all nominals.
    samples = synthesizer.crosscat.simulate(
        None, [crp_output, 0, 1, 50], N=250)
    if integration:
        check_sampled_data(samples, [0, 7], 3, 110)
    # Check joint samples for nominals [0, 2].
    samples_a = [s for s in samples if s[50] in [0, 2]]
    if integration:
        check_sampled_data(samples_a, [0, 7], 3, 45)
    # Check joint samples for nominals [1, 3].
    samples_b = [s for s in samples if s[50] in [1, 3]]
    if integration:
        check_sampled_data(samples_b, [0, 7], 3, 45)

    # Check conditional samples in correct quadrants.  The simulate call
    # runs in both modes; only the check is gated on --integration.
    means = {0: 0, 1: 0, 2: 7, 3: 7}
    for z in [0, 1, 2, 3]:
        samples = synthesizer.crosscat.simulate(None, [0, 1], {50: z}, N=100)
        if integration:
            check_sampled_data(samples, [means[z]], 3, 90)
Пример #16
0
def test_add_remove():
    """Check that add/remove return new mixtures and leave earlier ones intact.

    Starting from a mixture with outputs [2, 0, 1], the test removes output
    0, adds a Normal on output 0, removes output 1 and removes output 0
    again.  After every step the ``outputs`` of the new mixture and of all
    earlier mixtures are asserted.  Finally, removing output 2 (the row
    divider's output) must raise.
    """
    prng = get_prng(2)
    row_divider = CRP([2], [], rng=prng)
    components_base = Product([
        Normal([0], [], rng=prng),
        Normal([1], [], rng=prng),
    ], rng=prng)
    mixture0 = FlexibleRowMixture(
        cgpm_row_divide=row_divider,
        cgpm_components_base=components_base,
        rng=prng)
    observations = [[0, .9], [.5, 1], [-.5, 1.2]]
    for rowid, row in enumerate(observations):
        mixture0.observe(rowid, {0: row[0], 1: row[1]})

    # (mixture, expected outputs) for every mixture created so far, oldest
    # first; re-verified in full after each operation.
    history = [(mixture0, [2, 0, 1])]

    def record_and_verify(mixture, expected_outputs):
        history.append((mixture, expected_outputs))
        for earlier, outputs in history:
            assert earlier.outputs == outputs

    mixture1 = remove_cgpm(mixture0, 0)
    record_and_verify(mixture1, [2, 1])

    mixture2 = add_cgpm(mixture1, Normal([0], [], rng=prng))
    record_and_verify(mixture2, [2, 1, 0])

    mixture3 = remove_cgpm(mixture2, 1)
    record_and_verify(mixture3, [2, 0])

    mixture4 = remove_cgpm(mixture3, 0)
    record_and_verify(mixture4, [2])

    with pytest.raises(Exception):
        # Cannot remove the cgpm_row_divide for a mixture.
        remove_cgpm(mixture2, 2)
Пример #17
0
def test_product_mixture_walk():
    """Check ``get_cgpms_by_output_index`` before and after one observation.

    Before observing, each lookup returns only the base CGPM (or the row
    divider for output 2).  After observing output 0 on row 0, lookups for
    outputs 0 and 1 return two CGPMs each, with the base CGPM last.
    Looking up an unknown output (-1) must raise.
    """
    prng = get_prng(2)
    base_poisson = Poisson([0], [], hypers={'a': 10, 'b': 1}, rng=prng)
    base_normal = Normal([1], [], hypers={'m': 100}, rng=prng)
    component_base = Product([base_poisson, base_normal], rng=prng)
    cgpm_row_divide = CRP([2], [], rng=prng)
    infinite_mixture = FlexibleRowMixture(
        cgpm_row_divide=cgpm_row_divide,
        cgpm_components_base=component_base,
        rng=prng)

    # Only the base CGPMs in the flexible mixture.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert found_poisson == [component_base.cgpms[0]]
    assert found_normal == [component_base.cgpms[1]]
    assert found_crp == [cgpm_row_divide]

    infinite_mixture.observe(0, {0: 1})

    # New CGPMs in the flexible CGPM after observing.
    found_poisson = get_cgpms_by_output_index(infinite_mixture, 0)
    found_normal = get_cgpms_by_output_index(infinite_mixture, 1)
    assert len(found_poisson) == len(found_normal)
    assert len(found_normal) == 2
    # The base CGPMs are reported last.
    assert [found_poisson[-1]] == [component_base.cgpms[0]]
    assert [found_normal[-1]] == [component_base.cgpms[1]]
    # Only output 0 was observed, so only the new Poisson has a count of 1.
    assert found_poisson[0].N == 1
    assert found_normal[0].N == 0

    # The row divider recorded row 0 with value 0.
    found_crp = get_cgpms_by_output_index(infinite_mixture, 2)
    assert len(found_crp) == 1
    assert found_crp[0].N == 1
    assert found_crp[0].data[0] == 0

    # Misc. errors, no such output.
    with pytest.raises(Exception):
        get_cgpms_by_output_index(infinite_mixture, -1)