Пример #1
0
def test_factor_type():
    """Check ``type()`` equality within each factor family and inequality across families.

    For LinearGaussianCPD, CKDE and DiscreteFactor, three factors with
    different variables/evidence are built.  Their ``type()`` must equal a
    freshly constructed type object of the family and must equal each other.
    Finally, the types of the three families must be pairwise different.
    """
    signatures = [("a", []), ("b", ["a"]), ("c", ["b", "a"])]
    families = [
        (pbn.LinearGaussianCPD, pbn.LinearGaussianCPDType),
        (pbn.CKDE, pbn.CKDEType),
        (pbn.DiscreteFactor, pbn.DiscreteFactorType),
    ]

    # One representative (the factor for "a" with no evidence) is kept per
    # family for the cross-family comparison at the end.
    representatives = []
    for factor_cls, type_cls in families:
        first, second, third = [
            factor_cls(variable, evidence) for variable, evidence in signatures
        ]

        assert first.type() == type_cls()
        assert first.type() == second.type()
        assert first.type() == third.type()
        assert second.type() == third.type()

        representatives.append(first)

    gaussian, ckde, discrete = representatives
    assert gaussian.type() != ckde.type()
    assert gaussian.type() != discrete.type()
    assert ckde.type() != discrete.type()
Пример #2
0
def test_lg_sample():
    """Check the Arrow type and length of samples drawn from fitted LinearGaussianCPDs.

    Three CPDs are exercised: no evidence, one evidence column and two
    evidence columns.  When there is evidence, a DataFrame with constant
    columns of length SAMPLE_SIZE is passed to ``sample``; otherwise ``None``.
    """
    SAMPLE_SIZE = 1000

    # (variable, evidence, constant value for each evidence column)
    cases = [
        ('a', [], None),
        ('b', ['a'], [('a', 3.0)]),
        ('c', ['a', 'b'], [('a', 3.0), ('b', 7.45)]),
    ]

    for variable, evidence, constants in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        if constants is None:
            evidence_values = None
        else:
            evidence_values = pd.DataFrame(
                {name: np.full((SAMPLE_SIZE,), value) for name, value in constants}
            )

        # The last argument (0) is presumably the random seed -- TODO confirm.
        sampled = cpd.sample(SAMPLE_SIZE, evidence_values, 0)

        assert sampled.type == pa.float64()
        # Number of elements derived from the buffer size and the width of
        # one element in bytes.
        bytes_per_element = sampled.type.bit_width / 8
        assert int(sampled.nbytes / bytes_per_element) == SAMPLE_SIZE
Пример #3
0
def test_lg_slogl_null():
    """Compare ``slogl`` on data containing NaNs against a NumPy reference.

    CPDs are fitted on the module-level ``df`` and evaluated on a freshly
    generated dataset in which 100 randomly drawn row positions (possibly
    repeated) per column are set to NaN.  The reference value is the
    NaN-ignoring sum of ``numpy_logpdf``.  The test also checks that the order
    of the evidence does not affect the result.
    """
    test_df = util_test.generate_normal_data(5000)

    # Draw the null positions for columns a, b, c, d in that order so the
    # seeded random stream is consumed deterministically.
    np.random.seed(0)
    null_positions = [
        (column, np.random.randint(0, 5000, size=100))
        for column in ('a', 'b', 'c', 'd')
    ]

    df_null = test_df.copy()
    for column, positions in null_positions:
        df_null.loc[df_null.index[positions], column] = np.nan

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        coefficients, noise_variance = cpd.beta, cpd.variance

        actual = cpd.slogl(df_null)
        expected = np.nansum(
            numpy_logpdf(df_null, variable, evidence, coefficients, noise_variance)
        )
        assert np.all(np.isclose(actual, expected)), \
            "Wrong slogl for LinearGaussianCPD({} | {}) with null values.".format(variable, evidence)

    reference = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    reference.fit(df)
    permuted = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    permuted.fit(df)

    assert np.all(np.isclose(reference.slogl(df_null), permuted.slogl(df_null))), \
        "The order of the evidence changes the slogl() result."
Пример #4
0
def test_lg_slogl():
    """Compare ``slogl`` on complete data against a NumPy reference.

    CPDs are fitted on the module-level ``df`` and evaluated on a freshly
    generated dataset.  The reference value is the sum of ``numpy_logpdf``
    computed with the fitted ``beta`` and ``variance``.  The test also checks
    that the order of the evidence does not affect the result.
    """
    test_df = util_test.generate_normal_data(5000)

    cases = [('a', []), ('b', ['a']), ('c', ['a', 'b']), ('d', ['a', 'b', 'c'])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        cpd.fit(df)

        coefficients, noise_variance = cpd.beta, cpd.variance

        actual = cpd.slogl(test_df)
        expected = np.sum(
            numpy_logpdf(test_df, variable, evidence, coefficients, noise_variance)
        )
        assert np.all(np.isclose(actual, expected)), \
            "Wrong slogl for LinearGaussianCPD({} | {})".format(variable, evidence)

    reference = pbn.LinearGaussianCPD('d', ['a', 'b', 'c'])
    reference.fit(df)
    permuted = pbn.LinearGaussianCPD('d', ['c', 'a', 'b'])
    permuted.fit(df)

    assert np.all(np.isclose(reference.slogl(test_df), permuted.slogl(test_df))), \
        "The order of the evidence changes the slogl() result."
Пример #5
0
def test_lg_fit():
    """Check that fitting a LinearGaussianCPD matches the ``fit_numpy`` reference.

    For each (variable, evidence) pair the CPD must report ``fitted()`` as
    false before ``fit`` and true afterwards, and its ``beta`` and
    ``variance`` must be close to the values returned by ``fit_numpy`` on the
    same data.
    """
    cases = [("a", []), ("b", ["a"]), ("c", ["a", "b"]), ("d", ["a", "b", "c"])]

    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        assert not cpd.fitted()
        cpd.fit(df)
        assert cpd.fitted()

        expected_beta, expected_variance = fit_numpy(df, variable, evidence)

        assert np.allclose(expected_beta, cpd.beta), "Wrong beta vector."
        assert np.allclose(expected_variance, cpd.variance), "Wrong variance."
Пример #6
0
def test_lg_fit_null():
    """Check LinearGaussianCPD fitting on data with NaNs against ``fit_numpy``.

    A copy of the module-level ``df`` gets 100 randomly drawn row positions
    (possibly repeated) per column set to NaN.  Each CPD is fitted on that
    copy and its ``beta`` and ``variance`` are compared with the values
    ``fit_numpy`` returns for the same data.
    """
    # Draw the null positions for columns a, b, c, d in that order so the
    # seeded random stream is consumed deterministically.
    np.random.seed(0)
    null_positions = [
        (column, np.random.randint(0, SIZE, size=100))
        for column in ('a', 'b', 'c', 'd')
    ]

    df_null = df.copy()
    for column, positions in null_positions:
        df_null.loc[df_null.index[positions], column] = np.nan

    cases = [("a", []), ("b", ["a"]), ("c", ["a", "b"]), ("d", ["a", "b", "c"])]
    for variable, evidence in cases:
        cpd = pbn.LinearGaussianCPD(variable, evidence)
        assert not cpd.fitted()
        cpd.fit(df_null)
        assert cpd.fitted()

        expected_beta, expected_variance = fit_numpy(df_null, variable, evidence)

        assert np.allclose(expected_beta, cpd.beta), "Wrong beta vector."
        assert np.allclose(expected_variance, cpd.variance), "Wrong variance."
Пример #7
0
def gaussian_partial_fit_bytes():
    """Return the pickled bytes of a GaussianNetwork with a CPD added only for node "b".

    The network has nodes a, b, c, d and a single arc a -> b.
    """
    nodes = ["a", "b", "c", "d"]
    arcs = [("a", "b")]
    network = GaussianNetwork(nodes, arcs)

    # [1, 2] and 2 are presumably the beta coefficients and the variance of
    # the CPD -- TODO confirm against the LinearGaussianCPD constructor.
    cpd_b = pbn.LinearGaussianCPD("b", ["a"], [1, 2], 2)
    network.add_cpds([cpd_b])

    # NOTE(review): presumably makes the pickled network carry its CPDs --
    # confirm against the library documentation.
    network.include_cpd = True
    return pickle.dumps(network)
Пример #8
0
def test_lg_data_type():
    """Check that a LinearGaussianCPD reports ``pa.float64()`` as its data type."""
    assert pbn.LinearGaussianCPD("a", []).data_type() == pa.float64()
Пример #9
0
def test_lg_evidence():
    """Check that ``evidence()`` returns the evidence list given at construction."""
    cases = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]

    for node, parents in cases:
        factor = pbn.LinearGaussianCPD(node, parents)
        assert factor.evidence() == parents