def test_factor_type():
    """Check ``type()`` equality within, and inequality across, factor families.

    For each of ``LinearGaussianCPD``, ``CKDE`` and ``DiscreteFactor`` three
    factors with zero, one and two evidence variables are built; all of them
    must report the family's type object. Types of different families must
    compare unequal.
    """
    signatures = (("a", []), ("b", ["a"]), ("c", ["b", "a"]))
    families = (
        (pbn.LinearGaussianCPD, pbn.LinearGaussianCPDType),
        (pbn.CKDE, pbn.CKDEType),
        (pbn.DiscreteFactor, pbn.DiscreteFactorType),
    )

    representatives = []
    for factor_cls, type_cls in families:
        first, second, third = [
            factor_cls(name, parents) for name, parents in signatures
        ]

        assert first.type() == type_cls()
        assert first.type() == second.type()
        assert first.type() == third.type()
        assert second.type() == third.type()

        representatives.append(first)

    # One factor per family is enough for the cross-family comparison.
    linear, ckde, discrete = representatives
    assert linear.type() != ckde.type()
    assert linear.type() != discrete.type()
    assert ckde.type() != discrete.type()
def _test_ckde_kde_joint_iter(variable, evidence, _df):
    """Check that a bandwidth written through ``kde_joint()`` is kept.

    A ``CKDE`` for ``variable`` given ``evidence`` is fitted on ``_df``; the
    bandwidth of the object returned by ``kde_joint()`` is overwritten with an
    identity matrix of size ``len(evidence) + 1`` and then read back through a
    second ``kde_joint()`` call.
    """
    identity = np.eye(len(evidence) + 1)

    ckde = pbn.CKDE(variable, evidence)
    ckde.fit(_df)

    joint_accessor = ckde.kde_joint
    joint_accessor().bandwidth = identity

    # If kde_joint() handed out a copy, the write above would not be visible.
    bandwidth_kept = np.all(ckde.kde_joint().bandwidth == identity)
    assert bandwidth_kept, "kde_joint do not return a reference to the KDE joint, but a copy."
def test_ckde_slogl():
    """Compare ``CKDE.slogl()`` with the summed reference log-density.

    The reference comes from ``train_scipy_ckde`` / ``scipy_ckde_logpdf``.
    Every (variable, evidence) configuration is checked on the four training
    sets, and ``slogl()`` must not depend on the order of the evidence list.
    """

    def check_against_reference(variable, evidence, train_df, eval_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(train_df)

        joint_ref, marg_ref = train_scipy_ckde(train_df, variable, evidence)
        expected_total = scipy_ckde_logpdf(
            eval_df, joint_ref, marg_ref, variable, evidence
        ).sum()

        tolerance = {}
        if np.all(eval_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance["atol"] = 0.0005 * train_df.shape[0]

        assert np.isclose(ckde.slogl(eval_df), expected_total, **tolerance)

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    configurations = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]
    # (training data, evaluation data) pairs, in the order they are checked.
    datasets = [
        (df, test_df),
        (df_small, test_df),
        (df_float, test_df_float),
        (df_small_float, test_df_float),
    ]

    for variable, evidence in configurations:
        for train_df, eval_df in datasets:
            check_against_reference(variable, evidence, train_df, eval_df)

    # Same factor with the evidence listed in reverse order.
    for train_df, eval_df in ((df, test_df), (df_float, test_df_float)):
        forward = pbn.CKDE('d', ['a', 'b', 'c'])
        forward.fit(train_df)
        backward = pbn.CKDE('d', ['c', 'b', 'a'])
        backward.fit(train_df)

        same_result = np.all(
            np.isclose(forward.slogl(eval_df), backward.slogl(eval_df))
        )
        assert same_result, "Order of evidence changes slogl() result."
def test_ckde_cdf():
    """Compare ``CKDE.cdf()`` with the reference from ``scipy_ckde_cdf``.

    Every (variable, evidence) configuration is checked on the four training
    sets. ``cdf()`` must also be independent of the order in which the
    evidence variables are listed.
    """

    def _test_ckde_cdf(variable, evidence, _df, _test_df):
        cpd = pbn.CKDE(variable, evidence)
        cpd.fit(_df)

        scipy_kde_joint, scipy_kde_marg = train_scipy_ckde(
            _df, variable, evidence)

        cdf = cpd.cdf(_test_df)
        scipy = scipy_ckde_cdf(_test_df, scipy_kde_joint, scipy_kde_marg,
                               variable, evidence)

        # A looser absolute tolerance is used when the training data is float32.
        if np.all(_df.dtypes == 'float32'):
            assert np.all(np.isclose(cdf, scipy, atol=0.0005))
        else:
            assert np.all(np.isclose(cdf, scipy))

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    for variable, evidence in [('a', []), ('b', ['a']), ('c', ['a', 'b']),
                               ('d', ['a', 'b', 'c'])]:
        _test_ckde_cdf(variable, evidence, df, test_df)
        _test_ckde_cdf(variable, evidence, df_small, test_df)
        _test_ckde_cdf(variable, evidence, df_float, test_df_float)
        _test_ckde_cdf(variable, evidence, df_small_float, test_df_float)

    # The failure messages name cdf(), the method actually under test.
    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df)
    assert np.all(np.isclose(
        cpd.cdf(test_df),
        cpd2.cdf(test_df))), "Order of evidence changes cdf() result."

    cpd = pbn.CKDE('d', ['a', 'b', 'c'])
    cpd.fit(df_float)
    cpd2 = pbn.CKDE('d', ['c', 'b', 'a'])
    cpd2.fit(df_float)
    assert np.all(
        np.isclose(cpd.cdf(test_df_float),
                   cpd2.cdf(test_df_float),
                   atol=0.0005)), "Order of evidence changes cdf() result."
def test_kde_data_type():
    """Check ``CKDE.data_type()`` before fitting and after fitting each dtype.

    An unfitted factor must raise ``ValueError``; after fitting on ``df`` the
    data type must be ``pa.float64()``, and after re-fitting on ``df_float`` it
    must be ``pa.float32()``.
    """
    k = pbn.CKDE("a", [])

    with pytest.raises(ValueError) as ex:
        k.data_type()
    # The check must be asserted, and must sit outside the ``with`` block:
    # statements after the raising call inside the block never execute.
    assert "CKDE factor not fitted" in str(ex.value)

    k.fit(df)
    assert k.data_type() == pa.float64()

    k.fit(df_float)
    assert k.data_type() == pa.float32()
def _test_ckde_kde_marg_iter(variable, evidence, _df):
    """Check that a bandwidth written through ``kde_marg()`` is kept.

    A ``CKDE`` for ``variable`` given ``evidence`` is fitted on ``_df``. When
    there is evidence, the object returned by ``kde_marg()`` must be fitted,
    and a bandwidth assigned through it must be visible on a second
    ``kde_marg()`` call. Nothing is checked when ``evidence`` is empty.
    """
    cpd = pbn.CKDE(variable, evidence)
    cpd.fit(_df)
    kde_marg = cpd.kde_marg

    if not evidence:
        # kde_marg contains garbage if there is no evidence
        return

    assert kde_marg().fitted()
    kde_marg().bandwidth = np.eye(len(evidence))
    # The message refers to the marginal KDE, which is what is tested here.
    assert np.all(
        cpd.kde_marg().bandwidth == np.eye(len(evidence))
    ), "kde_marg do not return a reference to the KDE marginal, but a copy."
def _test_ckde_cdf(variable, evidence, _df, _test_df):
    """Assert that ``CKDE.cdf()`` on ``_test_df`` matches ``scipy_ckde_cdf``.

    The factor and the reference estimators are both trained on ``_df``. An
    absolute tolerance of 0.0005 is used when every column of ``_df`` is
    float32; otherwise the ``np.isclose`` defaults apply.
    """
    ckde = pbn.CKDE(variable, evidence)
    ckde.fit(_df)

    joint_ref, marg_ref = train_scipy_ckde(_df, variable, evidence)

    observed = ckde.cdf(_test_df)
    expected = scipy_ckde_cdf(_test_df, joint_ref, marg_ref, variable, evidence)

    tolerance = {}
    if np.all(_df.dtypes == 'float32'):
        tolerance["atol"] = 0.0005

    assert np.all(np.isclose(observed, expected, **tolerance))
def _test_ckde_logl_null(variable, evidence, _df, _test_df):
    """Assert that ``CKDE.logl()`` matches ``scipy_ckde_logpdf``, NaNs included.

    The factor and the reference estimators are trained on ``_df`` and
    evaluated on ``_test_df``. NaN entries are treated as equal to each other;
    an absolute tolerance of 0.0005 is added when every column of ``_test_df``
    is float32.
    """
    ckde = pbn.CKDE(variable, evidence)
    ckde.fit(_df)

    joint_ref, marg_ref = train_scipy_ckde(_df, variable, evidence)

    observed = ckde.logl(_test_df)
    expected = scipy_ckde_logpdf(_test_df, joint_ref, marg_ref, variable, evidence)

    isclose_options = {"equal_nan": True}
    if np.all(_test_df.dtypes == "float32"):
        isclose_options["atol"] = 0.0005

    assert np.all(np.isclose(observed, expected, **isclose_options))
def _test_ckde_slogl_null(variable, evidence, _df, _test_df):
    """Assert that ``CKDE.slogl()`` equals the NaN-ignoring reference sum.

    The reference is ``np.nansum`` over ``scipy_ckde_logpdf`` evaluated on
    ``_test_df``, with estimators trained on ``_df``.
    """
    ckde = pbn.CKDE(variable, evidence)
    ckde.fit(_df)

    joint_ref, marg_ref = train_scipy_ckde(_df, variable, evidence)
    reference_logl = scipy_ckde_logpdf(
        _test_df, joint_ref, marg_ref, variable, evidence
    )

    tolerance = {}
    if np.all(_test_df.dtypes == "float32"):
        # Allow an error of 0.0005 for each training instance.
        tolerance["atol"] = 0.0005 * _df.shape[0]

    assert np.isclose(ckde.slogl(_test_df), np.nansum(reference_logl), **tolerance)
def _test_ckde_fit(variables, _df, instances):
    """Fit a ``CKDE`` on the first ``instances`` rows and check its bandwidths.

    Args:
        variables: Column names, target variable first and its evidence after
            it. This layout is what the ``covariance[1:, 1:]`` comparison
            below relies on.
        _df: Training data containing the columns in ``variables``.
        instances: Number of leading rows of ``_df`` used for fitting.

    The joint bandwidth is compared with the covariance of a ``gaussian_kde``
    built on the same rows, and the marginal bandwidth (when there is
    evidence) with the sub-matrix that drops the target's row and column.
    """
    # Derive the target and evidence from the argument rather than relying on
    # free names from an enclosing scope.
    variable, evidence = variables[0], list(variables[1:])

    npdata = _df.loc[:, variables].to_numpy()
    # Bandwidth factor: Scott's factor scaled by (4 / (d + 2)) ** (1 / (d + 4)).
    scipy_kde = gaussian_kde(
        npdata[:instances, :].T,
        bw_method=lambda s: np.power(4 / (s.d + 2), 1 /
                                     (s.d + 4)) * s.scotts_factor())

    cpd = pbn.CKDE(variable, evidence)
    assert not cpd.fitted()
    cpd.fit(_df.iloc[:instances])
    assert cpd.fitted()

    kde_joint = cpd.kde_joint
    assert np.all(np.isclose(kde_joint().bandwidth, scipy_kde.covariance))

    if evidence:
        kde_marg = cpd.kde_marg
        assert np.all(
            np.isclose(kde_marg().bandwidth, scipy_kde.covariance[1:, 1:]))

    assert cpd.num_instances() == instances
def test_ckde_sample():
    """Check the Arrow type and length of arrays returned by ``CKDE.sample()``.

    For factors fitted on ``df`` (expected ``pa.float64()``) and on
    ``df_float`` (expected ``pa.float32()``), samples are drawn with zero, one
    and two evidence variables held at constant values. The sample length is
    derived from the array's byte size and the bit width of its type.
    """
    SAMPLE_SIZE = 1000

    # (variable, evidence, constant value for each evidence column)
    scenarios = (
        ('a', [], None),
        ('b', ['a'], {'a': 3.0}),
        ('c', ['a', 'b'], {'a': 3.0, 'b': 7.45}),
    )
    # (training data, expected Arrow type, dtype of the evidence columns)
    precisions = (
        (df, pa.float64(), np.float64),
        (df_float, pa.float32(), np.float32),
    )

    for train_df, arrow_type, np_dtype in precisions:
        for variable, evidence, constants in scenarios:
            ckde = pbn.CKDE(variable, evidence)
            ckde.fit(train_df)

            if constants is None:
                conditioning = None
            else:
                conditioning = pd.DataFrame({
                    column: np.full((SAMPLE_SIZE, ), value, dtype=np_dtype)
                    for column, value in constants.items()
                })

            drawn = ckde.sample(SAMPLE_SIZE, conditioning, 0)

            assert drawn.type == arrow_type
            bytes_per_value = drawn.type.bit_width / 8
            assert int(drawn.nbytes / bytes_per_value) == SAMPLE_SIZE
def test_ckde_cdf_null():
    """Compare ``CKDE.cdf()`` with ``scipy_ckde_cdf`` on data containing NaNs.

    Ten random row positions per column (seeded) are set to NaN in the
    evaluation data. NaN results are treated as equal to each other in every
    comparison, and ``cdf()`` must not depend on the evidence order.
    """

    def check_against_reference(variable, evidence, train_df, eval_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(train_df)

        joint_ref, marg_ref = train_scipy_ckde(train_df, variable, evidence)

        observed = ckde.cdf(eval_df)
        expected = scipy_ckde_cdf(eval_df, joint_ref, marg_ref, variable, evidence)

        isclose_options = {"equal_nan": True}
        if np.all(train_df.dtypes == 'float32'):
            isclose_options["atol"] = 0.0005

        assert np.all(np.isclose(observed, expected, **isclose_options))

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn once, in column order a, b, c, d, and reused for
    # both precisions.
    np.random.seed(0)
    null_positions = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    def with_nulls(frame):
        holed = frame.copy()
        for column, positions in null_positions.items():
            holed.loc[holed.index[positions], column] = np.nan
        return holed

    df_null = with_nulls(test_df)
    df_null_float = with_nulls(test_df_float)

    configurations = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]
    datasets = [
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    ]
    for variable, evidence in configurations:
        for train_df, eval_df in datasets:
            check_against_reference(variable, evidence, train_df, eval_df)

    # Same factor with the evidence listed in reverse order; only the float32
    # comparison gets an explicit absolute tolerance.
    order_checks = (
        (df, df_null, {}),
        (df_float, df_null_float, {"atol": 0.0005}),
    )
    for train_df, eval_df, tolerance in order_checks:
        forward = pbn.CKDE('d', ['a', 'b', 'c'])
        forward.fit(train_df)
        backward = pbn.CKDE('d', ['c', 'b', 'a'])
        backward.fit(train_df)

        same_result = np.all(
            np.isclose(forward.cdf(eval_df),
                       backward.cdf(eval_df),
                       equal_nan=True,
                       **tolerance))
        assert same_result, "Order of evidence changes cdf() result."
def test_ckde_slogl_null():
    """Compare ``CKDE.slogl()`` with a NaN-ignoring reference sum on null data.

    Ten random row positions per column (seeded) are set to NaN in the
    evaluation data. The reference is ``np.nansum`` over ``scipy_ckde_logpdf``.
    ``slogl()`` must also be independent of the evidence order.
    """

    def check_against_reference(variable, evidence, train_df, eval_df):
        ckde = pbn.CKDE(variable, evidence)
        ckde.fit(train_df)

        joint_ref, marg_ref = train_scipy_ckde(train_df, variable, evidence)
        reference_logl = scipy_ckde_logpdf(
            eval_df, joint_ref, marg_ref, variable, evidence
        )

        tolerance = {}
        if np.all(eval_df.dtypes == "float32"):
            # Allow an error of 0.0005 for each training instance.
            tolerance["atol"] = 0.0005 * train_df.shape[0]

        assert np.isclose(
            ckde.slogl(eval_df), np.nansum(reference_logl), **tolerance
        )

    test_df = util_test.generate_normal_data(TEST_SIZE, seed=1)
    test_df_float = test_df.astype('float32')

    # Row positions are drawn once, in column order a, b, c, d, and reused for
    # both precisions.
    np.random.seed(0)
    null_positions = {
        column: np.random.randint(0, TEST_SIZE, size=10)
        for column in ('a', 'b', 'c', 'd')
    }

    def with_nulls(frame):
        holed = frame.copy()
        for column, positions in null_positions.items():
            holed.loc[holed.index[positions], column] = np.nan
        return holed

    df_null = with_nulls(test_df)
    df_null_float = with_nulls(test_df_float)

    configurations = [
        ('a', []),
        ('b', ['a']),
        ('c', ['a', 'b']),
        ('d', ['a', 'b', 'c']),
    ]
    datasets = [
        (df, df_null),
        (df_small, df_null),
        (df_float, df_null_float),
        (df_small_float, df_null_float),
    ]
    for variable, evidence in configurations:
        for train_df, eval_df in datasets:
            check_against_reference(variable, evidence, train_df, eval_df)

    # Same factor with the evidence listed in reverse order.
    for train_df, eval_df in ((df, df_null), (df_float, df_null_float)):
        forward = pbn.CKDE('d', ['a', 'b', 'c'])
        forward.fit(train_df)
        backward = pbn.CKDE('d', ['c', 'b', 'a'])
        backward.fit(train_df)

        same_result = np.all(
            np.isclose(forward.slogl(eval_df), backward.slogl(eval_df))
        )
        assert same_result, "Order of evidence changes slogl() result."
def test_evidence():
    """Check that ``CKDE.evidence()`` returns the list given at construction."""
    evidence_by_variable = {
        'a': [],
        'b': ['a'],
        'c': ['a', 'b'],
        'd': ['a', 'b', 'c'],
    }

    for variable, evidence in evidence_by_variable.items():
        factor = pbn.CKDE(variable, evidence)
        assert factor.evidence() == evidence