def test_cvl_local_score_gbn():
    """Check CVLikelihood local scores on a fully connected Gaussian network.

    For every node the local score is compared against the reference helper
    ``numpy_local_score`` evaluated with ``LinearGaussianCPDType``. The test
    also checks that reordering the parents leaves the score unchanged and
    that omitting the parents gives the same value as passing
    ``gbn.parents(node)`` explicitly.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)
    cvl = pbn.CVLikelihood(df, 10, seed)

    # Each node paired with the parent set implied by the arcs above.
    families = (
        ('a', ()),
        ('b', ('a',)),
        ('c', ('a', 'b')),
        ('d', ('a', 'b', 'c')),
    )
    for node, parents in families:
        # list(parents) hands every call its own fresh list.
        score = cvl.local_score(gbn, node, list(parents))
        reference = numpy_local_score(
            pbn.LinearGaussianCPDType(), df, node, list(parents))
        assert np.isclose(score, reference)

    # The score must not depend on the order in which parents are listed.
    in_order = cvl.local_score(gbn, 'd', ['a', 'b', 'c'])
    rotated = cvl.local_score(gbn, 'd', ['b', 'c', 'a'])
    assert np.isclose(in_order, rotated)

    # Leaving the parents out must match passing the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert cvl.local_score(gbn, node) == cvl.local_score(
            gbn, node, gbn.parents(node))
def test_create_change_node():
    """ChangeNodeTypeSet.cache_scores must raise ValueError for a GaussianNetwork.

    The error message is expected to contain
    "can only be used with non-homogeneous".
    """
    network = pbn.GaussianNetwork(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)
    operator_set = pbn.ChangeNodeTypeSet()

    with pytest.raises(ValueError) as ex:
        operator_set.cache_scores(network, score)

    # Checked after the context manager exits, once the exception is captured.
    message = str(ex.value)
    assert "can only be used with non-homogeneous" in message
def test_cvl_create():
    """Check CVLikelihood construction with various arguments.

    Verifies the number of items produced by iterating ``.cv`` for the default
    constructor and for explicit second arguments, that two instances built
    with the same arguments ``(df, 10, 0)`` yield equal train/test DataFrames,
    and that passing ``SIZE + 1`` raises a ValueError mentioning
    "Cannot split".
    """
    default_score = pbn.CVLikelihood(df)
    assert len(list(default_score.cv)) == 10

    five_score = pbn.CVLikelihood(df, 5)
    assert len(list(five_score.cv)) == 5

    first = pbn.CVLikelihood(df, 10, 0)
    assert len(list(first.cv)) == 10

    second = pbn.CVLikelihood(df, 10, 0)
    assert len(list(second.cv)) == 10

    # Walk both cross-validation objects in lockstep and compare each pair.
    for split_a, split_b in zip(first.cv, second.cv):
        train_a, test_a = split_a
        train_b, test_b = split_b
        assert train_a.equals(train_b), \
            "Train CV DataFrames with the same seed are not equal."
        assert test_a.equals(test_b), \
            "Test CV DataFrames with the same seed are not equal."

    with pytest.raises(ValueError) as ex:
        pbn.CVLikelihood(df, SIZE + 1)

    assert "Cannot split" in str(ex.value)
def test_cvl_local_score_spbn():
    """Check CVLikelihood local scores on a semiparametric network.

    The network is fully connected over a, b, c, d, with 'a' and 'c' declared
    as ``CKDEType``. Local scores are compared against the reference helper
    ``numpy_local_score``, first with the node types the test expects for the
    network and then with explicitly supplied node types through
    ``local_score_node_type``.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(
        arcs, [('a', pbn.CKDEType()), ('c', pbn.CKDEType())])
    cvl = pbn.CVLikelihood(df, 10, seed)

    ckde = pbn.CKDEType
    gaussian = pbn.LinearGaussianCPDType

    # (node, parents, type class used for the reference value).
    # 'a' and 'c' are CKDE in the network; 'b' and 'd' are compared against
    # the linear Gaussian reference.
    network_cases = (
        ('a', (), ckde),
        ('b', ('a',), gaussian),
        ('c', ('a', 'b'), ckde),
        ('d', ('a', 'b', 'c'), gaussian),
    )
    for node, parents, type_cls in network_cases:
        score = cvl.local_score(spbn, node, list(parents))
        reference = numpy_local_score(type_cls(), df, node, list(parents))
        assert np.isclose(score, reference)

    # The reference computed with reordered parents must still match.
    score = cvl.local_score(spbn, 'd', ['a', 'b', 'c'])
    reference = numpy_local_score(gaussian(), df, 'd', ['b', 'c', 'a'])
    assert np.isclose(score, reference)

    # Leaving the parents out must match passing the network's own parents.
    for node in ('a', 'b', 'c', 'd'):
        assert cvl.local_score(spbn, node) == cvl.local_score(
            spbn, node, spbn.parents(node))

    # Score every node under the node type opposite to the one used above.
    swapped_cases = (
        ('a', (), gaussian),
        ('b', ('a',), ckde),
        ('c', ('a', 'b'), gaussian),
        ('d', ('a', 'b', 'c'), ckde),
    )
    for node, parents, type_cls in swapped_cases:
        score = cvl.local_score_node_type(
            spbn, type_cls(), node, list(parents))
        reference = numpy_local_score(type_cls(), df, node, list(parents))
        assert np.isclose(score, reference)

    # Same check with the reference computed on reordered parents.
    score = cvl.local_score_node_type(spbn, ckde(), 'd', ['a', 'b', 'c'])
    reference = numpy_local_score(ckde(), df, 'd', ['b', 'c', 'a'])
    assert np.isclose(score, reference)
def test_cvl_score():
    """The network score must equal the sum of the per-node local scores.

    Checked for a fully connected GaussianNetwork (with explicit parent
    lists) and for a SemiparametricBN with the same arcs where 'a' and 'c'
    are CKDEType (using the network's own parents).
    """
    arcs = (('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd'))

    gbn = pbn.GaussianNetwork(list(arcs))
    cv = pbn.CVLikelihood(df, 10, 0)

    gbn_total = cv.score(gbn)
    term_a = cv.local_score(gbn, 'a', [])
    term_b = cv.local_score(gbn, 'b', ['a'])
    term_c = cv.local_score(gbn, 'c', ['a', 'b'])
    term_d = cv.local_score(gbn, 'd', ['a', 'b', 'c'])
    # Plain left-to-right addition keeps the floating-point result unchanged.
    assert np.isclose(gbn_total, term_a + term_b + term_c + term_d)

    spbn = pbn.SemiparametricBN(
        list(arcs), [('a', pbn.CKDEType()), ('c', pbn.CKDEType())])

    spbn_total = cv.score(spbn)
    term_a = cv.local_score(spbn, 'a')
    term_b = cv.local_score(spbn, 'b')
    term_c = cv.local_score(spbn, 'c')
    term_d = cv.local_score(spbn, 'd')
    assert np.isclose(spbn_total, term_a + term_b + term_c + term_d)
def test_cvl_local_score_gbn_null():
    """Check CVLikelihood local scores on a Gaussian network with missing data.

    A copy of ``df`` gets NaN written into randomly chosen rows of each of the
    columns a, b, c, d. The local scores computed on that copy are then
    compared against the reference helper ``numpy_local_score`` evaluated on
    the same data.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    gbn = pbn.GaussianNetwork(arcs)

    columns = ('a', 'b', 'c', 'd')

    np.random.seed(0)
    # Draw the row positions column by column, in the order a, b, c, d, so
    # the sequence of random draws is fixed by the seed above.
    null_rows = {}
    for column in columns:
        null_rows[column] = np.random.randint(0, SIZE, size=100)

    df_null = df.copy()
    for column in columns:
        df_null.loc[df_null.index[null_rows[column]], column] = np.nan

    cvl = pbn.CVLikelihood(df_null, 10, seed)

    # Each node paired with the parent set implied by the arcs above.
    families = (
        ('a', ()),
        ('b', ('a',)),
        ('c', ('a', 'b')),
        ('d', ('a', 'b', 'c')),
    )
    for node, parents in families:
        score = cvl.local_score(gbn, node, list(parents))
        reference = numpy_local_score(
            pbn.LinearGaussianCPDType(), df_null, node, list(parents))
        assert np.isclose(score, reference)

    # The score must not depend on the order in which parents are listed.
    in_order = cvl.local_score(gbn, 'd', ['a', 'b', 'c'])
    rotated = cvl.local_score(gbn, 'd', ['b', 'c', 'a'])
    assert np.isclose(in_order, rotated)

    # Leaving the parents out must match passing the network's own parents.
    for node in columns:
        assert cvl.local_score(gbn, node) == cvl.local_score(
            gbn, node, gbn.parents(node))
def test_find_max():
    """OperatorPool.find_max must return the better of its operator sets' maxima.

    The best operator of an ArcOperatorSet and of a ChangeNodeTypeSet are
    found separately; the pool combining both sets must return whichever has
    the larger ``delta()``, preferring the arc operator on ties.
    """
    network = pbn.SemiparametricBN(['a', 'b', 'c', 'd'])
    score = pbn.CVLikelihood(df)

    arc_set = pbn.ArcOperatorSet()
    type_set = pbn.ChangeNodeTypeSet()

    # The order of these three calls is kept exactly: arc scores are cached
    # before set_unknown_node_types runs, node-type scores afterwards.
    arc_set.cache_scores(network, score)
    network.set_unknown_node_types(df)
    type_set.cache_scores(network, score)

    best_arc = arc_set.find_max(network)
    best_type = type_set.find_max(network)

    pool = pbn.OperatorPool([arc_set, type_set])
    pool.cache_scores(network, score)
    best_overall = pool.find_max(network)

    expected = best_arc if best_arc.delta() >= best_type.delta() else best_type
    assert best_overall == expected
def test_cvl_local_score_null_spbn():
    """Check CVLikelihood local scores on a semiparametric network with missing data.

    The network is fully connected over a, b, c, d, with 'a' and 'c' declared
    as ``CKDEType``. A copy of ``df`` gets NaN written into randomly chosen
    rows of each column. Local scores on that copy are compared against the
    reference helper ``numpy_local_score``, first with the node types the test
    expects for the network and then with explicitly supplied node types
    through ``local_score_node_type``.
    """
    arcs = [('a', 'b'), ('a', 'c'), ('a', 'd'),
            ('b', 'c'), ('b', 'd'), ('c', 'd')]
    spbn = pbn.SemiparametricBN(
        arcs, [('a', pbn.CKDEType()), ('c', pbn.CKDEType())])

    columns = ('a', 'b', 'c', 'd')

    np.random.seed(0)
    # Draw the row positions column by column, in the order a, b, c, d, so
    # the sequence of random draws is fixed by the seed above.
    null_rows = {}
    for column in columns:
        null_rows[column] = np.random.randint(0, SIZE, size=100)

    df_null = df.copy()
    for column in columns:
        df_null.loc[df_null.index[null_rows[column]], column] = np.nan

    cvl = pbn.CVLikelihood(df_null, 10, seed)

    ckde = pbn.CKDEType
    gaussian = pbn.LinearGaussianCPDType

    # (node, parents, type class used for the reference value).
    # 'a' and 'c' are CKDE in the network; 'b' and 'd' are compared against
    # the linear Gaussian reference.
    network_cases = (
        ('a', (), ckde),
        ('b', ('a',), gaussian),
        ('c', ('a', 'b'), ckde),
        ('d', ('a', 'b', 'c'), gaussian),
    )
    for node, parents, type_cls in network_cases:
        score = cvl.local_score(spbn, node, list(parents))
        reference = numpy_local_score(
            type_cls(), df_null, node, list(parents))
        assert np.isclose(score, reference)

    # The reference computed with reordered parents must still match.
    score = cvl.local_score(spbn, 'd', ['a', 'b', 'c'])
    reference = numpy_local_score(gaussian(), df_null, 'd', ['b', 'c', 'a'])
    assert np.isclose(score, reference)

    # Leaving the parents out must match passing the network's own parents.
    for node in columns:
        assert cvl.local_score(spbn, node) == cvl.local_score(
            spbn, node, spbn.parents(node))

    # Score every node under the node type opposite to the one used above.
    swapped_cases = (
        ('a', (), gaussian),
        ('b', ('a',), ckde),
        ('c', ('a', 'b'), gaussian),
        ('d', ('a', 'b', 'c'), ckde),
    )
    for node, parents, type_cls in swapped_cases:
        score = cvl.local_score_node_type(
            spbn, type_cls(), node, list(parents))
        reference = numpy_local_score(
            type_cls(), df_null, node, list(parents))
        assert np.isclose(score, reference)

    # Same check with the reference computed on reordered parents.
    score = cvl.local_score_node_type(spbn, ckde(), 'd', ['a', 'b', 'c'])
    reference = numpy_local_score(ckde(), df_null, 'd', ['b', 'c', 'a'])
    assert np.isclose(score, reference)