Exemplo n.º 1
0
def test_ipcmb_finding_Markov_blankets_in_graphs__as_in_pcmb_article():
    """Run IPC-MB on the example graph from the PCMB article.

    The article uses this graph to illustrate the flaws of MMMB and HITON.
    """
    directed_graph = {0: [1, 2], 1: [3], 2: [3], 3: [], 4: [1]}
    network = BayesianNetwork('testnet_a')
    network.from_directed_graph(directed_graph)

    # With 'pc_only' enabled, the result for 4 must be [1] alone.
    pc_parameters = make_parameters(4, network)
    pc_parameters['pc_only'] = True
    pc = AlgorithmIPCMB(None, pc_parameters).discover_mb()
    assert pc == [1]

    # (first argument to make_parameters, expected discover_mb() result);
    # the first argument is presumably the target variable -- confirm
    # against make_parameters.
    expected_blankets = (
        (4, [0, 1]),
        (0, [1, 2, 4]),
        (2, [0, 1, 3]),
        (1, [0, 2, 3, 4]),
    )
    for target, expected in expected_blankets:
        parameters = make_parameters(target, network)
        blanket = AlgorithmIPCMB(None, parameters).discover_mb()
        assert blanket == expected
Exemplo n.º 2
0
def test_reading_bif_file():
    """Read 'survey.bif' and compare it with the hand-built expected network."""
    bif_path = Path(testutil.bif_folder, 'survey.bif')
    assertBayesianNetworkEqual(
        default_Bayesian_network(),
        BayesianNetwork.from_bif_file(bif_path, use_cache=False),
    )
def test_sampling_bayesian_network_as_dataset_source__random():
    """Compare a 'random'-method dataset matrix with direct network sampling.

    A reference sample matrix is drawn straight from the network after
    seeding ``random`` with the configured seed; the dataset source is then
    asked for its matrix with ``reset_random_seed`` enabled (presumably so it
    reseeds with the same value -- confirm in the dataset source).
    """
    configuration = default_configuration()
    configuration['method'] = 'random'
    sample_count = configuration['sample_count']

    # Reference samples, drawn directly from the Bayesian network.
    random.seed(configuration['random_seed'])
    reference_bn = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    reference_bn.finalize()
    expected_samples = reference_bn.sample_matrix(sample_count)

    # Samples produced through the dataset source under test.
    source = SampledBayesianNetworkDatasetSource(configuration)
    source.reset_random_seed = True
    dataset = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == dataset.column_labels_X
    assert ['R', 'TRN'] == dataset.column_labels_Y
    assert ['row{}'.format(i) for i in range(sample_count)] == dataset.row_labels

    assert (sample_count, 4) == dataset.X.get_shape()
    assert (sample_count, 2) == dataset.Y.get_shape()

    # X columns 0..3 must equal sample-matrix columns 0, 1, 2 and 4.
    for x_index, sample_column in enumerate((0, 1, 2, 4)):
        assert numpy.array_equal(expected_samples[:, sample_column],
                                 dataset.get_column_X(x_index)) is True

    # Y columns 0..1 must equal sample-matrix columns 3 and 5.
    for y_index, sample_column in enumerate((3, 5)):
        assert numpy.array_equal(expected_samples[:, sample_column],
                                 dataset.get_column_Y(y_index)) is True
def test_sampling_bayesian_network_as_dataset_source__exact():
    """Compare an 'exact'-method dataset matrix with the joint PMF instances.

    The reference matrix is built from the instance list of the network's
    joint PMF, converted to an int8 numpy array.
    """
    configuration = default_configuration()
    configuration['method'] = 'exact'
    sample_count = configuration['sample_count']

    # Reference instances, generated directly from the joint PMF.
    reference_bn = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    reference_bn.finalize()
    instances = reference_bn.create_joint_pmf().create_instances_list(sample_count)
    expected_matrix = numpy.asarray(list(map(list, instances)), dtype=numpy.int8)

    # Matrix produced through the dataset source under test.
    source = SampledBayesianNetworkDatasetSource(configuration)
    dataset = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == dataset.column_labels_X
    assert ['R', 'TRN'] == dataset.column_labels_Y
    assert ['row{}'.format(i) for i in range(sample_count)] == dataset.row_labels

    assert (sample_count, 4) == dataset.X.get_shape()
    assert (sample_count, 2) == dataset.Y.get_shape()

    # X columns 0..3 must equal reference columns 0, 1, 2 and 4.
    for x_index, reference_column in enumerate((0, 1, 2, 4)):
        assert numpy.array_equal(expected_matrix[:, reference_column],
                                 dataset.get_column_X(x_index)) is True

    # Y columns 0..1 must equal reference columns 3 and 5.
    for y_index, reference_column in enumerate((3, 5)):
        assert numpy.array_equal(expected_matrix[:, reference_column],
                                 dataset.get_column_Y(y_index)) is True
 def __init__(self, configuration, finalize_bn=True):
     """Store the configuration and load the Bayesian network it points to.

     The network is read from ``configuration['sourcepath']`` with
     ``use_cache=True`` and is finalized unless ``finalize_bn`` is false.
     ``reset_random_seed`` starts out as True.
     """
     self.configuration = configuration
     self.bayesian_network = BayesianNetwork.from_bif_file(
         self.configuration['sourcepath'], use_cache=True)
     if finalize_bn:
         self.bayesian_network.finalize()
     self.reset_random_seed = True
Exemplo n.º 6
0
def test_d_separation__custom_graph_3():
    """Check d-separation on the example graph from the PCMB article.

    The article's authors use this graph to illustrate the flaws found in
    MMMB and HITON.
    """
    network = BayesianNetwork('testnet_a')
    network.from_directed_graph({0: [1, 2], 1: [3], 2: [3], 3: [], 4: [1]})
    network.debug = True

    # Each entry is (first argument, conditioning list, third argument,
    # expected result), checked in order.
    checks = (
        (4, [1], 3, False),
        # [0, 1] is the Markov blanket of 4, by the Intersection Property.
        (4, [0, 1, 3], 2, True),
        (4, [0, 1, 2], 3, True),
        # [0, 1] is the Markov blanket of 4, by the Contraction Property.
        (4, [0, 1, 3], 2, True),
        (4, [0, 1], 3, True),
    )
    for first, conditioning, second, expected in checks:
        assert network.d_separated(first, conditioning, second) is expected
Exemplo n.º 7
0
def test_d_separation__custom_graph_2():
    """Check d-separation on a graph imitating the 'survey' Bayesian network.

    See http://www.bnlearn.com/bnrepository/discrete-small.html#survey
    """
    network = BayesianNetwork('testnet')
    network.from_directed_graph({0: [1], 4: [1], 1: [2, 3], 2: [5], 3: [5]})

    # Each entry is (first argument, conditioning list, third argument,
    # expected result), checked in order.
    checks = (
        (3, [0], 1, False),
        (3, [], 2, False),
        (3, [5], 2, False),
        (3, [1], 2, True),
    )
    for first, conditioning, second, expected in checks:
        assert network.d_separated(first, conditioning, second) is expected
Exemplo n.º 8
0
    def network_definition(self, items):
        """Assemble a BayesianNetwork from the given items.

        ``items`` is iterated twice. Dict items provide the network name and
        (optionally) its properties; VariableNode items are registered by
        name; ProbabilityDistributionOfVariableNode items are linked to their
        variable node and have their conditioning entries replaced by the
        registered nodes of the same name.
        """
        network = BayesianNetwork('')

        # First pass: collect 'network_name', properties and all variables.
        for entry in items:
            if isinstance(entry, dict):
                network.name = entry['network_name']
                network.properties = entry.get('properties', {})
            if isinstance(entry, VariableNode):
                network.variable_nodes[entry.name] = entry

        # Second pass: cross-link each distribution with its variable node.
        for entry in items:
            if not isinstance(entry, ProbabilityDistributionOfVariableNode):
                continue

            node = network.variable_nodes[entry.variable_name]
            node.probdist = entry
            entry.variable = node

            # Overwrite each conditioning entry with the registered node of
            # the same name.
            conditioning = entry.conditioning_variable_nodes
            for varname in conditioning.keys():
                conditioning[varname] = network.variable_nodes[varname]

        return network
Exemplo n.º 9
0
    def setup(self, arguments):
        """Configure this object from ``arguments`` and load its Bayesian network.

        Stores the arguments, resolves the algorithm class through
        ``self.get_algorithm_class()``, then loads and finalizes the network
        found at ``self.paths.BIFRepository / <source_name>`` with its suffix
        set to ``.bif``. When the source type is ``'ds'``, the sample count is
        required and parsed, and the CI test significance is set to 0.95.

        Raises:
            ValueError: if the source type is 'ds' and no sample count is given.
        """
        # Must be the lower-case attribute: every later read in this method
        # goes through self.arguments.
        self.arguments = arguments
        self.algorithm_name = self.arguments.algorithm
        self.algorithm_class = self.get_algorithm_class()

        self.source_type = self.arguments.source_type

        self.bayesian_network_name = self.arguments.source_name
        bn_sourcepath = self.paths.BIFRepository / self.bayesian_network_name
        bn_sourcepath = bn_sourcepath.with_suffix('.bif')

        self.bayesian_network = BayesianNetwork.from_bif_file(bn_sourcepath,
                                                              use_cache=True)
        self.bayesian_network.finalize()

        if self.source_type == 'ds':
            if self.arguments.sample_count is None:
                raise ValueError('sample count required')
            self.sample_count_string = self.arguments.sample_count
            # Parsing through float() first accepts scientific notation
            # such as '4e4', which int() alone would reject.
            self.sample_count = int(float(self.sample_count_string))
            self.ci_test_significance = 0.95
Exemplo n.º 10
0
def test_d_separation__custom_graph_1():
    """Check d-separation on a simple graph from Judea Pearl's book.

    The graph comes from "Probabilistic Reasoning in Intelligent Systems"
    by Judea Pearl, 1988.
    """
    network = BayesianNetwork('testnet')
    network.from_directed_graph({1: [2, 3], 2: [4], 3: [4], 4: [5], 5: []})

    # Each entry is (first argument, conditioning list, third argument,
    # expected result), checked in order.
    checks = (
        (2, [1], 3, True),
        (2, [1, 5], 3, False),
        (1, [], 2, False),
        (1, [], 3, False),
        (1, [], 4, False),
        (1, [], 5, False),

        (1, [4], 5, True),
        (2, [], 3, False),
        (2, [4], 3, False),

        (5, [2], 1, False),
        (5, [3], 1, False),
        (5, [2, 3], 1, True),

        (3, [1, 2], 5, False),
    )
    for first, conditioning, second, expected in checks:
        assert network.d_separated(first, conditioning, second) is expected
Exemplo n.º 11
0
def bn_survey() -> BayesianNetwork:
    """Load 'survey.bif' from the test BIF folder (use_cache=False) and finalize it."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'survey.bif'), use_cache=False)
    network.finalize()
    return network
Exemplo n.º 12
0
def bn_andes():
    """Load 'andes.bif' from the test BIF folder (use_cache=True) and finalize it."""
    bif_path = Path(testutil.bif_folder, 'andes.bif')
    network = BayesianNetwork.from_bif_file(bif_path, use_cache=True)
    network.finalize()
    return network
Exemplo n.º 13
0
def test_building_from_directed_graph():
    """Build networks from directed graphs and check both stored graphs.

    For each case, ``graph_d`` must equal the directed input and ``graph_u``
    must equal the expected undirected adjacency lists.
    """

    def check_graphs(directed, undirected):
        # Build a fresh network from 'directed' and compare both graphs.
        network = BayesianNetwork('testnet_graph')
        network.from_directed_graph(directed)
        assert network.graph_d == directed
        assert network.graph_u == undirected

    # Case 1: a 37-node graph.
    large_directed = {
        0: [32],
        1: [3, 9],
        2: [],
        3: [12],
        4: [2],
        5: [],
        6: [36],
        7: [14, 15],
        8: [13],
        9: [],
        10: [28],
        11: [],
        12: [4, 13, 14, 15],
        13: [],
        14: [],
        15: [],
        16: [20, 31],
        17: [3],
        18: [22, 26, 30, 33, 34],
        19: [26, 34],
        20: [5, 25],
        21: [11, 20, 31],
        22: [],
        23: [35],
        24: [],
        25: [],
        26: [],
        27: [24, 30],
        28: [29],
        29: [3],
        30: [29],
        31: [4],
        32: [2, 3],
        33: [1, 28],
        34: [9, 22, 33],
        35: [36],
        36: [26, 34],
    }
    large_undirected = {
        0: [32],
        1: [3, 9, 33],
        2: [4, 32],
        3: [1, 12, 17, 29, 32],
        4: [2, 12, 31],
        5: [20],
        6: [36],
        7: [14, 15],
        8: [13],
        9: [1, 34],
        10: [28],
        11: [21],
        12: [3, 4, 13, 14, 15],
        13: [8, 12],
        14: [7, 12],
        15: [7, 12],
        16: [20, 31],
        17: [3],
        18: [22, 26, 30, 33, 34],
        19: [26, 34],
        20: [5, 16, 21, 25],
        21: [11, 20, 31],
        22: [18, 34],
        23: [35],
        24: [27],
        25: [20],
        26: [18, 19, 36],
        27: [24, 30],
        28: [10, 29, 33],
        29: [3, 28, 30],
        30: [18, 27, 29],
        31: [4, 16, 21],
        32: [0, 2, 3],
        33: [1, 18, 28, 34],
        34: [9, 18, 19, 22, 33, 36],
        35: [23, 36],
        36: [6, 26, 34, 35],
    }
    check_graphs(large_directed, large_undirected)

    # Case 2: a small 5-node graph.
    small_directed = {1: [2, 3], 2: [4], 3: [4], 4: [5], 5: []}
    small_undirected = {
        1: [2, 3],
        2: [1, 4],
        3: [1, 4],
        4: [2, 3, 5],
        5: [4],
    }
    check_graphs(small_directed, small_undirected)
Exemplo n.º 14
0
def test_ipcmb_finding_Markov_blankets_in_graphs__imitating_survey():
    """Run IPC-MB on variations of a graph imitating the 'survey' network.

    See http://www.bnlearn.com/bnrepository/discrete-small.html#survey
    """

    def build_network(directed_graph):
        # Create a fresh 'testnet' network from the given directed graph.
        network = BayesianNetwork('testnet')
        network.from_directed_graph(directed_graph)
        return network

    def discover(target, network):
        # Run IPC-MB with the parameters made for 'target' on 'network'.
        parameters = make_parameters(target, network)
        return AlgorithmIPCMB(None, parameters).discover_mb()

    graph = {0: [1], 4: [1], 1: [2, 3], 2: [5], 3: [5]}
    bn = build_network(graph)
    assert discover(3, bn) == [1, 2, 5]
    assert discover(1, bn) == [0, 2, 3, 4]

    # Remove the edge 1 → 2 from the Bayesian network.
    graph[1] = [3]
    bn = build_network(graph)
    assert discover(3, bn) == [1, 2, 5]
    assert discover(1, bn) == [0, 3, 4]

    # Replace the edge from 1 → 3 with 1 → 2.
    graph[1] = [2]
    bn = build_network(graph)
    assert discover(3, bn) == [2, 5]
    assert discover(1, bn) == [0, 2, 4]
Exemplo n.º 15
0
def make_test_bayesian_network(configuration):
    """Load and finalize the network at ``configuration['sourcepath']``.

    The read (use_cache=True) happens while holding a ``Lock`` named
    'bn-' + the source file name, opened with mode 'w'; finalization happens
    after the lock is released.
    """
    network = None
    lock_name = 'bn-' + configuration['sourcepath'].name
    with Lock(lock_name, 'w'):
        network = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=True)
    network.finalize()
    return network
Exemplo n.º 16
0
def bn_lc_repaired() -> BayesianNetwork:
    """Load 'lc_repaired.bif' from the test BIF folder (use_cache=False) and finalize it."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'lc_repaired.bif'), use_cache=False)
    network.finalize()
    return network
Exemplo n.º 17
0
def bn_lungcancer() -> BayesianNetwork:
    """Load 'lungcancer.bif' from the test BIF folder (use_cache=False) and finalize it."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'lungcancer.bif'), use_cache=False)
    network.finalize()
    return network
Exemplo n.º 18
0
def test_conditional_pmf__from_bayesian_network():
    """Compare PMFs/CPMFs estimated from sampled data with the network's own.

    A dataset is sampled from 'survey.bif' through the dataset source, and
    the distribution built for every variable is checked against the
    corresponding ``probdist`` of the Bayesian network.
    """
    configuration = {
        'sourcepath': testutil.bif_folder / 'survey.bif',
        'sample_count': int(4e4),
        # Using a random seed of 42 somehow requires 2e6 samples to pass,
        # but with the seed 1984 it is sufficient to generate only 4e4.
        # Maybe the random generator is biased somehow?
        'random_seed': 1984,
        'values_as_indices': False,
        'objectives': ['R', 'TRN'],
    }

    bayesian_network = BayesianNetwork.from_bif_file(configuration['sourcepath'], use_cache=False)
    bayesian_network.finalize()
    nodes = bayesian_network.variable_nodes

    source = SampledBayesianNetworkDatasetSource(configuration)
    source.reset_random_seed = True
    datasetmatrix = source.create_dataset_matrix('test_sbnds')

    assert ['AGE', 'EDU', 'OCC', 'SEX'] == datasetmatrix.column_labels_X
    assert ['R', 'TRN'] == datasetmatrix.column_labels_Y

    def column_variable(matrix_label, column_label):
        # Wrap the labelled column of the 'X' or 'Y' matrix in a Variable.
        return Variable(datasetmatrix.get_column_by_label(matrix_label, column_label))

    # Unconditioned variables: AGE and SEX.
    AGE = column_variable('X', 'AGE')
    PrAge = PMF(AGE)
    SEX = column_variable('X', 'SEX')
    PrSex = PMF(SEX)

    assert_PMF_AlmostEquals_BNProbDist(nodes['AGE'].probdist, PrAge)
    assert_PMF_AlmostEquals_BNProbDist(nodes['SEX'].probdist, PrSex)

    # EDU given (AGE, SEX).
    EDU = column_variable('X', 'EDU')
    PrEdu = CPMF(EDU, given=JointVariables(AGE, SEX))
    assert_CPMF_AlmostEquals_BNProbDist(nodes['EDU'].probdist, PrEdu)

    # OCC given EDU.
    OCC = column_variable('X', 'OCC')
    PrOcc = CPMF(OCC, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(nodes['OCC'].probdist, PrOcc)

    # R given EDU.
    R = column_variable('Y', 'R')
    PrR = CPMF(R, given=EDU)
    assert_CPMF_AlmostEquals_BNProbDist(nodes['R'].probdist, PrR)

    # TRN given (OCC, R).
    TRN = column_variable('Y', 'TRN')
    PrTRN = CPMF(TRN, given=JointVariables(OCC, R))
    assert_CPMF_AlmostEquals_BNProbDist(nodes['TRN'].probdist, PrTRN)
Exemplo n.º 19
0
def bn_pathfinder():
    """Load 'pathfinder.bif' from the test BIF folder (use_cache=True) and finalize it."""
    network = BayesianNetwork.from_bif_file(
        Path(testutil.bif_folder, 'pathfinder.bif'), use_cache=True)
    network.finalize()
    return network
Exemplo n.º 20
0
def default_Bayesian_network():
    """Build the expected 'survey' Bayesian network by hand.

    Six variable nodes are created (AGE, SEX, EDU, OCC, R, TRN), each with
    its values, a 'label' property and a probability distribution built for
    that same node. Unconditioned distributions use the single key
    '<unconditioned>'; conditioned ones are keyed by tuples of conditioning
    values, in the order of ``conditioning_variable_nodes``.

    Returns:
        A BayesianNetwork named 'survey' with properties {'testing': 'yes'}
        and the six nodes registered by name. The network is not finalized
        here.
    """
    AGE = VariableNode('AGE')
    AGE.values = ['young', 'adult', 'old']
    AGE.properties = {'label': 'age'}
    AGE.probdist = ProbabilityDistributionOfVariableNode(AGE)
    AGE.probdist.conditioning_variable_nodes = OrderedDict()
    AGE.probdist.probabilities = {'<unconditioned>': [0.3, 0.5, 0.2]}

    SEX = VariableNode('SEX')
    SEX.values = ['M', 'F']
    SEX.properties = {'label': 'sex'}
    SEX.probdist = ProbabilityDistributionOfVariableNode(SEX)
    SEX.probdist.conditioning_variable_nodes = OrderedDict()
    SEX.probdist.probabilities = {'<unconditioned>': [0.49, 0.51]}

    EDU = VariableNode('EDU')
    EDU.values = ['highschool', 'uni']
    EDU.properties = {'label': 'education'}
    # The distribution must be built for EDU itself, like every other node;
    # it was previously constructed with AGE.
    EDU.probdist = ProbabilityDistributionOfVariableNode(EDU)
    EDU.probdist.conditioning_variable_nodes = OrderedDict([('AGE', AGE),
                                                            ('SEX', SEX)])
    EDU.probdist.probabilities = {
        ('young', 'M'): [0.75, 0.25],
        ('young', 'F'): [0.64, 0.36],
        ('adult', 'M'): [0.72, 0.28],
        ('adult', 'F'): [0.70, 0.30],
        ('old', 'M'): [0.88, 0.12],
        ('old', 'F'): [0.90, 0.10]
    }

    OCC = VariableNode('OCC')
    OCC.values = ['emp', 'self']
    OCC.properties = {'label': 'occupation'}
    OCC.probdist = ProbabilityDistributionOfVariableNode(OCC)
    OCC.probdist.conditioning_variable_nodes = OrderedDict([('EDU', EDU)])
    OCC.probdist.probabilities = {
        ('highschool', ): [0.96, 0.04],
        ('uni', ): [0.92, 0.08]
    }

    R = VariableNode('R')
    R.values = ['small', 'big']
    R.properties = {'label': 'unknown'}
    R.probdist = ProbabilityDistributionOfVariableNode(R)
    R.probdist.conditioning_variable_nodes = OrderedDict([('EDU', EDU)])
    R.probdist.probabilities = {
        ('highschool', ): [0.25, 0.75],
        ('uni', ): [0.2, 0.8]
    }

    TRN = VariableNode('TRN')
    TRN.values = ['car', 'train', 'other']
    TRN.properties = {'label': 'transportation'}
    TRN.probdist = ProbabilityDistributionOfVariableNode(TRN)
    TRN.probdist.conditioning_variable_nodes = OrderedDict([('OCC', OCC),
                                                            ('R', R)])
    TRN.probdist.probabilities = {
        ('emp', 'small'): [0.48, 0.42, 0.10],
        ('self', 'small'): [0.56, 0.36, 0.08],
        ('emp', 'big'): [0.58, 0.24, 0.18],
        ('self', 'big'): [0.70, 0.21, 0.09]
    }

    BN = BayesianNetwork('survey')
    BN.properties = {'testing': 'yes'}
    BN.variable_nodes = {
        'AGE': AGE,
        'SEX': SEX,
        'EDU': EDU,
        'OCC': OCC,
        'R': R,
        'TRN': TRN
    }

    return BN