Example no. 1
def load_bn_from_BIF(dataset_name='child', folder=DATA_FOLDER, verbose=False):
    bif_file = os.path.join(folder, BIF_FOLDER_MAP[dataset_name],
                            dataset_name + '.bif', dataset_name + '.bif')
    image_file = os.path.join(folder, BIF_FOLDER_MAP[dataset_name],
                              dataset_name + '.png')

    if verbose:
        print(f'Loading graph from {bif_file}')

    reader = BIFReader(bif_file)
    model = reader.get_model()

    if os.path.exists(image_file):
        if verbose:
            print(f'Loading graph image from {image_file}')
        model.image = PIL.Image.open(image_file)

    # Take the leaves as features
    __FEATURES = model.get_leaves()
    __ROOTS = model.get_roots()
    __NODES = model.nodes()
    __NOT_FEATURES = list({
        node
        for node in __NODES if node not in __FEATURES and node not in __ROOTS
    })

    if verbose:
        print(f'Nodes: {__NODES} ({len(__NODES)})')
        print(f'Features/Leaves: {__FEATURES} ({len(__FEATURES)})')
        print(f'Roots: {__ROOTS} ({len(__ROOTS)})')
        print(
            f'Intermediate (non-roots/non-leaves): {__NOT_FEATURES} ({len(__NOT_FEATURES)})'
        )

    return model
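A minimal usage sketch for the loader above, assuming DATA_FOLDER and BIF_FOLDER_MAP are module-level constants that are simply not shown in this snippet (the values below are hypothetical placeholders):

# Hedged usage sketch; the data root and folder map are assumptions, not part of the original.
DATA_FOLDER = 'data'                   # hypothetical data root
BIF_FOLDER_MAP = {'child': 'child'}    # hypothetical mapping: dataset name -> subfolder

model = load_bn_from_BIF(dataset_name='child', folder=DATA_FOLDER, verbose=True)
print(model.nodes())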
Example no. 2
    def __init__(self, evidence, bif_path=None, model=None):
        """
            Args:
                evidence: a dictionary of observations, e.g., {node-name: node-value}.
                model / bif_path: a pgmpy model instance or a path to a .bif file.
        """

        assert bif_path is not None or model is not None, \
                "You must pass either a pgmpy model or a bif_path!"
        if model is None:
            logger.debug("[BayesianNetworkVI] reading from %s" % bif_path)
            reader = BIFReader(bif_path)
            model = reader.get_model()
        self._model = model
        self._evidence = evidence
        
        latent_variables = sorted( set(v for cpd in self._model.get_cpds() 
                                         for v in cpd.variables).difference(evidence) )
        observed_variables = sorted(evidence)
        self._no2var = dict(enumerate(latent_variables+observed_variables))
        self._var2no = dict((var, no) for no, var in self._no2var.items())  
        logger.debug("[BayesianNetworkVI] mapping(id->variable)=%s" % self._no2var)      

        variable2values = cpds_to_variables(self._model.get_cpds())
        self._variable2values = dict( (var, dict( (v, i) for i, v in enumerate(vals) )) 
                                             for var, vals in variable2values.items()) 
        
        self._prob_arrays = [cpd_to_prob_array(cpd)+EPS for cpd in self._model.get_cpds()] #!TODO renormalization

        logger.info("[BayesianNetworkVI] %i vars with max cardinality=%i => enumeration size=%i" % 
                    (len(variable2values), self.cardinality, self.enumeration_size))       
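A minimal construction sketch; cpds_to_variables, cpd_to_prob_array, EPS and logger are helpers from the original module that are not shown here, and the node and state names below are assumptions based on the standard asia network:

# Hedged usage sketch for the class above.
evidence = {'smoke': 'yes', 'xray': 'no'}             # observed node -> observed state
vi = BayesianNetworkVI(evidence, bif_path='asia.bif')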
Example no. 3
    def setUp(self):

        self.reader = BIFReader(string="""
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
        property "credal-set constant-density-bounded 1.1" ;
}
variable  "light-on" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (218, 195)" ;
}
variable  "bowel-problem" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (335, 99)" ;
}
variable  "dog-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (300, 195)" ;
}
variable  "hear-bark" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (296, 268)" ;
}
variable  "family-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (257, 99)" ;
}
probability (  "light-on"  "family-out" ) { //2 variable(s) and 4 values
        (true) 0.6 0.4 ;
        (false) 0.05 0.95 ;
}
probability (  "bowel-problem" ) { //1 variable(s) and 2 values
        table 0.01 0.99 ;
}
probability (  "dog-out"  "bowel-problem"  "family-out" ) { //3 variable(s) and 8 values
        table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability (  "hear-bark"  "dog-out" ) { //2 variable(s) and 4 values
        table 0.7 0.01 0.3 0.99 ;
}
probability (  "family-out" ) { //1 variable(s) and 2 values
        table 0.15 0.85 ;
}
""")
Example no. 4
    def compile(self):
        """
        Preprocess the model:
          1. Read the graph object from the given bif_path.
          2. Set self.nodes from the graph's nodes.

        :return: The preprocessed model (self).
        """
        # Annotate rather than chain-assign: the original line also rebound
        # pgmpy.models.BayesianModel itself, which is almost certainly unintended.
        self.graph: pgmpy.models.BayesianModel = BIFReader(
            self.bif_path).get_model()
        self.nodes = set(self.graph.nodes)
        self.compiled = True
        return self
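A usage sketch for compile(); the enclosing class is not named in the snippet, so ModelWrapper below is a hypothetical stand-in that is assumed to store bif_path in its constructor:

# Hedged usage sketch; ModelWrapper is a hypothetical name for the class that owns compile().
wrapper = ModelWrapper(bif_path='asia.bif')
wrapper.compile()
print(len(wrapper.nodes), 'nodes loaded, compiled =', wrapper.compiled)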
Example no. 5
def get_graph(fname, tar_list=None):
    reader = BIFReader(fname)
    nodes = reader.get_variables()
    candidates = set(nodes)
    print(nodes)
    edges = reader.get_edges()
    n_nodes = len(nodes)
    node2id = {node: idx for idx, node in enumerate(nodes)}
    if tar_list is None:
        # Default to a single randomly chosen target (the original default of None
        # would raise a TypeError when enumerated).
        tar_list = [None]
    for i, tar in enumerate(tar_list):
        # select one as the target
        target = tar or np.random.choice(list(candidates))
        print('target: ', target)
        candidates -= {target}
        node2id[target], node2id[nodes[-(i+1)]] = node2id[nodes[-(i+1)]], node2id[target]
    print(node2id)
    # build adjmat
    graph = np.zeros([n_nodes, n_nodes])
    for edge in edges:
        p = node2id[edge[0]]
        n = node2id[edge[1]]
        graph[n, p] = 1
    print(graph)

    return graph
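The adjacency convention above is graph[child, parent] = 1, with each chosen target swapped to one of the last ids. A short usage sketch (the file name and target node are assumptions):

# Hedged usage sketch for get_graph.
import numpy as np   # get_graph itself relies on numpy for the random target choice

adj = get_graph('asia.bif', tar_list=['dysp'])
print(np.nonzero(adj[-1])[0])   # ids of the parents of the target node (see the printed node2id mapping)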
Example no. 7
    ]

    if len(Model_files):
        print(
            "If only the target BN model is listed, please run structure learning first (training.py)"
        )
        print("BN model: ", Model_files)
        selection = int(
            input(
                "Please choose a model to analyse (1-based index): "
            ))

        if 1 <= selection <= len(Model_files):
            model_path = os.path.join("model", Model_files[selection - 1])
            from pgmpy.readwrite import BIFReader
            reader = BIFReader(model_path)
            model = reader.get_model()
            # Checking if the cpds are valid for the model.
            print("Checking if CPDs are valid for model: ",
                  model.check_model())
            # Probability reasoning with the samples from dataset
            # The result will be designated into two class (actually confirmed or healthy) with two colors in graph
            from pgmpy.inference import VariableElimination
            covid_infer = VariableElimination(model)
            variables_name = ["Covid"]
            evidences_name = list(dataset)
            evidences_name.remove("Covid")
            variables_list = dataset["Covid"].values.tolist()
            evidences_list = dataset.drop(columns=["Covid"]).values.tolist()

            import time
Example no. 8
    def run(self, request):
        response = {'text': '', 'weight': 0, 'type': 'normal'}
        # json.loads no longer accepts an 'encoding' argument (removed in Python 3.9).
        js = json.loads(request)
        if len(js) == 0:
            response['text'] = ''
            response['weight'] = 0
            res = json.dumps(response, ensure_ascii = False)
            return res
		
        candidates = {} 
        sym_list = js['Self Explaination'].split(' ')
        for sym in sym_list:
            if sym in self.disease_id:
                id = self.disease_id[sym]
                if not id in candidates:
                    candidates[id] = 0
                candidates[id] += 1	
            if sym in self.ds_rind:
                for id, rate in self.ds_rind[sym].items():
                    if not id in candidates:
                        candidates[id] = 0
                    candidates[id] += rate	
        ids = sorted(candidates.items(), key=lambda d:d[1], reverse=True)[:20]
        print('candidates generated', file = sys.stderr)

        dw = {}
        deps = {}
        for i, r in ids:
            if r >= 1:
                for d in self.disease_part[i]:
                    if not d in deps:
                        deps[d] = 0
                    deps[d] += 1

            try:
                model = BIFReader('models/model.bif.%s' % i).get_model()
            except:
                continue
            infer = VariableElimination(model)
            key = self.fea_map['D_' + self.disease_name[i]]
            observed_info = {}
            if '性别' in js['User Info']:
                observed_info['SEX'] = js['User Info']['性别']
            if '年龄' in js['User Info']:
                observed_info['AGE'] = js['User Info']['年龄']

            for sym in sym_list:
                sym = 'S_' + sym
                if not sym in model.nodes():
                    continue
                if not sym in self.fea_map:
                    continue
                observed_info[self.fea_map[sym]] = 0
			
            score = infer.query(variables = [key], evidence = observed_info)[key].values[0]
            dw[self.disease_name[i]] = score
            #print('%s: %s = %.8f' % (i, self.disease_name[i], score))	

            for d in self.disease_part[i]:
                if not d in deps:
                    deps[d] = 0
                deps[d] += score	
	
        if len(deps) == 0:
            print('unknown symptoms...', file = sys.stderr)
            response['weight'] = 0
            response['text'] = ''
            res = json.dumps(response, ensure_ascii = False)
            return res
	
        for name, score in sorted(dw.items(), key=lambda d:d[1], reverse=True)[:10]:
            print('%s\t%.8f' % (name, score), file = sys.stderr)
	
        dep_list = sorted(deps.items(), key=lambda d:d[1], reverse=True)
        print('department: ', file = sys.stderr)
        for dep, wei in dep_list[:3]:
            print(dep, file = sys.stderr)
            response['text'] += dep
            response['text'] += ' '
        response['weight'] = (1000 * dep_list[0][1]) + 0.8 
        res = json.dumps(response, ensure_ascii = False)
        return res
Example no. 9
from pgmpy.readwrite import BIFReader

reader = BIFReader('asia.bif')

asia_model = reader.get_model()

asia_model.nodes()
asia_model.edges()
asia_model.get_cpds()

# Exact inference with variable elimination
from pgmpy.inference import VariableElimination

asia_infer = VariableElimination(asia_model)

# Computing the probability of bronc given smoke.
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 0})
print(q['bronc'])
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 1})
print(q['bronc'])
Example no. 10
class TestBIFReader(unittest.TestCase):
    def setUp(self):

        self.reader = BIFReader(string="""
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
        property "credal-set constant-density-bounded 1.1" ;
}
variable  "light-on" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (218, 195)" ;
}
variable  "bowel-problem" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (335, 99)" ;
}
variable  "dog-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (300, 195)" ;
}
variable  "hear-bark" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (296, 268)" ;
}
variable  "family-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (257, 99)" ;
}
probability (  "light-on"  "family-out" ) { //2 variable(s) and 4 values
        table 0.6 0.05 0.4 0.95 ;
}
probability (  "bowel-problem" ) { //1 variable(s) and 2 values
        table 0.01 0.99 ;
}
probability (  "dog-out"  "bowel-problem"  "family-out" ) { //3 variable(s) and 8 values
        table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability (  "hear-bark"  "dog-out" ) { //2 variable(s) and 4 values
        table 0.7 0.01 0.3 0.99 ;
}
probability (  "family-out" ) { //1 variable(s) and 2 values
        table 0.15 0.85 ;
}
""")

    def test_network_name(self):

        name_expected = 'Dog-Problem'
        self.assertEqual(self.reader.network_name, name_expected)

    def test_get_variables(self):

        var_expected = [
            'light-on', 'bowel-problem', 'dog-out', 'hear-bark', 'family-out'
        ]
        self.assertListEqual(self.reader.get_variables(), var_expected)

    def test_states(self):

        states_expected = {
            'bowel-problem': ['true', 'false'],
            'dog-out': ['true', 'false'],
            'family-out': ['true', 'false'],
            'hear-bark': ['true', 'false'],
            'light-on': ['true', 'false']
        }
        states = self.reader.get_states()
        for variable in states_expected:
            self.assertListEqual(states_expected[variable], states[variable])

    def test_get_property(self):

        property_expected = {
            'bowel-problem': ['position = (335, 99)'],
            'dog-out': ['position = (300, 195)'],
            'family-out': ['position = (257, 99)'],
            'hear-bark': ['position = (296, 268)'],
            'light-on': ['position = (218, 195)']
        }
        prop = self.reader.get_property()
        for variable in property_expected:
            self.assertListEqual(property_expected[variable], prop[variable])

    def test_get_cpd(self):

        cpd_expected = {
            'bowel-problem': np.array([[0.01], [0.99]]),
            'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                 [0.01, 0.03, 0.1, 0.7]]),
            'family-out': np.array([[0.15], [0.85]]),
            'hear-bark': np.array([[0.7, 0.01], [0.3, 0.99]]),
            'light-on': np.array([[0.6, 0.05], [0.4, 0.95]])
        }
        cpd = self.reader.variable_cpds
        for variable in cpd_expected:
            np_test.assert_array_equal(cpd_expected[variable], cpd[variable])

    def test_get_parents(self):

        parents_expected = {
            'bowel-problem': [],
            'dog-out': ['bowel-problem', 'family-out'],
            'family-out': [],
            'hear-bark': ['dog-out'],
            'light-on': ['family-out']
        }
        parents = self.reader.get_parents()
        for variable in parents_expected:
            self.assertListEqual(parents_expected[variable], parents[variable])

    def test_get_edges(self):

        edges_expected = [['family-out', 'dog-out'],
                          ['bowel-problem', 'dog-out'],
                          ['family-out', 'light-on'], ['dog-out', 'hear-bark']]
        self.assertListEqual(sorted(self.reader.variable_edges),
                             sorted(edges_expected))

    def test_get_model(self):
        edges_expected = [('family-out', 'dog-out'),
                          ('bowel-problem', 'dog-out'),
                          ('family-out', 'light-on'), ('dog-out', 'hear-bark')]
        nodes_expected = [
            'bowel-problem', 'hear-bark', 'light-on', 'dog-out', 'family-out'
        ]
        edge_expected = {
            'bowel-problem': {
                'dog-out': {}
            },
            'dog-out': {
                'hear-bark': {}
            },
            'family-out': {
                'dog-out': {},
                'light-on': {}
            },
            'hear-bark': {},
            'light-on': {}
        }
        node_expected = {
            'bowel-problem': {
                'position': '(335, 99)'
            },
            'dog-out': {
                'position': '(300, 195)'
            },
            'family-out': {
                'position': '(257, 99)'
            },
            'hear-bark': {
                'position': '(296, 268)'
            },
            'light-on': {
                'position': '(218, 195)'
            }
        }
        cpds_expected = [
            np.array([[0.01], [0.99]]),
            np.array([[0.99, 0.97, 0.9, 0.3], [0.01, 0.03, 0.1, 0.7]]),
            np.array([[0.15], [0.85]]),
            np.array([[0.7, 0.01], [0.3, 0.99]]),
            np.array([[0.6, 0.05], [0.4, 0.95]])
        ]
        model = self.reader.get_model()
        for cpd_index in range(0, len(cpds_expected)):
            np_test.assert_array_equal(model.get_cpds()[cpd_index].get_cpd(),
                                       cpds_expected[cpd_index])
        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        del self.reader
Example no. 11
def get_dataset(dataset):
    """
    Fetches the `dataset` and returns a pgmpy.model instance.

    Parameter
    ---------
    dataset: str
        Any dataset from bnlearn repository (http://www.bnlearn.com/bnrepository).

        Discrete Bayesian Network Options:
            Small Networks:
                1. asia
                2. cancer
                3. earthquake
                4. sachs
                5. survey
            Medium Networks:
                1. alarm
                2. barley
                3. child
                4. insurance
                5. mildew
                6. water
            Large Networks:
                1. hailfinder
                2. hepar2
                3. win95pts
            Very Large Networks:
                1. andes
                2. diabetes
                3. link
                4. munin1
                5. munin2
                6. munin3
                7. munin4
                8. pathfinder
                9. pigs
                10. munin
        Gaussian Bayesian Network Options:
                1. ecoli70
                2. magic-niab
                3. magic-irri
                4. arth150
        Conditional Linear Gaussian Bayesian Network Options:
                1. sangiovese
                2. mehra

    Example
    -------
    >>> from pgmpy.data import get_dataset
    >>> model = get_dataset(dataset='asia')
    >>> model

    Returns
    -------
    pgmpy.models instance: An instance of one of the model classes in pgmpy.models
                           depending on the type of dataset.
    """
    dataset_links = {
        "asia": "http://www.bnlearn.com/bnrepository/asia/asia.bif.gz",
        "cancer": "http://www.bnlearn.com/bnrepository/cancer/cancer.bif.gz",
        "earthquake":
        "http://www.bnlearn.com/bnrepository/earthquake/earthquake.bif.gz",
        "sachs": "http://www.bnlearn.com/bnrepository/sachs/sachs.bif.gz",
        "survey": "http://www.bnlearn.com/bnrepository/survey/survey.bif.gz",
        "alarm": "http://www.bnlearn.com/bnrepository/alarm/alarm.bif.gz",
        "barley": "http://www.bnlearn.com/bnrepository/barley/barley.bif.gz",
        "child": "http://www.bnlearn.com/bnrepository/child/child.bif.gz",
        "insurance":
        "http://www.bnlearn.com/bnrepository/insurance/insurance.bif.gz",
        "mildew": "http://www.bnlearn.com/bnrepository/mildew/mildew.bif.gz",
        "water": "http://www.bnlearn.com/bnrepository/water/water.bif.gz",
        "hailfinder":
        "http://www.bnlearn.com/bnrepository/hailfinder/hailfinder.bif.gz",
        "hepar2": "http://www.bnlearn.com/bnrepository/hepar2/hepar2.bif.gz",
        "win95pts":
        "http://www.bnlearn.com/bnrepository/win95pts/win95pts.bif.gz",
        "andes": "http://www.bnlearn.com/bnrepository/andes/andes.bif.gz",
        "diabetes":
        "http://www.bnlearn.com/bnrepository/diabetes/diabetes.bif.gz",
        "link": "http://www.bnlearn.com/bnrepository/link/link.bif.gz",
        "munin1": "http://www.bnlearn.com/bnrepository/munin4/munin1.bif.gz",
        "munin2": "http://www.bnlearn.com/bnrepository/munin4/munin2.bif.gz",
        "munin3": "http://www.bnlearn.com/bnrepository/munin4/munin3.bif.gz",
        "munin4": "http://www.bnlearn.com/bnrepository/munin4/munin4.bif.gz",
        "pathfinder":
        "http://www.bnlearn.com/bnrepository/pathfinder/pathfinder.bif.gz",
        "pigs": "http://www.bnlearn.com/bnrepository/pigs/pigs.bif.gz",
        "munin": "http://www.bnlearn.com/bnrepository/munin/munin.bif.gz",
        "ecoli70": "",
        "magic-niab": "",
        "magic-irri": "",
        "arth150": "",
        "sangiovese": "",
        "mehra": "",
    }

    if dataset not in dataset_links.keys():
        raise ValueError("dataset should be one of the options")
    if dataset_links[dataset] == "":
        raise NotImplementedError("The specified dataset isn't supported")

    filename, _ = urlretrieve(dataset_links[dataset])
    with gzip.open(filename, "rb") as f:
        content = f.read()
    # Pass the decoded BIF text explicitly; the raw bytes are not a file path.
    reader = BIFReader(string=content.decode("utf-8"))
    return reader.get_model()
Example no. 12
    input_path = Path(args.input)
    output_file = Path(args.output)

    with output_file.open("w", newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow([
            "network", "nodes", "edges", "max_in_degree", "avg_degree",
            "treewidth_min_degree", "treewidth_min_fill_in"
        ])  # csv header

        counter = 0
        for network_file in input_path.glob(
                "*.{}".format(NETWORK_FILE_EXTENSIONS)):
            print("Parsing {}".format(network_file.name))
            reader = BIFReader(network_file)

            name = network_file.stem
            print("Computing treewidth for {}".format(name))
            network, min_degree, min_fill = compute_treewidth_from_BIF(reader)

            nodes = network.order()
            edges = network.size()
            max_indegree = np.array([n for node, n in network.in_degree()]).max()
            avg_degree = np.array([n for node, n in network.degree()]).mean()

            writer.writerow([
                name, nodes, edges, max_indegree, avg_degree, min_degree,
                min_fill
            ])
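compute_treewidth_from_BIF is defined elsewhere in the original project. A minimal sketch of what it could look like, assuming it moralizes the DAG and applies networkx's treewidth heuristics (only the name and the return convention are taken from the call above; everything else is an assumption):

import networkx as nx
from networkx.algorithms.approximation import treewidth_min_degree, treewidth_min_fill_in


def compute_treewidth_from_BIF(reader):
    # Build the directed structure from the BIF file, then approximate the
    # treewidth of its moral graph with two standard heuristics.
    network = nx.DiGraph(reader.get_edges())
    network.add_nodes_from(reader.get_variables())
    moral_graph = nx.moral_graph(network)          # requires networkx >= 2.4
    width_min_degree, _ = treewidth_min_degree(moral_graph)
    width_min_fill_in, _ = treewidth_min_fill_in(moral_graph)
    return network, width_min_degree, width_min_fill_in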
Example no. 13
from pgmpy.readwrite import BIFReader
reader = BIFReader('shade.bif')

asia_model = reader.get_model()

asia_model.nodes()
asia_model.edges()
asia_model.get_cpds()

# Exact inference with variable elimination
from pgmpy.inference import VariableElimination
asia_infer = VariableElimination(asia_model)

# Computing the probability of Shade under different pieces of evidence.
q = asia_infer.query(variables=['Shade'], evidence={'SolarElevation': 1})
print(q['Shade'])
q = asia_infer.query(variables=['Shade'], evidence={'GenerateLevel': 1})
print(q['Shade'])
q = asia_infer.query(variables=['Shade'], evidence={'DipersionRatio': 2})
print(q['Shade'])
q = asia_infer.query(variables=['Shade'],
                     evidence={
                         'GenerateLevel': 1,
                         'DipersionRatio': 2
                     })
print(q['Shade'])
Example no. 14
while True:
    try:
        buf = input()
    except:
        break
    a = buf.rstrip().split(' ')
    ids = set([])
    for sym in a:
        if sym in ds_rind:
            ids = ids.union(ds_rind[sym])

    dw = {}
    for i in ids:
        try:
            model = BIFReader('models/model.bif.%s' % i).get_model()
        except:
            continue
        infer = VariableElimination(model)
        key = fea_map['D_' + disease_name[i]]
        observed_info = {}
        for sym in a:
            if sym.startswith('SEX_'):
                observed_info['SEX'] = int(sym[len('SEX_'):])
            if sym.startswith('AGE_'):
                observed_info['AGE'] = int(sym[len('AGE_'):])
            sym = 'S_' + sym
            if not sym in model.nodes():
                continue
            if not sym in fea_map:
                continue
Example no. 15
from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination
import os

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = curPath[:curPath.find("sklearn\\") + len("sklearn\\")]
dataPath = rootPath + "Input/MLWorkHome/experiment6/img/asia.bif"
reader = BIFReader(dataPath)
asia_model = reader.get_model()
# The nodes() method lists the nodes in the model
print(asia_model.nodes())
# NodeView(('xray', 'bronc', 'asia', 'dysp', 'lung', 'either', 'smoke', 'tub'))
# Exercise 1: in the cell below, check whether there is a directed connection between the tub and either nodes:
print("Exercise 1:")
print(asia_model.is_active_trail('tub', 'either'))

# Exercise 2: in the cell below, check whether the tub and dysp nodes can be connected via the either node:
print("Exercise 2:")
print(asia_model.is_active_trail('tub', 'dysp', observed=['either']))

asia_infer = VariableElimination(asia_model)
# Distribution of bronc when smoke is 0
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 0})
print(q['bronc'])

# Exercise 3: in the cell below, query the distribution of xray when either is 1:
print("Exercise 3:")
asia_infer2 = VariableElimination(asia_model)
p = asia_infer2.query(variables=['xray'], evidence={'either': 1})
print(p['xray'])
Example no. 16
    def __init__(self, pathfile):
        self.reader = BIFReader(pathfile)
        self.node_topo_sorted = list()

        # Find Topo order
        self.__find_topo_sorted()
Example no. 17
class Transformer:
    def __init__(self, pathfile):
        self.reader = BIFReader(pathfile)
        self.node_topo_sorted = list()

        # Find Topo order
        self.__find_topo_sorted()


    def __find_topo_sorted(self):
        parents = self.reader.get_parents()
        queue = deque()
        queue.extend(self.__find_root(parents))

        while len(queue):
            node  = queue.popleft()

            # Add to the topo-sorted list
            self.node_topo_sorted.append(node)
            
            # Remove the node from the parents map
            parents.pop(node)
            
            # Remove the node from every remaining parent list
            for parent in parents:
                if node in parents[parent]:
                    parents[parent].remove(node)

            # When the queue is empty, look for newly freed roots
            if len(queue) == 0:
                queue.extend(self.__find_root(parents))


    def __find_root(self, parrents):
        root = list()
        for parrent in parrents:
            if len(parrents[parrent]) == 0:
                root.append(parrent)

        return root

    def write(self, pathfile):
        parrents = self.reader.get_parents()
        states = self.reader.get_states()
        values = self.reader.get_values()

        with open(pathfile, 'w') as writer:
            # Write the number of nodes
            writer.write( str(len(self.node_topo_sorted)) + '\n')

            # Write each node 
            for node in self.node_topo_sorted:

                # Node Name
                writer.write(node + ';')

                # Parents of the node
                node_parrents = parrents[node]
                writer.write(','.join(node_parrents) + ';')
                
                # Write state
                state = states[node]
                writer.write(','.join(state) + ';')

                # Find dim
                dim = list()
                for node_parrent in node_parrents:
                    dim.append(len(states[node_parrent]))

                dim.append(len(state))

                # Write dim
                writer.write(','.join(map(str,dim)) + ';')

                # Write probabilities
                value = map(str, list(np.transpose(values[node]).ravel()))
                writer.write(','.join(value) )
            
                writer.write('\n')
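A usage sketch for Transformer (the file names are assumptions); each line that write() emits has the form name;parents;states;dims;probabilities:

# Hedged usage sketch; requires numpy and collections.deque, as the class above does.
t = Transformer('asia.bif')
print(t.node_topo_sorted)     # nodes in topological order
t.write('asia.topo.txt')      # one line per node: name;parents;states;dims;probabilities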
Example no. 18
from pgmpy.readwrite import BIFReader
from pgmpy.sampling import BayesianModelSampling
import networkx as nx
import os
import sys
import pandas as pd
from tqdm import tqdm

if __name__ == '__main__':
    dataset = sys.argv[1]
    sample_size = int(sys.argv[2])
    n_runs = int(sys.argv[3])
    score = sys.argv[4]
    alpha = float(sys.argv[5])
    palim = int(sys.argv[6])

    # Read the BN model
    reader = BIFReader(dataset + '.bif')
    model = reader.get_model()
    arities = dict(model.get_cardinality())
    for key, value in arities.items():
        arities[key] = [value]
    nodes = list(nx.topological_sort(model))

    for run in tqdm(range(n_runs)):
        data = pd.read_csv(os.path.join(
            dataset,
            str(sample_size) + '_' + str(run) + '.csv'),
                           sep=' ')
        data = data.iloc[1:]  # Remove the arities from the table
        if score == 'BDeu':
            save_folder = os.path.join(
                dataset,
Example no. 19
if __name__ == "__main__":
    # Load train and test dataset
    train, test = train_test()
    print("Train and Test loaded successfully!\n")

    train = train.sample(frac=1).reset_index(drop=True)
    test = test.sample(frac=1).reset_index(drop=True)

    # Creation of a new model
    print('\nStarting creation of a new model...')
    model, total_time = create_BN_model(train)
    print(total_time)

    # Load of the model we want to use to make inference
    print('Loading the model...')
    reader = BIFReader('model_pgmpy.bif')
    model = reader.get_model()
    if model.check_model():
        print(
            "Your network structure and CPDs are correctly defined. The probabilities in each column sum to 1. Hill Climb worked fine!"
        )
    else:
        print("Model check failed: the structure or CPDs are inconsistent.")

    # Inference
    print('\nStarting inference...')
    inference(test, model)

    print('Inference done!')
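train_test, create_BN_model and inference come from the original project and are not shown. A minimal sketch of create_BN_model, assuming Hill Climbing with a BIC score as the success message above suggests (the exact estimator, priors and pgmpy version are assumptions):

import time
from pgmpy.estimators import HillClimbSearch, BicScore, BayesianEstimator
from pgmpy.models import BayesianModel


def create_BN_model(data):
    # Learn a structure by hill climbing, then fit the CPDs. Note that the
    # HillClimbSearch signature varies across pgmpy versions: in newer releases
    # scoring_method is passed to estimate() instead of the constructor.
    start = time.time()
    hc = HillClimbSearch(data, scoring_method=BicScore(data))
    structure = hc.estimate()
    model = BayesianModel(structure.edges())
    model.fit(data, estimator=BayesianEstimator, prior_type='BDeu')
    return model, time.time() - start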
Example no. 20
class TestBIFReader(unittest.TestCase):

    def setUp(self):

        self.reader = BIFReader(string="""
// Bayesian Network in the Interchange Format
// Produced by BayesianNetworks package in JavaBayes
// Output created Sun Nov 02 17:49:49 GMT+00:00 1997
// Bayesian network
network "Dog-Problem" { //5 variables and 5 probability distributions
        property "credal-set constant-density-bounded 1.1" ;
}
variable  "light-on" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (218, 195)" ;
}
variable  "bowel-problem" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (335, 99)" ;
}
variable  "dog-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (300, 195)" ;
}
variable  "hear-bark" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (296, 268)" ;
}
variable  "family-out" { //2 values
        type discrete[2] {  "true"  "false" };
        property "position = (257, 99)" ;
}
probability (  "light-on"  "family-out" ) { //2 variable(s) and 4 values
        (true) 0.6 0.4 ;
        (false) 0.05 0.95 ;
}
probability (  "bowel-problem" ) { //1 variable(s) and 2 values
        table 0.01 0.99 ;
}
probability (  "dog-out"  "bowel-problem"  "family-out" ) { //3 variable(s) and 8 values
        table 0.99 0.97 0.9 0.3 0.01 0.03 0.1 0.7 ;
}
probability (  "hear-bark"  "dog-out" ) { //2 variable(s) and 4 values
        table 0.7 0.01 0.3 0.99 ;
}
probability (  "family-out" ) { //1 variable(s) and 2 values
        table 0.15 0.85 ;
}
""")

    def test_network_name(self):

        name_expected = 'Dog-Problem'
        self.assertEqual(self.reader.network_name, name_expected)

    def test_get_variables(self):

        var_expected = ['light-on', 'bowel-problem', 'dog-out',
                        'hear-bark', 'family-out']
        self.assertListEqual(self.reader.get_variables(), var_expected)

    def test_states(self):

        states_expected = {'bowel-problem': ['true', 'false'],
                           'dog-out': ['true', 'false'],
                           'family-out': ['true', 'false'],
                           'hear-bark': ['true', 'false'],
                           'light-on': ['true', 'false']}
        states = self.reader.get_states()
        for variable in states_expected:
            self.assertListEqual(states_expected[variable], states[variable])

    def test_get_property(self):

        property_expected = {'bowel-problem': ['position = (335, 99)'],
                             'dog-out': ['position = (300, 195)'],
                             'family-out': ['position = (257, 99)'],
                             'hear-bark': ['position = (296, 268)'],
                             'light-on': ['position = (218, 195)']}
        prop = self.reader.get_property()
        for variable in property_expected:
            self.assertListEqual(property_expected[variable],
                                 prop[variable])

    def test_get_values(self):

        cpd_expected = {'bowel-problem': np.array([[0.01],
                                                   [0.99]]),
                        'dog-out': np.array([[0.99, 0.97, 0.9, 0.3],
                                             [0.01, 0.03, 0.1, 0.7]]),
                        'family-out': np.array([[0.15],
                                                [0.85]]),
                        'hear-bark': np.array([[0.7, 0.01],
                                               [0.3, 0.99]]),
                        'light-on': np.array([[0.6, 0.05],
                                              [0.4, 0.95]])}
        cpd = self.reader.variable_cpds
        for variable in cpd_expected:
            np_test.assert_array_equal(cpd_expected[variable],
                                       cpd[variable])

    def test_get_parents(self):

        parents_expected = {'bowel-problem': [],
                            'dog-out': ['bowel-problem', 'family-out'],
                            'family-out': [],
                            'hear-bark': ['dog-out'],
                            'light-on': ['family-out']}
        parents = self.reader.get_parents()
        for variable in parents_expected:
            self.assertListEqual(parents_expected[variable],
                                 parents[variable])

    def test_get_edges(self):

        edges_expected = [['family-out', 'dog-out'],
                          ['bowel-problem', 'dog-out'],
                          ['family-out', 'light-on'],
                          ['dog-out', 'hear-bark']]
        self.assertListEqual(sorted(self.reader.variable_edges),
                             sorted(edges_expected))

    def test_get_model(self):
        edges_expected = [('family-out', 'dog-out'),
                          ('bowel-problem', 'dog-out'),
                          ('family-out', 'light-on'),
                          ('dog-out', 'hear-bark')]
        nodes_expected = ['bowel-problem', 'hear-bark', 'light-on',
                          'dog-out', 'family-out']
        edge_expected = {'bowel-problem': {'dog-out': {'weight': None}},
                         'dog-out': {'hear-bark': {'weight': None}},
                         'family-out': {'dog-out': {'weight': None},
                                        'light-on': {'weight': None}},
                         'hear-bark': {},
                         'light-on': {}}
        node_expected = {'bowel-problem': {'weight': None,
                                           'position': '(335, 99)'},
                         'dog-out': {'weight': None,
                                     'position': '(300, 195)'},
                         'family-out': {'weight': None,
                                        'position': '(257, 99)'},
                         'hear-bark': {'weight': None,
                                       'position': '(296, 268)'},
                         'light-on': {'weight': None,
                                      'position': '(218, 195)'}}
        cpds_expected = [np.array([[0.01],
                                   [0.99]]),
                         np.array([[0.99, 0.97, 0.9, 0.3],
                                   [0.01, 0.03, 0.1, 0.7]]),
                         np.array([[0.15],
                                   [0.85]]),
                         np.array([[0.7, 0.01],
                                   [0.3, 0.99]]),
                         np.array([[0.6, 0.05],
                                   [0.4, 0.95]])]
        model = self.reader.get_model()
        for cpd_index in range(0, len(cpds_expected)):
            np_test.assert_array_equal(model.get_cpds()[cpd_index].get_values(),
                                       cpds_expected[cpd_index])
        self.assertDictEqual(model.node, node_expected)
        self.assertDictEqual(model.edge, edge_expected)
        self.assertListEqual(sorted(model.nodes()), sorted(nodes_expected))
        self.assertListEqual(sorted(model.edges()), sorted(edges_expected))

    def tearDown(self):
        del self.reader
Example no. 21
state, c, states, costs = annealing(maxsteps=3000, debug=True)
nodes = ['asia', 'tub', 'smoke', 'lung', 'bronc', 'either', 'xray', 'dysp']
G = vetor_Rede(state, nodes)
nx.draw(G, with_labels=True)
print(state)
print(c)
with open('Asia.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    aux = 0
    data = []
    data1 = [[] for i in range(8)]
    for row in csv_reader:
        data.append(row)
        for i in range(len(row)):
            data1[i].append(row[i])
        aux = aux + 1
        if aux == 50001:
            break
#22376.39851240954
data = {}
for i in range(len(data1)):
    data[data1[i][0]] = [data1[i][j] for j in range(1, len(data1[i]))]
data = pd.DataFrame(data)
print("Data: ")
print(data)  # Data read from the file
reader = BIFReader('asia.bif')  # the reference asia network, as published on bnlearn.com
asia_model = reader.get_model()  # load this model
print("BIC score")
print(abs(BicScore(data).score(asia_model)))
#see_annealing(states, costs)
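As a side note, the manual column-by-column CSV parsing above could be replaced by pandas' own reader, assuming Asia.csv has a header row and at most 50000 data rows are wanted (a hedged equivalent, not part of the original script):

# Hedged alternative to the csv loop above.
data = pd.read_csv('Asia.csv', nrows=50000, dtype=str)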