示例#1
1
def main():
    """Build the Monty Hall network and print the posterior over the host's door.

    The contestant's pick (``C``) and the prize location (``P``) are both
    parents of the host's choice (``H``).  After the CPDs are attached,
    ``H`` is queried with ``C`` and ``P`` both observed in state 0.
    """
    # Network structure.  C: contestant, P: prize, H: host.
    edges = [('C', 'H'), ('P', 'H')]
    network = BayesianModel(edges)

    # The two root nodes carry the same three-state table.
    contestant_cpd = TabularCPD('C', 3, [[0.33, 0.33, 0.33]])
    prize_cpd = TabularCPD('P', 3, [[0.33, 0.33, 0.33]])

    # Three rows (states of H) by nine columns (3 x 3 evidence combinations).
    host_table = [
        [0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 0.0, 1.0, 0.5],
        [0.5, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.5],
        [0.5, 1.0, 0.0, 1.0, 0.5, 0.0, 0.0, 0.0, 0.0],
    ]
    host_cpd = TabularCPD('H', 3, host_table,
                          evidence=['C', 'P'], evidence_card=[3, 3])

    # Attach the parameters to the structure.
    network.add_cpds(contestant_cpd, prize_cpd, host_cpd)

    # network.get_cpds() lists the attached CPDs; network.check_model()
    # validates the structure together with its CPDs.  Neither is called here.

    # Posterior of the host's choice given both observations.
    engine = VariableElimination(network)
    posterior = engine.query(['H'], evidence={'C': 0, 'P': 0})
    print(posterior['H'])
    def predict(self, data):
        """
        Predict the most probable state of every model variable absent from ``data``.

        Each row of ``data`` is used as evidence for a MAP query over the
        variables that have no column in ``data``.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame object with column names same as the variables in the model.
            At least one model variable must be left out.

        Returns
        -------
        pandas DataFrame object
            The states returned by the MAP query, one column per predicted
            variable, indexed like ``data``.

        Raises
        ------
        ValueError
            If ``data`` contains every model variable, or contains a column
            that is not a model variable.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(1000, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:800]
        >>> predict_data = values[800:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(values)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('E', axis=1, inplace=True)
        >>> y_pred = model.predict(predict_data)
        >>> y_pred
            E
        800 0
        801 1
        802 1
        803 1
        804 0
        ... ...
        993 0
        994 0
        995 1
        996 1
        997 0
        998 0
        999 0
        """
        from pgmpy.inference import VariableElimination

        observed = set(data.columns)
        model_nodes = set(self.nodes())

        if observed == model_nodes:
            raise ValueError("No variable missing in data. Nothing to predict")
        if observed - model_nodes:
            raise ValueError("Data has variables which are not in the model")

        to_predict = model_nodes - observed

        # The inference class needs a state_names dict; take it from one of
        # the estimated CPDs.
        state_names = self.get_cpds()[0].state_names
        engine = VariableElimination(self, state_names=state_names)

        predictions = defaultdict(list)
        for _, row in data.iterrows():
            best_states = engine.map_query(variables=to_predict,
                                           evidence=row.to_dict())
            for variable, state in best_states.items():
                predictions[variable].append(state)
        return pd.DataFrame(predictions, index=data.index)
示例#3
0
class TimeVE:
    """Timing case for a variable-elimination query on a small fitted model."""

    def setup(self):
        """Fit a five-node model on random binary data and keep an inference engine."""
        columns = ['A', 'B', 'C', 'D', 'E']
        samples = np.random.randint(low=0, high=2, size=(1000, 5))
        frame = pd.DataFrame(samples, columns=columns)

        edges = [('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')]
        network = BayesianModel(edges)
        network.fit(frame)

        self.inference = VariableElimination(network)

    def time_query(self):
        """Run a joint query over ``A`` and ``B`` with no evidence."""
        self.inference.query(['A', 'B'])
示例#4
0
class HailfinderVE:
    """Timing cases for variable-elimination queries on the Hailfinder network."""

    def setup(self):
        """Read ``hailfinder.bif`` and build a VariableElimination engine from it."""
        reader = BIFReader('hailfinder.bif')
        model = reader.get_bayesian_model()
        self.inference = VariableElimination(model)

    def time_hailfinder_bound(self):
        """Query the ``Boundaries`` variable with no evidence."""
        # query() expects a list of variable names; a bare string would be
        # iterated character by character instead of naming one variable.
        self.inference.query(['Boundaries'])

    def time_hailfinder_Wind(self):
        """Query the ``WindFieldPln`` variable with no evidence."""
        self.inference.query(['WindFieldPln'])
    def setUp(self):
        """Build the A, R -> J -> Q, L <- G network and an inference engine over it."""
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]
        self.bayesian_model = BayesianModel(edges)

        # Tables for the three parentless nodes.
        prior_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        prior_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        prior_g = TabularCPD('G', 2, values=[[0.6], [0.4]])

        # Tables for the nodes that have parents.
        cond_j = TabularCPD(
            'J', 2,
            values=[[0.9, 0.6, 0.7, 0.1],
                    [0.1, 0.4, 0.3, 0.9]],
            evidence=['A', 'R'], evidence_card=[2, 2])
        cond_q = TabularCPD(
            'Q', 2,
            values=[[0.9, 0.2],
                    [0.1, 0.8]],
            evidence=['J'], evidence_card=[2])
        cond_l = TabularCPD(
            'L', 2,
            values=[[0.9, 0.45, 0.8, 0.1],
                    [0.1, 0.55, 0.2, 0.9]],
            evidence=['J', 'G'], evidence_card=[2, 2])

        self.bayesian_model.add_cpds(prior_a, prior_g, cond_j,
                                     cond_l, cond_q, prior_r)

        self.bayesian_inference = VariableElimination(self.bayesian_model)
示例#6
0
	def bys1_init(self):
		"""Build the movement network and store its inference engine.

		For each direction suffix (D, U, R, L) the nodes ``E<d>`` and
		``A<d>`` are parents of ``M<d>``; ``END`` is the parent of ``PR``.
		The resulting VariableElimination object is kept in
		``self.VEbysmodel1``.
		"""
		# Edge order is kept as E<d>, A<d> per direction, then END -> PR.
		edges = []
		for suffix in ('D', 'U', 'R', 'L'):
			edges.append(('E' + suffix, 'M' + suffix))
			edges.append(('A' + suffix, 'M' + suffix))
		edges.append(('END', 'PR'))
		bysmodel1 = bysmodel(edges)

		# Every parentless E*/A* node gets the same two-state table.
		root_names = ('EU', 'ED', 'EL', 'ER', 'AU', 'AD', 'AL', 'AR')
		root_cpds = [
			tcpd(variable=name, variable_card=2, values=[[0.01, 0.99]])
			for name in root_names
		]

		# Every M* node shares one table conditioned on its E*/A* parents.
		move_cpds = [
			tcpd(variable='M' + suffix, variable_card=2,
			     evidence=['E' + suffix, 'A' + suffix],
			     evidence_card=[2, 2],
			     values=[[0.75, 0.4, 0.9, 0.9], [0.25, 0.6, 0.1, 0.1]])
			for suffix in ('D', 'U', 'L', 'R')
		]

		end_cpd = tcpd(variable='END', variable_card=2,
		               values=[[0.01, 0.99]])
		pr_cpd = tcpd(variable='PR', variable_card=2, evidence=['END'],
		              evidence_card=[2], values=[[1.0, 0.0], [0.0, 1.0]])

		bysmodel1.add_cpds(*root_cpds, *move_cpds, end_cpd, pr_cpd)

		self.VEbysmodel1 = VariableElimination(bysmodel1)
    def setUp(self):
        """Build a Markov model from CPD-derived factors and an inference engine over it.

        The edge list is the moralised form of the Bayesian network
        A, R -> J -> Q, L <- G (it adds A-R and J-G).  Inference only
        looks at the factors, so results match the Bayesian version.
        """
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'),
                 ('G', 'L'), ('A', 'R'), ('J', 'G')]
        self.markov_model = MarkovModel(edges)

        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD(
            'J', 2,
            values=[[0.9, 0.6, 0.7, 0.1],
                    [0.1, 0.4, 0.3, 0.9]],
            evidence=['A', 'R'], evidence_card=[2, 2])
        cpd_q = TabularCPD(
            'Q', 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=['J'], evidence_card=[2])
        cpd_l = TabularCPD(
            'L', 2,
            values=[[0.9, 0.45, 0.8, 0.1],
                    [0.1, 0.55, 0.2, 0.9]],
            evidence=['J', 'G'], evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])

        # Factors are added in the order a, r, j, q, l, g.
        factors = [cpd.to_factor()
                   for cpd in (cpd_a, cpd_r, cpd_j, cpd_q, cpd_l, cpd_g)]
        self.markov_model.add_factors(*factors)
        self.markov_inference = VariableElimination(self.markov_model)
    def setUp(self):
        """Create factors, CPDs and inference engines with and without state names."""
        # State-name dictionaries for the two groups of variables used below.
        self.sn2 = {
            'grade': ['A', 'B', 'F'],
            'diff': ['high', 'low'],
            'intel': ['poor', 'good', 'very good'],
        }
        self.sn1 = {
            'speed': ['low', 'medium', 'high'],
            'switch': ['on', 'off'],
            'time': ['day', 'night'],
        }

        # Same factor twice: once plain, once carrying state names.
        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'], [3, 2, 2],
                                   np.ones(12), state_names=self.sn1)

        def grade_cpd(intel_card, **extra):
            # Rows are 0.1 / 0.1 / 0.8 in every column; the number of columns
            # is the product of the evidence cardinalities (2 * intel_card).
            width = 2 * intel_card
            rows = [[0.1] * width, [0.1] * width, [0.8] * width]
            return TabularCPD('grade', 3, rows,
                              evidence=['diff', 'intel'],
                              evidence_card=[2, intel_card],
                              **extra)

        self.cpd1 = grade_cpd(3)
        self.cpd2 = grade_cpd(3, state_names=self.sn2)

        # Small student network used for the two inference engines.
        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd(2))

        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)
示例#9
0
    def Test_Data_Inference_map_n_steps(self, df_test, n_tsteps):
        """
        Perform both MAP and marginal inference for every test row and report the values.

        Parameters
        ----------
        df_test : pandas DataFrame
            Test rows.  Every column that is not a result variable is used
            as evidence.
        n_tsteps : int
            Number of result variables: ``M_t`` plus ``M_t+1`` ...
            ``M_t+(n_tsteps-1)``.

        Returns
        -------
        pandas DataFrame
            ``df_test`` filtered to ``COLUMN_SEQUENCE`` with ``M_t_orig``
            (a copy of the original ``M_t``) and, for each result variable
            ``n``, the columns ``n_0``, ``n_1`` (first two entries of the
            marginal's values), ``n`` (``Map_Occ_Values`` of ``n_1``) and
            ``n_map`` (MAP state).  Rows rejected by
            ``check_data_in_evidence`` are filled with NaN in those columns.
        """
        df_inference_results = df_test.filter(items=COLUMN_SEQUENCE).copy()
        df_inference_results['M_t_orig'] = df_inference_results['M_t']
        infer = VariableElimination(self.model)

        # Distinct values seen in each test column, passed to the evidence check.
        dict_unique_vals = {
            column: df_test[column].unique() for column in df_test.columns
        }

        result_list = ['M_t']
        if n_tsteps > 1:
            result_list = result_list + [
                "M_t+{}".format(x) for x in range(1, n_tsteps)
            ]

        evidence_columns = [
            x for x in df_test.columns if x not in result_list
        ]
        evidence_rows = df_test.filter(items=evidence_columns).to_dict('index')

        count = 0
        for index_key, value in evidence_rows.items():
            if check_data_in_evidence(value, dict_unique_vals):

                # MAP query over all result variables at once.
                tic = time.time()
                map_result = infer.map_query(variables=result_list,
                                             evidence=value)
                elapsed = time.time() - tic
                logging.info(
                    "thermostat {} - Elapsed seconds for MAP query {:.2f}".
                    format(self.thermostat.tstat_id, elapsed))

                for n in result_list:
                    # Marginal query, one result variable at a time.
                    tic = time.time()
                    result = infer.query(variables=[n], evidence=value)
                    # Log the elapsed seconds.  The previous code logged the
                    # absolute time.time() value here.
                    elapsed = time.time() - tic
                    if TIME_INFERENCE:
                        print('Elapsed: %s' % elapsed)
                    logging.info(
                        "thermostat {} - Elapsed seconds for query {:.2f}".
                        format(self.thermostat.tstat_id, elapsed))

                    df_inference_results.at[
                        index_key, '{}_0'.format(n)] = result[n].values[0]
                    df_inference_results.at[
                        index_key, '{}_1'.format(n)] = result[n].values[1]
                    df_inference_results.at[index_key,
                                            '{}'.format(n)] = Map_Occ_Values(
                                                result[n].values[1])
                    df_inference_results.at[index_key,
                                            '{}_map'.format(n)] = map_result[n]
            else:
                # Evidence check failed: mark every output for this row as missing.
                for n in result_list:
                    df_inference_results.at[index_key,
                                            '{}_0'.format(n)] = np.nan
                    df_inference_results.at[index_key,
                                            '{}_1'.format(n)] = np.nan
                    df_inference_results.at[index_key, '{}'.format(n)] = np.nan
                    df_inference_results.at[index_key,
                                            '{}_map'.format(n)] = np.nan
            count += 1

        logging.info("thermostat {} - Iterations of test {}".format(
            self.thermostat.tstat_id, count))
        return df_inference_results
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'],
                      evidence_card=[2])
# CPD of Dyspnoea conditioned on Cancer (two states each).
cpd_dysp = TabularCPD(variable='Dyspnoea',
                      variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'],
                      evidence_card=[2])

# Associating the parameters with the model structure.
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model.
cancer_model.check_model()

cancer_infer = VariableElimination(cancer_model)

print('All local independecies are as follows')
# get_independencies() only returns the assertions; print them so the header
# above is actually followed by output when this runs as a script.
print(cancer_model.get_independencies())
print('Displaying CPDs')
print(cancer_model.get_cpds('Pollution'))
print(cancer_model.get_cpds('Smoker'))
print(cancer_model.get_cpds('Cancer'))
print(cancer_model.get_cpds('Xray'))
print(cancer_model.get_cpds('Dyspnoea'))

# Posterior of Cancer with Smoker observed in state 1.
print('\n Probablity of Cancer given smoker')
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1})
print(q)

print('\n Probablity of Cancer given smoker')
示例#11
0
# Load the dataset. Column names are supplied explicitly through `names`,
# so the first line of the file is read as data, not as a header.
import pandas as pd
data = pd.read_csv("datasetheart.csv",names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','RESULT'])
# Quick look at the first and last five rows.
print(data.head(5))
print(data.tail(5))

# Chain-structured network A -> B -> C -> D -> RESULT; the CPDs are estimated
# from `data` with the maximum-likelihood estimator. Columns E..M are loaded
# but do not appear in the network.
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
model = BayesianModel([("A","B"),("B","C"),("C","D"),("D","RESULT")])
model.fit(data,estimator=MaximumLikelihoodEstimator)

# Query the distribution of RESULT with C observed as 2.
from pgmpy.inference import VariableElimination
infer = VariableElimination(model)
q = infer.query(variables=['RESULT'],evidence={"C":2})
print(q)
# Load the data; `attributes` (defined outside this excerpt) supplies the column names.
heartDisease = pd.read_csv('heart.csv', names= attributes)
heartDisease = heartDisease.replace('?', np.nan)              # '?' marks a missing value in the file

# View the data
print('Few examples from the dataset are given below- ')
print(heartDisease.head())
print('\nAttributes and data types-')
print(heartDisease.dtypes)

# Model a Bayesian network: age, sex and exang feed trestbps; age also feeds
# fbs; trestbps and fbs feed heartdisease, which in turn feeds restecg,
# thalach and chol.
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
('exang', 'trestbps'), ('trestbps', 'heartdisease'),
('fbs', 'heartdisease'), ('heartdisease', 'restecg'), ('heartdisease', 'thalach'),
('heartdisease', 'chol')])

# Learn the CPDs (conditional probability distributions) with the maximum-likelihood estimator
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inference with the fitted Bayesian network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)

# Distribution of heartdisease with age observed as 20.
print('\n1.Probability of HeartDisease given Age = 20')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 20})
print(q)

# Disabled second query (its evidence uses both sex and chol):
# print('\n2. Probability of HeartDisease given chol (Cholestoral) = 100')
# q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'sex': 0, 'chol': 100})
# print(q)
示例#13
0
	                      ('cost','no_of_people'),
	                      ('location','no_of_people')])
# CPDs for the two parentless nodes.
cpd_location = TabularCPD('location', 2, [[0.6,0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3,0.5,0.2]])

# cost | location, quality: 2 states x (2 x 3) parent combinations = 12 values,
# i.e. 2 rows of 6.
# NOTE(review): the second column sums to 0.7 (0.6 + 0.1), not 1. The intended
# value cannot be determined from this file, so it is left unchanged. Confirm
# against the source of these numbers.
cpd_cost = TabularCPD('cost', 2, [[0.8,0.6,0.1,0.6,0.6,0.05],
                                  [0.2,0.1,0.9,0.4,0.4,0.95]],
                      ['location','quality'], [2,3])

# no_of_people | cost, location: 2 states x (2 x 2) parent combinations = 8 values,
# i.e. 2 rows of 4.
cpd_no_of_people = TabularCPD('no_of_people', 2, [[0.6,0.8,0.1,0.6],
                                                  [0.4,0.2,0.9,0.4]],
                              ['cost','location'], [2,2])

resurant.add_cpds(cpd_location, cpd_quality, cpd_cost, cpd_no_of_people)


# Creating the inference object of the model
resurant_inference = VariableElimination(resurant)

# Doing simple queries over one or multiple variables
resurant_inference.query(variables=['location'])

resurant_inference.query(variables=['location','no_of_people'])

# Query with evidence
resurant_inference.query(variables=['no_of_people'], evidence={'location':1, 'quality':1})

# The elimination order can be given explicitly; otherwise it is chosen automatically
resurant_inference.query(variables=['no_of_people'], evidence={'location':1}, elimination_order=['quality', 'cost'])


# -2- Induced Graph
# Also defined as the undirected graph constructed by the union of all the
# graphs formed in each step of variable elimination.
# (These two lines used to be bare expressions; the first one raised a
# TypeError because it subtracted a string from an integer.)
# Check induced graph
induced_graph = resurant_inference.induced_graph(['cost', 'location', 'no_of_people', 'quality'])
示例#14
0
# Show the first rows of the dataset
print(heartDisease.head())

# Display the attribute names and data types
print('\n Attributes and datatypes')
print(heartDisease.dtypes)

# Create the model (Bayesian network): age, sex, exang and cp are parents of
# heartdisease, which is the parent of restecg and chol.
model = BayesianModel([('age', 'heartdisease'), ('sex', 'heartdisease'),
                       ('exang', 'heartdisease'), ('cp', 'heartdisease'),
                       ('heartdisease', 'restecg'), ('heartdisease', 'chol')])

# Learn the CPDs with the maximum-likelihood estimator
print('\n Learning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Inference with the fitted Bayesian network
print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)

# Distribution of heartdisease with restecg observed as 1
print('\n 1.Probability of HeartDisease given evidence= restecg :1')
q1 = HeartDiseasetest_infer.query(variables=['heartdisease'],
                                  evidence={'restecg': 1})
print(q1)

# Distribution of heartdisease with cp observed as 2
print('\n 2.Probability of HeartDisease given evidence= cp:2 ')
q2 = HeartDiseasetest_infer.query(variables=['heartdisease'],
                                  evidence={'cp': 2})
print(q2)
示例#15
0
文件: bayesNet.py 项目: skilgall/pgm
evidence=['I'],evidence_card=[3])

# F | I: two states of F for each of the three states of I.
femaleSchool_cpd = TabularCPD(
    variable='F', variable_card=2,
    values=[[.8, .3, .2], [.2, .7, .8]],
    evidence=['I'], evidence_card=[3])

# G | F, L: two states of G for each of the 2 x 2 parent combinations.
govtCorr_cpd = TabularCPD(
    variable='G', variable_card=2,
    values=[[.05, .4, .55, .85], [.95, .6, .45, .15]],
    evidence=['F', 'L'], evidence_card=[2, 2])

model.add_cpds(income_cpd, lifeExp_cpd,
               femaleSchool_cpd, govtCorr_cpd)

# Distribution of G with L observed in state 1.
# `variables` is passed as a list: a bare string is iterated character by
# character and only worked before because the name is a single letter.
inference = VariableElimination(model)
prob_G = inference.query(variables=['G'], evidence={'L': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
# |-----+----------|
# | G_0 |   0.7292 |
# | G_1 |   0.2708 |
# +-----+----------+
# Distribution of G with F observed in state 1.
inference = VariableElimination(model)
prob_G = inference.query(variables=['G'], evidence={'F': 1})
print(prob_G['G'])
# +-----+----------+
# | G   |   phi(G) |
# |-----+----------|
# | G_0 |   0.7174 |
class TestVariableEliminationMarkov(unittest.TestCase):
    """Exercise VariableElimination on a Markov model built from CPD-derived factors."""

    def setUp(self):
        """Build the Markov model, attach its factors and create the inference engine.

        The edge list is the moralised form of the Bayesian network
        A, R -> J -> Q, L <- G (it adds A-R and J-G).  Inference only
        looks at the factors, so results match the Bayesian version.
        """
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'),
                 ('G', 'L'), ('A', 'R'), ('J', 'G')]
        self.markov_model = MarkovModel(edges)

        cpd_a = TabularCPD('A', 2, values=[[0.2], [0.8]])
        cpd_r = TabularCPD('R', 2, values=[[0.4], [0.6]])
        cpd_j = TabularCPD(
            'J', 2,
            values=[[0.9, 0.6, 0.7, 0.1],
                    [0.1, 0.4, 0.3, 0.9]],
            evidence=['A', 'R'], evidence_card=[2, 2])
        cpd_q = TabularCPD(
            'Q', 2,
            values=[[0.9, 0.2], [0.1, 0.8]],
            evidence=['J'], evidence_card=[2])
        cpd_l = TabularCPD(
            'L', 2,
            values=[[0.9, 0.45, 0.8, 0.1],
                    [0.1, 0.55, 0.2, 0.9]],
            evidence=['J', 'G'], evidence_card=[2, 2])
        cpd_g = TabularCPD('G', 2, [[0.6], [0.4]])

        # Factors are added in the order a, r, j, q, l, g.
        factors = [cpd.to_factor()
                   for cpd in (cpd_a, cpd_r, cpd_j, cpd_q, cpd_l, cpd_g)]
        self.markov_model.add_factors(*factors)
        self.markov_inference = VariableElimination(self.markov_model)

    # The reference values used in the assertions below were obtained with
    # SAMIAM (assuming that it is correct ;))

    def test_query_single_variable(self):
        """Marginal of J with no evidence."""
        result = self.markov_inference.query(['J'])
        expected_j = np.array([0.416, 0.584])
        np_test.assert_array_almost_equal(result['J'].values, expected_j)

    def test_query_multiple_variable(self):
        """Marginals of Q and J with no evidence."""
        result = self.markov_inference.query(['Q', 'J'])
        expected_j = np.array([0.416, 0.584])
        expected_q = np.array([0.4912, 0.5088])
        np_test.assert_array_almost_equal(result['J'].values, expected_j)
        np_test.assert_array_almost_equal(result['Q'].values, expected_q)

    def test_query_single_variable_with_evidence(self):
        """Marginal of J given A and R."""
        observed = {'A': 0, 'R': 1}
        result = self.markov_inference.query(variables=['J'],
                                             evidence=observed)
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.60, 0.40]))

    def test_query_multiple_variable_with_evidence(self):
        """Marginals of J and Q given A, R, G and L."""
        observed = {'A': 0, 'R': 0, 'G': 0, 'L': 1}
        result = self.markov_inference.query(variables=['J', 'Q'],
                                             evidence=observed)
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_query_multiple_times(self):
        """Each query is issued twice to check that querying leaves the model unmodified."""
        for _ in range(2):
            result = self.markov_inference.query(['J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))

        for _ in range(2):
            result = self.markov_inference.query(['Q', 'J'])
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.416, 0.584]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.4912, 0.5088]))

        for _ in range(2):
            result = self.markov_inference.query(variables=['J'],
                                                 evidence={'A': 0, 'R': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.60, 0.40]))

        for _ in range(2):
            result = self.markov_inference.query(variables=['J', 'Q'],
                                                 evidence={'A': 0, 'R': 0,
                                                           'G': 0, 'L': 1})
        np_test.assert_array_almost_equal(result['J'].values,
                                          np.array([0.818182, 0.181818]))
        np_test.assert_array_almost_equal(result['Q'].values,
                                          np.array([0.772727, 0.227273]))

    def test_max_marginal(self):
        """Max-marginal with no arguments."""
        value = self.markov_inference.max_marginal()
        np_test.assert_almost_equal(value, 0.1659, decimal=4)

    def test_max_marginal_var(self):
        """Max-marginal for G."""
        value = self.markov_inference.max_marginal(['G'])
        np_test.assert_almost_equal(value, 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        """Max-marginal for G and R."""
        value = self.markov_inference.max_marginal(['G', 'R'])
        np_test.assert_almost_equal(value, 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        """Max-marginal for G, R and A."""
        value = self.markov_inference.max_marginal(['G', 'R', 'A'])
        np_test.assert_almost_equal(value, 0.3260, decimal=4)

    def test_map_query(self):
        """MAP assignment with no arguments."""
        assignment = self.markov_inference.map_query()
        expected = {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}
        self.assertDictEqual(assignment, expected)

    def test_map_query_with_evidence(self):
        """MAP assignment of A, R and L given J, Q and G."""
        assignment = self.markov_inference.map_query(
            ['A', 'R', 'L'], {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(assignment, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        """Edges of the induced graph for a fixed elimination order."""
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        graph = self.markov_inference.induced_graph(order)
        # Normalise each edge and the edge list so the comparison ignores ordering.
        result_edges = sorted(sorted(edge) for edge in graph.edges())
        expected_edges = [['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']]
        self.assertEqual(expected_edges, result_edges)

    def test_induced_width(self):
        """Induced width for the same elimination order."""
        order = ['G', 'Q', 'A', 'J', 'L', 'R']
        self.assertEqual(2, self.markov_inference.induced_width(order))

    def tearDown(self):
        """Drop the fixtures created in setUp."""
        del self.markov_inference
        del self.markov_model
示例#17
0
 def setup(self):
     """Read ``hailfinder.bif`` and keep a VariableElimination engine for the resulting model."""
     network = BIFReader('hailfinder.bif').get_bayesian_model()
     self.inference = VariableElimination(network)
示例#18
0
        root.attributes("-fullscreen", False)
        var = 0


# F11 triggers the handler `f` (defined outside this excerpt).
root.bind("<F11>", f)

# Window background colour.
root.configure(background='#599442')

############ ############ ############ ############ ############
# READ THE DATASET AND INITIALIZE THE MODELS                   #
############ ############ ############ ############ ############
data = pd.read_csv("../Datasets/Final_Processed_Training.csv")
# Drop the "Unnamed: 0" column.
# NOTE(review): presumably a saved index column - confirm against the CSV.
data = data.drop(columns=["Unnamed: 0"])
# Select the already processed datasets and train the BN & Inference models
BN_Model = Bayesian_Net_Model(data)
inference_model = VariableElimination(BN_Model)

# Set the entry for the first parameter: a label plus a scrolled text box.
var1_descr = " Enter a text review: "
label_descr1 = tk.Label(root,
                        text=var1_descr,
                        font='Helvetica 11 bold',
                        bg='#599442')
parameter_1 = tkst.ScrolledText(root,
                                width=75,
                                height=5,
                                wrap=WORD,
                                bd=3,
                                font='Helvetica 10')
# Earlier single-line alternative, kept disabled:
#parameter_1 = tk.Entry(root, width =100, bd=3)
示例#19
0
class InputAgent:
	"""Picks a move by combining a Bayesian-network suggestion with a look-ahead score.

	``info`` dictionaries passed to the methods are read through the keys
	``'player'``, ``'wall'``, ``'apple'`` and ``'dead'``.
	"""

	def __init__(self,k_output):
		"""Remember the key list ``k_output`` and reset every cached decision."""
		self.VEbysmodel1 = 0          # replaced by the inference engine in bys1_init()
		self.input = 0                # last random input
		self.info1 = {}               # evidence dictionary for the network
		self.bys1_input = 0           # (best, second best) indices from bys1_generate()
		self.keyarray = k_output
		self.beta_input = 0           # best move index from beta_generate()
		self.beta_input_list = []     # move indices scoring within `diff` of the best
		self.bys1_beta_input = 0      # final choice from combine_bys1_beta()

	def generate_input(self):
		"""Draw a random integer in [0, 4], store it in ``self.input`` and print it."""
		self.input = random.randint(0, 4)
		print("input ",self.input)

	def bys1_init(self):
		"""Build the movement network and store its inference engine.

		For each direction suffix (D, U, R, L) the nodes ``E<d>`` and
		``A<d>`` are parents of ``M<d>``; ``END`` is the parent of ``PR``.
		The resulting VariableElimination object is kept in
		``self.VEbysmodel1``.
		"""
		# Edge order is kept as E<d>, A<d> per direction, then END -> PR.
		edges = []
		for suffix in ('D', 'U', 'R', 'L'):
			edges.append(('E' + suffix, 'M' + suffix))
			edges.append(('A' + suffix, 'M' + suffix))
		edges.append(('END', 'PR'))
		bysmodel1 = bysmodel(edges)

		# Every parentless E*/A* node gets the same two-state table.
		root_names = ('EU', 'ED', 'EL', 'ER', 'AU', 'AD', 'AL', 'AR')
		root_cpds = [
			tcpd(variable=name, variable_card=2, values=[[0.01, 0.99]])
			for name in root_names
		]

		# Every M* node shares one table conditioned on its E*/A* parents.
		move_cpds = [
			tcpd(variable='M' + suffix, variable_card=2,
			     evidence=['E' + suffix, 'A' + suffix],
			     evidence_card=[2, 2],
			     values=[[0.75, 0.4, 0.9, 0.9], [0.25, 0.6, 0.1, 0.1]])
			for suffix in ('D', 'U', 'L', 'R')
		]

		end_cpd = tcpd(variable='END', variable_card=2,
		               values=[[0.01, 0.99]])
		pr_cpd = tcpd(variable='PR', variable_card=2, evidence=['END'],
		              evidence_card=[2], values=[[1.0, 0.0], [0.0, 1.0]])

		bysmodel1.add_cpds(*root_cpds, *move_cpds, end_cpd, pr_cpd)

		self.VEbysmodel1 = VariableElimination(bysmodel1)

	def bys1_generate(self,info):
		"""Query the network and store the two highest-scoring keys as indices.

		The evidence is rebuilt from ``info``.  For every queried variable
		the entry at index 1 of its result values is compared; the positions
		of the best and second-best variable names in ``self.keyarray`` are
		stored as a tuple in ``self.bys1_input``.
		"""
		self.reset_info()
		self.condition_cal(info)

		VEbys1_query = self.VEbysmodel1.query(['MD', 'MU', 'ML', 'MR', 'PR'],
		                                      evidence=self.info1)

		max_p, target_move = -1.0, ''
		max_p2, target_move2 = -1.0, ''
		for key in VEbys1_query.keys():
			tempv = VEbys1_query[key].values[1]
			if max_p < tempv:
				# New leader: the previous leader becomes the runner-up.
				max_p2, target_move2 = max_p, target_move
				max_p, target_move = tempv, key
			elif max_p2 < tempv:
				max_p2, target_move2 = tempv, key

		print(target_move,target_move2,max_p,max_p2)

		first_index = self.keyarray.index(target_move)
		second_index = self.keyarray.index(target_move2)
		self.bys1_input = (first_index, second_index)

	def condition_cal(self,info):
		"""Set evidence flags in ``self.info1`` from the positions found in ``info``.

		``A*`` flags record on which side of the head the apple lies,
		``E*`` flags record an obstacle directly next to the head, and
		``END`` is set when ``info['dead']`` is truthy.
		"""
		player_cord, apple_cord, enemy_array = self.info_generate(info)
		head_x, head_y = player_cord[0], player_cord[1]
		apple_x, apple_y = apple_cord[0], apple_cord[1]

		if head_x < apple_x:
			self.info1['AR'] = 1
		if head_x > apple_x:
			self.info1['AL'] = 1
		if head_y < apple_y:
			self.info1['AD'] = 1
		if head_y > apple_y:
			self.info1['AU'] = 1

		for enemy in enemy_array:
			enemy_x, enemy_y = enemy[0], enemy[1]
			same_row = head_y == enemy_y
			same_column = head_x == enemy_x
			if head_x == enemy_x-1 and same_row:
				self.info1['ER'] = 1
			if head_x == enemy_x+1 and same_row:
				self.info1['EL'] = 1
			if head_y == enemy_y-1 and same_column:
				self.info1['ED'] = 1
			if head_y == enemy_y+1 and same_column:
				self.info1['EU'] = 1

		if info['dead']:
			self.info1['END'] = 1

	def reset_info(self):
		"""Set every evidence flag in ``self.info1`` to 0."""
		flag_names = ('EU', 'ED', 'ER', 'EL', 'AU', 'AD', 'AL', 'AR', 'END')
		for name in flag_names:
			self.info1[name] = 0

	def info_generate(self,info):
		"""Return ``(player_cord, apple_cord, enemy_array)`` extracted from ``info``.

		``player_cord`` is the first (x, y) of the player, ``enemy_array``
		holds the remaining player segments followed by the walls.
		"""
		player = info['player']
		player_cord = (player.x[0],player.y[0])
		enemy_array = [(player.x[i],player.y[i]) for i in range(1,player.length)]
		enemy_array.extend(info['wall'])
		apple_cord = info['apple']

		return (player_cord,apple_cord,enemy_array)

	def info_generate_withp(self,info):
		"""Return the ``info_generate`` triple together with the player object."""
		positions = self.info_generate(info)
		return (positions,info['player'])

	def beta_generate(self,info,steps):
		"""Score the four moves by look-ahead and store the best ones.

		``self.beta_input`` receives the index of the first best-scoring
		move; ``self.beta_input_list`` receives every index whose score is
		within 2 of the best.  Scores and the resulting list are printed.
		"""
		self.beta_input_list.clear()

		temp_info = info.copy()
		# Index 0..3 corresponds to right, left, up, down.
		score_list = [self.beta_recursion(temp_info, steps, move)
		              for move in range(4)]

		best_score = -65525
		target = 0
		for position, score in enumerate(score_list):
			if score > best_score:
				best_score = score
				target = position

		self.beta_input = target

		diff = 2
		for position, score in enumerate(score_list):
			print(score,end="")
			if abs(best_score-score) <= diff:
				self.beta_input_list.append(position)

		print(self.beta_input_list)

	def beta_recursion(self,info,steps,player_move):
		"""Return the look-ahead score of ``player_move`` from the state in ``info``.

		A collision with an obstacle scores -40, standing on the apple adds
		40, and a branch that reaches ``steps == 0`` adds 20.  Otherwise the
		move is applied to a copy of the player and the floor-average of the
		four follow-up moves is added.  A move is rejected with -40 when the
		copy's ``direction`` equals the value paired with it in the table.
		"""
		(player_cord, apple_cord, enemy_array), player = self.info_generate_withp(info)

		# Hitting a body segment or wall ends this branch immediately.
		if any(self.isCollission(player_cord, enemy) for enemy in enemy_array):
			return -40

		total_score = 40 if self.isCollission(player_cord, apple_cord) else 0
		if steps == 0:
			return total_score+20

		player_copy = player.copyself()
		# (move code, direction that forbids it, method that performs it)
		move_table = (
			(0, 1, 'moveRight'),
			(1, 0, 'moveLeft'),
			(2, 3, 'moveUp'),
			(3, 2, 'moveDown'),
		)
		for code, forbidden, method_name in move_table:
			if player_move == code:
				if player_copy.direction != forbidden:
					getattr(player_copy, method_name)()
				else:
					return -40
				break

		player_copy.update()
		temp_info = info.copy()
		temp_info['player'] = player_copy

		remaining = steps - 1
		branch_total = sum(self.beta_recursion(temp_info, remaining, move)
		                   for move in range(4))

		return total_score + (branch_total//4)

	def isCollission(self,cord1,cord2):
		"""Return True when both coordinates of ``cord1`` and ``cord2`` match."""
		return bool(cord1[0] == cord2[0] and cord1[1] == cord2[1])

	def combine_bys1_beta(self,info,steps):
		"""Run both strategies and store the combined choice in ``self.bys1_beta_input``.

		The network's best move is used if the look-ahead accepts it, then
		the network's second-best, otherwise the look-ahead's own best.
		"""
		self.bys1_generate(info)
		self.beta_generate(info,steps)

		accepted = self.beta_input_list
		if self.bys1_input[0] in accepted:
			choice = self.bys1_input[0]
		elif self.bys1_input[1] in accepted:
			choice = self.bys1_input[1]
		else:
			choice = self.beta_input
		self.bys1_beta_input = choice
示例#20
0
class TestVariableElimination(unittest.TestCase):
    """Checks ``VariableElimination`` on a small six-node Bayesian network.

    All reference values used for comparison in these tests were found using
    SAMIAM (assuming that it is correct).
    """

    # Elimination order shared by the induced-graph and induced-width tests.
    ELIMINATION_ORDER = ('G', 'Q', 'A', 'J', 'L', 'R')

    def setUp(self):
        edges = [('A', 'J'), ('R', 'J'), ('J', 'Q'), ('J', 'L'), ('G', 'L')]
        self.bayesian_model = BayesianModel(edges)

        # CPDs listed in the order in which they are attached to the model.
        cpds = [
            TabularCPD('A', 2, [[0.2], [0.8]]),
            TabularCPD('G', 2, [[0.6], [0.4]]),
            TabularCPD('J', 2,
                       [[0.9, 0.6, 0.7, 0.1],
                        [0.1, 0.4, 0.3, 0.9]],
                       ['R', 'A'], [2, 2]),
            TabularCPD('L', 2,
                       [[0.9, 0.45, 0.8, 0.1],
                        [0.1, 0.55, 0.2, 0.9]],
                       ['G', 'J'], [2, 2]),
            TabularCPD('Q', 2,
                       [[0.9, 0.2],
                        [0.1, 0.8]],
                       ['J'], [2]),
            TabularCPD('R', 2, [[0.4], [0.6]]),
        ]
        self.bayesian_model.add_cpds(*cpds)

        self.bayesian_inference = VariableElimination(self.bayesian_model)

    def _assert_marginals(self, query_result, expected):
        """Compare ``query_result[var].values`` with each expected array, in order."""
        for variable, probabilities in expected:
            np_test.assert_array_almost_equal(query_result[variable].values,
                                              np.array(probabilities))

    def test_query_single_variable(self):
        query_result = self.bayesian_inference.query(['J'])
        self._assert_marginals(query_result, [('J', [0.416, 0.584])])

    def test_query_multiple_variable(self):
        query_result = self.bayesian_inference.query(['Q', 'J'])
        self._assert_marginals(query_result, [('J', [0.416, 0.584]),
                                              ('Q', [0.4912, 0.5088])])

    def test_query_single_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J'],
                                                     evidence={'A': 0, 'R': 1})
        self._assert_marginals(query_result, [('J', [0.60, 0.40])])

    def test_query_multiple_variable_with_evidence(self):
        query_result = self.bayesian_inference.query(variables=['J', 'Q'],
                                                     evidence={'A': 0, 'R': 0,
                                                               'G': 0, 'L': 1})
        self._assert_marginals(query_result, [('J', [0.818182, 0.181818]),
                                              ('Q', [0.772727, 0.227273])])

    def test_query_multiple_times(self):
        # Every query is issued twice and only the second answer is checked:
        # this just tests that the models are not getting modified while
        # querying them.
        engine = self.bayesian_inference

        for _ in range(2):
            query_result = engine.query(['J'])
        self._assert_marginals(query_result, [('J', [0.416, 0.584])])

        for _ in range(2):
            query_result = engine.query(['Q', 'J'])
        self._assert_marginals(query_result, [('J', [0.416, 0.584]),
                                              ('Q', [0.4912, 0.5088])])

        for _ in range(2):
            query_result = engine.query(variables=['J'],
                                        evidence={'A': 0, 'R': 1})
        self._assert_marginals(query_result, [('J', [0.60, 0.40])])

        for _ in range(2):
            query_result = engine.query(variables=['J', 'Q'],
                                        evidence={'A': 0, 'R': 0,
                                                  'G': 0, 'L': 1})
        self._assert_marginals(query_result, [('J', [0.818182, 0.181818]),
                                              ('Q', [0.772727, 0.227273])])

    def test_max_marginal(self):
        value = self.bayesian_inference.max_marginal()
        np_test.assert_almost_equal(value, 0.1659, decimal=4)

    def test_max_marginal_var(self):
        value = self.bayesian_inference.max_marginal(['G'])
        np_test.assert_almost_equal(value, 0.5714, decimal=4)

    def test_max_marginal_var1(self):
        value = self.bayesian_inference.max_marginal(['G', 'R'])
        np_test.assert_almost_equal(value, 0.4055, decimal=4)

    def test_max_marginal_var2(self):
        value = self.bayesian_inference.max_marginal(['G', 'R', 'A'])
        np_test.assert_almost_equal(value, 0.3260, decimal=4)

    def test_map_query(self):
        expected = {'A': 1, 'R': 1, 'J': 1, 'Q': 1, 'G': 0, 'L': 0}
        map_query = self.bayesian_inference.map_query()
        self.assertDictEqual(map_query, expected)

    def test_map_query_with_evidence(self):
        map_query = self.bayesian_inference.map_query(['A', 'R', 'L'],
                                                      {'J': 0, 'Q': 1, 'G': 0})
        self.assertDictEqual(map_query, {'A': 1, 'R': 0, 'L': 0})

    def test_induced_graph(self):
        induced_graph = self.bayesian_inference.induced_graph(
            list(self.ELIMINATION_ORDER))
        # Edges are undirected here, so normalise each pair before sorting.
        result_edges = sorted(sorted(edge) for edge in induced_graph.edges())
        expected_edges = [['A', 'J'], ['A', 'R'], ['G', 'J'], ['G', 'L'],
                          ['J', 'L'], ['J', 'Q'], ['J', 'R'], ['L', 'R']]
        self.assertEqual(expected_edges, result_edges)

    def test_induced_width(self):
        result_width = self.bayesian_inference.induced_width(
            list(self.ELIMINATION_ORDER))
        self.assertEqual(2, result_width)

    def tearDown(self):
        del self.bayesian_inference, self.bayesian_model
示例#21
0
文件: DBN.py 项目: hoangdzung/DBN
# Load the training data; the CSV path is the first command-line argument.
# The separator is passed by keyword because recent pandas releases accept
# only the file path positionally.
data = pd.read_csv(sys.argv[1], sep=",")
data_size = len(data)

# NOTE(review): `pr` (used below as the evidence dict) is not assigned in this
# part of the file; the only visible definition is the disabled line kept
# here -- confirm it is defined earlier.
# pr = {}
# data = pd.read_csv('data.csv') #"fisrm.csv"
# data_size = len(data)
model = BayesianModel()
list_edges = [
    ('TQ', 'DFT'),
    ('DPQ', 'DI'),
    ('C', 'DI'),
    ('DI', 'DFT'),
    ('DI', 'RD'),
    ('DFT', 'RD'),
    ('RD', 'DFO'),
    ('OU', 'DFO'),
]

model.add_edges_from(list_edges)
# Learn the CPDs with a Bayesian estimator and a BDeu prior.
model.fit(data, estimator_type = BayesianEstimator, prior_type = "BDeu", equivalent_sample_size = 10)
for edge in model.edges():
    print(edge)
    print("\n")
infer = VariableElimination(model)

# Copy the nodes into a real list: observed variables are removed from it
# below, and the node container returned by newer networkx versions is a
# read-only view without `remove`.
nodes = list(model.nodes())
Distribution = {}

# Observed variables get a one-hot distribution over the states 0..4.
for key in pr.keys():
    Distribution[key] = [1 - abs(np.sign(pr[key] - i)) for i in range(5)]
    nodes.remove(key)
    print('pr done')

# Every remaining variable is queried given the evidence in `pr`.
for key in nodes:
    Distribution[key] = infer.query([key], evidence = pr)[key].values
    print('done' + key)

print(Distribution['DPQ'])
plt.subplot(4, 2, 1)
# Associating the parameters with the model structure.
# NOTE(review): `cpds`, `grass_model` and `NODES` are defined outside this
# part of the file -- confirm their contents against the earlier code.
for cpd in cpds:
    grass_model.add_cpds(cpd)

# Checking if the cpds are valid for the model.
grass_model.check_model()
# The returned CPD list is discarded here (presumably notebook residue).
grass_model.get_cpds()

# Inference with BN
# Now that we have represented the BN with a complete JPD of all variables,
# it is theoretically possible to answer any query about certain variable(s)
# by marginalizing out all irrelevant variables.
# This procedure is called **inference**.
# In general, a variable elimination method is employed to make use of the CPDs.

# Do exact inference using Variable Elimination
grass_infer = VariableElimination(grass_model)

# Computing the probability of cloudy, sprinkler and rain given evidence of wet grass.
# The last entry of NODES is the evidence variable (fixed to state 1); all
# preceding entries are queried.
q = grass_infer.query(variables=NODES[:-1], evidence={NODES[-1]: 1})
print('Inference with Evidence of Wet=True')
# Print the queried result of every non-evidence node.
for node in NODES[:-1]:
    print(node, '\n', q[node])

# Migrate distribution from  BN to MN

# Chimera-structured Boltzmann machine.

# Represent a Bayesian network with joint probability
# Moralize Bayesian network to Markov network
# Define the two features as binary quadratic functions
import numpy as np
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianModel

data = pd.read_csv('~/Documents/unifiedMLData.csv')

# Network structure: only the occupation -> rating edge is active; the other
# candidate edges are kept disabled for reference.
movie_model = BayesianModel([
    ('occupation', 'rating'),
    # ('gender', 'rating'),
    # ('age', 'rating'),
    # ('age', 'occupation'),
    # ('gender', 'occupation'),
    # ('genre', 'movie_title'),
    # ('movie_title', 'rating'),
])
# Estimate the CPDs from the data.
movie_model.fit(data)


model_infer = VariableElimination(movie_model)
# `query` expects a list of variable names; a bare string is rejected, so the
# single variable is wrapped in a list.
results = model_infer.query(['rating'])

print(results['rating'])

#print(movie_model.get_cpds('rating'))
示例#24
0
    def configure(self, rf):
        # command format will be the following:
        # trainPGClassifier selfName networkStructure
        print sys.argv

        # read network structure and make graph
        # labels in networkStructure identical to model names
        # networkStructure as a string containing a list of tuples

        # selfName = 'actionPGN'
        # netStructureString = "[('Actions3 exp','actionPGN'), ('Actions4','actionPGN')]"

        selfName = sys.argv[1]
        netStructureString = sys.argv[2]

        netStructure = ast.literal_eval(netStructureString)
        print netStructure

        # collect all model names in a list to extract a unique set
        modelList = []
        for k in netStructure:
            modelList += list(k)
        print list(set(modelList))

        # create a port to connect to /sam/rpc:i to query model path for each model name
        portsList = []
        querySupervisorPort = yarp.RpcClient()
        querySupervisorPortName = '/sam/' + selfName + '/queryRpc'
        querySupervisorPort.open(querySupervisorPortName)

        portsList.append({'name': querySupervisorPortName, 'port': querySupervisorPort})
        yarp.Network.connect(querySupervisorPortName, '/sam/rpc:i')
        # ---------------------------------------------------------------------------------------------------------------
        modelDict = dict()
        failFlag = False
        for j in modelList:
            if j != selfName:
                modNameSplit = j.split(' ')
                cmd = yarp.Bottle()
                cmd.addString('dataDir')
                for l in modNameSplit:
                    cmd.addString(l)
                reply = yarp.Bottle()
                querySupervisorPort.write(cmd, reply)
                if reply.get(0).asString() != 'nack':
                    modelDict[modNameSplit[0]] = {'filename': reply.get(1).asString(), 'pickleData': None}
                    # try:
                    # load pickle for the model file
                    currPickle = pickle.load(open(reply.get(1).asString(), 'rb'))
                    # try loading labelComparisonDict from the pickle
                    if 'labelComparisonDict' in currPickle.keys():
                        modelDict[modNameSplit[0]]['pickleData'] = currPickle['labelComparisonDict']
                        print j, 'labelComparisonDict loaded'
                    else:
                        print modNameSplit[0], 'labelComparisonDict not found'
                        failFlag = True

                    if 'overallPerformanceLabels' in currPickle.keys():
                        modelDict[modNameSplit[0]]['labels'] = currPickle['overallPerformanceLabels']
                        print j, 'overallPerformanceLabels loaded'
                    else:
                        print j, 'overallPerformanceLabels not found'
                        failFlag = True
                    # except:
                    #     failFlag = True
                else:
                    failFlag = True

        print 'FAIL?', failFlag
        if failFlag:
            return False

        modelList = modelDict.keys()
        print modelList

        # ---------------------------------------------------------------------------------------------------------------

        # extract unique lists from the collected data
        # the unique list of pickleData[original] represents the possibleClassifications for each model
        modelDict[selfName] = dict()
        modelDict[selfName]['labels'] = []
        selfModelCol = 1

        for j in modelList:
            modelDict[j]['CPD'] = np.zeros([1, len(modelDict[j]['labels'])])
            print j, 'unique labels:', modelDict[j]['labels']
            print j, 'CPD shape', modelDict[j]['CPD'].shape

            modelDict[selfName]['labels'] += modelDict[j]['labels']
            selfModelCol *= len(modelDict[j]['labels'])
            print

        # the possibleClassifications for both models (outputs of the PGN)
        # are the unique list of the model specific labels for all models
        modelDict[selfName]['labels'] = list(set(modelDict[selfName]['labels']))
        modelDict[selfName]['actualLabels'] = modelDict[j]['pickleData']['original']
        modelDict[selfName]['CPD'] = np.zeros([len(modelDict[selfName]['labels']), selfModelCol])
        print selfName, 'unique labels:', modelDict[selfName]['labels']
        print selfName, 'CPD shape', modelDict[selfName]['CPD'].shape

        # check that original classifications of both are identical
        # otherwise cannot combine them with a single node.
        # This is currently a big limitation that will be removed later
        print modelDict[selfName]['labels']
        for j in modelList:
            print j,
            for k in range(len(modelDict[j]['pickleData']['original'])):
                print modelDict[j]['pickleData']['original'][k]
                if modelDict[j]['pickleData']['original'][k] not in modelDict[selfName]['labels']:
                    modelDict[j]['pickleData']['original'][k] = 'unknown'

        for j in modelList:
            if modelDict[j]['pickleData']['original'] != modelDict[selfName]['actualLabels']:
                failFlag = True
                print 'original classifications of', j, 'are not identical to those of', selfName

        if failFlag:
            return False

        # Update netStructureString to reflect changes in the modelList names
        strSections = netStructureString.split("'")
        for k in range(len(strSections)):
            if len(strSections[k]) > 2 and ',' not in strSections[k]:
                strSections[k] = strSections[k].split(' ')[0]
        netStructureString = "'".join(strSections)
        netStructure = ast.literal_eval(netStructureString)
        # ---------------------------------------------------------------------------------------------------------------
        # iterate through actual labels
        # for each actual label, iterate through models
        # for each model find classification label of this model for current actual label
        # get the index of the current classification and add it to its CPD
        # also calculate which item in the joint CPD needs to be incremented

        for j in range(len(modelDict[selfName]['actualLabels'])):
            currActualLabel = modelDict[selfName]['actualLabels'][j]
            row = modelDict[selfName]['labels'].index(currActualLabel)

            colVar = np.zeros([len(modelList)])
            for k in range(len(modelList)):
                cmod = modelList[k]
                if k != 0:
                    pmod = modelList[k-1]
                    colVar *= len(modelDict[pmod]['labels'])

                colVar[k] = modelDict[cmod]['labels'].index(
                                   modelDict[cmod]['pickleData']['results'][j])
                modelDict[cmod]['CPD'][0, colVar[k]] += 1

            col = sum(colVar)
            modelDict[selfName]['CPD'][row, col] += 1

        # take all CPD's and normalise the matrices
        evidenceCard = copy.deepcopy(modelList)
        for j in modelDict:
            if j == selfName:
                # this is a joint CPD matrix
                # normalise columns to have sum = 1
                modelDict[j]['CPD'] = normalize(modelDict[j]['CPD'], axis=0, norm='l1')
            else:
                # normalise sum of matrix = 1
                modelDict[j]['CPD'] /= np.sum(modelDict[j]['CPD'])
                evidenceCard[evidenceCard.index(j)] = len(modelDict[j]['labels'])
            print modelDict[j]['CPD']

        model = BayesianModel(netStructure)

        # create TabularCPD data structure to nest calculated CPD
        for j in modelDict:
            if j == selfName:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j, variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'],
                                                       evidence=modelList,
                                                       evidence_card=evidenceCard)
            else:
                modelDict[j]['cpdObject'] = TabularCPD(variable=j,
                                                       variable_card=len(modelDict[j]['labels']),
                                                       values=modelDict[j]['CPD'])

        # Associating the CPDs with the network
        for j in modelDict:
            model.add_cpds(modelDict[j]['cpdObject'])

        # check_model checks for the network structure and CPDs and verifies that the CPDs are correctly
        # defined and sum to 1.
        if not model.check_model():
            print 'Model check returned unsuccessful'
            return False

        infer = VariableElimination(model)
        confMatrix = np.zeros(len(modelDict[selfName]['labels']))
        # iterate over all original data and perform classifications to calculate if accuracy with PGN has increased
        for j in range(len(modelDict[selfName]['actualLabels'])):
            currEvidenceDict = dict()
            for k in modelList:
                currEvidenceDict[k] = modelDict[k]['labels'].index(modelDict[k]['pickleData']['results'][j])

            q = infer.query([selfName], currEvidenceDict)

            inferenceClass = modelDict[selfName]['labels'][np.argmax(q[selfName].values)]
            actualClass = modelDict[selfName]['actualLabels'][j]
            confMatrix[modelDict[selfName].index(actualClass), modelDict[selfName].index(inferenceClass)] += 1

        print "%Accuracy with PGN"
        dCalc = SAMTesting.calculateData(modelDict[selfName]['actualLabels'], confMatrix)

        return True
示例#25
0
def bayesian_network_prediction(dataset, ad_cpt, gh_cpt, ga_cpt, prediction_cpt):
    """Build the match-prediction network and query it for every dataset row.

    The network is ``ability_difference -> goals_home / goals_away ->
    Prediction``; the four CPD tables are supplied by the caller.  For each
    entry of the histogram returned by ``get_histogram(dataset)`` the
    ``Prediction`` variable is queried with that entry as evidence on
    ``ability_difference``.  Ten further fixed queries are run afterwards.
    None of the computed values are returned.

    Returns:
        Always 0.
    """
    # Model construction: start by defining the network structure.
    dolores_model = BayesianModel([
        ('ability_difference', 'goals_home'),
        ('ability_difference', 'goals_away'),
        ('goals_home', 'Prediction'),
        ('goals_away', 'Prediction'),
    ])

    cpds = (
        TabularCPD(variable='ability_difference', variable_card=42,
                   values=ad_cpt),
        TabularCPD(variable='goals_home', variable_card=8,
                   values=gh_cpt,
                   evidence=['ability_difference'],
                   evidence_card=[42]),
        TabularCPD(variable='goals_away', variable_card=8,
                   values=ga_cpt,
                   evidence=['ability_difference'],
                   evidence_card=[42]),
        TabularCPD(variable='Prediction', variable_card=3,
                   values=prediction_cpt,
                   evidence=['goals_home', 'goals_away'],
                   evidence_card=[8, 8]),
    )

    # Attach the parameters to the structure and validate them.
    dolores_model.add_cpds(*cpds)
    dolores_model.check_model()
    dolores_model.get_independencies()
    from pgmpy.inference import VariableElimination
    inference = VariableElimination(dolores_model)

    histogram, home_scores, away_scores = get_histogram(dataset)
    predictions = []
    results = []
    for n, rank in enumerate(histogram):
        result = dataset[n]['result']
        pred = inference.query(variables=['Prediction'],
                               evidence={'ability_difference': rank})
        predictions.append(pred.values)
        results.append(result)

    # Side-by-side table: rounded predictions next to the actual result.
    predictions = np.around(np.array(predictions), 2)
    results = np.array(results)
    results = results.reshape((results.shape[0], 1))
    results = np.around(results, 0)
    kk = np.concatenate((predictions, results), axis=1)

    # Spot-check queries for a fixed set of ability differences.
    for ability_difference in (0, 5, 10, 15, 20, 21, 22, 23, 24, 25):
        inference.query(variables=['Prediction'],
                        evidence={'ability_difference': ability_difference})

    return 0
示例#26
0
# Structure learning: hill-climb search over `df`, scored with `bic`.
# NOTE(review): `df` and `bic` are defined outside this part of the file.
hc = HillClimbSearch(df, scoring_method=bic)
#hc = ExhaustiveSearch(df, k2)
model = hc.estimate()
# Print every edge of the estimated structure.
for ee in model.edges():
    print(ee)



## Parameter learning
# Rebuild a BayesianModel from the learned edges and fit its CPDs on `df`.
from pgmpy.models import BayesianModel
mod = BayesianModel(model.edges())
mod.fit(df)
for cpd in mod.get_cpds():
    print(cpd)

#print(mod.local_independencies('HA'))

## Model inference
# Query the variable 'HA' with variable elimination and print the result.
from pgmpy.inference import VariableElimination, BeliefPropagation
cancer_infer = VariableElimination(mod)
q = cancer_infer.query(variables=['HA'])
print(q)

# Alternative inference engine, currently disabled:
#cancer_infer = BeliefPropagation(mod)
#q = cancer_infer.query(variables=['HA'])
#print(q)




示例#27
0
class GeneralModel(Model):
    """ Allows construction of an arbitrary causal graph & action space with discrete (currently assumed binary) CPD tables.
        This implementation will not scale to large graphs. """
    def __init__(self, model, actions, py_func):
        """ model is a pgmpy.BayesianModel
            actions is a list of (var,value) tuples
            py_func maps an assignment to the parents of Y to P(Y=1 | assignment) """
        self.py_func = py_func
        self.parents = sorted(model.get_parents('Y'))
        self.N = len(self.parents)
        self.actions = actions
        self.K = len(actions)

        self.observational_model = model
        self.observational_inference = VariableElimination(
            self.observational_model)
        # One intervened model (and one sampler) per action.
        self.post_action_models = [
            GeneralModel.do(model, action) for action in actions
        ]
        self.samplers = [
            BayesianModelSampling(model_a)
            for model_a in self.post_action_models
        ]

        # NOTE(review): the two-value unpacking below assumes `query` returns
        # a pair whose second item is the joint factor -- confirm against the
        # pgmpy version in use.
        self.interventional_distributions = []
        for new_model in self.post_action_models:
            infer = VariableElimination(new_model)
            _, distribution_over_parents = infer.query(self.parents)
            self.interventional_distributions.append(distribution_over_parents)

        self.pre_compute()

    def expected_Y_observational(self):
        """ return a vector of length K with the expected Y given we observe the variable-value pair corresponding to each action """
        expected_Y = np.zeros(self.K)

        for indx, action in enumerate(self.actions):
            var, value = action
            if var is None:
                # The "do nothing" action: no evidence is observed.
                _, distribution = self.observational_inference.query(['Y'])
            else:
                _, distribution = self.observational_inference.query(
                    ['Y'], evidence={var: value})

            pyis1 = distribution.reduce([('Y', 1)], inplace=False).values
            expected_Y[indx] = pyis1

        return expected_Y

    def _expected_Y(self):
        """ return a vector of length K with P(Y=1) under each post-action model """
        expected_Y = np.zeros(self.K)
        for indx, new_model in enumerate(self.post_action_models):
            infer = VariableElimination(new_model)
            _, distribution_over_reward = infer.query(['Y'])
            expected_reward = distribution_over_reward.reduce(
                [('Y', 1)], inplace=False
            ).values  #TODO investigate failing if inplace=True - bug in pgmpy?
            expected_Y[indx] = expected_reward
        return expected_Y

    @staticmethod
    def build_ycpd(py_func, N):
        """ return the 2 x 2**N table [P(Y=0|x); P(Y=1|x)], one column per
            assignment yielded by Model.generate_binary_assignments(N) """
        cpd = np.zeros((2, 2**N))
        for i, x in enumerate(Model.generate_binary_assignments(N)):
            p_one = py_func(x)
            cpd[0, i] = 1 - p_one
            cpd[1, i] = p_one
        return cpd

    def pYgivenX(self, x):
        """ return P(Y=1 | x) for an assignment x to the parents of Y """
        return self.py_func(x)

    @classmethod
    def create_confounded_parallel(cls, N, N1, pz, pY, q, act_on_z=True):
        """ convenience method for constructing equivalent models to Confounded_Parallel"""
        q10, q11, q20, q21 = q
        pZ = [[1 - pz, pz]]
        pXgivenZ_N1 = [[1 - q10, 1 - q11], [q10, q11]]
        pXgivenZ_N2 = [[1 - q20, 1 - q21], [q20, q21]]

        xvars = ['X' + str(i) for i in range(1, N + 1)]
        edges = chain([('Z', v) for v in xvars], [(v, 'Y') for v in xvars])
        model = BayesianModel(edges)
        cpds = [TabularCPD(variable='Z', variable_card=2, values=pZ)]
        # The first N1 X variables use (q10, q11), the remaining ones (q20, q21).
        cpds.extend([
            TabularCPD(variable=v,
                       variable_card=2,
                       values=pXgivenZ_N1,
                       evidence=['Z'],
                       evidence_card=[2]) for v in xvars[0:N1]
        ])
        cpds.extend([
            TabularCPD(variable=v,
                       variable_card=2,
                       values=pXgivenZ_N2,
                       evidence=['Z'],
                       evidence_card=[2]) for v in xvars[N1:]
        ])

        def py(x):
            # Y depends only on the first and the last X variable.
            i, j = x[0], x[N - 1]
            return pY[i, j]

        ycpd = GeneralModel.build_ycpd(py, N)
        cpds.append(
            TabularCPD(variable='Y',
                       variable_card=2,
                       values=ycpd,
                       evidence=xvars,
                       evidence_card=[2] * len(xvars)))

        model.add_cpds(*cpds)
        model.check_model()

        if act_on_z:
            actions = list(
                chain([(x, 0) for x in xvars], [(x, 1) for x in xvars],
                      [("Z", i) for i in (0, 1)], [(None, None)]))

        else:
            actions = list(
                chain([(x, 0) for x in xvars], [(x, 1) for x in xvars],
                      [(None, None)]))

        pgm_model = cls(model, actions, py)

        return pgm_model

    @classmethod
    def create_very_confounded(cls, Nz, pZ1, pZ, a, b, py):
        """ construct a very confounded model """

        zvars = ['Z' + str(i) for i in range(1, Nz + 1)]
        xvars = ['X' + str(i) for i in range(1, 3)]
        edges = chain(product(zvars, xvars), product(xvars, ['Y']))
        bayes_model = BayesianModel(edges)

        z_other = list(product((0, 1), repeat=(Nz - 1)))

        px1 = np.hstack((np.full(2**(Nz - 1),
                                 a), [np.mean(z) for z in z_other]))
        px2 = np.hstack((np.full(2**(Nz - 1),
                                 b), [np.prod(z) for z in z_other]))

        cpds = [
            TabularCPD(variable='Z1',
                       variable_card=2,
                       values=np.vstack((1 - pZ1, pZ1)))
        ]
        cpds.extend([
            TabularCPD(variable=v,
                       variable_card=2,
                       values=np.vstack((1 - pZ, pZ))) for v in zvars[1:]
        ])
        cpds.append(
            TabularCPD(variable='X1',
                       variable_card=2,
                       values=np.vstack((1 - px1, px1)),
                       evidence=zvars,
                       evidence_card=[2] * Nz))
        cpds.append(
            TabularCPD(variable='X2',
                       variable_card=2,
                       values=np.vstack((1 - px2, px2)),
                       evidence=zvars,
                       evidence_card=[2] * Nz))
        cpds.append(
            TabularCPD(variable='Y',
                       variable_card=2,
                       values=np.vstack((1 - py, py)),
                       evidence=xvars,
                       evidence_card=[2] * len(xvars)))

        bayes_model.add_cpds(*cpds)
        bayes_model.check_model()
        actions = list(
            chain([(z, 0) for z in zvars], [(z, 1) for z in zvars],
                  [(x, 0) for x in xvars], [(x, 1) for x in xvars],
                  [(None, None)]))

        # The constructor requires a py_func; derive it from the `py` table.
        # Assumes `py` lists P(Y=1 | X1, X2) column by column with X2 varying
        # fastest, matching evidence=xvars above -- TODO confirm.
        py_flat = np.ravel(py)

        def py_func(x):
            return py_flat[2 * int(x[0]) + int(x[1])]

        model = cls(bayes_model, actions, py_func)
        return model

    @classmethod
    def do(cls, model, action):
        """ return a copy of model in which `var` is cut off from its parents
            and fixed to `value`; action (None, None) leaves the model as is """
        var, value = action
        new_model = BayesianModel(model.edges())
        if var is not None:
            for p in model.get_parents(var):
                new_model.remove_edge(p, var)
        cpds = []
        for cpd in model.get_cpds():
            if cpd.variable == var:
                # Point-mass CPD on `value`; every other state stays at 0, so
                # this also holds for variables with more than two states.
                values = np.zeros((cpd.variable_card, 1))
                values[value] = 1.0
                cpd_new = TabularCPD(variable=var,
                                     variable_card=cpd.variable_card,
                                     values=values)
                cpds.append(cpd_new)
            else:
                cpds.append(cpd.copy())
        new_model.add_cpds(*cpds)
        new_model.check_model()
        return new_model

    def sample(self, action):
        """ samples given the specified action index and returns the values of the parents of Y, Y. """
        s = self.samplers[action].forward_sample()
        x = s.loc[:, self.parents].values[0]
        y = s.loc[:, ['Y']].values[0][0]
        return x, y

    def P(self, x):
        """ returns the probability of the given assignment to the parents of Y for given each action. """
        # Materialise the pairs: a bare zip iterator would be exhausted after
        # the first reduce call, leaving later distributions unreduced.
        assignment = list(zip(self.parents, x))
        pa = np.asarray([
            q.reduce(assignment, inplace=False).values
            for q in self.interventional_distributions
        ])
        return pa
示例#28
0
def gradeBayesianInference(evidences):
    """Query ``'Voto'`` on the ``grades`` model given ``evidences``.

    A new ``VariableElimination`` engine is built on every call; the result
    of its ``query`` is returned unchanged.
    """
    engine = VariableElimination(grades)
    return engine.query(variables=['Voto'], evidence=evidences)
class StateNameDecorator(unittest.TestCase):
    """
    Tests that factors, CPDs and inference objects accept state names
    interchangeably with integer state indices.
    """

    def setUp(self):
        """Create the state-name maps and the factors, CPDs and engines under test."""
        self.sn2 = {'grade': ['A', 'B', 'F'],
                    'diff': ['high', 'low'],
                    'intel': ['poor', 'good', 'very good']}
        self.sn1 = {'speed': ['low', 'medium', 'high'],
                    'switch': ['on', 'off'],
                    'time': ['day', 'night']}

        self.phi1 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12))
        self.phi2 = DiscreteFactor(['speed', 'switch', 'time'],
                                   [3, 2, 2], np.ones(12), state_names=self.sn1)

        self.cpd1 = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 3])
        self.cpd2 = self._make_grade_cpd()

        student = BayesianModel([('diff', 'grade'), ('intel', 'grade')])
        diff_cpd = TabularCPD('diff', 2, [[0.2, 0.8]])
        intel_cpd = TabularCPD('intel', 2, [[0.3, 0.7]])
        grade_cpd = TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1],
                                            [0.1, 0.1, 0.1, 0.1],
                                            [0.8, 0.8, 0.8, 0.8]],
                               evidence=['diff', 'intel'],
                               evidence_card=[2, 2])
        student.add_cpds(diff_cpd, intel_cpd, grade_cpd)
        self.model1 = VariableElimination(student)
        self.model2 = VariableElimination(student, state_names=self.sn2)

    # -- helpers (no `test` prefix, so unittest does not collect them) -------

    def _make_factor(self):
        """Return a fresh all-ones factor over speed/switch/time with state names."""
        return DiscreteFactor(['speed', 'switch', 'time'],
                              [3, 2, 2], np.ones(12), state_names=self.sn1)

    def _make_grade_cpd(self):
        """Return a fresh grade CPD over diff/intel that carries state names."""
        return TabularCPD('grade', 3, [[0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
                                       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8]],
                          evidence=['diff', 'intel'], evidence_card=[2, 3],
                          state_names=self.sn2)

    def _check_reduced_factor(self, phi):
        """Assert `phi` is the factor left after fixing speed and time."""
        self.assertEqual(phi.variables, ['switch'])
        self.assertEqual(phi.cardinality, [2])
        np_test.assert_array_equal(phi.values, np.array([1, 1]))

    def _check_reduced_cpd(self, cpd):
        """Assert `cpd` is the grade CPD left after fixing diff."""
        self.assertEqual(cpd.variable, 'grade')
        self.assertEqual(cpd.variables, ['grade', 'intel'])
        np_test.assert_array_equal(cpd.get_values(), np.array([[0.1, 0.1, 0.1],
                                                               [0.1, 0.1, 0.1],
                                                               [0.8, 0.8, 0.8]]))

    # -- tests ---------------------------------------------------------------

    def test_assignment_statename(self):
        """assignment() reports names when the factor has them, indices otherwise."""
        req_op1 = [[('speed', 'low'), ('switch', 'on'), ('time', 'night')],
                   [('speed', 'low'), ('switch', 'off'), ('time', 'day')]]
        req_op2 = [[('speed', 0), ('switch', 0), ('time', 1)],
                   [('speed', 0), ('switch', 1), ('time', 0)]]

        self.assertEqual(self.phi1.assignment([1, 2]), req_op2)
        self.assertEqual(self.phi2.assignment([1, 2]), req_op1)

    def test_factor_reduce_statename(self):
        """Factor reduce accepts names or indices, in place or returning a copy."""
        for evidence in ([('speed', 'medium'), ('time', 'day')],
                         [('speed', 1), ('time', 0)]):
            # A fresh list is passed each time so no call can see a list
            # another call may have touched.
            phi = self._make_factor()
            phi.reduce(list(evidence))
            self._check_reduced_factor(phi)

            phi = self._make_factor().reduce(list(evidence), inplace=False)
            self._check_reduced_factor(phi)

    def test_reduce_cpd_statename(self):
        """CPD reduce accepts names or indices, in place or returning a copy."""
        for evidence in ([('diff', 'high')], [('diff', 0)]):
            cpd = self._make_grade_cpd()
            cpd.reduce(list(evidence))
            self._check_reduced_cpd(cpd)

            cpd = self._make_grade_cpd().reduce(list(evidence), inplace=False)
            self._check_reduced_cpd(cpd)

    def test_inference_query_statename(self):
        """query and map_query give the same answer for name and index evidence."""
        inf_op1 = self.model2.query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.query(['grade'], evidence={'intel': 0})
        req_op = {'grade': DiscreteFactor(['grade'], [3], np.array([0.1, 0.1, 0.8]))}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        # Previously this line re-checked inf_op1 and never compared inf_op2.
        self.assertEqual(inf_op2, req_op)

        inf_op1 = self.model2.map_query(['grade'], evidence={'intel': 'poor'})
        inf_op2 = self.model2.map_query(['grade'], evidence={'intel': 0})
        req_op = {'grade': 'F'}

        self.assertEqual(inf_op1, inf_op2)
        self.assertEqual(inf_op1, req_op)
        self.assertEqual(inf_op2, req_op)
示例#30
0
# Attach all five CPDs to the network structure in one call.
cancer_model.add_cpds(
    cpd_poll,
    cpd_smoke,
    cpd_cancer,
    cpd_xray,
    cpd_dysp,
)

# Print the result of validating the CPDs against the structure.
print(cancer_model.check_model())

# d-separation checks. Optional background; not required for the project.
print(cancer_model.is_active_trail('Pollution', 'Smoker'))
print(cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer']))
print(cancer_model.local_independencies('Xray'))
print(cancer_model.get_independencies())

# Dump the structure and the parameters.
print(cancer_model.edges())
print(cancer_model.nodes())
print(cancer_model.get_cpds())

# Exact inference with variable elimination.
from pgmpy.inference import VariableElimination

cancer_infer = VariableElimination(cancer_model)

# Each pair is (queried variable, evidence); the results are printed in order.
for _variable, _evidence in (
        ('Dyspnoea', {'Cancer': 0}),
        ('Cancer', {'Smoker': 0, 'Pollution': 0}),
):
    print(cancer_infer.query(variables=[_variable], evidence=_evidence))
示例#31
0
from pgmpy.readwrite import BIFReader
from pgmpy.inference import VariableElimination
import os

curPath = os.path.abspath(os.path.dirname(__file__))
# The data directory is derived from the part of this file's path that ends
# with the "sklearn\" component. str.find returns -1 when the component is
# absent, which used to yield a meaningless 7-character prefix, so fail loudly.
_marker = "sklearn\\"
_marker_pos = curPath.find(_marker)
if _marker_pos < 0:
    raise IOError("cannot find %r in %r; unable to locate asia.bif"
                  % (_marker, curPath))
rootPath = curPath[:_marker_pos + len(_marker)]
dataPath = rootPath + "Input/MLWorkHome/experiment6/img/asia.bif"
reader = BIFReader(dataPath)
asia_model = reader.get_model()
# nodes() shows which nodes the model contains.
print(asia_model.nodes())
# NodeView(('xray', 'bronc', 'asia', 'dysp', 'lung', 'either', 'smoke', 'tub'))
# Exercise 1: check whether a directed connection exists between the tub node
# and the either node.
print("练习1:")
print(asia_model.is_active_trail('tub', 'either'))

# Exercise 2: check whether tub and dysp can be connected through the either
# node.
print("练习2:")
print(asia_model.is_active_trail('tub', 'dysp', observed=['either']))

asia_infer = VariableElimination(asia_model)
# Distribution of bronc when smoke is 0.
q = asia_infer.query(variables=['bronc'], evidence={'smoke': 0})
print(q['bronc'])

# Exercise 3: query the distribution of xray when either is 1.
print("练习3:")
asia_infer2 = VariableElimination(asia_model)
p = asia_infer2.query(variables=['xray'], evidence={'either': 1})
print(p['xray'])
               cpd_vehicleRunsHot, cpd_badCarbuerator, cpd_weakBattery, cpd_badStarter, cpd_noFuelPressure, 
               cpd_faultyFuelFilter, cpd_cloggedAirFilter, cpd_wornDistributor, cpd_wornEngineMounts, 
               cpd_harmonicBalancer, cpd_vacuumLeaks, cpd_engineTuneUp, cpd_sparkPlug, cpd_pistonNotWorking,
               cpd_lowCoolantLevel, cpd_faultyEngineCoolingFan, cpd_stuckThermostat, cpd_corrodedBatteryTerminal,
               cpd_fuelSystemCleaning, cpd_fuelPumpReplacement, cpd_badIgnitionSytem, cpd_badTimingChain, 
               cpd_brokenMissingFanAssembly, cpd_noSpark, cpd_ignitionCoilForSpark)


# Validate the model (the return value of check_model() is not used here).
model.check_model()


# Build a variable-elimination inference engine over the model.

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)


#function for getting all the CPDs with the node given as evidence
def getAllProbabilities(user_evidence):
    """
    Print the active-trail nodes of every node in `user_evidence`.

    For each entry the function prints whatever
    ``model.active_trail_nodes(entry)`` returns (``model`` is the module-level
    network), followed by a list built by iterating over that result.

    Parameters
    ----------
    user_evidence : iterable
        Node names to inspect, processed in iteration order.
    """
    print(user_evidence)

    for evidence_node in user_evidence:
        activeTrailNodes = model.active_trail_nodes(evidence_node)
        print(activeTrailNodes)
        # list() copies the members in iteration order, exactly as the former
        # element-by-element append loop did.
        nodes = list(activeTrailNodes)
        print("printing..", nodes)
        
示例#33
0
    UGM = DGM.to_markov_model()
    jtree = UGM.to_junction_tree()

    evidence = {'A': 1}

    marginal = jta(UGM, jtree, evidence.items())
    print "Results of the implemented JTA"
    for m in marginal:
        print m

    print "\n=======================================\n"

    print "Results of the Variable Elimination from pgmpy"

    inference = VariableElimination(DGM)
    for v in get_different(DGM.nodes(), evidence):
        print inference.query(variables=[v], evidence=evidence)[v]

    # visualization part
    # nx.draw_circular(DGM, with_labels=True, node_color="white", node_size=1000)
    # plt.draw()
    # plt.show()

    # nx.draw_circular(UGM, with_labels=True, node_color="white", node_size=1000)
    # plt.draw()
    # plt.show()

    # nx.draw_circular(jtree, with_labels=True, node_color="white", node_shape='s', node_size=8000)
    # plt.draw()
    # plt.show()
示例#34
0
    def predict_probability(self, data):
        """
        Predicts probabilities of all states of the missing variables.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame object with column names same as the variables in the model.

        Returns
        -------
        pandas DataFrame
            Indexed like `data`, with one column per (missing variable, state)
            pair named ``<variable>_<state>``.

        Raises
        ------
        ValueError
            If `data` already contains every variable of the model, or if it
            has a column that is not a variable of the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:80]
        >>> predict_data = values[80:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(train_data)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('B', axis=1, inplace=True)
        >>> y_prob = model.predict_probability(predict_data)
        >>> y_prob  # values depend on the random data; first rows shown
            B_0         B_1
        80  0.439178    0.560822
        81  0.581970    0.418030
        82  0.488275    0.511725
        83  0.581970    0.418030
        84  0.510794    0.489206
        """
        from pgmpy.inference import VariableElimination

        if set(data.columns) == set(self.nodes()):
            raise ValueError("No variable missing in data. Nothing to predict")

        elif set(data.columns) - set(self.nodes()):
            raise ValueError("Data has variables which are not in the model")

        missing_variables = set(self.nodes()) - set(data.columns)
        pred_values = defaultdict(list)

        model_inference = VariableElimination(self)
        for _, data_point in data.iterrows():
            states_dict = model_inference.query(variables=missing_variables, evidence=data_point.to_dict())
            for k, v in states_dict.items():
                # Look the state names up once per variable, not once per state.
                state_names = self.get_cpds(k).state_names[k]
                for l, probability in enumerate(v.values):
                    # str.format also handles variable names that are not
                    # strings, where `k + '_'` raised TypeError.
                    pred_values['{}_{}'.format(k, state_names[l])].append(probability)
        return pd.DataFrame(pred_values, index=data.index)
示例#35
0
            evidence_card=[2]
)

# Register all five CPDs on the student network.
student_model.add_cpds(grade_cpd, difficulty_cpd, intel_cpd, letter_cpd, sat_cpd)

print(student_model.get_cpds())
print(student_model.get_independencies())

# Query G given I=1 and D=0 with variable elimination.
from pgmpy.inference import VariableElimination
student_infer = VariableElimination(student_model)
prob_G = student_infer.query(variables=["G"], evidence={"I": 1, "D": 0})
print(prob_G)

# Random binary data set: 1000 rows, one column per variable.
import numpy as np
import pandas as pd
raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
data = pd.DataFrame(raw_data, columns=list("DIGLS"))
data.head()

from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
model = BayesianModel([
    ("D", "G"),
    ("I", "G"),
    ("I", "S"),
    ("G", "L"),
])
# Train the model using maximum likelihood estimation.
# %% codecell
drawGraph(carModel)
# %% markdown [markdown]
# #### Testing conditional independence:
# $$
# \color{DodgerBlue}{\text{WorkCapacity (observed)}: \;\;\;\;\;\;\;  \text{Experience} \; \bot \; \text{Absenteeism} \; | \; \text{WorkCapacity}}
# $$

# Given that **WorkCapacity**'s state is observed, we can make the following equivalent statements:
# * there is NO active trail between **Experience** and **Absenteeism**.
# * **Experience** and **Absenteeism** are locally independent.
# * the probability of **Experience** won't influence probability of **Absenteeism** (and vice versa).
#

# %% codecell
# Variable-elimination inference engine over carModel.
elim: VariableElimination = VariableElimination(model=carModel)

# %% markdown [markdown]
# **Testing Conditional Independence:** Using Active Trails Methods
# %% codecell
# With nothing observed, the trail Experience -> Absenteeism must be active.
assert carModel.is_active_trail(start=Experience.var,
                                end=Absenteeism.var,
                                observed=None)

# Observing WorkCapacity alone is asserted NOT to block the trail.
# NOTE(review): the markdown cell above states there is no active trail once
# WorkCapacity is observed, while this assertion requires one - confirm which
# of the two is intended.
assert carModel.is_active_trail(
    start=Experience.var, end=Absenteeism.var, observed=[WorkCapacity.var]
), "Check: still need to condition on extra variable for this not to be an active trail"

# Finding out which extra variable to condition on:
# TODO OBSERVEDVARS: fix the observedVars function so that, assuming a causal
# chain, it identifies the middle node between the passed 'start' and 'end'
# nodes and includes it in the output list along with the existing backdoors.
observedVars(carModel, start=Experience, end=Absenteeism)
                           ('Tuberculose', 'TbOuCa'),
                           ('Bronchite', 'Dyspnea')])

# Parameter learning.
from pgmpy.estimators import BayesianEstimator

est = BayesianEstimator(best_model, data)
print(est.estimate_cpd('Cancer', prior_type='BDeu', equivalent_sample_size=10))

best_model.fit(data, estimator=BayesianEstimator, prior_type='BDeu')
# To inspect the learned parameters, print each entry of best_model.get_cpds().

# Profile of the people who have cancer.
model_infer = VariableElimination(best_model)
_profile_variables = [
    'Age', 'Fumeur', 'Tuberculose', 'VisiteAsie', 'Radiographie',
    'Bronchite', 'Dyspnea', 'Geographie', 'TbOuCa',
]
# Cancer states: 0 = ? , 1 = False, 2 = True
q = model_infer.query(variables=list(_profile_variables),
                      evidence={'Cancer': 2})
print("Caratéristiques des personnes ayant le cancer :")
# 'Age' is part of the query but is not printed.
for _name in _profile_variables[1:]:
    print(q[_name])
    def predict_probability(self, data):
        """
        Predicts probabilities of all states of the missing variables.

        Parameters
        ----------
        data : pandas DataFrame object
            A DataFrame object with column names same as the variables in the model.

        Returns
        -------
        pandas DataFrame
            Indexed like `data`, with one column per (missing variable, state)
            pair named ``<variable>_<state>``.

        Raises
        ------
        ValueError
            If `data` already contains every variable of the model, or if it
            has a column that is not a variable of the model.

        Examples
        --------
        >>> import numpy as np
        >>> import pandas as pd
        >>> from pgmpy.models import BayesianModel
        >>> values = pd.DataFrame(np.random.randint(low=0, high=2, size=(100, 5)),
        ...                       columns=['A', 'B', 'C', 'D', 'E'])
        >>> train_data = values[:80]
        >>> predict_data = values[80:]
        >>> model = BayesianModel([('A', 'B'), ('C', 'B'), ('C', 'D'), ('B', 'E')])
        >>> model.fit(train_data)
        >>> predict_data = predict_data.copy()
        >>> predict_data.drop('B', axis=1, inplace=True)
        >>> y_prob = model.predict_probability(predict_data)
        >>> y_prob  # values depend on the random data; first rows shown
            B_0         B_1
        80  0.439178    0.560822
        81  0.581970    0.418030
        82  0.488275    0.511725
        83  0.581970    0.418030
        84  0.510794    0.489206
        """
        from pgmpy.inference import VariableElimination

        if set(data.columns) == set(self.nodes()):
            raise ValueError("No variable missing in data. Nothing to predict")

        elif set(data.columns) - set(self.nodes()):
            raise ValueError("Data has variables which are not in the model")

        missing_variables = set(self.nodes()) - set(data.columns)
        pred_values = defaultdict(list)

        model_inference = VariableElimination(self)
        for _, data_point in data.iterrows():
            states_dict = model_inference.query(variables=missing_variables, evidence=data_point.to_dict())
            for k, v in states_dict.items():
                # Look the state names up once per variable, not once per state.
                state_names = self.get_cpds(k).state_names[k]
                for l, probability in enumerate(v.values):
                    # str.format also handles variable names that are not
                    # strings, where `k + '_'` raised TypeError.
                    pred_values['{}_{}'.format(k, state_names[l])].append(probability)
        return pd.DataFrame(pred_values, index=data.index)
示例#39
0
import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
# The column names are the comma-separated first line of the names file.
# The file is closed by the with-block, and the line terminator is stripped
# so the last column name does not carry a trailing newline.
with open('data7_name.csv', 'r') as f:
    attributes = f.readline().rstrip('\r\n').split(',')
heartDisease = pd.read_csv('data7.csv', names=attributes)
print("\nAttributes and datatypes")
print(heartDisease.dtypes)

# Network structure, with parameters learned by BayesianEstimator.
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'), ('exang', 'trestbps'),
                       ('trestbps', 'heartdisease'), ('fbs', 'heartdisease')])
model.fit(heartDisease, BayesianEstimator)
HeartDisease_infer = VariableElimination(model)

print("\n 1. Probability heart disease given age=28")
q = HeartDisease_infer.query(['heartdisease'], {'age': 28})
print(q['heartdisease'])
print("\n 2. Probability of heart disease for male")
q = HeartDisease_infer.query(['heartdisease'], {'sex': 1})
print(q['heartdisease'])
示例#40
0
import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Load the data; "?" placeholders become NaN.
heart_data = pd.read_csv("Data7.csv").replace("?", np.nan)

model = BayesianModel([
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol'),
])
model.fit(heart_data, estimator=MaximumLikelihoodEstimator)
infer = VariableElimination(model)

# Query heartdisease once per evidence set and print each result.
for _evidence in ({'chol': 100}, {'age': 28}):
    q = infer.query(variables=['heartdisease'], evidence=_evidence)
    print(q["heartdisease"])
示例#41
0
文件: heart.py 项目: suman-kr/ml-repo
import pandas as pd

from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

dataset = pd.read_csv('dataset.csv')

model = BayesianModel([
    ('HD', 'AGE'),
    ('HD', 'GENDER'),
    ('CP', 'AGE'),
    ('CHOLESTEROL', 'AGE'),
    ('HD', 'BP'),
    ('GENDER', 'CP'),
])

# Learn the CPDs from the data set by maximum likelihood.
model.fit(dataset, estimator=MaximumLikelihoodEstimator)

print('\n Inferencing with Bayesian Network:')

HeartDisease_infer = VariableElimination(model)
# To inspect the learned parameters, print each entry of model.get_cpds().

# Each pair is (heading, evidence); the heading is printed before the
# distribution of HD under that evidence.
for _heading, _evidence in (
        ('\n1.Probability of HeartDisease given Gender = Female', {'GENDER': 1}),
        ('\n2. Probability of HeartDisease given BP = Low', {'BP': 1}),
):
    print(_heading)
    q = HeartDisease_infer.query(variables=['HD'], evidence=_evidence)
    print(q['HD'])
示例#42
0
# Question 2

# Estimate the CPDs of the model from the data by maximum likelihood.
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()
# The first three estimated CPDs, in the order returned.
fruit_cpd, size_cpd, tasty_cpd = cpds[0], cpds[1], cpds[2]
print(tasty_cpd)
# Write the CPD of tasty to CSV.
# NOTE(review): `b` is not defined anywhere in this excerpt - confirm it is
# set earlier in the file.
res = pd.DataFrame(b)
res.to_csv('/code/output/output2.csv', index=False, header=False)

# Question 3
# Attach the first three CPDs to the model, one at a time.
for i in (0, 1, 2):
    model.add_cpds(cpds[i])
# Run variable elimination on the model and ask for the MAP state of tasty.
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
model_inference = VariableElimination(model)
query = model_inference.map_query(variables=['tasty'])
# Expected output
print(query)
result = pd.DataFrame(query, index=[0])
# Write the MAP result to CSV.
result.to_csv('/code/output/output3.csv', index=False)
示例#43
0
attributes = lines[0]
# Read Cleveland Heart disease data; "?" placeholders become NaN.
heartDisease = pd.read_csv('heart.csv', names=attributes)
heartDisease = heartDisease.replace('?', np.nan)
# Display the data
print('Few examples from the dataset are given below')
print(heartDisease.head())
print('\nAttributes and datatypes')
print(heartDisease.dtypes)
# Model Bayesian Network (the ('sex', 'trestbps') edge was listed twice; the
# duplicate is dropped).
model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'),
                       ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
                       ('heartdisease', 'thalach'), ('heartdisease', 'chol')])
# Learning CPDs using Maximum Likelihood Estimators
print('\nLearning CPDs using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
# Deducing with Bayesian Network
print('\nInferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
# The headings now state the evidence that is actually passed to query():
# the first used to say Age=20 and the second omitted sex.
print('\n1.Probability of HeartDisease given Age=40')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 40})
print(q['heartdisease'])
print('\n2. Probability of HeartDisease given sex=0 and chol (Cholestoral) =100')
q = HeartDisease_infer.query(variables=['heartdisease'],
                             evidence={
                                 'sex': 0,
                                 'chol': 100
                             })
print(q['heartdisease'])
示例#44
0
# P(Report | Leaving)
report_cpd = TabularCPD(
    variable='Report', variable_card=2,
    evidence=['Leaving'], evidence_card=[2],
    values=[[0.75, 0.01], [0.25, 0.99]],
)

# P(Smoke | Fire)
smoke_cpd = TabularCPD(
    variable='Smoke', variable_card=2,
    evidence=['Fire'], evidence_card=[2],
    values=[[0.9, 0.1], [0.1, 0.9]],
)

report_model.add_cpds(
    fire_cpd, smoke_cpd, tampering_cpd,
    alarm_cpd, leaving_cpd, report_cpd,
)
# Optional inspection: get_cpds(), active_trail_nodes('Report'),
# local_independencies('Alarm'), get_independencies().

report_infer = VariableElimination(report_model)
# Report and Leaving queried with no evidence (the result is not printed).
prob_temp = report_infer.query(variables=['Report', 'Leaving'])

# NOTE(review): despite the variable name, the evidence used here is
# Tampering and Fire, not Smoke and Report.
prob_alarm_given_smoke_report = report_infer.query(
    variables=['Alarm'],
    evidence={'Tampering': 0, 'Fire': 1},
)
print(prob_alarm_given_smoke_report['Alarm'])
示例#45
0
# P(L | G): one column per state of G.
cpd_l = TabularCPD(
    variable='L', variable_card=2,
    values=[[0.1, 0.4, 0.99],
            [0.9, 0.6, 0.01]],
    evidence=['G'], evidence_card=[3],
)

# P(S | I): one column per state of I.
cpd_s = TabularCPD(
    variable='S', variable_card=2,
    values=[[0.95, 0.2],
            [0.05, 0.8]],
    evidence=['I'], evidence_card=[2],
)

# Attach the CPDs to the network, then validate structure and parameters.
model.add_cpds(cpd_d, cpd_i, cpd_g, cpd_l, cpd_s)
model.check_model()

from pgmpy.inference import VariableElimination
infer = VariableElimination(model)

evidence = {'G': 2, 'S': 1}  # grade=C, SAT=Good
# Posterior value arrays of D and I, queried in that order.
postD, postI = (
    infer.query([_name], evidence=evidence).values for _name in ('D', 'I')
)

print('\n')
print('Pr(Difficulty=Hard|Grade=C,SAT=Good) = {:0.2f}'.format(postD[1]))
print('Pr(Intelligent=High|Grade=C,SAT=Good) = {:0.2f}'.format(postI[1]))
示例#46
0
df_result = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b2_input.csv")

# Input data. The three levels (level-1/2/3) are stored as 0/1/2 in this file
# because numbering starts from 0: 1->0, 2->1, 3->2.
df = pd.read_csv("D:\\Satl_project\\correct\\bayesian\\b3_input.csv")

# Held-out fold. For five-fold cross validation run this script five times
# with a different row range each time.
# .copy() makes the fold an independent frame, so adding a column below does
# not write into a slice of `df`.
df_test = df.iloc[401:501, :].copy()
a = df_test.index
# Drop the fold by its own index labels; this is correct for any index, not
# only the default 0..n-1 one.
df_train = df.drop(a)

model_asset.fit(df_train)
model_asset.get_cpds()
model_asset.get_cardinality()
infer_asset = VariableElimination(model_asset)
df_test['Bayesian_label'] = 0
# Renumber the fold from 0 without keeping the old index as a column.
df_test = df_test.reset_index(drop=True)

# Query CHH_Change for every held-out row, using its three observed columns.
for index, row in df_test.iterrows():
    a, b, c = row['Literacy'], row['Formal Employment'], row['Current Status']
    q_asset = infer_asset.query(['CHH_Change'],
                                evidence={
                                    'Literacy': a,
                                    'Formal Employment': b,
                                    'Current Status': c
                                })
示例#47
0
cpd_getting_up_late = TabularCPD('getting_up_late', 2, [[0.6], [0.4]])
cpd_late_for_school = TabularCPD('late_for_school', 2,
                                 [[0.9, 0.45, 0.8, 0.1],
                                  [0.1, 0.55, 0.2, 0.9]],
                                 evidence=['getting_up_late', 'traffic_jam'],
                                 evidence_card=[2, 2])
cpd_long_queues = TabularCPD('long_queues', 2,
                             [[0.9, 0.2],
                              [0.1, 0.8]],
                             evidence=['traffic_jam'],
                             evidence_card=[2])
model.add_cpds(cpd_rain, cpd_accident,
               cpd_traffic_jam, cpd_getting_up_late,
               cpd_late_for_school, cpd_long_queues)
# Calculating max marginals
model_inference = VariableElimination(model)
model_inference.max_marginal(variables=['late_for_school'])
model_inference.max_marginal(variables=['late_for_school', 'traffic_jam'])
# For any evidence in the network we can simply pass the evidence
# argument which is a dict of the form of {variable: state}
model_inference.max_marginal(variables=['late_for_school'],
                             evidence={'traffic_jam': 1})
model_inference.max_marginal(variables=['late_for_school'],
                             evidence={'traffic_jam': 1,
                                       'getting_up_late': 0})
# The closing parenthesis of this call was missing, which made the whole
# script a SyntaxError.
model_inference.max_marginal(variables=['late_for_school', 'long_queues'],
                             evidence={'traffic_jam': 1,
                                       'getting_up_late': 0})
# Again, as in the case of VariableElimination, we can also pass the
# elimination order of variables for MAP queries. If not specified,
# pgmpy automatically computes the best elimination order.
示例#48
0
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
from pgmpy.factors import TabularCPD
# Now first create the model.
# Network structure: location and quality drive cost; cost and location
# drive no_of_people.
restaurant = BayesianModel([('location', 'cost'),
                            ('quality', 'cost'),
                            ('cost', 'no_of_people'),
                            ('location', 'no_of_people')])
cpd_location = TabularCPD('location', 2, [[0.6, 0.4]])
cpd_quality = TabularCPD('quality', 3, [[0.3, 0.5, 0.2]])
# Every column of a CPD must sum to 1. The second column used to be
# 0.6 / 0.1 (sum 0.7); the lower entry is set to 0.4 here.
# NOTE(review): 0.4 assumes the 0.6 was the intended value - confirm against
# the source of these numbers.
cpd_cost = TabularCPD('cost', 2,
                      [[0.8, 0.6, 0.1, 0.6, 0.6, 0.05],
                       [0.2, 0.4, 0.9, 0.4, 0.4, 0.95]],
                      ['location', 'quality'], [2, 3])
cpd_no_of_people = TabularCPD('no_of_people', 2,
                              [[0.6, 0.8, 0.1, 0.6],
                               [0.4, 0.2, 0.9, 0.4]],
                              ['cost', 'location'], [2, 2])
restaurant.add_cpds(cpd_location, cpd_quality,
                    cpd_cost, cpd_no_of_people)
# Creating the inference object of the model
restaurant_inference = VariableElimination(restaurant)
# Doing simple queries over one or multiple variables.
restaurant_inference.query(variables=['location'])
restaurant_inference.query(variables=['location', 'no_of_people'])
# We can also specify the order in which the variables are to be
# eliminated. If not specified pgmpy automatically computes the
# best possible elimination order.
restaurant_inference.query(variables=['no_of_people'],
                           elimination_order=['location', 'cost', 'quality'])