def test_sumout_var(self):
    """Check scope, stride and CPT of 'Alarm' after ``sumout_var('Earthquake')``."""
    factor = Factor(self.bn, 'Alarm')
    factor.sumout_var('Earthquake')

    expected_scope = ['Alarm', 'Burglary']
    expected_stride = {'Alarm': 1, 'Burglary': 2}
    expected_cpt = [0.8545, 0.1455, 0.055, 0.945]

    self.assertListEqual(factor.scope, expected_scope)
    self.assertDictEqual(factor.stride, expected_stride)
    self.assertListEqual(list(factor.cpt), expected_cpt)
def test_maxout_var(self):
    """Check CPT, scope and stride of 'Alarm' after ``maxout_var('Burglary')``."""
    factor = Factor(self.bn, 'Alarm')
    factor.maxout_var('Burglary')

    expected_cpt = [0.999, 0.94, 0.71, 0.95]
    expected_scope = ['Alarm', 'Earthquake']
    expected_stride = {'Alarm': 1, 'Earthquake': 2}

    self.assertListEqual(list(factor.cpt), expected_cpt)
    self.assertListEqual(factor.scope, expected_scope)
    self.assertDictEqual(factor.stride, expected_stride)
def test_multiply_factor(self):
    """Check ``multiply_factor`` yields the same CPT in either operand order."""
    # 'Alarm' multiplied by 'Burglary' ...
    alarm_first = Factor(self.bn, 'Alarm')
    alarm_first.multiply_factor(Factor(self.bn, 'Burglary'))

    # ... versus 'Burglary' multiplied by 'Alarm'.
    burglary_first = Factor(self.bn, 'Burglary')
    burglary_first.multiply_factor(Factor(self.bn, 'Alarm'))

    self.assertListEqual(list(alarm_first.cpt), list(burglary_first.cpt))
def test_maxout_var(self):
    """Verify the 'Alarm' factor once ``maxout_var('Burglary')`` has been applied."""
    alarm = Factor(self.bn, 'Alarm')
    alarm.maxout_var('Burglary')

    cpt_after = list(alarm.cpt)
    self.assertListEqual(cpt_after, [0.999, 0.94, 0.71, 0.95])

    # Only 'Alarm' and 'Earthquake' are expected to remain in the factor.
    self.assertListEqual(alarm.scope, ['Alarm', 'Earthquake'])
    self.assertDictEqual(alarm.stride, {'Alarm': 1, 'Earthquake': 2})
def refresh(self):
    """
    Rebuild the factor list from ``self.bn`` and reset the MAP attributes.

    One ``Factor`` is created for every random variable returned by
    ``self.bn.nodes()``; ``map_factors``, ``map_assignment`` and
    ``map_prob`` go back to their empty / -1 starting values.
    """
    network = self.bn
    self._phi = [Factor(network, rv) for rv in network.nodes()]

    ## MAP-BASED ATTRIBUTES ##
    self.map_factors = OrderedDict()
    self.map_assignment = {}
    self.map_prob = -1
def __init__(self, bn, nodes=None):
    """
    Initialize a Factorization object.

    Arguments
    ---------
    *bn* : a BayesNet object
    *nodes* : an iterable of random variables, optional
        When given, factors are built only for these variables instead
        of for every variable in ``bn.nodes()``. This is mostly useful
        for CliqueTree algorithms.
    """
    self.bn = bn

    # Fall back to the full network when no subset was requested.
    selected = bn.nodes() if nodes is None else nodes
    self._phi = [Factor(bn, rv) for rv in selected]

    ## MAP-BASED ATTRIBUTES ##
    self.map_factors = OrderedDict()
    self.map_assignment = {}
    self.map_prob = -1
def random_sample(bn, n=1000):
    """
    Take a random sample of "n" observations from a BayesNet object.

    This is essentially the forward sample algorithm, except that the
    raw samples are returned instead of marginal estimates.

    Parameters
    ----------
    *bn* : a BayesNet object from which to sample

    *n* : an integer
        The number of observations to take

    Returns
    -------
    *sample* : a numpy integer array of shape (n, bn.num_nodes())
        Row i is one observation. Column j holds the index, within
        bn.values(rv), of the value drawn for the j-th variable of
        bn.nodes().

    Notes
    -----
    - Evidence is not supported.
    - Each variable is conditioned on the values already drawn for its
      parents in the same row, so bn.nodes() is expected to list parents
      before children (topsort order).
    """
    node_list = bn.nodes()
    # Builtin ``int`` replaces the ``np.int`` alias removed from NumPy.
    sample = np.empty((n, bn.num_nodes()), dtype=int)
    rv_map = {rv: idx for idx, rv in enumerate(node_list)}
    # Build each factor once; every draw works on a copy because
    # reduce_factor changes the factor it is called on.
    factor_map = {rv: Factor(bn, rv) for rv in node_list}

    for i in range(n):
        for rv in node_list:
            f = deepcopy(factor_map[rv])
            # reduce_factor by parent samples (stored as value indices)
            for p in bn.parents(rv):
                f.reduce_factor(p, bn.values(p)[sample[i][rv_map[p]]])

            choice_vals = bn.values(rv)
            chosen_val = np.random.choice(choice_vals, p=f.cpt)
            sample[i][rv_map[rv]] = choice_vals.index(chosen_val)

    return sample
def test_reduce_factor(self):
    """Check the 'Alarm' CPT after ``reduce_factor('Burglary', 'Yes')``."""
    reduced = Factor(self.bn, 'Alarm')
    reduced.reduce_factor('Burglary', 'Yes')

    expected_cpt = [0.06, 0.94, 0.05, 0.95]
    self.assertListEqual(list(reduced.cpt), expected_cpt)
def test_to_log(self):
    """Check the rounded sum of the 'Earthquake' CPT after ``to_log()``."""
    quake = Factor(self.bn, 'Earthquake')
    quake.to_log()

    # Compare at 4 decimal places, as the expected constant is given.
    log_total = round(sum(quake.cpt), 4)
    self.assertEqual(log_total, -6.2166)
def _parents_first_order(bn, nodes):
    """Return *nodes* reordered so every variable comes after its parents.

    The input order is preserved whenever it already satisfies that
    constraint. Raises ValueError if the parent relation contains a cycle.
    """
    ordered = []
    placed = set()

    def place(rv, trail):
        if rv in placed:
            return
        if rv in trail:
            raise ValueError("cycle detected in network at %r" % (rv,))
        for p in bn.parents(rv):
            place(p, trail | {rv})
        placed.add(rv)
        ordered.append(rv)

    for rv in nodes:
        place(rv, frozenset())
    return ordered


def lw_sample(bn, evidence=None, target=None, n=1000):
    """
    Approximate Marginal probabilities from likelihood weighted sample
    algorithm on a BayesNet object.

    Arguments
    ---------
    *bn* : a BayesNet object

    *evidence* : a dictionary, where key = rv, value = instantiation
        Defaults to no evidence.

    *target* : a random variable, optional
        If given, only the marginal of this variable is returned.

    *n* : an integer
        The number of samples to take

    Returns
    -------
    If *target* is given:
        a dictionary where key = instantiation of *target* and
        value = marginal probability.
    Otherwise a tuple (*sample_dict*, *ct_discard*):
        *sample_dict* : a dictionary where key = rv and value = another
            dictionary where key = rv instantiation and
            value = marginal probability
        *ct_discard* : the number of samples whose weight was exactly 0

    Effects
    -------
    None

    Notes
    -----
    - Variables are visited parents-first (computed from bn.parents), so
      a parent always has a value before its children are conditioned on
      it. This replaces the earlier workaround that substituted the
      placeholder value '1' for a parent that had not been sampled yet.
    - If the total weight is zero (n == 0, or every sample received zero
      weight) the estimates are left at 0 instead of dividing by zero.
    """
    if evidence is None:
        evidence = {}

    order = _parents_first_order(bn, bn.nodes())
    sample_dict = {rv: {val: 0 for val in bn.values(rv)} for rv in order}
    weight_list = np.ones(n)
    ct_discard = 0

    for i in range(n):
        new_sample = {}
        for rv in order:
            f = Factor(bn, rv)
            # reduce_factor by parent samples
            for p in bn.parents(rv):
                f.reduce_factor(p, new_sample[p])

            if rv in evidence:
                # evidence is clamped, and the sample is weighted by its
                # likelihood given the sampled parents
                chosen_val = evidence[rv]
                weight_list[i] *= f.cpt[bn.values(rv).index(chosen_val)]
            else:
                # not in evidence: sample as usual
                chosen_val = np.random.choice(bn.values(rv), p=f.cpt)
            new_sample[rv] = chosen_val

        # weight the choice by the evidence likelihood
        for rv, val in new_sample.items():
            sample_dict[rv][val] += weight_list[i]
        if weight_list[i] == 0:
            ct_discard += 1

    weight_sum = weight_list.sum()
    if weight_sum > 0:
        for counts in sample_dict.values():
            for val in counts:
                counts[val] = round(counts[val] / weight_sum, 4)

    if target is not None:
        return sample_dict[target]
    return sample_dict, ct_discard
def test_reduce_factor_by_list(self):
    """Check 'Alarm' after ``reduce_factor_by_list`` on Burglary and Earthquake."""
    assignments = [['Burglary', 'Yes'], ['Earthquake', 'Yes']]

    reduced = Factor(self.bn, 'Alarm')
    reduced.reduce_factor_by_list(assignments)

    # Only 'Alarm' is expected to remain in the factor.
    self.assertListEqual(list(reduced.cpt), [0.05, 0.95])
    self.assertListEqual(reduced.scope, ['Alarm'])
    self.assertDictEqual(reduced.stride, {'Alarm': 1})
class FactorTestCase(unittest.TestCase):
    """Unit tests for ``Factor`` built from the 'cmu.bn' network file."""

    def setUp(self):
        """Load 'cmu.bn' from the 'data' directory and build the 'Alarm' factor."""
        # The data directory sits four directory levels above this file.
        root = __file__
        for _ in range(4):
            root = dirname(root)
        self.data_path = os.path.join(root, 'data')
        self.bn = read_bn(os.path.join(self.data_path, 'cmu.bn'))
        self.f = Factor(self.bn, 'Alarm')

    def tearDown(self):
        """Nothing to clean up."""
        pass

    # Factor Creation Tests

    def test_factor_init(self):
        """The fixture object is a ``Factor`` instance."""
        self.assertIsInstance(self.f, Factor)

    def test_factor_bn(self):
        """The factor's network exposes the expected variable list in ``V``."""
        expected_vars = ['Burglary', 'Earthquake', 'Alarm',
                         'JohnCalls', 'MaryCalls']
        self.assertListEqual(self.f.bn.V, expected_vars)

    def test_factor_var(self):
        """``var`` is the variable the factor was created for."""
        self.assertEqual(self.f.var, 'Alarm')

    def test_factor_scope(self):
        """``scope`` lists 'Alarm' followed by 'Earthquake' and 'Burglary'."""
        expected_scope = ['Alarm', 'Earthquake', 'Burglary']
        self.assertListEqual(self.f.scope, expected_scope)

    def test_factor_card(self):
        """Every variable in the factor has cardinality 2."""
        expected_card = {'Alarm': 2, 'Burglary': 2, 'Earthquake': 2}
        self.assertDictEqual(self.f.card, expected_card)

    def test_factor_stride(self):
        """``stride`` maps each scope variable to its expected stride."""
        expected_stride = {'Alarm': 1, 'Burglary': 4, 'Earthquake': 2}
        self.assertDictEqual(self.f.stride, expected_stride)

    def test_factor_cpt(self):
        """``cpt`` holds the expected eight entries."""
        expected_cpt = [0.999, 0.001, 0.71, 0.29, 0.06, 0.94, 0.05, 0.95]
        self.assertListEqual(list(self.f.cpt), expected_cpt)

    # Factor Operations Tests

    def test_multiply_factor(self):
        """``multiply_factor`` yields the same CPT in either operand order."""
        alarm_first = Factor(self.bn, 'Alarm')
        alarm_first.multiply_factor(Factor(self.bn, 'Burglary'))

        burglary_first = Factor(self.bn, 'Burglary')
        burglary_first.multiply_factor(Factor(self.bn, 'Alarm'))

        self.assertListEqual(list(alarm_first.cpt), list(burglary_first.cpt))

    def test_sumover_var(self):
        """Check the CPT after ``sumover_var('Burglary')``."""
        self.f.sumover_var('Burglary')
        self.assertListEqual(list(self.f.cpt), [0.5, 0.5])

    def test_sumout_var_list(self):
        """Check 'Alarm' after ``sumout_var_list`` on both other variables."""
        factor = Factor(self.bn, 'Alarm')
        factor.sumout_var_list(['Burglary', 'Earthquake'])

        self.assertListEqual(factor.scope, ['Alarm'])
        self.assertDictEqual(factor.stride, {'Alarm': 1})
        self.assertListEqual(list(factor.cpt), [0.45475, 0.54525])

    def test_sumout_var(self):
        """Check 'Alarm' after ``sumout_var('Earthquake')``."""
        factor = Factor(self.bn, 'Alarm')
        factor.sumout_var('Earthquake')

        self.assertListEqual(factor.scope, ['Alarm', 'Burglary'])
        self.assertDictEqual(factor.stride, {'Alarm': 1, 'Burglary': 2})
        self.assertListEqual(list(factor.cpt),
                             [0.8545, 0.1455, 0.055, 0.945])

    def test_maxout_var(self):
        """Check 'Alarm' after ``maxout_var('Burglary')``."""
        factor = Factor(self.bn, 'Alarm')
        factor.maxout_var('Burglary')

        self.assertListEqual(list(factor.cpt), [0.999, 0.94, 0.71, 0.95])
        self.assertListEqual(factor.scope, ['Alarm', 'Earthquake'])
        self.assertDictEqual(factor.stride, {'Alarm': 1, 'Earthquake': 2})

    def test_reduce_factor_by_list(self):
        """Check 'Alarm' after reducing by Burglary='Yes' and Earthquake='Yes'."""
        assignments = [['Burglary', 'Yes'], ['Earthquake', 'Yes']]
        factor = Factor(self.bn, 'Alarm')
        factor.reduce_factor_by_list(assignments)

        self.assertListEqual(list(factor.cpt), [0.05, 0.95])
        self.assertListEqual(factor.scope, ['Alarm'])
        self.assertDictEqual(factor.stride, {'Alarm': 1})

    def test_reduce_factor(self):
        """Check the 'Alarm' CPT after ``reduce_factor('Burglary', 'Yes')``."""
        factor = Factor(self.bn, 'Alarm')
        factor.reduce_factor('Burglary', 'Yes')
        self.assertListEqual(list(factor.cpt), [0.06, 0.94, 0.05, 0.95])

    def test_to_log(self):
        """Check the rounded sum of the 'Earthquake' CPT after ``to_log()``."""
        factor = Factor(self.bn, 'Earthquake')
        factor.to_log()
        log_total = round(sum(factor.cpt), 4)
        self.assertEqual(log_total, -6.2166)

    def test_from_log(self):
        """``to_log()`` followed by ``from_log()`` gives the expected CPT."""
        factor = Factor(self.bn, 'Earthquake')
        factor.to_log()
        factor.from_log()
        self.assertListEqual(list(factor.cpt), [0.998, 0.002])

    def test_normalize(self):
        """Check the CPT produced by ``normalize()`` after overwriting entries."""
        # Overwrite selected CPT entries before normalizing.
        for index, value in ((0, 20), (1, 20), (4, 0.94), (7, 0.15)):
            self.f.cpt[index] = value
        self.f.normalize()

        expected_cpt = [0.500, 0.500, 0.710, 0.290, 0.5, 0.5, 0.25, 0.75]
        self.assertListEqual(list(self.f.cpt), expected_cpt)
def lw_sample(bn, evidence=None, target=None, n=1000):
    """
    Approximate Marginal probabilities from likelihood weighted sample
    algorithm on a BayesNet object.

    Arguments
    ---------
    *bn* : a BayesNet object

    *evidence* : a dictionary, where key = rv, value = instantiation
        Defaults to no evidence.

    *target* : a random variable, optional
        If given, only the marginal of this variable is returned.

    *n* : an integer
        The number of samples to take

    Returns
    -------
    *sample_dict* : a dictionary where key = rv and value = another
        dictionary where key = rv instantiation and
        value = marginal probability (rounded to 4 decimals).
        When *target* is given, only sample_dict[target] is returned.

    Effects
    -------
    None

    Notes
    -----
    - Each variable is conditioned on the values already drawn for its
      parents, so bn.nodes() must list parents before their children.
    - If the total weight is zero (n == 0, or every sample received zero
      weight) the estimates are left at 0 instead of dividing by zero.
    """
    if evidence is None:
        evidence = {}

    sample_dict = {rv: {val: 0 for val in bn.values(rv)}
                   for rv in bn.nodes()}
    weight_list = np.ones(n)

    for i in range(n):
        new_sample = {}
        for rv in bn.nodes():
            f = Factor(bn, rv)
            # reduce_factor by parent samples
            for p in bn.parents(rv):
                f.reduce_factor(p, new_sample[p])

            if rv in evidence:
                # evidence is clamped, and the sample is weighted by its
                # likelihood given the sampled parents
                chosen_val = evidence[rv]
                weight_list[i] *= f.cpt[bn.values(rv).index(chosen_val)]
            else:
                # not in evidence: sample as usual
                chosen_val = np.random.choice(bn.values(rv), p=f.cpt)
            new_sample[rv] = chosen_val

        # weight the choice by the evidence likelihood
        for rv, val in new_sample.items():
            sample_dict[rv][val] += weight_list[i]

    weight_sum = weight_list.sum()
    if weight_sum > 0:
        for counts in sample_dict.values():
            for val in counts:
                counts[val] = round(counts[val] / weight_sum, 4)

    if target is not None:
        return sample_dict[target]
    return sample_dict
def forward_sample(bn, n=1000):
    """
    Approximate marginal probabilities from forward sampling algorithm
    on a BayesNet object.

    Each of the *n* samples walks the variables in bn.nodes() order and
    draws every variable from its factor reduced by the values already
    drawn for its parents. The marginal estimate of a value is the
    fraction of samples in which it was drawn.

    There is no evidence in this algorithm - see "lw_sample" for the
    likelihood weighted algorithm, which accepts evidence.

    Arguments
    ---------
    *bn* : a BayesNet object

    *n* : an integer
        The number of samples to take

    Returns
    -------
    *sample_dict* : a dictionary, where key = rv, value = another dict
        where key = instance, value = its probability value

    Notes
    -----
    - Evidence is not currently implemented.
    """
    tallies = {}
    for var in bn.nodes():
        tallies[var] = dict.fromkeys(bn.values(var), 0)

    for _ in range(n):
        drawn = {}
        for rv in bn.nodes():
            factor = Factor(bn, rv)
            # condition on the parent values drawn earlier in this sample
            for parent in bn.parents(rv):
                factor.reduce_factor(parent, drawn[parent])

            pick = np.random.choice(bn.values(rv), p=factor.cpt)
            tallies[rv][pick] += 1
            drawn[rv] = pick

    # turn raw counts into relative frequencies
    for counts in tallies.values():
        for val in counts:
            counts[val] = int(counts[val]) / float(n)

    return tallies
def setUp(self):
    """Load 'cmu.bn' from the 'data' directory and build the 'Alarm' factor."""
    # The data directory sits four directory levels above this file.
    root = __file__
    for _ in range(4):
        root = dirname(root)

    self.data_path = os.path.join(root, 'data')
    network_file = os.path.join(self.data_path, 'cmu.bn')
    self.bn = read_bn(network_file)
    self.f = Factor(self.bn, 'Alarm')
def test_sumout_var_list(self):
    """Check 'Alarm' after ``sumout_var_list(['Burglary', 'Earthquake'])``."""
    removed = ['Burglary', 'Earthquake']

    factor = Factor(self.bn, 'Alarm')
    factor.sumout_var_list(removed)

    # Only 'Alarm' is expected to remain in the factor.
    self.assertListEqual(factor.scope, ['Alarm'])
    self.assertDictEqual(factor.stride, {'Alarm': 1})
    self.assertListEqual(list(factor.cpt), [0.45475, 0.54525])
def lw_sample(bn, evidence=None, target=None, n=1000):
    """
    Approximate Marginal probabilities from likelihood weighted sample
    algorithm on a BayesNet object.

    Arguments
    ---------
    *bn* : a BayesNet object

    *evidence* : a dictionary, where key = rv, value = instantiation
        Defaults to no evidence.

    *target* : a random variable, optional
        If given, only the marginal of this variable is returned.

    *n* : an integer
        The number of samples to take

    Returns
    -------
    *sample_dict* : a dictionary where key = rv and value = another
        dictionary where key = rv instantiation and
        value = marginal probability (rounded to 4 decimals).
        When *target* is given, only sample_dict[target] is returned.

    Effects
    -------
    None

    Notes
    -----
    - Each variable is conditioned on the values already drawn for its
      parents, so bn.nodes() must list parents before their children.
    - If the total weight is zero (n == 0, or every sample received zero
      weight) the estimates are left at 0 instead of dividing by zero.
    """
    # A fresh dict per call avoids sharing one mutable default object.
    evidence = {} if evidence is None else evidence

    sample_dict = {}
    for var in bn.nodes():
        sample_dict[var] = {val: 0 for val in bn.values(var)}
    weight_list = np.ones(n)

    for i in range(n):
        new_sample = {}
        for rv in bn.nodes():
            f = Factor(bn, rv)
            # reduce_factor by parent samples
            for p in bn.parents(rv):
                f.reduce_factor(p, new_sample[p])

            if rv in evidence:
                # clamp to the evidence and weight by its likelihood
                chosen_val = evidence[rv]
                weight_list[i] *= f.cpt[bn.values(rv).index(chosen_val)]
            else:
                # not in evidence: sample as usual
                chosen_val = np.random.choice(bn.values(rv), p=f.cpt)
            new_sample[rv] = chosen_val

        # weight the choice by the evidence likelihood
        for rv, val in new_sample.items():
            sample_dict[rv][val] += weight_list[i]

    weight_sum = weight_list.sum()
    if weight_sum > 0:
        for rv in sample_dict:
            for val in sample_dict[rv]:
                sample_dict[rv][val] = round(
                    sample_dict[rv][val] / weight_sum, 4)

    if target is not None:
        return sample_dict[target]
    return sample_dict
def test_from_log(self):
    """Check the 'Earthquake' CPT after ``to_log()`` followed by ``from_log()``."""
    quake = Factor(self.bn, 'Earthquake')
    quake.to_log()
    quake.from_log()

    expected_cpt = [0.998, 0.002]
    self.assertListEqual(list(quake.cpt), expected_cpt)
def gibbs_sample(bn, n=1000, burn=200):
    """
    Approximate Marginal probabilities from Gibbs Sampling over a
    BayesNet object.

    Arguments
    ---------
    *bn* : a BayesNet object

    *n* : an integer
        The number of samples to take

    *burn* : an integer
        The number of beginning samples to throw away
        for the MCMC mixing.

    Returns
    -------
    *sample_dict* : a dictionary where key = rv and value = another
        dictionary where key = rv instantiation and
        value = marginal probability (rounded to 4 decimals)

    Notes
    -----
    - The first *burn* sweeps (i = 0 .. burn-1) are discarded and the
      remaining n - burn sweeps are counted, matching the n - burn
      denominator, so each marginal sums to 1 up to rounding.
    - *n* must be greater than *burn*; n == burn divides by zero.
    - Each variable is resampled from its prior (no parents) or from
      its factor reduced by the current values of its parents.
    """
    nodes = bn.nodes()
    sample_dict = {rv: {val: 0 for val in bn.values(rv)} for rv in nodes}

    # uniform initial state
    state = {rv: np.random.choice(bn.values(rv)) for rv in nodes}

    for i in range(n):
        for rv in nodes:
            parents = bn.parents(rv)
            choice_vals = bn.values(rv)
            if len(parents) == 0:
                # no parents - prior
                choice_probs = bn.cpt(rv)
            else:
                # has parent - filter cpt
                f = Factor(bn, rv)
                for p in parents:
                    f.reduce_factor(p, state[p])
                choice_probs = f.cpt
            # sample over remaining possibilities
            state[rv] = np.random.choice(choice_vals, p=choice_probs)

        # Count every sweep once burn-in is over. ">=" keeps exactly
        # n - burn sweeps; the old ">" kept one fewer than the divisor.
        if i >= burn:
            for rv, val in state.items():
                sample_dict[rv][val] += 1

    kept = float(n - burn)
    for rv in sample_dict:
        for val in sample_dict[rv]:
            sample_dict[rv][val] = round(int(sample_dict[rv][val]) / kept, 4)

    return sample_dict
def gibbs_sample(bn, n=1000, burn=200):
    """
    Approximate Marginal probabilities from Gibbs Sampling over a
    BayesNet object.

    Arguments
    ---------
    *bn* : a BayesNet object

    *n* : an integer
        The number of samples to take

    *burn* : an integer
        The number of beginning samples to throw away
        for the MCMC mixing.

    Returns
    -------
    *sample_dict* : a dictionary where key = rv and value = another
        dictionary where key = rv instantiation and
        value = marginal probability (rounded to 4 decimals)

    Notes
    -----
    - Sweeps with index i < burn are discarded; the other n - burn
      sweeps are counted and divided by n - burn, so each marginal sums
      to 1 up to rounding.
    - *n* must be greater than *burn*; n == burn divides by zero.
    - Each variable is resampled from its prior (no parents) or from
      its factor reduced by the current values of its parents.
    """
    sample_dict = {}
    state = {}
    for rv in bn.nodes():
        sample_dict[rv] = {val: 0 for val in bn.values(rv)}
    for rv in bn.nodes():
        state[rv] = np.random.choice(bn.values(rv))  # uniform sample

    for i in range(n):
        for rv in bn.nodes():
            # get possible values conditioned on the current parent state
            parents = bn.parents(rv)
            if len(parents) == 0:
                # no parents - prior
                choice_probs = bn.cpt(rv)
            else:
                # has parent - filter cpt
                f = Factor(bn, rv)
                for p in parents:
                    f.reduce_factor(p, state[p])
                choice_probs = f.cpt
            # sample over remaining possibilities
            state[rv] = np.random.choice(bn.values(rv), p=choice_probs)

        # update sample_dict once the burn-in sweeps are over; ">=" is
        # required so that exactly n - burn sweeps are counted
        if i >= burn:
            for rv, val in state.items():
                sample_dict[rv][val] += 1

    for rv in sample_dict:
        for val in sample_dict[rv]:
            sample_dict[rv][val] = round(
                int(sample_dict[rv][val]) / float(n - burn), 4)

    return sample_dict
def forward_sample(bn, n=1000):
    """
    Approximate marginal probabilities from forward sampling algorithm
    on a BayesNet object.

    The network is sampled *n* times. Within one sample the variables
    are visited in bn.nodes() order, and each one is drawn from its
    factor after reducing it by the values its parents received earlier
    in that same sample. The reported probability of a value is its
    count divided by *n*.

    No evidence can be supplied here; the likelihood weighted algorithm
    (see "lw_sample" function) is the counterpart that accepts evidence.

    Arguments
    ---------
    *bn* : a BayesNet object

    *n* : an integer
        The number of samples to take

    Returns
    -------
    *sample_dict* : a dictionary, where key = rv, value = another dict
        where key = instance, value = its probability value

    Notes
    -----
    - Evidence is not currently implemented.
    """
    counts_by_rv = {
        var: {val: 0 for val in bn.values(var)} for var in bn.nodes()
    }

    sample_index = 0
    while sample_index < n:
        current = {}
        for rv in bn.nodes():
            rv_factor = Factor(bn, rv)
            # reduce by the parent values drawn earlier in this sample
            for parent in bn.parents(rv):
                rv_factor.reduce_factor(parent, current[parent])

            options = bn.values(rv)
            weights = rv_factor.cpt
            selected = np.random.choice(options, p=weights)

            counts_by_rv[rv][selected] += 1
            current[rv] = selected
        sample_index += 1

    # convert counts to relative frequencies in place
    total = float(n)
    for rv in counts_by_rv:
        for val in counts_by_rv[rv]:
            counts_by_rv[rv][val] = int(counts_by_rv[rv][val]) / total

    return counts_by_rv