def testMarkovBlanketMultiLevel(self):
    """Check Markov blanket sizes at levels 1..3 and rejection of level 0."""
    bn = gum.fastBN("Z<-A->B->C->D->E<-Y;X->G<-F<-C<-I<-H->W")
    # Expected blanket size grows with the level (number of "hops").
    for level, expected_size in ((1, 5), (2, 11), (3, 13)):
        self.assertEqual(gum.MarkovBlanket(bn, "C", level).size(), expected_size)
    # A level of 0 makes no sense and must be refused.
    with self.assertRaises(gum.InvalidArgument):
        gum.MarkovBlanket(bn, "C", 0)
def testMarkovBlanketStructure(self):
    """Each node's Markov blanket must match a hand-built reference network."""
    bn = gum.fastBN("a->b->c->d->e;f->d->g;h->i->g")

    # A reversed arc is a different structure, not merely a different drawing.
    self.assertFalse(
        gum.MarkovBlanket(bn, "a").hasSameStructure(gum.fastBN("b->a")))

    # node -> fastBN string describing its expected Markov blanket.
    expected = {
        "a": "a->b",
        "b": "a->b->c",
        "c": "b->c->d;f->d",
        "d": "c->d->e;f->d->g;i->g",
        "e": "d->e",
        "f": "c->d;f->d;",
        "g": "d->g;i->g;",
        "h": "h->i;",
        "i": "d->g;h->i->g;;",
    }
    for node, structure in expected.items():
        self.assertTrue(
            gum.MarkovBlanket(bn, node).hasSameStructure(gum.fastBN(structure)))
def _get_markov_blanket(self):
    """Return the feature names in the Markov blanket of the target node.

    Returns
    -------
    List[str]
        Names of the nodes belonging to the Markov blanket of
        ``self.target`` in ``self.bn``, with the target itself removed.
    """
    # The id -> name mapping is loop-invariant: compute it once instead of
    # once per node as the original did.
    convert = self._get_list_names_order()
    feats_markov_blanket = [
        convert[i] for i in gum.MarkovBlanket(self.bn, self.target).nodes()
    ]
    # The blanket includes the target node itself; callers want only features.
    feats_markov_blanket.remove(self.target)
    return feats_markov_blanket
def _independenceListForPairs(bn, target=None):
    """
    returns a list of triples `(i,j,k)` for each non arc `(i,j)` such that
    `i` is independent of `j` given `k`.

    Parameters
    ----------
    bn: gum.BayesNet
      the Bayesian Network

    target: (optional) str or int
      the name or id of the target variable. If a target is given, only the
      independence given a subset of the markov blanket of the target are
      tested.

    Returns
    -------
    List[(str,str,List[str])]
      A list of independence found in the structure of BN.
    """

    def powerset(iterable):
        elems = list(iterable)
        # lazily yield every subset, from the empty set up to the full set
        return itertools.chain.from_iterable(
            itertools.combinations(elems, size)
            for size in range(len(elems) + 1))

    independencies = []
    remaining = sorted(bn.names())

    if target is None:
        # test every unordered pair; condition on any subset of all nodes
        sources = remaining.copy()
        conditioning_pool = bn.names()
    else:
        # restrict conditioning sets to the target's Markov blanket
        conditioning_pool = {
            bn.variable(node).name()
            for node in gum.MarkovBlanket(bn, target).nodes()
        }
        sources = [target] if isinstance(target, str) else [
            bn.variable(target).name()
        ]

    # testing every d-separation
    for first in sources:
        remaining.remove(first)  # each unordered pair is visited once
        for second in remaining:
            if bn.existsArc(first, second) or bn.existsArc(second, first):
                continue
            for cond in powerset(sorted(conditioning_pool - {first, second})):
                # keep only the first (smallest) separating set found
                if bn.isIndependent(first, second, cond):
                    independencies.append((first, second, tuple(cond)))
                    break
    return independencies
# Add the categorical "occupation" variable (14 job categories) to the template.
template.add(gum.LabelizedVariable("occupation", "occupation",['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces']))
# Display the (still parameter-less) template network.
gnb.showBN(template)
# Persist the training data so the learner can read it back from disk.
train_df.to_csv(os.path.join('/content/gdrive/My Drive/train_data2.csv'), index=False)
# NOTE(review): joining 'res'/'titanic' with an ABSOLUTE third component makes
# os.path.join discard the first two parts — `file` is just the Drive path.
# Presumably intentional (leftover from a local-layout version); confirm.
file = os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv')
# Learn structure and parameters from the CSV, constrained by the template.
learner = gum.BNLearner(file, template)
bn = learner.learnBN()
bn
# Visual diagnostics of the learned network and its marginals.
gnb.showInformation(bn,{},size="20")
gnb.showInference(bn)
gnb.showPosterior(bn,evs={"sex": "Male", "age_range": '21-30'},target='target')
# Show the learned network next to the Markov blanket of the target node.
gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'), captions=["Learned Bayesian Network", "Markov blanket of 'target'"])
# Exact inference engine targeting only the class variable.
ie=gum.LazyPropagation(bn)
init_belief(ie)
ie.addTarget('target')
# Classify each test row (decision threshold 0.157935) and summarise accuracy;
# is_well_predicted presumably returns a string starting with "True"/"False".
result = testdf.apply(lambda x: is_well_predicted(ie, bn, 0.157935, x), axis=1)
result.value_counts(True)
positives = sum(result.map(lambda x: 1 if x.startswith("True") else 0 ))
total = result.count()
print("{0:.2f}% good predictions".format(positives/total*100))
# ROC curve of the learned classifier on the same file.
showROC(bn,file, 'target', "True", True, True)
def generate_BN_explanationsMB(instance, label_lst, feature_names, class_var, encoder, scaler, model, path, dataset_name, variance=0.25, algorithm="Hill Climbing"):
    """Learn a BN explanation (with Markov blanket) for one predicted instance.

    Generates permutations around `instance`, discretizes and normalises them,
    learns a Bayesian network, runs inference, extracts the Markov blanket of
    `class_var`, displays the results side by side and writes the network to
    disk (DOT + .net files).

    Parameters
    ----------
    instance : mapping with at least 'index' and 'prediction_type' keys
    label_lst, feature_names, class_var : labels, feature names and class
        variable name forwarded to `generate_permutations`
    encoder, scaler, model : preprocessing objects and classifier forwarded
        to `generate_permutations`
    path : str
        Root output directory (must contain 'feature_permutations/' and
        'explanations/' sub-trees).
    dataset_name : str
        Dataset file name; its '.csv' suffix is stripped for sub-folders.
    variance : float, default 0.25
        Spread of the generated permutations.
    algorithm : str, default "Hill Climbing"
        Structure-learning algorithm name passed to `learnBN`.

    Returns
    -------
    list
        [bn, inference, infoBN, markov_blanket].
    """
    # necessary for starting Numpy generated random numbers in an initial state
    np.random.seed(515)
    # Necessary for starting core Python generated random numbers in a state
    rn.seed(515)
    indx = instance['index']
    # e.g. "True Positive" -> "true_positives" (used as an output sub-folder)
    prediction_type = instance['prediction_type'].lower() + "s"
    prediction_type = prediction_type.replace(" ", "_")
    # generate permutations
    df = generate_permutations(instance, label_lst, feature_names, class_var, encoder, scaler, model, variance=variance)
    # discretize data
    df_discr = discretize_dataframe(df, class_var, num_bins=4)
    # save discretised dataframe (for debugging and reproduceability purposes)
    path_to_permutations = path + "feature_permutations/" + dataset_name.replace(
        ".csv", "") + "/" + prediction_type + "/" + str(indx) + ".csv"
    df_discr.to_csv(path_to_permutations, index=False)
    # normalise dataframe (writes a sibling "*_norm.csv" file read below)
    normalise_dataframe(path_to_permutations)
    # learn BN
    bn, infoBN, essencGraph = learnBN(path_to_permutations.replace(
        ".csv", "_norm.csv"), algorithm=algorithm)
    # perform inference
    inference = gnb.getInference(
        bn, evs={}, targets=df_discr.columns.to_list(), size='12')
    # compute Markov Blanket
    markov_blanket = gum.MarkovBlanket(bn, class_var)
    # show networks
    gnb.sideBySide(
        *[bn, inference, markov_blanket],
        captions=["Bayesian Network", "Inference", "Markov Blanket"])
    # save to file
    path_to_explanation = path + "explanations/" + dataset_name.replace(
        ".csv", "") + "/BN/" + prediction_type + "/"
    gum.lib.bn2graph.dotize(bn, path_to_explanation + str(indx) + "_BN")
    gum.saveBN(bn, path_to_explanation + str(indx) + "_BN.net")
    return [bn, inference, infoBN, markov_blanket]
def testMarkovBlanketSpecialArcs(self):
    """Arcs between members of the blanket must be kept in the result."""
    bn = gum.fastBN(
        "aa->bb->cc->dd->ee;ff->dd->gg;hh->ii->gg;ff->ii;ff->gg")
    expected = gum.fastBN("cc->dd->ee;ff->dd->gg;ff->gg;ff->ii->gg")
    self.assertTrue(gum.MarkovBlanket(bn, "dd").hasSameStructure(expected))
def testChain(self):
    """A MarkovBlanket is constructible from a node name or a node id."""
    bn = gum.fastBN("a->b->c")
    blanket_by_name = gum.MarkovBlanket(bn, "a")
    blanket_by_id = gum.MarkovBlanket(bn, 1)