def createBayesianNetwork():
    learner = gum.BNLearner("logs/Log/WholeLog.csv")
    learner.useK2([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    bn2 = learner.learnBN()
    print("Learned in {0}s".format(learner.currentTime()))
    gnb.showBN(bn2)
    return bn2
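# Hedged usage sketch (not part of the original snippet): the imports below are the
# standard pyAgrum aliases the function above relies on, and "logs/Log/WholeLog.csv"
# must exist with 13 columns matching the node order passed to useK2.
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

bn = createBayesianNetwork()   # K2 structure learning from logs/Log/WholeLog.csv
ie = gum.LazyPropagation(bn)   # exact inference on the learned network
ie.makeInference()
print(ie.posterior(0))         # marginal of the first variable, no evidence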
import os

import pandas
import pyAgrum as gum
import pyAgrum.lib.notebook as gnb

# Load the training data (the absolute Drive path makes os.path.join ignore the
# 'res'/'titanic' components).
train_df = pandas.read_csv(os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv'))
for k in train_df.keys():
    print('{0}: {1}'.format(k, len(train_df[k].unique())))

# Template network: one labelized variable per column, with the expected labels.
template = gum.BayesNet()
template.add(gum.LabelizedVariable("target", "target", ['<=50K', '>50K']))
template.add(gum.LabelizedVariable("sex", "sex", ['Male', 'Female']))
template.add(gum.LabelizedVariable("age_range", "age_range", ['0-20', '21-30', '31-65', '66-90']))
template.add(gum.LabelizedVariable("race", "race",
                                   ['White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other', 'Black']))
template.add(gum.LabelizedVariable("workclass", "workclass",
                                   ['Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov',
                                    'Local-gov', 'State-gov', 'Without-pay', 'Never-worked']))
template.add(gum.LabelizedVariable("relationship", "relationship",
                                   ['Wife', 'Own-child', 'Husband', 'Not-in-family',
                                    'Other-relative', 'Unmarried']))
template.add(gum.LabelizedVariable("marital_status", "marital_status",
                                   ['Married-civ-spouse', 'Divorced', 'Never-married', 'Separated',
                                    'Widowed', 'Married-spouse-absent', 'Married-AF-spouse']))
template.add(gum.LabelizedVariable("occupation", "occupation",
                                   ['Tech-support', 'Craft-repair', 'Other-service', 'Sales',
                                    'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners',
                                    'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing',
                                    'Transport-moving', 'Priv-house-serv', 'Protective-serv',
                                    'Armed-Forces']))
gnb.showBN(template)

# Save the data back and learn the structure from it, using the template for the
# variables and the order of their labels.
train_df.to_csv(os.path.join('/content/gdrive/My Drive/train_data2.csv'), index=False)
file = os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv')
learner = gum.BNLearner(file, template)
bn = learner.learnBN()
bn

gnb.showInformation(bn, {}, size="20")
gnb.showInference(bn)
gnb.showPosterior(bn, evs={"sex": "Male", "age_range": '21-30'}, target='target')
gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'),
               captions=["Learned Bayesian Network", "Markov blanket of 'target'"])
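# Optional follow-up (an assumption, not in the original snippet): the query that
# gnb.showPosterior displays above can also be computed programmatically with
# LazyPropagation, which is useful outside a notebook.
ie = gum.LazyPropagation(bn)
ie.setEvidence({"sex": "Male", "age_range": "21-30"})
ie.makeInference()
print(ie.posterior("target"))   # P(target | sex=Male, age_range=21-30)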
# %% markdown
# ## Parameter Learning from the database
# We give the `asiaBN` Bayesian network as a parameter for the learner in order to have the variables and the order of labels for each variable.
# %% codecell
# using the BN as template for variables and labels
learner = gum.BNLearner(outPath, asiaBN)
learner.setInitialDAG(asiaBN.dag())

# Learn the parameters when the structure is known:
asiaBN_learnedParams: BayesNet = learner.learnParameters()
gnb.showBN(asiaBN_learnedParams)
# gnb.showBN(asiaBN)  # same thing
# %% codecell
# This is the bad example: learning without the initial template gets the nodes and structure wrong
learnerNoTemplate = gum.BNLearner(outPath)
learnerNoTemplate.setInitialDAG(asiaBN.dag())
asiaBNNoTemplate: BayesNet = learnerNoTemplate.learnParameters()
gnb.showBN(asiaBNNoTemplate)
# %% codecell
# This is what the DAG looks like
asiaBN.dag()
# %% codecell
asiaBNNoTemplate.dag()  # same
# %% codecell
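# %% codecell
# A possible check (an assumption, not in the original notebook): compare the original
# CPTs of asiaBN with the ones re-estimated from the database. Both networks share the
# same variable names because the template supplied the variables.
for name in asiaBN.names():
    gnb.sideBySide(asiaBN.cpt(name), asiaBN_learnedParams.cpt(name),
                   captions=["original CPT of " + name, "learned CPT of " + name])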
def trainmodel(filename):
    # gentt (defined elsewhere) provides the discretization of each variable
    ob, hb, lb, cb, vb = gentt(filename)
    #print(ob, hb, lb, cb)

    # build the model: one node per variable for day 0 and day 1
    bn = gum.BayesNet(filename)
    Open0 = bn.add('Open0', ob)
    High0 = bn.add('High0', hb)
    Low0 = bn.add('Low0', lb)
    Close0 = bn.add('Close0', cb)
    Volume0 = bn.add('Volume0', vb)
    Open1 = bn.add('Open1', ob)
    High1 = bn.add('High1', hb)
    Low1 = bn.add('Low1', lb)
    Close1 = bn.add('Close1', cb)
    Volume1 = bn.add('Volume1', vb)

    # structure learning: forbid every arc going from a day-1 node back to a day-0 node
    learner = gum.BNLearner(filename + "_train.csv", bn)
    for src in ('Open1', 'High1', 'Low1', 'Close1', 'Volume1'):
        for dst in ('Open0', 'Close0', 'High0', 'Low0', 'Volume0'):
            learner.addForbiddenArc(src, dst)
    #learner.addMandatoryArc('Close0', 'Close1')
    learner.useLocalSearchWithTabuList()
    bn = learner.learnBN()
    gnb.showBN(bn)

    # parameter learning on the learned structure, with smoothing
    learner = gum.BNLearner(filename + "_train.csv", bn)
    learner.setInitialDAG(bn.dag())
    learner.useAprioriSmoothing(1)
    bn = learner.learnParameters()
    #gnb.showInference(bn, evs={})

    # do inference and calculate the accuracy on the test set
    ie = gum.LazyPropagation(bn)
    ie.makeInference()
    N = 0.0
    acc = 0
    with open(filename + '_test.csv', 'r', encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        for line in list(reader)[1:]:
            c, o, h, l, v, t = line[:6]
            ie.eraseAllEvidence()
            ie.setEvidence({'Close0': c, 'Open0': o, 'High0': h, 'Low0': l, 'Volume0': v})
            ie.makeInference()
            prob = ie.posterior('Close1').tolist()  # query by name: node ids may change after learning
            if prob[0] < 0.6:
                N = N + 1
                if t == '1':
                    acc = acc + 1
    return acc, N
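# Hypothetical usage sketch: "AAPL" is a placeholder name, not from the original code.
# It assumes gentt("AAPL") is defined, AAPL_train.csv / AAPL_test.csv exist, and the
# usual imports are in place (csv, pyAgrum as gum, pyAgrum.lib.notebook as gnb).
correct, confident = trainmodel("AAPL")
if confident > 0:
    print("accuracy on confident rows: {0:.2%}".format(correct / confident))
else:
    print("no test row met the prob[0] < 0.6 criterion")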
def main():
    bn = gum.BayesNet('nuc_inf')

    # add variables to the network
    va = gum.LabelizedVariable('nuc', 'a labelized variable', 2)
    va.addLabel('-1')
    nuc = bn.add(va)
    A = bn.add('A', 6)
    R, N = [bn.add(name, 7) for name in ['R', 'N']]
    D, Q = [bn.add(name, 2) for name in ['D', 'Q']]

    # partition (defined elsewhere) is expected to produce protein_train.csv / protein_test.csv
    partition("protein")

    # structure learning with three different algorithms
    learner = gum.BNLearner("protein_train.csv", bn)
    # These arcs can be added or deleted
    #learner.addMandatoryArc('A', 'nuc')
    #learner.addMandatoryArc('R', 'nuc')
    #learner.addMandatoryArc('Q', 'nuc')
    #learner.addMandatoryArc('N', 'nuc')
    #learner.addMandatoryArc('D', 'nuc')
    learner.useLocalSearchWithTabuList()
    bn0 = learner.learnBN()
    gnb.showBN(bn0)

    learner.useGreedyHillClimbing()
    bn1 = learner.learnBN()
    gnb.showBN(bn1)

    learner.useK2([5, 4, 3, 2, 1, 0])
    bn2 = learner.learnBN()
    gnb.showBN(bn2)

    # We have two different BN structures from the previous steps; now do parameter learning
    learner = gum.BNLearner("protein_train.csv", bn)
    learner.setInitialDAG(bn0.dag())
    learner.useAprioriSmoothing(1)
    bn01 = learner.learnParameters()  # first
    gnb.showBN(bn01)

    learner = gum.BNLearner("protein_train.csv", bn)
    learner.setInitialDAG(bn2.dag())
    learner.useAprioriSmoothing(1)
    bn11 = learner.learnParameters()  # second
    gnb.showBN(bn11)

    # first
    ie1 = gum.LazyPropagation(bn01)
    ie1.makeInference()
    gnb.showInference(bn01, evs={})

    # second
    ie2 = gum.LazyPropagation(bn11)
    ie2.makeInference()
    gnb.showInference(bn11, evs={})

    with open('protein_test.csv', 'r', encoding="utf-8") as csvfile:
        reader = csv.reader(csvfile)
        count1 = 1
        count2 = 1
        acc1 = 0
        acc2 = 0
        for line in list(reader)[1:]:
            vnuc, vA, vR, vN, vD, vQ = [int(x) for x in line[:6]]
            #print(vnuc, vA, vR, vN, vD, vQ)
            ie2.eraseAllEvidence()
            ie1.eraseAllEvidence()
            ie1.setEvidence({'A': vA, 'R': vR, 'N': vN, 'D': vD, 'Q': vQ})
            ie2.setEvidence({'A': vA, 'R': vR, 'N': vN, 'D': vD, 'Q': vQ})
            ie1.makeInference()
            ie2.makeInference()
            ie2.addTarget(nuc)
            ie1.addTarget(nuc)
            if len(ie2.posterior(nuc).argmax()) == 1:  # if we have one determined value of prob
                #print(ie2.posterior(nuc))
                #print(ie2.posterior(nuc).argmax()[0]['nuc'])
                if ie2.posterior(nuc).argmax()[0]['nuc'] == 2:  # nuc = -1
                    if vnuc == -1:
                        acc2 = acc2 + 1
                if ie2.posterior(nuc).argmax()[0]['nuc'] == vnuc:
                    acc2 = acc2 + 1
                count2 = count2 + 1
            if len(ie1.posterior(nuc).argmax()) == 1:
                #print(ie1.posterior(nuc))
                #print(ie1.posterior(nuc).argmax()[0]['nuc'])
                if ie1.posterior(nuc).argmax()[0]['nuc'] == 2:  # nuc = -1
                    if vnuc == -1:
                        acc1 = acc1 + 1
                if ie1.posterior(nuc).argmax()[0]['nuc'] == vnuc:
                    acc1 = acc1 + 1
                count1 = count1 + 1

    acc2 = acc2 / count2
    acc1 = acc1 / count1
    print(acc2, acc1)
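# Minimal entry-point sketch (an assumption, not in the original snippet). It presumes
# the module imports csv, pyAgrum as gum and pyAgrum.lib.notebook as gnb, and that
# partition() is defined in the same module.
if __name__ == "__main__":
    main()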