def reBOWModel2(attrlist):
    """Build the root, cluster and sub-cluster models for every attribute.

    For each attribute name in ``attrlist`` (identified below by its position
    ``i`` in the list) three token collections are assembled:

    * level 1 - all tokens of ``train_<attr>.csv`` (one token list per
      attribute);
    * level 2 - one token list per cluster of ``cluster_<attr>.json``, in
      sorted key order;
    * level 3 - for every cluster key ``j``, one token list per sub-cluster of
      ``second_<attr>.json[j]``, in sorted key order.

    Each collection is then handed to ``reBOWModelist``.

    Args:
        attrlist: sequence of attribute names (strings) used to build the
            input file names.

    Returns:
        A tuple ``(model1, model2, model3)`` where ``model1`` is
        ``reBOWModelist`` applied to the per-attribute token lists,
        ``model2[i]`` is ``reBOWModelist`` applied to the clusters of
        attribute ``i``, and ``model3[i][j]`` is ``reBOWModelist`` applied to
        the sub-clusters of cluster key ``j`` of attribute ``i``.

    Raises:
        KeyError: if a cluster key of the ``cluster_`` file is missing from
            the matching ``second_`` file.
    """
    preurl = '/home/ren/remote/ruleMining2/'
    basedir = preurl + 'outdatas_baseline/'

    alldatas1, alldatas2, alldatas3 = [], {}, {}
    for attr_i, attr in enumerate(attrlist):
        datas = writereadFile.readContent(basedir + 'train_' + attr + '.csv')
        # Level 1: every sentence of the attribute flattened into one token list.
        alldatas1.append([tok for data in datas for tok in data.split(' ')])

        # Context managers close the JSON files deterministically; the
        # previous json.load(open(...)) form left both handles open.
        with open(basedir + 'cluster_' + attr + '.json') as fh:
            clusDict2 = json.load(fh)
        with open(basedir + 'second_' + attr + '.json') as fh:
            clusDict3 = json.load(fh)

        alldatas2[attr_i] = []
        alldatas3[attr_i] = {}
        # NOTE(review): JSON object keys are strings, so this ordering is
        # lexicographic ('10' sorts before '2'). Callers that map a position
        # in the level-2 list back to a key should confirm that this is the
        # ordering they expect.
        for j in sorted(clusDict2):
            # Level 2: one flattened token list per cluster.
            alldatas2[attr_i].append(
                [tok for data_2 in clusDict2[j] for tok in data_2.split(' ')]
            )
            # Level 3: one flattened token list per sub-cluster of cluster j.
            clus3_j = clusDict3[j]
            alldatas3[attr_i][j] = [
                [tok for data_3 in clus3_j[k] for tok in data_3.split(' ')]
                for k in sorted(clus3_j)
            ]

    model1 = reBOWModelist(alldatas1)
    model2, model3 = {}, {}
    for key2 in alldatas2:
        model2[key2] = reBOWModelist(alldatas2[key2])
        model3[key2] = {}
        for key3 in alldatas3[key2]:
            model3[key2][key3] = reBOWModelist(alldatas3[key2][key3])
    return model1, model2, model3
def reRulesDict(wikiDatas, attrlist):
    """Mine rules for every attribute at root, cluster and sub-cluster level.

    For each attribute name in ``attrlist`` (identified below by its position
    ``i`` in the list) the training sentences, the clusters of
    ``cluster_<attr>.json`` and the sub-clusters of ``second_<attr>.json`` are
    each passed to ``rulesGet.lzx_SCsearch_conf_filt``. Elements ``[0]`` and
    ``[1]`` of its result are treated as two separate rule sets (per the
    original comment: first-order and second-order rules) and are combined
    with the parent level's rules through ``newRulesFirst`` and
    ``newRulesSecond`` respectively.

    Args:
        wikiDatas: knowledge-base data forwarded unchanged to
            ``rulesGet.lzx_SCsearch_conf_filt``.
        attrlist: sequence of attribute names (strings) used to build the
            input file names.

    Returns:
        A tuple ``(rulesDcit, rulesDcit2, rulesDcit3)``:

        * ``rulesDcit[i]`` - the raw mining result for the training data;
        * ``rulesDcit2[i][j]`` - ``[first, second]`` rules of cluster ``j``
          combined with the root rules;
        * ``rulesDcit3[i][j][k]`` - ``[first, second]`` rules of sub-cluster
          ``k`` combined with the rules of cluster ``j``.

        All three are empty dicts when ``attrlist`` is empty.

    Raises:
        KeyError: if a cluster key of the ``cluster_`` file is missing from
            the matching ``second_`` file.
    """
    preurl = '/home/ren/remote/ruleMining2/'
    basedir = preurl + 'outdatas_baseline/'

    rulesDcit = {}
    rulesDcit2 = {}
    rulesDcit3 = {}
    for attr_i, attr in enumerate(attrlist):
        datas = writereadFile.readContent(basedir + 'train_' + attr + '.csv')
        # Holds the first-order rules ([0]) and the second-order rules ([1]).
        rulesRootlist = rulesGet.lzx_SCsearch_conf_filt(wikiDatas, datas)
        rulesDcit[attr_i] = rulesRootlist
        rulesDcit2[attr_i] = {}
        rulesDcit3[attr_i] = {}

        # Context managers close the JSON files deterministically; the
        # previous json.load(open(...)) form left both handles open.
        with open(basedir + 'cluster_' + attr + '.json') as fh:
            clusDict2 = json.load(fh)
        with open(basedir + 'second_' + attr + '.json') as fh:
            clusDict3 = json.load(fh)

        for j, cluster in clusDict2.items():
            r2_rulesList = rulesGet.lzx_SCsearch_conf_filt(wikiDatas, cluster)
            newr2 = newRulesFirst(rulesRootlist[0], r2_rulesList[0])
            newr2_2 = newRulesSecond(rulesRootlist[1], r2_rulesList[1])
            rulesDcit2[attr_i][j] = [newr2, newr2_2]

            rulesDcit3[attr_i][j] = {}
            for k, subcluster in clusDict3[j].items():
                if subcluster == cluster:
                    # The sub-cluster is the whole cluster: mining it again
                    # is skipped and the cluster-level rules are reused.
                    new_rules3 = newr2
                    new_rules3_2 = newr2_2
                else:
                    r3_rulesList = rulesGet.lzx_SCsearch_conf_filt(
                        wikiDatas, subcluster)
                    new_rules3 = newRulesFirst(newr2, r3_rulesList[0])
                    new_rules3_2 = newRulesSecond(newr2_2, r3_rulesList[1])
                rulesDcit3[attr_i][j][k] = [new_rules3, new_rules3_2]
    return rulesDcit, rulesDcit2, rulesDcit3
def testMarch_BOW(attrlist):
    """Assign every test sentence to its best root, cluster and sub-cluster.

    The models are obtained from ``reBOWModel2(attrlist)``. Each sentence of
    ``test_<attr>.csv`` is scored with ``testSents`` against the level-1
    model; sentences whose score is not greater than 0 are dropped. The
    remaining ones are scored against the level-2 model of the matched
    attribute and against the level-3 model of the matched cluster.

    Args:
        attrlist: sequence of attribute names (strings) used to build the
            input file names.

    Returns:
        A tuple of three dicts mapping match keys to lists of sentences:
        keyed by ``marchindex``, by ``(marchindex, marchindex2)`` and by
        ``(marchindex, marchindex2, marchindex3)``. Level-2 and level-3 keys
        are created even when the corresponding score is not positive, in
        which case the sentence is not appended.
    """
    # NOTE(review): the source reached review with its indentation collapsed;
    # the nesting below (everything after the sim1 check guarded by it, the
    # level-2 and level-3 bookkeeping as siblings) is the reading assumed
    # here - confirm against the original layout.
    preurl = '/home/ren/remote/ruleMining2/'
    testmarchDcit, testmarchDcit2, testmarchDcit3 = {}, {}, {}
    model1, model2, model3 = reBOWModel2(attrlist)

    for attr in attrlist:
        datas = writereadFile.readContent(preurl + 'outdatas_baseline/test_' + attr + '.csv')
        for testSent in datas:
            marchindex, sim1 = testSents(testSent, model1)
            if not sim1 > 0:
                # No usable level-1 match: the sentence is ignored entirely.
                continue
            testmarchDcit.setdefault(marchindex, []).append(testSent)

            model2Test = model2[marchindex]
            model3Test = model3[marchindex]
            marchindex2, sim2 = testSents(testSent, model2Test)
            # Level-3 models are keyed by the string form of the cluster index.
            marchindex3, sim3 = testSents(testSent, model3Test[str(marchindex2)])

            level2_key = (marchindex, marchindex2)
            level2_bucket = testmarchDcit2.setdefault(level2_key, [])
            if sim2 > 0:
                level2_bucket.append(testSent)

            level3_key = (marchindex, marchindex2, marchindex3)
            level3_bucket = testmarchDcit3.setdefault(level3_key, [])
            if sim3 > 0:
                level3_bucket.append(testSent)

    return testmarchDcit, testmarchDcit2, testmarchDcit3
url1 = 'outdatas_baseline' print(url1) allpredictsets_lvl2_ignore = [] allpredictsets_lvl3_ignore = [] allpredictsets_lvl2_fault = [] allpredictsets_lvl3_fault = [] allrelBoD = {} alltestsent = [] allroot = {} allroot_2 = {} for attr_i in range(len(attrlist)): attr = attrlist[attr_i] print(attr) wikiDatas = writereadFile.readWikidatas('/home/ren/remote/ruleMining2/wikidatas/' + attr + '_wikidata.csv') trains = writereadFile.readContent('/home/ren/remote/ruleMining2/outdatas_baseline/train_' + attr + '.csv') templateDictsRoot = {} templateRoot = reBetaPattern.reTemplate(trains) allrelBoD[attr_i] = templateRoot templateDictsRoot[0] = templateRoot # print templateDictsRoot rulesRootlist = rulesGet.lzx_SCsearch_conf_filt(wikiDatas, trains) rulesRoot = rulesRootlist[0] allroot[attr_i] = rulesRoot numbrRoot = len(rulesRoot) rulesRoot_2 = rulesRootlist[1] allroot_2[attr_i] = rulesRoot_2 numbrRoot_2 = len(rulesRoot_2)