def model_config(self, predicate, formula, database, mln_path, db_path):
    """
    Returns the database and MLN objects in MLN format.

    --Inputs--
    predicate: predicate object with parsed predicates
    formula:   formula object with parsed formulas
    database:  .txt file containing the database(s)
    mln_path:  .mln file name to save the learned weights per formula
    db_path:   .db file to save the progress of the database learning
    """
    base_path = os.getcwd()
    # Parse with PRACGrammar since we are using clusters.
    mln = MLN(grammar='PRACGrammar', logic='FirstOrderLogic')
    for i in predicate:
        mln << i
        print('input predicate successful: ' + i)
    for i in formula:
        mln << i
        print('input formula successful: ' + i)
    mln.write()
    mln.tofile(base_path + '/' + mln_path)
    db = Database.load(mln, database)
    # db.write()
    # db.tofile(base_path + '/' + db_path)
    return (db, mln)
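# A minimal, hedged sketch of how the (db, mln) pair returned above could be fed
# into pracmln's generic learner instead of the project's own activate_model().
# The instance `s`, the file names, and the 'BPLL' method choice are assumptions
# for illustration, not part of the original code.
from pracmln import MLNLearn

predicates = s.read_predicate('predicate.txt')
formulas = s.read_formula('formula.txt', predicates)
db, mln = s.model_config(predicates, formulas, 'data.txt', 'results.mln', 'results.db')
# db is a list of Database objects; MLNLearn accepts it via the db parameter.
learned = MLNLearn(mln=mln, db=db, method='BPLL', verbose=True).run()
learned.tofile('learnt_mln.mln')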
def test_learning_taxonomies():
    p = '$PRACMLN_HOME/examples/taxonomies/taxonomies.pracmln'
    mln = MLN(mlnfile=('%s:senses_and_roles.mln' % p), grammar='PRACGrammar')
    mln.write()
    dbs = Database.load(mln, dbfiles='%s:training.db' % p)
    for method in ('DPLL', 'DBPLL_CG', 'DCLL'):
        for multicore in (True, False):
            print('=== LEARNING TEST:', method, '===')
            learn(method=method, mln=mln, db=dbs, verbose=True, multicore=multicore,
                  epreds='is_a', discr_preds=EVIDENCE_PREDS).run()
def test_GSMLN():
    # mln = MLN(grammar='GSMLNGrammar')
    # mln << 'residue(id, profile)'
    # mln << 'partners(id, id)'
    # f = "residue(a, $pa) v residue(b, $pb) => partners(a,b)"
    # # f = "((a(x) ^ b(x)) v (c(x) ^ !(d(x) ^ e(x) ^ g(x)))) => f(x)"
    # # f = "(a(x) v (b(x) ^ c(x))) => f(x)"
    # f = mln.logic.grammar.parse_formula(f)
    # f.print_structure()
    # print(list(f.literals()))
    # g = "partners(id, id)"
    # g = mln.logic.grammar.parse_predicate(g)
    # print(g)
    # print(mln.predicates)

    mln = MLN(mlnfile='smokers.mln', grammar='GSMLNGrammar')
    # mln.write()
    # print(mln.predicates)
    dbs = Database.load(mln, dbfiles='smokers.db')
    # dbs[0].write()
    # print(mln.formulas[0].neural)
    # print(mln.formulas[0].cnf())
    # print(mln.nnformulas[0].idx)
    # print(mln.domains)
    # print(dbs[0].domains)
    # mln.formulas[1].print_structure()

    # mrf = mln.ground(dbs[0])
    # grounder = DefaultGroundingFactory(mrf, simplify=False, unsatfailure=True, verbose=False, cache=0)
    # for f in grounder.itergroundings():
    #     print(f)
    # print(mrf.gndatoms)

    # mln = MLN(grammar='GSMLNGrammar')
    # mln << 'Cancer(&person)'
    # mln << 'Friends(&person,&person)'
    # mln << 'Smokes(&person)'
    # f = 'Smokes($x) => Cancer($x)'
    # g = 'Friends($x,$y) => (Smokes($x) <=> Smokes($y))'
    # print(mln.logic.grammar.parse_formula(f))
    # mln.formula(f)
    # mln.formula(g)
    # print(mln.predicates)
    # print(mln.formulas)
    # mln.formulas[0].print_structure()
    # print(mln.domains)
    # print(mln.formulas[0].cnf())

    # this uses the learn() method from base.py
    learned_mln = mln.learn(databases=dbs, method=GSMLN_L, verbose=True)
def test_learning_taxonomies():
    p = os.path.join(locs.examples, 'taxonomies', 'taxonomies.pracmln')
    mln = MLN(mlnfile=('%s:senses_and_roles.mln' % p), grammar='PRACGrammar')
    mln.write()
    dbs = Database.load(mln, dbfiles='%s:training.db' % p)
    for method in ('DPLL', 'DBPLL_CG', 'DCLL'):
        for multicore in (True, False):
            print('=== LEARNING TEST:', method, '===')
            learn(method=method, mln=mln, db=dbs, verbose=True, multicore=multicore,
                  epreds='is_a', discr_preds=EVIDENCE_PREDS).run()
def train(self, praclearning):
    print(prac_heading('Training knowledgebase'))

    mlnName = praclearning.otherParams.get('mln', None)
    mlnLogic = praclearning.otherParams.get('logic', None)
    objName = praclearning.otherParams.get('concept', None)
    onTheFly = praclearning.otherParams.get('onthefly', False)

    mln = MLN(mlnfile=os.path.abspath(mlnName), logic=mlnLogic, grammar='PRACGrammar')

    pracTrainingDBS = praclearning.training_dbs
    trainingDBS = []

    if len(pracTrainingDBS) >= 1 and type(pracTrainingDBS[0]) is str:
        # db from file
        logger.info('Learning from db files...')
        inputdbs = Database.load(mln, dbfile=pracTrainingDBS, ignore_unknown_preds=True)
        trainingDBS += inputdbs
    elif len(pracTrainingDBS) > 1:
        logger.info('Learning from db files (xfold)...')
        trainingDBS = pracTrainingDBS
    else:
        # db from inference result
        logger.info('Learning from inference result...')
        inputdbs = pracTrainingDBS
        for db in inputdbs:
            db << 'object(cluster, {})'.format(objName)
            trainingDBS.append(db)

    outputfile = '{}_trained.mln'.format(mlnName.split('.')[0])

    # learning mln
    trainedMLN = mln.learnWeights(trainingDBS, LearningMethods.DCLL,
                                  evidencePreds=possibleProps, partSize=1,
                                  gaussianPriorSigma=10, useMultiCPU=0,
                                  optimizer='cg', learningRate=0.9)

    print(prac_heading('Learnt Formulas'))
    trainedMLN.printFormulas()
    trainedMLN.write(open(outputfile, 'w'))

    return trainedMLN
from pracmln import MLN
from pracmln import Database
from pracmln import MLNQuery

mln = MLN(mlnfile='./data/smokers/mlns/smoking_trained.mln',
          grammar='PRACGrammar', logic='FirstOrderLogic')
mln.write()

db = Database.load(mln, './data/smokers/dbs/smoking-test.db')[0]
db.write()

print("Running Query...")
result = MLNQuery(mln=mln, db=db).run()
print(result)
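# Hedged follow-up sketch: MLNQuery(...).run() returns the inference object, which
# exposes the query results as a dict of ground atoms to probabilities. The
# `results` attribute is assumed here and may differ between pracmln versions.
for atom, prob in result.results.items():
    print('{:.4f}  {}'.format(prob, atom))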
    Takes a sequence of databases and performs a multi-dimensional scaling on
    the synsets given by the merged set of 'concept' domains in them.
    '''
    domains = mergedom(*map(lambda d: d.domains, dbs))
    concepts = domains.get('concept', None)
    if concepts is None:
        logger.error('Domain "concept" not found in databases.')
        return
    if 'null' in concepts:
        # remove the null concept
        del concepts[concepts.index('null')]
    synsets = list(map(lambda x: wn.synset(x), concepts))  # @UndefinedVariable
    distance = zeros((len(synsets), len(synsets)))
    for (i, pointi) in enumerate(synsets):
        for (j, pointj) in enumerate(synsets):
            sys.stdout.write('{:f} / {:f} \r'.format(i, len(synsets)))
            sim = synsets[i].path_similarity(synsets[j])
            if sim is None:
                sim = 0
            distance[i, j] = 1. - sim
    Y, eig = doMDS(distance, dimensions=2)
    pylab.figure(1)
    for i, s in enumerate(synsets):
        text(Y[i, 0], Y[i, 1], s.name, fontsize=8)
    pylab.plot(Y[:, 0], Y[:, 1], '.')
    pylab.show()


if __name__ == '__main__':
    prac = PRAC()
    dbs = list(Database.load(prac.mln, os.path.join('/', 'home', 'nyga', 'work',
                                                    'nl_corpora', 'wikihow', 'Slicing.db')))
    domains = mergedom(*map(lambda d: d.domains, dbs))
    concepts = domains.get('concept', None)
args = parser.parse_args()

if args.learn:
    print('you chose to learn the weights for the mln')
    predicate = s.read_predicate('predicate.txt')
    formula = s.read_formula('formula.txt', predicate)
    data, mln = s.model_config(predicate, formula, 'data.txt', 'results.mln', 'results.db')
    with open('base.mln', 'wb') as base_mln_file:
        pickle.dump(mln, base_mln_file)
    output = s.activate_model(data, mln)
    output.tofile(os.getcwd() + '/' + 'learnt_mln.mln')
elif args.query:
    print('you chose to query the mln')
    mln = MLN.load(files='learnt_mln.mln')
    infer_world = Database.load(mln, 'inference_data.txt')
    s.inference('query.txt', infer_world, mln)
else:
    print('please input learn (-l) or query (-q) to proceed')

# query(queries='Cancer(x)', method='MC-SAT', mln=mln, db=data, verbose=False, multicore=True).run().results

# =============================================================================
# predicate_list = [(x, x.lower()) for x in predicate_list]
# predicate = [x.replace(' ', '').lower() for x in predicate if x != '']
# predicate2 = []
            valmap = dict([(val, computeClosestCluster(val, self.clusters[domain])[1][0])
                           for val in newdb.domains[domain]])
            newdb.domains[domain] = list(valmap.values())
            # replace the affected evidences
            for ev in list(newdb.evidence.keys()):
                truth = newdb.evidence[ev]
                _, pred, params = db.mln.logic.parse_literal(ev)
                if domain in self.mln.predicate(pred).argdoms:
                    # domain is affected by the mapping
                    newdb.retract(ev)
                    newargs = [v if domain != self.mln.predicate(pred).argdoms[i] else valmap[v]
                               for i, v in enumerate(params)]
                    atom = '%s%s(%s)' % ('' if truth else '!', pred, ','.join(newargs))
                    newdb << atom
            newdbs.append(newdb)
        return newdbs


if __name__ == '__main__':
    mln = MLN.load('/home/nyga/code/pracmln/examples/object-recognition/object-recognition.pracmln:object-detection.mln')
    dbs = Database.load(mln, '/home/nyga/code/pracmln/examples/object-recognition/object-recognition.pracmln:scenes-new.db')

    # do some plain clustering on texts
    s = ['otto', 'otte', 'obama', 'markov logic network', 'markov logic', 'otta', 'markov random field']
    s = set([val for db in Database.load(mln, '/home/nyga/code/pracmln/examples/object-recognition/object-recognition.pracmln:scenes-new.db')
             for val in db.domains['text']])
    clusters = SAHN(s)
    for c in clusters:
        print(c)

    # apply clustering to a set of databases
    cluster = NoisyStringClustering(mln, ['text'])
    cluster.materialize(dbs)
def test_reasoning():
    mln = MLN.load(files='./mln/alarm.mln')
    db = Database.load(mln, './mln/alarm.db')
    result = MLNQuery(mln=mln, db=db).run()
    result.write()
mln << 'instance(cluster, instance)'
mln << 'object(cluster, object!)'

## formulas
mln << '0 shape(?c, +?sha) ^ color(?c, +?col) ^ size(?c, +?size) ^ instance(?c, +?inst) ^ object(?c, +?obj)'
mln << '0 goggles_Logo(?c, +?comp) ^ object(?c, +?obj)'
mln << '0 goggles_Text(?c, +?text) ^ object(?c, +?obj)'
mln << '0 goggles_Product(?c, +?prod) ^ object(?c, +?obj)'
mln << '0 scene(+?s) ^ object(?c, +?obj)'

## unique clusters
mln << '0 scene(+?s) ^ object(?c1, +?t1) ^ object(?c2, +?t2) ^ ?c1 =/= ?c2'

dbFileName = '/home/dominik/python_ws/testDB.txt'
# allDB = Database.load(mln, '/home/dominik/python_ws/testDB.txt')
allDB = Database.load(mln, dbFileName)

predictionList = []
predIdx = 0
groundTruthList = []
gtIdx = 0
i = 0
splits = 10

testArray = np.array(allDB, dtype=Database)
kf = KFold(n_splits=splits, shuffle=True)
for train, test in kf.split(testArray):
    print("%s %s" % (train, test))
    predList = []
    pIdx = 0
    gtList = []