class XValFold(object):
    '''
    Class representing and providing methods for a cross validation fold.

    Constructing a fold writes its training and test databases to
    ``params.directory``. :meth:`run` then learns an MLN on the training
    databases, stores it, evaluates it on the test databases via
    :meth:`eval` and writes the resulting confusion matrix to a file.
    '''
    # NOTE(review): this file contains a second ``class XValFold`` definition
    # further down; being defined later, that one is the binding that wins at
    # import time -- confirm which copy is meant to stay.

    def __init__(self, params):
        '''
        Store the fold parameters and dump the fold's databases to disk.

        :param params: an XValFoldParams object. This class reads its
                       ``fold_idx``, ``folds``, ``directory``, ``learn_dbs``,
                       ``test_dbs``, ``mln``, ``querypred``, ``learnconf``
                       and ``queryconf`` attributes.
        '''
        self.params = params
        self.fold_id = 'Fold-%d' % params.fold_idx
        self.confmat = ConfusionMatrix()
        # write the training and testing databases into a file
        train_path = os.path.join(params.directory,
                                  'train_dbs_%d.db' % params.fold_idx)
        with open(train_path, 'w+') as dbfile:
            Database.write_dbs(params.learn_dbs, dbfile)
        test_path = os.path.join(params.directory,
                                 'test_dbs_%d.db' % params.fold_idx)
        with open(test_path, 'w+') as dbfile:
            Database.write_dbs(params.test_dbs, dbfile)

    def eval(self, mln, dbs):
        '''
        Evaluate the given (learned) MLN on the databases given in dbs and
        record the outcome in ``self.confmat``.

        For every database, a copy is made, the ground atoms of the query
        predicate are removed from the copy's evidence, a query is run on
        the copy, and one classification result per variable of the query
        predicate is added to the confusion matrix.

        Nothing is returned. An ordinary exception raised while querying a
        database is logged and evaluation continues with the next database;
        ``KeyboardInterrupt`` and ``SystemExit`` are *not* swallowed, so the
        caller can react to them.

        :param mln: the (learned) MLN to evaluate.
        :param dbs: iterable of test databases.
        '''
        querypred = self.params.querypred
        for db_ in dbs:
            # save and remove the query predicates from the evidence
            db = db_.copy()
            # NOTE(review): gndtruth is grounded before the query evidence is
            # removed, but it is never read afterwards (see ``tvalue`` below).
            gndtruth = mln.ground(db)
            gndtruth.apply_cw()
            # materialise the atoms first, since evidence is deleted while
            # walking over them
            for atom, _ in list(db.gndatoms(querypred)):
                out('removing evidence', repr(atom))
                del db.evidence[atom]
            # NOTE(review): db.write() and stop() look like debugging
            # leftovers -- confirm they are wanted in unattended runs.
            db.write()
            stop()
            try:
                cw_preds = [p.name for p in mln.predicates
                            if p.name != querypred]
                resultdb = MLNQuery(config=self.params.queryconf,
                                    mln=mln,
                                    method=InferenceMethods.WCSPInference,
                                    db=db,
                                    cw_preds=cw_preds,
                                    multicore=False).run().resultdb
                result = mln.ground(db)
                result.set_evidence(resultdb)
                for variable in result.variables:
                    if variable.predicate.name != querypred:
                        continue
                    pvalue = variable.evidence_value()
                    # NOTE(review): the truth value is read from the very
                    # same variable as the prediction, so both are always
                    # equal here. Presumably it should come from gndtruth --
                    # TODO confirm and fix.
                    tvalue = variable.evidence_value()
                    prediction = [a for a, v in variable.atomvalues(pvalue)
                                  if v == 1]
                    truth = [a for a, v in variable.atomvalues(tvalue)
                             if v == 1]
                    prediction = str(prediction[0]) if prediction else None
                    truth = str(truth[0]) if truth else None
                    self.confmat.addClassificationResult(prediction, truth)
            except Exception:
                # Deliberately not a bare ``except``: interrupts and
                # interpreter exits must reach the handler in run().
                logger.critical(traceback.format_exc())

    def run(self):
        '''
        Runs the respective fold of the crossvalidation.

        Learns an MLN on copies of the training databases, stores it as
        ``run_<fold_idx>.mln`` in the fold directory, evaluates it on the
        test databases and writes the confusion matrix to
        ``conf_matrix_<fold_idx>.cm``.

        :return: always ``None``; on ``KeyboardInterrupt`` or ``SystemExit``
                 a critical message is logged and the method returns early.
        '''
        logger.info('Running fold %d of %d...' % (self.params.fold_idx + 1,
                                                  self.params.folds))
        directory = self.params.directory
        try:
            # train the MLN
            mln = self.params.mln
            logger.debug('Starting learning...')
            learn_dbs = [db.copy() for db in self.params.learn_dbs]
            # apply closed world for fuzzy atoms
            fuzzy_preds = [p.name for p in mln.predicates
                           if isinstance(p, FuzzyPredicate)]
            for db in learn_dbs:
                for a, v in db.gndatoms(fuzzy_preds):
                    if v != 1:
                        db[a] = 0
            learned = MLNLearn(config=self.params.learnconf,
                               mln=mln,
                               db=learn_dbs,
                               multicore=False).run()
            # store the learned MLN in a file
            learned.tofile(os.path.join(directory,
                                        'run_%d.mln' % self.params.fold_idx))
            logger.debug('Finished learning.')
            # evaluate the MLN
            logger.debug('Evaluating.')
            self.eval(learned, self.params.test_dbs)
            self.confmat.toFile(
                os.path.join(directory,
                             'conf_matrix_%d.cm' % self.params.fold_idx))
            logger.debug('Evaluation finished.')
        except (KeyboardInterrupt, SystemExit):
            logger.critical("Exiting...")
            return None
class XValFold(object):
    '''
    Class representing and providing methods for a cross validation fold.

    The constructor persists the fold's training and test databases in
    ``params.directory``. :meth:`run` learns an MLN from the training
    databases, saves it, scores it on the test databases through
    :meth:`eval` and saves the confusion matrix.
    '''
    # NOTE(review): an earlier ``class XValFold`` definition precedes this
    # one in the file and is shadowed by it -- confirm the duplicate is
    # intended.

    def __init__(self, params):
        '''
        Keep the fold parameters and write the fold's databases to files.

        :param params: an XValFoldParams object. Attributes used by this
                       class: ``fold_idx``, ``folds``, ``directory``,
                       ``learn_dbs``, ``test_dbs``, ``mln``, ``querypred``,
                       ``learnconf`` and ``queryconf``.
        '''
        self.params = params
        self.fold_id = 'Fold-%d' % params.fold_idx
        self.confmat = ConfusionMatrix()
        # write the training and testing databases into a file
        self._write_dbs('train_dbs_%d.db' % params.fold_idx, params.learn_dbs)
        self._write_dbs('test_dbs_%d.db' % params.fold_idx, params.test_dbs)

    def _write_dbs(self, filename, dbs):
        ''' Write ``dbs`` to ``filename`` inside the fold directory. '''
        path = os.path.join(self.params.directory, filename)
        with open(path, 'w+') as dbfile:
            Database.write_dbs(dbs, dbfile)

    def _without_query_evidence(self, mln, db_):
        '''
        Return a copy of ``db_`` whose evidence no longer contains the
        ground atoms of the query predicate.
        '''
        db = db_.copy()
        # NOTE(review): gndtruth is built from the full evidence but is not
        # used anywhere afterwards (see the note in _record_results).
        gndtruth = mln.ground(db)
        gndtruth.apply_cw()
        # snapshot the atoms up front because evidence is deleted in the loop
        for atom, _ in list(db.gndatoms(self.params.querypred)):
            out('removing evidence', repr(atom))
            del db.evidence[atom]
        # NOTE(review): db.write() and stop() look like debugging leftovers
        # -- confirm they are wanted in unattended runs.
        db.write()
        stop()
        return db

    def _record_results(self, mln, db):
        '''
        Query ``mln`` on ``db`` and add one classification result per
        variable of the query predicate to ``self.confmat``.
        '''
        querypred = self.params.querypred
        closed_world = [p.name for p in mln.predicates if p.name != querypred]
        query = MLNQuery(config=self.params.queryconf,
                         mln=mln,
                         method=InferenceMethods.WCSPInference,
                         db=db,
                         cw_preds=closed_world,
                         multicore=False)
        resultdb = query.run().resultdb
        result = mln.ground(db)
        result.set_evidence(resultdb)
        for variable in result.variables:
            if variable.predicate.name != querypred:
                continue
            pvalue = variable.evidence_value()
            # NOTE(review): truth is taken from the same variable as the
            # prediction, hence the two always coincide. Presumably the
            # ground truth grounding should be consulted instead -- TODO
            # confirm and fix.
            tvalue = variable.evidence_value()
            predicted = [a for a, v in variable.atomvalues(pvalue) if v == 1]
            actual = [a for a, v in variable.atomvalues(tvalue) if v == 1]
            self.confmat.addClassificationResult(
                str(predicted[0]) if predicted else None,
                str(actual[0]) if actual else None)

    def eval(self, mln, dbs):
        '''
        Evaluate the given (learned) MLN on the databases given in dbs,
        accumulating the outcome in ``self.confmat``.

        Each database is copied, stripped of its query-predicate evidence
        and queried; the results are added to the confusion matrix. The
        method returns nothing.

        Ordinary exceptions raised while querying one database are logged
        and the next database is processed. ``KeyboardInterrupt`` and
        ``SystemExit`` propagate to the caller.

        :param mln: the (learned) MLN to evaluate.
        :param dbs: iterable of test databases.
        '''
        for db_ in dbs:
            # save and remove the query predicates from the evidence
            db = self._without_query_evidence(mln, db_)
            try:
                self._record_results(mln, db)
            except Exception:
                # Not a bare ``except`` on purpose: interrupts and exits
                # have to reach the handler in run().
                logger.critical(traceback.format_exc())

    def run(self):
        '''
        Runs the respective fold of the crossvalidation.

        The MLN is learned on copies of the training databases and stored
        as ``run_<fold_idx>.mln``; it is then evaluated on the test
        databases and the confusion matrix is stored as
        ``conf_matrix_<fold_idx>.cm``, both in the fold directory.

        :return: always ``None``. ``KeyboardInterrupt`` and ``SystemExit``
                 are logged as critical and end the fold early.
        '''
        params = self.params
        logger.info('Running fold %d of %d...' % (params.fold_idx + 1,
                                                  params.folds))
        directory = params.directory
        try:
            # train the MLN
            mln = params.mln
            logger.debug('Starting learning...')
            learn_dbs = [db.copy() for db in params.learn_dbs]
            # apply closed world for fuzzy atoms
            fuzzy_names = [p.name for p in mln.predicates
                           if isinstance(p, FuzzyPredicate)]
            for db in learn_dbs:
                for atom, value in db.gndatoms(fuzzy_names):
                    if value != 1:
                        db[atom] = 0
            learned = MLNLearn(config=params.learnconf,
                               mln=mln,
                               db=learn_dbs,
                               multicore=False).run()
            # store the learned MLN in a file
            learned.tofile(os.path.join(directory,
                                        'run_%d.mln' % params.fold_idx))
            logger.debug('Finished learning.')
            # evaluate the MLN
            logger.debug('Evaluating.')
            self.eval(learned, params.test_dbs)
            self.confmat.toFile(
                os.path.join(directory,
                             'conf_matrix_%d.cm' % params.fold_idx))
            logger.debug('Evaluation finished.')
        except (KeyboardInterrupt, SystemExit):
            logger.critical("Exiting...")
            return None