def prepareResults(directory, logic):
    '''
    Combine the per-fold confusion matrices stored under
    ``<directory>/<logic>`` into one aggregate matrix and write it to
    ``<directory>/<logic>/conf_matrix.cm``.

    :param directory: path to the experiment directory.
    :param logic:     name of the logic subdirectory (e.g. 'FOL' or 'FUZZY')
                      containing the pickled per-fold ConfusionMatrix files.
    '''
    cm = ConfusionMatrix()
    logicdir = os.path.join(directory, logic)
    for f in os.listdir(logicdir):
        # skip the aggregate file itself so re-running this function does
        # not fold a previous aggregate into the new matrix
        if f == 'conf_matrix.cm':
            continue
        # context manager closes the handle deterministically; the original
        # leaked the file object returned by open()
        with open(os.path.join(logicdir, f), 'rb') as matrixfile:
            cm.combine(pickle.load(matrixfile))
    cm.toFile(os.path.join(logicdir, 'conf_matrix.cm'))
def run(self):
    '''
    Runs the respective fold of the crossvalidation.

    Optionally clusters noisy string domains, learns MLN weights on the
    fold's training databases, stores the learned MLN, then evaluates it
    under both first-order and fuzzy semantics, writing one confusion
    matrix per logic into the 'FOL' and 'FUZZY' subdirectories.
    '''
    log = logging.getLogger(self.fold_id)
    log.info('Running fold %d of %d...' % (self.params.foldIdx + 1,
                                           self.params.foldCount))
    directory = self.params.directory
    try:
        # Apply noisy string clustering
        log.debug('Transforming noisy strings...')
        if self.params.noisyStringDomains is not None:
            noisyStrTrans = NoisyStringTransformer(
                self.params.mln, self.params.noisyStringDomains, True)
            # materialize clustered domains on the training data, then map
            # the test data into the same transformed domains
            learnDBs_ = noisyStrTrans.materializeNoisyDomains(
                self.params.learnDBs)
            testDBs_ = noisyStrTrans.transformDBs(self.params.testDBs)
        else:
            # no noisy-string domains configured: use the databases as-is
            learnDBs_ = self.params.learnDBs
            testDBs_ = self.params.testDBs
        # train the MLN
        mln = self.params.mln
        log.debug('Starting learning...')
        # NOTE(review): `verbose` is a free variable here -- presumably a
        # module-level flag; confirm it is defined in the enclosing module
        learnedMLN = mln.learnWeights(learnDBs_,
                                      method=self.params.learningMethod,
                                      verbose=verbose,
                                      evidencePreds=[
                                          "is_a",
                                          "ac_word",
                                      ],
                                      partSize=2,
                                      optimizer='cg',
                                      maxrepeat=1)
        # store the learned MLN in a file
        learnedMLN.writeToFile(
            os.path.join(directory, 'run_%d.mln' % self.params.foldIdx))
        log.debug('Finished learning.')
        # evaluate the MLN
        log.debug('Evaluating.')
        # reset closed-world declarations, then close every predicate
        # except the query predicate so inference only predicts the query
        learnedMLN.setClosedWorldPred(None)
        if self.params.cwPreds is None:
            self.params.cwPreds = [
                p for p in mln.predicates if p != self.params.queryPred
            ]
        for pred in [
                pred for pred in self.params.cwPreds
                if pred in learnedMLN.predicates
        ]:
            learnedMLN.setClosedWorldPred(pred)
        #FOL
        cm = ConfusionMatrix()
        self.evalMLN(learnedMLN, testDBs_, 'FirstOrderLogic', cm)
        cm.toFile(
            os.path.join(directory, 'FOL',
                         'conf_matrix_%d.cm' % self.params.foldIdx))
        #FUZZY
        cm = ConfusionMatrix()
        self.evalMLN(learnedMLN, testDBs_, 'FuzzyLogic', cm)
        cm.toFile(
            os.path.join(directory, 'FUZZY',
                         'conf_matrix_%d.cm' % self.params.foldIdx))
        log.debug('Evaluation finished.')
    except (KeyboardInterrupt, SystemExit):
        # graceful shutdown on interrupt; returning None signals abort
        log.critical("Exiting...")
        return None
class XValFold(object):
    '''
    Represents one fold of a cross-validation run: learns an MLN on the
    fold's training databases and evaluates it on the fold's test
    databases, accumulating classification results in a ConfusionMatrix.
    '''

    def __init__(self, params):
        '''
        :param params: a XValFoldParams object describing this fold.
        '''
        self.params = params
        self.fold_id = 'Fold-%d' % params.fold_idx
        self.confmat = ConfusionMatrix()
        # dump the training and testing databases of this fold so the
        # exact split can be inspected and reproduced later
        with open(os.path.join(params.directory,
                               'train_dbs_%d.db' % params.fold_idx),
                  'w+') as dbfile:
            Database.write_dbs(params.learn_dbs, dbfile)
        with open(os.path.join(params.directory,
                               'test_dbs_%d.db' % params.fold_idx),
                  'w+') as dbfile:
            Database.write_dbs(params.test_dbs, dbfile)

    def eval(self, mln, dbs):
        '''
        Evaluates the given (learned) MLN on the databases in ``dbs`` and
        accumulates the classification results in ``self.confmat``.

        :param mln: the learned MLN.
        :param dbs: iterable of test databases.
        '''
        querypred = self.params.querypred
        for db_ in dbs:
            db = db_.copy()
            # ground the evidence *before* the query atoms are removed;
            # this closed-world grounding serves as the ground truth
            gndtruth = mln.ground(db)
            gndtruth.apply_cw()
            # remove all query-predicate evidence so inference must predict it
            # (leftover debug calls out()/db.write()/stop() removed -- stop()
            # halted the loop after the first database)
            for atom, _ in db.gndatoms(querypred):
                del db.evidence[atom]
            try:
                resultdb = MLNQuery(config=self.params.queryconf,
                                    mln=mln,
                                    method=InferenceMethods.WCSPInference,
                                    db=db,
                                    cw_preds=[p.name for p in mln.predicates
                                              if p.name != self.params.querypred],
                                    multicore=False).run().resultdb
                result = mln.ground(db)
                result.set_evidence(resultdb)
                # BUGFIX: the original read both pvalue and tvalue from the
                # *inference* result (variable.evidence_value() twice), so
                # every prediction trivially matched its "truth" and the
                # ground-truth grounding was never used. The truth value must
                # come from gndtruth. Both MRFs are grounded from the same
                # MLN and database, so their variables are assumed to align
                # index-wise -- TODO confirm against the MRF implementation.
                for resvar, truthvar in zip(result.variables,
                                            gndtruth.variables):
                    if resvar.predicate.name != querypred:
                        continue
                    pvalue = resvar.evidence_value()
                    tvalue = truthvar.evidence_value()
                    prediction = [a for a, v in resvar.atomvalues(pvalue)
                                  if v == 1]
                    truth = [a for a, v in truthvar.atomvalues(tvalue)
                             if v == 1]
                    prediction = str(prediction[0]) if prediction else None
                    truth = str(truth[0]) if truth else None
                    self.confmat.addClassificationResult(prediction, truth)
            except Exception:
                # narrowed from a bare except so KeyboardInterrupt/SystemExit
                # propagate; log the full traceback for diagnosis
                logger.critical(''.join(
                    traceback.format_exception(*sys.exc_info())))

    def run(self):
        '''
        Runs the respective fold of the crossvalidation: trains the MLN,
        stores it to disk, evaluates it and writes this fold's confusion
        matrix to the experiment directory.
        '''
        logger.info('Running fold %d of %d...' % (self.params.fold_idx + 1,
                                                  self.params.folds))
        directory = self.params.directory
        try:
            # train the MLN
            mln = self.params.mln
            logger.debug('Starting learning...')
            # work on copies so the closed-world completion below does not
            # mutate the caller's databases
            learn_dbs = [db.copy() for db in self.params.learn_dbs]
            # apply the closed-world assumption to fuzzy atoms: any fuzzy
            # ground atom with truth value != 1 is forced to 0
            fuzzypreds = [p.name for p in mln.predicates
                          if isinstance(p, FuzzyPredicate)]  # hoisted: loop-invariant
            for db in learn_dbs:
                for a, v in db.gndatoms(fuzzypreds):
                    if v != 1:
                        db[a] = 0
            learned = MLNLearn(config=self.params.learnconf,
                               mln=mln,
                               db=learn_dbs,
                               multicore=False).run()
            # store the learned MLN in a file
            learned.tofile(
                os.path.join(directory, 'run_%d.mln' % self.params.fold_idx))
            logger.debug('Finished learning.')
            # evaluate the MLN and persist this fold's confusion matrix
            logger.debug('Evaluating.')
            self.eval(learned, self.params.test_dbs)
            self.confmat.toFile(
                os.path.join(directory,
                             'conf_matrix_%d.cm' % self.params.fold_idx))
            logger.debug('Evaluation finished.')
        except (KeyboardInterrupt, SystemExit):
            # graceful shutdown on interrupt; returning None signals abort
            logger.critical("Exiting...")
            return None
# NOTE(review): this fragment starts mid-scope -- the first statement
# presumably sits inside the loop that constructs one XValFold per fold;
# confirm indentation against the full file.
logger.info('Params for fold %d:\n%s' % (fold_idx, str(params)))
if multicore:
    # set up a pool of worker processes
    try:
        workerPool = Pool()
        logger.info('Starting %d-fold Cross-Validation in %d processes.' %
                    (folds, workerPool._processes))
        # distribute the fold runnables over the pool and block until all
        # folds have finished
        result = workerPool.map_async(runFold, foldRunnables).get()
        workerPool.close()
        workerPool.join()
        # combine the per-fold confusion matrices into a single aggregate
        cm = ConfusionMatrix()
        for r in result:
            cm.combine(r.confmat)
        elapsedTimeMP = time.time() - startTime
        cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))
        # create the pdf table and move it into the log directory
        # this is a dirty hack since pdflatex apparently
        # does not support arbitrary output paths
        pdfname = 'conf_matrix'
        logger.info('creating pdf if confusion matrix...')
        cm.toPDF(pdfname)
        os.rename('%s.pdf' % pdfname,
                  os.path.join(expdir, '%s.pdf' % pdfname))
    except (KeyboardInterrupt, SystemExit, SystemError):
        logger.critical("Caught KeyboardInterrupt, terminating workers")
        workerPool.terminate()
        workerPool.join()
        exit(1)
    except:
        # NOTE(review): this chunk is truncated here -- the error-logging
        # expression continues beyond the visible source
        logger.error('\n' +
class XValFold(object):
    '''
    Represents one fold of a cross-validation run: learns an MLN on the
    fold's training databases and evaluates it on the fold's test
    databases, accumulating classification results in a ConfusionMatrix.
    '''

    def __init__(self, params):
        '''
        :param params: a XValFoldParams object describing this fold.
        '''
        self.params = params
        self.fold_id = 'Fold-%d' % params.fold_idx
        self.confmat = ConfusionMatrix()
        # dump the training and testing databases of this fold so the
        # exact split can be inspected and reproduced later
        with open(os.path.join(params.directory,
                               'train_dbs_%d.db' % params.fold_idx),
                  'w+') as dbfile:
            Database.write_dbs(params.learn_dbs, dbfile)
        with open(os.path.join(params.directory,
                               'test_dbs_%d.db' % params.fold_idx),
                  'w+') as dbfile:
            Database.write_dbs(params.test_dbs, dbfile)

    def eval(self, mln, dbs):
        '''
        Evaluates the given (learned) MLN on the databases in ``dbs`` and
        accumulates the classification results in ``self.confmat``.

        :param mln: the learned MLN.
        :param dbs: iterable of test databases.
        '''
        querypred = self.params.querypred
        for db_ in dbs:
            db = db_.copy()
            # ground the evidence *before* the query atoms are removed;
            # this closed-world grounding serves as the ground truth
            gndtruth = mln.ground(db)
            gndtruth.apply_cw()
            # remove all query-predicate evidence so inference must predict it
            # (leftover debug calls out()/db.write()/stop() removed -- stop()
            # halted the loop after the first database)
            for atom, _ in db.gndatoms(querypred):
                del db.evidence[atom]
            try:
                resultdb = MLNQuery(config=self.params.queryconf,
                                    mln=mln,
                                    method=InferenceMethods.WCSPInference,
                                    db=db,
                                    cw_preds=[p.name for p in mln.predicates
                                              if p.name != self.params.querypred],
                                    multicore=False).run().resultdb
                result = mln.ground(db)
                result.set_evidence(resultdb)
                # BUGFIX: the original read both pvalue and tvalue from the
                # *inference* result (variable.evidence_value() twice), so
                # every prediction trivially matched its "truth" and the
                # ground-truth grounding was never used. The truth value must
                # come from gndtruth. Both MRFs are grounded from the same
                # MLN and database, so their variables are assumed to align
                # index-wise -- TODO confirm against the MRF implementation.
                for resvar, truthvar in zip(result.variables,
                                            gndtruth.variables):
                    if resvar.predicate.name != querypred:
                        continue
                    pvalue = resvar.evidence_value()
                    tvalue = truthvar.evidence_value()
                    prediction = [a for a, v in resvar.atomvalues(pvalue)
                                  if v == 1]
                    truth = [a for a, v in truthvar.atomvalues(tvalue)
                             if v == 1]
                    prediction = str(prediction[0]) if prediction else None
                    truth = str(truth[0]) if truth else None
                    self.confmat.addClassificationResult(prediction, truth)
            except Exception:
                # narrowed from a bare except so KeyboardInterrupt/SystemExit
                # propagate; log the full traceback for diagnosis
                logger.critical(''.join(
                    traceback.format_exception(*sys.exc_info())))

    def run(self):
        '''
        Runs the respective fold of the crossvalidation: trains the MLN,
        stores it to disk, evaluates it and writes this fold's confusion
        matrix to the experiment directory.
        '''
        logger.info('Running fold %d of %d...' % (self.params.fold_idx + 1,
                                                  self.params.folds))
        directory = self.params.directory
        try:
            # train the MLN
            mln = self.params.mln
            logger.debug('Starting learning...')
            # work on copies so the closed-world completion below does not
            # mutate the caller's databases
            learn_dbs = [db.copy() for db in self.params.learn_dbs]
            # apply the closed-world assumption to fuzzy atoms: any fuzzy
            # ground atom with truth value != 1 is forced to 0
            fuzzypreds = [p.name for p in mln.predicates
                          if isinstance(p, FuzzyPredicate)]  # hoisted: loop-invariant
            for db in learn_dbs:
                for a, v in db.gndatoms(fuzzypreds):
                    if v != 1:
                        db[a] = 0
            learned = MLNLearn(config=self.params.learnconf,
                               mln=mln,
                               db=learn_dbs,
                               multicore=False).run()
            # store the learned MLN in a file
            learned.tofile(
                os.path.join(directory, 'run_%d.mln' % self.params.fold_idx))
            logger.debug('Finished learning.')
            # evaluate the MLN and persist this fold's confusion matrix
            logger.debug('Evaluating.')
            self.eval(learned, self.params.test_dbs)
            self.confmat.toFile(
                os.path.join(directory,
                             'conf_matrix_%d.cm' % self.params.fold_idx))
            logger.debug('Evaluation finished.')
        except (KeyboardInterrupt, SystemExit):
            # graceful shutdown on interrupt; returning None signals abort
            logger.critical("Exiting...")
            return None
# NOTE(review): this fragment starts mid-scope -- the first two statements
# presumably sit inside the loop that constructs one XValFold per fold;
# confirm indentation against the full file.
foldRunnables.append(XValFold(params))
logger.info('Params for fold %d:\n%s' % (fold_idx, str(params)))
if multicore:
    # set up a pool of worker processes
    try:
        workerPool = Pool()
        logger.info('Starting %d-fold Cross-Validation in %d processes.' %
                    (folds, workerPool._processes))
        # distribute the fold runnables over the pool and block until all
        # folds have finished
        result = workerPool.map_async(runFold, foldRunnables).get()
        workerPool.close()
        workerPool.join()
        # combine the per-fold confusion matrices into a single aggregate
        cm = ConfusionMatrix()
        for r in result:
            cm.combine(r.confmat)
        elapsedTimeMP = time.time() - startTime
        cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))
        # create the pdf table and move it into the log directory
        # this is a dirty hack since pdflatex apparently
        # does not support arbitrary output paths
        pdfname = 'conf_matrix'
        logger.info('creating pdf if confusion matrix...')
        cm.toPDF(pdfname)
        os.rename('%s.pdf' % pdfname,
                  os.path.join(expdir, '%s.pdf' % pdfname))
    except (KeyboardInterrupt, SystemExit, SystemError):
        logger.critical("Caught KeyboardInterrupt, terminating workers")
        workerPool.terminate()
        workerPool.join()
        exit(1)
    except:
        # catch-all: log the full traceback and abort the experiment
        logger.error('\n' +
                     ''.join(traceback.format_exception(*sys.exc_info())))
        exit(1)