def __init__(self, mln, noisyStringDomains, verbose=True):
    '''
    Set up a transformer that clusters noisy string domains of an MLN.

    :param mln:                  the MLN whose noisy string domains shall be processed.
    :param noisyStringDomains:   names of the domains to treat as noisy strings.
    :param verbose:              if True, emit progress information.
    '''
    self.log = logs.getlogger('NoisyString')
    self.mln = mln
    self.noisyStringDomains = noisyStringDomains
    self.verbose = verbose
    # domain name -> list of clusters computed for that domain
    self.clusters = {}
    self.noisyDomains = {}
def runFold(fold):
    '''
    Run a single cross-validation fold and return it.

    Any exception raised by the fold is re-raised as a plain ``Exception``
    carrying the fully formatted traceback, so the stack trace survives
    pickling when this function executes in a worker process.

    :param fold:  an object exposing ``run()`` (an ``XValFold``).
    :return:      the same fold object, after ``run()`` completed.
    '''
    # (fix: removed an unused local logger that was created and never used)
    try:
        fold.run()
    except:  # deliberately broad: wrap *any* failure together with its traceback
        raise Exception(''.join(traceback.format_exception(*sys.exc_info())))
    return fold
def run(self):
    '''
    Maximize the learning problem's objective with a scipy optimizer.

    The optimizer is selected via ``self.optimizer``; optimizer-specific
    parameters are filtered out of ``self.optParams``. The scipy routines
    minimize, so objective and gradient are negated.

    :return: the optimized weight vector.
    :raises Exception: if ``self.optimizer`` does not name a known optimizer.
    '''
    optimizer = self.optimizer
    p = self.problem
    # coerce return types so scipy always receives float64 scalars/arrays
    f = lambda wt: numpy.float64(p.f(wt))
    grad = lambda wt: numpy.array(list(map(numpy.float64, p.grad(wt))))
    # negate for minimization
    neg_f = lambda wt: -f(wt)
    neg_grad = lambda wt: -grad(wt)
    if not p.usef:
        neg_f = lambda wt: -p._fDummy(wt)
    log = logs.getlogger(self.__class__.__name__)
    if optimizer == "bfgs":
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
        if self.verbose:
            print("starting optimization with %s... %s\n" % (optimizer, params))
        wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = \
            fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
        if self.verbose:
            print("optimization done with %s..." % optimizer)
            print("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "cg":
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        wt, f_opt, func_calls, grad_calls, warn_flags = \
            fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "ncg":
        params = {k: v for k, v in self.optParams.items() if k in ("avextol", "epsilon", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # NOTE(review): recent scipy versions return 6 values here (incl.
        # hcalls) — verify the unpacking against the installed scipy.
        wt, f_opt, func_calls, grad_calls, warn_flags = \
            fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "fmin":
        params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # fix: with full_output=True, fmin returns (xopt, fopt, iters,
        # funcalls, warnflag) — only the optimum itself is the result
        wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)[0]
        log.info("optimization done with %s..." % optimizer)
    elif optimizer == "powell":
        params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # fix: same as 'fmin' — unpack the optimum from the full_output tuple
        wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)[0]
        log.info("optimization done with %s..." % optimizer)
    elif optimizer == 'l-bfgs-b':
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter", 'bounds')}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        if 'bounds' in params:
            # replicate a single (min, max) pair for every weight
            params['bounds'] = (params['bounds'],) * len(self.wt)
        wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\n" % (-f_opt))
    else:
        raise Exception("Unknown optimizer '%s'" % optimizer)
    return wt
def run(self):
    '''
    Maximize the learning problem's objective with a scipy optimizer.

    Ported from Python-2-only syntax (``lambda (k,v)``, ``print``
    statements, ``iteritems``) to Python 3, consistent with the sibling
    implementation in this file. The scipy routines minimize, so the
    objective and gradient are negated.

    :return: the optimized weight vector.
    :raises Exception: if ``self.optimizer`` does not name a known optimizer.
    '''
    optimizer = self.optimizer
    p = self.problem
    # coerce return types so scipy always receives float64 scalars/arrays
    f = lambda wt: numpy.float64(p.f(wt))
    grad = lambda wt: numpy.array(list(map(numpy.float64, p.grad(wt))))
    # negate for minimization
    neg_f = lambda wt: -f(wt)
    neg_grad = lambda wt: -grad(wt)
    if not p.usef:
        neg_f = lambda wt: -p._fDummy(wt)
    log = logs.getlogger(self.__class__.__name__)
    if optimizer == "bfgs":
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
        if self.verbose:
            print("starting optimization with %s... %s" % (optimizer, params))
        wt, f_opt, grad_opt, Hopt, func_calls, grad_calls, warn_flags = \
            fmin_bfgs(neg_f, self.wt, fprime=neg_grad, full_output=True, **params)
        if self.verbose:
            print("optimization done with %s..." % optimizer)
            print("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "cg":
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        wt, f_opt, func_calls, grad_calls, warn_flags = \
            fmin_cg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "ncg":
        params = {k: v for k, v in self.optParams.items() if k in ("avextol", "epsilon", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # NOTE(review): recent scipy versions return 6 values here (incl.
        # hcalls) — verify the unpacking against the installed scipy.
        wt, f_opt, func_calls, grad_calls, warn_flags = \
            fmin_ncg(neg_f, self.wt, fprime=neg_grad, args=(), full_output=True, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\nfunction evaluations: %d\nwarning flags: %d\n" % (-f_opt, func_calls, warn_flags))
    elif optimizer == "fmin":
        params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # fix: with full_output=True, fmin returns (xopt, fopt, iters,
        # funcalls, warnflag) — only the optimum itself is the result
        wt = fmin(neg_f, self.wt, args=(), full_output=True, **params)[0]
        log.info("optimization done with %s..." % optimizer)
    elif optimizer == "powell":
        params = {k: v for k, v in self.optParams.items() if k in ("xtol", "ftol", "maxiter")}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        # fix: same as 'fmin' — unpack the optimum from the full_output tuple
        wt = fmin_powell(neg_f, self.wt, args=(), full_output=True, **params)[0]
        log.info("optimization done with %s..." % optimizer)
    elif optimizer == 'l-bfgs-b':
        params = {k: v for k, v in self.optParams.items() if k in ("gtol", "epsilon", "maxiter", 'bounds')}
        log.info("starting optimization with %s... %s" % (optimizer, params))
        if 'bounds' in params:
            # replicate a single (min, max) pair for every weight
            params['bounds'] = (params['bounds'],) * len(self.wt)
        wt, f_opt, d = fmin_l_bfgs_b(neg_f, self.wt, fprime=neg_grad, **params)
        log.info("optimization done with %s..." % optimizer)
        log.info("f-opt: %.16f\n" % (-f_opt))
    else:
        raise Exception("Unknown optimizer '%s'" % optimizer)
    return wt
def evalMLN(self, mln, dbs, module):
    '''
    Returns a confusion matrix for the given (learned) MLN evaluated on
    the databases given in dbs.

    Results are accumulated into ``self.confMatrix``; the databases are
    duplicated first, so the caller's copies are not modified.
    '''
    log = logs.getlogger(self.fold_id)
    queryPred = self.params.queryPred
    queryDom = self.params.queryDom
    # build a query template covering all arguments of the query
    # predicate, e.g. 'pred(?arg0,?arg1)'
    sig = ['?arg%d' % i for i, _ in enumerate(self.params.altMLN.predicates[queryPred])]
    querytempl = '%s(%s)' % (queryPred, ','.join(sig))
    dbs = [db.duplicate() for db in dbs]
    infer = PRACInference(module.prac, [])
    inferenceStep = PRACInferenceStep(infer, self)
    for db in dbs:
        # save and remove the query predicates from the evidence
        trueDB = Database(self.params.altMLN)
        for bindings in db.query(querytempl):
            atom = querytempl
            # substitute each bound variable into the template to obtain
            # the concrete ground atom
            for binding in bindings:
                atom = atom.replace(binding, bindings[binding])
            trueDB.addGroundAtom(atom)
            db.retractGndAtom(atom)
        try:
            # run the module's inference pipeline on the stripped database
            inferenceStep.output_dbs = [db]
            infer.inference_steps = [inferenceStep]
            module.prac.run(infer, module, mln=mln)
            resultDB = infer.inference_steps[-1].output_dbs[-1]
            sig2 = list(sig)
            entityIdx = mln.predicates[queryPred].index(queryDom)
            # compare ground truth against the inferred result for every
            # entity of the query domain
            for entity in db.domains[queryDom]:
                sig2[entityIdx] = entity
                query = '%s(%s)' % (queryPred, ','.join(sig2))
                for truth in trueDB.query(query):
                    truth = list(truth.values()).pop()
                for pred in resultDB.query(query):
                    pred = list(pred.values()).pop()
                self.confMatrix.addClassificationResult(pred, truth)
            # restore the query evidence that was retracted above
            for e, v in trueDB.evidence.items():
                if v is not None:
                    db.addGroundAtom('%s%s' % ('' if v is True else '!', e))
        except:
            # log the full traceback but keep evaluating the remaining DBs
            log.critical(''.join(traceback.format_exception(*sys.exc_info())))
def run(self):
    '''
    Gradient ascent with an adaptive step size on the learner's objective.

    Repeatedly steps along the gradient with step size ``alpha``, growing
    alpha while the objective improves and shrinking it otherwise, until
    the gradient norm drops below ``self.gtol`` or ``self.maxiter``
    iterations have been performed.

    :return: the optimized weight vector.
    '''
    log = logs.getlogger(self.__class__.__name__)
    norm = 1
    alpha = 1.0
    step = 1
    log.info('starting optimization with %s... (alpha=%f)' % (self.__class__.__name__, alpha))
    f_ = None
    while True:
        grad = self.learner.grad(self.wt)
        norm = numpy.linalg.norm(grad)
        f_ = self.learner.f(self.wt)
        print()
        print('|grad| =', norm)
        if norm < self.gtol or (self.maxiter is not None and step > self.maxiter):
            break
        # fix: 'step' was never incremented, so the maxiter bound could
        # never trigger and the loop only terminated via gtol
        step += 1
        exitNow = False
        w_ = None
        smaller = False
        bigger = False
        f_opt = f_
        while not exitNow:
            w = self.wt + grad * alpha
            print()
            f = self.learner.f(w, verbose=True)
            if f_ < f:
                # objective improved: accept the step and grow alpha
                if f_opt < f:
                    self.wt = numpy.array(list(w))
                    f_ = f
                alpha *= (1 + self.learningRate)
                exitNow = True
                bigger = True
                w_ = numpy.array(list(w))
            elif f_ > f:
                # objective got worse: possibly fall back to the last
                # improving weights and shrink alpha
                if bigger:
                    if f_opt < f:
                        self.wt = w_
                        f_ = f
                    exitNow = True
                alpha *= (1.0 - self.learningRate)
                smaller = True  # NOTE(review): flag is set but never read
            else:
                exitNow = True
            f_ = f
        print()
        print('alpha =', alpha)
    return self.wt
def run(self):
    '''
    Runs the respective fold of the crossvalidation.

    Trains the module's MLN on this fold's training databases, writes the
    learned MLN to disk, evaluates it on the test databases and stores the
    resulting confusion matrix.
    '''
    log = logs.getlogger(self.fold_id)
    log.info('Running fold %d of %d...' % (self.params.foldIdx + 1, self.params.foldCount))
    directory = self.params.directory
    try:
        # Apply noisy string clustering
        log.debug('Transforming noisy strings...')
        if self.params.noisyStringDomains is not None:
            noisyStrTrans = NoisyStringTransformer(
                self.params.altMLN, self.params.noisyStringDomains, True)
            learnDBs_ = noisyStrTrans.materializeNoisyDomains(
                self.params.learnDBs)
            testDBs_ = noisyStrTrans.transformDBs(self.params.testDBs)
        else:
            # no noisy domains declared: use the databases as-is
            learnDBs_ = self.params.learnDBs
            testDBs_ = self.params.testDBs
        # train the MLN
        log.debug('Starting learning...')
        module = self.params.module
        praclearn = PRACLearning(module.prac)
        praclearn.otherParams['mln'] = self.params.mlnFileName
        praclearn.otherParams['logic'] = self.params.logic
        praclearn.otherParams['onthefly'] = self.params.onthefly
        praclearn.training_dbs = learnDBs_
        learnedMLN = module.train(praclearn)
        # store the learned MLN in a file
        learnedMLN.writeToFile(
            os.path.join(directory, 'run_%d.mln' % self.params.foldIdx))
        log.debug('Finished learning.')
        # evaluate the MLN
        log.debug('Evaluating.')
        self.evalMLN(learnedMLN, testDBs_, module)
        self.confMatrix.toFile(
            os.path.join(directory, 'conf_matrix_%d.cm' % self.params.foldIdx))
        log.debug('Evaluation finished.')
    except (KeyboardInterrupt, SystemExit):
        # allow the user to abort a long-running fold cleanly
        log.critical("Exiting...")
        return None
def run(self):
    '''
    Gradient ascent with an adaptive step size on the learner's objective.

    Repeatedly steps along the gradient with step size ``alpha``, growing
    alpha while the objective improves and shrinking it otherwise, until
    the gradient norm drops below ``self.gtol`` or ``self.maxiter``
    iterations have been performed.

    :return: the optimized weight vector.
    '''
    log = logs.getlogger(self.__class__.__name__)
    norm = 1
    alpha = 1.0
    step = 1
    log.info('starting optimization with %s... (alpha=%f)' % (self.__class__.__name__, alpha))
    f_ = None
    while True:
        grad = self.learner.grad(self.wt)
        norm = numpy.linalg.norm(grad)
        f_ = self.learner.f(self.wt)
        print()
        print('|grad| =', norm)
        if norm < self.gtol or (self.maxiter is not None and step > self.maxiter):
            break
        # fix: 'step' was never incremented, so the maxiter bound could
        # never trigger and the loop only terminated via gtol
        step += 1
        exitNow = False
        w_ = None
        smaller = False
        bigger = False
        f_opt = f_
        while not exitNow:
            w = self.wt + grad * alpha
            print()
            f = self.learner.f(w, verbose=True)
            if f_ < f:
                # objective improved: accept the step and grow alpha
                if f_opt < f:
                    self.wt = numpy.array(list(w))
                    f_ = f
                alpha *= (1 + self.learningRate)
                exitNow = True
                bigger = True
                w_ = numpy.array(list(w))
            elif f_ > f:
                # objective got worse: possibly fall back to the last
                # improving weights and shrink alpha
                if bigger:
                    if f_opt < f:
                        self.wt = w_
                        f_ = f
                    exitNow = True
                alpha *= (1.0 - self.learningRate)
                smaller = True  # NOTE(review): flag is set but never read
            else:
                exitNow = True
            f_ = f
        print()
        print('alpha =', alpha)
    return self.wt
def __init__(self, prac, instr):
    '''
    PRAC inference initialization.

    :param prac:   reference to the PRAC instance.
    :param instr:  (str/iterable) list of natural-language sentences
                   subject to inference.
    :raises TypeError: if instr is neither a string nor a list/tuple.
    '''
    self._logger = logs.getlogger(self.__class__.__name__, level=logs.DEBUG)
    self.prac = prac
    prac.deinit_modules()
    self.watch = StopWatch()
    if type(instr) in {list, tuple}:
        instr_ = instr
    elif isinstance(instr, str):
        # fix: 'basestring' is Python-2-only and raises NameError on Python 3
        instr_ = [instr]
    else:
        # fix: previously 'instr_' was left unbound here, causing a
        # confusing NameError below; fail fast with a clear message instead
        raise TypeError('instr must be a string or a list/tuple of strings, '
                        'got %s' % type(instr).__name__)
    # chain the instructions: each node keeps a reference to its predecessor
    self.fringe = []
    pred = None
    for i in instr_:
        self.fringe.append(NLInstruction(self, i, pred=pred))
        pred = self.fringe[-1]
    self.root = list(self.fringe)
    self.lastnode = None
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import os import tempfile from PIL import Image import base64 logger = logs.getlogger(__name__, logs.DEBUG) # Default packages to use when generating output default_packages = [ 'amsmath', 'amsthm', 'amssymb', 'bm' ] def __build_preamble(packages, declarations): preamble = '\documentclass{article}\n' for p in packages: preamble += "\\usepackage{{{}}}\n".format(p)
def main():
    '''
    Entry point of the k-fold cross-validation script for pracmodules.

    Parses the command line, partitions the databases into folds, runs
    the folds (optionally in parallel), and writes confusion matrices,
    precision tables and PDF renderings into the experiment directory.
    '''
    headline("Running xfold...")
    usage = 'PRAC Crossfold validation on pracmodules'
    parser = argparse.ArgumentParser(description=usage)
    # fix: argparse requires callables for 'type' (int/str); the string
    # type names and the 'callback' action were optparse idioms that make
    # argparse raise at startup.
    parser.add_argument("-k", "--folds", dest="folds", type=int, default=10,
                        help="Number of folds for k-fold Cross Validation")
    parser.add_argument("-p", "--percent", dest="percent", type=int, default=100,
                        # fix: '%' must be escaped as '%%' in argparse help text
                        help="Use only PERCENT%% of the data. (default=100)")
    parser.add_argument("-v", "--verbose", dest="verbose", action='store_true',
                        default=False, help="Verbose mode.")
    parser.add_argument("-c", "--multicore", dest="multicore", action='store_true',
                        # fix: help text was a copy-paste of the verbose flag
                        default=False, help="Use all available CPU cores.")
    parser.add_argument('-n', '--noisy', dest='noisy', type=str, default=None,
                        help='-nDOMAIN defines DOMAIN as a noisy string.')
    parser.add_argument('-f', '--folder', dest='folder', type=str, default=None,
                        help='-f <folder> the folder in which the results shall be saved.')
    parser.add_argument('-m', '--module', dest='module', type=str, default=None,
                        help='-m <module> the module for the mln to xfold.')
    # fix: 'action=callback' does not exist in argparse; accept one or more
    # database files directly instead
    parser.add_argument('-d', '--dbs', nargs='+', type=str, dest='dbs',
                        help='The database files for training and testing.')
    parser.add_argument('--predicate', dest='predicate', type=str, default=None,
                        help='The predicate.')
    parser.add_argument('--domain', dest='domain', type=str, default=None,
                        help='The domain.')
    parser.add_argument('--mln', dest='mln', type=str, default=None,
                        help='The mln needed for training and inference.')
    parser.add_argument('--altMLN', dest='altMLN', type=str, default=None,
                        help='Alternative mln for loading the database files. Optional')
    parser.add_argument('--logic', dest='logic', type=str, default='FuzzyLogic',
                        help='The logic to load the mln with.')
    parser.add_argument("--onthefly", dest="onthefly", default=False,
                        action='store_true', help="Generate MLN on the fly")
    args = parser.parse_args()
    opts_ = vars(args)

    folds = args.folds
    percent = args.percent
    verbose = args.verbose
    multicore = args.multicore
    dirname = args.folder
    moduleName = args.module
    # fix: the -n option was parsed but ignored (noisy was hard-coded to
    # ['text']); honor it when given and keep the old default otherwise
    noisy = [args.noisy] if args.noisy is not None else ['text']
    predName = args.predicate
    domain = args.domain
    dbfiles = args.dbs
    mlnFileName = args.mln
    altMLNFileName = args.altMLN or args.mln  # equal to mlnFileName if no alternative mln given
    logic = args.logic
    onthefly = args.onthefly
    startTime = time.time()

    # set up the experiment directory
    timestamp = time.strftime("%Y-%b-%d-%H-%M-%S", time.localtime())
    if dirname is None:
        idx = 1
        while True:
            dirname = '%s-%d' % (moduleName, idx)
            idx += 1
            if not os.path.exists(dirname):
                break
        dirname += '-' + timestamp
    expdir = os.getenv('PRAC_EXPERIMENTS', '.')
    expdir = os.path.join(expdir, dirname)
    if os.path.exists(expdir):
        print('Directory "%s" exists. Overwrite? ([y]/n)' % expdir, ' ')
        answer = sys.stdin.read(1)
        if answer not in ('y', '\n'):
            exit(0)
        else:
            shutil.rmtree(expdir)
    os.mkdir(expdir)

    # set up the logger
    logs.getlogger().setLevel(logs.INFO)
    log = logs.getlogger()
    fileLogger = FileHandler(os.path.join(expdir, 'xval.log'))
    fileLogger.setFormatter(logs.formatter)
    log.addHandler(fileLogger)
    log.info('Log for %d-fold cross-validation of %s using %s' % (folds, moduleName, dbfiles))
    log.info('Date: %s' % timestamp)
    log.info('Results will be written into %s' % expdir)

    # load module
    prac = PRAC()
    module = prac.module(moduleName)

    # read MLN and dbs
    mln_ = readMLNFromFile(mlnFileName, logic=logic)
    altMLN = readMLNFromFile(altMLNFileName, logic=logic)
    dbs = []
    for dbfile in dbfiles:
        db = readDBFromFile(altMLN, dbfile)
        if type(db) is list:
            dbs.extend(db)
        else:
            dbs.append(db)
    log.info('Read %d databases.' % len(dbs))
    cwpreds = [pred for pred in mln_.predicates if pred != predName]

    # create the partition of data
    subsetLen = int(math.ceil(len(dbs) * percent / 100.0))
    if subsetLen < len(dbs):
        log.info('Using only %d of %d DBs' % (subsetLen, len(dbs)))
        dbs = sample(dbs, subsetLen)
    if len(dbs) < folds:
        log.error('Cannot do %d-fold cross validation with only %d databases.'
                  % (folds, len(dbs)))
        exit(0)
    shuffle(dbs)
    partSize = int(math.ceil(len(dbs) / float(folds)))
    partition = []
    for i in range(folds):
        partition.append(dbs[i * partSize:(i + 1) * partSize])

    foldRunnables = []
    for foldIdx in range(folds):
        params = XValFoldParams()
        params.mln = mln_.duplicate()
        params.altMLN = altMLN.duplicate()
        params.learnDBs = []
        # fix: the loop variable previously shadowed the outer 'dbs' list;
        # all partitions except the test fold form the training set
        for part in [d for i, d in enumerate(partition) if i != foldIdx]:
            params.learnDBs.extend(part)
        params.testDBs = partition[foldIdx]
        params.foldIdx = foldIdx
        params.foldCount = folds
        params.noisyStringDomains = noisy
        params.directory = expdir
        params.queryPred = predName
        params.queryDom = domain
        params.module = module
        params.logic = logic
        params.mlnFileName = mlnFileName
        params.altMLNFileName = altMLNFileName
        params.onthefly = onthefly
        foldRunnables.append(XValFold(params))
        log.info('Params for fold %d:\n%s' % (foldIdx, str(params)))

    def writeresults(cm):
        # persist the confusion matrix, precision tables and PDF renderings
        cm.toFile(os.path.join(expdir, 'conf_matrix.cm'))
        cm.precisionsToFile(os.path.join(expdir, 'precisions.txt'))
        cm.precisionsToFile(os.path.join(expdir, 'precisions_sim.txt'), sim=True)
        pdfname = 'conf_matrix'
        pdfnameSim = 'conf_matrix_sim'
        log.info('creating pdf of confusion matrix...')
        cm.toPDF(pdfname)
        cm.toPDF(pdfnameSim, sim=True)
        os.rename('{}.pdf'.format(pdfname),
                  os.path.join(expdir, '{}.pdf'.format(pdfname)))
        os.rename('{}_sim.pdf'.format(pdfname),
                  os.path.join(expdir, '{}_sim.pdf'.format(pdfname)))

    if multicore:
        # set up a pool of (non-daemon!!) worker processes
        try:
            workerPool = NDPool()
            log.info('Starting %d-fold Cross-Validation in %d processes.'
                     % (folds, workerPool._processes))
            result = workerPool.map_async(runFold, foldRunnables).get()
            workerPool.close()
            workerPool.join()
            cm = ConfusionMatrixSim()
            for r in result:
                cm.combine(r.confMatrix)
            elapsedTime = time.time() - startTime
            writeresults(cm)
        except (KeyboardInterrupt, SystemExit, SystemError):
            log.critical("Caught KeyboardInterrupt, terminating workers")
            workerPool.terminate()
            workerPool.join()
            exit(1)
        except:
            log.error('\n' + ''.join(traceback.format_exception(*sys.exc_info())))
            exit(1)
    else:
        log.info('Starting {}-fold Cross-Validation in 1 process.'.format(folds))
        cm = ConfusionMatrixSim()
        for fold in foldRunnables:
            cm.combine(runFold(fold).confMatrix)
        elapsedTime = time.time() - startTime
        writeresults(cm)

    log.info('{}-fold crossvalidation {} took {:.2f} min'.format(
        folds, '(MP)' if multicore else '(SP)', elapsedTime / 60.0))
def parse_db(mln, content, ignore_unknown_preds=False, db=None, dirs=['.'],
             projectpath=None):
    '''
    Reads one or more databases in a string representation and returns
    the respective Database objects.

    :param mln: the MLN object which should be used to load the database.
    :param content: the string representation of one or multiple
                    ('---'-separated) databases
    :param ignore_unknown_preds: by default this function raises an
                                 Exception when it encounters a predicate
                                 in the DB that has not been declared in
                                 the associated MLN. ignore_unknown_preds=True
                                 simply ignores such predicates.
    :param db: the Database object that shall receive the facts stored in
               the new DB. If None, a new `Database` object will be created.
    '''
    # NOTE(review): this is a Python-2-era duplicate of the parse_db
    # implementation further down in this file, and its tail is not visible
    # in this chunk — compare against the other version before editing.
    log = logs.getlogger('db')
    content = stripComments(content)
    allow_multiple = True
    if db is None:
        allow_multiple = True
        db = Database(mln, ignore_unknown_preds=ignore_unknown_preds)
    dbs = []
    # expand domains with dbtext constants and save evidence
    for line, l in enumerate(content.split("\n")):
        l = l.strip()
        if l == '':
            continue
        # separator between independent databases
        elif l == '---' and not db.isempty():
            dbs.append(db)
            db = Database(mln)
            continue
        # domain declaration
        elif "{" in l:
            domname, constants = db.mln.logic.parse_domain(l)
            domnames = [domname for _ in constants]
        # include
        elif l.startswith('#include'):
            filename = l[len("#include "):].strip()
            m = re.match(r'"(?P<filename>.+)"', filename)
            if m is not None:
                filename = m.group('filename')
                # if the path is relative, look for the respective file
                # relatively to all paths specified. Take the first file matching.
                if not mlnpath(filename).exists:
                    includefilename = None
                    for d in dirs:
                        mlnp = '/'.join([d, filename])
                        if mlnpath(mlnp).exists:
                            includefilename = mlnp
                            break
                    if includefilename is None:
                        raise Exception('File not found: %s' % filename)
                else:
                    includefilename = filename
            else:
                # angle-bracket includes refer to files inside a project
                m = re.match(r'<(?P<filename>.+)>', filename)
                if m is not None:
                    filename = m.group('filename')
                else:
                    raise MLNParsingError('Malformed #include statement: %s' % line)
                if projectpath is None:
                    raise MLNParsingError('No project specified: Cannot locate import from project: %s' % filename)
                includefilename = ':'.join([projectpath, filename])
            logger.debug('Including file: "%s"' % includefilename)
            p = mlnpath(includefilename)
            # recursively parse the included file's databases
            dbs.extend(parse_db(content=mlnpath(includefilename).content,
                                ignore_unknown_preds=ignore_unknown_preds,
                                dirs=[p.resolve_path()]+dirs,
                                projectpath=ifnone(p.project, projectpath, lambda x: '/'.join(p.path+[x])),
                                mln=mln))
            continue
        # valued evidence
        elif l[0] in "0123456789":
            s = l.find(" ")
            gndatom = l[s + 1:].replace(" ", "")
            value = float(l[:s])
            if value < 0 or value > 1:
                raise Exception('Valued evidence must be in [0,1]')
            if gndatom in db.evidence:
                raise Exception("Duplicate soft evidence for '%s'" % gndatom)
            try:
                _, predname, constants = mln.logic.parse_literal(gndatom)  # TODO Should we allow soft evidence on non-atoms here? (This assumes atoms)
            except NoSuchPredicateError, e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            domnames = mln.predicate(predname).argdoms
            db << (gndatom, value)
        # literal
        else:
            if l[0] == "?":
                raise Exception("Unknown literals not supported (%s)" % l)  # this is an Alchemy feature
            try:
                true, predname, constants = mln.logic.parse_literal(l)
            except NoSuchPredicateError, e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            except Exception, e:
                traceback.print_exc()
                raise MLNParsingError('Error parsing line %d: %s (%s)' % (line+1, l, e.message))
def __init__(self, master, prac, node, gconf, directory='.'):
    '''
    Build the Tk query window for interactive MLN inference.

    :param master:     the Tk root/parent widget.
    :param prac:       the PRAC instance providing the available modules.
    :param node:       the inference node whose `pracinfer` and `outdbs`
                       are displayed/edited.
    :param gconf:      global GUI configuration (dnutils config object).
    :param directory:  working directory (unused here beyond the default).
    '''
    self._logger = None  # (placeholder comment removed)
    self.logger = logs.getlogger(self.__class__.__name__, level=logs.DEBUG)
    self.master = master
    # guards event handlers from firing before construction has finished
    self.initialized = False
    self.master.bind('<Return>', self.start)
    self.master.bind('<Escape>', lambda a: self.master.quit())
    self.master.protocol('WM_DELETE_WINDOW', self.quit)
    self.prac = prac
    self.prac_inference = node.pracinfer
    self.infStep = None
    self.module_dir = os.path.join(locations.pracmodules, 'wnsenses')
    self.frame = Frame(master)
    self.frame.pack(fill=BOTH, expand=1)
    self.frame.columnconfigure(1, weight=1)

    # module selection
    row = 0
    Label(self.frame, text="Module: ").grid(row=row, column=0, sticky="E")
    modules = sorted([module for module in self.prac._manifests_by_name])
    self.selected_module = StringVar(master)
    self.selected_module.trace("w", self.select_module)
    self.list_modules = OptionMenu(*(self.frame, self.selected_module) + tuple(modules))
    self.list_modules.grid(row=row, column=1, sticky="NWE")

    # Project selection
    row += 1
    Label(self.frame, text="Project: ").grid(row=row, column=0, sticky="E")
    saveprojectcontainer = Frame(self.frame)
    saveprojectcontainer.grid(row=row, column=1, sticky="NEWS")
    saveprojectcontainer.columnconfigure(0, weight=1)
    self.selected_project = StringVar(master)
    projectfiles = ['']
    self.list_projects = OptionMenu(
        *(saveprojectcontainer, self.selected_project) + tuple(projectfiles))
    self.list_projects.grid(row=0, column=0, sticky="NWES")
    self.selected_project.trace("w", self.select_project)
    # save proj file
    self.btn_saveproj = Button(saveprojectcontainer, text='Save Project...',
                               command=self.noask_save_project)
    self.btn_saveproj.grid(row=0, column=1, sticky="E")
    # save proj file as...
    self.btn_saveproj = Button(saveprojectcontainer, text='Save Project as...',
                               command=self.ask_save_project)
    self.btn_saveproj.grid(row=0, column=2, sticky="E")

    # logic selection
    row += 1
    Label(self.frame, text='Logic: ').grid(row=row, column=0, sticky='E')
    logics = ['FirstOrderLogic', 'FuzzyLogic']
    self.selected_logic = StringVar(master)
    self.selected_logic.trace('w', self.settings_setdirty)
    l = OptionMenu(*(self.frame, self.selected_logic) + tuple(logics))
    l.grid(row=row, column=1, sticky='NWE')

    # mln section
    row += 1
    Label(self.frame, text="MLN: ").grid(row=row, column=0, sticky='NE')
    self.mln_container = FileEditBar(self.frame, dir=self.module_dir,
                                     filesettings={'extension': '.mln',
                                                   'ftypes': [('MLN files', '.mln')]},
                                     defaultname='*unknown{}',
                                     importhook=self.import_mln,
                                     deletehook=self.delete_mln,
                                     projecthook=self.save_proj,
                                     filecontenthook=self.mlnfilecontent,
                                     fileslisthook=self.mlnfiles,
                                     updatehook=self.update_mln,
                                     onchangehook=self.project_setdirty)
    self.mln_container.grid(row=row, column=1, sticky="NEWS")
    self.mln_container.columnconfigure(1, weight=2)
    self.frame.rowconfigure(row, weight=1)

    row += 1
    self.use_emln = IntVar()
    self.use_emln.set(0)
    self.cb_use_emln = Checkbutton(self.frame, text="use model extension",
                                   variable=self.use_emln,
                                   command=self.onchange_use_emln)
    self.cb_use_emln.grid(row=row, column=1, sticky="W")

    # mln extension section
    row += 1
    self.emlncontainerrow = row
    self.emln_label = Label(self.frame, text="EMLN: ")
    self.emln_label.grid(row=self.emlncontainerrow, column=0, sticky='NE')
    self.emln_container = FileEditBar(self.frame, dir=self.module_dir,
                                      filesettings={'extension': '.emln',
                                                    'ftypes': [('MLN extension files', '.emln')]},
                                      defaultname='*unknown{}',
                                      importhook=self.import_emln,
                                      deletehook=self.delete_emln,
                                      projecthook=self.save_proj,
                                      filecontenthook=self.emlnfilecontent,
                                      fileslisthook=self.emlnfiles,
                                      updatehook=self.update_emln,
                                      onchangehook=self.project_setdirty)
    self.emln_container.grid(row=self.emlncontainerrow, column=1,
                             sticky="NEWS")
    self.emln_container.columnconfigure(1, weight=2)
    self.onchange_use_emln(dirty=False)
    self.frame.rowconfigure(row, weight=1)

    # db section
    row += 1
    Label(self.frame, text="Evidence: ").grid(row=row, column=0, sticky='NE')
    self.db_container = FileEditBar(self.frame, dir=self.module_dir,
                                    filesettings={'extension': '.db',
                                                  'ftypes': [('Database files', '.db')]},
                                    defaultname='*unknown{}',
                                    importhook=self.import_db,
                                    deletehook=self.delete_db,
                                    projecthook=self.save_proj,
                                    filecontenthook=self.dbfilecontent,
                                    fileslisthook=self.dbfiles,
                                    updatehook=self.update_db,
                                    onchangehook=self.project_setdirty)
    self.db_container.grid(row=row, column=1, sticky="NEWS")
    self.db_container.columnconfigure(1, weight=2)
    self.frame.rowconfigure(row, weight=1)

    # inference method selection
    row += 1
    self.list_methods_row = row
    Label(self.frame, text="Method: ").grid(row=row, column=0, sticky=E)
    self.selected_method = StringVar(master)
    self.selected_method.trace('w', self.settings_setdirty)
    self.list_methods = OptionMenu(self.frame, self.selected_method,
                                   *InferenceMethods.names())
    self.list_methods.grid(row=self.list_methods_row, column=1, sticky="NWE")

    # queries
    row += 1
    Label(self.frame, text="Queries: ").grid(row=row, column=0, sticky=E)
    self.query = StringVar(master)
    Entry(self.frame, textvariable=self.query).grid(row=row, column=1,
                                                    sticky="NEW")

    # parameters
    row += 1
    Label(self.frame, text="Parameters: ").grid(row=row, column=0,
                                                sticky="NE")
    self.params = StringVar(master)
    self.entry_params = Entry(self.frame, textvariable=self.params)
    self.entry_params.grid(row=row, column=1, sticky="NEW")

    # closed-world predicates
    row += 1
    Label(self.frame, text="CW preds: ").grid(row=row, column=0, sticky="NE")
    self.cwpreds = StringVar(master)
    self.entry_cw = Entry(self.frame, textvariable=self.cwpreds)
    self.entry_cw.grid(row=row, column=1, sticky="NEW")

    # all preds open-world
    cw_container = Frame(self.frame)
    cw_container.grid(row=row, column=1, sticky="NES")
    self.closed_world = IntVar()
    self.cb_closed_world = Checkbutton(
        cw_container, text="Apply CW assumption to all except queries",
        variable=self.closed_world)
    self.cb_closed_world.grid(row=row, column=2, sticky='E')

    # Multiprocessing and verbose
    row += 1
    options_container = Frame(self.frame)
    options_container.grid(row=row, column=1, sticky='NEWS')
    self.multicore = IntVar()
    self.cb_multicore = Checkbutton(options_container, text="Use all CPUs",
                                    variable=self.multicore)
    self.cb_multicore.grid(row=0, column=0, sticky=W)
    self.verbose = IntVar()
    self.cb_verbose = Checkbutton(options_container, text="verbose",
                                  variable=self.verbose)
    self.cb_verbose.grid(row=0, column=1, sticky=W)
    self.keep_evidence = IntVar()
    self.cb_keep_evidence = Checkbutton(options_container, text="keep result",
                                        variable=self.keep_evidence)
    self.cb_keep_evidence.grid(row=0, column=2, sticky=W)
    self.keep_evidence.set(True)

    # start and continue buttons
    row += 1
    self.btn_container = Frame(self.frame)
    self.btn_container.grid(row=row, column=1, sticky='EW')
    start_button = Button(self.btn_container, text="Start Inference",
                          command=self.start)
    start_button.grid(row=0, column=1, sticky='E')
    continue_button = Button(self.btn_container, text="Continue >",
                             command=self.oncontinue)
    continue_button.grid(row=0, column=2, sticky='W')

    self.settings_dirty = IntVar()
    self.project_dirty = IntVar()
    self.gconf = gconf
    self.project = None
    self.dir = os.path.abspath(
        ifnone(gconf['prev_query_path'], DEFAULT_CONFIG))
    # NOTE(review): the slice-style lookup gconf['key':path] appears to be
    # a dnutils config idiom (path-scoped setting) — confirm against the
    # dnutils version in use.
    if gconf['prev_query_project':self.dir] is not None:
        self.load_project(
            os.path.join(self.dir, gconf['prev_query_project':self.dir]))
    else:
        self.new_project()
    self.config = self.project.queryconf
    self.project.addlistener(self.project_setdirty)
    self.selected_module.set(self.gconf.get("module", modules[0]))
    self.update_dbeditor_from_result(node.outdbs)
    self.mln_container.dirty = False
    self.emln_container.dirty = False
    self.db_container.dirty = False
    self.project_setdirty(dirty=False)
    self.master.geometry(gconf['window_loc_query'])
    self.initialized = True
from dnutils import logs from recognize import Voice from microphone import MicLevelController from threading import Timer logger = logs.getlogger(__name__ + 'REC', logs.DEBUG) class VoiceRecorder(): def __init__(self): self.voices = [] self.voip = 0 self.counter = 0 self.recording = False def startListenerThread(self): newVoice = Voice(str(self.counter)) if len(self.voices) < 2: self.voices.append(newVoice) else: if self.recording: Timer(0.6, self.startListenerThread).start() return self.voices[self.voip].stopRecording() self.voices[self.voip] = newVoice self.voip = (self.voip + 1) % 2 newVoice.startRecording() self.counter += 1 if self.counter < 20:
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import os from collections import defaultdict from dnutils import logs from nltk.corpus.reader.wordnet import Synset from prac.core.base import PRACModule, PRACPIPE, DB_TRANSFORM from prac.core.inference import PRACInferenceStep from prac.core.wordnet import WordNet, POS_MAP from pracmln import MLN, Database from pracmln.mln.util import colorize, mergedom logger = logs.getlogger(__name__, logs.INFO) class WNSenses(PRACModule): ''' Extracts possible word senses from WordNet given the part of speech of a word. Depends on the 'syntax' feature extractor. ''' def initialize(self): self.mln = MLN(mlnfile=os.path.join(self.module_path, 'mln', 'predicates.mln'), logic='FuzzyLogic', grammar='PRACGrammar') self.wordnetKBs = {} @DB_TRANSFORM
def parse_db(mln, content, ignore_unknown_preds=False, db=None, dirs=None,
             projectpath=None):
    """
    Reads one or more databases in a string representation and returns the
    respective Database objects.

    :param mln: the MLN object which should be used to load the database.
    :param content: the string representation of one or multiple
                    ('---'-separated) databases
    :param ignore_unknown_preds: by default this function raises an Exception
                                 when it encounters a predicate in the DB that
                                 has not been declared in the associated MLN.
                                 ignore_unknown_preds=True simply ignores
                                 such predicates.
    :param db: The Database object that shall receive the facts stored in the
               new DB. If None, a new `Database` object will be created.
    :param dirs: directories searched (in order) to resolve relative
                 #include paths; defaults to the current directory.
    :param projectpath: project used to resolve <project-style> includes.
    :return: a list of databases
    """
    # A mutable default (dirs=['.']) would be shared across calls; use the
    # None-sentinel idiom instead (behavior-identical for all callers).
    if dirs is None:
        dirs = ['.']
    log = logs.getlogger('db')
    content = stripComments(content)
    # Multiple '---'-separated databases are only permitted when this
    # function creates the Database objects itself; when a caller passes its
    # own `db` (constructor path), exactly one database may result -- see the
    # check at the bottom. (Previously this flag was unconditionally True,
    # which made that check unreachable.)
    allow_multiple = db is None
    if db is None:
        db = Database(mln, ignore_unknown_preds=ignore_unknown_preds)
    dbs = []
    # expand domains with dbtext constants and save evidence
    for line, l in enumerate(content.split("\n")):
        l = l.strip()
        if l == '':
            continue
        # separator between independent databases
        elif l == '---' and not db.isempty():
            dbs.append(db)
            db = Database(mln)
            continue
        # domain declaration; the constants are merged into the domain by the
        # shared "expand domains" code at the bottom of the loop body
        elif "{" in l:
            domname, constants = db.mln.logic.parse_domain(l)
            domnames = [domname for _ in constants]
        # include statement: parse the referenced file recursively
        elif l.startswith('#include'):
            filename = l[len("#include "):].strip()
            m = re.match(r'"(?P<filename>.+)"', filename)
            if m is not None:
                filename = m.group('filename')
                # if the path is relative, look for the respective file
                # relatively to all paths specified. Take the first file
                # matching.
                if not mlnpath(filename).exists:
                    includefilename = None
                    for d in dirs:
                        mlnp = '/'.join([d, filename])
                        if mlnpath(mlnp).exists:
                            includefilename = mlnp
                            break
                    if includefilename is None:
                        raise Exception('File not found: %s' % filename)
                else:
                    includefilename = filename
            else:
                # <filename> syntax refers to a file inside the project
                m = re.match(r'<(?P<filename>.+)>', filename)
                if m is not None:
                    filename = m.group('filename')
                else:
                    # report the offending statement itself (previously the
                    # numeric line index was formatted into the message)
                    raise MLNParsingError('Malformed #include statement: %s' % l)
                if projectpath is None:
                    raise MLNParsingError(
                        'No project specified: Cannot locate import from project: %s' % filename)
                includefilename = ':'.join([projectpath, filename])
            logger.debug('Including file: "%s"' % includefilename)
            p = mlnpath(includefilename)
            dbs.extend(
                parse_db(content=mlnpath(includefilename).content,
                         ignore_unknown_preds=ignore_unknown_preds,
                         dirs=[p.resolve_path()] + dirs,
                         projectpath=ifnone(p.project, projectpath,
                                            lambda x: '/'.join(p.path + [x])),
                         mln=mln))
            continue
        # valued evidence
        elif l[0] in "0123456789":
            s = l.find(" ")
            gndatom = l[s + 1:].replace(" ", "")
            value = float(l[:s])
            if value < 0 or value > 1:
                raise Exception('Valued evidence must be in [0,1]')
            if gndatom in db.evidence:
                raise Exception("Duplicate soft evidence for '%s'" % gndatom)
            try:
                _, predname, constants = mln.logic.parse_literal(
                    gndatom
                )  # TODO Should we allow soft evidence on non-atoms here? (This assumes atoms)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            domnames = mln.predicate(predname).argdoms
            db << (gndatom, value)
        # literal
        else:
            if l[0] == "?":
                raise Exception("Unknown literals not supported (%s)" % l)  # this is an Alchemy feature
            try:
                true, predname, constants = mln.logic.parse_literal(l)
            except NoSuchPredicateError as e:
                if ignore_unknown_preds:
                    continue
                else:
                    raise e
            except Exception as e:
                traceback.print_exc()
                # Python 3 exceptions have no `.message` attribute (the
                # original `e.message` raised AttributeError here); use str(e)
                raise MLNParsingError('Error parsing line %d: %s (%s)'
                                      % (line + 1, l, str(e)))
            if mln.predicate(predname) is None and ignore_unknown_preds:
                log.debug('Predicate "%s" is undefined.' % predname)
                continue
            elif mln.predicate(predname) is None:
                raise NoSuchPredicateError(predname)
            domnames = mln.predicate(predname).argdoms
            # save evidence
            true = 1 if true else 0
            db << ("%s(%s)" % (predname, ",".join(constants)), true)
        # expand domains with the constants of the current line
        if len(domnames) != len(constants):
            raise Exception(
                "Ground atom %s in database %d has wrong number of parameters"
                % (l, len(dbs)))
        for i, c in enumerate(constants):
            db.domain({domnames[i]: c})
    if not db.isempty():
        dbs.append(db)
    if len(dbs) > 1 and not allow_multiple:
        raise Exception(
            'Only one single database is permitted when loading via the constructor. Use Database.load() for loading multiple DBs,'
        )
    return dbs
from collections import defaultdict

import numpy
from dnutils import logs
from dnutils.console import barstr
from numpy.ma.core import sqrt, log

from ..constants import HARD
from ..errors import SatisfiabilityException
from ..grounding.bpll import BPLLGroundingFactory
from ..grounding.default import DefaultGroundingFactory
from .common import DiscriminativeLearner, AbstractLearner
from ..util import fsum, temporary_evidence

logger = logs.getlogger(__name__)


class BPLL(AbstractLearner):
    """
    Pseudo-log-likelihood learning with blocking, i.e. a generalization
    of PLL which takes into consideration the fact that the truth value of
    a blocked atom cannot be inverted without changing a further atom's
    truth value from the same block.

    This learner is fairly efficient, as it computes f and grad based
    only on a sufficient statistic.
    """

    def __init__(self, mrf, **params):
        # generic learner setup (ground MRF, parameters) is handled by the
        # base class
        AbstractLearner.__init__(self, mrf, **params)
        # presumably a cache for the per-variable pseudo-likelihood values,
        # filled lazily by the learning routines — confirm in the full class
        self._pls = None
from . import locations as praclocations from .inference import PRACInferenceStep, PRACInference from .wordnet import WordNet, VERB_TAGS from ..db.ies.models import constants from ..db.ies.models import Word from ..db.ies.extraction import HowtoImport from pracmln import Database, MLN from pracmln import MLNQuery from pracmln.mln import NoSuchPredicateError from pracmln.mln.util import mergedom from collections import defaultdict nltk.data.path = [praclocations.nltk_data] logger = logs.getlogger(__name__, logs.INFO) praclogger = logs.getlogger('PRAC', logs.INFO) aclogger = logs.getlogger('actioncores', logs.INFO) class PRACConfig(ConfigParser): ''' Global configuration data structure for PRAC. Wraps around a ConfigParser ''' DEFAULTS = { 'mongodb': { 'host': 'localhost', 'port': 27017, 'user': '',