def main_mpe_semiring(args):
    """Run the semiring-based MPE task for ``args.inputfile`` and report it.

    Reads ``inputfile``, ``verbose``, ``web`` and ``output`` from ``args``.
    The outcome is handed to the result handler (``print_result_json`` when
    ``args.web`` is set, ``print_result`` otherwise) as
    ``(True, (prob, facts))`` on success, or as ``(False, err)`` on failure,
    where ``err.trace`` holds the formatted traceback.

    :param args: parsed command-line arguments
    """
    inputfile = args.inputfile

    init_logger(args.verbose)

    result_handler = print_result_json if args.web else print_result

    # Only a file opened here is ours to close; sys.stdout must stay open.
    owns_output = args.output is not None
    outf = open(args.output, 'w') if owns_output else sys.stdout

    try:
        with Timer("Total"):
            try:
                pl = PrologFile(inputfile)

                # Build the formula from the program loaded above.  The
                # previous code passed the undefined name ``model`` here, so
                # every run ended in the error branch with a NameError.
                lf = LogicFormula.create_from(pl, label_all=True)

                prob, facts = mpe_semiring(lf, args.verbose)
                result_handler((True, (prob, facts)), outf)
            except Exception as err:
                # Attach the traceback so the handler can display it.
                err.trace = traceback.format_exc()
                result_handler((False, err), outf)
    finally:
        if owns_output:
            outf.close()
def mpe_maxsat(dag, verbose=0, solver=None):
    """Compute an MPE assignment for ``dag`` using a MaxSAT solver.

    Queries present in ``dag`` are cleared (a warning is written to stderr).
    For every evidence node of the CNF that is not already true, a
    ``TrueConstraint`` is added before solving.

    :param dag: ground program; iterated as ``(index, node, type)`` triples
    :param verbose: verbosity level passed to ``init_logger``
    :param solver: solver specification passed to ``get_solver``
    :return: tuple ``(prob, output_facts)``.  ``output_facts`` is ``[]`` when
        the CNF is trivial and ``None`` when the solver returned no
        assignment; ``prob`` is ``1.0`` in both of these cases.
    """
    if dag.queries():
        print('%% WARNING: ignoring queries in file', file=sys.stderr)
        dag.clear_queries()

    logger = init_logger(verbose)
    logger.info('Ground program size: %s' % len(dag))

    cnf = CNF.createFrom(dag)

    for qn, qi in cnf.evidence():
        if not cnf.is_true(qi):
            cnf.add_constraint(TrueConstraint(qi))

    queries = list(cnf.labeled())

    logger.info('CNF size: %s' % cnf.clausecount)

    if cnf.is_trivial():
        return 1.0, []

    solver = get_solver(solver)
    with Timer('Solving'):
        assignment = solver.evaluate(cnf)
        # frozenset(None) raises TypeError, which made the "no assignment"
        # case below unreachable; keep None as None instead.
        result = None if assignment is None else frozenset(assignment)
    weights = cnf.extract_weights(SemiringProbability())

    if result is None:
        return 1.0, None

    prob = 1.0
    output_facts = []

    # Labeled nodes, when present, determine the reported facts.
    for qn, qi, _ in queries:
        if qi in result:
            output_facts.append(qn)
        elif -qi in result:
            output_facts.append(-qn)

    for i, n, t in dag:
        if t != 'atom':
            continue
        if i in result:
            if not queries:
                output_facts.append(n.name)
            prob *= weights[i][0]
        elif -i in result:
            if not queries:
                output_facts.append(-n.name)
            prob *= weights[i][1]

    return prob, output_facts
def mpe_maxsat(dag, verbose=0, solver=None, minpe=False):
    """Compute an MPE assignment for ``dag`` using a MaxSAT solver.

    The CNF is created with ``force_atoms=True``.  For every evidence node
    of the CNF that is not already true, a ``TrueConstraint`` is added
    before solving.

    :param dag: ground program; iterated as ``(index, node, type)`` triples
    :param verbose: verbosity level passed to ``init_logger``
    :param solver: solver specification passed to ``get_solver``
    :param minpe: forwarded to the solver as ``invert_weights``
    :return: tuple ``(prob, output_facts)``.  ``output_facts`` is ``[]`` when
        the CNF is trivial and ``None`` when the solver returned no
        assignment; ``prob`` is ``1.0`` in both of these cases.
    """
    logger = init_logger(verbose)
    logger.info("Ground program size: %s" % len(dag))

    cnf = CNF.createFrom(dag, force_atoms=True)

    for qn, qi in cnf.evidence():
        if not cnf.is_true(qi):
            cnf.add_constraint(TrueConstraint(qi))

    queries = list(cnf.labeled())

    logger.info("CNF size: %s" % cnf.clausecount)

    if cnf.is_trivial():
        return 1.0, []

    solver = get_solver(solver)
    with Timer("Solving"):
        assignment = solver.evaluate(cnf, invert_weights=minpe)
        # frozenset(None) raises TypeError, which made the "no assignment"
        # case below unreachable; keep None as None instead.
        result = None if assignment is None else frozenset(assignment)
    weights = cnf.extract_weights(SemiringProbability())

    if result is None:
        return 1.0, None

    prob = 1.0
    output_facts = []

    # Labeled nodes, when present, determine the reported facts.
    for qn, qi, _ in queries:
        if qi in result:
            output_facts.append(qn)
        elif -qi in result:
            output_facts.append(-qn)

    for i, n, t in dag:
        if t != "atom":
            continue
        if i in result:
            if not queries:
                output_facts.append(n.name)
            prob *= weights[i][0]
        elif -i in result:
            if not queries:
                output_facts.append(-n.name)
            prob *= weights[i][1]

    return prob, output_facts
def learn(self, significance=None, max_rule_length=None, beam_size=5,
          m_estimator=1, deterministic=False):
    """Learn a hypothesis from the stored data and keep its rules.

    Stores the learner, the hypothesis and the clause list on the instance.
    When a log file is configured, a logger named ``structure_learner`` is
    created and the scores and elapsed time are logged.

    :param significance: passed to the learner as ``p``
    :param max_rule_length: passed to the learner as ``l``
    :param beam_size: passed to the learner as ``beam_size``
    :param m_estimator: passed to the learner as ``m``
    :param deterministic: use ``ProbFOIL`` when true, ``ProbFOIL2`` otherwise
    :return: seconds spent learning and converting the hypothesis to clauses
    """
    log_name = 'structure_learner'
    if self.__log_file is not None:
        self.__log = init_logger(verbose=True, name=log_name,
                                 out=self.__log_file)
        self.__log.info('Random seed: %s' % self.__seed)

    learn_class = ProbFOIL if deterministic else ProbFOIL2

    self.__learner = learn_class(self.__data, logger=log_name,
                                 p=significance, l=max_rule_length,
                                 beam_size=beam_size, m=m_estimator)

    time_start = time.time()
    self.__hypothesis = self.__learner.learn()
    self.__rules = self.__hypothesis.to_clauses(
        self.__hypothesis.target.functor)

    # First rule is failing rule: don't consider it if there are other rules.
    if len(self.__rules) > 1:
        del self.__rules[0]
    time_total = time.time() - time_start

    if self.__log is not None:
        # ACCURACY used to be logged twice; once is enough.
        self.__log.info('ACCURACY: %f' % self.accuracy())
        self.__log.info('PRECISION: %f' % self.precision())
        self.__log.info('RECALL: %f' % self.recall())
        # Log the elapsed time; this used to print the start timestamp.
        self.__log.info('Total time:\t%.4fs' % time_total)
    return time_total
from __future__ import print_function import importlib from abc import abstractmethod import numpy as np import pandas as pd from problog.util import init_logger from problog.logic import Term, Object, Constant, term2str, unquote from synthlog.mercs.core.MERCS import MERCS from synthlog.tasks.base_stored_object import StoredObject, cells_to_matrix logger = init_logger() class Predictor(StoredObject): def __init__( self, scope=None, source_columns=None, target_columns=None, database=None, engine=None, ): """ :param scope: A scope, containing table_cell predicates describing a table content. :param source_columns: A list of columns, where column is: column(<table_name>, <col_number>). <table_name> is a table name present in table_cell. These columns will be used as input columns for the predictor. :param target_columns: A list of columns, where column is: column(<table_name>, <col_number>). <table_name> is a table name present in table_cell. These columns will be used as columns to predict for the predictor.
def main(args, result_handler=None):
    """Command-line entry point of the sampling task.

    Parses ``args``, seeds the random generator, and then either prints an
    estimate of the query probabilities (``--estimate``) or passes the
    generated samples to the result handler.

    :param args: list of command-line arguments (without the program name)
    :param result_handler: optional callable invoked as
        ``handler((success, payload), output=..., oneline=...)`` on success
        and ``handler((False, err), output=...)`` on failure.  When it is
        given, or when ``--web`` is set, samples are requested in ``"dict"``
        format; otherwise ``"str"`` format and ``print_result`` are used.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("filename")
    parser.add_argument(
        "-N",
        "-n",
        type=int,
        dest="n",
        default=argparse.SUPPRESS,
        help="Number of samples.",
    )
    parser.add_argument(
        "--with-facts",
        action="store_true",
        help="Also output choice facts (default: just queries).",
    )
    parser.add_argument("--with-probability", action="store_true", help="Show probability.")
    parser.add_argument("--as-evidence", action="store_true", help="Output as evidence.")
    parser.add_argument(
        "--propagate-evidence",
        dest="propagate_evidence",
        default=False,
        action="store_true",
        help="Enable evidence propagation",
    )
    parser.add_argument(
        "--dont-propagate-evidence",
        action="store_false",
        dest="propagate_evidence",
        default=False,
        help="Disable evidence propagation",
    )
    parser.add_argument("--oneline", action="store_true", help="Format samples on one line.")
    parser.add_argument(
        "--estimate",
        action="store_true",
        help="Estimate probability of queries from samples.",
    )
    parser.add_argument(
        "--timeout",
        "-t",
        type=int,
        default=0,
        help="Set timeout (in seconds, default=off).",
    )
    parser.add_argument("--output", "-o", type=str, default=None, help="Filename of output file.")
    parser.add_argument("--web", action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--verbose", "-v", action="count", help="Verbose output")
    parser.add_argument("--seed", "-s", type=float, help="Random seed", default=None)
    parser.add_argument("--full-trace", action="store_true")
    parser.add_argument("--strip-tag", action="store_true", help="Strip outermost tag from output.")
    parser.add_argument(
        "-a",
        "--arg",
        dest="args",
        action="append",
        help="Pass additional arguments to the cmd_args builtin.",
    )
    parser.add_argument("--progress", help="show progress", action="store_true")

    args = parser.parse_args(args)

    init_logger(args.verbose, "problog_sample")

    if args.seed is not None:
        random.seed(args.seed)
    else:
        # No seed given: draw one and log it so the run can be reproduced.
        seed = random.random()
        logging.getLogger("problog_sample").debug("Seed: %s", seed)
        random.seed(seed)

    pl = PrologFile(args.filename)

    outf = sys.stdout
    if args.output is not None:
        outf = open(args.output, "w")

    if args.timeout:
        start_timer(args.timeout)

    # noinspection PyUnusedLocal
    def signal_term_handler(*sigargs):
        sys.exit(143)

    signal.signal(signal.SIGTERM, signal_term_handler)

    if result_handler is not None or args.web:
        outformat = "dict"
        if result_handler is None:
            result_handler = print_result_json
    else:
        outformat = "str"
        result_handler = print_result

    try:
        if args.estimate:
            results = estimate(pl, **vars(args))
            # Honour --output here as well; outf is sys.stdout by default.
            print(format_dictionary(results), file=outf)
        else:
            result_handler(
                (True, sample(pl, format=outformat, **vars(args))),
                output=outf,
                oneline=args.oneline,
            )
    except Exception as err:
        # Attach the traceback so the handler can display it.
        err.trace = traceback.format_exc()
        result_handler((False, err), output=outf)
    finally:
        # Runs on every exit path, including SystemExit raised by the
        # SIGTERM handler, so the timer and the output file are released.
        if args.timeout:
            stop_timer()

        if args.output is not None:
            outf.close()
def main(argv=sys.argv[1:]):
    """Run two learning stages on the input files and print a report.

    Stage one learns a hypothesis, after which ``construct_ab_pred`` is
    called with that hypothesis, the learner and the input file names.
    The input files are then loaded again and stage two learns a second
    hypothesis.  Settings, both theories, the scores of the second
    hypothesis, the statistics of both learners and the timings are printed
    to stdout.

    :param argv: command-line arguments (defaults to ``sys.argv[1:]`` as
        captured when the function was defined)
    """
    args = argparser().parse_args(argv)

    if args.seed:
        seed = args.seed
    else:
        seed = str(random.random())
    random.seed(seed)

    logger = 'probfoil'

    logfile = None if args.log is None else open(args.log, 'w')

    def show_rules(hypothesis):
        """Print the clauses of ``hypothesis``, skipping the failing rule."""
        clauses = hypothesis.to_clauses(hypothesis.target.functor)
        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])

    try:
        log = init_logger(verbose=args.verbose, name=logger, out=logfile)
        log.info('Random seed: %s' % seed)

        # Load input files
        data = DataFile(*(PrologFile(source) for source in args.files))

        learn_class = ProbFOIL if args.probfoil1 else ProbFOIL2

        time_start = time.time()
        learn_one = learn_class(data, logger=logger, **vars(args))
        hypothesis_one = learn_one.learn()
        time_one = time.time() - time_start  # time for first stage

        # call function from defaults.py to construct abnormality predicate
        construct_ab_pred(hypothesis_one, learn_one, args.files)

        # reload data files and re-learn rules with new data
        data = DataFile(*(PrologFile(source) for source in args.files))
        learn_two = learn_class(data, logger=logger, **vars(args))
        hypothesis_two = learn_two.learn()
        time_total = time.time() - time_start
        # Everything after stage one, including construct_ab_pred.
        time_two = time_total - time_one

        print('================ SETTINGS ================')
        for kv in vars(args).items():
            print('%20s:\t%s' % kv)

        if learn_one.interrupted:
            print('================ PARTIAL THEORY ================')
        else:
            print('================= INTERMEDIATE THEORY =================')
        show_rules(hypothesis_one)

        print('================= FINAL THEORY =================')
        show_rules(hypothesis_two)

        print('==================== SCORES ====================')
        print(' Accuracy:\t', accuracy(hypothesis_two))
        print(' Precision:\t', precision(hypothesis_two))
        print(' Recall:\t', recall(hypothesis_two))
        print('================== STATISTICS ==================')
        for name, value in learn_one.statistics():
            print('%20s Stage One:\t%s' % (name, value))
        for name, value in learn_two.statistics():
            print('%20s Stage Two:\t%s' % (name, value))
        print(' Stage one time:\t%.4fs' % time_one)
        print(' Stage two time:\t%.4fs' % time_two)
        print(' Total time:\t%.4fs' % time_total)
    finally:
        # Close the log file even when learning or reporting fails.
        if logfile:
            logfile.close()
def main(args, result_handler=None):
    """Command-line entry point of the sampling task.

    Parses ``args``, seeds the random generator, and then either prints an
    estimate of the query probabilities (``--estimate``) or passes the
    generated samples to the result handler.

    :param args: list of command-line arguments (without the program name)
    :param result_handler: optional callable invoked as
        ``handler((success, payload), output=..., oneline=...)`` on success
        and ``handler((False, err), output=...)`` on failure.  When it is
        given, or when ``--web`` is set, samples are requested in ``'dict'``
        format; otherwise ``'str'`` format and ``print_result`` are used.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('filename')
    parser.add_argument('-N', '-n', type=int, dest='n', default=argparse.SUPPRESS,
                        help="Number of samples.")
    parser.add_argument('--with-facts', action='store_true',
                        help="Also output choice facts (default: just queries).")
    parser.add_argument('--with-probability', action='store_true', help="Show probability.")
    parser.add_argument('--as-evidence', action='store_true', help="Output as evidence.")
    parser.add_argument('--propagate-evidence', dest='propagate_evidence',
                        default=False, action='store_true',
                        help="Enable evidence propagation")
    parser.add_argument('--dont-propagate-evidence', action='store_false',
                        dest='propagate_evidence', default=False,
                        help="Disable evidence propagation")
    parser.add_argument('--oneline', action='store_true', help="Format samples on one line.")
    parser.add_argument('--estimate', action='store_true',
                        help='Estimate probability of queries from samples.')
    parser.add_argument('--timeout', '-t', type=int, default=0,
                        help="Set timeout (in seconds, default=off).")
    parser.add_argument('--output', '-o', type=str, default=None,
                        help="Filename of output file.")
    parser.add_argument('--web', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('--verbose', '-v', action='count', help='Verbose output')
    parser.add_argument('--seed', '-s', type=float, help='Random seed', default=None)
    parser.add_argument('--full-trace', action='store_true')
    parser.add_argument('--strip-tag', action='store_true',
                        help='Strip outermost tag from output.')
    parser.add_argument('-a', '--arg', dest='args', action='append',
                        help='Pass additional arguments to the cmd_args builtin.')
    parser.add_argument('--progress', help='show progress', action='store_true')

    args = parser.parse_args(args)

    init_logger(args.verbose, 'problog_sample')

    if args.seed is not None:
        random.seed(args.seed)
    else:
        # No seed given: draw one and log it so the run can be reproduced.
        seed = random.random()
        logging.getLogger('problog_sample').debug('Seed: %s', seed)
        random.seed(seed)

    pl = PrologFile(args.filename)

    outf = sys.stdout
    if args.output is not None:
        outf = open(args.output, 'w')

    if args.timeout:
        start_timer(args.timeout)

    # noinspection PyUnusedLocal
    def signal_term_handler(*sigargs):
        sys.exit(143)

    signal.signal(signal.SIGTERM, signal_term_handler)

    if result_handler is not None or args.web:
        outformat = 'dict'
        if result_handler is None:
            result_handler = print_result_json
    else:
        outformat = 'str'
        result_handler = print_result

    try:
        if args.estimate:
            results = estimate(pl, **vars(args))
            # Honour --output here as well; outf is sys.stdout by default.
            print(format_dictionary(results), file=outf)
        else:
            result_handler((True, sample(pl, format=outformat, **vars(args))),
                           output=outf, oneline=args.oneline)
    except Exception as err:
        # Attach the traceback so the handler can display it.
        err.trace = traceback.format_exc()
        result_handler((False, err), output=outf)
    finally:
        # Runs on every exit path, including SystemExit raised by the
        # SIGTERM handler, so the timer and the output file are released.
        if args.timeout:
            stop_timer()

        if args.output is not None:
            outf.close()
def main(argv=sys.argv[1:]):
    """Learn a theory from the input files and print a report to stdout.

    The report lists the parsed settings, the learned theory (marked as
    partial when the learner was interrupted), accuracy/precision/recall of
    the hypothesis, the learner statistics and the total learning time.

    :param argv: command-line arguments (defaults to ``sys.argv[1:]`` as
        captured when the function was defined)
    """
    args = argparser().parse_args(argv)

    if args.seed:
        seed = args.seed
    else:
        seed = str(random.random())
    random.seed(seed)

    logger = "probfoil"

    logfile = None if args.log is None else open(args.log, "w")

    try:
        log = init_logger(verbose=args.verbose, name=logger, out=logfile)
        log.info("Random seed: %s" % seed)

        # Load input files
        data = DataFile(*(PrologFile(source) for source in args.files))

        learn_class = ProbFOIL if args.probfoil1 else ProbFOIL2

        time_start = time.time()
        learn = learn_class(data, logger=logger, **vars(args))

        hypothesis = learn.learn()
        time_total = time.time() - time_start

        print("================ SETTINGS ================")
        for kv in vars(args).items():
            print("%20s:\t%s" % kv)

        if learn.interrupted:
            print("================ PARTIAL THEORY ================")
        else:
            print("================= FINAL THEORY =================")
        clauses = hypothesis.to_clauses(hypothesis.target.functor)

        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])

        print("==================== SCORES ====================")
        print(" Accuracy:\t", accuracy(hypothesis))
        print(" Precision:\t", precision(hypothesis))
        print(" Recall:\t", recall(hypothesis))
        print("================== STATISTICS ==================")
        for name, value in learn.statistics():
            print("%20s:\t%s" % (name, value))
        print(" Total time:\t%.4fs" % time_total)
    finally:
        # Close the log file even when learning or reporting fails.
        if logfile:
            logfile.close()
def probfoil(**kwargs):
    """Learn a theory from in-memory program texts and print a report.

    Keyword arguments read by this function (defaults in parentheses):

    * ``settings``, ``train`` -- program texts, each wrapped in
      ``PrologString`` and combined into the training ``DataFile``.
    * ``test`` -- optional program text; when given, the learned hypothesis
      is re-scored on ``settings`` + ``test`` through ``test_rule``.
    * ``seed`` -- random seed (a fresh ``str(random.random())``).
    * ``log`` -- path of a log file to write (``None``: no log file).
    * ``verbose`` (0) -- verbosity.
    * ``m`` (1) -- parameter m for the m-estimate.
    * ``beam_size`` (5) -- size of the beam for beam search.
    * ``p`` (``None``) -- rule significance threshold.
    * ``l`` (``None``) -- maximum rule length.
    * ``probfoil1`` -- when truthy, learn with ``ProbFOIL`` instead of
      ``ProbFOIL2``.
    * ``target`` (``None``), ``symmetry_breaking`` (``True``) -- receive
      defaults and appear in the printed settings.

    Nothing is returned.  Settings, theory, train (and test) scores,
    statistics and the total learning time are printed to stdout.
    """
    args = kwargs

    # An explicit seed=None is treated like a missing seed so that the seed
    # that is logged and passed to the learner can reproduce the run.
    seed = args.get('seed')
    if seed is None:
        seed = str(random.random())
        args['seed'] = seed
    random.seed(seed)

    logger = 'probfoil'

    # Fill in defaults; these keys are also listed in the settings report.
    args.setdefault('log', None)
    args.setdefault('verbose', 0)
    args.setdefault('m', 1)
    args.setdefault('beam_size', 5)
    args.setdefault('p', None)
    args.setdefault('l', None)
    args.setdefault('target', None)
    args.setdefault('symmetry_breaking', True)

    # Program texts are kept out of the settings report.
    settings = args.pop('settings', None)
    train = args.pop('train', None)
    test = args.pop('test', None)

    # log=None (explicit or defaulted) means "no log file"; it used to reach
    # open(None, 'w') when passed explicitly.
    logfile = None if args['log'] is None else open(args['log'], 'w')

    try:
        log = init_logger(verbose=args['verbose'], name=logger, out=logfile)
        log.info('Random seed: %s' % seed)

        # Load input data
        data = DataFile(*(PrologString(source) for source in [settings, train]))

        # Check the flag's value, not just its presence, so that
        # probfoil1=False keeps the default learner.
        learn_class = ProbFOIL if args.get('probfoil1') else ProbFOIL2

        # NOTE(review): 'target' and 'symmetry_breaking' are not forwarded to
        # the learner, matching the previous behaviour -- confirm intent.
        learner_options = dict(
            logger=logger,
            seed=seed,
            log=args['log'],
            verbose=args['verbose'],
            m=args['m'],
            beam_size=args['beam_size'],
            p=args['p'],
            l=args['l'],
        )

        time_start = time.time()
        learn = learn_class(data, **learner_options)

        hypothesis = learn.learn()
        time_total = time.time() - time_start

        # Store scores
        train_accuracy = accuracy(hypothesis)
        train_precision = precision(hypothesis)
        train_recall = recall(hypothesis)

        # Load test data
        has_test = test is not None
        if has_test:
            test_data = DataFile(
                *(PrologString(source) for source in [settings, test]))
            test_learner = learn_class(test_data, **learner_options)
            test_hypothesis = test_learner.test_rule(hypothesis)

            # Store scores
            test_accuracy = accuracy(test_hypothesis)
            test_precision = precision(test_hypothesis)
            test_recall = recall(test_hypothesis)

        print('================ SETTINGS ================')
        for kv in args.items():
            print('%20s:\t%s' % kv)

        if learn.interrupted:
            print('================ PARTIAL THEORY ================')
        else:
            print('================= FINAL THEORY =================')
        clauses = hypothesis.to_clauses(hypothesis.target.functor)

        # First rule is failing rule: don't print it if there are other rules.
        if len(clauses) > 1:
            for clause in clauses[1:]:
                print(clause)
        else:
            print(clauses[0])

        print('==================== SCORES ====================')
        print(' Train Set')
        print(' Accuracy:\t', train_accuracy)
        print(' Precision:\t', train_precision)
        print(' Recall:\t', train_recall)
        if has_test:
            print(' Test Set')
            print(' Accuracy:\t', test_accuracy)
            print(' Precision:\t', test_precision)
            print(' Recall:\t', test_recall)
        print('================== STATISTICS ==================')
        for name, value in learn.statistics():
            print('%20s:\t%s' % (name, value))
        print(' Total time:\t%.4fs' % time_total)
    finally:
        # Close the log file even when learning or reporting fails.
        if logfile:
            logfile.close()