import os
import time
import json
import logging
from datetime import datetime

# Project-internal dependencies (logger, rdf, csv, ExperimentKB, Rule,
# Validate, scorefunctions, significance, adjustment, HeuristicLearner,
# OptimalLearner, build_graph, generate_rules_report, and the private _*
# helpers) are defined elsewhere in the package; their exact module paths
# are not shown here.


def load_graph(ontology_list, data, def_format='n3', cache=True):
    # Keep only the input files matching the declared format; CSV mode also
    # accepts TSV files.
    def filter_valid_files(paths):
        if def_format == 'csv':
            filter_fn = lambda p: p.endswith('.csv') or p.endswith('.tsv')
        else:
            filter_fn = lambda p: p.endswith(def_format)
        return filter(filter_fn, paths)

    logger.info('Calculating data checksum')
    paths = ontology_list + [data]
    md5 = _md5_checksum(filter_valid_files(paths))
    cached_fn = '.%s' % md5

    g = None
    if os.path.exists(cached_fn) and cache:
        # Inputs are unchanged since the last run: reuse the cached graph.
        logger.info('Loading cached graph structure')
        g = _load_cached_graph(cached_fn)
    else:
        logger.info('Building graph structure')
        if def_format == 'n3':
            g = rdf(paths, def_format=def_format)
        elif def_format == 'csv':
            g = csv(ontology_list, data)
        if cache:
            _save_graph_to_cache(g, cached_fn)
    return g
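# Usage sketch (hypothetical file names): one or more N3 ontologies plus an
# N3 examples file, matching the 'n3' default above. The checksum cache
# writes a hidden '.<md5>' file in the working directory, so a second call
# with identical inputs skips parsing entirely.
#
#   g = load_graph(['ontology/go.n3'], 'data/examples.n3')
#   g_fresh = load_graph(['ontology/go.n3'], 'data/examples.n3', cache=False)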
def run(kwargs, cli=False):
    # In CLI mode honor the verbosity flag; as a library call, defer to the
    # caller's logging configuration.
    if cli:
        logger.setLevel(logging.DEBUG if kwargs['verbose'] else logging.INFO)
    else:
        logger.setLevel(logging.NOTSET)

    logger.info('Starting Hedwig')
    start = time.time()
    start_date = datetime.now().isoformat()

    graph = build_graph(kwargs)

    logger.info('Building the knowledge base')
    score_func = getattr(scorefunctions, kwargs['score'])
    kb = ExperimentKB(graph, score_func, instances_as_leaves=kwargs['leaves'])
    validator = Validate(kb, significance_test=significance.apply_fisher,
                         adjustment=getattr(adjustment, kwargs['adjust']))

    rules_per_target = run_learner(kwargs, kb, validator)
    rules_report = generate_rules_report(kwargs, rules_per_target)

    time_taken = time.time() - start
    logger.info('Finished in %d seconds' % time_taken)

    logger.info('Outputting results')
    if kwargs['covered']:
        # Dump the examples covered by each rule as JSON.
        with open(kwargs['covered'], 'w') as f:
            examples = Rule.ruleset_examples_json(rules_per_target)
            f.write(json.dumps(examples, indent=2))

    parameters_report = _parameters_report(kwargs, start_date, time_taken)
    rules_out_file = kwargs['output']
    if rules_out_file:
        with open(rules_out_file, 'w') as f:
            if rules_out_file.endswith('json'):
                f.write(Rule.to_json(rules_per_target,
                                     show_uris=kwargs['uris']))
            else:
                f.write(parameters_report)
                f.write(rules_report)
    elif cli:
        print(parameters_report)
        print(rules_report)

    return rules_per_target
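# A minimal, hypothetical kwargs dictionary for run(); the keys mirror the
# options consumed above and in run_learner() below, but the values shown
# are illustrative, not the package's documented defaults. build_graph()
# additionally reads the ontology/data paths from kwargs; those key names
# are omitted here.
#
#   kwargs = {
#       'verbose': False, 'score': 'lift', 'leaves': True,
#       'adjust': 'fwer', 'alpha': 0.05, 'FDR': 0.05,
#       'learner': 'heuristic', 'beam': 20, 'support': 0.1,
#       'depth': 5, 'target': None, 'negations': False,
#       'optimalsubclass': False, 'uris': False,
#       'covered': None, 'output': 'rules.txt',
#   }
#   rules_per_target = run(kwargs, cli=True)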
def run_learner(kwargs, kb, validator):
    # With a discrete target, learn rules per class value (or only the one
    # requested); with a ranked target there is a single learning pass.
    if kb.is_discrete_target():
        targets = kb.class_values if not kwargs['target'] else [kwargs['target']]
    else:
        targets = [None]

    rules_per_target = []
    for target in targets:
        if target:
            logger.info("Starting learner for target '%s'" % target)
        else:
            logger.info('Ranks detected - starting learner.')

        learner_cls = {
            'heuristic': HeuristicLearner,
            'optimal': OptimalLearner,
        }[kwargs['learner']]
        learner = learner_cls(kb,
                              n=kwargs['beam'],
                              min_sup=int(kwargs['support'] * kb.n_examples()),
                              target=target,
                              depth=kwargs['depth'],
                              sim=0.9,
                              use_negations=kwargs['negations'],
                              optimal_subclass=kwargs['optimalsubclass'])
        rules = learner.induce()

        # Multiple-testing correction applies only to discrete targets.
        if kb.is_discrete_target():
            if kwargs['adjust'] == 'fdr':
                logger.info('Validating rules, FDR = %.3f' % kwargs['FDR'])
            elif kwargs['adjust'] == 'fwer':
                logger.info('Validating rules, alpha = %.3f' % kwargs['alpha'])
            rules = validator.test(rules, alpha=kwargs['alpha'], q=kwargs['FDR'])

        rules_per_target.append((target, rules))

    return rules_per_target
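# run_learner() can also be driven directly once a knowledge base exists,
# bypassing run(). A sketch under the same kwargs assumptions as above:
# 'lift' as a score-function name is illustrative, while adjustment.fwer
# follows from the getattr(adjustment, kwargs['adjust']) lookup in run().
#
#   kb = ExperimentKB(graph, scorefunctions.lift, instances_as_leaves=True)
#   validator = Validate(kb, significance_test=significance.apply_fisher,
#                        adjustment=adjustment.fwer)
#   rules_per_target = run_learner(kwargs, kb, validator)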