def sdmaleph_runner(examples, mapping, ontologies=[], posClassVal=None, cutoff=None, relations=[],
                    minPos=defaults['minpos'], noise=defaults['noise'],
                    clauseLen=defaults['clauselength'], dataFormat='tab'):
    """
    SDM-Aleph web service.

    Inputs:
        - examples : str, a .tab dataset or a list of pairs
        - mapping : str, a mapping between examples and ontological terms,
        - ontologies : a list of {'ontology' : str} dicts
        - relations : a list of {'relation' : str} dicts
        - posClassVal : str, if the data is class-labeled, this is the target class,
        - cutoff : int, if the data is ranked, this is the cutoff value for
                   splitting it into two classes (the first `cutoff` examples
                   become the positives, the rest the negatives),
        - minPos : int >= 1, minimum number of true positives per rule
        - noise : int >= 0, false positives allowed per rule
        - clauseLen : int >= 1, number of predicates per clause,
        - dataFormat : str, legal values are 'tab' or 'list'

    posClassVal takes precedence over cutoff when both are given. Both are
    tested for truthiness, so an empty posClassVal or a cutoff of 0 is
    treated the same as not supplying the value.

    Output:
        - str, the induced theory: one line per rule, formatted as
          '<clause> [sup=<n>, cov=<n>, wracc=<x.xxx>]'.

    Raises:
        - Exception, if neither posClassVal nor cutoff is given, or if
          minPos, noise or clauseLen is outside its legal range.

    @author: Anze Vavpetic, 2011 <*****@*****.**>
    """
    examples = StructuredFormat.parseInput(examples, dataFormat)
    mapping = StructuredFormat.parseMapping(mapping)
    relations = StructuredFormat.parseRelations(relations)

    # Split the examples into the positive and the negative set.
    pos, neg = [], []
    if posClassVal:
        for ex_id, val in examples:
            if val == posClassVal:
                pos.append((ex_id, val))
            else:
                neg.append((ex_id, val))
    elif cutoff:
        pos, neg = examples[:cutoff], examples[cutoff:]
    else:
        raise Exception('You must specify either the cutoff or the positive class value.')

    # Reject illegal learner settings before building the Aleph input, so a
    # bad parameter does not cost a full conversion run first.
    if not (minPos >= 1):
        raise Exception('minPos must be >= 1.')
    if not (noise >= 0):
        raise Exception('noise must be >= 0.')
    if not (clauseLen >= 1):
        raise Exception('clauseLen must be >= 1.')

    posEx, negEx, b = OWL2X.get_aleph_input(
        [ont['ontology'] for ont in ontologies],
        mapping,
        [rel['relation'] for rel in relations],
        pos,
        neg)
    filestem = str(uuid.uuid4())

    # The numbered prints are progress markers on stdout. They are written as
    # single-argument calls so the output is the same under Python 2 and the
    # code also parses under Python 3.
    print('4')
    runner = Aleph()
    print('5')

    # Start from the module defaults, then override the three settings the
    # caller controls.
    for setting, val in defaults.items():
        runner.set(setting, val)
    runner.set('minpos', minPos)
    runner.set('noise', noise)
    runner.set('clauselength', clauseLen)
    print('5.2')

    # induce() returns a pair; only the second element is used here.
    _str_rules, dump = runner.induce(defaults['mode'], posEx, negEx, b, filestem=filestem)
    rules = __conv(dump, pos, neg)
    print('5.5')

    rules_w_scores = ''.join(
        '%s [sup=%d, cov=%d, wracc=%.3f]\n' % (rule['clause'],
                                               len(rule['posCovered']),
                                               len(rule['covered']),
                                               rule['wracc'])
        for rule in rules)
    print('6')
    return rules_w_scores