def learn_new_triples(predicate):
    """Learn new triples for `predicate` and store them in a file.

    Candidate entities come from ``CandidatesSelector.get_candidates`` and
    are truncated to ``candidates_limit``. They are processed in batches of
    1000: sentences are extracted for each batch, values are extracted from
    those sentences, and every entity with a truthy value is written as one
    triple line to ``results_path + 'triples-<predicate>'``.

    Predicates listed in ``numeric_predicates`` are written with the
    ``XMLSchema#int`` datatype suffix; all others get the ``@pl`` tag.

    NOTE(review): this module defines ``learn_new_triples`` more than once;
    Python keeps only the last definition, so the duplicates should be
    merged.
    """
    sc = get_sentence_classifier(predicate)
    if predicate in numeric_predicates:
        pattern = '<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#int> .'
    else:
        pattern = '<%s> <%s> "%s"@pl .'

    entities = CandidatesSelector.get_candidates(predicate)
    entities = entities[:candidates_limit]
    if verbose:
        print('%s candidates identified' % len(entities))

    # Walk over start offsets instead of computing a batch count: with
    # integer division, ceil(len(entities) / n) is already floored and the
    # trailing partial batch would be skipped.
    batch_size = 1000
    batches = [
        entities[start:start + batch_size]
        for start in xrange(0, len(entities), batch_size)
    ]

    ve = ValueExtractor(predicate, sc.extractor_training_data)
    # The predicate URI is the same for every triple; encode it once.
    predicate_name = full_predicate_name(predicate).encode('utf-8')

    # The context manager guarantees the file is flushed and closed, also
    # when extraction raises part-way through.
    with open(results_path + 'triples-%s' % predicate, 'w') as out:
        for batch in batches:
            extracted_sentences = sc.extract_sentences(batch)
            values = ve.extract_values(extracted_sentences)
            for e, v in values.iteritems():
                if v:
                    line = pattern % (
                        full_resource_name(e).encode('utf-8'),
                        predicate_name,
                        v,
                    )
                    out.write(line + '\n')
def run_evaluation(predicate, sentence_limit=None):
    """Evaluate value extraction for `predicate` and print an error table.

    Test entities and their gold-standard values come from
    ``get_test_data``. Sentences are extracted with the sentence classifier
    (built with the optional `sentence_limit`), values are extracted from
    them, and the result is scored by ``ValueExtractorEvaluator.evaluate``.

    One table row is printed per entity: subject (first 30 characters),
    gold values, extracted value ('-' when missing) and an error label
    ('FP', 'FN', 'FP/FN' or empty).

    Returns the ``stats`` object produced by the evaluator.
    """
    entities, all_gold = get_test_data(predicate)
    classifier = get_sentence_classifier(predicate, sentence_limit)

    # Keep gold values only for the entities taking part in the evaluation.
    gold = {}
    for name, gold_vals in all_gold.iteritems():
        if name in entities:
            gold[name] = gold_vals

    if verbose:
        print('%d entities were used in evaluation.' % len(entities))

    sentences = classifier.extract_sentences(entities)
    extractor = ValueExtractor(predicate, classifier.extractor_training_data)
    extracted = extractor.extract_values(sentences)

    print('%s results:' % predicate)
    stats, fp, fn = ValueExtractorEvaluator.evaluate(gold, extracted)

    row = '%30s %30s %20s %10s'
    print('Error table:')
    print(row % ('Subject:', 'Gold standard values:',
                 'Extracted value:', 'Error:'))

    # Entities that got a value but have no gold entry are still listed,
    # with '-' as the gold placeholder (done after scoring, display only).
    for name in extracted:
        gold.setdefault(name, '-')

    # Error label keyed by (is false positive, is false negative).
    labels = {
        (True, True): 'FP/FN',
        (True, False): 'FP',
        (False, True): 'FN',
        (False, False): '',
    }
    for name, gold_vals in gold.iteritems():
        label = labels[(name in fp, name in fn)]
        print(row % (name[:30], ', '.join(gold_vals),
                     extracted.get(name, '-'), label))

    print('\n\n')
    return stats
def learn_new_triples(predicate):
    """Learn new triples for `predicate` and store them in a file.

    Candidate entities come from ``CandidatesSelector.get_candidates`` and
    are truncated to ``candidates_limit``. They are processed in batches of
    1000: sentences are extracted for each batch, values are extracted from
    those sentences, and every entity with a truthy value is written as one
    triple line to ``results_path + 'triples-<predicate>'``.

    Predicates listed in ``numeric_predicates`` are written with the
    ``XMLSchema#int`` datatype suffix; all others get the ``@pl`` tag.

    NOTE(review): this module defines ``learn_new_triples`` more than once;
    Python keeps only the last definition, so the duplicates should be
    merged.
    """
    sc = get_sentence_classifier(predicate)
    if predicate in numeric_predicates:
        pattern = '<%s> <%s> "%s"^^<http://www.w3.org/2001/XMLSchema#int> .'
    else:
        pattern = '<%s> <%s> "%s"@pl .'

    entities = CandidatesSelector.get_candidates(predicate)
    entities = entities[:candidates_limit]
    if verbose:
        print('%s candidates identified' % len(entities))

    # Walk over start offsets instead of computing a batch count: with
    # integer division, ceil(len(entities) / n) is already floored and the
    # trailing partial batch would be skipped.
    batch_size = 1000
    batches = [
        entities[start:start + batch_size]
        for start in xrange(0, len(entities), batch_size)
    ]

    ve = ValueExtractor(predicate, sc.extractor_training_data)
    # The predicate URI is the same for every triple; encode it once.
    predicate_name = full_predicate_name(predicate).encode('utf-8')

    # The context manager guarantees the file is flushed and closed, also
    # when extraction raises part-way through.
    with open(results_path + 'triples-%s' % predicate, 'w') as out:
        for batch in batches:
            extracted_sentences = sc.extract_sentences(batch)
            values = ve.extract_values(extracted_sentences)
            for e, v in values.iteritems():
                if v:
                    line = pattern % (
                        full_resource_name(e).encode('utf-8'),
                        predicate_name,
                        v,
                    )
                    out.write(line + '\n')