def main(labeled, wid_title_mapping, processed_out, discarded_out, dataset, format, resource_namespace, fact_namespace, ontology_namespace):
    """Serialize labeled classification data into RDF assertions.

    :param labeled: path to a UTF-8 JSON file with the labeled data
    :param wid_title_mapping: open file object with the JSON Wikipedia-id -> title mapping
    :param processed_out: path where the processed items are written, one per line
    :param discarded_out: path where the discarded items are written, one per line
    :param dataset: output file handed to ``to_assertions`` as ``outfile``
    :param format: RDF serialization format handed to ``to_assertions``
    :param resource_namespace: base URI for the resource namespace
    :param fact_namespace: base URI for the fact-extraction namespace
    :param ontology_namespace: base URI for the ontology namespace
    """
    # Namespace prefixes used for RDF serialization
    resource_ns = Namespace(resource_namespace)
    fact_extraction_ns = Namespace(fact_namespace)
    ontology_ns = Namespace(ontology_namespace)
    ns_manager = NamespaceManager(Graph())
    ns_manager.bind('resource', resource_ns)
    ns_manager.bind('fact', fact_extraction_ns)
    ns_manager.bind('ontology', ontology_ns)

    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        # Keep the parsed payload in its own name rather than rebinding the parameter
        labeled_data = json.load(f)

    processed, discarded = to_assertions(
        labeled_data,
        mapping,
        ns_manager,
        {
            'ontology': ontology_ns,
            'resource': resource_ns,
            'fact_extraction': fact_extraction_ns,
        },
        outfile=dataset,
        format=format,
    )

    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(processed))
    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(discarded))
def main(classified_output, output_file, id_to_title, format):
    """Turn classifier output into labeled sentences and serialize them as assertions.

    :param classified_output: source of classified sentences, consumed by ``read_sentences``
    :param output_file: destination passed through to ``to_assertions``
    :param id_to_title: open file object with the JSON Wikipedia-id -> title mapping
    :param format: RDF serialization format passed through to ``to_assertions``
    """
    sentences = read_sentences(classified_output)
    labeled = to_labeled(sentences)
    mapping = json.load(id_to_title)
    # NOTE(review): NAMESPACE_MANAGER and the *_NS namespaces are referenced as
    # globals here — they are not defined in this function; confirm they exist
    # at module level.
    processed, discarded = to_assertions(
        labeled,
        mapping,
        NAMESPACE_MANAGER,
        {
            'ontology': ONTOLOGY_NS,
            'resource': RESOURCE_NS,
            'fact_extraction': FACT_EXTRACTION_NS,
        },
        output_file,
        format,
    )
def main(classified_output, output_file, id_to_title, triple_scores, \
         format, sentence_score, core_weight, fe_score):
    """Serialize the classification result into triples.

    Optionally scores sentences and/or frame elements.

    :param classified_output: source of classified sentences, consumed by ``read_sentences``
    :param output_file: destination passed through to ``to_assertions``
    :param id_to_title: open file object with the JSON Wikipedia-id -> title mapping
    :param triple_scores: per-triple scores passed through to ``to_assertions``
    :param format: RDF serialization format passed through to ``to_assertions``
    :param sentence_score: scoring strategy; the literal 'nothing' disables sentence scoring
    :param core_weight: weight passed to ``compute_score``
    :param fe_score: frame-element scoring option passed to ``to_labeled``
    """
    sentences = read_sentences(classified_output)
    labeled = to_labeled(sentences, fe_score)

    # Attach a confidence score to every sentence unless scoring is disabled
    if sentence_score != 'nothing':
        for item in labeled:
            item['score'] = compute_score(item, sentence_score, core_weight)

    mapping = json.load(id_to_title)
    processed, discarded = to_assertions(labeled, mapping, output_file,
                                         triple_scores, format)
def main(labeled, wid_title_mapping, scores, processed_out, discarded_out, dataset, format, resource_namespace, fact_namespace, ontology_namespace):
    """Convert labeled data produced by the unsupervised approach into triples in nt format.

    :param labeled: path to a UTF-8 JSON file with the labeled data
    :param wid_title_mapping: open file object with the JSON Wikipedia-id -> title mapping
    :param scores: score dataset passed to ``to_assertions`` as ``score_dataset``
    :param processed_out: path where the processed items are written, one per line
    :param discarded_out: path where the discarded items are written, one per line
    :param dataset: output file handed to ``to_assertions`` as ``outfile``
    :param format: RDF serialization format handed to ``to_assertions``
    :param resource_namespace: base URI for the resource namespace (currently unused here)
    :param fact_namespace: base URI for the fact-extraction namespace (currently unused here)
    :param ontology_namespace: base URI for the ontology namespace (currently unused here)
    """
    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        # Keep the parsed payload in its own name rather than rebinding the parameter
        labeled_data = json.load(f)

    processed, discarded = to_assertions(labeled_data, mapping,
                                         score_dataset=scores,
                                         outfile=dataset,
                                         format=format)

    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(processed))
    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(discarded))