def main(labeled, wid_title_mapping, processed_out, discarded_out, dataset, format,
         resource_namespace, fact_namespace, ontology_namespace):
    """
    Serialize labeled data into RDF assertions and write processed/discarded
    item reports.

    :param labeled: path of the JSON file holding the labeled sentences
    :param wid_title_mapping: open file object with the wiki-id -> title JSON mapping
    :param processed_out: path where successfully processed items are written
    :param discarded_out: path where discarded items are written
    :param dataset: output file for the serialized triples (passed to `to_assertions`)
    :param format: RDF serialization format (passed through to `to_assertions`)
    :param resource_namespace: base URI for resource terms
    :param fact_namespace: base URI for fact-extraction terms
    :param ontology_namespace: base URI for ontology terms
    """
    # Namespace prefixes for RDF serialization
    RESOURCE_NS = Namespace(resource_namespace)
    FACT_EXTRACTION_NS = Namespace(fact_namespace)
    ONTOLOGY_NS = Namespace(ontology_namespace)
    NAMESPACE_MANAGER = NamespaceManager(Graph())
    NAMESPACE_MANAGER.bind('resource', RESOURCE_NS)
    NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS)
    NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS)

    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        labeled = json.load(f)

    processed, discarded = to_assertions(labeled, mapping, NAMESPACE_MANAGER, {
                                            'ontology': ONTOLOGY_NS,
                                            'resource': RESOURCE_NS,
                                            'fact_extraction': FACT_EXTRACTION_NS,
                                         }, outfile=dataset, format=format)

    # `writelines` on a single string iterates it character-by-character;
    # `write` emits the joined report in one call with identical output.
    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.write('\n'.join(processed))

    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.write('\n'.join(discarded))
# --- Example #2 ---
def main(classified_output, output_file, id_to_title, format):
    """
    Convert classifier output into RDF assertions.

    NOTE(review): relies on NAMESPACE_MANAGER, ONTOLOGY_NS, RESOURCE_NS and
    FACT_EXTRACTION_NS being available at module level -- confirm they are
    defined in the enclosing module.
    """
    labeled = to_labeled(read_sentences(classified_output))
    mapping = json.load(id_to_title)
    namespaces = {
        'ontology': ONTOLOGY_NS,
        'resource': RESOURCE_NS,
        'fact_extraction': FACT_EXTRACTION_NS,
    }
    processed, discarded = to_assertions(labeled, mapping, NAMESPACE_MANAGER,
                                         namespaces, output_file, format)
# --- Example #3 ---
def main(classified_output, output_file, id_to_title, triple_scores,
         format, sentence_score, core_weight, fe_score):
    """
    Serialize the classification result into triples, optionally scoring
    sentences and/or frame elements.
    """
    labeled = to_labeled(read_sentences(classified_output), fe_score)

    # Attach a per-sentence confidence unless scoring is disabled.
    if sentence_score != 'nothing':
        for item in labeled:
            item['score'] = compute_score(item, sentence_score, core_weight)

    mapping = json.load(id_to_title)
    processed, discarded = to_assertions(labeled, mapping, output_file,
                                         triple_scores, format)
# --- Example #4 ---
def main(classified_output, output_file, id_to_title, triple_scores,
         format, sentence_score, core_weight, fe_score):
    """
    Serialize the classification result into triples; sentence and/or
    frame-element scoring is applied when requested.
    """
    sentences = read_sentences(classified_output)
    labeled = to_labeled(sentences, fe_score)

    # 'nothing' means sentence scoring is switched off.
    scoring_enabled = sentence_score != 'nothing'
    if scoring_enabled:
        for sent in labeled:
            sent['score'] = compute_score(sent, sentence_score, core_weight)

    processed, discarded = to_assertions(labeled,
                                         json.load(id_to_title),
                                         output_file,
                                         triple_scores,
                                         format)
def main(labeled, wid_title_mapping, scores, processed_out, discarded_out, dataset, format,
         resource_namespace, fact_namespace, ontology_namespace):
    """
    Convert the labeled data produced by the unsupervised approach into
    actual triples in NT format.

    :param labeled: path of the JSON file holding the labeled sentences
    :param wid_title_mapping: open file object with the wiki-id -> title JSON mapping
    :param scores: score dataset forwarded to `to_assertions` as `score_dataset`
    :param processed_out: path where successfully processed items are written
    :param discarded_out: path where discarded items are written
    :param dataset: output file for the serialized triples
    :param format: RDF serialization format (passed through to `to_assertions`)
    :param resource_namespace: unused here; kept for CLI signature compatibility
    :param fact_namespace: unused here; kept for CLI signature compatibility
    :param ontology_namespace: unused here; kept for CLI signature compatibility
    """
    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        labeled = json.load(f)

    processed, discarded = to_assertions(labeled, mapping, score_dataset=scores,
                                         outfile=dataset, format=format)

    # `writelines` on a single string writes one character at a time;
    # `write` emits the joined report in one call with identical output.
    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.write('\n'.join(processed))

    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.write('\n'.join(discarded))
def main(labeled, wid_title_mapping, scores, processed_out, discarded_out,
         dataset, format, resource_namespace, fact_namespace,
         ontology_namespace):
    """
    Convert the labeled data produced by the unsupervised approach into
    actual triples in NT format.

    :param labeled: path of the JSON file holding the labeled sentences
    :param wid_title_mapping: open file object with the wiki-id -> title JSON mapping
    :param scores: score dataset forwarded to `to_assertions` as `score_dataset`
    :param processed_out: path where successfully processed items are written
    :param discarded_out: path where discarded items are written
    :param dataset: output file for the serialized triples
    :param format: RDF serialization format (passed through to `to_assertions`)
    :param resource_namespace: unused here; kept for CLI signature compatibility
    :param fact_namespace: unused here; kept for CLI signature compatibility
    :param ontology_namespace: unused here; kept for CLI signature compatibility
    """
    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        labeled = json.load(f)

    processed, discarded = to_assertions(labeled,
                                         mapping,
                                         score_dataset=scores,
                                         outfile=dataset,
                                         format=format)

    # `writelines` on a single string writes one character at a time;
    # `write` emits the joined report in one call with identical output.
    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.write('\n'.join(processed))

    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.write('\n'.join(discarded))