Exemplo n.º 1
0
def create_naf(text):
    """Wrap *text* in a freshly created NAF document.

    The document is created as NAF version "3.0" with language "nl", and
    *text* is stored both on the ``raw`` attribute and through ``set_raw``.

    Args:
        text: The raw text to store in the document.

    Returns:
        The newly created ``KafNafParser`` object.
    """
    language = "nl"
    document = KafNafParser(type="NAF")
    document.set_version("3.0")
    document.set_language(language)
    document.lang = language
    # Keep the attribute and the document's raw content in sync.
    document.raw = text
    document.set_raw(document.raw)
    return document
Exemplo n.º 2
0
def get_naf_from_sentences(sentences):
    """Build a NAF document with a text and a term layer from token lists.

    The raw text is built by joining the tokens of each sentence with a
    single space and joining the sentences with a newline. One word-form
    is added per token (ids ``w1``, ``w2``, ...) and one term per token
    (ids ``t_0``, ``t_1``, ...), each term spanning exactly its own token.

    Args:
        sentences: A re-iterable sequence of sentences, each a sequence of
            token strings.

    Returns:
        A tuple ``(naf_obj, term_ids)`` where ``naf_obj`` is the populated
        ``KafNafParser`` object and ``term_ids`` is a list with, for every
        sentence, the list of term ids created for its tokens.
    """
    naf_obj = KafNafParser(type="NAF")
    naf_obj.set_version("3.0")
    naf_obj.set_language("nl")
    naf_obj.lang = "nl"
    naf_obj.raw = '\n'.join(' '.join(s) for s in sentences)
    naf_obj.set_raw(naf_obj.raw)

    # Create text layer.
    # Offsets are derived from how the raw text was assembled above rather
    # than by searching for the token: a search that starts at the previous
    # token's offset returns the wrong position for repeated tokens
    # ("New New York") or tokens contained in their predecessor.
    wcount = 1
    cursor = 0  # character position in the raw text
    token_ids = []
    for sid, sentence in enumerate(sentences):
        token_ids_sub = []
        for position, token in enumerate(sentence):
            if position:
                cursor += 1  # the single space separating tokens
            token_obj = KafNafParserPy.Cwf(type=naf_obj.get_type())
            token_id = 'w{}'.format(wcount)
            token_obj.set_id(token_id)
            token_obj.set_length(str(len(token)))
            token_obj.set_para('1')
            token_obj.set_sent(str(sid + 1))
            token_obj.set_text(token)
            token_obj.set_offset(str(cursor))
            token_ids_sub.append(token_id)
            naf_obj.add_wf(token_obj)
            cursor += len(token)
            wcount += 1
        token_ids.append(token_ids_sub)
        cursor += 1  # the newline separating sentences

    # Create term layer: one term per token, spanning only that token.
    logger.info('Creating the term layer...')
    term_ids = []
    count_terms = 0
    for token_ids_sub in token_ids:
        term_ids_sub = []
        for token_id in token_ids_sub:
            new_term_id = 't_' + str(count_terms)
            count_terms += 1
            term_ids_sub.append(new_term_id)
            term_obj = KafNafParserPy.Cterm(type=naf_obj.get_type())
            term_obj.set_id(new_term_id)
            new_span = KafNafParserPy.Cspan()
            new_span.create_from_ids([token_id])
            term_obj.set_span(new_span)
            naf_obj.add_term(term_obj)
        term_ids.append(term_ids_sub)

    return naf_obj, term_ids
Exemplo n.º 3
0
def get_naf(input_filename):
    """Load a NAF document from a file, wrapping plain text if needed.

    The file is first parsed as NAF. If that raises ``XMLSyntaxError``, the
    file is read as UTF-8 text instead: content containing both ``<NAF``
    and ``</NAF>`` is treated as a broken NAF file and the error is
    re-raised; any other content is stored as the raw text of a new NAF
    document (version "3.0", language "nl").

    Args:
        input_filename: Path of the file to load.

    Returns:
        A ``KafNafParser`` object.

    Raises:
        XMLSyntaxError: If the file looks like NAF but cannot be parsed.
    """
    try:
        return KafNafParser(input_filename)
    except XMLSyntaxError:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding, which mis-decodes non-ASCII text on some systems.
        with open(input_filename, encoding="utf-8") as input_file:
            text = input_file.read()
        if "<NAF" in text and "</NAF>" in text:
            # I'm guessing this should be a NAF file but something is wrong
            logger.exception("Error parsing NAF file")
            raise
        naf = KafNafParser(type="NAF")
        naf.set_version("3.0")
        naf.set_language("nl")
        naf.lang = "nl"
        naf.raw = text
        naf.set_raw(naf.raw)
        return naf
Exemplo n.º 4
0
def get_naf(input_file):
    """Load a NAF document from a binary file object, wrapping plain text.

    The bytes read from *input_file* are first parsed as NAF. If that
    raises ``XMLSyntaxError``, they are decoded as UTF-8: content
    containing both ``<NAF`` and ``</NAF>`` is treated as a broken NAF
    file and the error is re-raised; any other content is stored as the
    raw text of a new NAF document (version "3.0", language "nl").

    Args:
        input_file: An object whose ``read()`` returns the file's bytes.

    Returns:
        A ``KafNafParser`` object.

    Raises:
        XMLSyntaxError: If the content looks like NAF but cannot be parsed.
    """
    payload = input_file.read()
    try:
        return KafNafParser(BytesIO(payload))
    except XMLSyntaxError:
        text = payload.decode("utf-8")
        if "<NAF" in text and "</NAF>" in text:
            # Presumably meant to be a NAF file, but it is malformed.
            logging.exception("Error parsing NAF file")
            raise
        document = KafNafParser(type="NAF")
        document.set_version("3.0")
        document.set_language("nl")
        document.lang = "nl"
        document.raw = text
        document.set_raw(document.raw)
        return document