示例#1
0
def _dep(xml, source_element='basic-dependencies'):
    soup = _soup(xml)
    token_by_ids = _token_by_ids(soup)

    ann_by_ids = defaultdict(dict)
    for s_id, t_id, ann in _pos(xml):
        ann_by_ids[s_id][t_id] = ann
        yield ann

    curr_rel_id = 1
    for sent_e in _find_sentences_element(soup).iter('sentence'):
        sent_id = int(sent_e.get('id'))

        deps_e = sent_e.findall(source_element)
        assert len(deps_e) == 1
        deps_e = deps_e[0]

        for dep_e in deps_e:
            if dep_e.tag != 'dep':
                # To be on the safe side
                continue

            dep_type = dep_e.get('type')
            assert dep_type is not None

            gov_tok_id = int(dep_e.find('governor').get('idx'))
            dep_tok_id = int(dep_e.find('dependent').get('idx'))

            yield BinaryRelationAnnotation('R%s' % curr_rel_id, dep_type,
                                           'Governor',
                                           ann_by_ids[sent_id][gov_tok_id].id,
                                           'Dependent',
                                           ann_by_ids[sent_id][dep_tok_id].id,
                                           '')
            curr_rel_id += 1
示例#2
0
文件: annotator.py 项目: edycop/brat
def _create_relation(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target, undo_resp={}):
    attributes = _parse_attributes(attributes)

    if old_type is not None or old_target is not None:
        assert type in projectconf.get_relation_types(), (
                ('attempting to convert relation to non-relation "%s" ' % (target.type, )) +
                ('(legit types: %s)' % (unicode(projectconf.get_relation_types()), )))

        sought_target = (old_target
                if old_target is not None else target.id)
        sought_type = (old_type
                if old_type is not None else type)

        # We are to change the type, target, and/or attributes
        found = None
        for ann in ann_obj.get_relations():
            if ann.arg2 == sought_target and ann.type == sought_type:
                found = ann
                break

        if found is None:
            # TODO: better response
            Messager.error('_create_relation: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))
        elif found.arg2 == target.id and found.type != type:
            # no changes to type or target
            pass
        else:
            # type and/or target changed, mark.
            before = unicode(found)
            found.arg2 = target.id
            found.type = type
            mods.change(before, found)

        target_ann = found
    else:
        # Create a new annotation
        new_id = ann_obj.get_new_id('R')
        rel = projectconf.get_relation_by_type(type)
        assert rel is not None and len(rel.arg_list) == 2
        a1l, a2l = rel.arg_list
        ann = BinaryRelationAnnotation(new_id, type, a1l, origin.id, a2l, target.id, '\t')
        mods.addition(ann)
        ann_obj.add_annotation(ann)

        target_ann = ann

    # process attributes
    if target_ann is not None:
        _set_attributes(ann_obj, ann, attributes, mods, undo_resp)
    elif attributes != None:
        Messager.error('_create_relation: cannot set arguments: failed to identify target relation (type %s, target %s) (deleted?)' % (str(old_type), str(old_target)))        

    return target_ann
def _dep(xml, source_element='basic-dependencies'):
    soup = _soup(xml)
    token_by_ids = _token_by_ids(soup)

    ann_by_ids = defaultdict(dict)
    for s_id, t_id, ann in _pos(xml):
        ann_by_ids[s_id][t_id] = ann
        yield ann

    curr_rel_id = 1
    for sent_e in _find_sentences_element(soup).getiterator('sentence'):
        sent_id = int(sent_e.get('id'))

        # Attempt to find dependencies as distinctly named elements as they
        #   were stored in the Stanford XML format prior to 2013.
        deps_e = sent_e.findall(source_element)
        if len(deps_e) == 0:
            # Perhaps we are processing output following the newer standard,
            #   check for the same identifier but as a type attribute for
            #   general "dependencies" elements.
            deps_e = list(e for e in sent_e.getiterator('dependencies')
                    if e.attrib['type'] == source_element)
        assert len(deps_e) == 1
        deps_e = deps_e[0]
        
        for dep_e in deps_e:
            if dep_e.tag != 'dep':
                # To be on the safe side
                continue

            dep_type = dep_e.get('type')
            assert dep_type is not None

            if dep_type == 'root':
                # Skip dependencies to the root node, this behaviour conforms
                #   with how we treated the pre-2013 format.
                continue
            
            gov_tok_id = int(dep_e.find('governor').get('idx'))
            dep_tok_id = int(dep_e.find('dependent').get('idx'))

            yield BinaryRelationAnnotation(
                    'R%s' % curr_rel_id, dep_type,
                    'Governor', ann_by_ids[sent_id][gov_tok_id].id,
                    'Dependent', ann_by_ids[sent_id][dep_tok_id].id,
                    ''
                    )
            curr_rel_id += 1
示例#4
0
文件: tag.py 项目: ngarneau/brat
        mods = ModificationTracker()
        cidmap = {}

        for cid, ann in ((i, a) for i, a in json_resp.iteritems()
                         if _is_relation(a)):
            assert 'rel_type' in ann, 'Tagger response lacks rel_type'
            rel_type = ann['rel_type']
            assert 'arg1' in ann, 'Tagger response lacks arg1'
            arg1 = ann['arg1']
            assert 'arg2' in ann, 'Tagger response lacks arg2'
            arg2 = ann['arg2']

            _id = ann_obj.get_new_id('R')
            cidmap[cid] = _id

            tb = BinaryRelationAnnotation(_id, rel_type, 'Arg1', arg1, 'Arg2',
                                          arg2, "")

            mods.addition(tb)
            ann_obj.add_annotation(tb)

        mod_resp = mods.json_response()
        mod_resp['annotations'] = _json_from_ann(ann_obj)
        return mod_resp


if __name__ == '__main__':
    # Silly test, but helps
    tag('/BioNLP-ST_2011_ID_devel', 'PMC1874608-01-INTRODUCTION', 'random')