Пример #1
0
def __create_span(ann_obj, mods, type, start, end, txt_file_path,
        projectconf, attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    Arguments:
    ann_obj -- annotation container; must offer get_textbounds(),
               get_new_id() and add_annotation()
    mods -- modification tracker; addition() is called for everything
            that gets created
    type -- annotation type to look for / create
    start, end -- span boundaries, anything accepted by int()
    txt_file_path -- path of the text document the span refers to
    projectconf -- project configuration, consulted through
                   is_physical_entity_type()
    attributes -- not used by this function

    Returns an (annotation, event) pair. The event is None for physical
    entity types; both are None when the requested span contains a
    newline, in which case nothing is created.
    """
    # TODO: Rip this out!
    span_start, span_end = int(start), int(end)

    # Look for an already existing text-bound with the same span and type
    ann = None
    for candidate in ann_obj.get_textbounds():
        try:
            if not (candidate.start == span_start
                    and candidate.end == span_end
                    and candidate.type == type):
                continue
        except AttributeError:
            # Lacks the attributes we compare on, so not a trigger
            continue
        ann = candidate
        break

    if ann is None:
        # Nothing to reuse, allocate a fresh text-bound ID
        new_id = ann_obj.get_new_id('T') #XXX: Cons
        # Fetch the text covered by the span
        with open_textfile(txt_file_path, 'r') as txt_file:
            text = txt_file.read()[span_start:span_end]

        #TODO: Data tail should be optional
        if '\n' in text:
            # We got a newline in the span, don't take any action
            return None, None

        ann = TextBoundAnnotationWithText(span_start, span_end, new_id,
                type, text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)

    if projectconf.is_physical_entity_type(type):
        # TODO: alert that negation / speculation are ignored if set
        return ann, None

    # Anything that is not a physical entity gets an event as well
    new_event_id = ann_obj.get_new_id('E') #XXX: Cons
    event = EventAnnotation(ann.id, [], unicode(new_event_id), type, '')
    ann_obj.add_annotation(event)
    mods.addition(event)
    return ann, event
Пример #2
0
def _create_argument(ann_obj, projectconf, mods, origin, target, type,
                     attributes, old_type, old_target):
    """Add an argument to ``origin`` or replace one of its existing arguments.

    Arguments are handled as ``(type, target id)`` tuples, the target id
    being ``str(target.id)``.

    Args:
        ann_obj: annotation container; only used (get_new_id(),
            add_annotation()) when ``origin`` has no arguments of its own.
        projectconf: not used by this function.
        mods: modification tracker; change() / addition() are called for
            whatever is altered or created.
        origin: annotation receiving the argument.
        target: annotation the argument points to; must have an ``id``.
        type: role of the new argument.
        attributes: not used by this function.
        old_type: previous role when updating, or None if unchanged.
        old_target: previous target when updating, or None if unchanged;
            compared directly against the second element of the stored
            tuples, so presumably an id string -- confirm with the caller.

    Returns:
        Always None, there is no addressing mechanism for arguments.

    Raises:
        AttributeError: if ``target`` has no ``id`` attribute.
    """
    # Resolve the id before entering the try block: a target without an id
    # must not be mistaken for an origin without args (the handler below
    # needs arg_tup to be defined).
    target_id = str(target.id)
    arg_tup = (type, target_id)

    try:
        # Is this an addition or an update?
        if old_type is None and old_target is None:
            # If it already exists as an arg, we were called to do nothing
            if arg_tup not in origin.args:
                before = str(origin)
                origin.add_argument(type, target_id)
                mods.change(before, origin)
        else:
            # Construct how the old arg would have looked like. The stored
            # tuples hold the target *id*, so an unchanged target has to be
            # represented by its id as well, otherwise a type-only update
            # could never match the existing argument.
            old_arg_tup = (type if old_type is None else old_type,
                           target_id if old_target is None else old_target)

            # On a collision or a missing old arg, don't do anything
            if old_arg_tup in origin.args and arg_tup not in origin.args:
                before = str(origin)
                origin.args.remove(old_arg_tup)
                origin.add_argument(type, target_id)
                mods.change(before, origin)
    except AttributeError:
        # The annotation did not have args, it was most likely an entity
        # thus we need to create a new Event...
        new_id = ann_obj.get_new_id('E')
        ann = EventAnnotation(
            origin.id,
            [arg_tup],
            new_id,
            origin.type,
            ''
        )
        ann_obj.add_annotation(ann)
        mods.addition(ann)

    # No addressing mechanism for arguments at the moment
    return None
Пример #3
0
def __create_span(ann_obj, mods, type, offsets, txt_file_path,
                  projectconf, attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    Args:
        ann_obj: annotation container; must offer get_textbounds(),
            get_new_id() and add_annotation().
        mods: modification tracker; addition() is called for everything
            that gets created.
        type: annotation type to create.
        offsets: iterable of (start, end) pairs describing the span(s).
        txt_file_path: path of the text document the offsets refer to.
        projectconf: project configuration, consulted through
            is_event_type() and is_physical_entity_type().
        attributes: not used by this function.

    Returns:
        An (annotation, event) pair; the event is None for physical entity
        types.
    """
    # For event types, reuse trigger if a matching one exists.
    found = None
    if projectconf.is_event_type(type):
        for tb_ann in ann_obj.get_textbounds():
            try:
                if (_offsets_equal(tb_ann.spans, offsets)
                        and tb_ann.type == type):
                    found = tb_ann
                    break
            except AttributeError:
                # Not a trigger then
                pass

    if found is None:
        # Get a new ID
        new_id = ann_obj.get_new_id('T')  # XXX: Cons
        # Get the document text and the part of it covered by the offsets
        with open_textfile(txt_file_path, 'r') as txt_file:
            text = txt_file.read()
        text_span = _text_for_offsets(text, offsets)

        # The below code resolves cases where there are newlines in the
        #   offsets by creating discontinuous annotations for each span
        #   separated by newlines. For most cases it preserves the offsets.
        # NOTE(review): the segments of the whole text_span are walked once
        #   per (start, end) pair; this looks written with a single pair in
        #   mind -- confirm the intended result for multi-pair offsets.
        text_segs = text_span.split('\n')  # loop-invariant, split only once
        seg_offsets = []
        for o_start, o_end in offsets:
            pos = o_start
            for text_seg in text_segs:
                if not text_seg and o_start != o_end:
                    # Double new-line, skip ahead
                    pos += 1
                    continue
                start = pos
                end = start + len(text_seg)

                # For the next iteration the position is after the newline.
                pos = end + 1

                # Adjust the offsets to compensate for any potential leading
                #   and trailing whitespace.
                start += len(text_seg) - len(text_seg.lstrip())
                end -= len(text_seg) - len(text_seg.rstrip())

                # If there is any segment left, add it to the offsets. For a
                #   whitespace-only segment both adjustments above apply in
                #   full and start ends up *beyond* end, so an inequality
                #   test is not enough to reject it.
                if start < end:
                    seg_offsets.append((start, end))

        # if we're dealing with a null-span
        if not seg_offsets:
            seg_offsets = offsets

        ann_text = DISCONT_SEP.join(text[seg_start:seg_end]
                                    for seg_start, seg_end in seg_offsets)
        ann = TextBoundAnnotationWithText(seg_offsets, new_id, type, ann_text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)
    else:
        ann = found

    # ann is always set at this point: either the reused trigger or the
    # annotation created above.
    if projectconf.is_physical_entity_type(type):
        # TODO: alert that negation / speculation are ignored if set
        event = None
    else:
        # Create the event also
        new_event_id = ann_obj.get_new_id('E')  # XXX: Cons
        event = EventAnnotation(
            ann.id, [], str(new_event_id), type, '')
        ann_obj.add_annotation(event)
        mods.addition(event)

    return ann, event
Пример #4
0
def __create_span(ann_obj, mods, type, offsets, txt_file_path, projectconf,
                  attributes):
    """Create (or reuse) a text-bound annotation and, if needed, its event.

    Arguments:
    ann_obj -- annotation container; must offer get_textbounds(),
               get_new_id() and add_annotation()
    mods -- modification tracker; addition() is called for everything
            that gets created
    type -- annotation type to create
    offsets -- iterable of (start, end) pairs describing the span(s)
    txt_file_path -- path of the text document the offsets refer to
    projectconf -- project configuration, consulted through
                   is_event_type() and is_physical_entity_type()
    attributes -- not used by this function

    Returns an (annotation, event) pair; the event is None when no event
    is created.
    """
    # Only event types may share an already existing, matching trigger
    ann = None
    if projectconf.is_event_type(type):
        for candidate in ann_obj.get_textbounds():
            try:
                if not (_offsets_equal(candidate.spans, offsets)
                        and candidate.type == type):
                    continue
            except AttributeError:
                # Lacks the attributes we compare on, so not a trigger
                continue
            ann = candidate
            break

    if ann is None:
        # Nothing to reuse, allocate a fresh text-bound ID
        new_id = ann_obj.get_new_id('T')  #XXX: Cons
        # Fetch the text covered by the offsets
        with open_textfile(txt_file_path, 'r') as txt_file:
            # TODO discont: use offsets instead (note need for int conversion)
            text = _text_for_offsets(txt_file.read(), offsets)

        # Newlines inside the offsets are resolved by emitting one span per
        #   newline-separated piece, i.e. a discontinuous annotation. For
        #   most cases this preserves the offsets.
        seg_offsets = []
        for o_start, o_end in offsets:
            cursor = o_start
            for piece in text.split('\n'):
                if piece or o_start == o_end:
                    piece_end = cursor + len(piece)
                    seg_offsets.append((cursor, piece_end))
                    # Continue right after the newline that ended the piece
                    cursor = piece_end + 1
                else:
                    # Double new-line, skip ahead
                    cursor += 1

        # Any newlines in the text become the discontinuous separator
        ann_text = MUL_NL_REGEX.sub(DISCONT_SEP, text)
        ann = TextBoundAnnotationWithText(seg_offsets, new_id, type, ann_text)
        ann_obj.add_annotation(ann)
        mods.addition(ann)

    if ann is None or projectconf.is_physical_entity_type(type):
        # No annotation to attach to, or a physical entity: no event
        # TODO: alert that negation / speculation are ignored if set
        return ann, None

    # Anything else gets an event as well
    new_event_id = ann_obj.get_new_id('E')  #XXX: Cons
    event = EventAnnotation(ann.id, [], unicode(new_event_id), type, '')
    ann_obj.add_annotation(event)
    mods.addition(event)
    return ann, event