Пример #1
0
 def _views(annotations):
     """Collect the span and property views of the given annotations.

     For every annotation this adds, when accepted by ``select`` (a free name
     resolved from the enclosing scope):

     * a ``(type, "<span>")`` view carrying no value,
     * a ``(type, property-name)`` view carrying the property value,
     * a ``(type, property-name, property-value)`` view, unless the value is
       an ``anafora.AnaforaAnnotation``.

     A property value of ``None`` is represented by the string ``'<none>'``.

     :param annotations: iterable of annotations exposing ``spans``, ``type``
         and ``properties``
     :return set: the collected ``_AnnotationView`` objects
     """
     collected = set()
     for annotation in annotations:
         ann_spans = annotation.spans
         ann_type = annotation.type

         # the span-only view
         span_key = (ann_type, "<span>")
         if select(*span_key):
             collected.add(_AnnotationView(ann_spans, span_key, None))

         # one view per property, plus one per (property, value) pair
         for prop_name in annotation.properties:
             raw_value = annotation.properties[prop_name]
             prop_value = '<none>' if raw_value is None else raw_value

             if select(ann_type, prop_name):
                 collected.add(
                     _AnnotationView(ann_spans, (ann_type, prop_name), prop_value))

             if select(ann_type, prop_name, prop_value):
                 # values that are themselves annotations get no value-level view
                 if not isinstance(prop_value, anafora.AnaforaAnnotation):
                     collected.add(
                         _AnnotationView(ann_spans,
                                         (ann_type, prop_name, prop_value),
                                         prop_value))
     return collected
Пример #2
0
 def _views(annotations):
     """Return the set of views (span, property, property-value) for annotations.

     ``select`` and ``_AnnotationView`` are taken from the enclosing scope.
     Each annotation contributes a ``(type, "<span>")`` view, and for each of
     its properties a ``(type, name)`` view and a ``(type, name, value)`` view,
     each only when ``select`` accepts the corresponding name. ``None`` values
     are replaced by ``'<none>'``; values that are ``AnaforaAnnotation``
     instances never produce a value-level view.

     :param annotations: iterable of annotation objects
     :return set: ``_AnnotationView`` instances
     """
     result = set()
     for item in annotations:
         item_spans = item.spans
         if select(item.type, "<span>"):
             span_view = _AnnotationView(item_spans, (item.type, "<span>"), None)
             result.add(span_view)
         for name in item.properties:
             value = item.properties[name]
             if value is None:
                 value = '<none>'
             if select(item.type, name):
                 name_view = _AnnotationView(item_spans, (item.type, name), value)
                 result.add(name_view)
             # select() is consulted first, then the annotation-valued check
             value_selected = select(item.type, name, value)
             if value_selected and not isinstance(value, anafora.AnaforaAnnotation):
                 value_view = _AnnotationView(item_spans,
                                              (item.type, name, value), value)
                 result.add(value_view)
     return result
Пример #3
0
def test_select_all():
    """Check that a Select built with no arguments accepts every query."""
    select = anafora.select.Select()

    # type-only, type+property and type+property+value queries
    accepted_queries = [
        ('A',),
        ('B', 'P'),
        ('B', 'R'),
        ('B', 'R', 'V'),
        ('C', 'R', 'V'),
        ('C', 'R', 'W'),
        ('C',),
    ]
    for query in accepted_queries:
        assert select(*query)
Пример #4
0
def test_select_all():
    """A default-constructed Select must accept all of the sample queries."""
    selector = anafora.select.Select()

    sample_queries = (
        ('A',), ('B', 'P'), ('B', 'R'), ('B', 'R', 'V'),
        ('C', 'R', 'V'), ('C', 'R', 'W'), ('C',),
    )
    for type_and_rest in sample_queries:
        outcome = selector(*type_and_rest)
        assert outcome
Пример #5
0
def score_data(reference_data, predicted_data, include=None, exclude=None,
               scores_type=Scores, spans_type=None):
    """
    Score predicted annotations against reference annotations, one score per view.

    A view is registered for ``"*"``, ``("*", "<span>")``, each annotation type, each
    (type, ``"<span>"``), each (type, property) and each (type, property, value) that is
    accepted by the include/exclude selection. Each view is a ``ToSet`` callable that is
    applied to both annotation collections; the two results are added to that view's scores.

    :param AnaforaData reference_data: reference ("gold standard") Anafora data
    :param AnaforaData predicted_data: predicted (system-generated) Anafora data; ``None`` is
        treated as having no annotations
    :param set include: types of annotations to include (others will be excluded); may be type names,
        (type-name, property-name) tuples, (type-name, property-name, property-value) tuples
    :param set exclude: types of annotations to exclude; may be type names, (type-name, property-name) tuples,
        (type-name, property-name, property-value) tuples
    :param type scores_type: type for calculating matches between predictions and reference
    :param type spans_type: wrapper object to apply to annotation spans
    :return dict: mapping from (annotation type[, property name[, property value]]) to Scores object
    """

    # predicate telling whether a type / type:property / type:property:value is selected
    select = anafora.select.Select(include, exclude)

    reference_annotations = reference_data.annotations
    if predicted_data is None:
        predicted_annotations = []
    else:
        predicted_annotations = predicted_data.annotations

    # every view shares the same selector and span wrapper
    def _new_view(**restrictions):
        return ToSet(select=select, spans_type=spans_type, **restrictions)

    span = "<span>"
    views = {}

    # the two catch-all views
    if select("*"):
        views["*"] = _new_view()
    if select("*", span):
        views["*", span] = _new_view(prop_name=None)

    # views discovered by walking the reference annotations, then the predicted ones
    for annotations in (reference_annotations, predicted_annotations):
        for ann in annotations:
            ann_type = ann.type

            if ann_type not in views and select(ann_type):
                views[ann_type] = _new_view(type_name=ann_type)

            span_key = (ann_type, span)
            if span_key not in views and select(ann_type, span):
                views[span_key] = _new_view(type_name=ann_type, prop_name=None)

            for prop_name, prop_value in ann.properties.items():
                prop_key = (ann_type, prop_name)
                if prop_key not in views and select(ann_type, prop_name):
                    views[prop_key] = _new_view(type_name=ann_type,
                                                prop_name=prop_name)

                # annotation-valued properties get no per-value view
                if isinstance(prop_value, anafora.AnaforaAnnotation):
                    continue
                # NOTE: membership is tested with the raw value, while the view is
                # stored under "<none>" when the value is None
                if (ann_type, prop_name, prop_value) in views:
                    continue
                if not select(ann_type, prop_name, prop_value):
                    continue
                value_label = prop_value
                if prop_value is None:
                    value_label = "<none>"
                views[ann_type, prop_name, value_label] = _new_view(
                    type_name=ann_type,
                    prop_name=prop_name,
                    prop_value=prop_value)

    # plain string names sort as 1-tuples alongside the tuple names
    def _sort_key(name):
        if isinstance(name, tuple):
            return name
        return (name,)

    # fill a mapping from view name to the scores for that view
    result = collections.defaultdict(lambda: scores_type())
    for view_name in sorted(views, key=_sort_key):
        view = views[view_name]
        reference_set = view(reference_annotations)
        predicted_set = view(predicted_annotations)
        result[view_name].add(reference_set, predicted_set)

    return result
Пример #6
0
 def _del_excluded_properties(annotations):
     """Delete, in place, the properties rejected by ``select``.

     Only annotations whose type is itself selected are touched; for those,
     every property name for which ``select(type, name)`` is false is removed
     from ``ann.properties``. ``select`` is a free name from the enclosing scope.

     :param annotations: iterable of annotations to modify
     """
     for annotation in annotations:
         if not select(annotation.type):
             continue
         # iterate over a snapshot of the names because entries are deleted below
         for prop_name in tuple(annotation.properties):
             if select(annotation.type, prop_name):
                 continue
             del annotation.properties[prop_name]
Пример #7
0
def score_data(reference_data,
               predicted_data,
               include=None,
               exclude=None,
               scores_type=Scores,
               annotation_wrapper=None):
    """
    Score predicted annotations against reference annotations.

    Annotations are grouped by type; for each selected type the reference and predicted
    sets are added to both the ``"*"`` entry and the type's own entry. Span and property
    views are then grouped by view name and scored, with every ``(type, "<span>")`` view
    also added to the ``("*", "<span>")`` entry.

    NOTE: excluded properties are deleted from the annotations of ``reference_data`` and
    ``predicted_data`` in place (see the FIXME below).

    :param AnaforaData reference_data: reference ("gold standard") Anafora data
    :param AnaforaData predicted_data: predicted (system-generated) Anafora data
    :param set include: types of annotations to include (others will be excluded); may be type names,
        (type-name, property-name) tuples, (type-name, property-name, property-value) tuples
    :param set exclude: types of annotations to exclude; may be type names, (type-name, property-name) tuples,
        (type-name, property-name, property-value) tuples
    :param type scores_type: type for calculating matches between predictions and reference
    :param type annotation_wrapper: wrapper type to apply to AnaforaAnnotations
    :return dict: mapping from (annotation type[, property name[, property value]]) to Scores object
    """

    # groups items from iterables by a key function
    # each group is a pair of sets: index 0 holds reference items, index 1 predicted items
    def _group_by(reference_iterable, predicted_iterable, key_function):
        groups = collections.defaultdict(lambda: (set(), set()))
        for iterable, index in [(reference_iterable, 0),
                                (predicted_iterable, 1)]:
            for item in iterable:
                groups[key_function(item)][index].add(item)
        return groups

    # returns true if this type:property:value is accepted by includes= and excludes=
    select = anafora.select.Select(include, exclude)

    # generates a view of just the annotation's spans, and of each of its selected properties
    def _views(annotations):
        views = set()
        for ann in annotations:
            spans = ann.spans
            if select(ann.type, "<span>"):
                views.add(_AnnotationView(spans, (ann.type, "<span>"), None))
            for view_name in ann.properties:
                view_value = ann.properties[view_name]
                # a missing value is represented by the string '<none>'
                if view_value is None:
                    view_value = '<none>'
                if select(ann.type, view_name):
                    views.add(
                        _AnnotationView(spans, (ann.type, view_name),
                                        view_value))
                # no per-value view is created when the value is itself an annotation
                if select(ann.type, view_name, view_value) and not isinstance(
                        view_value, anafora.AnaforaAnnotation):
                    views.add(
                        _AnnotationView(spans,
                                        (ann.type, view_name, view_value),
                                        view_value))
        return views

    # get reference and predicted annotations
    # (a missing prediction is treated as an empty list of annotations)
    reference_annotations = reference_data.annotations
    predicted_annotations = [] if predicted_data is None else predicted_data.annotations

    # FIXME: this avoids counting excluded properties, but modifies the data
    def _del_excluded_properties(annotations):
        for ann in annotations:
            if select(ann.type):
                # list(...) snapshots the names so that deleting while looping is safe
                for name in list(ann.properties):
                    if not select(ann.type, name):
                        del ann.properties[name]

    # the deletion runs on the unwrapped annotations, before any wrapping below
    _del_excluded_properties(reference_annotations)
    _del_excluded_properties(predicted_annotations)

    # if necessary, wrap the annotations in a wrapper class
    # NOTE(review): under Python 3 map() is lazy and single-pass; each wrapped
    # sequence is only iterated once, by _group_by below
    if annotation_wrapper is not None:
        reference_annotations = map(annotation_wrapper, reference_annotations)
        predicted_annotations = map(annotation_wrapper, predicted_annotations)

    # fill a mapping from a name (type, type:property or type:property:value) to the corresponding scores
    result = collections.defaultdict(lambda: scores_type())
    results_by_type = _group_by(reference_annotations, predicted_annotations,
                                lambda a: a.type)
    for ann_type in sorted(results_by_type):

        # update whole-annotation scores
        type_reference_annotations, type_predicted_annotations = results_by_type[
            ann_type]
        if select(ann_type):
            # "*" accumulates the annotations of every selected type
            result["*"].add(type_reference_annotations,
                            type_predicted_annotations)
            result[ann_type].add(type_reference_annotations,
                                 type_predicted_annotations)

        # update span and property scores
        # (views are computed even when the type itself is not selected)
        reference_views = _views(type_reference_annotations)
        predicted_views = _views(type_predicted_annotations)
        results_by_view = _group_by(reference_views, predicted_views,
                                    lambda t: t.name)
        for view_name in sorted(results_by_view):
            view_reference_annotations, view_predicted_annotations = results_by_view[
                view_name]
            result[view_name].add(view_reference_annotations,
                                  view_predicted_annotations)
            # span views of every type are also accumulated under ("*", "<span>")
            if isinstance(view_name, tuple) and len(
                    view_name) == 2 and view_name[1] == "<span>":
                result["*", "<span>"].add(view_reference_annotations,
                                          view_predicted_annotations)

    # return the collected scores
    return result
Пример #8
0
def score_data(reference_data,
               predicted_data,
               include=None,
               exclude=None,
               scores_type=Scores,
               spans_type=None):
    """
    Compute scores for every selected view of the reference and predicted annotations.

    Views are keyed by ``"*"``, ``("*", "<span>")``, type name, (type, ``"<span>"``),
    (type, property) and (type, property, value). A per-value view is only registered when
    the value is neither an ``AnaforaAnnotation`` nor ``None``.

    :param AnaforaData reference_data: reference ("gold standard") Anafora data
    :param AnaforaData predicted_data: predicted (system-generated) Anafora data; ``None`` is
        treated as having no annotations
    :param set include: types of annotations to include (others will be excluded); may be type names,
        (type-name, property-name) tuples, (type-name, property-name, property-value) tuples
    :param set exclude: types of annotations to exclude; may be type names, (type-name, property-name) tuples,
        (type-name, property-name, property-value) tuples
    :param type scores_type: type for calculating matches between predictions and reference
    :param type spans_type: wrapper object to apply to annotation spans
    :return dict: mapping from (annotation type[, property name[, property value]]) to Scores object
    """

    # accepts or rejects a type / type:property / type:property:value
    select = anafora.select.Select(include, exclude)

    reference_annotations = reference_data.annotations
    predicted_annotations = (predicted_data.annotations
                             if predicted_data is not None else [])

    span = "<span>"
    views = {}

    def _add_view(key, *query, **restrictions):
        # registers a ToSet under ``key`` when it is new and ``query`` is selected
        if key not in views and select(*query):
            views[key] = ToSet(select=select,
                               spans_type=spans_type,
                               **restrictions)

    # catch-all views first, then the views implied by the annotations themselves
    _add_view("*", "*")
    _add_view(("*", span), "*", span, prop_name=None)
    for annotations in (reference_annotations, predicted_annotations):
        for ann in annotations:
            ann_type = ann.type
            _add_view(ann_type, ann_type, type_name=ann_type)
            _add_view((ann_type, span), ann_type, span,
                      type_name=ann_type,
                      prop_name=None)
            for prop_name, prop_value in ann.properties.items():
                _add_view((ann_type, prop_name), ann_type, prop_name,
                          type_name=ann_type,
                          prop_name=prop_name)
                if isinstance(prop_value, anafora.AnaforaAnnotation):
                    continue
                value_key = (ann_type, prop_name, prop_value)
                # select() is still consulted for None values, but they get no view
                if (value_key not in views
                        and select(ann_type, prop_name, prop_value)
                        and prop_value is not None):
                    views[value_key] = ToSet(select=select,
                                             spans_type=spans_type,
                                             type_name=ann_type,
                                             prop_name=prop_name,
                                             prop_value=prop_value)

    def _as_tuple(name):
        # lets bare string names be ordered together with tuple names
        return name if isinstance(name, tuple) else (name, )

    # score each view, visiting the view names in sorted order
    result = collections.defaultdict(lambda: scores_type())
    for view_name in sorted(views, key=_as_tuple):
        to_set = views[view_name]
        reference_set = to_set(reference_annotations)
        predicted_set = to_set(predicted_annotations)
        result[view_name].add(reference_set, predicted_set)

    return result
Пример #9
0
def score_data(reference_data, predicted_data, include=None, exclude=None,
               scores_type=Scores, annotation_wrapper=None):
    """
    Score predicted annotations against reference annotations, by type and by view.

    Annotations are grouped by type; each selected type gets a whole-annotation score.
    Within each type, span and property views are built, grouped by view name, and scored.

    :param AnaforaData reference_data: reference ("gold standard") Anafora data
    :param AnaforaData predicted_data: predicted (system-generated) Anafora data; ``None`` is
        treated as having no annotations
    :param set include: types of annotations to include (others will be excluded); may be type names,
        (type-name, property-name) tuples, (type-name, property-name, property-value) tuples
    :param set exclude: types of annotations to exclude; may be type names, (type-name, property-name) tuples,
        (type-name, property-name, property-value) tuples
    :param type scores_type: type for calculating matches between predictions and reference
    :param type annotation_wrapper: wrapper type to apply to AnaforaAnnotations
    :return dict: mapping from (annotation type[, property name[, property value]]) to Scores object
    """

    # accepts or rejects a type / type:property / type:property:value
    select = anafora.select.Select(include, exclude)

    # buckets items by key; each bucket is (reference items, predicted items)
    def _partition(reference_items, predicted_items, key_of):
        buckets = collections.defaultdict(lambda: (set(), set()))
        for side, items in enumerate((reference_items, predicted_items)):
            for item in items:
                buckets[key_of(item)][side].add(item)
        return buckets

    # builds the span view and the selected property views of each annotation
    def _views(annotations):
        collected = set()
        for ann in annotations:
            ann_spans = ann.spans
            ann_type = ann.type
            if select(ann_type, "<span>"):
                collected.add(_AnnotationView(ann_spans, (ann_type, "<span>"), None))
            for prop_name in ann.properties:
                raw_value = ann.properties[prop_name]
                prop_value = '<none>' if raw_value is None else raw_value
                if select(ann_type, prop_name):
                    collected.add(
                        _AnnotationView(ann_spans, (ann_type, prop_name), prop_value))
                if select(ann_type, prop_name, prop_value):
                    # annotation-valued properties get no per-value view
                    if not isinstance(prop_value, anafora.AnaforaAnnotation):
                        collected.add(
                            _AnnotationView(ann_spans,
                                            (ann_type, prop_name, prop_value),
                                            prop_value))
        return collected

    def _type_of(annotation):
        return annotation.type

    def _name_of(view):
        return view.name

    reference_annotations = reference_data.annotations
    if predicted_data is None:
        predicted_annotations = []
    else:
        predicted_annotations = predicted_data.annotations

    # optionally wrap every annotation before grouping
    if annotation_wrapper is not None:
        reference_annotations = map(annotation_wrapper, reference_annotations)
        predicted_annotations = map(annotation_wrapper, predicted_annotations)

    # maps a name (type, type:property or type:property:value) to its scores
    result = collections.defaultdict(lambda: scores_type())
    by_type = _partition(reference_annotations, predicted_annotations, _type_of)
    for ann_type in sorted(by_type):
        type_reference, type_predicted = by_type[ann_type]

        # whole-annotation scores, only for selected types
        if select(ann_type):
            result[ann_type].add(type_reference, type_predicted)

        # span and property scores
        by_view = _partition(_views(type_reference), _views(type_predicted), _name_of)
        for view_name in sorted(by_view):
            view_reference, view_predicted = by_view[view_name]
            result[view_name].add(view_reference, view_predicted)

    return result
Пример #10
0
def select_prop_value():
    """Check Select against (type, property, value) include/exclude entries.

    Each scenario builds a Select and runs the same seven queries against it,
    comparing each outcome with the expected accept/reject flag.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    discovery would presumably not collect it -- confirm this is intended.
    """
    queries = [
        ('A',),
        ('B', 'P'),
        ('B', 'R'),
        ('B', 'R', 'V'),
        ('C', 'R', 'V'),
        ('C', 'R', 'W'),
        ('C',),
    ]
    yes, no = True, False

    def _check(select, expected):
        for query, wanted in zip(queries, expected):
            if wanted:
                assert select(*query)
            else:
                assert not select(*query)

    _check(anafora.select.Select(include={'A', ('B', 'R', 'V')}),
           (yes, no, no, yes, no, no, no))

    _check(anafora.select.Select(exclude={'A', ('B', 'R', 'V')}),
           (no, yes, yes, no, yes, yes, yes))

    _check(anafora.select.Select(include={'C', ('C', 'R', 'W')}),
           (no, no, no, no, yes, no, yes))
Пример #11
0
def test_select_prop_name():
    """Check Select against (type, property) include/exclude entries.

    Each scenario builds a Select and runs the same seven queries against it,
    comparing each outcome with the expected accept/reject flag.
    """
    queries = [
        ('A',),
        ('B', 'P'),
        ('B', 'R'),
        ('B', 'R', 'V'),
        ('C', 'R', 'V'),
        ('C', 'R', 'W'),
        ('C',),
    ]
    yes, no = True, False

    def _check(select, expected):
        for query, wanted in zip(queries, expected):
            if wanted:
                assert select(*query)
            else:
                assert not select(*query)

    _check(anafora.select.Select(include=[('A',), ('B', 'R')]),
           (yes, no, yes, yes, no, no, no))

    _check(anafora.select.Select(exclude={'A', ('B', 'R')}),
           (no, yes, no, no, yes, yes, yes))

    _check(anafora.select.Select(include={'B'}, exclude={('B', 'P')}),
           (no, no, yes, yes, no, no, no))
Пример #12
0
def test_select_star():
    """Check Select's handling of the ``'*'`` wildcard in include/exclude entries.

    Each scenario builds a Select and runs the same seven queries against it,
    comparing each outcome with the expected accept/reject flag.
    """
    queries = [
        ('A',),
        ('B', 'P'),
        ('B', 'R'),
        ('B', 'R', 'V'),
        ('C', 'R', 'V'),
        ('C', 'R', 'W'),
        ('C',),
    ]
    yes, no = True, False

    def _check(select, expected):
        for query, wanted in zip(queries, expected):
            if wanted:
                assert select(*query)
            else:
                assert not select(*query)

    # wildcard as a type name
    _check(anafora.select.Select(include={'*'}),
           (yes, yes, yes, yes, yes, yes, yes))

    _check(anafora.select.Select(exclude={'*'}),
           (no, no, no, no, no, no, no))

    # wildcard type with a fixed property name
    _check(anafora.select.Select(include={('*', 'R')}),
           (no, no, yes, yes, yes, yes, no))

    # wildcard property names
    _check(anafora.select.Select(include=[('C',)], exclude=[('C', '*')]),
           (no, no, no, no, no, no, yes))

    _check(anafora.select.Select(include=['C', 'B'], exclude=[('C', '*'), ('B', '*', '*')]),
           (no, yes, yes, no, no, no, yes))

    # wildcard property values
    _check(anafora.select.Select(exclude={('C', 'R', '*')}),
           (yes, yes, yes, yes, no, no, yes))

    # wildcards at every level
    _check(anafora.select.Select(include={('*', '*', '*')}),
           (no, no, no, yes, yes, yes, no))

    _check(anafora.select.Select(exclude={('*', '*', '*')}),
           (yes, yes, yes, no, no, no, yes))
Пример #13
0
def select_prop_value():
    """Exercise Select with property-value level include/exclude entries.

    The scenario table pairs the Select keyword arguments with a pattern of
    expected outcomes, one character per query (``+`` accepted, ``-`` rejected).

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    discovery would presumably not collect it -- confirm this is intended.
    """
    queries = (
        ('A',), ('B', 'P'), ('B', 'R'), ('B', 'R', 'V'),
        ('C', 'R', 'V'), ('C', 'R', 'W'), ('C',),
    )
    scenarios = [
        (dict(include={'A', ('B', 'R', 'V')}), "+--+---"),
        (dict(exclude={'A', ('B', 'R', 'V')}), "-++-+++"),
        (dict(include={'C', ('C', 'R', 'W')}), "----+-+"),
    ]
    for select_kwargs, pattern in scenarios:
        select = anafora.select.Select(**select_kwargs)
        for query, flag in zip(queries, pattern):
            assert bool(select(*query)) == (flag == "+")
Пример #14
0
def test_select_prop_name():
    """Exercise Select with property-name level include/exclude entries.

    The scenario table pairs the Select keyword arguments with a pattern of
    expected outcomes, one character per query (``+`` accepted, ``-`` rejected).
    """
    queries = (
        ('A',), ('B', 'P'), ('B', 'R'), ('B', 'R', 'V'),
        ('C', 'R', 'V'), ('C', 'R', 'W'), ('C',),
    )
    scenarios = [
        (dict(include=[('A', ), ('B', 'R')]), "+-++---"),
        (dict(exclude={'A', ('B', 'R')}), "-+--+++"),
        (dict(include={'B'}, exclude={('B', 'P')}), "--++---"),
    ]
    for select_kwargs, pattern in scenarios:
        select = anafora.select.Select(**select_kwargs)
        for query, flag in zip(queries, pattern):
            assert bool(select(*query)) == (flag == "+")
Пример #15
0
def test_select_star():
    """Exercise Select with ``'*'`` wildcards at the type, name and value level.

    The scenario table pairs the Select keyword arguments with a pattern of
    expected outcomes, one character per query (``+`` accepted, ``-`` rejected).
    """
    queries = (
        ('A',), ('B', 'P'), ('B', 'R'), ('B', 'R', 'V'),
        ('C', 'R', 'V'), ('C', 'R', 'W'), ('C',),
    )
    scenarios = [
        (dict(include={'*'}), "+++++++"),
        (dict(exclude={'*'}), "-------"),
        (dict(include={('*', 'R')}), "--++++-"),
        (dict(include=[('C', )], exclude=[('C', '*')]), "------+"),
        (dict(include=['C', 'B'],
              exclude=[('C', '*'), ('B', '*', '*')]), "-++---+"),
        (dict(exclude={('C', 'R', '*')}), "++++--+"),
        (dict(include={('*', '*', '*')}), "---+++-"),
        (dict(exclude={('*', '*', '*')}), "+++---+"),
    ]
    for select_kwargs, pattern in scenarios:
        select = anafora.select.Select(**select_kwargs)
        for query, flag in zip(queries, pattern):
            assert bool(select(*query)) == (flag == "+")