Exemplo n.º 1
0
 def _(cid1a=id_, cid1b=id_, ann1=id_, v1=coref_value, t1=time_value,
       cid2a=id_, cid2b=id_, ann2=id_, v2=coref_value, t2=time_value):
     '''Check ``everything(content_id=cid1a)`` after storing two labels.

     Expectations encoded below:

     * same subject, equal ``epoch_ticks``: only the second label;
     * same subject, different ``epoch_ticks``: only the label that
       sorts first;
     * different subjects, but ``cid1a`` is one of the content ids
       given for the second label: both labels, in sorted order;
     * otherwise: only the first label.
     '''
     label_store.delete_all()
     first = Label(cid1a, cid1b, ann1, v1, epoch_ticks=t1)
     second = Label(cid2a, cid2b, ann2, v2, epoch_ticks=t2)
     for lab in (first, second):
         label_store.put(lab)

     if not first.same_subject_as(second):
         if cid1a == cid2a or cid1a == cid2b:
             expected = sorted([first, second])
         else:
             expected = [first]
     elif first.epoch_ticks == second.epoch_ticks:
         expected = [second]
     else:
         expected = sorted([first, second])[:1]

     found = list(label_store.everything(content_id=cid1a))
     assert found == expected
Exemplo n.º 2
0
 def _(cid1=id_, cid2=id_, ann=id_, v=coref_value, t=time_value):
     '''Check ``everything`` with and without ``include_deleted``.

     Two labels differing only in ``epoch_ticks`` (``t`` and ``t + 1``)
     are stored; the default listing must contain just the later one,
     while ``include_deleted=True`` must list the later one followed by
     the earlier one.
     '''
     label_store.delete_all()
     older = Label(cid1, cid2, ann, v, epoch_ticks=t)
     newer = Label(cid1, cid2, ann, v, epoch_ticks=t+1)
     for lab in (older, newer):
         label_store.put(lab)

     visible = list(label_store.everything())
     assert visible == [newer]
     with_deleted = list(label_store.everything(include_deleted=True))
     assert with_deleted == [newer, older]
Exemplo n.º 3
0
def test_label_reverse_equality(cid1=id_, cid2=id_, ann=id_,
                                v=coref_value, t=time_value):
    '''Labels built with swapped content ids must be equal and hash alike.'''
    forward = Label(cid1, cid2, ann, v, epoch_ticks=t)
    backward = Label(cid2, cid1, ann, v, epoch_ticks=t)
    assert forward == backward
    assert hash(forward) == hash(backward)
Exemplo n.º 4
0
def test_label_diff_empty():
    '''Labels differing only in ``epoch_ticks`` must produce an empty diff.

    The two single-label lists compare unequal, yet
    ``diff_labels_sets`` is expected to report nothing added, deleted
    or changed.
    '''
    before = [Label('a', 'b', 'foo', 1, epoch_ticks=0)]
    after = [Label('a', 'b', 'foo', 1, epoch_ticks=1)]
    assert before != after

    nothing = {kind: set() for kind in ('add', 'delete', 'change')}
    assert diff_labels_sets(before, after) == nothing
Exemplo n.º 5
0
def test_no_prefix_subtopic(label_store):
    '''A query for ``('Foo', 'Foo')`` must not also return ``'Foo Baz'``.

    Two labels are stored whose first content id / subtopic id are
    ``'Foo'`` and ``'Foo Baz'``; only the ``'Foo'`` label may come back
    from ``directly_connected``.
    '''
    exact = Label('Foo', 'Bar', '', 1, 'Foo', 'Bar')
    longer = Label('Foo Baz', 'Bar', '', 1, 'Foo Baz', 'Bar')
    for lab in (exact, longer):
        label_store.put(lab)

    found = list(label_store.directly_connected(('Foo', 'Foo')))
    assert found == [exact]
Exemplo n.º 6
0
def test_same_subject(cid1=id_, cid2=id_, s1=id_, s2=id_, ann=id_,
                      v1=coref_value, v2=coref_value,
                      t1=time_value, t2=time_value):
    '''``same_subject_as`` must hold both ways when only value/time differ.

    Both labels share content ids, annotator and subtopic ids; their
    values and ``epoch_ticks`` are independent inputs.
    '''
    subtopics = dict(subtopic_id1=s1, subtopic_id2=s2)
    first = Label(cid1, cid2, ann, v1, epoch_ticks=t1, **subtopics)
    second = Label(cid1, cid2, ann, v2, epoch_ticks=t2, **subtopics)
    assert first.same_subject_as(second)
    assert second.same_subject_as(first)
Exemplo n.º 7
0
def test_label_order_on_value(cid1=id_, cid2=id_, ann=id_, t=time_value,
                              v1=coref_value, v2=coref_value):
    '''Labels that differ only in value must order the way their values do.'''
    first = Label(cid1, cid2, ann, v1, epoch_ticks=t)
    second = Label(cid1, cid2, ann, v2, epoch_ticks=t)
    # Exactly one of the three cases is expected to apply; the label
    # comparison in that case must agree with the value comparison.
    ordered_like_values = (
        (v1 < v2 and first < second)
        or (v1 == v2 and first == second)
        or (v1 > v2 and first > second)
    )
    assert ordered_like_values
Exemplo n.º 8
0
def test_direct_connect_unordered(label_store):
    '''``directly_connected('a')`` must find ``'a'`` in either id position.

    One stored label names ``'a'`` first, another names it second, and
    a third does not involve ``'a'`` at all; only the first two are
    expected back, in that order.
    '''
    a_b = Label('a', 'b', '', 1)
    c_a = Label('c', 'a', '', 1)
    b_c = Label('b', 'c', '', 1)
    for lab in (a_b, c_a, b_c):
        label_store.put(lab)

    neighbours = list(label_store.directly_connected('a'))
    assert neighbours == [a_b, c_a]
Exemplo n.º 9
0
def test_connected_component_many_diff_value(label_store):
    '''A ``-1`` label between ``b`` and ``c`` must cut the component at ``b``.

    With ``a-b`` (1), ``b-c`` (-1) and ``c-d`` (1) stored, the
    component of ``'a'`` is expected to hold only the ``a-b`` label.
    '''
    a_b = Label('a', 'b', '', 1)
    b_c = Label('b', 'c', '', -1)
    c_d = Label('c', 'd', '', 1)
    for lab in (a_b, b_c, c_d):
        label_store.put(lab)

    component = frozenset(label_store.connected_component('a'))
    assert component == frozenset([a_b])
Exemplo n.º 10
0
    def _(cid1=id_, cid2=id_, ann=id_, v1=coref_value, v2=coref_value):
        '''``directly_connected`` must return only the newer of two labels.

        The second label uses the same ids in swapped order and an
        ``epoch_ticks`` one greater than the first; queries from either
        content id are expected to yield just that second label.
        '''
        label_store.delete_all()

        older = Label(cid1, cid2, ann, v1)
        newer = Label(cid2, cid1, ann, v2, epoch_ticks=older.epoch_ticks + 1)
        for lab in (older, newer):
            label_store.put(lab)

        for cid in (cid1, cid2):
            assert list(label_store.directly_connected(cid)) == [newer]
Exemplo n.º 11
0
def test_connected_component_unordered(label_store):
    '''The component of ``'a'`` must include all three stored labels.

    The labels form a triangle over ``a``, ``b`` and ``c``; one of them
    names ``'a'`` as its second content id.
    '''
    a_b = Label('a', 'b', '', 1)
    c_a = Label('c', 'a', '', 1)
    b_c = Label('b', 'c', '', 1)
    for lab in (a_b, c_a, b_c):
        label_store.put(lab)

    component = frozenset(label_store.connected_component('a'))
    assert component == frozenset([a_b, c_a, b_c])
Exemplo n.º 12
0
def test_sub_connected(label_store):
    '''The component of ``('a', '1')`` must follow matching subtopic ids only.

    ``a/1-b/2`` and ``b/2-c/3`` are expected in the component; the
    ``b/4-c/5`` label is expected to be left out.
    '''
    a1_b2 = Label('a', 'b', '', 1, '1', '2')
    b2_c3 = Label('b', 'c', '', 1, '2', '3')
    b4_c5 = Label('b', 'c', '', 1, '4', '5')
    for lab in (a1_b2, b2_c3, b4_c5):
        label_store.put(lab)

    component = frozenset(label_store.connected_component(('a', '1')))
    assert component == frozenset([a1_b2, b2_c3])
Exemplo n.º 13
0
def test_meta_storage(label_store):
    '''A label's ``meta`` entries must survive a put/get round trip.'''
    stored = Label('a', 'b', '', 1, '1', '2')
    entries = (
        ('hello', 'world'),
        ('subtopic1_name', 'foo'),
        ('some_num', 5),
        ('some_datastructure', [1, 2, 3]),
    )
    for key, value in entries:
        stored.meta[key] = value

    label_store.put(stored)
    fetched = label_store.get('a', 'b', '', subid1='1', subid2='2')
    assert stored == fetched
    assert stored.meta == fetched.meta
Exemplo n.º 14
0
def test_list_two(app, label_store):
    '''The ``list`` command must print one line per stored label.

    Two labels on the same content ids from different annotators (one
    positive, one negative) are stored with the same ``epoch_ticks``;
    the command output is compared against the exact expected text.
    '''
    ticks = 1234567890
    annotations = (('a1', CorefValue.Positive), ('a2', CorefValue.Negative))
    for annotator, value in annotations:
        label_store.put(Label('c1', 'c2', annotator, value, epoch_ticks=ticks))

    app.runcmd('list', [])

    expected = ('c1 ==(1) c2 by a1 at 2009-02-13 23:31:30\n'
                'c1 !=(0) c2 by a2 at 2009-02-13 23:31:30\n')
    assert app.stdout.getvalue() == expected
Exemplo n.º 15
0
    def _(cid1=id_, cid2=id_, ann=id_, v1=coref_value, v2=coref_value):
        '''``get`` must return the newer of two labels on the same ids.

        The second label uses the ids in swapped order and an
        ``epoch_ticks`` one greater than the first.
        '''
        label_store.delete_all()

        older = Label(cid1, cid2, ann, v1)
        newer = Label(cid2, cid1, ann, v2, epoch_ticks=older.epoch_ticks + 1)
        for lab in (older, newer):
            label_store.put(lab)

        fetched = label_store.get(cid1, cid2, ann)
        assert fetched == newer
        assert fetched != older
        assert fetched.value == newer.value
Exemplo n.º 16
0
def test_label_most_recent_first_unordered(cid1=id_, cid2=id_, ann=id_,
                                           v1=coref_value, v2=coref_value,
                                           t=time_value):
    '''The later label must sort first, even with swapped content ids.

    Also checks that the two labels are not equal and that
    ``Label.most_recent`` keeps only the later one.
    '''
    older = Label(cid1, cid2, ann, v1, epoch_ticks=t)
    newer = Label(cid2, cid1, ann, v2, epoch_ticks=t + 1)
    assert newer < older
    assert not (older == newer)

    in_order = sorted([older, newer])
    assert in_order == [newer, older]

    latest = list(Label.most_recent([newer, older]))
    assert latest == [newer]
Exemplo n.º 17
0
def test_sub_expand(label_store):
    '''``expand(('a', '1'))`` must add the implied ``a/1-c/3`` label.

    Three labels are stored; the expected expansion holds two of them
    plus one label that was never stored.
    '''
    a1_b2 = Label('a', 'b', '', 1, '1', '2')
    b2_c3 = Label('b', 'c', '', 1, '2', '3')
    # Stored, but expected to stay outside the subtopic expansion.
    b4_c5 = Label('b', 'c', '', 1, '4', '5')
    for lab in (a1_b2, b2_c3, b4_c5):
        label_store.put(lab)

    # Never put into the store, yet expected as part of the expansion.
    a1_c3 = Label('a', 'c', '', 1, '1', '3')

    expansion = frozenset(label_store.expand(('a', '1')))
    assert expansion == frozenset([a1_b2, b2_c3, a1_c3])
Exemplo n.º 18
0
 def _(cid1a=id_, cid1b=id_, ann1=id_, v1=coref_value, t1=time_value,
       cid2a=id_, cid2b=id_, ann2=id_, v2=coref_value, t2=time_value):
     '''Check ``everything(include_deleted=True)`` after storing two labels.

     When both labels have the same subject and the same
     ``epoch_ticks``, only the second is expected; in every other case
     both are expected, in sorted order.
     '''
     label_store.delete_all()
     first = Label(cid1a, cid1b, ann1, v1, epoch_ticks=t1)
     second = Label(cid2a, cid2b, ann2, v2, epoch_ticks=t2)
     for lab in (first, second):
         label_store.put(lab)

     overwritten = (first.same_subject_as(second)
                    and first.epoch_ticks == second.epoch_ticks)
     expected = [second] if overwritten else sorted([first, second])

     found = list(label_store.everything(include_deleted=True))
     assert found == expected
Exemplo n.º 19
0
 def _(cid1a=id_, cid1b=id_, ann1=id_, v1=coref_value, t1=time_value,
       cid2a=id_, cid2b=id_, ann2=id_, v2=coref_value, t2=time_value):
     '''Check ``everything(include_deleted=True)`` after storing two labels.

     When both labels have the same subject and the same
     ``epoch_ticks``, only the second is expected; in every other case
     both are expected, in sorted order.
     '''
     label_store.delete_all()
     first = Label(cid1a, cid1b, ann1, v1, epoch_ticks=t1)
     second = Label(cid2a, cid2b, ann2, v2, epoch_ticks=t2)
     for lab in (first, second):
         label_store.put(lab)

     overwritten = (first.same_subject_as(second)
                    and first.epoch_ticks == second.epoch_ticks)
     expected = [second] if overwritten else sorted([first, second])

     found = list(label_store.everything(include_deleted=True))
     assert found == expected
Exemplo n.º 20
0
def test_sub_direct_connect(label_store):
    '''``directly_connected(('a', '1'))`` must respect the subtopic id.

    Of the four stored labels, only the two whose ``'a'`` side carries
    subtopic ``'1'`` are expected back, in that order.
    '''
    a1_b2 = Label('a', 'b', '', 1, '1', '2')
    a1_c3 = Label('a', 'c', '', 1, '1', '3')
    b2_c3 = Label('b', 'c', '', 1, '2', '3')
    a4_b2 = Label('a', 'b', '', 1, '4', '2')
    for lab in (a1_b2, a1_c3, b2_c3, a4_b2):
        label_store.put(lab)

    # The query pins the subtopic of 'a' to '1', so the label whose 'a'
    # side has subtopic '4' must not appear.
    neighbours = list(label_store.directly_connected(('a', '1')))
    assert neighbours == [a1_b2, a1_c3]
Exemplo n.º 21
0
def test_connected_component_collision(label_store):
    '''The component of ``'test0'`` must contain the whole three-label chain.

    Regression-style check using ids that, per the original author's
    note below, give colliding xor-combined hashes.
    '''
    # Storing object hashes and assuming they never collide is unsafe.
    # hash(str) reacts poorly to small changes, and xoring field hashes
    # together (the commonly recommended technique) collides quickly.
    # According to the original note, hash('test0') ^ hash('test1') is
    # 1, and so is hash('test2') ^ hash('test3').
    t0_t1 = Label('test0', 'test1', '', 1)
    t1_t2 = Label('test1', 'test2', '', 1)
    t2_t3 = Label('test2', 'test3', '', 1)
    for lab in (t0_t1, t1_t2, t2_t3):
        label_store.put(lab)

    component = list(label_store.connected_component('test0'))
    assert component == [t0_t1, t1_t2, t2_t3]
Exemplo n.º 22
0
def negative_subtopic_labels(label_store, folders, cid, subid):
    '''Generate negative labels for the item ``(cid, subid)``.

    Labels are yielded lazily, in three phases:

    1. For every item in the subfolders reported by
       ``folders.parent_subfolders((cid, subid))``: stored labels that
       are directly connected to the item, are negative, and whose
       subtopic for that item matches the item's subtopic id.
    2. For every sibling subfolder in the same folders: a freshly built
       negative label between ``(cid, subid)`` and each item there.
    3. For every folder not visited in phase 2: a freshly built
       negative label between ``(cid, subid)`` and each item in each of
       its subfolders.

    :param label_store: object providing ``directly_connected``
    :param folders: object providing ``parent_subfolders``, ``items``,
        ``subfolders`` and ``folders``
    :param cid: content id of the item
    :param subid: subtopic id of the item
    '''
    def negative_label_to(other_cid, other_subid):
        # TODO: Fix annotator id here. (We need to push annotator
        # information down into the search engine; the rest is
        # trivial.) ---AG
        return Label(cid, other_cid, Folders.DEFAULT_ANNOTATOR_ID,
                     CorefValue.Negative, subid, other_subid)

    containing = list(folders.parent_subfolders((cid, subid)))

    # Phase 1: negative labels already stored for any item that shares
    # a subfolder with (cid, subid).
    for folder_id, own_subfolder in containing:
        for item_cid, item_subid in folders.items(folder_id, own_subfolder):
            for stored in label_store.directly_connected(item_cid):
                if stored.value == CorefValue.Negative:
                    if stored.subtopic_for(item_cid) == item_subid:
                        yield stored

    # Phase 2: items in the other subfolders of the same folder (topic)
    # become negatives.
    #
    # (cid, subid) could live in more than one subfolder, but in
    # SortingDesk `subid` is usually some kind of offset or hash, so
    # that is probably very unlikely.  If it does happen it is a user
    # error, and we just have to hope the model figures it out.
    visited_folders = set()
    for folder_id, own_subfolder in containing:
        visited_folders.add(folder_id)
        for cousin in folders.subfolders(folder_id):
            if cousin == own_subfolder:
                # A subfolder is never its own cousin.
                continue
            for item_cid, item_subid in folders.items(folder_id, cousin):
                yield negative_label_to(item_cid, item_subid)

    # Phase 3: once the above is exhausted, every item of every other
    # topic becomes a negative too.  Folders handled in phase 2 are
    # skipped.
    for folder_id in folders.folders():
        if folder_id not in visited_folders:
            for subfolder in folders.subfolders(folder_id):
                for item_cid, item_subid in folders.items(folder_id,
                                                          subfolder):
                    yield negative_label_to(item_cid, item_subid)
Exemplo n.º 23
0
def v1_label_put(request, response, visid_to_dbid, config, label_hooks,
                 label_store, cid1, cid2, annotator_id):
    '''Store a single label.

    Route:
    ``PUT /dossier/v1/labels/<content_id1>/<content_id2>/<annotator_id>``.

    The two content ids name the feature collections to associate, and
    ``annotator_id`` is a string identifying the human who created the
    label.  The request body carries the label value as one of
    ``-1`` (not coreferent), ``0`` ("I don't know if they are
    coreferent") or ``1`` (coreferent).

    The optional query parameters ``subtopic_id1`` and
    ``subtopic_id2`` may be given in any combination (neither, either
    or both); ``subtopic_id1`` refers to a subtopic in ``content_id1``
    and ``subtopic_id2`` to a subtopic in ``content_id2``.

    On successful storage the response status is set to ``201``.  Any
    existing labels with the given ids are overwritten.

    ``config`` and ``label_hooks`` are accepted but not used here.
    '''
    value = CorefValue(int(request.body.read()))
    dbid1 = visid_to_dbid(cid1)
    dbid2 = visid_to_dbid(cid2)
    subtopic1 = request.query.get('subtopic_id1')
    subtopic2 = request.query.get('subtopic_id2')

    label_store.put(Label(dbid1, dbid2, annotator_id, value,
                          subtopic_id1=subtopic1,
                          subtopic_id2=subtopic2))
    response.status = 201
Exemplo n.º 24
0
def diff_labels_sets(old, new):
    '''Return ``Label.diff(old, new)`` with each entry converted to a set.

    The result has exactly the keys ``'add'``, ``'delete'`` and
    ``'change'``.
    '''
    changes = Label.diff(old, new)
    return {kind: set(changes[kind]) for kind in ('add', 'delete', 'change')}
Exemplo n.º 25
0
def diff_labels_sets(old, new):
    '''Return ``Label.diff(old, new)`` with each entry converted to a set.

    The result has exactly the keys ``'add'``, ``'delete'`` and
    ``'change'``.
    '''
    changes = Label.diff(old, new)
    return {kind: set(changes[kind]) for kind in ('add', 'delete', 'change')}
Exemplo n.º 26
0
def test_content_id_order(cid1=id_, cid2=id_, ann=id_, v=coref_value):
    '''A label must hold its two content ids in ascending order.

    Both ids must be members of the label, ``content_id1`` must be the
    smaller one and ``content_id2`` the larger one.
    '''
    label = Label(cid1, cid2, ann, v)
    for cid in (cid1, cid2):
        assert cid in label
    assert label.content_id1 <= label.content_id2
    assert label.content_id1 == min(cid1, cid2)
    assert label.content_id2 == max(cid1, cid2)
Exemplo n.º 27
0
def test_subtopic_id(cid1=id_, cid2=id_, s1=id_, s2=id_, ann=id_,
                     v=coref_value):
    '''Check membership, ``other`` and ``subtopic_for`` with subtopic ids.

    Each content id must be a member of the label on its own, paired
    with ``None``, and paired with its subtopic id.  When both content
    ids are the same, ``subtopic_for`` is expected to give the smaller
    of the two subtopic ids.
    '''
    label = Label(cid1, cid2, ann, v, subtopic_id1=s1, subtopic_id2=s2)
    for cid, subid in ((cid1, s1), (cid2, s2)):
        assert cid in label
        assert (cid, None) in label
        assert (cid, subid) in label

    assert label.other(cid1) == cid2
    assert label.other(cid2) == cid1

    if cid1 != cid2:
        assert label.subtopic_for(cid1) == s1
        assert label.subtopic_for(cid2) == s2
    else:
        assert label.subtopic_for(cid1) == min(s1, s2)
Exemplo n.º 28
0
def build_demo_data(kvl):
    '''Populate a label store with a small set of demo labels.

    Builds a ``LabelStore`` on top of ``kvl`` and, for each subtopic of
    the fixed demo topic, stores one positive label (annotator
    ``'John'``) per ``(stream_id, subtopic_id2, rating)`` triple listed
    below.  Each label links the topic to the stream id.  Every stream
    id is printed as it is processed.

    :param kvl: storage handle passed straight to ``LabelStore``
    '''

    label_store = LabelStore(kvl)

    topic = 'where_are_aid_workers_housed_near_Monrovia'
    subtopics = ['Tanji_Fish_Curing_Site', 'Camp_Ramrod', 'Town_of_Wamba']
    # Keyed by index into `subtopics`; each entry is a list of
    # (stream_id, subtopic_id2, rating) triples.
    subtopic_to_documents = {
        0:
        [(random_sid(), '2100-%d|%s' % (len(subtopics[0]), subtopics[0]), 3),
         (random_sid(), '15-93|we_drove_out_to_the_other_side_' +
          'of_the_river_delta_to_a_small_fish_smoking_camp', 2)],
        1:
        [(random_sid(), '3120-%d|%s' % (len(subtopics[1]), subtopics[1]), 2),
         (random_sid(), '200-217|Ramrod_(Facility)', 3)],
        2:
        [(random_sid(), '3120-%d|%s' % (len(subtopics[2]), subtopics[2]), 3),
         (random_sid(), '53-63|Wamba_Town', 2),
         (random_sid(), '44-50|Woomba', 1)]
    }

    for idx, subtopic in enumerate(subtopics):
        for stream_id, subtopic_id2, rating in subtopic_to_documents[idx]:

            # Function-call form: a single parenthesised argument prints
            # identically under the Python 2 print statement and the
            # Python 3 print function.
            print(stream_id)

            label = Label(topic,
                          stream_id,
                          'John',
                          CorefValue.Positive,
                          subtopic_id1=subtopic,
                          subtopic_id2=subtopic_id2,
                          rating=rating)
            label_store.put(label)
Exemplo n.º 29
0
    def _(cid1=id_, cid2=id_, ann=id_, v=coref_value):
        '''``get`` with the content ids swapped must return the stored label.'''
        label_store.delete_all()

        stored = Label(cid1, cid2, ann, v)
        label_store.put(stored)

        fetched = label_store.get(cid2, cid1, ann)
        assert stored == fetched
        assert stored.value == fetched.value
Exemplo n.º 30
0
 def _(cid1a=id_, cid1b=id_, ann1=id_, v1=coref_value, t1=time_value,
       cid2a=id_, cid2b=id_, ann2=id_, v2=coref_value, t2=time_value):
     '''Check ``everything(content_id=cid1a)`` after storing two labels.

     Expectations encoded below:

     * same subject, equal ``epoch_ticks``: only the second label;
     * same subject, different ``epoch_ticks``: only the label that
       sorts first;
     * different subjects, but ``cid1a`` is one of the content ids
       given for the second label: both labels, in sorted order;
     * otherwise: only the first label.
     '''
     label_store.delete_all()
     first = Label(cid1a, cid1b, ann1, v1, epoch_ticks=t1)
     second = Label(cid2a, cid2b, ann2, v2, epoch_ticks=t2)
     for lab in (first, second):
         label_store.put(lab)

     if not first.same_subject_as(second):
         if cid1a == cid2a or cid1a == cid2b:
             expected = sorted([first, second])
         else:
             expected = [first]
     elif first.epoch_ticks == second.epoch_ticks:
         expected = [second]
     else:
         expected = sorted([first, second])[:1]

     found = list(label_store.everything(content_id=cid1a))
     assert found == expected
Exemplo n.º 31
0
def test_negative_label_inference(label_store):
    '''Check the pairs produced by ``negative_label_inference`` for ``c-d``.

    The store holds positive labels ``a-c``, ``b-c``, ``d-e``, ``d-f``
    and ``f-h`` plus negative labels ``c-d`` and ``d-g``.  The inferred
    labels are compared, as a set of content id pairs, against a fixed
    expected set.
    '''
    a_c = Label('a', 'c', '', 1)
    b_c = Label('b', 'c', '', 1)

    d_e = Label('d', 'e', '', 1)
    d_f = Label('d', 'f', '', 1)
    d_g = Label('d', 'g', '', -1)

    c_d = Label('c', 'd', '', -1)
    f_h = Label('f', 'h', '', 1)

    for lab in (a_c, b_c, d_e, d_f, c_d, d_g, f_h):
        label_store.put(lab)

    expected_pairs = frozenset([
        ('a', 'd'), ('b', 'd'), ('c', 'd'),
        ('c', 'e'), ('c', 'f'), ('c', 'h'),
    ])
    # [but not (a,b) <-/-> (e,f)]

    inferred = label_store.negative_label_inference(c_d)
    inferred_pairs = frozenset(
        (lab.content_id1, lab.content_id2) for lab in inferred)

    assert inferred_pairs == expected_pairs
Exemplo n.º 32
0
def test_negative_inference(label_store):
    '''Check the pairs produced by ``negative_inference('g')``.

    The store holds positive labels ``a-c``, ``b-c``, ``d-e``, ``d-f``
    and ``h-g`` plus negative labels ``c-g`` and ``d-g``.  The inferred
    labels are compared, as a set of content id pairs, against a fixed
    expected set.
    '''
    a_c = Label('a', 'c', '', 1)
    b_c = Label('b', 'c', '', 1)

    d_e = Label('d', 'e', '', 1)
    d_f = Label('d', 'f', '', 1)

    c_g = Label('c', 'g', '', -1)
    d_g = Label('d', 'g', '', -1)

    h_g = Label('h', 'g', '', 1)

    for lab in (a_c, b_c, d_e, d_f, c_g, d_g, h_g):
        label_store.put(lab)

    expected_pairs = frozenset([
        ('a', 'g'), ('b', 'g'), ('c', 'g'), ('c', 'h'),
        ('d', 'g'), ('d', 'h'), ('e', 'g'), ('f', 'g'),
    ])

    inferred = label_store.negative_inference('g')
    inferred_pairs = frozenset(
        (lab.content_id1, lab.content_id2) for lab in inferred)

    assert inferred_pairs == expected_pairs
Exemplo n.º 33
0
def test_label_most_recent_first_unordered(cid1=id_, cid2=id_, ann=id_,
                                           v1=coref_value, v2=coref_value,
                                           t=time_value):
    '''The later label must sort first, even with swapped content ids.

    Also checks that the two labels are not equal and that
    ``Label.most_recent`` keeps only the later one.
    '''
    older = Label(cid1, cid2, ann, v1, epoch_ticks=t)
    newer = Label(cid2, cid1, ann, v2, epoch_ticks=t + 1)
    assert newer < older
    assert not (older == newer)

    in_order = sorted([older, newer])
    assert in_order == [newer, older]

    latest = list(Label.most_recent([newer, older]))
    assert latest == [newer]
Exemplo n.º 34
0
def test_connected_component_many_most_recent_diff_value(label_store):
    '''A newer ``-1`` label on ``b-c`` must shrink the component of ``'a'``.

    Initially the chain ``a-b``, ``b-c``, ``c-d`` is fully positive and
    the component holds all three labels; after the newer negative
    ``b-c`` label is stored, only ``a-b`` is expected.
    '''
    a_b = Label('a', 'b', '', 1)
    b_c = Label('b', 'c', '', 1)
    c_d = Label('c', 'd', '', 1)
    for lab in (a_b, b_c, c_d):
        label_store.put(lab)

    before = frozenset(label_store.connected_component('a'))
    assert before == frozenset([a_b, b_c, c_d])

    # A later negative label for the same pair should replace the
    # positive `b-c` label and contract the component to just `a-b`.
    b_c_negative = Label('b', 'c', '', -1, epoch_ticks=b_c.epoch_ticks + 1)
    label_store.put(b_c_negative)

    after = frozenset(label_store.connected_component('a'))
    assert after == frozenset([a_b])
Exemplo n.º 35
0
def test_connected_component_many_most_recent(label_store):
    '''A newer ``1`` label on ``b-c`` must grow the component of ``'a'``.

    Initially ``b-c`` is negative and the component holds only
    ``a-b``; after the newer positive ``b-c`` label is stored, all
    three labels of the chain are expected.
    '''
    a_b = Label('a', 'b', '', 1)
    b_c = Label('b', 'c', '', -1)
    c_d = Label('c', 'd', '', 1)
    for lab in (a_b, b_c, c_d):
        label_store.put(lab)

    before = frozenset(label_store.connected_component('a'))
    assert before == frozenset([a_b])

    # A later positive label for the same pair should replace the
    # negative `b-c` label and extend the component to `c-d` through
    # transitivity.
    b_c_positive = Label('b', 'c', '', 1, epoch_ticks=b_c.epoch_ticks + 1)
    label_store.put(b_c_positive)

    after = frozenset(label_store.connected_component('a'))
    assert after == frozenset([a_b, b_c_positive, c_d])
Exemplo n.º 36
0
def test_subtopic_order(cid=id_, s1=id_, s2=id_, ann=id_, v=coref_value):
    '''With equal content ids, subtopic ids must be held in ascending order.

    The content id must be a member of the label on its own and paired
    with either subtopic id; ``subtopic_id1`` must be the smaller
    subtopic id and ``subtopic_id2`` the larger one.
    '''
    label = Label(cid, cid, ann, v, subtopic_id1=s1, subtopic_id2=s2)
    for needle in (cid, (cid, s1), (cid, s2)):
        assert needle in label
    assert label.content_id1 == cid
    assert label.content_id2 == cid
    assert label.subtopic_id1 <= label.subtopic_id2
    assert label.subtopic_id1 == min(s1, s2)
    assert label.subtopic_id2 == max(s1, s2)
Exemplo n.º 37
0
def dict_to_label(d):
    '''Build a ``Label`` from the mapping ``d``.

    ``content_id1``, ``content_id2``, ``annotator_id`` and ``value``
    are required keys (a missing one raises ``KeyError``); ``value`` is
    wrapped in ``CorefValue``.  ``subtopic_id1``, ``subtopic_id2``,
    ``epoch_ticks`` and ``rating`` are optional and passed as ``None``
    when absent.
    '''
    fields = {
        'content_id1': d['content_id1'],
        'content_id2': d['content_id2'],
        'annotator_id': d['annotator_id'],
        'value': CorefValue(d['value']),
    }
    # Per the original author's note, an `epoch_ticks` of None will
    # become time.time().
    for key in ('subtopic_id1', 'subtopic_id2', 'epoch_ticks', 'rating'):
        fields[key] = d.get(key)
    return Label(**fields)