Пример #1
0
def test_add_grouped():
    """Check that ``add_grouped`` links each box to every fruit grouped with it.

    The story data holds one row per box, with the list of fruit ids grouped
    under that box; applying the operation must produce one relation per
    (box, fruit) pair, in the order the fruits were listed.
    """
    expected = [
        ("b1", ["f11", "f12", "f13", "f14"]),
        ("b2", ["f21", "f22", "f23", "f24"]),
    ]

    story_data = pd.DataFrame({
        "boxes": [box for box, _ in expected],
        "fruits": [list(fruits) for _, fruits in expected],
    })

    relationship = Relationship(seed=1)
    add_grouped_op = relationship.ops.add_grouped(
        from_field="boxes", grouped_items_field="fruits")

    add_grouped_op(story_data)

    # each box must be the origin of as many relations as it has fruits (4)
    for box, fruits in expected:
        origins = relationship.get_relations(from_ids=[box])["from"].tolist()
        assert origins == [box] * len(fruits)

    # and those relations must point to the fruits listed for that box
    for box, fruits in expected:
        targets = relationship.get_relations(from_ids=[box])["to"].tolist()
        assert targets == fruits
Пример #2
0
def test_select_many_with_drop_should_remove_elements():
    """Check that ``select_many(remove_selected=True)`` removes what it selects.

    A private relationship is built with 100 "to" ids for each of the "from"
    ids a, b, c and d. Several sets are then selected from it with
    ``remove_selected=True``, and every selected "to" id must no longer be
    present among the relations of the "from" id it was selected for.
    """
    story_data_index = build_ids(5, prefix="cl_", max_length=1)

    # private relationship: the selection below drops elements from it, so we
    # must not run it against a relationship shared with other tests
    four_to_plenty_copy = Relationship(seed=1)
    for i in range(100):
        four_to_plenty_copy.add_relations(
            from_ids=["a", "b", "c", "d"],
            to_ids=["a_%d" % i, "b_%d" % i,
                    "c_%d" % i, "d_%d" % i])

    # the selection is indexed by story id, so remember which "from" id each
    # story row asked for in order to look up the right relations afterwards
    requested_from_ids = ["a", "b", "c", "b", "a"]
    from_id_by_story = dict(zip(story_data_index, requested_from_ids))

    selection = four_to_plenty_copy.select_many(
        from_ids=pd.Series(requested_from_ids, index=story_data_index),
        named_as="selected_sets",
        quantities=[4, 5, 6, 7, 8],
        remove_selected=True,
        discard_empty=False)

    # makes sure all selected values have been removed
    for story_id in selection.index:
        from_id = from_id_by_story[story_id]
        relations = four_to_plenty_copy.get_relations(from_ids=[from_id])

        # compare against the values: `x in series` would test the index
        remaining_to_ids = set(relations["to"].tolist())

        for to_id in selection.loc[story_id]["selected_sets"].tolist():
            assert to_id not in remaining_to_ids
Пример #3
0
def test_select_many_several_times_with_pop_should_empty_all_data():
    """Drain a relationship through repeated ``select_many`` calls.

    "id1", "id2" and "id3" start with 2500, 1500 and 500 relations. Each round
    requests 1000 items from each of them with ``remove_selected=True``, so
    the ids run dry one after the other. Since ``discard_empty`` is False, an
    exhausted id must still be present in the result, with an empty selection,
    and the relationship must end up (and stay) empty.
    """
    rel = Relationship(seed=1234)
    froms = ["id1"] * 2500 + ["id2"] * 1500 + ["id3"] * 500
    # seeded generator, so the "to" side is identical on every run
    tos = np.random.RandomState(1234).choice(a=range(10), size=len(froms))
    rel.add_relations(from_ids=froms, to_ids=tos)

    assert rel.get_relations().shape[0] == 2500 + 1500 + 500

    # one entry per selection round:
    # (sizes selected for f1, f2, f3 ; relations left after the round)
    expected_rounds = [
        # first round: only 500 can be obtained from "id3", which is then
        # exhausted
        ([1000, 1000, 500], 1500 + 500 + 0),
        # second round: only 500 are left in "id2" and nothing in "id3"
        ([1000, 500, 0], 500 + 0 + 0),
        # third round: "id1" gives its last 500, the relationship is now empty
        ([500, 0, 0], 0 + 0 + 0),
        # fourth round: selecting from a fully empty relationship
        ([0, 0, 0], 0),
    ]

    for expected_sizes, expected_remaining in expected_rounds:
        # fresh arguments on every round, always asking 1000 from each id
        selection = rel.select_many(
            from_ids=pd.Series(["id1", "id2", "id3"],
                               index=["f1", "f2", "f3"]),
            named_as="the_selection",
            quantities=[1000, 1000, 1000],
            remove_selected=True,
            discard_empty=False)

        assert selection.columns.tolist() == ["the_selection"]

        # discard_empty is False => every requested row is present, even when
        # nothing could be selected for it
        assert sorted(selection.index.tolist()) == ["f1", "f2", "f3"]

        selection_sizes = selection["the_selection"].map(len)
        assert selection_sizes[["f1", "f2", "f3"]].tolist() == expected_sizes

        # remove_selected => the relationship shrinks by what was selected
        assert rel.get_relations().shape[0] == expected_remaining