Пример #1
0
def test_threshold_match_children_2d():
    """Check that 2D populations are renamed after the child they match.

    Two children ("positive" and "negative") are registered on the gate and
    two populations with placeholder names ("p1", "p2") are passed to
    ``_match_to_children``. The test expects each returned population to
    carry the name of the child whose definition covers it.
    """
    gate_2d = gate.ThresholdGate(gate_name="test",
                                 parent="test parent",
                                 x="X",
                                 y="Y",
                                 method="density")
    events = pd.DataFrame({
        "X": np.random.normal(loc=1., scale=1.5, size=1000),
        "Y": np.random.normal(loc=1., scale=1.5, size=1000),
    })

    # Register the reference children the populations are matched against.
    for child_name, child_definition in (("positive", "++,+-"),
                                         ("negative", "--,-+")):
        gate_2d.add_child(
            gate.ChildThreshold(name=child_name,
                                definition=child_definition,
                                geom=ThresholdGeom(x_threshold=0.5)))

    # Candidate populations carry placeholder names before matching.
    candidate_pos = gate.Population(
        population_name="p1",
        parent="root",
        definition="++",
        geom=ThresholdGeom(x_threshold=0.6),
        index=events[events.X >= 0.6].index.values)
    candidate_neg = gate.Population(
        population_name="p2",
        parent="root",
        definition="--,-+",
        geom=ThresholdGeom(x_threshold=0.6),
        index=events[events.X < 0.6].index.values)

    matched = gate_2d._match_to_children([candidate_neg, candidate_pos])

    positive_hits = [p for p in matched if p.definition == "++"]
    assert positive_hits[0].population_name == "positive"
    negative_hits = [p for p in matched if p.definition == "--,-+"]
    assert negative_hits[0].population_name == "negative"
Пример #2
0
def test_threshold_match_children_1d():
    """Check that 1D populations are renamed after the child they match.

    Two children ("positive" for "+" and "negative" for "-") are registered
    on the gate, then two populations with placeholder names are passed to
    ``_match_to_children``. Each returned population is expected to carry the
    name of the child with the same definition.
    """
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   method="density")
    data = np.random.normal(loc=1., scale=1.5, size=1000)
    threshold.add_child(
        gate.ChildThreshold(name="positive",
                            definition="+",
                            geom=ThresholdGeom(x_threshold=0.5)))
    threshold.add_child(
        gate.ChildThreshold(name="negative",
                            definition="-",
                            geom=ThresholdGeom(x_threshold=0.5)))
    # The index holds event positions (not the sampled values), mirroring the
    # 2D variant of this test, which passes DataFrame index values.
    pos = gate.Population(population_name="p1",
                          parent="root",
                          definition="+",
                          geom=ThresholdGeom(x_threshold=0.6),
                          index=np.where(data >= 0.6)[0])
    # The negative population covers the events *below* the threshold; it
    # previously reused the positive mask.
    neg = gate.Population(population_name="p2",
                          parent="root",
                          definition="-",
                          geom=ThresholdGeom(x_threshold=0.6),
                          index=np.where(data < 0.6)[0])
    pops = threshold._match_to_children([neg, pos])
    pos = [p for p in pops if p.definition == "+"][0]
    assert pos.population_name == "positive"
    neg = [p for p in pops if p.definition == "-"][0]
    assert neg.population_name == "negative"
Пример #3
0
def test_threshold_add_child_invalid_1d(d):
    """Adding a child with an invalid definition to a 1D gate must fail.

    ``d`` is the child definition under test. It is presumably supplied by a
    ``pytest.mark.parametrize`` decorator or a fixture that is not visible in
    this excerpt (TODO confirm).
    """
    gate_1d = gate.ThresholdGate(gate_name="test",
                                 parent="test parent",
                                 method="manual",
                                 x="X")
    geometry = ThresholdGeom(x="X", x_threshold=0.56, y_threshold=0.75)
    invalid_child = gate.ChildThreshold(name="test child",
                                        definition=d,
                                        geom=geometry)
    expected_message = "Invalid child definition, should be either '+' or '-'"

    with pytest.raises(AssertionError) as err:
        gate_1d.add_child(invalid_child)

    assert str(err.value) == expected_message
Пример #4
0
def test_threshold_predict_2d():
    """Fit a 2D density gate, then check ``predict`` on fresh data.

    The gate is fitted on three blobs and applied to a two-blob dataset. The
    test expects four populations ("++", "--", "-+", "+-") whose geometry
    mirrors the gate and whose indexes equal the rows selected by the fitted
    x/y thresholds.
    """
    blobs, _ = make_blobs(n_samples=3000,
                          n_features=2,
                          centers=[(1., 1.), (1., 5.), (5., 0.2)],
                          random_state=42)
    training = pd.DataFrame({"X": blobs[:, 0], "Y": blobs[:, 1]})
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   y="Y",
                                   method="density")
    threshold.fit(data=training)

    blobs, _ = make_blobs(n_samples=3000,
                          n_features=2,
                          centers=[(1., 1.), (5., 0.2)],
                          random_state=42)
    new_data = pd.DataFrame({"X": blobs[:, 0], "Y": blobs[:, 1]})
    pops = threshold.predict(new_data)

    # Structural checks: one Population per quadrant, geometry copied from
    # the gate.
    assert len(pops) == 4
    for pop in pops:
        assert isinstance(pop, gate.Population)
    for pop in pops:
        assert isinstance(pop.geom, ThresholdGeom)
    for pop in pops:
        assert pop.geom.x == threshold.x
    for pop in pops:
        assert pop.geom.y == threshold.y
    for pop in pops:
        assert pop.geom.transform_x == threshold.transformations.get("x")
    for pop in pops:
        assert pop.geom.transform_y == threshold.transformations.get("y")
    found_definitions = [p.definition for p in pops]
    for quadrant in ["++", "--", "-+", "+-"]:
        assert quadrant in found_definitions

    # Expected membership of each quadrant, derived from the fitted cut-offs.
    x_cut = threshold.children[0].geom.x_threshold
    y_cut = threshold.children[0].geom.y_threshold
    x_below = new_data.X < x_cut
    x_above = new_data.X >= x_cut
    y_below = new_data.Y < y_cut
    y_above = new_data.Y >= y_cut
    expected_index = {
        "--": new_data[x_below & y_below].index.values,
        "++": new_data[x_above & y_above].index.values,
        "-+": new_data[x_below & y_above].index.values,
        "+-": new_data[x_above & y_below].index.values,
    }

    for quadrant, wanted in expected_index.items():
        population = [p for p in pops if p.definition == quadrant][0]
        assert np.array_equal(population.index, wanted)
Пример #5
0
def test_threshold_fit_1d():
    """Fit a 1D density gate on a seeded three-component normal mixture.

    The test expects two children ("+" and "-") that share one x threshold,
    and that threshold to round to 4.
    """
    np.random.seed(42)
    # (loc, scale, size) of each mixture component, drawn in this order.
    components = [(0.2, 1, 500), (2.5, 0.2, 250), (6.5, 0.5, 500)]
    samples = [
        np.random.normal(loc=loc, scale=scale, size=size)
        for loc, scale, size in components
    ]
    data = pd.DataFrame({"X": np.hstack(samples)})

    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   method="density")
    threshold.fit(data=data)

    assert len(threshold.children) == 2
    first_cut = threshold.children[0].geom.x_threshold
    second_cut = threshold.children[1].geom.x_threshold
    assert first_cut == second_cut
    assert round(threshold.children[0].geom.x_threshold) == 4
    definitions = [c.definition for c in threshold.children]
    for sign in ["+", "-"]:
        assert sign in definitions
Пример #6
0
def test_threshold_add_child():
    """Check that ``add_child`` stores the child and fills in its geometry.

    The child is created with thresholds only. After ``add_child`` the test
    expects its geometry to carry the gate's ``x``/``y`` variables and the
    gate's x transformation ("logicle"), with no y transformation set.
    """
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   y="Y",
                                   method="manual",
                                   transformations={"x": "logicle"})
    child = gate.ChildThreshold(name="test child",
                                definition="++",
                                geom=ThresholdGeom(x_threshold=0.56,
                                                   y_threshold=0.75))
    threshold.add_child(child)
    # Exactly one child was added to a fresh gate; a bare truthiness check
    # would also pass if the child had been stored more than once.
    assert len(threshold.children) == 1
    assert threshold.children[0].geom.x == threshold.x
    assert threshold.children[0].geom.y == threshold.y
    assert threshold.children[0].geom.transform_x == "logicle"
    assert not threshold.children[0].geom.transform_y
Пример #7
0
def test_threshold_fit_2d():
    """Fit a 2D density gate on three blobs and inspect its children.

    The test expects four children, one per quadrant definition, all sharing
    a single x threshold and a single y threshold, each lying strictly
    between 2 and 4.
    """
    blobs, _ = make_blobs(n_samples=3000,
                          n_features=2,
                          centers=[(1., 1.), (1., 5.), (5., 0.2)],
                          random_state=42)
    data = pd.DataFrame({"X": blobs[:, 0], "Y": blobs[:, 1]})
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   y="Y",
                                   method="density")
    threshold.fit(data)

    assert len(threshold.children) == 4
    # Every child must report the same pair of cut-offs.
    distinct_x = {c.geom.x_threshold for c in threshold.children}
    assert len(distinct_x) == 1
    distinct_y = {c.geom.y_threshold for c in threshold.children}
    assert len(distinct_y) == 1

    definitions = [c.definition for c in threshold.children]
    for quadrant in ["++", "--", "+-", "-+"]:
        assert quadrant in definitions

    assert 2 < threshold.children[0].geom.x_threshold < 4
    assert 2 < threshold.children[0].geom.y_threshold < 4
Пример #8
0
def test_threshold_fit_predict_2d():
    """Fit and label a 2D gate, then ``fit_predict`` on a new dataset.

    Three of the four quadrant definitions are given the same label
    ("Other"), so the test expects ``fit_predict`` to return two populations:
    "Top left" with definition "++", and "Other" whose definition is the
    comma-joined set of the remaining three quadrants.
    """
    four_blobs, _ = make_blobs(n_samples=4000,
                               n_features=2,
                               centers=[(1., 1.), (1., 7.), (7., 2.),
                                        (7., 6.2)],
                               random_state=42)
    training = pd.DataFrame({"X": four_blobs[:, 0], "Y": four_blobs[:, 1]})
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   y="Y",
                                   method="density")
    threshold.fit(training)

    quadrant_labels = {
        "++": "Top left",
        "--": "Other",
        "-+": "Other",
        "+-": "Other"
    }
    threshold.label_children(quadrant_labels)

    three_blobs, _ = make_blobs(n_samples=3000,
                                n_features=2,
                                centers=[(1., 1.), (1., 7.), (7., 6.2)],
                                random_state=42)
    unseen = pd.DataFrame({"X": three_blobs[:, 0], "Y": three_blobs[:, 1]})
    pops = threshold.fit_predict(data=unseen)

    # Structural checks on the returned populations.
    assert len(pops) == 2
    for pop in pops:
        assert isinstance(pop, gate.Population)
    for pop in pops:
        assert isinstance(pop.geom, ThresholdGeom)
    for pop in pops:
        assert pop.geom.x == threshold.x
    for pop in pops:
        assert pop.geom.y == threshold.y
    for pop in pops:
        assert pop.geom.transform_x == threshold.transformations.get("x")
    for pop in pops:
        assert pop.geom.transform_y == threshold.transformations.get("y")

    # Look the populations up by the labels assigned above.
    top_left = [p for p in pops if p.population_name == "Top left"][0]
    other = [p for p in pops if p.population_name == "Other"][0]
    assert top_left.definition == "++"
    assert {"+-", "-+", "--"} == set(other.definition.split(","))

    top_left_size = len(top_left.index)
    other_size = len(other.index)
    assert top_left_size < other_size
    assert top_left_size > 900
    assert other_size > 1900
Пример #9
0
def test_threshold_fit_predict_1d():
    """Fit and label a 1D gate, then ``fit_predict`` on a new dataset.

    The gate is fitted on a three-component normal mixture and its children
    labelled "Positive"/"Negative". ``fit_predict`` is then run on a mixture
    dominated by the high component. The test expects two populations, named
    after the labelled children, with the positive one clearly larger.
    """
    # Seed the global generator so the size-based assertions below do not
    # depend on a lucky draw (same seed as test_threshold_fit_1d).
    np.random.seed(42)
    n1 = np.random.normal(loc=0.2, scale=1, size=500)
    n2 = np.random.normal(loc=2.5, scale=0.2, size=250)
    n3 = np.random.normal(loc=6.5, scale=0.5, size=500)
    data = pd.DataFrame({"X": np.hstack([n1, n2, n3])})
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   method="density")
    threshold.fit(data=data)
    threshold.label_children({"+": "Positive", "-": "Negative"})
    new_data = pd.DataFrame({
        "X":
        np.hstack([
            np.random.normal(loc=0.2, scale=1, size=200),
            np.random.normal(loc=6.5, scale=0.5, size=1000)
        ])
    })
    pops = threshold.fit_predict(new_data)

    # Structural checks: geometry mirrors the gate (y is unset for a 1D gate
    # on both sides of the comparison).
    assert len(pops) == 2
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    assert all(p.geom.transform_x == threshold.transformations.get("x")
               for p in pops)
    assert all(p.geom.transform_y == threshold.transformations.get("y")
               for p in pops)
    assert all(i in [p.definition for p in pops] for i in ["+", "-"])

    # Populations must inherit the labels assigned to the children.
    pos_pop = [p for p in pops if p.definition == "+"][0]
    assert pos_pop.population_name == "Positive"
    neg_pop = [p for p in pops if p.definition == "-"][0]
    assert neg_pop.population_name == "Negative"

    # 1000 events were drawn around 6.5 and only 200 around 0.2.
    assert len(pos_pop.index) > len(neg_pop.index)
    assert len(pos_pop.index) > 800
    assert len(neg_pop.index) < 300
Пример #10
0
def test_threshold_predict_1d():
    """Fit a 1D density gate, then check ``predict`` on fresh data.

    The test expects two populations ("+" and "-") whose geometry mirrors the
    gate and whose indexes equal the rows of the new data at or above, and
    below, the fitted x threshold respectively.
    """
    # Seed the global generator so the fitted threshold and the new data are
    # reproducible between runs (same seed as test_threshold_fit_1d).
    np.random.seed(42)
    n1 = np.random.normal(loc=0.2, scale=1, size=500)
    n2 = np.random.normal(loc=2.5, scale=0.2, size=250)
    n3 = np.random.normal(loc=6.5, scale=0.5, size=500)
    data = pd.DataFrame({"X": np.hstack([n1, n2, n3])})
    threshold = gate.ThresholdGate(gate_name="test",
                                   parent="test parent",
                                   x="X",
                                   method="density")
    threshold.fit(data=data)
    new_data = pd.DataFrame({
        "X":
        np.hstack([
            np.random.normal(loc=0.2, scale=1, size=500),
            np.random.normal(loc=6.5, scale=0.5, size=500)
        ])
    })
    pops = threshold.predict(new_data)

    # Structural checks: geometry mirrors the gate (y is unset for a 1D gate
    # on both sides of the comparison).
    assert len(pops) == 2
    assert all(isinstance(p, gate.Population) for p in pops)
    assert all(isinstance(p.geom, ThresholdGeom) for p in pops)
    assert all(p.geom.x == threshold.x for p in pops)
    assert all(p.geom.y == threshold.y for p in pops)
    assert all(p.geom.transform_x == threshold.transformations.get("x")
               for p in pops)
    assert all(p.geom.transform_y == threshold.transformations.get("y")
               for p in pops)
    assert all(i in [p.definition for p in pops] for i in ["+", "-"])

    # Expected membership derived directly from the fitted cut-off.
    x_cut = threshold.children[0].geom.x_threshold
    neg_idx = new_data[new_data.X < x_cut].index.values
    pos_idx = new_data[new_data.X >= x_cut].index.values
    pos_pop = [p for p in pops if p.definition == "+"][0]
    neg_pop = [p for p in pops if p.definition == "-"][0]
    assert np.array_equal(neg_pop.index, neg_idx)
    assert np.array_equal(pos_pop.index, pos_idx)