def test_l2():
    """Check `l2` against hand-computed Euclidean distances.

    The expected values involve square roots, so they are compared with
    `math.isclose` instead of exact float equality; a last-bit rounding
    difference in the implementation should not fail the test.
    """
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test2.ann')
    assert math.isclose(l2(a, b), 23230 * np.sqrt(2))

    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(l2(a, b), math.sqrt(3**2 + 2**2))
def test_jensen_shannon():
    """Check `jensen_shannon` on fixture pairs whose distance is 0 or 1.

    The distance is built from logarithms and a square root, so results are
    compared with an absolute tolerance rather than exact float equality.
    """
    import math  # local import: this block does not otherwise rely on `math`

    # A rounding error of ~1e-16 in the divergence becomes ~1e-8 after the
    # square root, so the tolerance is chosen comfortably above that.
    tol = 1e-6

    # two identical samples gives distance 0
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test.ann')
    assert math.isclose(jensen_shannon(a, b), 0, abs_tol=tol)

    # two samples which when normalized are identical also give a distance of 0
    # a = <0, 2>, b = <0, 4>
    # a' = a/|a| = <0, 1>, b' = <0, 1>
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test9.ann')
    assert math.isclose(jensen_shannon(a, b), 0, abs_tol=tol)

    # a = <0, 1>, b = <1, 0>
    # m = (a + b)/2 = <0.5, 0.5>
    # D(a || m) = 0*log(0/0.5) + 1*log(1/0.5) = 0 + 1*1 = 1 (using log base 2)
    # D(b || m) = 1
    # JSdist(a, b) = sqrt((D(a || m) + D(b || m)) / 2) = sqrt((1 + 1) / 2) = 1
    a = get_series('cdr3.test10.ann')
    b = get_series('cdr3.test11.ann')
    assert math.isclose(jensen_shannon(a, b), 1, abs_tol=tol)

    # make sure that things are aligned by index
    a = get_series('cdr3.test10.ann')
    b = get_series('cdr3.test13.ann')
    assert math.isclose(jensen_shannon(a, b), 1, abs_tol=tol)

    # check that a fill value of 0 is used for missing cdr3s
    a = get_series('cdr3.test10.ann')
    b = get_series('cdr3.test12.ann')
    assert math.isclose(jensen_shannon(a, b), 1, abs_tol=tol)
def test_jaccard_index():
    """Check `jaccard_index` on fixture pairs with known expected ratios."""
    # (first file, second file, expected index)
    cases = [
        ('cdr3.test3.ann', 'cdr3.test4.ann', 1 / 2),
        ('cdr3.test3.ann', 'cdr3.test5.ann', 1 / 3),
    ]
    for first_name, second_name, expected in cases:
        first = get_series(first_name)
        second = get_series(second_name)
        assert jaccard_index(first, second) == expected
示例#4
0
def test_min():
    """Row-wise minimum of two series placed side by side in a DataFrame.

    Entries missing from either series are filled with 0 before the
    minimum is taken.
    """
    left = get_series('cdr3.test6.ann')
    right = get_series('cdr3.test7.ann')
    frame = pd.DataFrame({'a': left, 'b': right}).fillna(0)
    row_min = frame.apply(min, axis=1)

    expected = {'a': 1, 'b': 3, 'c': 0, 'd': 0, 'e': 7}
    assert len(row_min) == 5
    for label, value in expected.items():
        assert row_min[label] == value
示例#5
0
def test_add():
    """Folding `.add()` over a list of series matches a single `.add()` call."""
    # reference result: one direct .add() call
    left = get_series('cdr3.test.ann')
    right = get_series('cdr3.test2.ann')
    expected = left.add(right, fill_value=0)

    def add_filled(x, y):
        # missing entries on either side are treated as 0
        return x.add(y, fill_value=0)

    # the reduce form works for any number of series
    left = get_series('cdr3.test.ann')
    right = get_series('cdr3.test2.ann')
    total = functools.reduce(add_filled, [left, right])
    assert total.equals(expected)
def test_get_pairwise_distances():
    """Check `get_pairwise_distances` for 0, 1, 2 and 3 input vectors."""
    # no vectors -> no pairs
    no_vectors = get_pairwise_distances([], jaccard)
    assert no_vectors == []

    # a single vector has nothing to be paired with
    only = get_series('cdr3.test3.ann')
    single = get_pairwise_distances([only], jaccard)
    assert single == []

    # two copies of the same vector -> one pair at distance 0
    same = get_series('cdr3.test3.ann')
    pair = get_pairwise_distances([same, same], jaccard)
    assert pair == [0]

    # three vectors -> three pairs
    first = get_series('cdr3.test3.ann')
    second = get_series('cdr3.test4.ann')
    triple = get_pairwise_distances([first, first, second], lp(1))
    assert triple == [0, 5, 5]
def test_lp():
    """Check the distance returned by `lp(p)` for p = 1, 2 and 3.

    The p=2 and p=3 expectations involve roots, so they are compared with
    `math.isclose` instead of exact float equality. The p=1 expectation is
    a whole number and stays an exact comparison.
    """
    # p=1
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert lp(1)(a, b) == 3 + 2

    # p=2
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test2.ann')
    assert math.isclose(lp(2)(a, b), 23230 * np.sqrt(2))
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(lp(2)(a, b), math.sqrt(3**2 + 2**2))

    # p=3
    a = get_series('cdr3.test3.ann')
    b = get_series('cdr3.test4.ann')
    assert math.isclose(lp(3)(a, b), (3**3 + 2**3)**(1 / 3))
def test_linfty():
    """Check `linfty` on fixture pairs with known maximum differences."""
    # (first file, second file, expected distance)
    cases = [
        # two identical samples gives distance 0
        ('cdr3.test.ann', 'cdr3.test.ann', 0),
        # between two lines, the first one has the max diff
        ('cdr3.test3.ann', 'cdr3.test4.ann', 3),
        # a letter present in one sample and missing in the other has the max diff
        ('cdr3.test6.ann', 'cdr3.test7.ann', 6),
        # the max diff isn't from the first line in the file
        ('cdr3.test7.ann', 'cdr3.test8.ann', 1),
    ]
    for first_name, second_name, expected in cases:
        first = get_series(first_name)
        second = get_series(second_name)
        assert linfty(first, second) == expected
示例#9
0
def test_make_series_compatible():
    """Smoke test: call `make_series_compatible` on two fixture series.

    NOTE(review): nothing is asserted about the returned pair; the test
    only checks that the call and the two-way unpacking succeed.
    """
    first = get_series('cdr3.test4.ann')
    second = get_series('cdr3.test5.ann')
    inputs = [first, second]
    first, second = make_series_compatible(inputs)
示例#10
0
def test_init():
    """Smoke test: `get_series` must load this file without raising."""
    filename = 'cdr3.a.A_2000_2001_d_00_47407.ann'
    get_series(filename)
示例#11
0
def test_l2_distance():
    """Compute the L2 distance by hand with pandas and check its value.

    The original computed the distance but never asserted on it, so the
    test could not fail on a wrong result.
    """
    a = get_series('cdr3.test.ann')
    b = get_series('cdr3.test2.ann')
    # entries missing from one series count as 0 in the difference
    diff = a.subtract(b, fill_value=0)
    c = np.sqrt((diff**2).sum())
    # Expected value is the one test_l2 / test_lp (p=2) use for this same
    # fixture pair. NOTE(review): assumes those metrics are the plain
    # Euclidean distance with a fill value of 0 - confirm.
    assert np.isclose(c, 23230 * np.sqrt(2))
def test_weighted_jaccard_index():
    """Check `weighted_jaccard_index` on one fixture pair with a known ratio."""
    expected = 11 / 24
    first = get_series('cdr3.test6.ann')
    second = get_series('cdr3.test7.ann')
    assert weighted_jaccard_index(first, second) == expected