def test_compute_distances12(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Regression test for #174: gracefully handle the case of no distances.

    Both primary CDR3 columns are overwritten with NaN before the distances
    are computed; the test asserts that the resulting 5x5 distance matrix is
    all zeros.
    """
    for cdr3_column in ("IR_VJ_1_cdr3", "IR_VDJ_1_cdr3"):
        adata_cdr3.obs[cdr3_column] = np.nan

    # Both receptor arms, primary chain only.
    neighbors = IrNeighbors(
        adata_cdr3,
        metric=adata_cdr3_mock_distance_calculator,
        receptor_arms="all",
        dual_ir="primary_only",
        sequence="aa",
        cutoff=0,
    )
    neighbors.compute_distances()

    print(neighbors.dist.toarray())
    npt.assert_equal(neighbors.dist.toarray(), np.zeros((5, 5)))
def test_compute_distances11(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Check the distance matrix for ``receptor_arms="all"`` / ``dual_ir="all"``.

    Uses the mock distance calculator fixture as metric on amino-acid
    sequences and compares the dense distance matrix with a fixed
    expectation.
    """
    neighbors = IrNeighbors(
        adata_cdr3,
        metric=adata_cdr3_mock_distance_calculator,
        receptor_arms="all",
        dual_ir="all",
        sequence="aa",
    )
    neighbors.compute_distances()

    print(neighbors.dist.toarray())

    expected_dist = np.array(
        [
            [1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ]
    )
    npt.assert_equal(neighbors.dist.toarray(), expected_dist)
def test_compute_distances6(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Check distances for both receptor arms, primary chain only.

    Uses the mock distance calculator fixture as metric on amino-acid
    sequences and compares the dense distance matrix with a fixed
    expectation.
    """
    neighbors = IrNeighbors(
        adata_cdr3,
        metric=adata_cdr3_mock_distance_calculator,
        receptor_arms="all",
        dual_ir="primary_only",
        sequence="aa",
    )
    neighbors.compute_distances()

    print(neighbors.dist.toarray())

    expected_dist = np.array(
        [
            [1, 13, 0, 0, 0],
            [13, 1, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1],
        ]
    )
    npt.assert_equal(neighbors.dist.toarray(), expected_dist)
def test_compute_distances3(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Check distances for a single chain (VJ, primary only) with a custom metric.

    Asserts both the number of stored entries in the sparse distance matrix
    and its dense content.
    """
    neighbors = IrNeighbors(
        adata_cdr3,
        metric=adata_cdr3_mock_distance_calculator,
        receptor_arms="VJ",
        dual_ir="primary_only",
        sequence="aa",
    )
    neighbors.compute_distances()

    assert neighbors.dist.nnz == 9

    expected_dist = np.array(
        [
            [1, 4, 0, 1, 0],
            [4, 1, 0, 4, 0],
            [0, 0, 0, 0, 0],
            [1, 4, 0, 1, 0],
            [0, 0, 0, 0, 0],
        ]
    )
    npt.assert_equal(neighbors.dist.toarray(), expected_dist)
def test_compute_distances2(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Check distances for a single receptor arm with multiple chains.

    Uses the ``"identity"`` metric with ``cutoff=0`` on the VJ arm and
    ``dual_ir="any"``, then compares the dense distance matrix with a fixed
    expectation.

    NOTE(review): the ``adata_cdr3_mock_distance_calculator`` fixture is
    requested but not used in this test.
    """
    neighbors = IrNeighbors(
        adata_cdr3,
        metric="identity",
        cutoff=0,
        receptor_arms="VJ",
        dual_ir="any",
        sequence="aa",
    )
    neighbors.compute_distances()

    expected_dist = np.array(
        [
            [1, 1, 0, 1, 1],
            [1, 1, 0, 0, 0],
            [0, 0, 0, 0, 0],
            [1, 0, 0, 1, 1],
            [1, 0, 0, 1, 1],
        ]
    )
    npt.assert_equal(neighbors.dist.toarray(), expected_dist)
def test_compute_distances5(adata_cdr3, adata_cdr3_mock_distance_calculator):
    """Check distances for a single receptor arm, multiple chains, custom metric.

    Uses the mock distance calculator fixture on the VJ arm with
    ``dual_ir="all"`` and compares the dense distance matrix with a fixed
    expectation.
    """
    neighbors = IrNeighbors(
        adata_cdr3,
        metric=adata_cdr3_mock_distance_calculator,
        receptor_arms="VJ",
        dual_ir="all",
        sequence="aa",
    )
    neighbors.compute_distances()

    print(neighbors.dist.toarray())

    expected_dist = np.array(
        [
            [1, 0, 0, 4, 0],
            [0, 1, 0, 0, 4],
            [0, 0, 0, 0, 0],
            [4, 0, 0, 1, 0],
            [0, 4, 0, 0, 1],
        ]
    )
    npt.assert_equal(neighbors.dist.toarray(), expected_dist)
def test_dist_to_connectivities(adata_cdr3):
    """Check the conversion of a distance matrix into connectivities.

    The distance matrix is injected directly via ``_dist_mat``, so the
    AnnData fixture only serves to construct the ``IrNeighbors`` objects.
    Two configurations are covered: ``"alignment"`` with ``cutoff=10`` and
    ``"identity"`` with ``cutoff=0``. In both, the connectivities must keep
    the same number of stored entries as the distance matrix.
    """
    # Case 1: alignment metric, cutoff of 10.
    alignment_neighbors = IrNeighbors(adata_cdr3, metric="alignment", cutoff=10)
    alignment_dists = [[0, 1, 1, 5], [0, 0, 2, 8], [1, 5, 0, 2], [10, 0, 0, 0]]
    alignment_neighbors._dist_mat = scipy.sparse.csr_matrix(alignment_dists)

    connectivities = alignment_neighbors.connectivities
    assert connectivities.nnz == alignment_neighbors._dist_mat.nnz

    expected_connectivities = np.array(
        [[0, 1, 1, 0.6], [0, 0, 0.9, 0.3], [1, 0.6, 0, 0.9], [0.1, 0, 0, 0]]
    )
    npt.assert_equal(connectivities.toarray(), expected_connectivities)

    # Case 2: identity metric, cutoff of 0 -- the connectivities are expected
    # to equal the distance matrix itself.
    identity_neighbors = IrNeighbors(adata_cdr3, metric="identity", cutoff=0)
    identity_dists = [[0, 1, 1, 0], [0, 0, 1, 0], [1, 0, 0, 0], [0, 0, 0, 0]]
    identity_neighbors._dist_mat = scipy.sparse.csr_matrix(identity_dists)

    connectivities = identity_neighbors.connectivities
    assert connectivities.nnz == identity_neighbors._dist_mat.nnz
    npt.assert_equal(
        connectivities.toarray(),
        identity_neighbors._dist_mat.toarray(),
    )
def test_build_index_dict(adata_cdr3):
    """Check ``_build_index_dict`` for three ``IrNeighbors`` configurations.

    1. VJ arm only, primary chain only, nucleotide sequences.
    2. All arms, all chains, amino-acid sequences (the expectation includes
       ``chains_per_cell``).
    3. Any arm, any chain, amino-acid sequences with the alignment metric
       (the expectation has no ``chains_per_cell``).
    """
    # --- 1. VJ / primary_only / nt -------------------------------------
    nt_neighbors = IrNeighbors(
        adata_cdr3,
        receptor_arms="VJ",
        dual_ir="primary_only",
        sequence="nt",
        cutoff=0,
        metric="identity",
    )
    nt_neighbors._build_index_dict()

    expected_nt_index = {
        "VJ": {
            "chain_inds": [1],
            "unique_seqs": ["GCGAUGGCG", "GCGGCGGCG", "GCUGCUGCU"],
            "seq_to_cell": {1: {0: [1], 1: [0], 2: [3]}},
        }
    }
    npt.assert_equal(nt_neighbors.index_dict, expected_nt_index)

    # --- 2. all / all / aa ---------------------------------------------
    all_neighbors = IrNeighbors(
        adata_cdr3,
        receptor_arms="all",
        dual_ir="all",
        sequence="aa",
        metric="identity",
        cutoff=0,
    )
    all_neighbors._build_index_dict()
    print(all_neighbors.index_dict)

    expected_all_index = {
        "VJ": {
            "chain_inds": [1, 2],
            "unique_seqs": ["AAA", "AHA"],
            "seq_to_cell": {
                1: {0: [0, 3], 1: [1]},
                2: {0: [3, 4], 1: [0]},
            },
            "chains_per_cell": np.array([2, 1, 0, 2, 1]),
        },
        "VDJ": {
            "chain_inds": [1, 2],
            "unique_seqs": ["AAA", "KK", "KKK", "KKY", "LLL"],
            "seq_to_cell": {
                1: {0: [], 1: [1], 2: [], 3: [0], 4: [3, 4]},
                2: {0: [3], 1: [], 2: [0, 1], 3: [], 4: []},
            },
            "chains_per_cell": np.array([2, 2, 0, 2, 1]),
        },
    }
    npt.assert_equal(all_neighbors.index_dict, expected_all_index)

    # --- 3. any / any / aa with alignment metric -----------------------
    any_neighbors = IrNeighbors(
        adata_cdr3,
        receptor_arms="any",
        dual_ir="any",
        sequence="aa",
        metric="alignment",
        cutoff=10,
    )
    any_neighbors._build_index_dict()
    print(any_neighbors.index_dict)

    expected_any_index = {
        "VJ": {
            "chain_inds": [1, 2],
            "unique_seqs": ["AAA", "AHA"],
            "seq_to_cell": {
                1: {0: [0, 3], 1: [1]},
                2: {0: [3, 4], 1: [0]},
            },
        },
        "VDJ": {
            "chain_inds": [1, 2],
            "unique_seqs": ["AAA", "KK", "KKK", "KKY", "LLL"],
            "seq_to_cell": {
                1: {0: [], 1: [1], 2: [], 3: [0], 4: [3, 4]},
                2: {0: [3], 1: [], 2: [0, 1], 3: [], 4: []},
            },
        },
    }
    npt.assert_equal(any_neighbors.index_dict, expected_any_index)
def test_seq_to_cell_idx():
    """Check the mapping from unique-sequence index to cell indices.

    The expectation covers a sequence occurring in several cells, a unique
    sequence that occurs in no cell (mapped to an empty list), and a NaN
    entry among the per-cell sequences.
    """
    unique_seqs = np.array(["AAA", "ABA", "CCC", "XXX", "AA"])
    cdr_seqs = np.array(["AAA", "CCC", "ABA", "CCC", np.nan, "AA", "AA"])

    seq_to_cell = IrNeighbors._seq_to_cell_idx(unique_seqs, cdr_seqs)

    expected_mapping = {0: [0], 1: [2], 2: [1, 3], 3: [], 4: [5, 6]}
    assert seq_to_cell == expected_mapping