Exemplo n.º 1
0
def test_update_plus(airr_reannotated):
    """Check that ``update_plus`` adds the expected columns to the metadata.

    The default call and each explicit ``option`` value are exercised in
    turn, and the metadata is checked for the column that option is
    expected to add. A second, fresh ``Dandelion`` object is then used for
    the ``'mutations'`` / ``'all'`` / ``'cdr3 lengths'`` sequence.
    """
    vdj = ddl.Dandelion(airr_reannotated)
    vdj.update_plus()
    assert 'mu_count' in vdj.metadata
    vdj.update_plus(option='sequence')
    assert 'sequence_VDJ' in vdj.metadata
    vdj.update_plus(option='cdr3 lengths')
    assert 'junction_aa_length_VDJ' in vdj.metadata

    # Start again from a fresh object so earlier calls cannot mask a failure.
    vdj = ddl.Dandelion(airr_reannotated)
    vdj.update_plus(option='mutations')
    assert 'mu_count' in vdj.metadata
    vdj.update_plus(option='all')
    assert 'sequence_VDJ' in vdj.metadata
    vdj.update_plus(option='cdr3 lengths')
    # The final call previously had no check; mirror the earlier
    # 'cdr3 lengths' assertion so its outcome is actually verified.
    assert 'junction_aa_length_VDJ' in vdj.metadata
Exemplo n.º 2
0
def test_travdv_filter(create_testfolder, dummy_adata_travdv):
    """Load the filtered contig table and run ``filter_contigs`` on it.

    The loaded table must have 23 rows and no ``'TRD'`` locus entries;
    after ``ddl.pp.filter_contigs`` the returned object must have 8 rows.
    """
    contig_path = (str(create_testfolder) +
                   '/dandelion/filtered_contig_dandelion.tsv')
    loaded = ddl.Dandelion(contig_path)
    assert loaded.data.shape[0] == 23

    trd_entries = [locus for locus in loaded.data['locus'] if locus == 'TRD']
    assert len(trd_entries) == 0

    # Only the filtered Dandelion object is inspected; the AnnData is unused.
    filtered, _adata = ddl.pp.filter_contigs(loaded, dummy_adata_travdv)
    assert filtered.data.shape[0] == 8
Exemplo n.º 3
0
def to_dandelion(adata: AnnData):
    """Export data to `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).

    Parameters
    ----------
    adata
        annotated data matrix with :term:`IR` annotations.

    Returns
    -------
    `Dandelion` object.

    Raises
    ------
    ImportError
        If the optional `dandelion` package cannot be imported.
    """
    try:
        import dandelion as ddl
    except ImportError as err:
        # Only a failed import is translated into the install hint; anything
        # else (e.g. KeyboardInterrupt) propagates unchanged. The original
        # error is chained so the root cause stays visible.
        raise ImportError(
            "Please install dandelion: pip install sc-dandelion."
        ) from err
    airr_cells = to_airr_cells(adata)

    # Maps each generated sequence_id to its AIRR record.
    contig_dicts = {}
    for tmp_cell in airr_cells:
        for i, chain in enumerate(tmp_cell.to_airr_records(), start=1):
            # dandelion-specific modifications: number the contigs of each
            # cell from 1 and derive the sequence_id from the cell id.
            chain.update(
                {
                    "sequence_id": f"{tmp_cell.cell_id}_contig_{i}",
                }
            )
            contig_dicts[chain["sequence_id"]] = chain

    # One row per contig, indexed by sequence_id.
    data = pd.DataFrame.from_dict(contig_dicts, orient="index")
    return ddl.Dandelion(ddl.load_data(data))
Exemplo n.º 4
0
def test_mutation(create_testfolder, airr_reannotated):
    """Run ``quantify_mutations`` on a TSV file and check the mutation columns.

    The reannotated table is written to ``test.tsv``, mutations are
    quantified on that file, and the file is read back into a ``Dandelion``
    object whose ``mu_count`` and ``mu_freq`` columns must be non-empty.
    """
    tsv_path = create_testfolder / "test.tsv"
    airr_reannotated.to_csv(tsv_path, sep='\t', index=False)

    ddl.pp.quantify_mutations(tsv_path)
    reloaded = pd.read_csv(tsv_path, sep='\t')
    vdj = ddl.Dandelion(reloaded)
    assert not vdj.data.mu_count.empty

    ddl.pp.quantify_mutations(tsv_path, frequency=True)
    # NOTE(review): this inspects the object built before the second call,
    # not a fresh read of the file - confirm that is the intended check.
    assert not vdj.data.mu_freq.empty
Exemplo n.º 5
0
def test_manual_threshold_and_define_clones(create_testfolder):
    """Set a threshold by hand and check that ``define_clones`` adds clone ids.

    ``define_clones`` is run once with its default output column
    (``clone_id``) and once with ``key_added='changeo_clone'``; both columns
    must be non-empty afterwards.
    """
    table = pd.read_csv(create_testfolder / "test.tsv", sep='\t')
    vdj = ddl.Dandelion(table)
    vdj.threshold = 0.1  # manual threshold, set directly on the object

    ddl.tl.define_clones(vdj)
    assert not vdj.data.clone_id.empty

    ddl.tl.define_clones(vdj, key_added='changeo_clone')
    assert not vdj.data.changeo_clone.empty
Exemplo n.º 6
0
def test_quantify_mut_2(create_testfolder, processed_files, freq, colname):
    """Quantify mutations on a ``Dandelion`` object and check the column dtype.

    ``colname`` must be present and non-empty after the call; ``'mu_freq'``
    is expected to be ``float``, any other column ``int``.
    """
    contig_file = create_testfolder / str(
        'dandelion/' + processed_files['filtered'])
    vdj = ddl.Dandelion(contig_file)
    ddl.pp.quantify_mutations(vdj, frequency=freq)

    column = vdj.data[colname]
    assert not column.empty
    expected_dtype = float if colname == 'mu_freq' else int
    assert column.dtype == expected_dtype
Exemplo n.º 7
0
def test_setup(create_testfolder, airr_reannotated, dummy_adata):
    """Filter contigs, build a network, transfer it, and write both objects.

    Checks the row counts before and after ``filter_contigs``, assigns every
    contig to clone ``'A'``, verifies that ``transfer`` populates
    ``adata.obs`` / ``adata.obsm``, and finally writes ``test.h5`` and
    ``test.h5ad`` into the test folder.
    """
    h5_path = create_testfolder / "test.h5"
    h5ad_path = create_testfolder / "test.h5ad"

    vdj, adata = ddl.pp.filter_contigs(airr_reannotated, dummy_adata)
    assert airr_reannotated.shape[0] == 9
    assert vdj.data.shape[0] == 7
    assert vdj.metadata.shape[0] == 4
    assert adata.n_obs == 5

    # Put all seven remaining contigs in the same clone, then rebuild the
    # object from the modified table.
    vdj.data['clone_id'] = ['A'] * 7
    vdj = ddl.Dandelion(vdj.data)

    ddl.tl.generate_network(vdj)
    ddl.tl.transfer(adata, vdj)
    assert 'clone_id' in adata.obs
    assert 'X_vdj' in adata.obsm

    vdj.write_h5(h5_path)
    adata.write_h5ad(h5ad_path)
Exemplo n.º 8
0
def test_container():
    """Run the preprocessing script and check its output table.

    The script is invoked from ``/tests``, the resulting
    ``filtered_contig_dandelion.tsv`` is read back, a set of expected
    columns is checked for being non-empty, and the table must be loadable
    as a ``Dandelion`` object.
    """
    os.system(
        "cd /tests; python /share/dandelion_preprocess.py --meta test.csv;")
    dat = pd.read_csv(
        '/tests/sample_test_10x/dandelion/filtered_contig_dandelion.tsv',
        sep='\t')
    assert not dat['c_call'].empty
    assert not dat['v_call_genotyped'].empty
    assert not dat['mu_count'].empty
    assert not dat['mu_freq'].empty
    # Construct directly: a failure now surfaces with its real exception and
    # traceback instead of being swallowed and reported as "vdj is None".
    vdj = ddl.Dandelion(dat)
    assert vdj is not None
Exemplo n.º 9
0
def test_generate_network(create_testfolder, resample, expected):
    """Generate a network from ``test.h5`` and check the resulting attributes.

    When ``resample`` is given it is passed as ``downsample`` and the
    returned object is used; otherwise the network is generated on the
    loaded object. The table is then collapsed into a single clone
    (``'1'``) and the network is rebuilt, after which ``edges`` must be set.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    if resample is None:
        ddl.tl.generate_network(vdj)
    else:
        # With downsampling the result is taken from the return value.
        vdj = ddl.tl.generate_network(vdj, downsample=resample)

    assert vdj.distance is not None
    assert vdj.edges is None
    assert vdj.n_obs == expected
    assert vdj.layout is not None
    assert vdj.graph is not None

    # Assign every contig to one clone and rebuild from the modified table.
    vdj.data['clone_id'] = '1'
    single_clone = ddl.Dandelion(vdj.data)
    assert single_clone.data.clone_id.dtype == 'object'
    ddl.tl.generate_network(single_clone)
    assert single_clone.edges is not None
Exemplo n.º 10
0
def test_update_germlines(create_testfolder, processed_files, database_paths,
                          fasta_10x):
    """Exercise ``update_germline`` with and without a ``corrected`` argument.

    After each successful call ``vdj.germline`` must be non-empty, including
    after a write/read round trip through an ``.h5`` file. Passing a list as
    ``corrected`` must raise ``TypeError``.
    """
    contig_file = create_testfolder / str(
        'dandelion/' + processed_files['filtered'])
    vdj = ddl.Dandelion(contig_file)
    germline_db = database_paths['germline']
    folder = str(create_testfolder)

    vdj.update_germline(germline=germline_db)
    assert len(vdj.germline) > 0

    # The germline must survive a write/read round trip.
    h5_path = folder + "/test_airr_reannotated.h5"
    vdj.write_h5(h5_path)
    restored = ddl.read_h5(h5_path)
    assert len(restored.germline) > 0

    # `corrected` given as a path string.
    vdj.update_germline(germline=germline_db,
                        corrected=folder + "/filtered_contig.fasta")
    assert len(vdj.germline) > 0

    # `corrected` given via the fasta_10x fixture.
    vdj.update_germline(germline=germline_db, corrected=fasta_10x)
    assert len(vdj.germline) > 0

    with pytest.raises(TypeError):
        vdj.update_germline(germline=germline_db, corrected=[])
Exemplo n.º 11
0
def test_setup():
    """Download the 10x example data and write the local test fixtures.

    The AIRR rearrangement table is fetched, given ``locus``, ``umi_count``
    and ``sample_id`` columns, wrapped in a ``Dandelion`` object and
    round-tripped through ``.h5`` and ``.pkl.pbz2`` files. The matching
    feature-barcode matrix is then downloaded, saved under ``tests/`` and
    re-written as a gzip-compressed ``.h5ad``.
    """
    file = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_vdj_b_airr_rearrangement.tsv"
    r = requests.get(file)
    # Fail here on an HTTP error rather than parsing an error page as TSV.
    r.raise_for_status()
    test_data = pd.read_csv(StringIO(r.text), sep="\t")
    # Derive the locus from the V gene call; rows matching none get None.
    test_data["locus"] = [
        "IGH" if "IGH" in i else
        "IGK" if "IGK" in i else "IGL" if "IGL" in i else None
        for i in test_data.v_call
    ]
    test_data["umi_count"] = test_data["duplicate_count"]
    test_data["sample_id"] = "test"
    test_ddl = ddl.Dandelion(test_data)
    test_ddl.write_h5("tests/test.h5", compression="bzip2")
    test_ddl.write_pkl("tests/test.pkl.pbz2")
    test = ddl.read_h5("tests/test.h5")
    _ = ddl.read_pkl("tests/test.pkl.pbz2")
    scfile = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_count_filtered_feature_bc_matrix.h5"
    r = requests.get(scfile)
    r.raise_for_status()
    # Use a context manager so the file is flushed and closed before it is
    # read back below.
    with open("tests/sctest.h5", "wb") as handle:
        handle.write(r.content)
    adata = sc.read_10x_h5("tests/sctest.h5")
    adata.write("tests/sctest.h5ad", compression="gzip")
    print(test)
    print(adata)
Exemplo n.º 12
0
def test_readwrite_h5(create_testfolder):
    """Round-trip a ``Dandelion`` object through ``write_h5`` / ``read_h5``.

    The length columns must be non-empty on the loaded object and after
    each round trip (default arguments, ``complib`` only, ``compression``
    only). Passing both ``complib`` and ``compression`` must raise
    ``ValueError``.
    """
    length_columns = ('np1_length', 'np2_length', 'junction_length')

    def assert_lengths_present(obj):
        # Each of the length columns must be non-empty.
        for column in length_columns:
            assert not getattr(obj.data, column).empty

    tsv_path = str(create_testfolder) + "/test_airr_reannotated.tsv"
    h5_path = str(create_testfolder) + "/test_airr_reannotated.h5"

    vdj = ddl.Dandelion(tsv_path)
    assert_lengths_present(vdj)

    # Same round trip with each supported way of choosing the compressor.
    write_variants = (
        {},
        {'complib': 'blosc:lz4'},
        {'compression': 'blosc:lz4'},
    )
    for write_kwargs in write_variants:
        vdj.write_h5(h5_path, **write_kwargs)
        assert_lengths_present(ddl.read_h5(h5_path))

    with pytest.raises(ValueError):
        vdj.write_h5(h5_path, complib='blosc:lz4', compression='blosc:lz4')
Exemplo n.º 13
0
def test_readwrite_pkl(create_testfolder):
    """Round-trip a ``Dandelion`` object through ``write_pkl`` / ``read_pkl``.

    The object is written to ``.pkl``, ``.pkl.gz`` and ``.pkl.pbz2`` files
    in turn; the length columns must be non-empty on the loaded object and
    on each object read back.
    """
    length_columns = ('np1_length', 'np2_length', 'junction_length')

    def assert_lengths_present(obj):
        # Each of the length columns must be non-empty.
        for column in length_columns:
            assert not getattr(obj.data, column).empty

    folder = str(create_testfolder)
    tsv_path = folder + "/test_airr_reannotated.tsv"
    pickle_paths = [
        folder + "/test_airr_reannotated.pkl",
        folder + "/test_airr_reannotated.pkl.gz",
        folder + "/test_airr_reannotated.pkl.pbz2",
    ]

    vdj = ddl.Dandelion(tsv_path)
    assert_lengths_present(vdj)

    for pickle_path in pickle_paths:
        vdj.write_pkl(pickle_path)
        assert_lengths_present(ddl.read_pkl(pickle_path))
Exemplo n.º 14
0
def test_update_germlines(create_testfolder, processed_files, database_paths):
    """Check that ``update_germlines`` leaves a non-empty ``germline``."""
    contig_file = create_testfolder / str(
        'dandelion/' + processed_files['filtered'])
    loaded = ddl.Dandelion(contig_file)
    loaded.update_germlines(database_paths['germline'])
    assert len(loaded.germline) > 0
Exemplo n.º 15
0
def test_loadtravdv2(airr_travdv):
    """Load the TRAV/DV fixture: expect 6 rows, all with locus ``'TRD'``."""
    loaded = ddl.Dandelion(airr_travdv)
    assert loaded.data.shape[0] == 6
    assert all(locus == 'TRD' for locus in loaded.data['locus'])
Exemplo n.º 16
0
def test_create_germlines(create_testfolder, database_paths):
    """Check that ``create_germlines`` fills ``germline_alignment_d_mask``."""
    table = pd.read_csv(create_testfolder / "test.tsv", sep='\t')
    vdj = ddl.Dandelion(table)
    ddl.pp.create_germlines(vdj, germline=database_paths['germline'])
    assert not vdj.data.germline_alignment_d_mask.empty
Exemplo n.º 17
0
def test_loadtravdv_reannotated(create_testfolder):
    """Load the filtered contig table: 23 rows, none with locus ``'TRD'``."""
    contig_path = (str(create_testfolder) +
                   '/dandelion/filtered_contig_dandelion.tsv')
    loaded = ddl.Dandelion(contig_path)
    assert loaded.data.shape[0] == 23
    trd_entries = [locus for locus in loaded.data['locus'] if locus == 'TRD']
    assert len(trd_entries) == 0
Exemplo n.º 18
0
def test_update_germlines_fail(create_testfolder, processed_files):
    """``update_germline`` called without arguments must raise ``KeyError``."""
    contig_file = create_testfolder / str(
        'dandelion/' + processed_files['filtered'])
    loaded = ddl.Dandelion(contig_file)
    with pytest.raises(KeyError):
        loaded.update_germline()