def test_makes_df(self):
    """
    Check that `v0_df` produces one dataframe row per VCF dataline.

    Every file in `PATHS` is read twice through the same handle: once to
    count the datalines yielded by `ReadVCF`, once to build the dataframe.
    """
    from idiva.clf.df import v0_df
    from idiva.io import ReadVCF
    from idiva.utils import seek_then_rewind

    for key in PATHS:
        with PATHS[key].open(mode='r') as stream:
            assert isinstance(stream, io.TextIOBase)

            # NOTE(review): `seek_then_rewind` presumably restores the stream
            # position on exit so that the second pass sees the same data --
            # confirm against idiva.utils.
            with seek_then_rewind(stream):
                n_datalines = len(list(ReadVCF(stream)))

            with seek_then_rewind(stream):
                frame = v0_df(ReadVCF(stream))

            self.assertEqual(n_datalines, len(frame))
def maker_clinvar() -> pd.DataFrame:
    """
    Build the clinvar dataframe.

    Opens the file selected by `clinvar_file` through `clinvar_open`, wraps
    the handle in `ReadVCF` and hands the reader to `clinvar_to_df`.

    Returns:
        Whatever `clinvar_to_df` produces for the opened file.
    """
    from idiva.db import clinvar_open
    from idiva.io import ReadVCF
    from idiva.db.clinvar import clinvar_to_df

    log.info('Making clinvar df.')

    with clinvar_open(which=clinvar_file) as stream:
        # The conversion runs while the handle is still open.
        vcf_reader = ReadVCF(stream)
        return clinvar_to_df(vcf_reader)
def test_fisher_large_head(self):
    """
    Run `vcf_to_fisher` on the case/control pair from `PATHS_LARGE_HEAD`.

    The resulting dataframe is only printed; nothing is asserted about it.
    """
    from idiva.io import ReadVCF, open_maybe_gz
    from idiva.stat.vcf_to_fisher import vcf_to_fisher
    from idiva.utils.testing import whatsmyname

    case_path = PATHS_LARGE_HEAD['case']
    ctrl_path = PATHS_LARGE_HEAD['ctrl']

    with open_maybe_gz(case_path, mode='r') as case, open_maybe_gz(ctrl_path, mode='r') as ctrl:
        fisher = vcf_to_fisher(case=ReadVCF(case), ctrl=ReadVCF(ctrl))
        df = fisher.df

        # Output location derived from the test name; currently not written to.
        out_dir = MY_SPACE / whatsmyname()

        print(df)
def test_join_does_something(self):
    """
    Smoke test: build a `v0_df` dataframe per file in `PATHS` and join the
    'case' and 'ctrl' ones. Passes as long as nothing raises.
    """
    from idiva.io import ReadVCF
    from idiva.clf.df import v0_df, join

    def load(path):
        # Parse one VCF file into its dataframe.
        with path.open(mode='r') as stream:
            assert isinstance(stream, io.TextIOBase)
            return v0_df(ReadVCF(stream))

    frames = {key: load(PATHS[key]) for key in PATHS}

    # The joined result is not inspected.
    join(case=frames['case'], ctrl=frames['ctrl'])
def test_poc_head(self):
    """
    Proof-of-concept.

    For each file in `PATHS_LARGE_HEAD`: parse it with `ReadVCF`, print the
    meta lines, the header line and the datalines into a `.log` file under
    `MY_SPACE`, then compare that file line by line with the original.
    Reference lines starting with `##fileDate` or `##source` are skipped
    because their values are replaced while writing.
    """
    from idiva.io import ReadVCF, open_maybe_gz

    # Name of this test method, used as the prefix of the output files.
    test_name = sys._getframe().f_code.co_name

    for (label, ref_file) in PATHS_LARGE_HEAD.items():
        can_file = (MY_SPACE / F"{test_name}__{label}").with_suffix(".log")

        with open_maybe_gz(ref_file, mode='r') as fd_ref:
            assert isinstance(fd_ref, io.TextIOBase)
            vcf = ReadVCF(fd_ref).preload_all()

            # Everything printed below lands in the candidate file.
            with open(can_file, mode='w') as fd_can, redirect_stdout(fd_can):
                for (key, value) in vcf.meta.items():
                    if isinstance(value, str):
                        lowered = str(key).lower()
                        if lowered == "filedate":
                            from idiva.io.out import fileDate
                            value = fileDate
                        elif lowered == "source":
                            from idiva.io.out import source
                            value = source
                        print(F"##{key}={value}")
                    elif isinstance(value, dict):
                        # Structured meta: one `##key=<ID=...,...>` line per entry.
                        for (ident, fields) in value.items():
                            assert isinstance(fields, dict)
                            assert fields
                            joined = ','.join(
                                F"{name}={item if item is not None else '.'}"
                                for (name, item) in fields.items()
                            )
                            print(F"##{key}=<ID={ident},{joined}>")

                print(F"#{SEP.join(vcf.header)}")

                for dataline in vcf:
                    print(str(dataline))

        from idiva.io import Oneliner

        with open_maybe_gz(ref_file, mode='r') as fd_ref, open_maybe_gz(can_file, mode='r') as fd_can:
            assert isinstance(fd_ref, io.TextIOBase)
            assert isinstance(fd_can, io.TextIOBase)
            lines_ref = list(Oneliner(fd_ref))
            lines_can = list(Oneliner(fd_can))

        for (ref, can) in zip(lines_ref, lines_can):
            if ref.startswith(("##fileDate", "##source")):
                continue
            self.assertEqual(can, ref)

        # `zip` stops at the shorter list, so check the lengths as well.
        self.assertEqual(len(lines_can), len(lines_ref))
def test_combine(self):
    """
    Compare the joined case/control dataframe with the stored reference.

    A `v0_df` dataframe is built for every file in `PATHS`; the 'case' and
    'ctrl' ones are joined and the result must equal the table stored in
    `MY_SPACE / "reference.txt"`. The candidate is written to disk and read
    back with the same reader as the reference so both have gone through
    the same CSV round trip before being compared.
    """
    import tempfile
    from pathlib import Path

    from idiva.io import ReadVCF
    from idiva.io.vcf import SEP
    from idiva.clf.df import v0_df, join

    dfs = {}
    for k in PATHS:
        with PATHS[k].open(mode='r') as fd:
            assert isinstance(fd, io.TextIOBase)
            dfs[k] = v0_df(ReadVCF(fd))

    candidate = join(case=dfs['case'], ctrl=dfs['ctrl'])

    def read_csv(file) -> pd.DataFrame:
        # Read a SEP-separated table and pin the column dtypes so that
        # `DataFrame.equals` is not tripped up by dtype inference.
        return pd.read_csv(file, sep=SEP).astype(
            {
                'CHROM': str,
                'POS': int,
                'ID': str,
                'ALT0_case': float,
                'ALT1_case': float,
                'ALT2_case': float,
                'ALT0_ctrl': float,
                'ALT1_ctrl': float,
                'ALT2_ctrl': float,
            }
        )

    ref_file = MY_SPACE / "reference.txt"

    # Hack to create the reference
    # candidate.to_csv(ref_file, sep=SEP, index=True)

    reference = read_csv(ref_file)

    # Write and read `candidate` to make comparable to `reference`.
    # A file inside a temporary directory is used rather than a
    # `NamedTemporaryFile`: reopening the latter by name while it is still
    # open does not work on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_file = Path(tmp_dir) / "candidate.txt"
        candidate.to_csv(tmp_file, sep=SEP, index=True)
        candidate = read_csv(tmp_file)

    self.assertTrue(reference.equals(candidate))