예제 #1
0
 def test_missing_format_keys(self):
     """Check that files lacking a format attribute are rejected.

     Each attribute set omits ``format_name``, ``format_version`` or both.
     After it is written to ``self.path`` with ``self.write_file``, both
     ``tszip.decompress`` and ``tszip.print_summary`` are expected to raise
     ``FileFormatError``.
     """
     incomplete_attr_sets = [{}, {"format_name": ""}, {"format_version": ""}]
     for file_attrs in incomplete_attr_sets:
         self.write_file(file_attrs, self.path)
         self.assertRaises(exceptions.FileFormatError, tszip.decompress, self.path)
         self.assertRaises(exceptions.FileFormatError, tszip.print_summary, self.path)
예제 #2
0
 def test_bad_format_name(self):
     """Check that unrecognised ``format_name`` values are rejected.

     The format version is held at ``[1, 0]`` so only the name varies.
     Both readers must raise ``FileFormatError`` for every candidate.
     """
     for candidate in ["", "xyz", [1234]]:
         file_attrs = {"format_name": candidate, "format_version": [1, 0]}
         self.write_file(file_attrs, self.path)
         # Exercise decompress first, then print_summary, on the same file.
         for reader in (tszip.decompress, tszip.print_summary):
             with self.assertRaises(exceptions.FileFormatError):
                 reader(self.path)
예제 #3
0
 def test_format_too_new(self):
     """Check that a file declaring format version ``[2, 0]`` is rejected.

     Both ``tszip.decompress`` and ``tszip.print_summary`` are expected to
     raise ``FileFormatError`` for such a file.
     """
     file_attrs = {"format_name": "tszip", "format_version": [2, 0]}
     self.write_file(file_attrs, self.path)
     self.assertRaises(exceptions.FileFormatError, tszip.decompress, self.path)
     self.assertRaises(exceptions.FileFormatError, tszip.print_summary, self.path)
예제 #4
0
 def test_keep(self):
     """Check that ``--keep`` leaves the input file in place.

     The ``.tsz`` output must also exist and decompress to tables equal
     to those of ``self.ts``.
     """
     self.assertTrue(self.trees_path.exists())
     cli_args = [str(self.trees_path), "--keep"]
     self.run_tszip(cli_args)
     # The input must survive the run.
     self.assertTrue(self.trees_path.exists())
     compressed_path = pathlib.Path(str(self.trees_path) + ".tsz")
     self.assertTrue(compressed_path.exists())
     round_tripped = tszip.decompress(compressed_path)
     self.assertEqual(round_tripped.tables, self.ts.tables)
예제 #5
0
 def test_suffix(self):
     """Check that ``-S`` selects the suffix appended to the output file.

     The input is expected to be gone afterwards, and the file carrying
     the custom suffix must decompress to tables equal to ``self.ts``.
     """
     custom_suffix = ".XYZasdf"
     self.assertTrue(self.trees_path.exists())
     self.run_tszip([str(self.trees_path), "-S", custom_suffix])
     self.assertFalse(self.trees_path.exists())
     compressed_path = pathlib.Path(str(self.trees_path) + custom_suffix)
     self.assertTrue(compressed_path.exists())
     round_tripped = tszip.decompress(compressed_path)
     self.assertEqual(round_tripped.tables, self.ts.tables)
예제 #6
0
파일: cli.py 프로젝트: OrkoHunter/tszip
def run_decompress(args):
    """Decompress ``args.file`` and dump the result next to it.

    The output filename is ``args.file`` with its trailing ``.zarr``
    removed.

    Raises:
        ValueError: if ``args.file`` does not end with ``.zarr``.
    """
    compressed_name = args.file
    logger.info("Decompressing {}".format(compressed_name))
    required_suffix = ".zarr"
    if not compressed_name.endswith(required_suffix):
        raise ValueError("Compressed file must have .zarr suffix")
    tree_sequence = tszip.decompress(compressed_name)
    # Strip the suffix to obtain the output filename.
    outfile = compressed_name[: -len(required_suffix)]
    logger.info("Writing to {}".format(outfile))
    tree_sequence.dump(outfile)
예제 #7
0
 def verify(self, ts):
     """Round-trip ``ts`` through tszip and compare genotype matrices.

     The tree sequence is compressed into a temporary directory and
     decompressed again; the two genotype matrices must be equal.
     """
     with tempfile.TemporaryDirectory() as scratch_dir:
         archive_path = pathlib.Path(scratch_dir, "treeseq.tsz")
         tszip.compress(ts, archive_path)
         round_tripped = tszip.decompress(archive_path)
     expected_genotypes = ts.genotype_matrix()
     actual_genotypes = round_tripped.genotype_matrix()
     self.assertTrue(np.array_equal(expected_genotypes, actual_genotypes))
예제 #8
0
 def test_overwrite(self):
     """Check that ``--force`` replaces an output file that already exists.

     An empty file is created at the output location before the run.
     Afterwards the input is expected to be gone, and the output must
     decompress to tables equal to those of ``self.ts``.
     """
     self.assertTrue(self.trees_path.exists())
     existing_output = pathlib.Path(str(self.trees_path) + ".tsz")
     # Pre-create the destination so the run has something to overwrite.
     existing_output.touch()
     self.assertTrue(self.trees_path.exists())
     cli_args = [str(self.trees_path), "--force"]
     self.run_tszip(cli_args)
     self.assertFalse(self.trees_path.exists())
     self.assertTrue(existing_output.exists())
     round_tripped = tszip.decompress(existing_output)
     self.assertEqual(round_tripped.tables, self.ts.tables)
예제 #9
0
 def test_variants_only(self):
     """Check ``--variants-only`` output against ``self.ts``.

     The decompressed tables are expected to differ from the original
     tables, while the genotype matrices must still be equal.
     """
     self.assertTrue(self.trees_path.exists())
     cli_args = [str(self.trees_path), "--variants-only"]
     self.run_tszip(cli_args)
     self.assertFalse(self.trees_path.exists())
     compressed_path = pathlib.Path(str(self.trees_path) + ".tsz")
     self.assertTrue(compressed_path.exists())
     decoded = tszip.decompress(compressed_path)
     self.assertNotEqual(decoded.tables, self.ts.tables)
     genotypes_match = np.array_equal(
         decoded.genotype_matrix(), self.ts.genotype_matrix()
     )
     self.assertTrue(genotypes_match)
예제 #10
0
def run_decompress(args):
    """Decompress every file named in ``args.files``.

    For each input, the output path is the input name with
    ``args.suffix`` removed. The output path is passed to
    ``check_output`` before decompression, and the input path to
    ``remove_input`` afterwards. Inputs that do not end with
    ``args.suffix`` are reported through ``exit``.
    """
    setup_logging(args)
    for compressed_name in args.files:
        logger.info("Decompressing {}".format(compressed_name))
        suffix = args.suffix
        if not compressed_name.endswith(suffix):
            exit("Compressed file must have '{}' suffix".format(suffix))
        source_path = pathlib.Path(compressed_name)
        # Strip the suffix to obtain the output filename.
        stripped_name = compressed_name[: -len(suffix)]
        target_path = pathlib.Path(stripped_name)
        check_output(target_path, args)
        with check_load_errors(compressed_name):
            tree_sequence = tszip.decompress(compressed_name)
        tree_sequence.dump(str(target_path))
        logger.info("Wrote {}".format(target_path))
        remove_input(source_path, args)
예제 #11
0
def run_decompress(args):
    """Decompress every file named in ``args.files``.

    When ``args.stdout`` is set, the tree sequence is dumped to
    ``sys.stdout`` and ``args.keep`` is forced to ``True``. Otherwise the
    output path is the input name with ``args.suffix`` removed, and it is
    passed to ``check_output`` first. Inputs that do not end with
    ``args.suffix`` are reported through ``exit``.
    """
    setup_logging(args)
    for file_arg in args.files:
        logger.info(f"Decompressing {file_arg}")
        if not file_arg.endswith(args.suffix):
            exit(f"Compressed file must have '{args.suffix}' suffix")
        source_path = pathlib.Path(file_arg)
        if not args.stdout:
            # Strip the suffix to obtain the output filename.
            outfile = pathlib.Path(file_arg[: -len(args.suffix)])
            check_output(outfile, args)
        else:
            # Set keep before remove_input is called below.
            args.keep = True
            outfile = sys.stdout
        with check_load_errors(file_arg):
            tree_sequence = tszip.decompress(file_arg)
        tree_sequence.dump(outfile)
        logger.info(f"Wrote {outfile}")
        remove_input(source_path, args)
예제 #12
0
 def verify(self, ts):
     """Round-trip ``ts`` with ``variants_only=True`` and compare the result.

     Genotypes, site positions and alleles must match variant by variant.
     The sequence length and the population, individual and site tables
     must be equal, and the decompressed copy must carry exactly one more
     provenance record.

     Raises:
         unittest.SkipTest: if ``ts`` contains any migrations.
     """
     if ts.num_migrations > 0:
         raise unittest.SkipTest("Migrations not supported")
     with tempfile.TemporaryDirectory() as scratch_dir:
         archive_path = pathlib.Path(scratch_dir, "treeseq.tsz")
         tszip.compress(ts, archive_path, variants_only=True)
         decoded = tszip.decompress(archive_path)
     self.assertEqual(ts.num_sites, decoded.num_sites)
     for expected, actual in zip(ts.variants(), decoded.variants()):
         self.assertTrue(np.array_equal(expected.genotypes, actual.genotypes))
         self.assertEqual(expected.site.position, actual.site.position)
         self.assertEqual(expected.alleles, actual.alleles)
     original_tables = ts.tables
     decoded_tables = decoded.tables
     # Populations, individuals and sites should be untouched if there are
     # no unreachable individuals.
     for attr in ("sequence_length", "populations", "individuals", "sites"):
         self.assertEqual(
             getattr(original_tables, attr), getattr(decoded_tables, attr)
         )
     # An extra provenance record is expected here because of simplify.
     self.assertEqual(
         len(original_tables.provenances), len(decoded_tables.provenances) - 1
     )
# In[12]:

arg_list = sys.argv
# The first command-line argument is used, with a trailing underscore,
# as the filename prefix to select on.
model = arg_list[1] + '_'
# Keep only the entries of tree_seq_files/ whose name starts with the prefix.
list_ts_files = list(
    filter(lambda name: name.startswith(model), os.listdir('tree_seq_files/'))
)
n = len(list_ts_files)

# ## 1) Compute statistics from list of .ts files

# In[13]:

# iterate through that list, getting stats for every file object
for file in list_ts_files:
    ts = tszip.decompress('tree_seq_files/' + file)
    sample_list = make_sample_list(ts)
    tajimas_pi.append(ts.diversity(sample_sets=sample_list))
    D_DenAnc.append(D_AncEst(ts, "DEN"))
    D_NeaAnc.append(D_AncEst(ts, "NEA"))
    f4_DenAnc_a.append(f4_DenAncEst(ts, "a"))
    f4_DenAnc_b.append(f4_DenAncEst(ts, "b"))
    f4_Est_c.append(f4_AncEst(ts, "c"))
    f4_Est_d.append(f4_AncEst(ts, "d"))
    f4_Est_e.append(f4_AncEst(ts, "e"))
    mig_segs_DEN_AYT, mig_segs_DEN_PAP, mig_segs_NEA_AYT, mig_segs_NEA_PAP = get_mig_segs(
        ts)
    mean_tractL_DEN.append(
        np.array([np.mean(mig_segs_DEN_PAP),
                  np.mean(mig_segs_DEN_AYT)]))
    mean_tractL_NEA.append(
예제 #14
0
 def test_load_dir(self):
     """Check that decompressing the parent of ``self.path`` raises ``OSError``."""
     directory = self.path.parent
     self.assertRaises(OSError, tszip.decompress, directory)
예제 #15
0
 def test_missing_file(self):
     """Check that decompressing ``/no/such/file`` raises ``FileNotFoundError``."""
     self.assertRaises(FileNotFoundError, tszip.decompress, "/no/such/file")
예제 #16
0
 def test_wrong_format(self):
     """Check that plain-text files are rejected with ``FileFormatError``.

     An empty file, a short string and 1024 repeated characters are each
     written to ``self.path`` and then passed to ``tszip.decompress``.
     """
     text_payloads = ["", "1234", "X" * 1024]
     for payload in text_payloads:
         with open(str(self.path), "w") as handle:
             handle.write(payload)
         self.assertRaises(exceptions.FileFormatError, tszip.decompress, self.path)
예제 #17
0
 def test_decompress(self):
     """Compare a stored ``.tsz`` file with its stored ``.trees`` counterpart.

     Both files are read from the ``files`` directory beside this module.
     """
     data_dir = pathlib.Path(__file__).parent.joinpath("files")
     decompressed = tszip.decompress(data_dir / "1.0.0.trees.tsz")
     reference = tskit.load(data_dir / "1.0.0.trees")
     assert decompressed == reference
예제 #18
0
 def verify(self, ts):
     """Round-trip ``ts`` through tszip and require equal tables.

     The tree sequence is compressed into a temporary directory and
     decompressed again before the tables are compared.
     """
     with tempfile.TemporaryDirectory() as scratch_dir:
         archive_path = pathlib.Path(scratch_dir, "treeseq.tsz")
         tszip.compress(ts, archive_path)
         round_tripped = tszip.decompress(archive_path)
     self.assertEqual(ts.tables, round_tripped.tables)