def test_import_table_force_bgz(self):
    """Check that ``force_bgz=True`` imports a ``.gz``-named copy of a ``.bgz`` export.

    A 10-row, 5-partition range table is exported to a ``.bgz`` temp file,
    the file is copied byte-for-byte to a ``.gz`` temp path, and the copy is
    re-imported with ``force_bgz=True``. The re-imported table, keyed by
    ``idx``, must equal the original.
    """
    bgz_path = new_temp_file(suffix=".bgz")
    gz_path = new_temp_file(suffix=".gz")

    expected = hl.utils.range_table(10, 5)
    expected.export(bgz_path)

    # Same bytes, different extension: only the file name claims plain gzip.
    run_command(["cp", uri_path(bgz_path), uri_path(gz_path)])

    reimported = hl.import_table(gz_path, force_bgz=True, impute=True)
    reimported = reimported.key_by('idx')
    self.assertTrue(expected._same(reimported))
def test_export_plink(self):
    """Compare ``hl.export_plink`` output against plink's own conversion.

    The sample VCF is split, its columns are randomly permuted, and the
    result is written both as a VCF and as a plink fileset. plink converts
    the VCF to a bed fileset, whose ``.bim`` variant IDs are rewritten as
    ``col0:col3:col5:col4`` before being merged against the Hail fileset
    with ``--merge-mode 6``. The test passes when the resulting ``.diff``
    file contains nothing but its header row.

    NOTE(review): a second ``test_export_plink`` is defined later in this
    file; if both live in the same class the later one shadows this one and
    the permuted-column variant never runs -- confirm and rename if so.
    """
    import random

    dataset = hl.split_multi_hts(
        hl.import_vcf(resource('sample.vcf'), min_partitions=10))

    # permute columns so not in alphabetical order!
    col_order = list(range(dataset.count_cols()))
    random.shuffle(col_order)
    dataset = dataset.choose_cols(col_order)

    split_vcf_file = uri_path(new_temp_file())
    hl_output = uri_path(new_temp_file())
    plink_output = uri_path(new_temp_file())
    merge_output = uri_path(new_temp_file())

    hl.export_vcf(dataset, split_vcf_file)
    hl.export_plink(dataset, hl_output)

    run_command(["plink", "--vcf", split_vcf_file, "--make-bed",
                 "--out", plink_output, "--const-fid", "--keep-allele-order"])

    def relabel(bim_line):
        # Replace the variant ID (field 1) with fields 0, 3, 5, 4 joined by ':'.
        fields = bim_line.strip().split()
        fields[1] = ":".join([fields[0], fields[3], fields[5], fields[4]])
        return "\t".join(fields) + "\n"

    bim_path = plink_output + ".bim"
    with open(uri_path(bim_path)) as bim_in:
        relabelled = [relabel(bim_line) for bim_line in bim_in]

    with open(bim_path, 'w') as bim_out:
        bim_out.writelines(relabelled)

    run_command(["plink", "--bfile", plink_output, "--bmerge", hl_output,
                 "--merge-mode", "6", "--out", merge_output])

    # Any row other than the header means plink reported a difference.
    header = ["SNP", "FID", "IID", "NEW", "OLD"]
    with open(merge_output + ".diff") as diff_file:
        same = all(diff_line.strip().split() == header
                   for diff_line in diff_file)

    self.assertTrue(same)
def test_export_plink(self):
    """Compare ``hl.export_plink`` output against plink's own conversion.

    The split sample VCF is written both as a VCF and as a plink fileset.
    plink turns the VCF into a bed fileset, the ``.bim`` variant IDs of that
    fileset are rewritten as ``col0:col3:col5:col4``, and the two filesets
    are merged with ``--merge-mode 6``. The ``.diff`` report must contain
    only its header row.
    """
    expected_header = ["SNP", "FID", "IID", "NEW", "OLD"]

    source_vcf = resource('sample.vcf')
    imported = hl.import_vcf(source_vcf, min_partitions=10)
    split_mt = hl.split_multi_hts(imported)

    split_vcf_file = uri_path(new_temp_file())
    hl_output = uri_path(new_temp_file())
    plink_output = uri_path(new_temp_file())
    merge_output = uri_path(new_temp_file())

    hl.export_vcf(split_mt, split_vcf_file)
    hl.export_plink(split_mt, hl_output)

    vcf_to_bed = ["plink", "--vcf", split_vcf_file, "--make-bed"]
    vcf_to_bed += ["--out", plink_output, "--const-fid", "--keep-allele-order"]
    run_command(vcf_to_bed)

    # Rewrite the variant ID column of plink's .bim from its other columns.
    rewritten_rows = []
    with open(uri_path(plink_output + ".bim")) as bim_reader:
        for raw_row in bim_reader:
            cols = raw_row.strip().split()
            variant_id = ":".join([cols[0], cols[3], cols[5], cols[4]])
            cols[1] = variant_id
            rewritten_rows.append("\t".join(cols) + "\n")

    with open(plink_output + ".bim", 'w') as bim_writer:
        bim_writer.writelines(rewritten_rows)

    merge_cmd = ["plink", "--bfile", plink_output, "--bmerge", hl_output]
    merge_cmd += ["--merge-mode", "6", "--out", merge_output]
    run_command(merge_cmd)

    # Stop at the first row that is not the header: that row is a mismatch.
    with open(merge_output + ".diff") as diff_reader:
        found_mismatch = any(
            entry.strip().split() != expected_header for entry in diff_reader)

    self.assertTrue(not found_mismatch)
def test_old_index_file_throws_error(self):
    """Check that ``hl.import_bgen`` rejects a BGEN file lacking an ``.idx2`` index.

    Two scenarios must both raise ``FatalError`` matching
    ``'have no .idx2 index file'``:

    1. No index at all: any existing ``.idx2`` path is removed first.
    2. Only an old-style ``.idx`` file next to the BGEN file.

    The temporary ``.idx`` file is removed in a ``finally`` clause so that a
    failing assertion does not leave it behind in the resources directory.
    """
    sample_file = resource('random.sample')
    bgen_file = resource('random.bgen')

    # missing file
    if os.path.exists(bgen_file + '.idx2'):
        run_command(['rm', '-r', bgen_file + '.idx2'])
    with self.assertRaisesRegex(FatalError, 'have no .idx2 index file'):
        hl.import_bgen(bgen_file, ['GT', 'GP'], sample_file, n_partitions=3)

    # old index file
    run_command(['touch', bgen_file + '.idx'])
    try:
        with self.assertRaisesRegex(FatalError, 'have no .idx2 index file'):
            hl.import_bgen(bgen_file, ['GT', 'GP'], sample_file)
    finally:
        # Always clean up the stale index, even when the assertion fails.
        run_command(['rm', bgen_file + '.idx'])