def test_remap_snps_invalid_assembly(self):
    """Requesting an invalid target assembly (-1) must leave the data untouched.

    The build stays at 37 / GRCh37, no chromosome is reported as remapped,
    and both chromosomes are reported as not remapped.
    """
    s = SNPs("tests/input/GRCh37.csv")

    remapped, not_remapped = s.remap_snps(-1)

    # Build metadata is unchanged by the rejected request.
    assert s.build == 37
    assert s.assembly == "GRCh37"

    assert len(remapped) == 0
    assert len(not_remapped) == 2
def test_remap_snps_37_to_37(self):
    """Remapping build 37 data to build 37 reports nothing as remapped.

    The build remains 37 / GRCh37, both chromosomes are reported as not
    remapped, and the SNPs frame equals the GRCh37 reference frame.
    """
    s = SNPs("tests/input/GRCh37.csv")

    remapped, not_remapped = s.remap_snps(37)

    assert s.build == 37
    assert s.assembly == "GRCh37"

    assert len(remapped) == 0
    assert len(not_remapped) == 2

    actual = s.snps
    expected = self.snps_GRCh37()
    pd.testing.assert_frame_equal(actual, expected)
def test_remap_snps_37_to_36(self):
    """Remapping build 37 data to build 36 converts both chromosomes.

    Afterwards the build is 36 / NCBI36, both chromosomes are reported as
    remapped, and the SNPs frame equals the NCBI36 reference frame.
    """
    s = SNPs("tests/input/GRCh37.csv")

    remapped, not_remapped = s.remap_snps(36)

    assert s.build == 36
    assert s.assembly == "NCBI36"

    assert len(remapped) == 2
    assert len(not_remapped) == 0

    actual = s.snps
    expected = self.snps_NCBI36()
    pd.testing.assert_frame_equal(actual, expected)
def test_remap_snps_36_to_37_multiprocessing(self):
    """Remap build 36 data to build 37 with ``parallelize=True``.

    Afterwards the build is 37 / GRCh37, both chromosomes are reported as
    remapped, and the SNPs frame equals the GRCh37 reference frame.
    """
    s = SNPs("tests/input/NCBI36.csv", parallelize=True)

    remapped, not_remapped = s.remap_snps(37)

    assert s.build == 37
    assert s.assembly == "GRCh37"

    assert len(remapped) == 2
    assert len(not_remapped) == 0

    actual = s.snps
    expected = self.snps_GRCh37()
    pd.testing.assert_frame_equal(actual, expected)
def f2():
    """Remap the NCBI36 fixture to build 37 and verify the outcome.

    Uses ``self`` from the enclosing scope for the assertions and for the
    expected GRCh37 frame, which is compared with ``check_exact=True``.
    """
    s = SNPs("tests/input/NCBI36.csv")

    remapped, not_remapped = s.remap_snps(37)

    self.assertEqual(s.build, 37)
    self.assertEqual(s.assembly, "GRCh37")

    self.assertEqual(len(remapped), 2)
    self.assertEqual(len(not_remapped), 0)

    actual = s.snps
    expected = self.snps_GRCh37()
    pd.testing.assert_frame_equal(actual, expected, check_exact=True)
def test_remap_snps_37_to_38_with_PAR_SNP(self):
    """Remap the GRCh37 PAR fixture to build 38.

    The checks run only when the ``DOWNLOADS_ENABLED`` environment variable
    is unset/empty or equal to ``"true"``; otherwise the test does nothing.
    Starting from 3 SNPs, the remap is expected to report two chromosomes
    as remapped and one as not remapped, leaving 2 SNPs that match the
    GRCh38 PAR reference frame.
    """
    downloads_flag = os.getenv("DOWNLOADS_ENABLED")
    if downloads_flag and downloads_flag != "true":
        # Set to something other than "true": skip silently.
        return

    s = SNPs("tests/input/GRCh37_PAR.csv")
    assert s.snp_count == 3

    remapped, not_remapped = s.remap_snps(38)

    assert s.build == 38
    assert s.assembly == "GRCh38"

    assert len(remapped) == 2
    assert len(not_remapped) == 1
    assert s.snp_count == 2

    actual = s.snps
    expected = self.snps_GRCh38_PAR()
    pd.testing.assert_frame_equal(actual, expected)
#! /usr/bin/env python
"""Validate an input VCF file and remap it to GRCh37 when required.

Depends on:
    > python 3
    > argparse==1.4.0
    > snps==0.4.0
    > io
"""
import argparse
import io

from snps import SNPs

# Command-line interface: one input path and one output basename.
parser = argparse.ArgumentParser(description="Remap VCF files to GRCh37")
parser.add_argument("-i", "--input_file", help="Input VCF file")
parser.add_argument("-o", "--output_file", help="Output VCF file basename")
args = vars(parser.parse_args())

input_file, output_file = args["input_file"], args["output_file"]
output_file_name = "{}.vcf".format(output_file)

# Loading the file through SNPs also validates it.
snps = SNPs(input_file)

# Only remap when a build was detected and it is not already 37.
needs_remap = snps.build_detected and snps.build != 37
if needs_remap:
    snps.remap_snps(37)

# Write the result as a VCF.
saved_snps = snps.save_snps(
    output_file_name,
    sep="\t",
    header=False,
    vcf=True,
)
) parser.add_argument( '-t', '--input_target', help= 'Input BIM file (a combination of all BIM files, transformed into a 23andme-like format' ) parser.add_argument( '-b', '--input_base', help='Input base file, transformed into a 23andme-like format') args = vars(parser.parse_args()) # Args to variable input_target = args['input_target'] input_base = args['input_base'] ############################################################################### # Detect builds and update the base's build if it does not match the target's # ############################################################################### target = SNPs(input_target, output_dir='.') base = SNPs(input_base, output_dir='.') if base.build != target.build: base.remap_snps(target.build) updated_base = base.save_snps("new_base_coordinates.txt", sep="\t", header=True)
def test_remap_snps_no_snps(self):
    """Remapping an empty ``SNPs`` object reports no chromosomes at all.

    The build stays falsy and both returned chromosome collections are
    empty.
    """
    s = SNPs()

    remapped, not_remapped = s.remap_snps(38)

    assert not s.build

    assert len(remapped) == 0
    assert len(not_remapped) == 0