def merge_snps(self):
    """Rebuild the merged, lineage-generated SNPs file for this individual.

    Returns immediately unless ``self.snps_can_be_merged`` is truthy.
    Otherwise a lone previously generated SNPs file and any stored
    discrepant SNPs are deleted, every remaining SNPs file is loaded into
    one lineage individual (files whose build is not 37 are remapped to
    Build 37 first), and the merged output is registered through
    ``self.add_snps`` when ``parse_snps`` reports it as valid.
    """
    if not self.snps_can_be_merged:
        return

    # The lineage-generated file is about to be remade, so drop the old one.
    generated = self.snps.filter(generated_by_lineage=True)
    if len(generated) == 1:
        generated[0].delete()

    # Discrepant SNPs are refreshed by the merge below.
    if self.get_discrepant_snps():
        self.discrepant_snps.delete()

    with tempfile.TemporaryDirectory() as workdir:
        lineage = Lineage(output_dir=workdir, parallelize=False)
        merged = lineage.create_individual("ind")

        for source in self.snps.all():
            if source.build == 37:
                merged.load_snps(source.file.path)
            else:
                # Remap to Build 37 before loading into the merged individual.
                remapped = lineage.create_individual("temp", source.file.path)
                remapped.remap_snps(37, parallelize=False)
                remapped_file = remapped.save_snps()
                merged.load_snps(remapped_file)
                del remapped
            source.merged = True
            source.save()

        # Nothing was loaded, so there is nothing to store.
        if merged.snp_count == 0:
            return

        if len(merged.discrepant_snps) != 0:
            record = DiscrepantSnps.objects.create(
                user=self.user,
                individual=self,
                snp_count=len(merged.discrepant_snps),
            )
            saved_discrepant = merged.save_discrepant_snps()
            record.file.name = record.get_relative_path()
            record.save()
            # Move the file out of the temporary directory before it is removed.
            shutil.move(saved_discrepant, record.file.path)

        merged_path = merged.save_snps()
        summary_info, snps_is_valid = parse_snps(merged_path)
        if not snps_is_valid:
            return

        summary_info["generated_by_lineage"] = True
        summary_info["merged"] = True
        self.add_snps(merged_path, summary_info)
def remap_snps(self):
    """Create Build 36 and Build 38 remappings of this individual's SNPs.

    Returns immediately when three lineage-generated SNPs files already
    exist. When exactly one exists it is used as the remapping source;
    otherwise the source is ``self.get_canonical_snps()``. Nothing happens
    if no source is available. Each remapped file that ``parse_snps``
    reports as valid is registered through ``self.add_snps``.
    """
    # Evaluate the lineage-generated files once rather than re-running the
    # same query for every check below.
    generated = list(self.snps.filter(generated_by_lineage=True))

    # SNPs already remapped
    if len(generated) == 3:
        return

    if len(generated) == 1:
        snps = generated[0]
    else:
        # TODO: merge SNPs here, but for now just get canonical SNPs; assume Build 37
        snps = self.get_canonical_snps()

    if not snps:
        return

    # (individual name, target build) for each remapping to produce, in order.
    targets = (("lineage_NCBI36", 36), ("lineage_GRCh38", 38))

    with tempfile.TemporaryDirectory() as tmpdir:
        lineage = Lineage(output_dir=tmpdir, parallelize=False)

        for name, build in targets:
            ind = lineage.create_individual(name, snps.file.path)
            ind.remap_snps(build, parallelize=False)
            remapped_file = ind.save_snps()

            summary_info, snps_is_valid = parse_snps(remapped_file)
            if snps_is_valid:
                summary_info["generated_by_lineage"] = True
                summary_info["merged"] = True
                self.add_snps(remapped_file, summary_info)
#!/usr/local/bin/python3.8
"""Compare two DNA files with lineage: discordant SNPs and shared DNA.

Usage: <script> NAME1 NAME2 FILE1 FILE2

FILE1 and FILE2 are taken relative to ``storage/app/dna/``; lineage is
given ``storage/app/dna/output`` as its output directory.
"""
import logging
import sys

DNA_DIR = "storage/app/dna/"
OUTPUT_DIR = "storage/app/dna/output"


def main(argv=None):
    """Run the two-individual comparison.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when too few arguments
        were supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        prog = argv[0] if argv else "script"
        print(f"usage: {prog} NAME1 NAME2 FILE1 FILE2", file=sys.stderr)
        return 2

    # Send INFO-level log records from the root logger to stdout.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler(sys.stdout))

    # Imported only after logging is configured, matching the original
    # statement order of the script.
    from lineage import Lineage

    name1, name2, rel_file1, rel_file2 = argv[1:5]

    lineage = Lineage(output_dir=OUTPUT_DIR)
    first = lineage.create_individual(name1, DNA_DIR + rel_file1)
    second = lineage.create_individual(name2, DNA_DIR + rel_file2)

    lineage.find_discordant_snps(first, second, save_output=True)
    lineage.find_shared_dna(
        [first, second], cM_threshold=0.75, snp_threshold=1100
    )
    return 0


if __name__ == "__main__":
    sys.exit(main())
class TestSnps(BaseLineageTestCase):
    """Tests for ``SNPs`` properties, summaries and build detection."""

    def setUp(self):
        self.l = Lineage()
        self.snps_GRCh38 = SNPs("tests/input/GRCh38.csv")
        self.snps = SNPs("tests/input/chromosomes.csv")
        self.snps_none = SNPs(None)
        self.del_output_dir_helper()

    def snps_discrepant_pos(self):
        """Return a one-row SNP frame for rs3094315 at position 1."""
        return self.create_snp_df(
            rsid=["rs3094315"], chrom=["1"], pos=[1], genotype=["AA"]
        )

    def test_assembly(self):
        assert self.snps_GRCh38.assembly == "GRCh38"

    def test_assembly_no_snps(self):
        assert self.snps_none.assembly == ""

    def test_snp_count(self):
        assert self.snps.snp_count == 6

    def test_snp_count_no_snps(self):
        assert self.snps_none.snp_count == 0

    def test_chromosomes(self):
        assert self.snps.chromosomes == ["1", "2", "3", "5", "PAR", "MT"]

    def test_chromosomes_no_snps(self):
        assert self.snps_none.chromosomes == []

    def test_chromosomes_summary(self):
        assert self.snps.chromosomes_summary == "1-3, 5, PAR, MT"

    def test_chromosomes_summary_no_snps(self):
        assert self.snps_none.chromosomes_summary == ""

    def test_build_no_snps(self):
        assert self.snps_none.build is None

    def test_build_detected_no_snps(self):
        assert not self.snps_none.build_detected

    def test_build_detected_PAR_snps(self):
        # Only exercised when the DOWNLOADS_ENABLED environment variable is set.
        if os.getenv("DOWNLOADS_ENABLED"):
            snps = SNPs("tests/input/GRCh37_PAR.csv")
            assert snps.build == 37
            assert snps.build_detected

    def test_sex_no_snps(self):
        assert self.snps_none.sex == ""

    def test_sex_Male_Y_chrom(self):
        ind = self.simulate_snps(
            self.l.create_individual("test_snps_sex_Male_Y_chrom"),
            chrom="Y",
            pos_start=1,
            pos_max=59373566,
            pos_step=10000,
        )
        file = ind.save_snps()
        # Re-read the saved file through SNPs to check the reported sex.
        snps = SNPs(file)
        assert snps.sex == "Male"

    def test_get_summary(self):
        assert self.snps_GRCh38.get_summary() == {
            "source": "generic",
            "assembly": "GRCh38",
            "build": 38,
            "build_detected": True,
            "snp_count": 4,
            "chromosomes": "1, 3",
            "sex": "",
        }

    def test_get_summary_no_snps(self):
        assert self.snps_none.get_summary() is None

    def test_is_valid_True(self):
        assert self.snps_GRCh38.is_valid()

    def test_is_valid_False(self):
        assert not self.snps_none.is_valid()

    def test__read_raw_data(self):
        assert self.snps_none.snps is None
        assert self.snps_none.source == ""

    def test__lookup_build_with_snp_pos_None(self):
        snps = SNPs()
        snps._snps = self.snps_discrepant_pos()
        assert snps.detect_build() is None

    def test_get_assembly_None(self):
        snps = SNPs()
        snps._build = None
        # Compare by value: identity against a string literal depends on
        # interning and triggers a SyntaxWarning on Python 3.8+.
        assert snps.get_assembly() == ""
def find_discordant_snps(self, progress_recorder=None):
    """Find discordant SNPs between two (optionally three) individuals.

    The canonical SNPs of ``individual1`` and ``individual2`` (and of
    ``individual3`` when it is set) are required; if any is missing, this
    record is deleted and the method returns. Otherwise the SNPs files are
    copied into a temporary directory, lineage computes the discordant
    SNPs, the count is stored, the saved output file is compressed to the
    CSV location, the result frame is pickled, and the record is saved
    with ``setup_complete`` set.

    ``progress_recorder`` is accepted but not used by this method.
    """
    ind1_snps = self.individual1.get_canonical_snps()
    ind2_snps = self.individual2.get_canonical_snps()

    if not (ind1_snps and ind2_snps):
        self.delete()
        return

    ind3_snps = None
    if self.individual3:
        ind3_snps = self.individual3.get_canonical_snps()
        if not ind3_snps:
            self.delete()
            return

    with tempfile.TemporaryDirectory() as workdir:
        lineage = Lineage(output_dir=workdir, parallelize=False)

        def stage(label, snps):
            # Copy a stored SNPs file into the working directory.
            target = os.path.join(workdir, label + snps.file_ext)
            return shutil.copy(snps.file.path, target)

        ind1_copy = stage("ind1_snps", ind1_snps)
        ind2_copy = stage("ind2_snps", ind2_snps)
        ind3_copy = None
        if ind3_snps is not None:
            ind3_copy = stage("ind3_snps", ind3_snps)

        ind1 = lineage.create_individual(self.individual1.name, ind1_copy)
        ind2 = lineage.create_individual(self.individual2.name, ind2_copy)
        ind3 = None
        if ind3_snps is not None:
            ind3 = lineage.create_individual(self.individual3.name, ind3_copy)

        discordant = lineage.find_discordant_snps(
            ind1, ind2, ind3, save_output=True
        )
        self.total_discordant_snps = len(discordant)

        # Locate the file lineage saved and persist it outside the temp dir.
        for root, _dirs, filenames in os.walk(workdir):
            for filename in filenames:
                if "discordant_snps" not in filename:
                    continue

                saved_path = os.path.join(root, filename)
                self.discordant_snps_csv.name = get_relative_user_dir_file(
                    self.user.uuid, uuid4()
                )
                compress_file(saved_path, self.discordant_snps_csv.path)

                self.discordant_snps_pickle = get_relative_user_dir_file(
                    self.user.uuid, uuid4(), ".pkl.gz"
                )
                discordant.to_pickle(self.discordant_snps_pickle.path)
                break

    self.setup_complete = True
    self.save()
def find_shared_dna_genes(self, progress_recorder=None):
    """Find shared DNA and shared genes between two individuals.

    The canonical SNPs of ``individual1`` and ``individual2`` are
    required; if either is missing, this record is deleted and the method
    returns. Otherwise the SNPs files are copied into a temporary
    directory, lineage computes the shared DNA / shared genes tables using
    ``self.cM_threshold`` and ``self.snp_threshold``, summary totals are
    stored on the record, each saved output file is moved or compressed to
    its storage location with a matching pickle of the result table, and
    the record is saved with ``setup_complete`` set.

    ``progress_recorder`` is accepted but not used by this method.
    """
    ind1_snps = self.individual1.get_canonical_snps()
    ind2_snps = self.individual2.get_canonical_snps()

    if not (ind1_snps and ind2_snps):
        self.delete()
        return

    with tempfile.TemporaryDirectory() as workdir:
        lineage = Lineage(output_dir=workdir, parallelize=False)

        ind1_copy = shutil.copy(
            ind1_snps.file.path,
            os.path.join(workdir, "ind1_snps" + ind1_snps.file_ext),
        )
        ind2_copy = shutil.copy(
            ind2_snps.file.path,
            os.path.join(workdir, "ind2_snps" + ind2_snps.file_ext),
        )

        ind1 = lineage.create_individual(self.individual1.name, ind1_copy)
        ind2 = lineage.create_individual(self.individual2.name, ind2_copy)

        results = lineage.find_shared_dna(
            ind1,
            ind2,
            cM_threshold=float(self.cM_threshold),
            snp_threshold=int(self.snp_threshold),
            shared_genes=True,
            save_output=True,
        )
        dna_one, dna_two, genes_one, genes_two = results

        # Summary totals for the one-chromosome and two-chromosome results.
        self.total_shared_segments_one_chrom = len(dna_one)
        self.total_shared_segments_two_chrom = len(dna_two)
        self.total_shared_cMs_one_chrom = Decimal(dna_one["cMs"].sum())
        self.total_shared_cMs_two_chrom = Decimal(dna_two["cMs"].sum())
        self.total_snps_one_chrom = dna_one["snps"].sum()
        self.total_snps_two_chrom = dna_two["snps"].sum()
        self.total_chrom_one_chrom = len(dna_one["chrom"].unique())
        self.total_chrom_two_chrom = len(dna_two["chrom"].unique())
        self.total_shared_genes_one_chrom = len(genes_one)
        self.total_shared_genes_two_chrom = len(genes_two)

        # Filename marker -> result table. The marker is also the prefix of
        # the "<marker>_csv" / "<marker>_pickle" attributes. Order matters:
        # the first marker found in a filename wins.
        outputs = (
            ("shared_dna_one_chrom", dna_one),
            ("shared_genes_one_chrom", genes_one),
            ("shared_dna_two_chrom", dna_two),
            ("shared_genes_two_chrom", genes_two),
        )

        for root, _dirs, filenames in os.walk(workdir):
            for filename in filenames:
                saved_path = os.path.join(root, filename)

                # The plot is checked before any table marker.
                if ".png" in filename:
                    self.shared_dna_plot_png.name = get_relative_user_dir_file(
                        self.user.uuid, uuid4(), ".png"
                    )
                    shutil.move(saved_path, self.shared_dna_plot_png.path)
                    os.chmod(self.shared_dna_plot_png.path, 0o640)
                    continue

                for marker, table in outputs:
                    if marker not in filename:
                        continue

                    csv_attr = marker + "_csv"
                    pickle_attr = marker + "_pickle"

                    setattr(
                        self,
                        csv_attr,
                        get_relative_user_dir_file(self.user.uuid, uuid4()),
                    )
                    compress_file(saved_path, getattr(self, csv_attr).path)

                    setattr(
                        self,
                        pickle_attr,
                        get_relative_user_dir_file(
                            self.user.uuid, uuid4(), ".pkl.gz"
                        ),
                    )
                    table.to_pickle(getattr(self, pickle_attr).path)
                    break

    self.setup_complete = True
    self.save()