Пример #1
0
    def merge_snps(self):
        """Merge all of this individual's SNPs files into one Build 37 file.

        Returns immediately when ``self.snps_can_be_merged`` is falsy.
        Otherwise every SNPs record is loaded into a single lineage
        individual (files whose build is not 37 are remapped to Build 37
        first) and flagged as merged. If the merge yields any SNPs, the
        discrepant SNPs (when present) and the merged SNPs are saved and
        registered on this individual.
        """
        if not self.snps_can_be_merged:
            return

        # remove SNPs generated by lineage since we're remaking that file
        generated = self.snps.filter(generated_by_lineage=True)
        if len(generated) == 1:
            generated[0].delete()

        # remove discrepant SNPs since we'll be refreshing that data
        if self.get_discrepant_snps():
            self.discrepant_snps.delete()

        with tempfile.TemporaryDirectory() as tmpdir:
            lin = Lineage(output_dir=tmpdir, parallelize=False)
            merged_ind = lin.create_individual("ind")

            for record in self.snps.all():
                if record.build == 37:
                    merged_ind.load_snps(record.file.path)
                else:
                    # bring the file to Build 37 before merging it in
                    temp = lin.create_individual("temp", record.file.path)
                    temp.remap_snps(37, parallelize=False)
                    remapped_file = temp.save_snps()
                    merged_ind.load_snps(remapped_file)
                    del temp

                record.merged = True
                record.save()

            # nothing was loaded, so there is nothing to save
            if merged_ind.snp_count == 0:
                return

            discrepant_count = len(merged_ind.discrepant_snps)
            if discrepant_count != 0:
                dsnps = DiscrepantSnps.objects.create(
                    user=self.user,
                    individual=self,
                    snp_count=discrepant_count,
                )
                discrepant_file = merged_ind.save_discrepant_snps()
                dsnps.file.name = dsnps.get_relative_path()
                dsnps.save()
                shutil.move(discrepant_file, dsnps.file.path)

            merged_file = merged_ind.save_snps()
            summary_info, snps_is_valid = parse_snps(merged_file)

            if not snps_is_valid:
                return

            summary_info["generated_by_lineage"] = True
            summary_info["merged"] = True
            self.add_snps(merged_file, summary_info)
Пример #2
0
    def remap_snps(self):
        """Create Build 36 and Build 38 remappings of this individual's SNPs.

        The source file is the single lineage-generated SNPs record when
        exactly one exists; otherwise the canonical SNPs are used. Nothing
        is done when three lineage-generated records already exist (treated
        as "already remapped") or when no source SNPs are available.

        Each remapped file is parsed and, if valid, registered through
        ``self.add_snps`` with ``generated_by_lineage`` and ``merged`` set.
        """
        # Query once and reuse the count for both checks below.
        generated = self.snps.filter(generated_by_lineage=True)
        generated_count = len(generated)

        # SNPs already remapped
        if generated_count == 3:
            return

        if generated_count == 1:
            snps = generated.get()
        else:
            # TODO: merge SNPs here, but for now just get canonical SNPs; assume Build 37
            snps = self.get_canonical_snps()

        if not snps:
            return

        # (lineage individual name, target build), processed in this order
        remap_targets = (("lineage_NCBI36", 36), ("lineage_GRCh38", 38))

        with tempfile.TemporaryDirectory() as tmpdir:
            lin = Lineage(output_dir=tmpdir, parallelize=False)

            for ind_name, build in remap_targets:
                ind = lin.create_individual(ind_name, snps.file.path)
                ind.remap_snps(build, parallelize=False)
                remapped_file = ind.save_snps()

                summary_info, snps_is_valid = parse_snps(remapped_file)

                if snps_is_valid:
                    summary_info["generated_by_lineage"] = True
                    summary_info["merged"] = True
                    self.add_snps(remapped_file, summary_info)
Пример #3
0
#!/usr/local/bin/python3.8
# Compare the SNP files of two individuals with lineage: find discordant SNPs
# and shared DNA segments.
#
# Usage: <script> NAME1 NAME2 FILE1 FILE2
#   NAME1, NAME2  names given to the two lineage individuals
#   FILE1, FILE2  SNP file paths, relative to storage/app/dna/
import logging
import sys

from lineage import Lineage

# Echo INFO-level (and above) records from the root logger to stdout.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout))

# Fail with a usage message instead of an IndexError traceback.
if len(sys.argv) < 5:
    sys.exit("usage: %s NAME1 NAME2 FILE1 FILE2" % sys.argv[0])

l = Lineage(output_dir='storage/app/dna/output')

name1 = sys.argv[1]
name2 = sys.argv[2]

file1 = "storage/app/dna/" + sys.argv[3]
file2 = "storage/app/dna/" + sys.argv[4]

ind1 = l.create_individual(name1, file1)
ind2 = l.create_individual(name2, file2)

discordant_snps = l.find_discordant_snps(ind1, ind2, save_output=True)

# The count was previously computed and discarded; report it instead.
non_mt_discordant = discordant_snps.loc[discordant_snps['chrom'] != 'MT']
logger.info("Discordant SNPs (excluding MT): %d", len(non_mt_discordant))

results = l.find_shared_dna([ind1, ind2],
                            cM_threshold=0.75,
                            snp_threshold=1100)
Пример #4
0
class TestSnps(BaseLineageTestCase):
    """Tests for the summary properties and helpers of ``SNPs`` objects.

    Fixtures built in ``setUp``:
      * ``snps_GRCh38`` - loaded from ``tests/input/GRCh38.csv``
      * ``snps``        - loaded from ``tests/input/chromosomes.csv``
      * ``snps_none``   - constructed with ``None`` (no data)
    """

    def setUp(self):
        self.l = Lineage()
        self.snps_GRCh38 = SNPs("tests/input/GRCh38.csv")
        self.snps = SNPs("tests/input/chromosomes.csv")
        self.snps_none = SNPs(None)
        self.del_output_dir_helper()

    def snps_discrepant_pos(self):
        # Single-SNP frame (rs3094315 at position 1) used by the
        # build-detection test below, which expects no build to be found.
        return self.create_snp_df(rsid=["rs3094315"],
                                  chrom=["1"],
                                  pos=[1],
                                  genotype=["AA"])

    def test_assembly(self):
        assert self.snps_GRCh38.assembly == "GRCh38"

    def test_assembly_no_snps(self):
        assert self.snps_none.assembly == ""

    def test_snp_count(self):
        assert self.snps.snp_count == 6

    def test_snp_count_no_snps(self):
        assert self.snps_none.snp_count == 0

    def test_chromosomes(self):
        assert self.snps.chromosomes == ["1", "2", "3", "5", "PAR", "MT"]

    def test_chromosomes_no_snps(self):
        assert self.snps_none.chromosomes == []

    def test_chromosomes_summary(self):
        assert self.snps.chromosomes_summary == "1-3, 5, PAR, MT"

    def test_chromosomes_summary_no_snps(self):
        assert self.snps_none.chromosomes_summary == ""

    def test_build_no_snps(self):
        assert self.snps_none.build is None

    def test_build_detected_no_snps(self):
        assert not self.snps_none.build_detected

    def test_build_detected_PAR_snps(self):
        # Only exercised when the DOWNLOADS_ENABLED environment variable is
        # set; otherwise this test passes without asserting anything.
        if os.getenv("DOWNLOADS_ENABLED"):
            snps = SNPs("tests/input/GRCh37_PAR.csv")
            assert snps.build == 37
            assert snps.build_detected

    def test_sex_no_snps(self):
        assert self.snps_none.sex == ""

    def test_sex_Male_Y_chrom(self):
        # Simulate Y-chromosome SNPs, save them, and reload the saved file.
        ind = self.simulate_snps(
            self.l.create_individual("test_snps_sex_Male_Y_chrom"),
            chrom="Y",
            pos_start=1,
            pos_max=59373566,
            pos_step=10000,
        )
        file = ind.save_snps()
        from lineage.snps import SNPs

        snps = SNPs(file)
        assert snps.sex == "Male"

    def test_get_summary(self):
        assert self.snps_GRCh38.get_summary() == {
            "source": "generic",
            "assembly": "GRCh38",
            "build": 38,
            "build_detected": True,
            "snp_count": 4,
            "chromosomes": "1, 3",
            "sex": "",
        }

    def test_get_summary_no_snps(self):
        assert self.snps_none.get_summary() is None

    def test_is_valid_True(self):
        assert self.snps_GRCh38.is_valid()

    def test_is_valid_False(self):
        assert not self.snps_none.is_valid()

    def test__read_raw_data(self):
        assert self.snps_none.snps is None
        assert self.snps_none.source == ""

    def test__lookup_build_with_snp_pos_None(self):
        snps = SNPs()
        snps._snps = self.snps_discrepant_pos()
        assert snps.detect_build() is None

    def test_get_assembly_None(self):
        snps = SNPs()
        snps._build = None
        # Compare by value: `is ""` tests object identity, which is
        # implementation-dependent and a SyntaxWarning on Python 3.8+.
        assert snps.get_assembly() == ""
Пример #5
0
    def find_discordant_snps(self, progress_recorder=None):
        """Find discordant SNPs between two (optionally three) individuals.

        The canonical SNPs of each individual are copied into a temporary
        directory and analysed with lineage. The number of discordant SNPs
        is stored on this record, and the first output file in each walked
        directory whose name contains ``discordant_snps`` is saved as a
        compressed CSV together with a pickle of the result.

        If any participating individual has no canonical SNPs, this record
        is deleted and the method returns early. ``progress_recorder`` is
        accepted but not used here.
        """
        first_snps = self.individual1.get_canonical_snps()
        second_snps = self.individual2.get_canonical_snps()

        if not (first_snps and second_snps):
            self.delete()
            return

        # Stays None when there is no third individual.
        third_snps = None
        if self.individual3:
            third_snps = self.individual3.get_canonical_snps()

            if not third_snps:
                self.delete()
                return

        with tempfile.TemporaryDirectory() as tmpdir:
            lin = Lineage(output_dir=tmpdir, parallelize=False)

            def stage(snps, stem):
                # Copy a SNPs file into tmpdir, keeping its extension.
                return shutil.copy(
                    snps.file.path,
                    os.path.join(tmpdir, stem + snps.file_ext),
                )

            first_file = stage(first_snps, "ind1_snps")
            second_file = stage(second_snps, "ind2_snps")
            third_file = None
            if third_snps is not None:
                third_file = stage(third_snps, "ind3_snps")

            first = lin.create_individual(self.individual1.name, first_file)
            second = lin.create_individual(self.individual2.name, second_file)

            third = None
            if third_snps is not None:
                third = lin.create_individual(self.individual3.name,
                                              third_file)

            discordant_snps = lin.find_discordant_snps(first,
                                                       second,
                                                       third,
                                                       save_output=True)

            self.total_discordant_snps = len(discordant_snps)

            for root, _dirs, filenames in os.walk(tmpdir):
                for filename in filenames:
                    if "discordant_snps" not in filename:
                        continue

                    self.discordant_snps_csv.name = get_relative_user_dir_file(
                        self.user.uuid, uuid4())
                    compress_file(os.path.join(root, filename),
                                  self.discordant_snps_csv.path)

                    self.discordant_snps_pickle = get_relative_user_dir_file(
                        self.user.uuid, uuid4(), ".pkl.gz")
                    discordant_snps.to_pickle(
                        self.discordant_snps_pickle.path)

                    # only the first match in this directory is stored
                    break

        self.setup_complete = True
        self.save()
Пример #6
0
    def find_shared_dna_genes(self, progress_recorder=None):
        """Find shared DNA segments and shared genes between two individuals.

        The canonical SNPs of both individuals are copied into a temporary
        directory and analysed with lineage using this record's
        ``cM_threshold`` and ``snp_threshold``. Summary totals are stored on
        this record, and the generated plot and result tables are saved
        (tables as a compressed CSV plus a pickle).

        If either individual has no canonical SNPs, this record is deleted
        and the method returns early. ``progress_recorder`` is accepted but
        not used here.
        """
        snps_one = self.individual1.get_canonical_snps()
        snps_two = self.individual2.get_canonical_snps()

        if not (snps_one and snps_two):
            self.delete()
            return

        with tempfile.TemporaryDirectory() as tmpdir:
            lin = Lineage(output_dir=tmpdir, parallelize=False)

            staged_one = shutil.copy(
                snps_one.file.path,
                os.path.join(tmpdir, "ind1_snps" + snps_one.file_ext),
            )
            staged_two = shutil.copy(
                snps_two.file.path,
                os.path.join(tmpdir, "ind2_snps" + snps_two.file_ext),
            )

            first = lin.create_individual(self.individual1.name, staged_one)
            second = lin.create_individual(self.individual2.name, staged_two)

            dna_one, dna_two, genes_one, genes_two = lin.find_shared_dna(
                first,
                second,
                cM_threshold=float(self.cM_threshold),
                snp_threshold=int(self.snp_threshold),
                shared_genes=True,
                save_output=True,
            )

            self.total_shared_segments_one_chrom = len(dna_one)
            self.total_shared_segments_two_chrom = len(dna_two)
            self.total_shared_cMs_one_chrom = Decimal(dna_one["cMs"].sum())
            self.total_shared_cMs_two_chrom = Decimal(dna_two["cMs"].sum())
            self.total_snps_one_chrom = dna_one["snps"].sum()
            self.total_snps_two_chrom = dna_two["snps"].sum()
            self.total_chrom_one_chrom = len(dna_one["chrom"].unique())
            self.total_chrom_two_chrom = len(dna_two["chrom"].unique())
            self.total_shared_genes_one_chrom = len(genes_one)
            self.total_shared_genes_two_chrom = len(genes_two)

            # Filename marker -> result table; the marker is also the prefix
            # of the "<marker>_csv" / "<marker>_pickle" attributes. Order
            # matters: the first marker found in a filename wins.
            tables = (
                ("shared_dna_one_chrom", dna_one),
                ("shared_genes_one_chrom", genes_one),
                ("shared_dna_two_chrom", dna_two),
                ("shared_genes_two_chrom", genes_two),
            )

            for root, _dirs, filenames in os.walk(tmpdir):
                for filename in filenames:
                    source_path = os.path.join(root, filename)

                    if ".png" in filename:
                        self.shared_dna_plot_png.name = get_relative_user_dir_file(
                            self.user.uuid, uuid4(), ".png")
                        shutil.move(source_path, self.shared_dna_plot_png.path)
                        os.chmod(self.shared_dna_plot_png.path, 0o640)
                        continue

                    for marker, table in tables:
                        if marker not in filename:
                            continue

                        csv_attr = marker + "_csv"
                        pickle_attr = marker + "_pickle"

                        setattr(self, csv_attr, get_relative_user_dir_file(
                            self.user.uuid, uuid4()))
                        compress_file(source_path,
                                      getattr(self, csv_attr).path)

                        setattr(self, pickle_attr, get_relative_user_dir_file(
                            self.user.uuid, uuid4(), ".pkl.gz"))
                        table.to_pickle(getattr(self, pickle_attr).path)
                        break

        self.setup_complete = True
        self.save()