Exemplo n.º 1
0
 def execute(self, executable):
     """The method execute checks which program has to be executed and executes this program.

     Recognised values are "haplotyping", "snvCalling", "mapping",
     "allelicDiversity" and "findLoci". Any other value is ignored and the
     method returns without doing anything. The same holds for "snvCalling"
     when the configured snv caller is neither "samtools" nor "GATK".

     For "allelicDiversity" and "findLoci" the required configuration options
     are verified first; when one is missing a hint is printed and the
     program is terminated through exit().

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     if executable == "haplotyping":
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if Program.config.snvCaller == "samtools":  # @UndefinedVariable
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif Program.config.snvCaller == "GATK":  # @UndefinedVariable
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         # One mapper instance is reused for all samples.
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if Program.config.gffFile is None:  # @UndefinedVariable
             print("When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         allelicDiversityCalculator = AllelicDiversity.AllelicDiversity(self.pool, Program.config.gffFile)  # @UndefinedVariable
         allelicDiversityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if Program.config.phenoData is None:  # @UndefinedVariable
             print("When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>")
             exit()
         if Program.config.gffFile is None:  # @UndefinedVariable
             # This message used to repeat the phenotype wording although
             # the missing input is the gff file.
             print("When finding loci, a gff file is needed, this option can be set with the option --gff <file>")
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)
Exemplo n.º 2
0
 def testExecuteBeagleGrid(self):
     """Run Beagle on the cluster for the shared test pool and verify the vcf it registers.

     The file name of the vcf stored under TestHaplotyper.chrIndex has to
     resolve to the expected filtered vcf; afterwards the number of snps in
     that file is checked.
     """
     expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
     producedVcf = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex].fileName
     # Compare absolute paths so that relative spellings of the same file match.
     producedPath = os.path.abspath(producedVcf)
     expectedPath = os.path.abspath(expectedVcf)
     self.assertEqual(producedPath, expectedPath, producedPath + " not is " + expectedPath)
     # The file has to contain exactly one snp
     self.checkNoOfSnps(expectedVcf)
Exemplo n.º 3
0
    def getAllelicDiversity(self):
        """The method getAllelicDiversity calculates the allelic diversity and writes the output to a file.

        First the Beagle haplotyping step is executed on the pool (on the
        cluster when Grid.useGrid is set, multi-threaded otherwise). Then,
        for every key of self.pool.vcf, a tab-separated file is written into
        self.pool.outputDir. The file has a header line followed by one line
        per contig holding the contig id, the reference haplotype and two
        haplotype columns per accession. A "-" is written when a contig has
        no refHaplotype attribute or no entry in the haplotypes.

        Exceptions raised while handling one vcf are logged instead of
        raised: an IndexError is reported as "no SNPs found", every other
        exception is logged and its traceback is printed. The remaining vcf
        entries are still processed.
        """
        if Grid.useGrid == True:
            Haplotyper.executeBeagleCluster(self.pool)
        else:
            Haplotyper.executeBeagleMultiThread(self.pool)

        for vcf in self.pool.vcf:
            # A key of None gets the output file name without a prefix.
            if vcf is None:
                logging.info("Starting to calculate the allelic diversity")
                outputFile = self.pool.outputDir + "/allelicDiversity.csv"
            else:
                logging.info("calculating allelic diversity of " + vcf)
                outputFile = self.pool.outputDir + "/" + vcf + "_" + "allelicDiversity.csv"
            try:
                self.vcfFile = self.pool.vcf[vcf].getFile()
                self._parseFiles(vcf)
                haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
                # list() is needed because dict views cannot be indexed on
                # Python 3. An empty dict still raises IndexError, which is
                # reported below as "no SNPs found".
                firstContigHaplotypes = list(haplotypes.values())[0]
                accessions = list(firstContigHaplotypes.keys())

                with open(outputFile, "w") as outWriter:
                    # Header: two columns per accession.
                    outWriter.write("contig\toriginal\t")
                    for accession in accessions:
                        outWriter.write(accession + "_1\t" + accession +
                                        "_2\t")
                    outWriter.write("\n")
                    for contigId in self.allContigs:
                        outWriter.write(contigId + "\t")
                        try:
                            outWriter.write(
                                self.allContigs[contigId].refHaplotype + "\t")
                        except AttributeError:
                            # Contig without a reference haplotype
                            outWriter.write("-\t")
                        for accession in accessions:
                            for i in range(2):
                                if contigId in haplotypes:
                                    outWriter.write(
                                        haplotypes[contigId][accession][i] +
                                        "\t")
                                else:
                                    outWriter.write("-\t")
                        outWriter.write("\n")
            except IndexError:
                if vcf is None:
                    logging.warning("No SNPs within contigs found")
                else:
                    logging.warning("No SNPs within contigs found of " + vcf)

            except Exception as ex:
                # Best effort: log the failure and continue with the next vcf.
                if vcf is None:
                    logging.error(
                        "an error occured during parsing the vcf file")
                else:
                    logging.error("an error occured during parsing " + vcf)
                logging.error(ex)
                traceback.print_exc()
Exemplo n.º 4
0
    def testHaplotyperPathGrid(self):
        """Run Beagle on the cluster for a pool whose only sample has just a bam file attached.

        The vcf registry of the shared test pool is emptied first, so
        the vcf has to be produced by the call itself. Afterwards the number
        of snps in the expected output file is checked.
        """
        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.bam = BamFile.BamFile(
            pool,
            sample,
            TestHaplotyper.inputBam,
            sortedBam=True,
            headerLine=True,
            duplicates=False,
            mdTag=True,
            index=True,
        )

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: the comparison of the created file name with the expected
        # file name is disabled for this test.
        self.checkNoOfSnps("../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf")
Exemplo n.º 5
0
 def testExecuteBeagleGrid(self):
     """Execute Beagle on the cluster and check which vcf ends up registered in the test pool.

     The vcf registered under TestHaplotyper.chrIndex must point at the
     expected filtered vcf (compared as absolute paths). The snp count of
     that file is verified afterwards.
     """
     wantedFile = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testFiltered.vcf"
     Haplotyper.executeBeagleCluster(TestHaplotyper.testPool)
     registeredVcf = TestHaplotyper.testPool.vcf[TestHaplotyper.chrIndex]
     actualAbs = os.path.abspath(registeredVcf.fileName)
     wantedAbs = os.path.abspath(wantedFile)
     failureText = actualAbs + " not is " + wantedAbs
     self.assertEqual(actualAbs, wantedAbs, failureText)
     # Check if the file contains exactly one snp
     self.checkNoOfSnps(wantedFile)
Exemplo n.º 6
0
 def getAllelicDiversity(self):
     """The method getAllelicDiversity calculates the allelic diversity and writes the output to a file.

     The Beagle haplotyping step is executed first (on the cluster when
     Grid.useGrid is set, multi-threaded otherwise). For every key of
     self.pool.vcf a tab-separated file is then written into
     self.pool.outputDir: a header line, followed by one line per contig
     with the contig id, the reference haplotype and two haplotype columns
     per accession. Missing values are written as "-".

     Exceptions raised while handling one vcf are logged instead of raised:
     an IndexError is reported as "no SNPs found", every other exception is
     logged and its traceback is printed. The loop then continues with the
     next vcf.
     """
     if Grid.useGrid == True:
         Haplotyper.executeBeagleCluster(self.pool)
     else:
         Haplotyper.executeBeagleMultiThread(self.pool)

     for vcf in self.pool.vcf:
         # A key of None gets the output file name without a prefix.
         if vcf is None:
             logging.info("Starting to calculate the allelic diversity")
             outputFile = self.pool.outputDir + "/allelicDiversity.csv"
         else:
             logging.info("calculating allelic diversity of " + vcf)
             outputFile = self.pool.outputDir + "/" + vcf + "_" + "allelicDiversity.csv"
         try:
             self.vcfFile = self.pool.vcf[vcf].getFile()
             self._parseFiles(vcf)
             haplotypes = self._getAllHaplotypesByAccession(self.allContigs)
             # Dict views cannot be indexed on Python 3, so materialise the
             # values first. An empty dict still raises IndexError, which is
             # reported below as "no SNPs found".
             accessions = list(list(haplotypes.values())[0].keys())

             with open(outputFile, "w") as outWriter:
                 # Header: two columns per accession.
                 outWriter.write("contig\toriginal\t")
                 for accession in accessions:
                     outWriter.write(accession + "_1\t" + accession + "_2\t")
                 outWriter.write("\n")
                 for contigId in self.allContigs:
                     outWriter.write(contigId + "\t")
                     try:
                         outWriter.write(self.allContigs[contigId].refHaplotype + "\t")
                     except AttributeError:
                         # Contig without a reference haplotype
                         outWriter.write("-\t")
                     for accession in accessions:
                         for i in range(2):
                             if contigId in haplotypes:
                                 outWriter.write(haplotypes[contigId][accession][i] + "\t")
                             else:
                                 outWriter.write("-\t")
                     outWriter.write("\n")
         except IndexError:
             if vcf is None:
                 logging.warning("No SNPs within contigs found")
             else:
                 logging.warning("No SNPs within contigs found of " + vcf)

         except Exception as ex:
             # Best effort: log the failure and continue with the next vcf.
             if vcf is None:
                 logging.error("an error occured during parsing the vcf file")
             else:
                 logging.error("an error occured during parsing " + vcf)
             logging.error(ex)
             traceback.print_exc()
             
Exemplo n.º 7
0
    def testHaplotyperFullPathGrid(self):
        """Run Beagle on the cluster for a pool whose only sample has just fastq files attached.

        The vcf registry of the shared test pool is emptied and a new sample
        with a forward and a reversed fastq file is added before the cluster
        run is started. Afterwards the number of snps in the expected output
        file is checked.
        """
        pool = TestHaplotyper.testPool
        pool.vcf = {}
        sample = Sample.Sample(pool, "testLib")
        TestHaplotyper.sample = sample
        pool.addSample(sample)
        sample.setForwardFq("../testFiles/input/test.fq.gz")
        sample.setReversedFq("../testFiles/input/revTest.fq.gz")
        # Flag the second fastq file as the reversed read file.
        sample.reversedFq.forward = False

        Haplotyper.executeBeagleCluster(pool)
        # NOTE: the comparison of the created file name with the expected
        # file name is disabled for this test.
        # Check if the file contains exactly one snp
        self.checkNoOfSnps("../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf")
Exemplo n.º 8
0
    def testHaplotyperFullPathGrid(self):
        """Start the cluster haplotyping from a sample that only has its fastq files set.

        The test pool starts without any vcf. A fresh sample carrying a
        forward and a reversed fastq file is registered, Beagle is executed
        on the cluster and the snp count of the expected vcf is verified.
        """
        forwardReads = "../testFiles/input/test.fq.gz"
        reversedReads = "../testFiles/input/revTest.fq.gz"
        resultVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"

        testPool = TestHaplotyper.testPool
        testPool.vcf = {}
        newSample = Sample.Sample(testPool, "testLib")
        TestHaplotyper.sample = newSample
        testPool.addSample(newSample)
        newSample.setForwardFq(forwardReads)
        newSample.setReversedFq(reversedReads)
        newSample.reversedFq.forward = False

        Haplotyper.executeBeagleCluster(testPool)
        # NOTE: asserting on the file name of the created vcf is disabled
        # here; only the content of the expected file is checked.
        # Check if the file contains exactly one snp
        self.checkNoOfSnps(resultVcf)
Exemplo n.º 9
0
    def testHaplotyperPathGrid(self):
        """Start the cluster haplotyping from a sample that only has a bam file set.

        The test pool starts without any vcf. A fresh sample carrying the
        test input bam is registered, Beagle is executed on the cluster and
        the snp count of the expected vcf is verified.
        """
        resultVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
        bamOptions = {
            "sortedBam": True,
            "headerLine": True,
            "duplicates": False,
            "mdTag": True,
            "index": True,
        }

        testPool = TestHaplotyper.testPool
        testPool.vcf = {}
        newSample = Sample.Sample(testPool, "testLib")
        TestHaplotyper.sample = newSample
        testPool.addSample(newSample)
        newSample.bam = BamFile.BamFile(testPool, newSample,
                                        TestHaplotyper.inputBam, **bamOptions)

        Haplotyper.executeBeagleCluster(testPool)
        # NOTE: asserting on the file name of the created vcf is disabled
        # here; only the content of the expected file is checked.
        self.checkNoOfSnps(resultVcf)
Exemplo n.º 10
0
 def execute(self, executable):
     """The method execute checks which program has to be executed and executes this program.

     Recognised values are "haplotyping", "snvCalling", "mapping",
     "allelicDiversity" and "findLoci". Any other value is ignored and the
     method returns without doing anything. The same holds for "snvCalling"
     when the configured snv caller is neither "samtools" nor "GATK".

     For "allelicDiversity" and "findLoci" the required configuration options
     are verified first; when one is missing a hint is printed and the
     program is terminated through exit().

     :param executable: the argument of the commandline which determines which program has to be executed
     :type executable: str
     """
     if executable == "haplotyping":
         if Grid.useGrid == True:
             Haplotyper.executeBeagleCluster(self.pool)
         else:
             Haplotyper.executeBeagleMultiThread(self.pool)
     elif executable == "snvCalling":
         if Program.config.snvCaller == "samtools":  # @UndefinedVariable
             SamtoolsMpileup.executeSamtoolsMultiThreaded(self.pool)
         elif Program.config.snvCaller == "GATK":  # @UndefinedVariable
             Gatk.Gatk(self.pool).callSnvs()
     elif executable == "mapping":
         # One mapper instance is reused for all samples.
         mapper = Mapper.Mapper()
         for sample in self.samples:
             mapper.map(sample)
     elif executable == "allelicDiversity":
         if Program.config.gffFile is None:  # @UndefinedVariable
             print(
                 "When calculating the allelic diversity, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         allelicDiversityCalculator = AllelicDiversity.AllelicDiversity(
             self.pool, Program.config.gffFile)  # @UndefinedVariable
         allelicDiversityCalculator.getAllelicDiversity()
     elif executable == "findLoci":
         if Program.config.phenoData is None:  # @UndefinedVariable
             print(
                 "When finding loci, a csv file is needed with the phenotype data, this option can be set with the option --phen <file>"
             )
             exit()
         if Program.config.gffFile is None:  # @UndefinedVariable
             # This message used to repeat the phenotype wording although
             # the missing input is the gff file.
             print(
                 "When finding loci, a gff file is needed, this option can be set with the option --gff <file>"
             )
             exit()
         lociFinder = LociFinder.LociFinder()
         lociFinder.findLoci(self.pool)