Exemplo n.º 1
0
 def preprocess(self, lib):
     """
     Determine and execute the preprocessing steps for a given library.

     The steps, in order:
     * pass lib.forward and lib.reversed through DirUtils.fileRegexToList
     * convert every forward file with SffToFastqConverter if lib.format is "sff"
     * create a small fastq report of the input files
     * if there is more than one forward file, merge the files per direction
       and create a second report; otherwise take the single file out of its list
     * store element 2 of the first entry of the first report as lib.avgReadlength
     * run the platform specific preprocessing ("illumina" or "454")
     * filter contamination

     :param lib: the library to preprocess. Its attributes forward, reversed
         and avgReadlength are overwritten by this method.
     """
     logging.info("Preprocessing of " + lib.libName)
     LaTeX.ltxSection("Preprocessing of " + lib.libName)
     lib.forward = DirUtils.fileRegexToList(lib.forward)
     lib.reversed = DirUtils.fileRegexToList(lib.reversed)
     if lib.format == "sff":
         for idx, sffFile in enumerate(lib.forward):
             lib.forward[idx] = SffToFastqConverter.SffToFastqConverter(lib.outputDir, sffFile=sffFile).execute()
     # This report describes the files before merging; it is the one used
     # for lib.avgReadlength further down.
     smallReport = FastqSmallReport.FastqSmallReport()
     smallReport.createSmallReport(lib.forward, lib.reversed)

     if len(lib.forward) > 1:
         lib.forward = FastqCommands.MergeCommand(lib.outputDir, direction="forward", fastqFiles=lib.forward).execute()
         if lib.reversed is not None:
             # Merge the reversed files themselves. lib.forward has already been
             # replaced by the merge result above and must not be passed here.
             lib.reversed = FastqCommands.MergeCommand(lib.outputDir, direction="reversed", fastqFiles=lib.reversed).execute()
         FastqSmallReport.FastqSmallReport().createSmallReport(lib.forward, lib.reversed)
     else:
         # Single input file per direction: unwrap it from the list.
         lib.forward = lib.forward[0]
         if lib.reversed is not None:
             lib.reversed = lib.reversed[0]
     # list(...) keeps this working when keys() returns a view instead of a list.
     firstReportKey = list(smallReport.fastqInfo.keys())[0]
     lib.avgReadlength = float(smallReport.fastqInfo[firstReportKey][2])

     if lib.sequencingPlatform == "illumina":
         self.illuminaPreprocess(lib)
     elif lib.sequencingPlatform == "454":
         lib.forward = FastqMcfTrimming.FastqTrimmer(lib.outputDir, forward=lib.forward, noTrim=True).execute()
         FastqSmallReport.FastqSmallReport().createSmallReport(lib.forward, lib.reversed)

     self.filterContamination(lib)
Exemplo n.º 2
0
 def doAssembly(self, pool):
     """
     Execute the assembly of a pool and check the insert sizes afterwards.

     The assembler is selected with the global option "assembler":
     * not set or "wgs": WgsAssembler, output in pool.outputDir + "assembly/"
     * "allpaths": AllpathsAssembler, output in pool.outputDir + "allPathsAssembly/"
     Any other value is logged as a warning and no assembly is executed.

     The result of the assembler is stored in self.assembly. Afterwards the
     insert size is checked for every library of the pool which has reversed
     reads.

     :param pool: the pool to assemble; pool.outputDir and pool.libs are read.
     """
     logging.info("Executing assembly")
     LaTeX.ltxSection("Assembly")
     # Look the option up once instead of once per comparison.
     assemblerName = Configuration.instance.getGlobalOption("assembler")
     if assemblerName is None or assemblerName == "wgs":
         self.assembly = WgsAssembler.WgsAssembler().doAssembly(pool.outputDir + "assembly/", pool)
     elif assemblerName == "allpaths":
         self.assembly = AllpathsAssembler.AllpathsAssembler().doAssembly(pool.outputDir + "allPathsAssembly/", pool)
     else:
         # self.assembly is left untouched in this case.
         logging.warning("Unknown assembler option: %s", assemblerName)

     for lib in pool.libs:
         # Libraries without reversed reads are skipped.
         if lib.reversed is None:
             continue
         logging.info("Calculating insert sizes for " + lib.libName)
         insertSizeChecker = InsertSizeChecker.InsertSizeChecker()
         insertSizeChecker.checkInsertSize(lib.outputDir, lib.rawForward, lib.rawReversed, self.assembly, lib.libName, lib.insertSize)
 def doGenomeSizeEstimation(self, outputDir, pool):
     """
     The method doGenomeSizeEstimation contains the mainflow of the genome size estimation.
     This mainflow contains the following steps:
     * Execute Jellyfish count
     * Execute Jellyfish stats
     * Create a histogram of the unique kmers with Jellyfish histo
     * Draw a histogram of the unique kmers
     * Estimate the genome size (calculateGenomeSize)

     The intermediate files are stored on the object (jellyFishCountsFile,
     jellyFishStatsFile, jellyfishHistoFile, genSizeHistoPlot, peak) and the
     object is appended to Reporter.instance.objects at the end.

     :param outputDir: directory for all output; it is created if it does not
         exist. The plot path is built with plain string concatenation, so
         the value is expected to end with a path separator.
     :param pool: the pool passed to Jellyfish count and calculateGenomeSize.
     """
     logging.info("Starting genome size estimation")
     if not os.path.isdir(outputDir):
         os.makedirs(outputDir)

     # The title must not contain a closing brace, like the other section titles.
     LaTeX.ltxSection("Genome size estimation")
     self.jellyFishCountsFile = JellyFish.JellyFishCount(outputDir, pool=pool).execute()
     # Stats and histo are both derived from the same counts file.
     self.jellyFishStatsFile = JellyFish.JellyFishStats(outputDir, jellyFishCountsFile=self.jellyFishCountsFile).execute()
     self.jellyfishHistoFile = JellyFish.JellyFishHisto(outputDir, jellyFishCountsFile=self.jellyFishCountsFile).execute()
     self.genSizeHistoPlot = outputDir + "kmer_graph.png"
     # drawHisto writes the plot and its return value is kept as the peak.
     self.peak = int(self.drawHisto(self.jellyfishHistoFile, self.genSizeHistoPlot))
     self.calculateGenomeSize(pool, self.jellyFishStatsFile, self.jellyfishHistoFile)
     Reporter.instance.objects.append(self)