def _rescale_quality_scores(self, config, destination, prefix, files_and_nodes):
    """Build nodes that rescale BAM quality scores using a mapDamage model.

    Plot / table files are generated in an internal ".mapDamage" tree so
    that the user cannot accidentally modify them and thereby cause
    re-runs.  Returns ({output_filename: validation_node}, plot, model).
    """
    md_directory = "%s.mapDamage" % (destination,)
    output_filename = destination + ".rescaled.bam"

    # Generate the basic plots / table files
    plot = self._build_mapdamage_plot_node(config, md_directory,
                                           prefix, files_and_nodes)

    # Build a model of post-mortem DNA damage from the plots / tables
    model = MapDamageModelNode.customize(reference=prefix["Reference"],
                                         directory=md_directory,
                                         dependencies=plot)
    apply_options(model.command, self.options["mapDamage"])
    model = model.build_node()

    # Rescale the BAM quality scores using the model built above
    scale = MapDamageRescaleNode.customize(
        config=config,
        reference=prefix["Reference"],
        input_files=files_and_nodes.keys(),
        output_file=output_filename,
        directory=md_directory,
        dependencies=model)
    apply_options(scale.command, self.options["mapDamage"])
    scale = scale.build_node()

    # Attach indexing and validation nodes to the rescaled BAM
    validate = index_and_validate_bam(config, prefix, scale)

    return {output_filename: validate}, plot, model
def _rescale_quality_scores(self, config, destination, prefix, files_and_nodes):
    # Plot / table files live in an internal tree, which keeps the user
    # from accidentally editing them (which would trigger re-runs).
    md_directory = "%s.mapDamage" % (destination,)
    output_filename = destination + ".rescaled.bam"

    # Basic plots / table files
    plot = self._build_mapdamage_plot_node(
        config, md_directory, prefix, files_and_nodes)

    # Model of post-mortem DNA damage, derived from the plots / tables
    model_params = MapDamageModelNode.customize(
        reference=prefix["Reference"],
        directory=md_directory,
        dependencies=plot)
    apply_options(model_params.command, self.options["mapDamage"])
    model = model_params.build_node()

    # Rescale the BAM quality scores according to the damage model
    scale_params = MapDamageRescaleNode.customize(
        config=config,
        reference=prefix["Reference"],
        input_files=files_and_nodes.keys(),
        output_file=output_filename,
        directory=md_directory,
        dependencies=model)
    apply_options(scale_params.command, self.options["mapDamage"])
    scale = scale_params.build_node()

    # Index / validate the rescaled output
    validate = index_and_validate_bam(config, prefix, scale)
    return {output_filename: validate}, plot, model
def _build_raw_bam(self, config, prefix, files_and_bams):
    """Merge the per-lane BAMs for a prefix into a single raw BAM.

    Returns {output_filename: validation_node}.
    """
    output_filename = os.path.join(
        self.folder, "%s.%s.bam" % (self.target, prefix["Name"]))
    validated_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".validated")

    merge_node = MergeSamFilesNode(config=config,
                                   input_bams=files_and_bams.keys(),
                                   output_bam=output_filename,
                                   dependencies=self.datadup_check)
    validated_node = index_and_validate_bam(config, prefix, merge_node,
                                            validated_filename)

    return {output_filename: validated_node}
def _build_raw_bam(self, config, prefix, files_and_bams):
    # Paths for the merged BAM and its validation marker file
    bam_name = "%s.%s.bam" % (self.target, prefix["Name"])
    output_filename = os.path.join(self.folder, bam_name)
    validated_filename = os.path.join(self.folder,
                                      self.target,
                                      prefix["Name"] + ".validated")

    # Merge all input BAMs into a single file
    node = MergeSamFilesNode(config=config,
                             input_bams=files_and_bams.keys(),
                             output_bam=output_filename,
                             dependencies=self.datadup_check)

    # Index and validate the merged result
    validated_node = index_and_validate_bam(config, prefix, node,
                                            validated_filename)
    return {output_filename: validated_node}
def _build_realigned_bam(self, config, prefix, bams):
    """Run indel realignment on the merged BAMs for a prefix.

    Returns {output_filename: validation_node}.
    """
    output_filename = os.path.join(
        self.folder, "%s.%s.realigned.bam" % (self.target, prefix["Name"]))
    intervals_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".intervals")
    validated_filename = os.path.join(
        self.folder, self.target, prefix["Name"] + ".realigned.validated")

    realigner = IndelRealignerNode(config=config,
                                   reference=prefix["Reference"],
                                   infiles=bams.keys(),
                                   outfile=output_filename,
                                   intervals=intervals_filename,
                                   dependencies=self.datadup_check)
    validated_node = index_and_validate_bam(config, prefix, realigner,
                                            validated_filename)

    return {output_filename: validated_node}
def _init_pre_aligned_lane(self, config, prefix, record):
    """Set up cleanup / validation nodes for a pre-aligned BAM lane.

    Does nothing if the record has no data for this prefix; otherwise
    registers the processed BAM under self.bams["Processed"].
    """
    if prefix["Name"] not in record["Data"]:
        return

    input_filename = record["Data"][prefix["Name"]]
    output_filename = os.path.join(self.folder, "processed.bam")

    cleanup = CleanupBAMNode(config=config,
                             reference=prefix["Reference"],
                             input_bam=input_filename,
                             output_bam=output_filename,
                             tags=self.tags,
                             dependencies=prefix["Node"])
    validated_node = index_and_validate_bam(config, prefix, cleanup)

    self.bams["Processed"] = {output_filename: validated_node}
def _init_pre_aligned_lane(self, config, prefix, record):
    # Skip prefixes for which no pre-aligned data was supplied
    if prefix["Name"] not in record["Data"]:
        return

    input_filename = record["Data"][prefix["Name"]]
    output_filename = os.path.join(self.folder, "processed.bam")

    # Clean up the externally produced BAM (re-tagging, etc.)
    node = CleanupBAMNode(config=config,
                          reference=prefix["Reference"],
                          input_bam=input_filename,
                          output_bam=output_filename,
                          tags=self.tags,
                          dependencies=prefix["Node"])

    # Index and validate the cleaned-up BAM before registering it
    validated_node = index_and_validate_bam(config, prefix, node)
    self.bams["Processed"] = {output_filename: validated_node}
def _build_realigned_bam(self, config, prefix, bams):
    # Output locations for the realigned BAM, the realigner's target
    # intervals, and the validation marker file
    base = prefix["Name"]
    output_filename = os.path.join(
        self.folder, "%s.%s.realigned.bam" % (self.target, base))
    intervals_filename = os.path.join(
        self.folder, self.target, base + ".intervals")
    validated_filename = os.path.join(
        self.folder, self.target, base + ".realigned.validated")

    # Realign reads around indels
    node = IndelRealignerNode(config=config,
                              reference=prefix["Reference"],
                              infiles=bams.keys(),
                              outfile=output_filename,
                              intervals=intervals_filename,
                              dependencies=self.datadup_check)

    validated_node = index_and_validate_bam(config, prefix, node,
                                            validated_filename)
    return {output_filename: validated_node}
def _init_unaligned_lane(self, config, prefix, record):
    """Build alignment / validation nodes for an unaligned lane.

    One BAM is produced per read-type in self.reads.files; each is
    registered in self.bams keyed by read-type.
    """
    aln_key, aln_func = _select_aligner(record["Options"])
    prefix_key = "Node:%s" % (aln_key,)

    # Hoist the per-aligner option dict; it is re-read for every lane
    aligner_opts = record["Options"]["Aligners"][aln_key]

    # Filename postfix encodes the quality cutoff (and seeding, if off)
    postfix = ["minQ%i" % aligner_opts["MinQuality"]]
    if not aligner_opts.get("UseSeed", True):
        postfix.append("noSeed")

    for (key, input_filename) in self.reads.files.iteritems():
        # Parameters shared between BWA / Bowtie2
        output_filename = os.path.join(
            self.folder,
            "%s.%s.bam" % (key.lower(), ".".join(postfix)))
        parameters = {
            "output_file": output_filename,
            "prefix": prefix["Path"],
            "reference": prefix["Reference"],
            "input_file_1": input_filename,
            "input_file_2": None,
            "dependencies": self.reads.nodes + (prefix[prefix_key],),
        }

        # Paired-end input: expand the {Pair} placeholder into two files
        if paths.is_paired_end(input_filename):
            parameters["input_file_1"] = input_filename.format(Pair=1)
            parameters["input_file_2"] = input_filename.format(Pair=2)

        alignment_obj = aln_func(config=config,
                                 parameters=parameters,
                                 tags=self.tags,
                                 options=record["Options"])

        # Apply the quality cutoff and (optionally) drop unmapped reads
        alignment_obj.commands["convert"].set_option(
            '-q', aligner_opts["MinQuality"])
        if aligner_opts["FilterUnmappedReads"]:
            alignment_obj.commands["convert"].set_option('-F', "0x4")

        alignment_node = alignment_obj.build_node()
        validated_node = index_and_validate_bam(config, prefix,
                                                alignment_node)
        self.bams[key] = {output_filename: validated_node}
def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
    """Remove (or mark) PCR duplicates in each category of BAMs.

    Collapsed reads and normal reads use different duplicate-filter
    nodes.  If `strategy` is the string "mark" (case-insensitive),
    duplicates are kept and only flagged.  Returns a dict mapping each
    category to {output_filename: validation_node}.
    """
    rmdup_cls = {"collapsed": FilterCollapsedBAMNode,
                 "normal": MarkDuplicatesNode}

    # "mark" => flag duplicates instead of removing them
    keep_duplicates = (isinstance(strategy, types.StringTypes)
                       and strategy.lower() == "mark")

    results = {}
    for (key, files_and_nodes) in bams.items():
        output_filename = self.folder + ".rmdup.%s.bam" % key
        node = rmdup_cls[key](config=config,
                              input_bams=files_and_nodes.keys(),
                              output_bam=output_filename,
                              keep_dupes=keep_duplicates,
                              dependencies=files_and_nodes.values())
        validated_node = index_and_validate_bam(config, prefix, node)
        results[key] = {output_filename: validated_node}

    return results
def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
    # Each category of reads gets its own duplicate-filter node type
    rmdup_cls = {
        "collapsed": FilterCollapsedBAMNode,
        "normal": MarkDuplicatesNode,
    }

    keep_duplicates = False
    if isinstance(strategy, types.StringTypes):
        # The "mark" strategy flags duplicates rather than removing them
        if strategy.lower() == "mark":
            keep_duplicates = True

    results = {}
    for (key, files_and_nodes) in bams.items():
        output_filename = self.folder + ".rmdup.%s.bam" % key

        rmdup_node = rmdup_cls[key](config=config,
                                    input_bams=files_and_nodes.keys(),
                                    output_bam=output_filename,
                                    keep_dupes=keep_duplicates,
                                    dependencies=files_and_nodes.values())
        validated_node = index_and_validate_bam(config, prefix, rmdup_node)

        results[key] = {output_filename: validated_node}

    return results
def _init_unaligned_lane(self, config, prefix, record):
    # Pick the aligner (BWA or Bowtie2) configured for this record
    aln_key, aln_func = _select_aligner(record["Options"])
    prefix_key = "Node:%s" % (aln_key,)

    # The filename postfix records the quality cutoff, plus "noSeed"
    # when seeding has been disabled for the aligner
    min_quality = record["Options"]["Aligners"][aln_key]["MinQuality"]
    postfix = ["minQ%i" % min_quality]
    if not record["Options"]["Aligners"][aln_key].get("UseSeed", True):
        postfix.append("noSeed")

    for (key, input_filename) in self.reads.files.iteritems():
        # Common parameters between BWA / Bowtie2
        bam_name = "%s.%s.bam" % (key.lower(), ".".join(postfix))
        output_filename = os.path.join(self.folder, bam_name)

        parameters = {"output_file": output_filename,
                      "prefix": prefix["Path"],
                      "reference": prefix["Reference"],
                      "input_file_1": input_filename,
                      "input_file_2": None,
                      "dependencies": (self.reads.nodes
                                       + (prefix[prefix_key],))}

        # For paired-end data, substitute the mate number into the
        # filename template to obtain the two input files
        if paths.is_paired_end(input_filename):
            parameters["input_file_1"] = input_filename.format(Pair=1)
            parameters["input_file_2"] = input_filename.format(Pair=2)

        alignment_obj = aln_func(config=config,
                                 parameters=parameters,
                                 tags=self.tags,
                                 options=record["Options"])

        alignment_opts = record["Options"]["Aligners"][aln_key]
        convert_cmd = alignment_obj.commands["convert"]
        convert_cmd.set_option('-q', alignment_opts["MinQuality"])
        if alignment_opts["FilterUnmappedReads"]:
            # samtools flag 0x4 marks unmapped reads
            convert_cmd.set_option('-F', "0x4")

        alignment_node = alignment_obj.build_node()
        validated_node = index_and_validate_bam(config, prefix,
                                                alignment_node)
        self.bams[key] = {output_filename: validated_node}