Exemplo n.º 1
0
    def _rescale_quality_scores(self, config, destination, prefix,
                                files_and_nodes):
        """Build the mapDamage plot -> model -> rescale chain of nodes.

        Intermediate mapDamage files are placed in "<destination>.mapDamage"
        and the rescaled BAM is written to "<destination>.rescaled.bam".
        self.options["mapDamage"] is applied (via apply_options) to both the
        model and the rescale commands before their nodes are built.

        Returns a 3-tuple of
          - a dict mapping the rescaled BAM filename to the node returned by
            index_and_validate_bam,
          - the plot node, and
          - the model node.
        """
        # Plots / tables are generated in the internal tree, to prevent the
        # user from accidentally modifying them and thereby causing re-runs
        damage_dir = "%s.mapDamage" % (destination, )
        rescaled_bam = destination + ".rescaled.bam"

        # Step 1: basic plots / table files
        plot_node = self._build_mapdamage_plot_node(
            config, damage_dir, prefix, files_and_nodes)

        # Step 2: model of post-mortem DNA damage, depending on the plots
        model_builder = MapDamageModelNode.customize(
            reference=prefix["Reference"],
            directory=damage_dir,
            dependencies=plot_node)
        apply_options(model_builder.command, self.options["mapDamage"])
        model_node = model_builder.build_node()

        # Step 3: rescale BAM quality scores using the model built above
        rescale_builder = MapDamageRescaleNode.customize(
            config=config,
            reference=prefix["Reference"],
            input_files=files_and_nodes.keys(),
            output_file=rescaled_bam,
            directory=damage_dir,
            dependencies=model_node)
        apply_options(rescale_builder.command, self.options["mapDamage"])
        rescale_node = rescale_builder.build_node()

        # Step 4: indexing / validation of the rescaled BAM
        validated_node = index_and_validate_bam(config, prefix, rescale_node)

        return {rescaled_bam: validated_node}, plot_node, model_node
Exemplo n.º 2
0
    def _rescale_quality_scores(self, config, destination, prefix, files_and_nodes):
        """Set up mapDamage plotting, modelling, and quality rescaling.

        Files produced by mapDamage go into "<destination>.mapDamage"; the
        rescaled BAM is named "<destination>.rescaled.bam". The model and the
        rescale commands both get self.options["mapDamage"] applied through
        apply_options before being turned into nodes.

        Returns a tuple (bams, plot_node, model_node), where bams maps the
        rescaled BAM filename to the node returned by index_and_validate_bam.
        """
        # Keep plot / table files in the internal tree, so that the user does
        # not accidentally touch them and trigger re-runs
        damage_dir = "%s.mapDamage" % (destination,)
        rescaled_bam = destination + ".rescaled.bam"

        # Basic plots / table files
        plot_node = self._build_mapdamage_plot_node(
            config, damage_dir, prefix, files_and_nodes)

        # Model of post-mortem DNA damage; requires the plot node
        model_builder = MapDamageModelNode.customize(
            reference=prefix["Reference"],
            directory=damage_dir,
            dependencies=plot_node)
        apply_options(model_builder.command, self.options["mapDamage"])
        model_node = model_builder.build_node()

        # Rescaling of BAM quality scores; requires the model node
        rescale_builder = MapDamageRescaleNode.customize(
            config=config,
            reference=prefix["Reference"],
            input_files=files_and_nodes.keys(),
            output_file=rescaled_bam,
            directory=damage_dir,
            dependencies=model_node)
        apply_options(rescale_builder.command, self.options["mapDamage"])
        rescale_node = rescale_builder.build_node()

        # Indexing and validation of the rescaled BAM
        validated_node = index_and_validate_bam(config, prefix, rescale_node)

        return {rescaled_bam: validated_node}, plot_node, model_node
Exemplo n.º 3
0
    def _build_raw_bam(self, config, prefix, files_and_bams):
        """Merge the given BAMs into a single "<target>.<prefix name>.bam".

        The merged file is placed directly in self.folder, while the
        ".validated" filename handed to index_and_validate_bam lives in the
        self.target sub-folder. The merge node depends on self.datadup_check.

        Returns a dict mapping the merged BAM filename to the node returned
        by index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        merged_name = "%s.%s.bam" % (self.target, prefix_name)
        merged_bam = os.path.join(self.folder, merged_name)
        validated_path = os.path.join(
            self.folder, self.target, prefix_name + ".validated")

        merge_node = MergeSamFilesNode(
            config=config,
            input_bams=files_and_bams.keys(),
            output_bam=merged_bam,
            dependencies=self.datadup_check)

        return {
            merged_bam: index_and_validate_bam(
                config, prefix, merge_node, validated_path),
        }
Exemplo n.º 4
0
    def _build_raw_bam(self, config, prefix, files_and_bams):
        """Create the node merging all input BAMs for one prefix.

        Output is "<self.folder>/<target>.<prefix name>.bam"; the validation
        filename passed on to index_and_validate_bam is
        "<self.folder>/<target>/<prefix name>.validated". The merge node
        lists self.datadup_check as its dependency.

        Returns a single-entry dict: merged BAM filename -> node returned by
        index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        merged_bam = os.path.join(
            self.folder, "%s.%s.bam" % (self.target, prefix_name))
        validated_path = os.path.join(
            self.folder, self.target, prefix_name + ".validated")

        merge_kwargs = {
            "config": config,
            "input_bams": files_and_bams.keys(),
            "output_bam": merged_bam,
            "dependencies": self.datadup_check,
        }
        merge_node = MergeSamFilesNode(**merge_kwargs)

        checked_node = index_and_validate_bam(
            config, prefix, merge_node, validated_path)
        return {merged_bam: checked_node}
Exemplo n.º 5
0
    def _build_realigned_bam(self, config, prefix, bams):
        """Create the indel-realignment node for the given BAMs.

        Filenames used:
          - output:     <self.folder>/<target>.<prefix name>.realigned.bam
          - intervals:  <self.folder>/<target>/<prefix name>.intervals
          - validation: <self.folder>/<target>/<prefix name>.realigned.validated

        The realigner node depends on self.datadup_check.

        Returns a dict mapping the realigned BAM filename to the node
        returned by index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        realigned_bam = os.path.join(
            self.folder, "%s.%s.realigned.bam" % (self.target, prefix_name))

        # Auxiliary files are kept in the per-target sub-folder
        intervals_path = os.path.join(
            self.folder, self.target, prefix_name + ".intervals")
        validated_path = os.path.join(
            self.folder, self.target, prefix_name + ".realigned.validated")

        realigner = IndelRealignerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            outfile=realigned_bam,
            intervals=intervals_path,
            dependencies=self.datadup_check)

        return {
            realigned_bam: index_and_validate_bam(
                config, prefix, realigner, validated_path),
        }
Exemplo n.º 6
0
    def _init_pre_aligned_lane(self, config, prefix, record):
        """Register the cleanup node for a lane supplied as an existing BAM.

        Does nothing if record["Data"] holds no entry for this prefix.
        Otherwise, the BAM listed for the prefix is passed through
        CleanupBAMNode, writing "processed.bam" in self.folder, and the
        result is stored in self.bams["Processed"] as a dict mapping that
        filename to the node returned by index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        lane_data = record["Data"]
        if prefix_name not in lane_data:
            return

        cleaned_bam = os.path.join(self.folder, "processed.bam")
        cleanup_node = CleanupBAMNode(
            config=config,
            reference=prefix["Reference"],
            input_bam=lane_data[prefix_name],
            output_bam=cleaned_bam,
            tags=self.tags,
            dependencies=prefix["Node"])

        self.bams["Processed"] = {
            cleaned_bam: index_and_validate_bam(config, prefix, cleanup_node),
        }
Exemplo n.º 7
0
    def _init_pre_aligned_lane(self, config, prefix, record):
        """Set up processing of a lane that was provided as a BAM file.

        If record["Data"] has no entry keyed by the prefix name, the method
        returns without changing anything. Otherwise a CleanupBAMNode is
        created that reads that entry and writes "processed.bam" in
        self.folder; self.bams["Processed"] is then set to a dict mapping
        the output filename to the node returned by index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        lane_data = record["Data"]
        if prefix_name not in lane_data:
            return

        source_bam = lane_data[prefix_name]
        cleaned_bam = os.path.join(self.folder, "processed.bam")

        cleanup_kwargs = {
            "config": config,
            "reference": prefix["Reference"],
            "input_bam": source_bam,
            "output_bam": cleaned_bam,
            "tags": self.tags,
            "dependencies": prefix["Node"],
        }
        cleanup_node = CleanupBAMNode(**cleanup_kwargs)

        checked_node = index_and_validate_bam(config, prefix, cleanup_node)
        self.bams["Processed"] = {cleaned_bam: checked_node}
Exemplo n.º 8
0
    def _build_realigned_bam(self, config, prefix, bams):
        """Build the IndelRealignerNode for one prefix and validate its output.

        The realigned BAM is written to self.folder as
        "<target>.<prefix name>.realigned.bam"; the ".intervals" and
        ".realigned.validated" filenames are located in the self.target
        sub-folder. self.datadup_check is passed as the node's dependency.

        Returns a single-entry dict: realigned BAM filename -> node returned
        by index_and_validate_bam.
        """
        prefix_name = prefix["Name"]
        target_dir = (self.folder, self.target)

        realigned_bam = os.path.join(
            self.folder, "%s.%s.realigned.bam" % (self.target, prefix_name))
        intervals_path = os.path.join(
            *(target_dir + (prefix_name + ".intervals",)))
        validated_path = os.path.join(
            *(target_dir + (prefix_name + ".realigned.validated",)))

        realigner = IndelRealignerNode(
            config=config,
            reference=prefix["Reference"],
            infiles=bams.keys(),
            outfile=realigned_bam,
            intervals=intervals_path,
            dependencies=self.datadup_check)
        checked_node = index_and_validate_bam(
            config, prefix, realigner, validated_path)

        return {realigned_bam: checked_node}
Exemplo n.º 9
0
    def _init_unaligned_lane(self, config, prefix, record):
        """Create one alignment node per set of reads in self.reads.files.

        The aligner (key and factory function) is chosen by _select_aligner
        from record["Options"]. Each output BAM is written to self.folder as
        "<key, lower-cased>.minQ<MinQuality>[.noSeed].bam", where ".noSeed"
        is added only if the aligner's "UseSeed" option is present and false.

        For every entry, self.bams[key] is set to a dict mapping the output
        filename to the node returned by index_and_validate_bam.
        """
        aln_key, aln_func = _select_aligner(record["Options"])
        prefix_key = "Node:%s" % (aln_key, )

        # Filename suffix describing the alignment settings in use
        aligner_settings = record["Options"]["Aligners"][aln_key]
        name_parts = ["minQ%i" % aligner_settings["MinQuality"]]
        if not aligner_settings.get("UseSeed", True):
            name_parts.append("noSeed")
        suffix = ".".join(name_parts)

        for (key, input_filename) in self.reads.files.iteritems():
            bam_name = "%s.%s.bam" % (key.lower(), suffix)
            output_filename = os.path.join(self.folder, bam_name)

            # Paired-end filenames are templates taking a "Pair" field
            mate_1, mate_2 = input_filename, None
            if paths.is_paired_end(input_filename):
                mate_1 = input_filename.format(Pair=1)
                mate_2 = input_filename.format(Pair=2)

            # Common parameters between BWA / Bowtie2
            parameters = {
                "output_file": output_filename,
                "prefix": prefix["Path"],
                "reference": prefix["Reference"],
                "input_file_1": mate_1,
                "input_file_2": mate_2,
                "dependencies": self.reads.nodes + (prefix[prefix_key], ),
            }

            aligner = aln_func(config=config,
                               parameters=parameters,
                               tags=self.tags,
                               options=record["Options"])

            # Settings are read again once the aligner object exists
            alignment_opts = record["Options"]["Aligners"][aln_key]
            convert_cmd = aligner.commands["convert"]
            convert_cmd.set_option('-q', alignment_opts["MinQuality"])
            if alignment_opts["FilterUnmappedReads"]:
                convert_cmd.set_option('-F', "0x4")

            checked_node = index_and_validate_bam(
                config, prefix, aligner.build_node())
            self.bams[key] = {output_filename: checked_node}
Exemplo n.º 10
0
    def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
        """Create one duplicate-handling node per kind of reads in bams.

        bams maps a kind ("collapsed" or "normal") to a dict of
        filename -> node; "collapsed" is handled by FilterCollapsedBAMNode
        and "normal" by MarkDuplicatesNode. Any other kind raises KeyError.
        Output is written to "<self.folder>.rmdup.<kind>.bam".

        keep_dupes is passed as True only if strategy is a string equal to
        "mark" (case-insensitively); otherwise it is False.

        Returns a dict mapping each kind to a dict of
        output filename -> node returned by index_and_validate_bam.
        """
        node_classes = {
            "collapsed": FilterCollapsedBAMNode,
            "normal": MarkDuplicatesNode,
        }

        keep_duplicates = (isinstance(strategy, types.StringTypes)
                           and strategy.lower() == "mark")

        results = {}
        for (key, files_and_nodes) in bams.items():
            deduped_bam = self.folder + (".rmdup.%s.bam" % key)
            node_cls = node_classes[key]
            rmdup_node = node_cls(
                config=config,
                input_bams=files_and_nodes.keys(),
                output_bam=deduped_bam,
                keep_dupes=keep_duplicates,
                dependencies=files_and_nodes.values())

            results[key] = {
                deduped_bam: index_and_validate_bam(
                    config, prefix, rmdup_node),
            }
        return results
Exemplo n.º 11
0
    def _remove_pcr_duplicates(self, config, prefix, bams, strategy):
        """Build the PCR-duplicate nodes for each entry in bams.

        Each key of bams selects the node class to use: "collapsed" maps to
        FilterCollapsedBAMNode and "normal" to MarkDuplicatesNode (an
        unknown key raises KeyError). The values are dicts whose keys are
        used as input BAMs and whose values are used as dependencies.
        Each node writes to "<self.folder>.rmdup.<key>.bam".

        Duplicates are kept (keep_dupes=True) only when strategy is a string
        that lower-cases to "mark".

        Returns a dict: key -> {output filename: node returned by
        index_and_validate_bam}.
        """
        node_classes = {"collapsed": FilterCollapsedBAMNode,
                        "normal": MarkDuplicatesNode}

        is_text = isinstance(strategy, types.StringTypes)
        keep_duplicates = is_text and (strategy.lower() == "mark")

        results = {}
        for (key, files_and_nodes) in bams.items():
            deduped_bam = self.folder + (".rmdup.%s.bam" % key)

            node_kwargs = {
                "config": config,
                "input_bams": files_and_nodes.keys(),
                "output_bam": deduped_bam,
                "keep_dupes": keep_duplicates,
                "dependencies": files_and_nodes.values(),
            }
            rmdup_node = node_classes[key](**node_kwargs)
            checked_node = index_and_validate_bam(config, prefix, rmdup_node)

            results[key] = {deduped_bam: checked_node}
        return results
Exemplo n.º 12
0
    def _init_unaligned_lane(self, config, prefix, record):
        """Set up alignment of every read file listed in self.reads.files.

        _select_aligner picks the aligner key and factory function based on
        record["Options"]. Output BAMs are placed in self.folder and named
        "<key, lower-cased>.minQ<MinQuality>.bam", with ".noSeed" inserted
        before ".bam" if the aligner's "UseSeed" option is set to a false
        value.

        Results are stored in self.bams[key] as a dict mapping the output
        filename to the node returned by index_and_validate_bam.
        """
        aln_key, aln_func = _select_aligner(record["Options"])
        prefix_key = "Node:%s" % (aln_key,)

        # Suffix encoding the alignment settings; shared by all output files
        aligner_settings = record["Options"]["Aligners"][aln_key]
        name_parts = ["minQ%i" % aligner_settings["MinQuality"]]
        if not aligner_settings.get("UseSeed", True):
            name_parts.append("noSeed")
        suffix = ".".join(name_parts)

        for (key, input_filename) in self.reads.files.iteritems():
            output_filename = os.path.join(
                self.folder, "%s.%s.bam" % (key.lower(), suffix))

            # Common parameters between BWA / Bowtie2
            parameters = {
                "output_file": output_filename,
                "prefix": prefix["Path"],
                "reference": prefix["Reference"],
                "input_file_1": input_filename,
                "input_file_2": None,
                "dependencies": self.reads.nodes + (prefix[prefix_key],),
            }

            # Paired-end filenames are templates taking a "Pair" field
            if paths.is_paired_end(input_filename):
                parameters.update(
                    input_file_1=input_filename.format(Pair=1),
                    input_file_2=input_filename.format(Pair=2))

            aligner = aln_func(config=config,
                               parameters=parameters,
                               tags=self.tags,
                               options=record["Options"])

            # Settings are read again once the aligner object exists
            alignment_opts = record["Options"]["Aligners"][aln_key]
            convert_cmd = aligner.commands["convert"]
            convert_cmd.set_option('-q', alignment_opts["MinQuality"])
            if alignment_opts["FilterUnmappedReads"]:
                convert_cmd.set_option('-F', "0x4")

            aligned_node = aligner.build_node()
            checked_node = index_and_validate_bam(config, prefix, aligned_node)

            self.bams[key] = {output_filename: checked_node}