示例#1
0
    def calc_maxima_forward_reverse_all_peaks(self):
        """Compute per-strand depth maxima and maxima pairs for every peak.

        Replaces ``self.peak_maxima`` with a fresh three-level mapping
        (chrom -> peak string -> "FORWARD"/"REVERSE" -> list). Then, for each
        ``(chrom, start, end)`` in ``self.peaks``, stores what
        ``get_maxima_single`` returns for each strand and records the result
        of ``calc_peak_pairs_single`` in ``self.peak_pairs``.
        """
        self.log_info(
            "Calculating the forward and reverse maxima for each of the peak regions..."
        )

        self.peak_maxima = defaultdict(
            lambda: defaultdict(lambda: defaultdict(list)))

        for chrom, start, end in self.peaks:
            peak_str = shared.peak_str(start, end)
            self.log_debug("Calculating maxima for peak %s..." % peak_str)

            # The same ceiling is handed to both strand searches.
            # NOTE(review): `+` concatenates if the depth tracks are lists and
            # adds element-wise if they are arrays -- confirm which is meant.
            forward_window = self.genome.forward_depth[chrom][start:end + 1]
            reverse_window = self.genome.reverse_depth[chrom][start:end + 1]
            max_depth = max(forward_window + reverse_window)

            forward_maxima = self.get_maxima_single(
                self.genome.forward_depth, chrom, start, end, max_depth)
            reverse_maxima = self.get_maxima_single(
                self.genome.reverse_depth, chrom, start, end, max_depth)

            strand_maxima = self.peak_maxima[chrom][peak_str]
            strand_maxima["FORWARD"] = forward_maxima
            strand_maxima["REVERSE"] = reverse_maxima
            self.log_debug(
                "Forward Maxima: %s" % shared.pprint_list(forward_maxima))
            self.log_debug(
                "Reverse Maxima: %s" % shared.pprint_list(reverse_maxima))

            pairs = self.calc_peak_pairs_single(forward_maxima, reverse_maxima)
            self.log_debug("Peak Pairs: %s" % shared.pprint_list(pairs))
            self.peak_pairs[chrom][peak_str] = pairs
示例#2
0
    def find_sites(self):
        """Build the read-depth model, QC every peak, print the QC log, then exit.

        Creates a ``GenomeAlignment`` from ``self.bam_file`` and ``self.peaks``
        (stored on ``self.genome``) and a ``PeakQC`` (stored on
        ``self.peakQC``), calls ``perform_peak_QC`` for each
        ``(chrom, start, end)`` in ``self.peaks``, and prints the QC log.

        The method does not return: it finishes by calling ``sys.exit()``.
        """
        self.log_info("Building the genome read depth model...")
        self.genome = GenomeAlignment(self.bam_file, self.peaks, self.log)
        self.genome.generateModel()
        self.log_debug("Finished building the genome model...")

        self.log_debug("Creating the PeakQC object...")
        self.peakQC = PeakQC(self.genome, self.peaks, self.log)

        for chrom, start_zeroi, end_zeroi in self.peaks:
            peak_str = shared.peak_str(start_zeroi, end_zeroi)

            if self.log:
                self.log.info("Processing the peak %s..." % peak_str)
            self.peakQC.perform_peak_QC(chrom, start_zeroi, end_zeroi)
            # The maxima-printing code that used to follow here sat behind an
            # unconditional `continue`, so it never ran; it has been removed.

        self.peakQC.print_QC_log()
        # NOTE(review): this terminates the caller as well -- confirm that
        # exiting here is intended rather than a debugging leftover.
        sys.exit()
示例#3
0
    def find_troughs(self):
        """Record a trough for every maxima pair of every peak.

        For each ``(chrom, start, stop)`` in ``self.peaks``, every pair stored
        in ``self.peak_pairs`` for that peak is passed to ``find_trough`` and
        the result is appended to ``self.trough_sites`` under the same
        chromosome and peak string.
        """
        for chrom, start, stop in self.peaks:
            region = shared.peak_str(start, stop)
            pairs = self.peak_pairs[chrom][region]

            for first, second in pairs:
                site = self.find_trough(chrom, first, second)
                self.trough_sites[chrom][region].append(site)
示例#4
0
    def print_QC_log(self):
        """Print the QC results for every peak as a tab-separated table.

        A header row is followed by one row per ``(chrom, start, end)`` in
        ``self.peaks``. Each row holds the chromosome, the peak string, the
        depth / maxima-exist / paired-maxima entries of ``self.QC_log``, and
        then the trough sites, target sites and target sequences stored on
        ``self.peak_analyzer``.

        Stored coordinates are printed with 1 added. Inside a cell, ';'
        separates groups, '|' separates the targets of one group and '-'
        separates the coordinates of one target. An empty collection is
        printed as "None".
        """

        def join_groups(groups, render):
            # ';' between groups, '|' between the items of one group.
            return ';'.join(
                '|'.join(render(item) for item in group) for group in groups)

        def shifted_span(target):
            # Each stored coordinate is printed as value + 1.
            return '-'.join(str(i + 1) for i in target)

        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3; the bare print statement is Python 2 only.
        print('\t'.join([
            "CHROM", "PEAK", self.depth_tag, self.exist_maxima_tag,
            self.paired_maxima_tag, 'TroughSites', 'TargetSites', 'TargetSeqs'
        ]))

        for chrom, start, end in self.peaks:
            peak_str = shared.peak_str(start, end)
            qc = self.QC_log[chrom][peak_str]
            analyzer = self.peak_analyzer

            out_list = [
                chrom,
                peak_str,
                qc[self.depth_tag],
                qc[self.exist_maxima_tag],
                str(qc[self.paired_maxima_tag]),
            ]

            troughs = analyzer.trough_sites[chrom][peak_str]
            if len(troughs) > 0:
                out_list.append(';'.join(str(i + 1) for i in troughs))
            else:
                out_list.append("None")

            target_sites = analyzer.target_sites[chrom][peak_str]
            if len(target_sites) > 0:
                out_list.append(join_groups(target_sites, shifted_span))
            else:
                out_list.append("None")

            target_seqs = analyzer.target_seqs[chrom][peak_str]
            if len(target_seqs) > 0:
                # Sequences are emitted unchanged.
                out_list.append(join_groups(target_seqs, lambda seq: seq))
            else:
                out_list.append("None")

            print("\t".join(out_list))
示例#5
0
    def perform_depth_QC(self, chrom, start, end, low_cutoff=20, med_cutoff=50):
        """Classify a peak's read depth as 'LOW', 'MED' or 'HIGH' in the QC log.

        The score is the maximum of the forward and reverse depth tracks
        combined with ``+`` over ``start..end`` (inclusive). The label is
        written to ``self.QC_log[chrom][peak][self.depth_tag]``.

        Args:
            chrom: Key into the genome depth tracks and the QC log.
            start: First index of the peak region.
            end: Last index of the peak region (included in the slice).
            low_cutoff: Depths strictly below this are 'LOW'. Defaults to 20.
            med_cutoff: Depths at or above ``low_cutoff`` and strictly below
                this are 'MED'; everything else is 'HIGH'. Defaults to 50.
        """
        peak_str = shared.peak_str(start, end)
        # NOTE(review): `+` concatenates if the depth tracks are lists and
        # adds element-wise if they are arrays -- confirm which is meant.
        max_depth = max(self.genome.forward_depth[chrom][start:end + 1] +
                        self.genome.reverse_depth[chrom][start:end + 1])

        if max_depth < low_cutoff:
            depth_label = 'LOW'
        elif max_depth < med_cutoff:
            depth_label = 'MED'
        else:
            depth_label = 'HIGH'

        self.QC_log[chrom][peak_str][self.depth_tag] = depth_label
示例#6
0
    def print_QC_log(self):
        """Print the QC results for every peak as a tab-separated table.

        A header row is followed by one row per ``(chrom, start, end)`` in
        ``self.peaks`` holding the chromosome, the peak string and the
        depth / maxima-exist / paired-maxima entries of ``self.QC_log``.
        """
        # The header names the third QC column with the same attribute that
        # keys it in the rows (`paired_maxima_tag`).
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('\t'.join([
            "CHROM", "PEAK", self.depth_tag, self.exist_maxima_tag,
            self.paired_maxima_tag
        ]))

        for chrom, start, end in self.peaks:
            peak_str = shared.peak_str(start, end)
            qc = self.QC_log[chrom][peak_str]

            out_list = [
                chrom,
                peak_str,
                qc[self.depth_tag],
                qc[self.exist_maxima_tag],
                # join() accepts only strings, and the paired-maxima entry
                # may be a count rather than text.
                str(qc[self.paired_maxima_tag]),
            ]

            print("\t".join(out_list))
示例#7
0
    def perform_exist_maxima_QC(self, chrom, start, end):
        """Record which strands of a peak have at least one maximum.

        Reads the 'FORWARD' and 'REVERSE' lists stored for the peak in
        ``self.peak_analyzer.peak_maxima`` and writes one of 'TRUE' (both
        non-empty), 'MISSING_FORWARD', 'MISSING_REVERSE' or 'FALSE' (both
        empty) to ``self.QC_log[chrom][peak][self.exist_maxima_tag]``.
        """
        peak_str = shared.peak_str(start, end)
        maxima = self.peak_analyzer.peak_maxima[chrom][peak_str]

        has_forward = len(maxima['FORWARD']) != 0
        has_reverse = len(maxima['REVERSE']) != 0

        if has_forward and has_reverse:
            status = 'TRUE'
        elif has_forward:
            status = 'MISSING_REVERSE'
        elif has_reverse:
            status = 'MISSING_FORWARD'
        else:
            status = 'FALSE'

        self.QC_log[chrom][peak_str][self.exist_maxima_tag] = status
示例#8
0
    def get_quality_peaks(self):
        """Return the peaks whose QC log entries pass all three checks.

        A peak is kept when its depth entry is not 'LOW', its maxima-exist
        entry equals "TRUE" and its paired-maxima entry is greater than 0.

        Returns:
            A list of ``[chrom, start, stop]`` lists, in ``self.peaks`` order.
        """
        passing = []

        for chrom, start, stop in self.peaks:
            peak_str = shared.peak_str(start, stop)
            qc = self.QC_log[chrom][peak_str]

            if qc[self.depth_tag] == 'LOW':
                continue
            if qc[self.exist_maxima_tag] != "TRUE":
                continue
            if qc[self.paired_maxima_tag] > 0:
                passing.append([chrom, start, stop])

        return passing
示例#9
0
    def find_targets(self, peaks):
        """Collect target sites and their sequences for the given peaks.

        For every maxima pair stored in ``self.peak_pairs`` for a peak,
        ``find_targets_single_peak`` is called. When it returns a non-empty
        collection, the matching slices of ``self.genome.genome_seq`` (from
        ``target[0]`` through ``target[1]`` inclusive) are appended to
        ``self.target_seqs`` and the targets themselves to
        ``self.target_sites``, both under the peak's chromosome and string.

        Args:
            peaks: Iterable of ``(chrom, start, stop)`` triples.
        """
        for chrom, start, stop in peaks:
            region = shared.peak_str(start, stop)

            for first, second in self.peak_pairs[chrom][region]:
                found = self.find_targets_single_peak(chrom, first, second)

                # Pairs that yield no targets leave both mappings untouched.
                if len(found) == 0:
                    continue

                sequences = []
                for site in found:
                    sequences.append(
                        self.genome.genome_seq[chrom][site[0]:site[1] + 1])

                self.target_seqs[chrom][region].append(sequences)
                self.target_sites[chrom][region].append(found)
示例#10
0
    def find_sites(self):
        """Build the genome model, QC every peak, find insertion sites, then exit.

        Creates a ``GenomeAlignment`` (stored on ``self.genome``) and a
        ``PeakQC`` (stored on ``self.peakQC``), calls ``perform_peak_QC`` for
        each ``(chrom, start, end)`` in ``self.peaks``, then asks the QC
        object to find insertion sites and print its log.

        The method does not return: it finishes by calling ``sys.exit()``.
        """
        self.log_info("Building the genome read depth model...")
        self.genome = GenomeAlignment(
            self.genome_path, self.bam_file, self.peaks, self.log)
        self.genome.generateModel()
        self.log_debug("Finished building the genome model...")

        self.log_debug("Creating the PeakQC object...")
        self.peakQC = PeakQC(self.genome, self.peaks, self.log)

        for chrom, start, end in self.peaks:
            region = shared.peak_str(start, end)

            if self.log:
                self.log.info("Processing the peak %s..." % region)
            self.peakQC.perform_peak_QC(chrom, start, end)

        self.peakQC.find_insertion_sites()
        self.peakQC.print_QC_log()

        # Terminates the interpreter once the log has been printed.
        sys.exit()
示例#11
0
 def perform_paired_maxima_QC(self, chrom, start, end):
     """Record how many maxima pairs the peak has in the QC log.

     Counts the entries stored for the peak in
     ``self.peak_analyzer.peak_pairs`` and writes that count to
     ``self.QC_log[chrom][peak][self.paired_maxima_tag]``.
     """
     peak_str = shared.peak_str(start, end)
     # The body previously ended in a bare `self.peak_analyzer` expression,
     # which evaluated the attribute and discarded it without recording
     # anything in the QC log.
     paired_peaks = len(self.peak_analyzer.peak_pairs[chrom][peak_str])
     self.QC_log[chrom][peak_str][self.paired_maxima_tag] = paired_peaks
示例#12
0
    def perform_paired_maxima_QC(self, chrom, start, end):
        """Store the number of maxima pairs found for a peak in the QC log.

        The count is the length of the peak's entry in
        ``self.peak_analyzer.peak_pairs``; it is written to
        ``self.QC_log[chrom][peak][self.paired_maxima_tag]``.
        """
        region = shared.peak_str(start, end)
        pairs = self.peak_analyzer.peak_pairs[chrom][region]
        self.QC_log[chrom][region][self.paired_maxima_tag] = len(pairs)