Пример #1
0
    def populate_graph(self):
        """
        Add a splice edge from every donor exon to the acceptor exon that
        follows it, and record each exon's number within its transcript.

        Every item of every table in self.tables is unpacked as
        (chrom, startvals, endvals, strand, gene).  startvals and endvals
        are comma-separated coordinate strings; the last field of each is
        discarded (presumably the lists end with a trailing comma -- confirm
        against the table format).  Start coordinates are shifted by +1 and
        all coordinates are handed to Unit as strings.

        For "-" strand transcripts the exons are visited in reverse order
        (order of transcription) and each Unit is built with its end
        coordinate first.

        Exon numbers are zero-based: the donor of the N-th splice is
        recorded as exon N and its acceptor as exon N + 1.

        Returns None.  Progress and timing are printed to stdout.
        """
        print("Populating graph...")
        t1 = time.time()
        for table_name in self.tables:
            print("Adding splice edges from table %s" % (table_name))
            for item in self.tables[table_name]:
                chrom, startvals, endvals, strand, gene = item
                # Adds +1 since downloaded UCSC tables are 0-based start!
                # Real lists (not map objects) are built so they can be
                # reversed and indexed on Python 3 as well as Python 2.
                startvals = [str(int(start) + 1)
                             for start in startvals.split(",")[:-1]]
                endvals = endvals.split(",")[:-1]
                minus_strand = (strand == "-")
                if minus_strand:
                    # If it's a minus strand event, walk the transcript from
                    # end (in order of transcription)
                    startvals.reverse()
                    endvals.reverse()
                indices = list(range(len(startvals)))
                # Zero-based exon number
                curr_exon_num = 0
                for curr_i, next_i in utils.iter_by_pair(indices, step=1):
                    # Splice from end of current exon to start of next exon
                    if minus_strand:
                        # Start/end of donor and acceptor units are
                        # reversed for a minus strand event
                        donor_unit = Unit((chrom, endvals[curr_i], strand),
                                          (chrom, startvals[curr_i], strand))
                        acceptor_unit = Unit((chrom, endvals[next_i], strand),
                                             (chrom, startvals[next_i], strand))
                    else:
                        donor_unit = Unit((chrom, startvals[curr_i], strand),
                                          (chrom, endvals[curr_i], strand))
                        acceptor_unit = Unit((chrom, startvals[next_i], strand),
                                             (chrom, endvals[next_i], strand))
                    ## Record the exon number for donor and acceptor
                    # Donor is exon number N
                    self.add_unit_number(donor_unit, curr_exon_num, strand)
                    # Acceptor is exon number N+1 (this must be registered
                    # on the acceptor unit, not on the donor a second time)
                    self.add_unit_number(acceptor_unit, curr_exon_num + 1,
                                         strand=strand)
                    self.add_edge(donor_unit, acceptor_unit,
                                  strand=strand)
                    curr_exon_num += 1
        t2 = time.time()
        print("Populating graph took %.2f seconds" % (t2 - t1))
Пример #2
0
 def prob_score(self, subseq):
     """
     Return the probability score of subseq.

     The score starts from the frequency of the first element of subseq
     (looked up in self.base_freqs).  For every pair produced by
     utils.iter_by_pair(subseq, 1) it is multiplied by the pair's
     dinucleotide frequency (self.du) divided by the sum of the values
     returned by self.get_dinuc_freqs_from for the pair's first element.
     The product is accumulated in log space and exponentiated at the end.

     An empty subseq scores 0.
     """
     if not len(subseq):
         return 0
     # Seed the running log-score with the leading base
     log_total = np.log(self.base_freqs[subseq[0]])
     for first, second in utils.iter_by_pair(subseq, 1):
         # Key of the current dinucleotide
         dinuc = "%s%s" %(first, second)
         # Normalise by the summed frequencies of transitions out of
         # the first base of the pair
         numer = self.du[dinuc]
         denom = np.sum(self.get_dinuc_freqs_from(first))
         log_total += (np.log(numer) - np.log(denom))
     return np.exp(log_total)
Пример #3
0
 def prob_score(self, subseq):
     """
     Compute the probability score of subseq.

     Working in log space, the score is the log of self.base_freqs for
     the first element of subseq plus, for each (previous, next) pair
     yielded by utils.iter_by_pair(subseq, 1), the log of the pair's
     entry in self.du minus the log of the summed values returned by
     self.get_dinuc_freqs_from(previous).  The exponential of that total
     is returned.

     Returns 0 when subseq is empty.
     """
     if not len(subseq):
         return 0
     # Contribution of the first base
     running_log = np.log(self.base_freqs[subseq[0]])
     for base_a, base_b in utils.iter_by_pair(subseq, 1):
         # Look up the current dinucleotide
         pair_key = "%s%s" % (base_a, base_b)
         pair_freq = self.du[pair_key]
         # Total frequency of transitions leaving base_a
         outgoing_total = np.sum(self.get_dinuc_freqs_from(base_a))
         log_ratio = np.log(pair_freq) - np.log(outgoing_total)
         running_log += log_ratio
     return np.exp(running_log)
Пример #4
0
    def populate_graph(self):
        """
        Add a splice edge from every donor exon to the acceptor exon that
        follows it, for each transcript in each table of self.tables.

        Every table item is unpacked as
        (chrom, startvals, endvals, strand, gene).  startvals and endvals
        are comma-separated coordinate strings; the last field of each is
        discarded (presumably the lists end with a trailing comma -- confirm
        against the table format).  Start coordinates are shifted by +1 and
        all coordinates are handed to Unit as strings.

        For "-" strand transcripts the exons are visited in reverse order
        (order of transcription) and each Unit is built with its end
        coordinate first.

        Returns None.  Progress and timing are printed to stdout.
        """
        print("Populating graph...")
        t1 = time.time()
        for table_name in self.tables:
            print("Adding splice edges from table %s" % (table_name))
            for item in self.tables[table_name]:
                chrom, startvals, endvals, strand, gene = item
                # Adds +1 since downloaded UCSC tables are 0-based start!
                # Real lists (not map objects) are built so they can be
                # reversed and indexed on Python 3 as well as Python 2.
                startvals = [str(int(start) + 1)
                             for start in startvals.split(",")[:-1]]
                endvals = endvals.split(",")[:-1]
                minus_strand = (strand == "-")
                if minus_strand:
                    # If it's a minus strand event, walk the transcript from
                    # end (in order of transcription)
                    startvals.reverse()
                    endvals.reverse()
                indices = list(range(len(startvals)))
                for curr_i, next_i in utils.iter_by_pair(indices, step=1):
                    # Splice from end of current exon to start of next exon
                    if minus_strand:
                        # Start/end of donor and acceptor units are
                        # reversed for a minus strand event
                        donor_unit = Unit((chrom, endvals[curr_i], strand),
                                          (chrom, startvals[curr_i], strand))
                        acceptor_unit = Unit((chrom, endvals[next_i], strand),
                                             (chrom, startvals[next_i], strand))
                    else:
                        donor_unit = Unit((chrom, startvals[curr_i], strand),
                                          (chrom, endvals[curr_i], strand))
                        acceptor_unit = Unit((chrom, startvals[next_i], strand),
                                             (chrom, endvals[next_i], strand))
                    # Record splice site as edge
                    self.add_edge(donor_unit, acceptor_unit,
                                  strand=strand)
        t2 = time.time()
        print("Populating graph took %.2f seconds" % (t2 - t1))