Пример #1
0
    def non_constitutive_case(self):
        '''
        Handle a target exon that is itself alternatively spliced.

        Psi is estimated for the target as well as for both flanking exons.
        If the flanking exons were not supplied (``self.upstream`` and
        ``self.downstream`` are not both truthy), the exons on each side of
        the target inside ``self.component`` are scanned, nearest first,
        with ``find_closest_exon_above_cutoff``.

        Results are stored on the instance (``all_paths``, ``paths``,
        ``counts``, ``psi_upstream``, ``psi_downstream``, ``psi_target`` and,
        when searched for, ``upstream``/``downstream``) and the paths/counts
        are handed to ``utils.save_path_info``.
        '''
        # parenthesised form prints the same text and also parses on Python 3
        print('non-constitutive case')
        target_pos = self.component.index(self.target)

        # transcript paths through the component and their estimated counts
        self.all_paths = algs.AllPaths(self.splice_graph, self.component,
                                       self.target, self.splice_graph.chr)
        self.all_paths.trim_tx_paths()
        self.all_paths.set_all_path_coordinates()
        self.paths, self.counts = self.all_paths.estimate_counts()

        if self.upstream and self.downstream:
            # flanking exons are already known: only their psi is needed
            self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
            self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
        else:
            # candidates on either side of the target, closest exon first
            if self.strand == '-':
                upstream_candidates = self.component[target_pos + 1:]
                downstream_candidates = list(reversed(self.component[:target_pos]))
            else:
                upstream_candidates = list(reversed(self.component[:target_pos]))
                downstream_candidates = self.component[target_pos + 1:]

            # upstream is searched first, then downstream
            self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
                self.paths, self.counts, upstream_candidates)
            self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
                self.paths, self.counts, downstream_candidates)

        utils.save_path_info(self.id, self.paths, self.counts)
        self.psi_target = mem.estimate_psi(self.target, self.paths, self.counts)
Пример #2
0
    def two_biconnected_case(self):
        '''
        Handle a constitutive target exon that sits between two biconnected
        components, so psi has to be estimated for both the upstream and the
        downstream exon.

        ``self.component`` is unpacked into two components. The one whose
        last exon equals the target is the "before" component and the other
        is the "after" component (before/after refer to chromosome position,
        per the original comments). Each component gets its own paths and
        counts. ``psi_target`` is set to 1.0, and a combined set of paths
        between the chosen flanking exons is computed and saved with
        ``utils.save_path_info``.
        '''
        # parenthesised form prints the same text and also parses on Python 3
        print('two biconnected case')
        if self.component[0][-1] == self.target:
            before_component, after_component = self.component
        else:
            after_component, before_component = self.component

        def _paths_and_counts(component):
            # paths/counts restricted to a single component
            finder = algs.AllPaths(self.splice_graph, component, self.target, self.splice_graph.chr)
            finder.trim_tx_paths()
            return finder.estimate_counts()

        # one set of paths per side of the target exon
        before_paths, before_counts = _paths_and_counts(before_component)
        after_paths, after_counts = _paths_and_counts(after_component)

        if self.upstream and self.downstream:
            # flanking exons already known: estimate psi on the matching side
            if self.strand == '+':
                self.psi_upstream = mem.estimate_psi(self.upstream, before_paths, before_counts)
                self.psi_downstream = mem.estimate_psi(self.downstream, after_paths, after_counts)
            elif self.strand == '-':
                self.psi_upstream = mem.estimate_psi(self.upstream, after_paths, after_counts)
                self.psi_downstream = mem.estimate_psi(self.downstream, before_paths, before_counts)
        else:
            # search arguments for each side; candidates are ordered so the
            # exon nearest to the target is tried first
            before_search = (before_paths, before_counts,
                             list(reversed(before_component[:-1])))
            after_search = (after_paths, after_counts, after_component[1:])
            if self.strand == '+':
                upstream_search, downstream_search = before_search, after_search
            else:
                upstream_search, downstream_search = after_search, before_search

            # upstream is searched first, then downstream
            self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(*upstream_search)
            self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(*downstream_search)

        # the target ends the "before" component, so drop it there to avoid
        # listing it twice in the combined component
        self.total_components = before_component[:-1] + after_component
        self.psi_target = 1.0

        # handle the combined components: slice from one flanking exon to the
        # other in list order
        if self.splice_graph.strand == '+':
            first_flank, last_flank = self.upstream, self.downstream
        else:
            first_flank, last_flank = self.downstream, self.upstream
        start_ix = self.total_components.index(first_flank)
        end_ix = self.total_components.index(last_flank)
        # NOTE(review): the slice end is exclusive, so the exon at end_ix is
        # not passed to AllPaths -- confirm this is intended.
        self.all_paths = algs.AllPaths(self.splice_graph,
                                       self.total_components[start_ix:end_ix],
                                       self.target, self.splice_graph.chr)
        self.all_paths.trim_tx_paths()
        self.all_paths.set_all_path_coordinates()
        self.paths, self.counts = self.all_paths.estimate_counts()  # used to be self.before_all_paths
        utils.save_path_info(self.id, self.paths, self.counts)
Пример #3
0
def predefined_exons_case(id, target, sGraph, genome, upstream_exon,
                          downstream_exon):
    """
    Estimate psi for ``target`` when both flanking exons are user supplied.

    Strategy:
    1. Use All Paths on the exons lying between the two flanking exons,
       then trim using the flanking exons and the target
    2. Save counts/paths through ``utils.save_path_info``
    3. Get sequence information for upstream/target/downstream

    Returns a list:
    ``[strand, target region, psi_target, upstream region, -1,
    downstream region, -1, all_paths, upstream_seq, target_seq,
    downstream_seq]`` where each ``-1`` marks a user-defined flanking exon
    whose psi is not estimated.
    """
    # exons of the graph ordered by (start, end)
    graph_exons = copy.deepcopy(sGraph.get_graph().nodes())
    ordered_exons = sorted(graph_exons, key=lambda exon: (exon[0], exon[1]))

    # in sorted order the upstream exon comes first only on the '+' strand
    if sGraph.strand == '+':
        first_exon, last_exon = upstream_exon, downstream_exon
    else:
        first_exon, last_exon = downstream_exon, upstream_exon
    my_exons = ordered_exons[ordered_exons.index(first_exon):
                             ordered_exons.index(last_exon) + 1]

    # Use correct tx's and estimate counts/psi
    all_paths = algs.AllPaths(sGraph, my_exons, target,
                              chr=sGraph.chr, strand=sGraph.strand)
    all_paths.trim_tx_paths_using_flanking_exons_and_target(
        sGraph.strand, target, upstream_exon, downstream_exon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    psi_target = mem.estimate_psi(target, paths, counts)
    utils.save_path_info(id, paths, counts)  # persist the paths/counts for this id

    # sequence of each exon, sliced from the chromosome object
    genome_chr = genome[sGraph.chr]
    upstream_seq = genome_chr[upstream_exon[0]:upstream_exon[1]]
    target_seq = genome_chr[target[0]:target[1]]
    downstream_seq = genome_chr[downstream_exon[0]:downstream_exon[1]]
    if sGraph.strand == '-':
        # negation yields the reverse-complement (per the original comments)
        upstream_seq = -upstream_seq
        target_seq = -target_seq
        downstream_seq = -downstream_seq

    strand = sGraph.strand
    target_region = '%s:%d-%d' % (sGraph.chr, target[0], target[1])
    upstream_region = sGraph.chr + ':' + '-'.join(map(str, upstream_exon))  # eg. chr1:1000-2000
    downstream_region = sGraph.chr + ':' + '-'.join(map(str, downstream_exon))  # eg. chr1:1000-2000

    return [
        strand,
        target_region,
        psi_target,
        upstream_region,
        -1,  # user defined exon, don't estimate psi
        downstream_region,
        -1,  # user defined exon, don't estimate psi
        all_paths,
        upstream_seq,
        target_seq,
        downstream_seq,
    ]
Пример #4
0
    def last_exon_case(self):
        '''
        Case where the target and one flanking exon are constitutive.

        When both ``self.upstream`` and ``self.downstream`` are already set
        (user-defined flanking exons), only their psi values are filled in:
        one side is fixed at 1.0 and the other is estimated from the paths.
        Otherwise the constitutive flanking exon is taken from
        ``self.graph.successors(self.target)`` with psi 1.0, and the other
        flanking exon is searched for in ``self.component[:-1]``, nearest
        first, with ``find_closest_exon_above_cutoff``.

        Sets ``all_paths``, ``paths``, ``counts``, ``psi_upstream``,
        ``psi_downstream`` and ``psi_target`` (always 1.0) on the instance.

        Raises whatever ``find_closest_exon_above_cutoff`` raises when no
        acceptable flanking exon exists.
        '''
        # parenthesised form prints the same text and also parses on Python 3
        print('last exon case')
        if len(self.graph.successors(self.target)) > 1:
            logging.debug('Conflict between biconnected components and successors')

        possible_const = self.component[:-1]
        possible_const.reverse()  # reverse the order since closer exons should be looked at first

        # get tx path information
        self.all_paths = algs.AllPaths(self.splice_graph, self.component, self.target, self.splice_graph.chr)
        self.all_paths.trim_tx_paths()
        self.all_paths.set_all_path_coordinates()
        self.paths, self.counts = self.all_paths.estimate_counts()

        if self.upstream and self.downstream:
            # user defined flanking exon case
            # NOTE(review): as in first_exon_case, no path info is saved in
            # this branch -- confirm callers do not rely on it.
            if self.strand == '+':
                self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
                self.psi_downstream = 1.0
            elif self.strand == '-':
                self.psi_upstream = 1.0
                self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
        elif self.strand == '+':
            # Fix: this used to be a plain ``if``, so the automatic search
            # below always ran and overwrote user-defined flanking exons.
            self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(self.paths,
                                                                                   self.counts, possible_const)
            self.downstream = self.graph.successors(self.target)[0]
            self.psi_downstream = 1.0
            utils.save_path_info(self.id, [p + [self.downstream] for p in self.paths], self.counts)  # add const. downstream exon to all self.paths
        else:
            self.upstream = self.graph.successors(self.target)[0]
            self.psi_upstream = 1.0
            self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(self.paths,
                                                                                       self.counts, possible_const)
            utils.save_path_info(self.id, [[self.upstream] + p for p in self.paths], self.counts)  # add const. upstream exon to all paths
        self.psi_target = 1.0  # the target is constitutive in this case
Пример #5
0
    def first_exon_case(self):
        '''
        Case where the target and one flanking exon is constitutive.

        When both ``self.upstream`` and ``self.downstream`` are already set
        (user-defined flanking exons), the exon returned first by
        ``self.graph.predecessors(self.target)`` must be the upstream exon
        ('+' strand) or the downstream exon ('-' strand). That exon gets psi
        1.0 and the psi of the other one is estimated from the paths.
        Otherwise the predecessor is used as the constitutive flanking exon
        and the other flanking exon is searched for in
        ``self.component[1:]`` with ``find_closest_exon_above_cutoff``.

        Sets ``all_paths``, ``paths``, ``counts``, ``psi_upstream``,
        ``psi_downstream`` and ``psi_target`` (always 1.0) on the instance.

        Raises:
            utils.PrimerSeqError: if the user-defined flanking exons do not
                match the predecessor of the target, or (via
                ``find_closest_exon_above_cutoff``) if no acceptable
                flanking exon exists.
        '''
        # parenthesised form prints the same text and also parses on Python 3
        print('first exon case')
        if len(self.graph.predecessors(self.target)) > 1:
            logging.debug('Error: Conflict between biconnected components and predecessors')

        # get tx path information
        self.all_paths = algs.AllPaths(self.splice_graph, self.component, self.target, self.splice_graph.chr)
        self.all_paths.trim_tx_paths()
        self.all_paths.set_all_path_coordinates()
        self.paths, self.counts = self.all_paths.estimate_counts()

        if self.upstream and self.downstream:
            # user defined flanking exon case
            if self.strand == '+' and self.graph.predecessors(self.target)[0] == self.upstream:
                self.psi_upstream = 1.0
                # Fix: was assigned to the misspelled ``psi_downsteam``,
                # leaving ``psi_downstream`` unset in this branch.
                self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
            elif self.strand == '-' and self.graph.predecessors(self.target)[0] == self.downstream:
                self.psi_downstream = 1.0
                self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
            else:
                raise utils.PrimerSeqError('Error: Flanking exon choice too far from target exon')
        elif self.strand == '+':
            self.upstream = self.graph.predecessors(self.target)[0]
            self.psi_upstream = 1.0  # defined by biconnected component alg as constitutive
            self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(self.paths,
                                                                                       self.counts, self.component[1:])
            utils.save_path_info(self.id, [[self.upstream] + p for p in self.paths], self.counts)  # add const. upstream exon to all self.paths
        else:
            self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(self.paths,
                                                                                   self.counts, self.component[1:])
            self.downstream = self.graph.predecessors(self.target)[0]
            self.psi_downstream = 1.0
            utils.save_path_info(self.id, [p + [self.downstream] for p in self.paths], self.counts)  # add const. downstream exon to all paths
        self.psi_target = 1.0
Пример #6
0
    def find_closest_exon_above_cutoff(self, paths, counts, possible_exons):
        """
        Progressively step away from the target exon to find a sufficient
        constitutive exon.

        Args:
            paths: paths passed through to ``mem.estimate_psi``.
            counts: counts passed through to ``mem.estimate_psi``.
            possible_exons: candidate exons, ordered so that the exon to try
                first comes first (callers pass them nearest-first).

        Returns:
            ``(exon, psi)`` for the first candidate whose estimated psi is
            ``>= self.cutoff``.

        Raises:
            utils.PrimerSeqError: if no candidate reaches ``self.cutoff``,
                including when ``possible_exons`` is empty.
        """
        psi_list = []
        for exon in possible_exons:
            psi = mem.estimate_psi(exon, paths, counts)
            if psi >= self.cutoff:
                return exon, psi
            psi_list.append(psi)

        # If code reaches this point then there was no flanking exon that met
        # the self.cutoff criteria. Raise utils.PrimerSeqError to indicate failure.
        if not psi_list:
            # Fix: max() of an empty list raises ValueError, which hid the
            # intended PrimerSeqError when there were no candidates at all.
            raise utils.PrimerSeqError('A sufficiently high included flanking exon could '
                                       'not be found (no candidate exons, cutoff: %.3f)' % (self.cutoff,))
        raise utils.PrimerSeqError('A sufficiently high included flanking exon could '
                                   'not be found (max: %.3f, cutoff: %.3f)' % (max(psi_list), self.cutoff))
Пример #7
0
def predefined_exons_case(id, target, sGraph, genome, upstream_exon, downstream_exon):
    """
    Compute psi for ``target`` using flanking exons chosen by the user.

    Strategy:
    1. Use All Paths restricted to the exons from one flanking exon to the
       other (then trim using the flanking exons and the target)
    2. Save counts/paths to file via ``utils.save_path_info``
    3. Get sequence information

    The returned list holds, in order: strand, target region string, target
    psi, upstream region string, -1, downstream region string, -1, the
    AllPaths object, and the upstream/target/downstream sequences. The two
    -1 entries stand in for the psi of the user-defined flanking exons,
    which is not estimated.
    """
    def _region(exon):
        # "<chr>:<start>-<end>", eg. chr1:1000-2000
        return sGraph.chr + ':' + '-'.join(map(str, exon))

    # get possible exons for primer amplification, ordered by (start, end)
    candidates = sorted(copy.deepcopy(sGraph.get_graph().nodes()),
                        key=lambda node: (node[0], node[1]))
    if sGraph.strand == '+':
        lo = candidates.index(upstream_exon)
        hi = candidates.index(downstream_exon) + 1
    else:
        lo = candidates.index(downstream_exon)
        hi = candidates.index(upstream_exon) + 1
    my_exons = candidates[lo:hi]

    # Use correct tx's and estimate counts/psi
    all_paths = algs.AllPaths(sGraph, my_exons, target, chr=sGraph.chr, strand=sGraph.strand)
    all_paths.trim_tx_paths_using_flanking_exons_and_target(sGraph.strand, target, upstream_exon, downstream_exon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    psi_target = mem.estimate_psi(target, paths, counts)
    utils.save_path_info(id, paths, counts)  # persist the paths/counts for this id

    # get sequence of upstream/target/downstream combo
    genome_chr = genome[sGraph.chr]

    def _sequence(exon):
        # slice the chromosome object by the exon's start/end coordinates
        return genome_chr[exon[0]:exon[1]]

    sequences = [_sequence(upstream_exon), _sequence(target), _sequence(downstream_exon)]
    if sGraph.strand == '-':
        # negation gives the reverse-complement (per the original comments)
        sequences = [-seq for seq in sequences]
    upstream_seq, target_seq, downstream_seq = sequences

    result = [sGraph.strand, '%s:%d-%d' % (sGraph.chr, target[0], target[1]), psi_target]
    result += [_region(upstream_exon), -1]    # -1: user defined exon, don't estimate psi
    result += [_region(downstream_exon), -1]  # -1: user defined exon, don't estimate psi
    result += [all_paths, upstream_seq, target_seq, downstream_seq]
    return result