def non_constitutive_case(self):
    '''
    Handle a target exon that is itself alternatively spliced.

    Psi is estimated for the target as well as for both flanking exons.
    When flanking exons were not supplied, the exons on each side of the
    target inside the component are scanned, nearest first, for the first
    one whose psi meets the cutoff.
    '''
    print('non-constitutive case')
    target_ix = self.component.index(self.target)

    # build, trim and quantify the transcript paths through the component
    self.all_paths = algs.AllPaths(self.splice_graph, self.component,
                                   self.target, self.splice_graph.chr)
    self.all_paths.trim_tx_paths()
    self.all_paths.set_all_path_coordinates()
    self.paths, self.counts = self.all_paths.estimate_counts()

    if self.upstream and self.downstream:
        # flanking exons already known: only their psi is needed
        self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
        self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
    else:
        # candidates on each side of the target, ordered nearest-first
        left_of_target = list(reversed(self.component[:target_ix]))
        right_of_target = self.component[target_ix + 1:]
        if self.strand == '-':
            # on the minus strand the side after the target is upstream
            upstream_candidates = right_of_target
            downstream_candidates = left_of_target
        else:
            upstream_candidates = left_of_target
            downstream_candidates = right_of_target
        # upstream is always resolved before downstream
        self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
            self.paths, self.counts, upstream_candidates)
        self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
            self.paths, self.counts, downstream_candidates)

    utils.save_path_info(self.id, self.paths, self.counts)
    self.psi_target = mem.estimate_psi(self.target, self.paths, self.counts)
def two_biconnected_case(self):
    '''
    Handle a constitutive target exon flanked by two biconnected components.

    self.component holds two components here; the one whose last exon is
    the target is treated as the "before" component and the other as the
    "after" component (before/after are defined by chromosome position).
    Psi is estimated for the upstream and downstream exons from their own
    component, the target psi is set to 1.0, and finally a combined
    AllPaths spanning both flanking exons is built and saved.
    '''
    print('two biconnected case')
    if self.component[0][-1] == self.target:
        before_component, after_component = self.component
    else:
        after_component, before_component = self.component

    # two components means two sub-graphs/path sets: one on each side of
    # the target exon
    before_all_paths = algs.AllPaths(self.splice_graph, before_component,
                                     self.target, self.splice_graph.chr)
    before_all_paths.trim_tx_paths()
    before_paths, before_counts = before_all_paths.estimate_counts()
    after_all_paths = algs.AllPaths(self.splice_graph, after_component,
                                    self.target, self.splice_graph.chr)
    after_all_paths.trim_tx_paths()
    after_paths, after_counts = after_all_paths.estimate_counts()

    if self.upstream and self.downstream:
        # flanking exons already known: estimate their psi from the
        # component on the matching side for this strand
        if self.strand == '+':
            self.psi_upstream = mem.estimate_psi(self.upstream, before_paths, before_counts)
            self.psi_downstream = mem.estimate_psi(self.downstream, after_paths, after_counts)
        elif self.strand == '-':
            self.psi_upstream = mem.estimate_psi(self.upstream, after_paths, after_counts)
            self.psi_downstream = mem.estimate_psi(self.downstream, before_paths, before_counts)
    elif self.strand == '+':
        self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
            before_paths, before_counts, list(reversed(before_component[:-1])))
        self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
            after_paths, after_counts, after_component[1:])
    else:
        self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
            after_paths, after_counts, after_component[1:])
        self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
            before_paths, before_counts, list(reversed(before_component[:-1])))

    # the target closes the "before" component, so drop it there to avoid
    # listing it twice in the merged component
    self.total_components = before_component[:-1] + after_component
    self.psi_target = 1.0

    # handle the combined components: locate both flanking exons in the
    # merged list, in list order
    if self.splice_graph.strand == '+':
        tmp_start_ix = self.total_components.index(self.upstream)
        tmp_end_ix = self.total_components.index(self.downstream)
    else:
        tmp_start_ix = self.total_components.index(self.downstream)
        tmp_end_ix = self.total_components.index(self.upstream)

    # The slice end is exclusive, so "+ 1" is required to keep the far
    # flanking exon in the combined component (predefined_exons_case builds
    # its exon range the same way).
    self.all_paths = algs.AllPaths(self.splice_graph,
                                   self.total_components[tmp_start_ix:tmp_end_ix + 1],
                                   self.target,
                                   self.splice_graph.chr)
    self.all_paths.trim_tx_paths()
    self.all_paths.set_all_path_coordinates()
    self.paths, self.counts = self.all_paths.estimate_counts()  # used to be self.before_all_paths
    utils.save_path_info(self.id, self.paths, self.counts)
def predefined_exons_case(id, target, sGraph, genome, upstream_exon, downstream_exon):
    """
    Process a target exon whose flanking exons were chosen by the user.

    Steps:
        1. Build AllPaths over the exons lying between the two flanking
           exons (inclusive) and trim the paths using the flanking exons
           and the target
        2. Estimate counts/psi and save counts/paths to file
        3. Fetch the upstream/target/downstream sequences

    Returns a list: strand, target coordinates string, target psi,
    upstream coordinates string, -1, downstream coordinates string, -1,
    the AllPaths object, and the upstream/target/downstream sequences.
    The -1 entries stand in for flanking psi values, which are not
    estimated for user defined exons.

    NOTE(review): this function appears twice in the visible source; in
    Python the later definition is the one that stays bound.
    """
    # every exon in the graph, ordered by (start, end)
    exons_by_position = sorted(copy.deepcopy(sGraph.get_graph().nodes()),
                               key=lambda exon: (exon[0], exon[1]))

    # exons available for primer amplification: everything from the
    # lower-coordinate flanking exon through the higher one
    if sGraph.strand == '+':
        first_exon, last_exon = upstream_exon, downstream_exon
    else:
        first_exon, last_exon = downstream_exon, upstream_exon
    first_ix = exons_by_position.index(first_exon)
    last_ix = exons_by_position.index(last_exon)
    my_exons = exons_by_position[first_ix:last_ix + 1]

    # restrict to the relevant transcripts, then estimate counts/psi
    all_paths = algs.AllPaths(sGraph, my_exons, target, chr=sGraph.chr, strand=sGraph.strand)
    all_paths.trim_tx_paths_using_flanking_exons_and_target(
        sGraph.strand, target, upstream_exon, downstream_exon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    psi_target = mem.estimate_psi(target, paths, counts)
    utils.save_path_info(id, paths, counts)  # save paths/counts in tmp/isoforms/id.json

    # sequences of the upstream/target/downstream exons
    genome_chr = genome[sGraph.chr]  # chr object from pygr
    exon_seqs = [genome_chr[exon[0]:exon[1]]
                 for exon in (upstream_exon, target, downstream_exon)]
    if sGraph.strand == '-':
        # reverse-complement on the minus strand
        exon_seqs = [-seq for seq in exon_seqs]
    upstream_seq, target_seq, downstream_seq = exon_seqs

    target_label = '%s:%d-%d' % (sGraph.chr, target[0], target[1])
    upstream_label = sGraph.chr + ':' + '-'.join(map(str, upstream_exon))  # eg. chr1:1000-2000
    downstream_label = sGraph.chr + ':' + '-'.join(map(str, downstream_exon))  # eg. chr1:1000-2000
    return [
        sGraph.strand,
        target_label,
        psi_target,
        upstream_label,
        -1,  # user defined exon, don't estimate psi
        downstream_label,
        -1,  # user defined exon, don't estimate psi
        all_paths,
        upstream_seq,
        target_seq,
        downstream_seq,
    ]
def last_exon_case(self):
    '''
    Case where the target and one flanking exon are constitutive.

    The target is the last exon of self.component and its successor in the
    graph is treated as constitutive (psi 1.0). On the '+' strand that
    successor is the downstream exon; otherwise it is the upstream exon.
    The flanking exon on the other side is either the user supplied one or
    the closest exon in the component whose psi meets the cutoff.

    Fix: previously the strand-specific search always ran, even when both
    flanking exons were supplied by the caller, silently replacing them and
    discarding the psi that had just been estimated. Supplied flanking
    exons are now kept; path information is still saved in every case.

    Raises utils.PrimerSeqError (via find_closest_exon_above_cutoff) when
    no flanking exon meets the cutoff.
    '''
    print('last exon case')
    if len(self.graph.successors(self.target)) > 1:
        logging.debug('Conflict between biconnected components and successors')

    possible_const = self.component[:-1]
    possible_const.reverse()  # reverse the order since closer exons should be looked at first

    # get tx path information
    self.all_paths = algs.AllPaths(self.splice_graph, self.component,
                                   self.target, self.splice_graph.chr)
    self.all_paths.trim_tx_paths()
    self.all_paths.set_all_path_coordinates()
    self.paths, self.counts = self.all_paths.estimate_counts()

    user_defined = self.upstream and self.downstream
    if self.strand == '+':
        if user_defined:
            # keep the caller's flanking exons; only estimate psi
            # NOTE(review): the supplied downstream exon is assumed to be the
            # constitutive successor of the target -- confirm with callers
            self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
            self.psi_downstream = 1.0
        else:
            self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
                self.paths, self.counts, possible_const)
            self.downstream = self.graph.successors(self.target)[0]
            self.psi_downstream = 1.0
        # add const. downstream exon to all self.paths
        utils.save_path_info(self.id,
                             [p + [self.downstream] for p in self.paths],
                             self.counts)
    else:
        if user_defined:
            # keep the caller's flanking exons; only estimate psi
            # NOTE(review): the supplied upstream exon is assumed to be the
            # constitutive successor of the target -- confirm with callers
            self.psi_upstream = 1.0
            self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
        else:
            self.upstream = self.graph.successors(self.target)[0]
            self.psi_upstream = 1.0
            self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
                self.paths, self.counts, possible_const)
        # add const. upstream exon to all paths
        utils.save_path_info(self.id,
                             [[self.upstream] + p for p in self.paths],
                             self.counts)

    self.psi_target = 1.0  # the target is constitutive in this case
def first_exon_case(self):
    '''
    Case where the target and one flanking exon are constitutive.

    The predecessor of the target in the graph is treated as constitutive
    (psi 1.0). On the '+' strand that predecessor is the upstream exon;
    otherwise it is the downstream exon. The flanking exon on the other
    side is either the user supplied one or the closest exon in
    self.component[1:] whose psi meets the cutoff.

    Fix: the user-defined '+' strand branch assigned the estimate to the
    misspelled attribute ``psi_downsteam``, leaving ``psi_downstream``
    unset.

    Raises utils.PrimerSeqError when user supplied flanking exons do not
    match the target's predecessor for the strand, or (via
    find_closest_exon_above_cutoff) when no flanking exon meets the cutoff.
    '''
    print('first exon case')
    if len(self.graph.predecessors(self.target)) > 1:
        logging.debug('Error: Conflict between biconnected components and predecessors')

    # get tx path information
    self.all_paths = algs.AllPaths(self.splice_graph, self.component,
                                   self.target, self.splice_graph.chr)
    self.all_paths.trim_tx_paths()
    self.all_paths.set_all_path_coordinates()
    self.paths, self.counts = self.all_paths.estimate_counts()

    if self.upstream and self.downstream:
        # user defined flanking exon case: the exon on the predecessor side
        # must be the target's predecessor itself
        if self.strand == '+' and self.graph.predecessors(self.target)[0] == self.upstream:
            self.psi_upstream = 1.0
            self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
        elif self.strand == '-' and self.graph.predecessors(self.target)[0] == self.downstream:
            self.psi_downstream = 1.0
            self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
        else:
            raise utils.PrimerSeqError('Error: Flanking exon choice too far from target exon')
    elif self.strand == '+':
        self.upstream = self.graph.predecessors(self.target)[0]
        self.psi_upstream = 1.0  # defined by biconnected component alg as constitutive
        self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(
            self.paths, self.counts, self.component[1:])
        # add const. upstream exon to all self.paths
        utils.save_path_info(self.id,
                             [[self.upstream] + p for p in self.paths],
                             self.counts)
    else:
        self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(
            self.paths, self.counts, self.component[1:])
        self.downstream = self.graph.predecessors(self.target)[0]
        self.psi_downstream = 1.0
        # add const. downstream exon to all paths
        utils.save_path_info(self.id,
                             [p + [self.downstream] for p in self.paths],
                             self.counts)

    self.psi_target = 1.0
def find_closest_exon_above_cutoff(self, paths, counts, possible_exons):
    """
    Progressively step away from the target exon to find a sufficiently
    included flanking exon.

    possible_exons is scanned in the order given (callers pass it ordered
    nearest-to-target first); the first exon whose estimated psi is at
    least self.cutoff is returned as an (exon, psi) pair.

    Raises utils.PrimerSeqError when no candidate meets the cutoff,
    including when possible_exons is empty. Previously an empty candidate
    list surfaced as a ValueError from max() on an empty list.
    """
    psi_list = []
    for exon in possible_exons:
        psi = mem.estimate_psi(exon, paths, counts)
        if psi >= self.cutoff:
            return exon, psi
        psi_list.append(psi)

    # Reaching this point means no flanking exon met the self.cutoff
    # criteria; raise utils.PrimerSeqError to indicate failure.
    if not psi_list:
        # nothing to report a maximum for: there were no candidates at all
        raise utils.PrimerSeqError('A sufficiently high included flanking exon could '
                                   'not be found (no candidate exons, cutoff: %.3f)' % self.cutoff)
    raise utils.PrimerSeqError('A sufficiently high included flanking exon could '
                               'not be found (max: %.3f, cutoff: %.3f)' % (max(psi_list), self.cutoff))
def predefined_exons_case(id, target, sGraph, genome, upstream_exon, downstream_exon):
    """
    Process a target exon whose flanking exons were chosen by the user.

    Steps:
        1. Build AllPaths over the exons lying between the two flanking
           exons (inclusive) and trim the paths using the flanking exons
           and the target
        2. Estimate counts/psi and save counts/paths to file
        3. Fetch the upstream/target/downstream sequences

    Returns a list: strand, target coordinates string, target psi,
    upstream coordinates string, -1, downstream coordinates string, -1,
    the AllPaths object, and the upstream/target/downstream sequences.
    The -1 entries stand in for flanking psi values, which are not
    estimated for user defined exons.

    NOTE(review): this function appears twice in the visible source; in
    Python the later definition is the one that stays bound.
    """
    # every exon in the graph, ordered by (start, end)
    exons_by_position = sorted(copy.deepcopy(sGraph.get_graph().nodes()),
                               key=lambda exon: (exon[0], exon[1]))

    # exons available for primer amplification: everything from the
    # lower-coordinate flanking exon through the higher one
    if sGraph.strand == '+':
        first_exon, last_exon = upstream_exon, downstream_exon
    else:
        first_exon, last_exon = downstream_exon, upstream_exon
    first_ix = exons_by_position.index(first_exon)
    last_ix = exons_by_position.index(last_exon)
    my_exons = exons_by_position[first_ix:last_ix + 1]

    # restrict to the relevant transcripts, then estimate counts/psi
    all_paths = algs.AllPaths(sGraph, my_exons, target, chr=sGraph.chr, strand=sGraph.strand)
    all_paths.trim_tx_paths_using_flanking_exons_and_target(
        sGraph.strand, target, upstream_exon, downstream_exon)
    all_paths.set_all_path_coordinates()
    paths, counts = all_paths.estimate_counts()  # run EM algorithm
    psi_target = mem.estimate_psi(target, paths, counts)
    utils.save_path_info(id, paths, counts)  # save paths/counts in tmp/isoforms/id.json

    # sequences of the upstream/target/downstream exons
    genome_chr = genome[sGraph.chr]  # chr object from pygr
    exon_seqs = [genome_chr[exon[0]:exon[1]]
                 for exon in (upstream_exon, target, downstream_exon)]
    if sGraph.strand == '-':
        # reverse-complement on the minus strand
        exon_seqs = [-seq for seq in exon_seqs]
    upstream_seq, target_seq, downstream_seq = exon_seqs

    target_label = '%s:%d-%d' % (sGraph.chr, target[0], target[1])
    upstream_label = sGraph.chr + ':' + '-'.join(map(str, upstream_exon))  # eg. chr1:1000-2000
    downstream_label = sGraph.chr + ':' + '-'.join(map(str, downstream_exon))  # eg. chr1:1000-2000
    return [
        sGraph.strand,
        target_label,
        psi_target,
        upstream_label,
        -1,  # user defined exon, don't estimate psi
        downstream_label,
        -1,  # user defined exon, don't estimate psi
        all_paths,
        upstream_seq,
        target_seq,
        downstream_seq,
    ]