def _junc_partner_fraction(juncmap, coord, partner_coord):
    """Return the share of coverage at `coord` attributed to `partner_coord`.

    `juncmap` maps a junction coordinate to a dict of
    {partner coordinate: coverage}.  The result is the coverage stored for
    `partner_coord` divided by the sum of coverage over all partners of
    `coord`.

    Raises ZeroDivisionError when the summed coverage at `coord` is zero.
    """
    partners = juncmap[coord]
    cov = partners[partner_coord]
    # .values() works on Python 2 and 3 alike; .itervalues() is Python 2 only
    total_cov = sum(partners.values())
    return cov / float(total_cov)


def calc_permiscuity(c, juncmap5p, juncmap3p, ggmap):
    """Compute how much of each junction's coverage belongs to chimera `c`.

    Arguments:
      c         -- chimera; its mate5p/mate3p tx_name, mate5p.end and
                   mate3p.start attributes are read
      juncmap5p -- {5' coord: {3' coord: coverage}}
      juncmap3p -- {3' coord: {5' coord: coverage}}
      ggmap     -- passed through to gene_to_genome_pos

    Returns a tuple (frac5p, frac3p): the coverage recorded for this 5'/3'
    pair divided by the total coverage recorded at the 5' junction, and the
    same ratio taken from the 3' junction's point of view.

    Raises ZeroDivisionError if the total coverage at either junction is
    zero.  NOTE: when the maps are defaultdicts (as returned by
    build_junc_permiscuity_map), looking up a junction that was never
    recorded inserts an empty/zero entry and then triggers this error.
    """
    # subtract one since 5' junc position is an open interval
    coord5p = gene_to_genome_pos(c.mate5p.tx_name, c.mate5p.end - 1, ggmap)
    coord3p = gene_to_genome_pos(c.mate3p.tx_name, c.mate3p.start, ggmap)
    frac5p = _junc_partner_fraction(juncmap5p, coord5p, coord3p)
    frac3p = _junc_partner_fraction(juncmap3p, coord3p, coord5p)
    return frac5p, frac3p
def build_junc_permiscuity_map(chimeras, ggmap):
    """Tabulate partner coverage for every 5' and every 3' junction.

    For each chimera the 5' and 3' junction positions are converted with
    gene_to_genome_pos, and the largest weighted_cov seen for that 5'/3'
    pair is recorded in both directions.

    Returns a tuple (junc5p_map, junc3p_map) of nested defaultdicts:
      junc5p_map[coord5p][coord3p] -> max weighted_cov for the pair
      junc3p_map[coord3p][coord5p] -> max weighted_cov for the pair
    Missing entries default to 0.
    """
    def _new_table():
        # junction -> {partner junction -> coverage}, absent pairs read as 0
        return collections.defaultdict(lambda: collections.defaultdict(int))

    by_5p = _new_table()
    by_3p = _new_table()
    for chimera in chimeras:
        # subtract one since 5' junc position is an open interval
        pos5p = gene_to_genome_pos(chimera.mate5p.tx_name,
                                   chimera.mate5p.end - 1, ggmap)
        pos3p = gene_to_genome_pos(chimera.mate3p.tx_name,
                                   chimera.mate3p.start, ggmap)
        cov = chimera.weighted_cov
        # track the reads emanating from each junction toward each partner:
        # first from the 5' side, then the mirror image from the 3' side,
        # keeping the maximum coverage observed for the pair
        for table, junc, partner in ((by_5p, pos5p, pos3p),
                                     (by_3p, pos3p, pos5p)):
            row = table[junc]
            row[partner] = max(row[partner], cov)
    return by_5p, by_3p
def build_junc_coverage_map(chimeras, ggmap):
    """Pick the highest-coverage isoform pair for every 5'/3' junction.

    Chimeras are grouped by their genomic junction pair (coord5p, coord3p),
    obtained through gene_to_genome_pos.  Within a group the chimera with
    the greatest (encomp_and_spanning, weighted_cov, encomp_or_spanning)
    tuple wins; on an exact tie the chimera seen first is kept.

    Arguments:
      chimeras -- iterable of chimeras (consumed once; it is counted by hand
                  so it does not need to support len())
      ggmap    -- passed through to gene_to_genome_pos

    Returns a set of (5' tx_name, 3' tx_name) tuples, one per winning
    isoform pair (identical pairs from different junctions collapse).
    """
    # (coord5p, coord3p) -> (paircov, 5' tx_name, 3' tx_name) of best so far
    best_by_junc = {}
    num_chimeras = 0
    for c in chimeras:
        num_chimeras += 1
        # convert to genomic coords
        # subtract one since 5' junc position is an open interval
        coord5p = gene_to_genome_pos(c.mate5p.tx_name, c.mate5p.end - 1, ggmap)
        coord3p = gene_to_genome_pos(c.mate3p.tx_name, c.mate3p.start, ggmap)
        pairkey = (coord5p, coord3p)
        # encomp/spanning, weighted coverage, and total reads
        paircov = (c.encomp_and_spanning, c.weighted_cov, c.encomp_or_spanning)
        best = best_by_junc.get(pairkey)
        # direct tuple comparison replaces the Python 2-only cmp() builtin;
        # strict '>' means a later chimera must beat, not merely match
        if best is None or paircov > best[0]:
            best_by_junc[pairkey] = (paircov, c.mate5p.tx_name,
                                     c.mate3p.tx_name)
    logging.debug("Parsed %d chimeras", num_chimeras)
    # .values() works on Python 2 and 3 alike; .itervalues() is Python 2 only
    kept_isoforms = set((tx5p, tx3p)
                        for _, tx5p, tx3p in best_by_junc.values())
    logging.debug("Kept %d highest coverage isoforms", len(kept_isoforms))
    return kept_isoforms