def walk_over_read_bipartite(mode, subread_ipds, ref_str, read_ipds, strand, opts):
    """
    Scan the read with every bipartite window shape and collect IPD values.

    A window shape is one (first, N, second) combination drawn from
    opts.bipart_config[0], [1] and [2]; the window is first + N + second
    characters wide. Windows containing "*" or "X" are skipped. For the
    remaining windows the query motif is built with motif_tools
    (rev_comp_motif for "unaligned" reads and strand-0 "aligned" reads,
    comp_motif for strand-1 "aligned" reads) and every match of each pattern
    in opts.mod_bases that starts inside the first `first` characters
    contributes one IPD value.

    Values are appended to subread_ipds under the key
    "<first part><N * 'N'><second part>-<match offset>". Keys that are not
    already present raise KeyError inside the lookup and are ignored.

    Returns the same subread_ipds object that was passed in.
    """
    config = opts.bipart_config
    first_sizes = config[0]
    gap_sizes = config[1]
    second_sizes = config[2]

    # Orientation depends only on mode/strand, so decide it once. If neither
    # flag is set no motif is ever assigned and the first use of it fails,
    # exactly as it would with per-window branching.
    take_revcomp = mode == "unaligned" or (mode == "aligned" and strand == 0)
    take_comp = mode == "aligned" and strand == 1

    for first in first_sizes:
        # Modified bases are only looked for in the first half of the motif.
        max_mod_offset = first - 1
        for gap in gap_sizes:
            for second in second_sizes:
                width = first + gap + second
                for start in range(len(ref_str) - (width - 1)):
                    stop = start + width
                    window = ref_str[start:stop]
                    window_ipds = read_ipds[start:stop]
                    if "*" in window or "X" in window:
                        continue

                    if take_revcomp:
                        q_motif = motif_tools.rev_comp_motif(window)
                    elif take_comp:
                        q_motif = motif_tools.comp_motif(window)

                    for base in opts.mod_bases:
                        for hit in re.finditer(base, q_motif):
                            offset = hit.start()
                            if offset > max_mod_offset:
                                continue

                            # Reversed motifs index the IPD window from the
                            # opposite end.
                            if take_revcomp:
                                ipd_pos = len(q_motif) - 1 - offset
                            elif take_comp:
                                ipd_pos = offset
                            ipd_value = window_ipds[ipd_pos]

                            bi_motif = q_motif[:first] + "N" * gap + q_motif[-second:]
                            key = "%s-%s" % (bi_motif, offset)
                            try:
                                subread_ipds[key].append(ipd_value)
                            except KeyError:
                                # Key is not being tracked; drop the value.
                                pass
    return subread_ipds
def find_motif_matches(mode, motif, ref_str, strand):
    """
    Return the regex matches of the oriented motif within ref_str.

    The motif is first oriented with motif_tools: rev_comp_motif for "bas"
    reads and for strand-0 "cmp" reads, comp_motif for strand-1 "cmp" reads.
    The oriented motif is passed through motif_tools.sub_bases (presumably
    turning it into a regular expression -- confirm in motif_tools) and then
    searched for with re.finditer.

    Returns a list of the match objects yielded by re.finditer, in order.
    """
    if mode == "bas" or (mode == "cmp" and strand == 0):
        oriented = motif_tools.rev_comp_motif(motif)
    elif mode == "cmp" and strand == 1:
        oriented = motif_tools.comp_motif(motif)
    # There is deliberately no fallback: any other mode/strand leaves
    # `oriented` unbound and the next line fails with an unbound-local error.
    pattern = motif_tools.sub_bases(oriented)
    return list(re.finditer(pattern, ref_str))
def walk_over_read(mode, subread_ipds, ref_str, read_ipds, strand, k, opts):
    """
    Slide a k-wide window along the read and collect IPD values per motif.

    Every full-width window of ref_str (there are len(ref_str) - k + 1 of
    them, none when the read is shorter than k) is examined. Windows
    containing "*" or "X" are skipped. For the others the query motif is
    built with motif_tools: rev_comp_motif for "bas" reads and strand-0 "cmp"
    reads, comp_motif for strand-1 "cmp" reads. Each match of each pattern in
    opts.mod_bases within the query motif contributes one IPD value, taken
    from the matching position of the read_ipds window (mirrored when the
    motif was reversed).

    Values are appended to subread_ipds under the key
    "<query motif>-<match offset>". Keys that are not already present raise
    KeyError inside the lookup and are ignored.

    NOTE(review): walk_over_read_bipartite in this file tests mode against
    "aligned"/"unaligned" rather than "cmp"/"bas" -- confirm which spelling
    the callers actually pass.

    Returns the same subread_ipds object that was passed in.

    Raises ValueError if mode/strand is not one of the supported
    combinations.
    """
    # Orientation depends only on mode/strand, so resolve it once and fail
    # with a clear message instead of an unbound-variable error mid-loop.
    if mode == "bas" or (mode == "cmp" and strand == 0):
        reverse = True
    elif mode == "cmp" and strand == 1:
        reverse = False
    else:
        raise ValueError(
            "Unsupported mode/strand combination: %r/%r" % (mode, strand)
        )

    # The bound follows k: a fixed offset of 3 is only right for k == 4 and
    # otherwise yields truncated windows (k > 4) or misses the last ones
    # (k < 4).
    for start in range(len(ref_str) - (k - 1)):
        seq = ref_str[start:start + k]
        ipds = read_ipds[start:start + k]
        if "*" in seq or "X" in seq:
            continue

        if reverse:
            q_motif = motif_tools.rev_comp_motif(seq)
        else:
            q_motif = motif_tools.comp_motif(seq)

        for base in opts.mod_bases:
            for hit in re.finditer(base, q_motif):
                ref_index = hit.start()
                # A reversed motif reads the IPD window from the other end.
                if reverse:
                    idx = len(q_motif) - 1 - ref_index
                else:
                    idx = ref_index
                ipd = ipds[idx]

                ref_motif_str = "%s-%s" % (q_motif, ref_index)
                try:
                    subread_ipds[ref_motif_str].append(ipd)
                except KeyError:
                    # Key is not being tracked (e.g. unexpected characters
                    # in the motif); drop the value.
                    pass
    return subread_ipds
def kmer_freq(mode, ref_str, strand, opts):
    """
    Count k-mers in the read, merging each k-mer with its reverse complement.

    ref_str is upper-cased and, when strand == 1, reversed (not
    complemented) before counting. k is taken from opts.comp_kmer and every
    window of width k is tallied. The result holds one entry per pair
    {kmer, motif_tools.rev_comp_motif(kmer)} over the alphabet "ATGC", keyed
    by whichever member comes first in itertools.product order, with value

        count(kmer) + count(reverse complement) + 1

    where the + 1 is a pseudocount, so every entry is at least 1. A k-mer
    that is its own reverse complement has its count added twice.

    The mode argument is accepted but not used.

    Returns a collections.Counter.
    """
    sequence = ref_str.upper()
    if strand == 1:
        sequence = sequence[::-1]

    k = opts.comp_kmer
    all_kmers = ["".join(letters) for letters in product("ATGC", repeat=k)]
    observed = Counter(
        sequence[i:i + k] for i in range(len(sequence) - (k - 1))
    )

    # Combine forward and reverse complement motifs into one count.
    merged = Counter()
    for kmer in all_kmers:
        partner = motif_tools.rev_comp_motif(kmer)
        # Stored values are never zero (pseudocount), so presence alone
        # tells us the pair has already been recorded under its partner.
        if partner in merged:
            continue
        merged[kmer] = observed[kmer] + observed[partner] + 1
    return merged