Exemplo n.º 1
0
def walk_over_read_bipartite(mode, subread_ipds, ref_str, read_ipds, strand,
                             opts):
    """
    Walk over the read and record IPD values for bipartite motifs.

    For every ``(first, N, second)`` combination drawn from
    ``opts.bipart_config[0]``, ``[1]`` and ``[2]``, a window of
    ``first + N + second`` characters is slid over ``ref_str``. Windows that
    contain ``*`` or ``X`` are skipped. Each remaining window is converted to
    a query motif: the reverse complement for ``mode == "unaligned"`` and for
    ``mode == "aligned"`` with ``strand == 0``, the complement for
    ``mode == "aligned"`` with ``strand == 1``. The bipartite motif is the
    first ``first`` characters of the query motif, ``N`` literal ``"N"``
    characters, and the last ``second`` characters of the query motif.

    For each entry of ``opts.mod_bases`` (used as a regular expression), every
    match that starts inside the first ``first`` characters contributes the
    corresponding value of ``read_ipds`` to
    ``subread_ipds["<bipartite motif>-<match index>"]``. Keys that are not
    already present in ``subread_ipds`` are silently ignored.

    Args:
        mode: ``"aligned"`` or ``"unaligned"``.
        subread_ipds: Mapping from ``"<motif>-<index>"`` to a list; updated
            in place.
        ref_str: Sequence string that is scanned.
        read_ipds: Indexable sequence of IPD values, sliced with the same
            offsets as ``ref_str``.
        strand: ``0`` or ``1``; only consulted when ``mode == "aligned"``.
        opts: Object exposing ``bipart_config`` and ``mod_bases``.

    Returns:
        The ``subread_ipds`` object that was passed in.

    Raises:
        ValueError: If a usable window is reached with an unsupported
            mode/strand combination.
    """
    # The orientation depends only on mode and strand, so resolve it once.
    # ``None`` marks an unsupported combination; it is reported lazily so that
    # inputs without any usable window keep returning normally.
    if mode == "unaligned" or (mode == "aligned" and strand == 0):
        reverse = True
    elif mode == "aligned" and strand == 1:
        reverse = False
    else:
        reverse = None

    firsts = opts.bipart_config[0]
    Ns = opts.bipart_config[1]
    seconds = opts.bipart_config[2]
    for first in firsts:
        # Only positions inside the first half of the motif are recorded.
        last_mod_pos = first - 1
        for N in Ns:
            for second in seconds:
                length = first + N + second
                for j in range(len(ref_str) - (length - 1)):
                    seq = ref_str[j:j + length]
                    if "*" in seq or "X" in seq:
                        continue
                    if reverse is None:
                        raise ValueError(
                            "Unsupported mode/strand combination: "
                            "mode=%r, strand=%r" % (mode, strand))

                    ipds = read_ipds[j:j + length]
                    if reverse:
                        q_motif = motif_tools.rev_comp_motif(seq)
                    else:
                        q_motif = motif_tools.comp_motif(seq)

                    # Slice from an explicit start offset: ``q_motif[-second:]``
                    # would return the whole motif when ``second`` is 0.
                    bi_motif = "".join([
                        q_motif[:first],
                        "N" * N,
                        q_motif[len(q_motif) - second:],
                    ])

                    for base in opts.mod_bases:
                        for match in re.finditer(base, q_motif):
                            ref_index = match.start()
                            if ref_index > last_mod_pos:
                                continue

                            # A reverse-complemented motif runs opposite to
                            # the IPD slice, so mirror the index.
                            if reverse:
                                idx = len(q_motif) - 1 - ref_index
                            else:
                                idx = ref_index

                            IPD = ipds[idx]
                            ref_motif_str = "%s-%s" % (bi_motif, ref_index)
                            try:
                                subread_ipds[ref_motif_str].append(IPD)
                            except KeyError:
                                # Motif is not tracked (e.g. it contains
                                # unexpected characters); skip it.
                                pass
    return subread_ipds
Exemplo n.º 2
0
def find_motif_matches(mode, motif, ref_str, strand):
    """
    Return every regex match of the query form of ``motif`` in ``ref_str``.

    The motif is reverse-complemented for ``mode == "bas"`` and for
    ``mode == "cmp"`` with ``strand == 0``, and complemented for
    ``mode == "cmp"`` with ``strand == 1``. The result is passed through
    ``motif_tools.sub_bases`` and then used as a regular expression pattern.

    Args:
        mode: ``"cmp"`` or ``"bas"``.
        motif: Motif string to search for.
        ref_str: String that is searched.
        strand: ``0`` or ``1``; only consulted when ``mode == "cmp"``.

    Returns:
        A list of ``re.Match`` objects in the order ``re.finditer`` yields
        them (possibly empty).

    Raises:
        ValueError: If the mode/strand combination is not supported.
    """
    if mode == "bas" or (mode == "cmp" and strand == 0):
        oriented = motif_tools.rev_comp_motif(motif)
    elif mode == "cmp" and strand == 1:
        oriented = motif_tools.comp_motif(motif)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError; report the actual problem instead.
        raise ValueError(
            "Unsupported mode/strand combination: mode=%r, strand=%r"
            % (mode, strand))

    q_motif = motif_tools.sub_bases(oriented)
    return list(re.finditer(q_motif, ref_str))
Exemplo n.º 3
0
def walk_over_read(mode, subread_ipds, ref_str, read_ipds, strand, k, opts):
    """
    Slide a k-long window over the read and record IPD values per motif.

    Every window of exactly ``k`` characters in ``ref_str`` that contains
    neither ``*`` nor ``X`` is converted to a query motif: the reverse
    complement for ``mode == "bas"`` and for ``mode == "cmp"`` with
    ``strand == 0``, the complement for ``mode == "cmp"`` with
    ``strand == 1``. For each entry of ``opts.mod_bases`` (used as a regular
    expression), every match in the query motif contributes the corresponding
    value of ``read_ipds`` to ``subread_ipds["<motif>-<match index>"]``. Keys
    that are not already present in ``subread_ipds`` are silently ignored.

    Args:
        mode: ``"cmp"`` or ``"bas"``.
        subread_ipds: Mapping from ``"<motif>-<index>"`` to a list; updated
            in place.
        ref_str: Sequence string that is scanned.
        read_ipds: Indexable sequence of IPD values, sliced with the same
            offsets as ``ref_str``.
        strand: ``0`` or ``1``; only consulted when ``mode == "cmp"``.
        k: Window (motif) length.
        opts: Object exposing ``mod_bases``.

    Returns:
        The ``subread_ipds`` object that was passed in.

    Raises:
        ValueError: If a usable window is reached with an unsupported
            mode/strand combination.
    """
    # The orientation depends only on mode and strand, so resolve it once.
    # ``None`` marks an unsupported combination; it is reported lazily so that
    # inputs without any usable window keep returning normally.
    if mode == "bas" or (mode == "cmp" and strand == 0):
        reverse = True
    elif mode == "cmp" and strand == 1:
        reverse = False
    else:
        reverse = None

    # Visit only full-length windows. The bound used to be hard-coded for
    # k == 4, which produced truncated trailing windows for larger k and
    # dropped the last windows for smaller k.
    for j in range(len(ref_str) - (k - 1)):
        seq = ref_str[j:j + k]
        if "*" in seq or "X" in seq:
            continue
        if reverse is None:
            raise ValueError(
                "Unsupported mode/strand combination: mode=%r, strand=%r"
                % (mode, strand))

        ipds = read_ipds[j:j + k]
        if reverse:
            q_motif = motif_tools.rev_comp_motif(seq)
        else:
            q_motif = motif_tools.comp_motif(seq)

        for base in opts.mod_bases:
            for match in re.finditer(base, q_motif):
                ref_index = match.start()

                # A reverse-complemented motif runs opposite to the IPD
                # slice, so mirror the index.
                if reverse:
                    idx = len(q_motif) - 1 - ref_index
                else:
                    idx = ref_index
                IPD = ipds[idx]

                ref_motif_str = "%s-%s" % (q_motif, ref_index)
                try:
                    subread_ipds[ref_motif_str].append(IPD)
                except KeyError:
                    # Motif is not tracked (e.g. it contains unexpected
                    # characters); skip it.
                    pass
    return subread_ipds
Exemplo n.º 4
0
def kmer_freq(mode, ref_str, strand, opts):
    """
    Count k-mers in ``ref_str``, merging each k-mer with its reverse complement.

    The sequence is upper-cased and, when ``strand == 1``, reversed before
    counting. Every window of ``opts.comp_kmer`` characters is tallied. Then,
    for each k-mer over the alphabet ``ATGC`` (in ``itertools.product``
    order), an entry is stored unless its reverse complement already has one.
    The stored value is the k-mer's count plus its reverse complement's count
    plus one.

    Args:
        mode: Not used by this function.
        ref_str: Sequence string to count k-mers in.
        strand: When equal to ``1`` the sequence is reversed first.
        opts: Object exposing ``comp_kmer`` (the k-mer length).

    Returns:
        A ``Counter`` mapping the retained k-mers to their combined counts.
    """
    sequence = ref_str.upper()
    if strand == 1:
        sequence = sequence[::-1]

    size = opts.comp_kmer
    n_windows = len(sequence) - (size - 1)
    observed = Counter(sequence[start:start + size]
                       for start in range(n_windows))

    # Fold forward and reverse-complement k-mers into a single entry, keyed
    # by whichever of the two is enumerated first.
    merged = Counter()
    for letters in product("ATGC", repeat=size):
        forward = "".join(letters)
        reverse = motif_tools.rev_comp_motif(forward)
        if merged.get(reverse):
            continue
        merged[forward] = observed[forward] + observed[reverse] + 1

    return merged