示例#1
0
def get_ClaRNA_output_from_dot_bracket(ss, temp=True, verbose=False):
    """Write a dummy ClaRNA output file for a dot-bracket secondary structure.

    Every pair returned by ``parse_vienna_to_pairs`` (the first list followed
    by the second one, called ``pairs_pk`` here) is written as a ``WW_cis``
    G-C base pair with a trailing ``1``.

    Args:
        ss (str): secondary structure in dot-bracket notation. It may be
            prefixed with a chain id and a colon (``'B:((..))'``); without a
            prefix the chain id ``'A'`` is used. More than one colon raises
            ``ValueError``.
        temp (bool): if True, derive the output name from a freshly generated
            temporary file name; otherwise use ``'target'``.
        verbose (bool): if True, print the generated text and the output path.

    Returns:
        str: path of the written file, i.e. ``<name>.pdb.outCR``.
    """
    from rna_tools.SecondaryStructure import parse_vienna_to_pairs

    chain = 'A'
    if ':' in ss:
        chain, ss = ss.split(':')

    pairs, pairs_pk = parse_vienna_to_pairs(ss, remove_gaps_in_ss=False)
    all_pairs = list(pairs) + list(pairs_pk)

    # NOTE(review): the "chains:" header always names chain A, even when a
    # different chain id was parsed from `ss` -- confirm this is intended.
    parts = ['Classifier: Clarna\n', 'chains:  A 1 ' + str(len(ss)) + '\n']
    for bp in all_pairs:
        parts.append(
            '%s    %i   %s   %i          bp G C                  WW_cis   1 \n' % (
                chain, bp[0], chain, bp[1]))
    txt = ''.join(parts)
    if verbose:
        print(txt.strip())

    if temp:
        # Only the unique name is needed. Leaving the `with` block closes the
        # placeholder file immediately instead of waiting for garbage
        # collection.
        with tempfile.NamedTemporaryFile() as f:
            name = f.name
    else:
        name = 'target'

    foutCR = name + '.pdb.outCR'
    if verbose:
        print(foutCR)
    # The context manager guarantees the handle is closed even if the write
    # fails.
    with open(foutCR, 'w') as ft:
        ft.write(txt)
    return foutCR
示例#2
0
def get_ClaRNA_output_from_dot_bracket(ss, temp=True, verbose=False):
    """Create a dummy ClaRNA output file from a dot-bracket secondary structure.

    The structure is parsed with ``parse_vienna_to_pairs``; both returned pair
    lists are concatenated and each pair is emitted as a ``WW_cis`` G-C base
    pair line.

    Args:
        ss (str): dot-bracket secondary structure, optionally written as
            ``'<chain>:<structure>'``. When no colon is present the chain id
            defaults to ``'A'``. A string with several colons raises
            ``ValueError``.
        temp (bool): when True the output name is based on a generated
            temporary file name, otherwise on the literal ``'target'``.
        verbose (bool): when True the file content and the output path are
            printed.

    Returns:
        str: the name of the file that was written (``<name>.pdb.outCR``).
    """
    from rna_tools.SecondaryStructure import parse_vienna_to_pairs

    if ':' in ss:
        chain, ss = ss.split(':')
    else:
        chain = 'A'

    pairs, pairs_pk = parse_vienna_to_pairs(ss, remove_gaps_in_ss=False)

    # NOTE(review): the header line below hard-codes chain "A" regardless of
    # the chain id used for the pair lines -- verify against the consumer.
    header = 'Classifier: Clarna\n' + 'chains:  A 1 ' + str(len(ss)) + '\n'
    pair_lines = [
        '%s    %i   %s   %i          bp G C                  WW_cis   1 \n' % (chain, bp[0], chain, bp[1])
        for group in (pairs, pairs_pk)
        for bp in group
    ]
    txt = header + ''.join(pair_lines)
    if verbose:
        print(txt.strip())

    if temp:
        # The temporary file is used only as a source of a unique name, so it
        # is closed right away rather than left to the garbage collector.
        with tempfile.NamedTemporaryFile() as f:
            name = f.name
    else:
        name = 'target'

    foutCR = name + '.pdb.outCR'
    if verbose:
        print(foutCR)
    # `with` closes the output handle even when writing raises.
    with open(foutCR, 'w') as ft:
        ft.write(txt)
    return foutCR

def get_parser():
    """Build the command-line parser for this script.

    The parser takes one positional ``file`` argument, an optional integer
    ``--offset`` and a repeatable ``-v``/``--verbose`` counter. The module
    docstring is used as the description and is shown without re-wrapping.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    # (flags, keyword settings) for every argument, registered in order.
    argument_specs = (
        (('file',), {'help': "file in the Fasta format"}),
        (('--offset',), {'help': "offset", 'type': int}),
        (('-v', '--verbose'), {'action': 'count', 'help': "be verbose"}),
    )
    for flags, settings in argument_specs:
        cli.add_argument(*flags, **settings)
    return cli


if __name__ == '__main__':
    # Script entry point: read a three-line file (header, sequence, secondary
    # structure), parse the structure into pairs, optionally shift every
    # index by --offset, and print the sorted list of pairs.
    parser = get_parser()
    args = parser.parse_args()

    # The file must contain exactly three lines once surrounding whitespace is
    # stripped; anything else makes the unpacking below raise ValueError.
    with open(args.file) as handle:
        header, seq, ss = handle.read().strip().split('\n')
    pairs, pairs_pk = parse_vienna_to_pairs(ss)

    if args.offset:
        # Shift both members of every pair from both lists.
        npairs = [[pair[0] + args.offset, pair[1] + args.offset]
                  for group in (pairs, pairs_pk)
                  for pair in group]
    else:
        npairs = pairs + pairs_pk
    npairs.sort()
    print(npairs)
示例#4
0
def rna_dca_mapping(seqfn, gseqfn, file_interactions, noss, noshort, offset, mss, verbose):
    """Map DCA interaction indices from a gapped sequence onto the final sequence.

    The work is done in three printed "panels":

    1. the raw interactions drawn over the gapped sequence,
    2. the interactions after gap columns are removed from the numbering,
    3. the interactions after shifting by the number of lower-case characters
       that precede each position in the final sequence.

    The surviving interactions (optionally shifted by ``offset``) are written
    to ``file_interactions + "_mapped.csv"``. Output uses ``print()`` calls so
    the function runs on Python 3.

    .. warning:: the interactions file is read with ``sep=" "``; adjust the
       separator if your file uses a different one.

    Args:
        seqfn (str): file whose 2nd and 3rd lines are the final sequence and
            its secondary structure (the 1st line is skipped).
        gseqfn (str): file with the same layout for the gapped sequence and
            gapped secondary structure.
        file_interactions (str): space-separated table with columns ``i`` and
            ``j``.
        noss: if true, skip interactions that are also pairs of the parsed
            secondary structure.
        noshort: if true, skip interactions with ``j - i < 6``.
        offset: if true, value added to both indices of every final
            interaction.
        mss: if true, print the secondary structure after every final
            interaction line.
        verbose: if true, print per-interaction details.

    Returns:
        None. Results are printed and written to the ``_mapped.csv`` file.
    """
    v = verbose
    ruler = '123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789'

    def n_lower_chars(string):
        """Return the number of lower-case characters in ``string``."""
        return sum(1 for c in string if c.islower())

    #
    # Load the data, open files and parse information
    #
    # final ss and seq, ungapped
    with open(seqfn) as f:
        f.readline()  # the header line is not used
        seq = f.readline().strip()
        ss = f.readline().strip()
    # gapped
    with open(gseqfn) as f:
        f.readline()  # the header line is not used
        gseq = f.readline().strip()
        gss = f.readline().strip()
    # DCA
    df = pd.read_csv(file_interactions, sep=" ")
    # zip() is lazy on Python 3, so materialise it before sorting.
    interactions = list(zip(df['i'].tolist(), df['j'].tolist()))
    #
    # Show input
    #
    # e.g. [(38, 51), (7, 110), (37, 52)] from the input
    interactions.sort()
    print('interactions:\n', interactions)
    #
    # Process unmapped scores on gapped sequence
    # I panel
    #
    pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs', pairs)
    print('UNMAPPED SCORES ' + '/' * len(gseq))
    print(ruler)
    print(gseq)
    print(gss)

    for i in interactions:
        # The indices are shifted by one here and used as 1-based positions
        # below. NOTE(review): confirm whether the scores start from 0 or 1.
        pair = [i[0] + 1, i[1] + 1]

        if noshort:
            delta = pair[1] - pair[0]
            if delta < 6:
                continue

        # remove interactions from gaps
        if gseq[pair[0] - 1] == '-' or gseq[pair[1] - 1] == '-':
            continue

        if noss:
            if pair in pairs:
                continue

        line_new = 'x'.rjust(pair[0]) + 'x'.rjust(pair[1] - pair[0]) + str(pair).rjust(len(gseq) - pair[1] + 10)
        print(line_new)
    #
    # Map gapped indices to ungapped 1-based positions
    # II panel
    #
    print('MAPPED SCORES //' + '/' * len(gseq))
    mapped_interactions = []
    for ij in interactions:
        if v: print('ij:', ij)
        if v: print(gseq[ij[0]], gseq[ij[1]])

        # drop the pair when either end sits on a gap column
        if gseq[ij[0]] == '-' or gseq[ij[1]] == '-':
            if v: print('Removed Interaction:', ij)
        else:
            # subtract the gaps that precede each index, then add one
            a = ij[0] - gseq[:ij[0]].count('-') + 1
            b = ij[1] - gseq[:ij[1]].count('-') + 1
            if v: print(ij, '->', a, b)
            mapped_interactions.append([a, b])

    print('Mapped Interactions:\n', mapped_interactions)
    if v:
        for pair in mapped_interactions:
            print(str(pair))

    print('pairs', pairs)
    print(ruler)
    print(gseq.replace('-', ''))  # what is the gap character, - or . ?
    print(gss.replace('-', ''))
    mapped_interactions.sort()

    pairs = parse_vienna_to_pairs(gss.replace('-', ''))[0]
    for pair in mapped_interactions:
        if noshort:
            delta = pair[1] - pair[0]
            if delta < 6:
                continue

        # remove if there is a gap
        if seq[pair[0] - 1] == '-' or seq[pair[1] - 1] == '-':
            continue

        if noss:
            if pair in pairs:
                continue

        line_new = 'x'.rjust(pair[0]) + 'x'.rjust(pair[1] - pair[0]) + str(pair).rjust(len(seq) - pair[1] + 10)
        print(line_new)
    #
    # How to include a gap in the mapping?
    # III panel, the final
    #
    print(seq)
    print(ss)

    print('FINAL MAPPING //' + '/' * len(seq))
    nmapped_interactions = []
    for ij in mapped_interactions:
        if v: print('ij:', ij)
        if v: print(seq[ij[0] - 1], seq[ij[1] - 1])
        # shift each index by the number of lower-case characters before it
        a = ij[0] + n_lower_chars(seq[:ij[0]])
        b = ij[1] + n_lower_chars(seq[:ij[1]])
        if v: print(ij, '->', a, b)
        nmapped_interactions.append([a, b])
    mapped_interactions = nmapped_interactions

    print(seq)
    print(ss)
    pairs = parse_vienna_to_pairs(ss)[0]
    nmapped_interactions = []
    for pair in mapped_interactions:
        if noshort:
            delta = pair[1] - pair[0]
            if delta < 6:
                continue

        if seq[pair[0] - 1] == '-' or seq[pair[1] - 1] == '-':
            continue

        if noss:
            if pair in pairs:
                continue

        line_new = 'x'.rjust(pair[0]) + 'x'.rjust(pair[1] - pair[0]) + str(pair).rjust(len(seq) - pair[1] + 10)
        nmapped_interactions.append([pair[0], pair[1]])
        print(line_new)
        if mss:
            print(ss)
    mapped_interactions = nmapped_interactions

    if offset:
        # e.g. [[18, 71]] with offset 10 becomes [[28, 81]]
        mapped_interactions = [[x[0] + offset, x[1] + offset] for x in mapped_interactions]

    print(mapped_interactions)
    print('draw_dists(' + str(mapped_interactions) + ')')
    print('output file:', file_interactions + "_mapped.csv")
    a = pd.DataFrame(list(mapped_interactions), columns=["i", "j"])
    a.to_csv(file_interactions + "_mapped.csv", sep=" ")
def _read_seq_and_ss(path):
    """Return the stripped 2nd and 3rd lines of ``path`` (the 1st is skipped)."""
    with open(path) as handle:
        handle.readline()  # header line, not used
        sequence = handle.readline().strip()
        structure = handle.readline().strip()
    return sequence, structure


def _count_lower(string):
    """Return the number of lower-case characters in ``string``."""
    return sum(1 for c in string if c.islower())


def _is_filtered_out(pair, sequence, known_pairs, noss, noshort):
    """Tell whether a 1-based ``pair`` should be skipped for ``sequence``."""
    # Checked first so that short pairs never reach the index lookups below.
    if noshort and pair[1] - pair[0] < 6:
        return True
    if sequence[pair[0] - 1] == '-' or sequence[pair[1] - 1] == '-':
        return True
    if noss and pair in known_pairs:
        return True
    return False


def _marker_line(pair, width):
    """Return a text line with an ``x`` under each end of ``pair`` plus its label."""
    return ('x'.rjust(pair[0]) + 'x'.rjust(pair[1] - pair[0]) +
            str(pair).rjust(width - pair[1] + 10))


def rna_dca_mapping(seqfn, gseqfn, file_interactions, noss, noshort, offset,
                    mss, verbose):
    """Map DCA interaction indices from a gapped sequence onto the final sequence.

    Three "panels" are printed: the interactions over the gapped sequence,
    the interactions renumbered without gap columns, and the interactions
    shifted by the count of lower-case characters preceding each position of
    the final sequence. The remaining interactions, optionally shifted by
    ``offset``, are saved to ``file_interactions + "_mapped.csv"``.

    .. warning:: the interactions file is read with ``sep=" "``; change the
       separator if your file is delimited differently.

    Args:
        seqfn (str): file whose 2nd and 3rd lines hold the final sequence and
            its secondary structure.
        gseqfn (str): file with the same layout for the gapped sequence and
            gapped secondary structure.
        file_interactions (str): space-separated table with ``i`` and ``j``
            columns.
        noss: if true, drop interactions that are pairs of the parsed
            secondary structure.
        noshort: if true, drop interactions with ``j - i < 6``.
        offset: if true, added to both indices of each final interaction.
        mss: if true, print the secondary structure after each final line.
        verbose: if true, print per-interaction details.

    Returns:
        None. Everything is printed or written to the ``_mapped.csv`` file.
    """
    v = verbose
    ruler = '123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789'

    #
    # Load the data: final (ungapped) and gapped sequence/structure, then DCA
    #
    seq, ss = _read_seq_and_ss(seqfn)
    gseq, gss = _read_seq_and_ss(gseqfn)
    df = pd.read_csv(file_interactions, sep=" ")
    # e.g. [(7, 110), (37, 52), (38, 51)]
    interactions = sorted(zip(df['i'].tolist(), df['j'].tolist()))
    print('interactions:\n', interactions)

    #
    # I panel: unmapped scores on the gapped sequence
    #
    gapped_pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs', gapped_pairs)
    print('UNMAPPED SCORES ' + '/' * len(gseq))
    print(ruler)
    print(gseq)
    print(gss)

    for first, second in interactions:
        # Shifted by one and treated as 1-based positions from here on.
        # NOTE(review): confirm whether the scores start from 0 or 1.
        pair = [first + 1, second + 1]
        if _is_filtered_out(pair, gseq, gapped_pairs, noss, noshort):
            continue
        print(_marker_line(pair, len(gseq)))

    #
    # II panel: renumber without gap columns
    #
    print('MAPPED SCORES //' + '/' * len(gseq))
    mapped_interactions = []
    for ij in interactions:
        if v: print('ij:', ij)
        if v: print(gseq[ij[0]], gseq[ij[1]])

        # an interaction touching a gap column cannot be mapped
        if gseq[ij[0]] == '-' or gseq[ij[1]] == '-':
            if v: print('Removed Interaction:', ij)
            continue

        # subtract the gaps that precede each index, then add one
        a = ij[0] - gseq[:ij[0]].count('-') + 1
        b = ij[1] - gseq[:ij[1]].count('-') + 1
        if v: print(ij, '->', a, b)
        mapped_interactions.append([a, b])

    print('Mapped Interactions:\n', mapped_interactions)
    if v:
        for pair in mapped_interactions:
            print(str(pair))

    print('pairs', gapped_pairs)
    print(ruler)
    print(gseq.replace('-', ''))  # what is the gap character, - or . ?
    print(gss.replace('-', ''))
    mapped_interactions.sort()

    ungapped_pairs = parse_vienna_to_pairs(gss.replace('-', ''))[0]
    for pair in mapped_interactions:
        if _is_filtered_out(pair, seq, ungapped_pairs, noss, noshort):
            continue
        print(_marker_line(pair, len(seq)))

    #
    # III panel, the final: account for lower-case characters in seq
    #
    print(seq)
    print(ss)

    print('FINAL MAPPING //' + '/' * len(seq))
    shifted = []
    for ij in mapped_interactions:
        if v: print('ij:', ij)
        if v: print(seq[ij[0] - 1], seq[ij[1] - 1])
        # shift each index by the number of lower-case characters before it
        a = ij[0] + _count_lower(seq[:ij[0]])
        b = ij[1] + _count_lower(seq[:ij[1]])
        if v: print(ij, '->', a, b)
        shifted.append([a, b])
    mapped_interactions = shifted

    print(seq)
    print(ss)
    final_pairs = parse_vienna_to_pairs(ss)[0]
    kept = []
    for pair in mapped_interactions:
        if _is_filtered_out(pair, seq, final_pairs, noss, noshort):
            continue
        kept.append([pair[0], pair[1]])
        print(_marker_line(pair, len(seq)))
        if mss:
            print(ss)
    mapped_interactions = kept

    if offset:
        # e.g. [[18, 71]] with offset 10 becomes [[28, 81]]
        mapped_interactions = [[x[0] + offset, x[1] + offset]
                               for x in mapped_interactions]

    print(mapped_interactions)
    print('draw_dists(' + str(mapped_interactions) + ')')
    out_path = file_interactions + "_mapped.csv"
    print('output file:', out_path)
    a = pd.DataFrame(list(mapped_interactions), columns=["i", "j"])
    a.to_csv(out_path, sep=" ")
示例#6
0

def get_parser():
    """Create the argument parser used by the command-line entry point.

    Arguments defined: a positional ``file``, an integer ``--offset`` option
    and a ``-v``/``--verbose`` flag that counts its occurrences. The module
    docstring serves as the description, rendered with the raw-description
    formatter.

    Returns:
        argparse.ArgumentParser: parser ready for ``parse_args``.
    """
    formatter = argparse.RawDescriptionHelpFormatter
    arg_parser = argparse.ArgumentParser(formatter_class=formatter,
                                         description=__doc__)

    arg_parser.add_argument('file', help="file in the Fasta format")
    arg_parser.add_argument('--offset', type=int, help="offset")
    arg_parser.add_argument("-v", "--verbose", help="be verbose", action='count')
    return arg_parser


if __name__ == '__main__':
    # Command-line entry point: load header/sequence/structure from the given
    # file, parse the structure into pairs, apply --offset when it is given
    # and non-zero, then print the sorted pairs.
    parser = get_parser()
    args = parser.parse_args()

    # Exactly three lines are expected after stripping; otherwise the
    # unpacking raises ValueError. `with` closes the file deterministically.
    with open(args.file) as fasta:
        header, seq, ss = fasta.read().strip().split('\n')
    pairs, pairs_pk = parse_vienna_to_pairs(ss)

    shift = args.offset
    if shift:
        # Apply the shift to every pair of both lists, first list first.
        npairs = [[pair[0] + shift, pair[1] + shift]
                  for group in (pairs, pairs_pk)
                  for pair in group]
    else:
        npairs = pairs + pairs_pk
    npairs.sort()
    print(npairs)