def get_ClaRNA_output_from_dot_bracket(ss, temp=True, verbose=False):
    """Write a dummy ClaRNA output file for a dot-bracket secondary structure.

    Every pair returned by ``parse_vienna_to_pairs`` (both returned lists are
    used) is written as a ``G C WW_cis`` base pair with the value ``1``.

    Args:
        ss (str): secondary structure in dot-bracket notation, optionally
            prefixed with a chain id and a colon (``'B:((..))'``). Chain
            ``'A'`` is used when there is no prefix.
        temp (bool): if True, write into a freshly created temporary file;
            otherwise write ``target.pdb.outCR`` in the current directory.
        verbose (bool): if True, print the generated text and the file name.

    Returns:
        str: path of the written file; it always ends with ``.pdb.outCR``.
    """
    import tempfile

    from rna_tools.SecondaryStructure import parse_vienna_to_pairs

    if ':' in ss:
        chain, ss = ss.split(':')
    else:
        chain = 'A'

    pairs, pairs_pk = parse_vienna_to_pairs(ss, remove_gaps_in_ss=False)
    all_pairs = list(pairs) + list(pairs_pk)

    # The header names the same chain as the pair lines below (it used to be
    # hard-coded to 'A', which disagreed with the pairs for any other chain).
    header = 'Classifier: Clarna\n' + 'chains: ' + chain + ' 1 ' + str(len(ss)) + '\n'
    txt = header + ''.join(
        '%s %i %s %i bp G C WW_cis 1 \n' % (chain, bp[0], chain, bp[1])
        for bp in all_pairs)
    if verbose:
        print(txt.strip())

    if temp:
        # Let tempfile create the output file itself: the name is unique, the
        # file is created atomically and no throw-away handle is left open.
        # delete=False keeps the file on disk for the caller.
        out = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb.outCR', delete=False)
    else:
        out = open('target.pdb.outCR', 'w')
    foutCR = out.name
    if verbose:
        print(foutCR)
    with out:  # closes the file even if write() fails
        out.write(txt)
    return foutCR
def get_ClaRNA_output_from_dot_bracket(ss, temp=True, verbose=False):
    """Create a dummy ClaRNA output file from a dot-bracket secondary structure.

    The structure is parsed with ``parse_vienna_to_pairs``; each pair from
    either of the two lists it returns becomes one ``G C WW_cis`` line.

    Args:
        ss (str): dot-bracket secondary structure. A leading ``'<chain>:'``
            selects the chain id written to the file; without it the chain
            is ``'A'``.
        temp (bool): True writes to a new temporary file, False writes to
            ``target.pdb.outCR`` in the working directory.
        verbose (bool): True prints the file content and the file name.

    Returns:
        str: name of the file that was written (suffix ``.pdb.outCR``).
    """
    import tempfile

    from rna_tools.SecondaryStructure import parse_vienna_to_pairs

    chain = 'A'
    if ss.find(':') > -1:
        chain, ss = ss.split(':')

    pairs, pairs_pk = parse_vienna_to_pairs(ss, remove_gaps_in_ss=False)

    lines = ['Classifier: Clarna\n']
    # Report the chain that is actually used in the pair lines; the header
    # was previously fixed to 'A' regardless of the parsed chain id.
    lines.append('chains: ' + chain + ' 1 ' + str(len(ss)) + '\n')
    for group in (pairs, pairs_pk):
        for bp in group:
            lines.append('%s %i %s %i bp G C WW_cis 1 \n' % (chain, bp[0], chain, bp[1]))
    txt = ''.join(lines)
    if verbose:
        print(txt.strip())

    if temp:
        # Create the real output file through tempfile instead of borrowing
        # the name of a second temporary file whose handle was never closed.
        # delete=False: the caller needs the file after this function returns.
        handle = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb.outCR', delete=False)
    else:
        handle = open('target' + '.pdb.outCR', 'w')

    try:
        handle.write(txt)
    finally:
        handle.close()

    foutCR = handle.name
    if verbose:
        print(foutCR)
    return foutCR
def get_parser():
    """Build the command-line parser of this script.

    Arguments defined: positional ``file``, optional integer ``--offset`` and
    a counting ``-v/--verbose`` flag. The module docstring is used as the
    description.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('file', help="file in the Fasta format")
    parser.add_argument('--offset', help="offset", type=int)
    parser.add_argument("-v", "--verbose", action='count', help="be verbose")
    return parser


if __name__ == '__main__':
    parser = get_parser()
    args = parser.parse_args()

    # The input must consist of exactly three lines: header, sequence and
    # secondary structure (anything else makes the unpacking below fail).
    with open(args.file) as handle:
        header, seq, ss = handle.read().strip().split('\n')

    pairs, pairs_pk = parse_vienna_to_pairs(ss)

    if args.offset:
        # Shift both positions of every pair by the requested offset.
        npairs = []
        for group in (pairs, pairs_pk):
            npairs.extend([pair[0] + args.offset, pair[1] + args.offset] for pair in group)
    else:
        npairs = pairs + pairs_pk

    npairs.sort()
    print(npairs)
def rna_dca_mapping(seqfn, gseqfn, file_interactions, noss, noshort, offset, mss, verbose):
    """Map DCA interactions from a gapped sequence onto the final sequence.

    The function prints three "panels" and writes one file:

    I.   the interactions drawn on the gapped sequence,
    II.  the interactions renumbered after removing the gap characters
         (``-``) of the gapped sequence,
    III. the interactions shifted by the number of lower-case characters
         that precede them in the final sequence (called "gap mapping" in
         the original comments).

    The interactions kept in panel III are shifted by ``offset`` (if given),
    printed, and saved to ``<file_interactions>_mapped.csv``.

    Every ``print`` below takes a single, pre-formatted string so that the
    function runs, and prints the same text as the original print
    statements, on both Python 2 and Python 3.

    .. warning:: the interaction file is read with a single space as the
       separator (``sep=" "``); adjust the ``read_csv`` call for other files.

    Args:
        seqfn (str): file with the final (ungapped) data; line 1 is skipped
            as a header, line 2 is the sequence, line 3 the secondary
            structure.
        gseqfn (str): file with the gapped data, same three-line layout.
        file_interactions (str): space-separated table with columns ``i``
            and ``j``. The positions are used as 0-based indices into the
            gapped sequence (the original author was unsure whether the
            scores start from 0 or 1 -- verify against your input).
        noss (bool): if true, skip interactions that are among the pairs
            parsed from the secondary structure.
        noshort (bool): if true, skip interactions whose two positions are
            less than 6 apart.
        offset (int): if non-zero, added to both positions of each result.
        mss (bool): if true, print the secondary structure in the final panel
            (see the review note at the corresponding line).
        verbose (bool): if true, print intermediate values of the mapping.

    Returns:
        None
    """
    v = verbose
    ruler = '123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789'

    #
    # Load the data, open files and parse information
    #
    # final ss and seq, ungapped
    with open(seqfn) as f:
        f.readline()  # get rid of header
        seq = f.readline().strip()
        ss = f.readline().strip()
    # gapped
    with open(gseqfn) as f:
        f.readline()  # get rid of header
        gseq = f.readline().strip()
        gss = f.readline().strip()
    # DCA
    df = pd.read_csv(file_interactions, sep=" ")
    # sorted() instead of zip(...).sort(): zip() is not a list on Python 3.
    interactions = sorted(zip(df['i'].tolist(), df['j'].tolist()))

    #
    # Show input
    #
    # [(38, 51), (7, 110), (37, 52) from the input
    print('interactions:\n%s' % (interactions,))

    #
    # Process unmapped scores on gapped sequence
    # I panel
    #
    pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs %s' % (pairs,))
    print('UNMAPPED SCORES ' + '/' * len(gseq))
    print(ruler)
    print(gseq)
    print(gss)
    for i in interactions:
        # from 0 or from 1 ?!
        # be careful here! scores start from 0 or 1 !?
        i = [i[0] + 1, i[1] + 1]
        if noshort:
            delta = i[1] - i[0]
            if delta < 6:
                continue
        # remove interactions from gaps
        if gseq[i[0] - 1] == '-' or gseq[i[1] - 1] == '-':
            continue
        if noss:
            if i in pairs:
                continue
        line_new = 'x'.rjust(i[0]) + 'x'.rjust(i[1] - i[0]) + str(i).rjust(len(gseq) - i[1] + 10)
        print(line_new)

    #
    # How does this mapping work?
    # II panel
    #
    print('MAPPED SCORES //' + '/' * len(gseq))
    mapped_interactions = []
    for ij in interactions:
        if v:
            print('ij: %s' % (ij,))
        if v:
            print(gseq[ij[0]] + ' ' + gseq[ij[1]])
        # ok, here I test if this pair comes from gaps, . -> -
        if gseq[ij[0]] == '-' or gseq[ij[1]] == '-':
            if v:
                print('Removed Interaction: %s' % (ij,))
        else:
            # 0-based gapped index -> 1-based index in the ungapped sequence
            a = ij[0] - gseq[:ij[0]].count('-') + 1
            b = ij[1] - gseq[:ij[1]].count('-') + 1
            if v:
                print('%s -> %s %s' % (ij, a, b))
            mapped_interactions.append([a, b])

    print('Mapped Interactions:\n%s' % (mapped_interactions,))
    if v:
        for i in mapped_interactions:
            print(str(i))

    pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs %s' % (pairs,))
    print(ruler)
    print(gseq.replace('-', ''))  # what is the gap character, - or . ?
    print(gss.replace('-', ''))
    mapped_interactions.sort()

    pairs = parse_vienna_to_pairs(gss.replace('-', ''))[0]
    for i in mapped_interactions:
        if noshort:
            delta = i[1] - i[0]
            if delta < 6:
                continue
        # remove if there is a gap
        if seq[i[0] - 1] == '-' or seq[i[1] - 1] == '-':
            continue
        if noss:
            if i in pairs:
                continue
        line_new = 'x'.rjust(i[0]) + 'x'.rjust(i[1] - i[0]) + str(i).rjust(len(seq) - i[1] + 10)
        print(line_new)

    #
    # How to include a gap in the mapping?
    # III panel, the final
    #
    print(seq)
    print(ss)
    # This result is replaced by parse_vienna_to_pairs(ss) before it is used;
    # the call is kept so that the sequence of parser calls is unchanged.
    pairs = parse_vienna_to_pairs(ss.replace('-', ''))[0]

    def n_lower_chars(string):
        """ https://stackoverflow.com/questions/10953189/count-lower-case-characters-in-a-string """
        return sum(1 for c in string if c.islower())

    print('FINAL MAPPING //' + '/' * len(seq))
    nmapped_interactions = []
    for ij in mapped_interactions:
        if v:
            print('ij: %s' % (ij,))
        if v:
            print(seq[ij[0] - 1] + ' ' + seq[ij[1] - 1])
        # gap mapping
        a = ij[0] + n_lower_chars(seq[:ij[0]])
        b = ij[1] + n_lower_chars(seq[:ij[1]])
        if v:
            print('%s -> %s %s' % (ij, a, b))
        nmapped_interactions.append([a, b])

    mapped_interactions = nmapped_interactions
    print(seq)
    print(ss)
    pairs = parse_vienna_to_pairs(ss)[0]
    nmapped_interactions = []
    for i in mapped_interactions:
        if noshort:
            delta = i[1] - i[0]
            if delta < 6:
                continue
        if seq[i[0] - 1] == '-' or seq[i[1] - 1] == '-':
            continue
        if noss:
            if i in pairs:
                continue
        line_new = 'x'.rjust(i[0]) + 'x'.rjust(i[1] - i[0]) + str(i).rjust(len(seq) - i[1] + 10)
        nmapped_interactions.append([i[0], i[1]])
        print(line_new)
        # NOTE(review): the nesting of this check could not be recovered from
        # the collapsed source; it is placed inside the loop so that the
        # structure is repeated under every drawn line -- confirm.
        if mss:
            print(ss)

    mapped_interactions = nmapped_interactions
    if offset:
        # e.g. filter_interaction is [[18, 71]], if offset is 10 then it will give you [[28, 81]]
        mapped_interactions = [[x[0] + offset, x[1] + offset] for x in mapped_interactions]

    print(mapped_interactions)
    print('draw_dists(' + str(mapped_interactions) + ')')
    print('output file: ' + file_interactions + "_mapped.csv")
    a = pd.DataFrame(list(mapped_interactions), columns=["i", "j"])
    a.to_csv(file_interactions + "_mapped.csv", sep=" ")
def _draw_interactions(interactions, sequence, pairs, noss, noshort, footer=None):
    """Print one 'x   x [i, j]' line per interaction that passes the filters and return the kept pairs."""
    kept = []
    for first, second in interactions:
        pair = [first, second]
        # skip short-range interactions
        if noshort and second - first < 6:
            continue
        # skip interactions that involve a gap character
        if sequence[first - 1] == '-' or sequence[second - 1] == '-':
            continue
        # skip interactions that are listed in the secondary structure pairs
        if noss and pair in pairs:
            continue
        print('x'.rjust(first) + 'x'.rjust(second - first)
              + str(pair).rjust(len(sequence) - second + 10))
        kept.append(pair)
        if footer is not None:
            print(footer)
    return kept


def rna_dca_mapping(seqfn, gseqfn, file_interactions, noss, noshort, offset, mss, verbose):
    """Map DCA interactions from a gapped sequence onto the final sequence.

    Three "panels" are printed:

    I.   the interactions drawn on the gapped sequence,
    II.  the interactions renumbered after removing the gap characters
         (``-``) of the gapped sequence,
    III. the interactions shifted by the number of lower-case characters
         that precede them in the final sequence (called "gap mapping" in
         the original comments).

    The interactions kept in panel III are shifted by ``offset`` (if given),
    printed, and saved to ``<file_interactions>_mapped.csv``.

    .. warning:: the interaction file is read with a single space as the
       separator (``sep=" "``); adjust the ``read_csv`` call for other files.

    Args:
        seqfn (str): file with the final (ungapped) data; line 1 is skipped
            as a header, line 2 is the sequence, line 3 the secondary
            structure.
        gseqfn (str): file with the gapped data, same three-line layout.
        file_interactions (str): space-separated table with columns ``i``
            and ``j``. The positions are used as 0-based indices into the
            gapped sequence (the original author was unsure whether the
            scores start from 0 or 1 -- verify against your input).
        noss (bool): if true, skip interactions that are among the pairs
            parsed from the secondary structure.
        noshort (bool): if true, skip interactions whose two positions are
            less than 6 apart.
        offset (int): if non-zero, added to both positions of each result.
        mss (bool): if true, print the secondary structure in the final panel
            (see the review note at the corresponding line).
        verbose (bool): if true, print intermediate values of the mapping.

    Returns:
        None
    """
    v = verbose
    ruler = '123456789112345678921234567893123456789412345678951234567896123456789712345678981234567899123456789'

    #
    # Load the data, open files and parse information
    #
    # final ss and seq, ungapped
    with open(seqfn) as f:
        f.readline()  # get rid of header
        seq = f.readline().strip()
        ss = f.readline().strip()
    # gapped
    with open(gseqfn) as f:
        f.readline()  # get rid of header
        gseq = f.readline().strip()
        gss = f.readline().strip()
    # DCA
    df = pd.read_csv(file_interactions, sep=" ")
    interactions = list(zip(df['i'].tolist(), df['j'].tolist()))

    #
    # Show input
    #
    # [(38, 51), (7, 110), (37, 52) from the input
    interactions.sort()
    print('interactions:\n', interactions)

    #
    # Process unmapped scores on gapped sequence
    # I panel
    #
    pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs', pairs)
    print('UNMAPPED SCORES ' + '/' * len(gseq))
    print(ruler)
    print(gseq)
    print(gss)
    # from 0 or from 1 ?! be careful here! scores start from 0 or 1 !?
    # The positions are shifted by one for drawing on the gapped sequence.
    _draw_interactions(((i + 1, j + 1) for i, j in interactions),
                       gseq, pairs, noss, noshort)

    #
    # How does this mapping work?
    # II panel
    #
    print('MAPPED SCORES //' + '/' * len(gseq))
    mapped_interactions = []
    for ij in interactions:
        if v:
            print('ij:', ij)
        if v:
            print(gseq[ij[0]], gseq[ij[1]])
        # ok, here I test if this pair comes from gaps, . -> -
        if gseq[ij[0]] == '-' or gseq[ij[1]] == '-':
            if v:
                print('Removed Interaction:', ij)
        else:
            # 0-based gapped index -> 1-based index in the ungapped sequence
            a = ij[0] - gseq[:ij[0]].count('-') + 1
            b = ij[1] - gseq[:ij[1]].count('-') + 1
            if v:
                print(ij, '->', a, b)
            mapped_interactions.append([a, b])

    print('Mapped Interactions:\n', mapped_interactions)
    if v:
        for i in mapped_interactions:
            print(str(i))

    pairs = parse_vienna_to_pairs(gss)[0]
    print('pairs', pairs)
    print(ruler)
    print(gseq.replace('-', ''))  # what is the gap character, - or . ?
    print(gss.replace('-', ''))
    mapped_interactions.sort()

    pairs = parse_vienna_to_pairs(gss.replace('-', ''))[0]
    _draw_interactions(mapped_interactions, seq, pairs, noss, noshort)

    #
    # How to include a gap in the mapping?
    # III panel, the final
    #
    print(seq)
    print(ss)
    # This result is replaced by parse_vienna_to_pairs(ss) before it is used;
    # the call is kept so that the sequence of parser calls is unchanged.
    pairs = parse_vienna_to_pairs(ss.replace('-', ''))[0]

    def n_lower_chars(string):
        """ https://stackoverflow.com/questions/10953189/count-lower-case-characters-in-a-string """
        return sum(1 for c in string if c.islower())

    print('FINAL MAPPING //' + '/' * len(seq))
    nmapped_interactions = []
    for ij in mapped_interactions:
        if v:
            print('ij:', ij)
        if v:
            print(seq[ij[0] - 1], seq[ij[1] - 1])
        # gap mapping
        a = ij[0] + n_lower_chars(seq[:ij[0]])
        b = ij[1] + n_lower_chars(seq[:ij[1]])
        if v:
            print(ij, '->', a, b)
        nmapped_interactions.append([a, b])

    mapped_interactions = nmapped_interactions
    print(seq)
    print(ss)
    pairs = parse_vienna_to_pairs(ss)[0]
    # NOTE(review): the nesting of the original `if mss: print(ss)` could not
    # be recovered from the collapsed source; here the structure is printed
    # after every drawn line of the final panel -- confirm.
    mapped_interactions = _draw_interactions(
        mapped_interactions, seq, pairs, noss, noshort,
        footer=ss if mss else None)

    if offset:
        # e.g. filter_interaction is [[18, 71]], if offset is 10 then it will give you [[28, 81]]
        mapped_interactions = [[x[0] + offset, x[1] + offset] for x in mapped_interactions]

    print(mapped_interactions)
    print('draw_dists(' + str(mapped_interactions) + ')')
    print('output file:', file_interactions + "_mapped.csv")
    a = pd.DataFrame(list(mapped_interactions), columns=["i", "j"])
    a.to_csv(file_interactions + "_mapped.csv", sep=" ")