def get_residue_num_list(cg, d):
    '''
    Get the list of nucleotides belonging to a coarse-grain element.

    Every element type is handled the same way: the result is whatever
    cg.define_residue_num_iterator(d, adjacent=True) yields, collected
    into a list.

    NOTE(review): an earlier revision special-cased multiloops (via
    cg.shortest_bg_loop) and stems (a four-nucleotide cycle around the
    middle of the stem), but that code sat behind an unconditional return
    and was never executed. It has been removed here; reinstate it
    deliberately if that behaviour is actually wanted.

    @param cg: A CoarseGrainRNA structure.
    @param d: The name of the coarse-grain element
    @return: A list with the items produced by the residue iterator.
    '''
    return list(cg.define_residue_num_iterator(d, adjacent=True))
def get_residue_num_list(cg, d):
    '''
    Get the list of nucleotides belonging to a coarse-grain element.

    Every element type is handled the same way: the result is whatever
    cg.define_residue_num_iterator(d, adjacent=True) yields, collected
    into a list.

    NOTE(review): an earlier revision special-cased multiloops (via
    cg.shortest_bg_loop) and stems (a four-nucleotide cycle around the
    middle of the stem), but that code sat behind an unconditional return
    and was never executed. The dead stem branch also computed its start
    residue with a plain "/" division, which would yield a float index on
    Python 3. It has been removed here; reinstate it deliberately (with
    integer division) if that behaviour is actually wanted.

    @param cg: A CoarseGrainRNA structure.
    @param d: The name of the coarse-grain element
    @return: A list with the items produced by the residue iterator.
    '''
    return list(cg.define_residue_num_iterator(d, adjacent=True))
def test_get_sides_plus(self):
    """
    Check the first value returned by _get_sides_plus for several
    stem/multiloop pairs of the structure ".((.).(.)).."
    """
    db = ".((.).(.)).."
    tuples = fus.pairtable_to_tuples(fus.dotbracket_to_pairtable(db))
    c = _BulgeGraphConstruction(tuples)

    # assertEqual is used instead of the assertEquals alias, which is
    # deprecated and no longer exists in Python 3.12+.
    p1 = c._get_sides_plus('s0', 'm0')
    self.assertEqual(p1[0], 1)

    p1 = c._get_sides_plus('s0', 'm2')
    self.assertEqual(p1[0], 2)

    p1 = c._get_sides_plus('s1', 'm0')
    self.assertEqual(p1[0], 0)
def test_pairtable_to_elements(self):
    """
    Check that pairtable_to_elements reports the expected stem, interior
    loop and hairpin entries for a small nested structure.
    """
    db = "((((....))..))"
    #     12345678901234
    pt = fus.dotbracket_to_pairtable(db)
    print("pt:", pt, file=sys.stderr)

    elements = fus.pairtable_to_elements(pt, 0, 1, len(db))
    # Dump the result before asserting so it is visible when a check fails.
    print("pairtable_to_elements:", elements, file=sys.stderr)

    self.assertIn(['s', 2, [1, 2, 13, 14]], elements)
    self.assertIn(['s', 4, [3, 4, 9, 10]], elements)
    # These two checks previously asserted the truthiness of the list
    # literal itself (always True); they now test membership like the
    # stem checks above.
    self.assertIn(['i', 3, [2, 3, 10, 11, 12, 13]], elements)
    self.assertIn(['h', 5, [4, 5, 6, 7, 8, 9]], elements)
def test_construction_ml(self):
    """
    Build a bulge graph from a structure with a multiloop and verify
    the resulting defines and the neighbours of stem s1.
    """
    dotbracket = "(.(.).(.).).."
    pairtable = fus.dotbracket_to_pairtable(dotbracket)
    construction = _BulgeGraphConstruction(fus.pairtable_to_tuples(pairtable))

    expected_defines = {
        # stems
        "s0": [1, 1, 11, 11],
        "s1": [3, 3, 5, 5],
        "s2": [7, 7, 9, 9],
        # multiloop segments
        "m0": [2, 2],
        "m1": [6, 6],
        "m2": [10, 10],
        # hairpins
        "h0": [4, 4],
        "h1": [8, 8],
        # remaining element
        "t0": [12, 13],
    }
    self.assertEqual(construction.defines, expected_defines)

    expected_s1_neighbours = {"m0", "m1", "h0"}
    self.assertEqual(construction.edges["s1"], expected_s1_neighbours)
def test_construction_rod(self):
    """
    Build a bulge graph from a rod-like structure (stem, interior loop,
    stem, hairpin) and verify both its defines and its edges.
    """
    dotbracket = "(((.(((...)))..)))"
    pairtable = fus.dotbracket_to_pairtable(dotbracket)
    construction = _BulgeGraphConstruction(fus.pairtable_to_tuples(pairtable))

    expected_defines = {
        "s0": [1, 3, 16, 18],
        "i0": [4, 4, 14, 15],
        "s1": [5, 7, 11, 13],
        "h0": [8, 10],
    }
    expected_edges = {
        "s0": {"i0"},
        "i0": {"s0", "s1"},
        "s1": {"i0", "h0"},
        "h0": {"s1"},
    }

    self.assertEqual(construction.defines, expected_defines)
    self.assertEqual(construction.edges, expected_edges)
def inverse_fold():
    """
    Run RNA.inverse_fold on the dot-bracket structure given in the
    request's JSON body.

    The body must be a JSON object with a 'struct' entry consisting only
    of '(', ')' and '.' characters. The request is rejected with HTTP 400
    when the body is missing, has no 'struct' entry, contains any other
    character, or cannot be converted to a pair table.

    @return: A tuple of the JSON-encoded first item of the
             RNA.inverse_fold result and the status code 201.
    """
    app.logger.info(request.json)

    if not request.json:
        abort(400, "Request has no json.")

    # A JSON body that is not an object (e.g. a bare number or string)
    # cannot carry a 'struct' entry; reject it instead of failing later.
    if not isinstance(request.json, dict) or 'struct' not in request.json:
        abort(400, "Request has no structure in the json.")

    struct = request.json['struct']
    try:
        # Raw string avoids invalid-escape warnings; \Z (unlike $) does
        # not accept a trailing newline after the structure.
        is_valid = re.match(r"^[().]+\Z", struct) is not None
    except TypeError:
        # 'struct' was not a string (e.g. a number or a list).
        is_valid = False

    if not is_valid:
        abort(400, "Invalid structure for inverse fold: {}".format(struct))

    try:
        # Called only for validation: a failure here is reported to the
        # client as unbalanced brackets.
        fus.dotbracket_to_pairtable(str(struct))
    except Exception as ex:
        app.logger.exception(ex)
        abort(400, "Unbalanced brackets: {}".format(ex))

    result = RNA.inverse_fold("", str(struct))[0]
    return json.dumps(result), 201
def inverse_fold():
    """
    Run RNA.inverse_fold on the dot-bracket structure given in the
    request's JSON body.

    The body must be a JSON object with a 'struct' entry consisting only
    of '(', ')' and '.' characters. The request is rejected with HTTP 400
    when the body is missing, has no 'struct' entry, contains any other
    character, or cannot be converted to a pair table.

    @return: A tuple of the JSON-encoded first item of the
             RNA.inverse_fold result and the status code 201.
    """
    app.logger.info(request.json)

    if not request.json:
        abort(400, "Request has no json.")

    # A JSON body that is not an object (e.g. a bare number or string)
    # cannot carry a 'struct' entry; reject it instead of failing later.
    if not isinstance(request.json, dict) or 'struct' not in request.json:
        abort(400, "Request has no structure in the json.")

    struct = request.json['struct']
    try:
        # Raw string avoids invalid-escape warnings; \Z (unlike $) does
        # not accept a trailing newline after the structure.
        is_valid = re.match(r"^[().]+\Z", struct) is not None
    except TypeError:
        # 'struct' was not a string (e.g. a number or a list).
        is_valid = False

    if not is_valid:
        abort(
            400,
            "Invalid structure for inverse fold: {}".format(struct))

    try:
        # Called only for validation: a failure here is reported to the
        # client as unbalanced brackets.
        fus.dotbracket_to_pairtable(str(struct))
    except Exception as ex:
        app.logger.exception(ex)
        abort(400, "Unbalanced brackets: {}".format(ex))

    result = RNA.inverse_fold("", str(struct))[0]
    return json.dumps(result), 201
def test_dotbracket_to_pairtable3(self):
    """A dot-bracket string with a surplus closing bracket raises ValueError."""
    unbalanced = "((((....))..)).((..)))"
    self.assertRaises(ValueError, fus.dotbracket_to_pairtable, unbalanced)
def test_dotbracket_to_pairtable1(self):
    """Smoke test: converting a longer nested structure must not raise."""
    structure = "....(((((.....((((((((((((.....(..(((....)))..)....))))))))))))......))))).."
    # No assertion on the result; the call itself is the check.
    fus.dotbracket_to_pairtable(structure)
def test_dotbracket_to_pairtable(self):
    """
    Each dot-bracket string in self.pt_dbs converts to its paired pair table.
    """
    for expected_pt, dotbracket in self.pt_dbs:
        converted = fus.dotbracket_to_pairtable(dotbracket)
        self.assertEqual(converted, expected_pt)
def test_pairtable_to_elements1(self):
    """
    Convert a structure mixing round and square brackets to a pair table
    and write it to stderr. Nothing is asserted about the result.
    """
    crossing = "(.[.).]"
    #           1234567
    print("pt:", fus.dotbracket_to_pairtable(crossing), file=sys.stderr)
def main():
    """
    Print dotplot data for an RNA sequence as JSON on stdout.

    The sequence is taken from the first command-line argument, or read
    from stdin when that argument is '-'. For every pair of positions
    whose base-pair probability exceeds the --probability cutoff, the
    output lists the pair, the square root of its probability and the
    index of the Zuker suboptimal structure it was first seen in.
    Progress information is written to stderr.

    The code avoids print statements and in-place sorting of dict views
    so that it runs on both Python 2.7 and Python 3.
    """
    usage = """
    python scripts/dotplus.py sequence

    Create a file for displaying as a dotplot using the provided sequence.
    If the provided sequence is '-', then read the sequence from stdin.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option(
        '-p',
        '--probability',
        dest='probability',
        default=0.01,
        help="The probability cutoff for displaying base pair points",
        type='float')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        # Strip like the command-line branch does, so a trailing newline
        # from stdin does not become part of the sequence.
        seq = sys.stdin.read().strip()
    else:
        seq = args[0].strip()

    # pf_fold returns the MFE as well as the partition function energy
    _, pfe = RNA.pf_fold(seq)
    prob_matrix = RNA.export_bppm()

    bp_to_seq = {}

    # get all of the suboptimal structures
    subopts = RNA.zukersubopt(seq)
    for i in range(subopts.size()):
        s = subopts.get(i)
        pt = fus.dotbracket_to_pairtable(s.structure)

        # go through each base pair and store the structure it came
        # from so that it can be output later; the first structure seen
        # for a pair wins
        for j in range(1, pt[0] + 1):
            bp = tuple(sorted([j, pt[j]]))
            bp_to_seq.setdefault(bp, (s.structure, s.energy))

    bps = []
    struct_dict = {}
    base_probs = col.defaultdict(float)

    sys.stderr.write("probability: {}\n".format(options.probability))

    for i, j in it.combinations(range(1, len(seq) + 1), 2):
        prob = RNA.doubleP_getitem(
            prob_matrix, RNA.intP_getitem(RNA.cvar.iindx, i) - j)

        base_probs[i] += prob
        base_probs[j] += prob

        if prob > options.probability:
            struct, energy = bp_to_seq[(i, j)]
            # NOTE(review): .616310776 is presumably kT in kcal/mol at
            # 37 degrees C and energy is presumably in units of
            # 0.01 kcal/mol -- confirm against the RNA library.
            pp = math.exp((pfe - (energy / 100.)) / .616310776)

            # Structures are numbered in order of first appearance;
            # len(struct_dict) is evaluated before a new entry is added.
            entry = struct_dict.setdefault(
                struct, (struct, pp, len(struct_dict)))
            index = entry[2]

            sys.stderr.write(
                "ix: {} struct: {} pp: {}\n".format(index, struct, pp))
            bps.append({
                "i": i,
                "j": j,
                "p": "{:.3f}".format(math.sqrt(prob)),
                "ix": index
            })

    # Highest pp first.
    ranked = sorted(struct_dict.values(), key=lambda x: -x[1])
    structs = [{
        "struct": st[0],
        "sprob": st[1],
        "ix": st[2]
    } for st in ranked]

    base_probs = dict(
        (pos, "{:.3f}".format(total)) for (pos, total) in base_probs.items())

    sys.stdout.write(
        json.dumps(
            {
                "seq": seq,
                "structs": structs,
                "bps": bps,
                # list() keeps this serialisable where items() is a view
                "baseProbs": list(base_probs.items())
            },
            indent=2) + "\n")
def main():
    """
    Print dotplot data for an RNA sequence as JSON on stdout.

    The sequence is taken from the first command-line argument, or read
    from stdin when that argument is '-'. For every pair of positions
    whose base-pair probability exceeds the --probability cutoff, the
    output lists the pair, the square root of its probability and the
    index of the Zuker suboptimal structure it was first seen in.
    Progress information is written to stderr.

    The code avoids print statements and in-place sorting of dict views
    so that it runs on both Python 2.7 and Python 3.
    """
    usage = """
    python scripts/dotplus.py sequence

    Create a file for displaying as a dotplot using the provided sequence.
    If the provided sequence is '-', then read the sequence from stdin.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-p', '--probability', dest='probability', default=0.01,
                      help="The probability cutoff for displaying base pair points",
                      type='float')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        # Strip like the command-line branch does, so a trailing newline
        # from stdin does not become part of the sequence.
        seq = sys.stdin.read().strip()
    else:
        seq = args[0].strip()

    # pf_fold returns the MFE as well as the partition function energy
    _, pfe = RNA.pf_fold(seq)
    prob_matrix = RNA.export_bppm()

    bp_to_seq = {}

    # get all of the suboptimal structures
    subopts = RNA.zukersubopt(seq)
    for i in range(subopts.size()):
        s = subopts.get(i)
        pt = fus.dotbracket_to_pairtable(s.structure)

        # go through each base pair and store the structure it came
        # from so that it can be output later; the first structure seen
        # for a pair wins
        for j in range(1, pt[0] + 1):
            bp = tuple(sorted([j, pt[j]]))
            bp_to_seq.setdefault(bp, (s.structure, s.energy))

    bps = []
    struct_dict = {}
    base_probs = col.defaultdict(float)

    sys.stderr.write("probability: {}\n".format(options.probability))

    for i, j in it.combinations(range(1, len(seq) + 1), 2):
        prob = RNA.doubleP_getitem(
            prob_matrix, RNA.intP_getitem(RNA.cvar.iindx, i) - j)

        base_probs[i] += prob
        base_probs[j] += prob

        if prob > options.probability:
            struct, energy = bp_to_seq[(i, j)]
            # NOTE(review): .616310776 is presumably kT in kcal/mol at
            # 37 degrees C and energy is presumably in units of
            # 0.01 kcal/mol -- confirm against the RNA library.
            pp = math.exp((pfe - (energy / 100.)) / .616310776)

            # Structures are numbered in order of first appearance;
            # len(struct_dict) is evaluated before a new entry is added.
            entry = struct_dict.setdefault(
                struct, (struct, pp, len(struct_dict)))
            index = entry[2]

            sys.stderr.write(
                "ix: {} struct: {} pp: {}\n".format(index, struct, pp))
            bps.append({"i": i, "j": j,
                        "p": "{:.3f}".format(math.sqrt(prob)),
                        "ix": index})

    # Highest pp first.
    ranked = sorted(struct_dict.values(), key=lambda x: -x[1])
    structs = [{"struct": st[0], "sprob": st[1], "ix": st[2]}
               for st in ranked]

    base_probs = dict(
        (pos, "{:.3f}".format(total)) for (pos, total) in base_probs.items())

    sys.stdout.write(
        json.dumps({"seq": seq,
                    "structs": structs,
                    "bps": bps,
                    # list() keeps this serialisable where items() is a view
                    "baseProbs": list(base_probs.items())},
                   indent=2) + "\n")
def test_dotbracket_to_pairtable3(self):
    """
    A dot-bracket string with one closing bracket too many must be
    rejected with ValueError.
    """
    db = "((((....))..)).((..)))"
    # Previously the conversion was called with no expectation, so the
    # test either asserted nothing or errored out on the exception.
    with self.assertRaises(ValueError):
        fus.dotbracket_to_pairtable(db)