def fasta_to_positions(fasta_text):
    """
    Compute 2D layout positions for the structure described by a
    fasta-like text.

    The text is parsed with fgb.from_fasta_text, converted to a dot-bracket
    string and passed to RNA.get_xy_coordinates after setting
    RNA.cvar.rna_plot_type to 1. The dot-bracket string is also written to
    stderr as a diagnostic.

    @param fasta_text: The fasta string (as accepted by fgb.from_fasta_text).
    @return: A list of (x, y) pairs, one for each character of the
             dot-bracket string.
    """
    # The parsed graph has to be kept: it is the source of the dot-bracket string.
    bg = fgb.from_fasta_text(fasta_text)
    bp_string = bg.to_dotbracket_string()

    # sys.stderr.write behaves identically on Python 2 and Python 3, unlike
    # the Python 2 only "print >>sys.stderr" statement.
    sys.stderr.write('bp_string %s\n' % (bp_string,))

    RNA.cvar.rna_plot_type = 1
    coords = RNA.get_xy_coordinates(bp_string)

    # Fetch each coordinate entry once and read both components from it.
    points = [coords.get(i) for i in range(len(bp_string))]
    xs = np.array([point.X for point in points])
    ys = np.array([point.Y for point in points])

    # Materialise the pairs so the result is a list on Python 3 as well.
    return list(zip(xs, ys))
def test_angle_stat_get_angle_from_cg_1(self):
    """
    Place the stems s0 and s1 by hand on the z-axis and check that both
    angle stats reported for the bulge i0 have an angle of 180 degrees.
    """
    fa_text = """>1
    AAACCGGGCCCCCCAAUUU
    (((..(((...)))..)))
    """
    vec = np.array

    cg = ftmc.from_bulge_graph(fgb.from_fasta_text(fa_text))

    # First stem: along z from 0 to 1
    cg.coords["s0"] = vec([0., 0., 0.]), vec([0., 0., 1.])
    cg.twists["s0"] = vec([0., -1., 0]), vec([0., 1., 0.])

    # Second stem: continues along z from 2 to 3
    cg.coords["s1"] = vec([0., 0., 2.]), vec([0., 0., 3.])
    cg.twists["s1"] = vec([-1., 0., 0.]), vec([1., 0., 0.])

    cg.coords["h0"] = vec([0, 1, 3]), vec([0, 2, 4])

    # The bulge coordinates are derived from the stems set above
    cg.add_bulge_coords_from_stems()
    print(cg.coords["i0"])
    print(cg.twists)

    first_stat, second_stat = cg.get_bulge_angle_stats("i0")
    self.assertAlmostEqual(first_stat.get_angle(), math.radians(180))
    self.assertAlmostEqual(second_stat.get_angle(), math.radians(180))
def fasta_to_json(fasta_text, circular=False):
    """
    Build the d3 compatible graph representation of a structure given
    as fasta-style text containing a dotbracket string, for example:

        >id
        ACCCGGGG
        (((..)))

    @param fasta_text: The fasta string.
    @param circular: Handed on unchanged to bg_to_json.
    @return: The result of bg_to_json for the parsed structure.
    """
    return bg_to_json(fgb.from_fasta_text(fasta_text), circular=circular)
def json_to_json(rna_json_str):
    '''
    Convert an RNA json string to fasta file, then to a bulge_graph
    and then back to a json.

    The purpose is to maintain the integrity of the molecule and to
    maintain the positions of all the hidden nodes after modification.

    Every molecule returned by json_to_fasta is converted separately with
    bg_to_json; the resulting node and link lists are concatenated into a
    single graph, with the link endpoints shifted by the number of nodes
    that precede the molecule. Links between different molecules are then
    re-created by looking up the coordinates of their end points.

    @param rna_json_str: The RNA json string (input of json_to_fasta).
    @return: A dictionary with the keys 'nodes' and 'links'.
    @raise KeyError: If a link between molecules refers to coordinates
                     that do not belong to any nucleotide node.
    '''
    (all_fastas, all_xs, all_ys, all_uids,
     different_tree_links) = json_to_fasta(rna_json_str)

    big_json = {'nodes': [], 'links': []}
    coords_to_index = dict()

    for fasta_text, xs, ys, uids in zip(all_fastas, all_xs, all_ys, all_uids):
        bg = fgb.from_fasta_text(fasta_text)
        new_json = bg_to_json(bg, xs=xs, ys=ys, uids=uids)

        # All nodes of this molecule are placed after the nodes collected
        # so far, so every index local to the molecule is shifted by this.
        offset = len(big_json['nodes'])

        for link in new_json['links']:
            # the indices of the new nodes will be offset, so the links
            # have to have their node pointers adjusted as well
            link['source'] += offset
            link['target'] += offset
            big_json['links'].append(link)

        # Create a mapping between the coordinates of a node and its index
        # in the node list. To be used when creating links between different
        # molecules, which are stored according to the coordinates of the
        # nodes being linked
        for i, node in enumerate(new_json['nodes']):
            if node['node_type'] == 'nucleotide':
                coords_to_index[(node['x'], node['y'])] = i + offset

        big_json['nodes'].extend(new_json['nodes'])

    # add the links that are between different molecules
    for dtl in different_tree_links:
        n1 = coords_to_index[dtl[0]]
        n2 = coords_to_index[dtl[1]]

        big_json['links'].append({'source': n1, 'target': n2,
                                  'link_type': 'basepair', 'value': 1})

    return big_json
def test_radius_of_gyration_no_stems(self):
    """
    The radius of gyration of a structure whose dot-bracket string
    consists only of dots has to be NaN.
    """
    unpaired_bg = fgb.from_fasta_text("AUCG\n....")
    cg = ftmc.from_bulge_graph(unpaired_bg)
    cg.coords["f0"] = [0, 0, 0.], [12., 1, 1]

    rog = cg.radius_of_gyration()
    self.assertTrue(math.isnan(rog))
def main():
    """
    Command line entry point: print a reduced representation of a
    secondary structure.

    The structure is read from the file named by the first positional
    argument, or from stdin if that argument is a dash. Walking over the
    nucleotides in order, each stem contributes an opening bracket the
    first time it is entered and a closing bracket when it is entered
    again, and each element whose name starts with 'i' contributes a dot.
    Other elements contribute nothing.

    Three things are printed: the reduced string, the comma separated
    names of the traversed elements, and one line per element with its
    symbol, its name and its sequence.

    Exits with status 1 (after printing the help text) if no input
    argument was given.
    """
    usage = """
    python fasta_to_reduced_graph.py fasta_file

    Generate a reduced graph representation of this secondary structure.

    fasta_file should be either a filename pointing to a fasta file or a
    dash (-) indicating that the input should come from stdin.
    """
    # The fasta_file argument is mandatory; without this the guard below
    # could never trigger and args[0] would raise an IndexError.
    num_args = 1
    parser = OptionParser(usage=usage)

    (_options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    if args[0] == '-':
        text = sys.stdin.read()
    else:
        with open(args[0], 'r') as f:
            text = f.read()

    # load a BulgeGraph from a fasta-like file
    # i.e.
    # >test
    # AACCGG
    # ((..))
    bg = fgb.from_fasta_text(text)

    prev = None
    seen = set()
    symbols = []
    elements_traversed = []

    for i in range(1, bg.seq_length + 1):
        # iterate over each nucleotide and get the name of the element
        # that it is part of
        node = bg.get_node_from_residue_num(i)

        if node == prev:
            # still inside the element handled in the previous iteration
            continue

        if node[0] == 's':
            # a stem seen before is being closed, a new one is being opened
            symbols.append(')' if node in seen else '(')
            elements_traversed.append(node)
        elif node[0] == 'i':
            symbols.append('.')
            elements_traversed.append(node)

        prev = node
        seen.add(node)

    out_str = ''.join(symbols)
    print(out_str)
    print(",".join(elements_traversed))

    for t, e in zip(out_str, elements_traversed):
        # print the sequences of each element in the reduced representation
        print(t, e, bg.get_define_seq_str(e))
def load_rna(filename, rna_type="any", allow_many=True, pdb_chain=None, pbd_remove_pk=True, pdb_dotbracket="", dissolve_length_one_stems = True):
    """
    Load one or more RNAs from a file or from a bare dotbracket string.

    If every character of filename is one of ".()[]{}&" and rna_type is
    "any", filename itself is parsed as a dotbracket string. Otherwise the
    file is opened, its type is determined with sniff_filetype and the
    loader matching that type is used.

    :param filename: The name of the file to load, or a dotbracket string.
    :param rna_type: One of "any", "cg", "only_cg", "3d" and "pdb"

                     * "any": Return either BulgeGraph or CoarseGrainRNA
                       objects, depending on the input format
                     * "cg":  Always convert to CoarseGrainRNA objects,
                       even if they have no 3D information
                     * "only_cg": Only accept cg-files.
                     * "3d":  Return CoarseGrainRNA objects, if the file
                       contains 3D information, raise an error otherwise
                     * "pdb": only accept pdb files
    :param allow_many: If True, return a list. If False raise an error,
                       if more than one RNA is present.
    :param pdb_chain: Extract the given chain from the file.
                      Only applicable if filename corresponds to a pdb file
    :param pbd_remove_pk: Detect pseudoknot-free structures from the pdb.
                          (The parameter name really is spelled "pbd".)
    :param pdb_dotbracket: Only applicable, if filename corresponds to a
                           pdb file and pdb_chain is given.
    :param dissolve_length_one_stems: Ignored if input is in forgi bg/cg format.

    :returns: A list of RNAs or a single RNA
    :raises WrongFileFormat: If the detected file type does not fit
                             rna_type, if the file is an mmcif file, if 3D
                             coordinates are required but not available, or
                             if allow_many is False and more than one RNA
                             was found.
    :raises ValueError: If pdb_dotbracket is given without pdb_chain for a
                        pdb file.
    """
    # Is filename a dotbracket string and not a filename?
    if all( c in ".()[]{}&" for c in filename):
        # A dotbracket-string was provided via the commandline
        if not rna_type=="any":
            # Only warn here; execution falls through to the file handling below.
            warnings.warn("Cannot treat '{}' as dotbracket string, since we need a sequence. "
                          "Trying to treat it as a filename instead...".format(filename))
        else:
            log.info("Assuming RNA %s is a dotbracketstring and not a file.", filename)
            bg = fgb.from_fasta_text(filename, dissolve_length_one_stems=dissolve_length_one_stems)
            if allow_many:
                return [bg]
            else:
                return bg
    # Determine the file type from the file's content.
    with open(filename) as rnafile:
        filetype = sniff_filetype(rnafile)
    # Reject files whose type contradicts a restrictive rna_type.
    if rna_type=="pdb" and filetype!="pdb":
        raise WrongFileFormat("Only PDB files are accepted, but file {} has type {}.".format(filename, filetype))
    if rna_type=="only_cg" and filetype!="forgi":
        raise WrongFileFormat("Only forgi cg files are accepted, but file {} has type {}.".format(filename, filetype))
    if filetype=="forgi":
        cg = ftmc.CoarseGrainRNA(filename)
        # "3d" and "only_cg" both require the coordinates to be filled.
        if rna_type in ["3d", "only_cg"] and not cg.coords.is_filled:
            raise WrongFileFormat("File {} does not contain all 3D coordinates!".format(filename))
        if allow_many:
            return [cg]
        else:
            return cg
    elif filetype=="pdb":
        if pdb_chain:
            # Pseudoknot removal is switched off when a dotbracket string
            # is supplied as secondary structure.
            cgs = [ftmc.load_cg_from_pdb(filename, chain_id=pdb_chain, remove_pseudoknots=pbd_remove_pk and not pdb_dotbracket, secondary_structure=pdb_dotbracket, dissolve_length_one_stems=dissolve_length_one_stems)]
            # NOTE(review): dissolve_length_one_stems is already passed to
            # the loader above and is applied again here - confirm whether
            # the second pass is needed.
            if dissolve_length_one_stems:
                for cg in cgs:
                    cg.dissolve_length_one_stems()
        else:
            # NOTE(review): the message below contains typos ("ti",
            # "avioid"); it is runtime text and therefore left untouched here.
            if pdb_dotbracket:
                raise ValueError("pdb_dotbracket requires a chain ti be given to avioid ambiguity.")
            cgs = ftmc.connected_cgs_from_pdb(filename, remove_pseudoknots = pbd_remove_pk, dissolve_length_one_stems=dissolve_length_one_stems)
        if allow_many:
            return cgs
        else:
            if len(cgs)>1:
                raise WrongFileFormat("More than one connected RNA component in pdb file {}.".format(filename))
            return cgs[0]
    elif filetype=="mmcif":
        raise WrongFileFormat("MMCIF files are not yet supported.")
    elif filetype=="bpseq":
        if rna_type=="3d":
            raise WrongFileFormat("bpseq file {} is not supported. We need 3D coordinates!".format(filename))
        bg = fgb.BulgeGraph()
        with open(filename, 'r') as f:
            text = f.read()
            try:
                int(text[0])
            except ValueError:
                # The text does not start with a digit: drop everything
                # before the first line that starts with "1 ". If no such
                # line exists, find() returns -1 and the text stays unchanged.
                i=text.find("\n1 ")
                text=text[i+1:]
        bg.from_bpseq_str(text, dissolve_length_one_stems=dissolve_length_one_stems)
        if rna_type=="cg":
            bg = ftmc.from_bulge_graph(bg)
        if allow_many:
            return [bg]
        else:
            return bg
    elif filetype =="fasta" or filetype=="other":
        if rna_type=="3d":
            raise WrongFileFormat("Fasta(like) file {} is not supported. We need 3D coordinates!".format(filename))
        try:
            bgs = fgb.from_fasta(filename, dissolve_length_one_stems=dissolve_length_one_stems)
        except Exception as e:
            # Log some context about the failure, then re-raise the
            # original exception.
            with log_to_exception(log, e):
                log.critical("Could not parse file %r.", filename)
                if filetype=="other":
                    log.critical("We assumed file %r to be some fasta-variant or dotbracket file, but an error occurred during parsing.", filename)
            raise
        # from_fasta may return a single BulgeGraph; normalise to a list.
        if isinstance(bgs, fgb.BulgeGraph):
            bgs = [bgs]
        if dissolve_length_one_stems:
            for bg in bgs:
                bg.dissolve_length_one_stems()
        if rna_type=="cg":
            bgs = list(map(ftmc.from_bulge_graph, bgs))
        if allow_many:
            return bgs
        else:
            if len(bgs)>1:
                raise WrongFileFormat("More than one RNA found in fasta/ dotbracket file {}.".format(filename))
            return bgs[0]