def string_to_networkx(header, sequence, **options):
    """Yield graphs built from the structures rnasubopt_wrapper returns.

    Parameters
    ----------
    header : value stored under the ``'id'`` graph attribute.
    sequence : sequence handed to ``rnasubopt_wrapper`` and used as
        ``seq_info`` for ``sequence_dotbracket_to_graph``.
    options : keyword options. Keys read here are ``energy_range``
        (default 10), ``max_num`` (default 3), ``max_num_subopts``
        (default 100) and ``split_components`` (default False). The
        whole dict is forwarded to ``seq_to_networkx`` when the
        fallback graph is built.

    Yields
    ------
    If ``split_components`` is true, one graph per (structure, energy)
    pair; otherwise a single graph obtained by ``nx.disjoint_union`` of
    all structure graphs. A graph with fewer than two nodes is replaced
    by ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)

    structures, energies = rnasubopt_wrapper(
        sequence,
        energy_range=energy_range,
        max_num=max_num,
        max_num_subopts=max_num_subopts)

    if not split_components:
        # One graph holding every structure as a separate component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    for dotbracket, energy in zip(structures, energies):
        candidate = sequence_dotbracket_to_graph(seq_info=sequence,
                                                 seq_struct=dotbracket)
        candidate.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
        if candidate.number_of_nodes() < 2:
            candidate = seq_to_networkx(header, sequence, **options)
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # source; these attributes are assumed to apply to every graph,
        # fallback or not - confirm.
        for key, value in (('id', header),
                           ('sequence', sequence),
                           ('structure', dotbracket)):
            candidate.graph[key] = value
        yield candidate
def _seq_to_eden(self, header, sequence, struct, energy):
    """Build the graph for one sequence/structure pair and annotate it.

    Parameters
    ----------
    header : value stored under the ``'id'`` graph attribute.
    sequence : passed to ``sequence_dotbracket_to_graph`` as ``seq_info``
        and stored under ``'sequence'``.
    struct : passed to ``sequence_dotbracket_to_graph`` as ``seq_struct``.
    energy : stored under ``'energy'`` and formatted with three decimals
        into the ``'info'`` attribute.

    Returns
    -------
    The annotated graph. When the structure graph has fewer than two
    nodes, ``seq_to_networkx(header, sequence)`` is used instead.
    """
    result = sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=struct)
    if result.number_of_nodes() < 2:
        result = seq_to_networkx(header, sequence)
    annotations = (
        ('id', header),
        ('info', 'muscle+RNAalifold energy=%.3f' % energy),
        ('energy', energy),
        ('sequence', sequence),
    )
    for key, value in annotations:
        result.graph[key] = value
    return result
def rnafold_to_eden(iterable=None, **options):
    """Yield one result per (header, sequence) pair, degrading gracefully on error.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The value returned by ``string_to_networkx(header, seq, **options)``.
    If that call raises, the error is reported on stdout and the result
    of ``seq_to_networkx(header, seq, **options)`` is yielded instead.
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
        yield graph
def rnafold_to_eden(iterable=None, **options):
    """Convert (header, sequence) pairs, falling back to a sequence graph on error.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options passed through to ``string_to_networkx``
        and, when that fails, to ``seq_to_networkx``.

    Yields
    ------
    One item per input pair: the return value of ``string_to_networkx``,
    or of ``seq_to_networkx`` if ``string_to_networkx`` raised.
    """
    # Generator body: the assertion fires on first iteration, not at call time.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Report and degrade rather than stop processing the remaining
            # sequences. ``e.message`` is deprecated (empty unless the
            # exception was built with exactly one argument, missing on
            # Python 3), so the exception itself is printed.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
        yield graph
def rnashapes_struct_to_eden(iterable, **options):
    """Yield every graph ``string_to_networkx`` produces for each input pair.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The graphs generated by ``string_to_networkx(header, seq, **options)``.
    If an exception is raised while producing them, the error is reported
    on stdout and ``seq_to_networkx(header, seq, **options)`` is yielded;
    graphs already yielded for that sequence are not withdrawn.
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e.__doc__)
            print(e)
            print('Error in: %s %s' % (header, seq))
            yield seq_to_networkx(header, seq, **options)
def rnasubopt_to_eden(iterable, **options):
    """Yield every graph ``string_to_networkx`` produces for each constrained input.

    Parameters
    ----------
    iterable : iterable of (header, sequence, constraint) triples.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The graphs generated by
    ``string_to_networkx(header, seq, const, **options)``. If an exception
    is raised while producing them, the error is reported on stdout and
    ``seq_to_networkx(header, seq, **options)`` is yielded (the constraint
    is not used by the fallback); graphs already yielded for that sequence
    are not withdrawn.
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq, const in iterable:
        try:
            for graph in string_to_networkx(header, seq, const, **options):
                yield graph
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            yield seq_to_networkx(header, seq, **options)
def rnashapes_struct_to_eden(iterable, **options):
    """Stream the graphs of ``string_to_networkx`` for each (header, sequence) pair.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options passed through to ``string_to_networkx``
        and, when that fails, to ``seq_to_networkx``.

    Yields
    ------
    Each graph generated by ``string_to_networkx``. When an exception
    interrupts the generation, the error is printed and a single
    ``seq_to_networkx`` graph follows whatever was already yielded for
    that sequence.
    """
    # Generator body: the assertion fires on first iteration, not at call time.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Report and degrade rather than stop processing the remaining
            # sequences. ``e.message`` is deprecated (empty unless the
            # exception was built with exactly one argument, missing on
            # Python 3), so the exception itself is printed.
            print(e.__doc__)
            print(e)
            print('Error in: %s %s' % (header, seq))
            graph = seq_to_networkx(header, seq, **options)
            yield graph
def rnasubopt_to_eden(iterable, **options):
    """Stream the graphs of ``string_to_networkx`` for each (header, sequence) pair.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options passed through to ``string_to_networkx``
        and, when that fails, to ``seq_to_networkx``.

    Yields
    ------
    Each graph generated by ``string_to_networkx``. When an exception
    interrupts the generation, the error is printed and a single
    ``seq_to_networkx`` graph follows whatever was already yielded for
    that sequence.
    """
    # Generator body: the assertion fires on first iteration, not at call time.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Report and degrade rather than stop processing the remaining
            # sequences. ``e.message`` is deprecated (empty unless the
            # exception was built with exactly one argument, missing on
            # Python 3), so the exception itself is printed.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph
def string_to_networkx(header, sequence, **options):
    """Yield graphs built from the structures rnashapes_wrapper returns.

    Parameters
    ----------
    header : base value for the ``'id'`` graph attribute.
    sequence : sequence handed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Keys read here are ``shape_type``
        (default 5), ``energy_range`` (default 10), ``max_num``
        (default 3), ``split_components`` (default False) and
        ``rnashapes_version`` (default 2). The whole dict is forwarded
        to ``seq_to_networkx`` when the fallback graph is built.

    Yields
    ------
    If ``split_components`` is true, one graph per (structure, shape)
    pair with id ``header + '_' + shape``; otherwise a single graph
    obtained by ``nx.disjoint_union`` of all structure graphs. A graph
    with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)

    wrapper_result = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))
    seq_info, dotbrackets, shapes = wrapper_result

    info_template = 'RNAshapes shape_type=%s energy_range=%s max_num=%s'
    info_values = (shape_type, energy_range, max_num)

    if not split_components:
        # One graph holding every structure as a separate component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = info_template % info_values
        merged.graph['sequence'] = sequence
        for dotbracket in dotbrackets:
            component = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    for dotbracket, shape in zip(dotbrackets, shapes):
        candidate = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                 seq_struct=dotbracket)
        candidate.graph['info'] = info_template % info_values
        candidate.graph['id'] = header + '_' + shape
        if candidate.number_of_nodes() < 2:
            candidate = seq_to_networkx(header, sequence, **options)
            # NOTE(review): nesting was reconstructed from
            # whitespace-collapsed source; the plain-header id is assumed
            # to apply only to the fallback graph - confirm.
            candidate.graph['id'] = header
        candidate.graph['sequence'] = sequence
        candidate.graph['structure'] = dotbracket
        yield candidate
def rnaplfold_to_eden(iterable, **options):
    """Yield one result per (header, sequence) pair, degrading gracefully on error.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The value returned by ``string_to_networkx(header, seq, **options)``.
    If that call raises, the error is reported on stdout and the result
    of ``seq_to_networkx(header, seq, **options)`` is yielded instead.
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # print('') keeps the blank separator line on Python 2 and 3
            # (a bare ``print`` is a no-op expression on Python 3).
            print('')
            print('-' * 80)
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e)
            print('Error in: %s %s' % (header, seq))
            print('Reverting to path graph from sequence')
            graph = seq_to_networkx(header, seq, **options)
        yield graph
def rnaplfold_to_eden(iterable, **options):
    """Convert (header, sequence) pairs, falling back to a sequence graph on error.

    Parameters
    ----------
    iterable : iterable of (header, sequence) pairs.
    options : keyword options passed through to ``string_to_networkx``
        and, when that fails, to ``seq_to_networkx``.

    Yields
    ------
    One item per input pair: the return value of ``string_to_networkx``,
    or of ``seq_to_networkx`` if ``string_to_networkx`` raised.
    """
    # Generator body: the assertion fires on first iteration, not at call time.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Report and degrade rather than stop processing the remaining
            # sequences.
            # A bare ``print`` does nothing on Python 3; print('') emits
            # the blank separator line on both major versions.
            print('')
            print('-' * 80)
            # ``e.message`` is deprecated (empty unless the exception was
            # built with exactly one argument, missing on Python 3), so
            # the exception itself is printed.
            print(e)
            print('Error in: %s %s' % (header, seq))
            print('Reverting to path graph from sequence')
            graph = seq_to_networkx(header, seq, **options)
        yield graph
def string_to_networkx(header, sequence, **options):
    """Turn the RNAshapes output for one sequence into one or more graphs.

    Parameters
    ----------
    header : base value for the ``'id'`` graph attribute.
    sequence : sequence handed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Keys read here are ``shape_type``
        (default 5), ``energy_range`` (default 10), ``max_num``
        (default 3), ``split_components`` (default False) and
        ``rnashapes_version`` (default 2). The whole dict is forwarded
        to ``seq_to_networkx`` when the fallback graph is built.

    Yields
    ------
    With ``split_components`` true: one graph per (structure, shape)
    pair, carrying the dot-bracket string under ``'structure'``.
    Otherwise: one graph that is the ``nx.disjoint_union`` of all
    structure graphs. Graphs with fewer than two nodes are replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)

    seq_info, dotbrackets, shapes = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))

    info_template = 'RNAshapes shape_type=%s energy_range=%s max_num=%s'
    info_values = (shape_type, energy_range, max_num)

    if split_components:
        for dotbracket, shape in zip(dotbrackets, shapes):
            single = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                  seq_struct=dotbracket)
            single.graph['info'] = info_template % info_values
            single.graph['id'] = header + '_' + shape
            if single.number_of_nodes() < 2:
                single = seq_to_networkx(header, sequence, **options)
                # NOTE(review): nesting was reconstructed from
                # whitespace-collapsed source; the plain-header id is
                # assumed to apply only to the fallback graph - confirm.
                single.graph['id'] = header
            for key, value in (('sequence', sequence),
                               ('structure', dotbracket)):
                single.graph[key] = value
            yield single
        return

    # Single graph with one connected component per structure.
    union = nx.Graph()
    union.graph['id'] = header
    union.graph['info'] = info_template % info_values
    union.graph['sequence'] = sequence
    for dotbracket in dotbrackets:
        part = sequence_dotbracket_to_graph(seq_info=seq_info,
                                            seq_struct=dotbracket)
        union = nx.disjoint_union(union, part)
    if union.number_of_nodes() < 2:
        union = seq_to_networkx(header, sequence, **options)
    yield union
def string_to_networkx(header, sequence, constraint, **options):
    """Yield graphs built from the constrained structures rnasubopt_wrapper returns.

    Parameters
    ----------
    header : value stored under the ``'id'`` graph attribute.
    sequence : sequence handed to ``rnasubopt_wrapper`` and used as
        ``seq_info`` for ``sequence_dotbracket_to_graph``.
    constraint : passed as the second positional argument to
        ``rnasubopt_wrapper``.
    options : keyword options. Keys read here are ``energy_range``
        (default 10), ``max_num`` (default 3), ``max_num_subopts``
        (default 100) and ``split_components`` (default False). The
        whole dict is forwarded to ``seq_to_networkx`` when the
        fallback graph is built.

    Yields
    ------
    If ``split_components`` is true, one graph per (structure, energy)
    pair; otherwise a single graph obtained by ``nx.disjoint_union`` of
    all structure graphs. A graph with fewer than two nodes is replaced
    by ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)

    structures, energies = rnasubopt_wrapper(
        sequence,
        constraint,
        energy_range=energy_range,
        max_num=max_num,
        max_num_subopts=max_num_subopts)

    if not split_components:
        # One graph holding every structure as a separate component.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    for dotbracket, energy in zip(structures, energies):
        candidate = sequence_dotbracket_to_graph(seq_info=sequence,
                                                 seq_struct=dotbracket)
        candidate.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
        if candidate.number_of_nodes() < 2:
            candidate = seq_to_networkx(header, sequence, **options)
        # NOTE(review): nesting was reconstructed from whitespace-collapsed
        # source; these attributes are assumed to apply to every graph,
        # fallback or not - confirm.
        for key, value in (('id', header),
                           ('sequence', sequence),
                           ('structure', dotbracket)):
            candidate.graph[key] = value
        yield candidate
def string_to_networkx(header, sequence, **options):
    """Yield graphs built from the annotation strings rnashapes_wrapper returns.

    Each entry of the wrapper's structure list is unpacked into a shape
    string, an energy string and a dot-bracket string. Every string whose
    option flag is enabled is converted with ``seq_to_networkx('', text)``
    and the results are combined with ``nx.disjoint_union``.

    Parameters
    ----------
    header : base value for the ``'id'`` graph attribute.
    sequence : sequence handed to ``rnashapes_wrapper`` and stored under
        the ``'sequence'`` graph attribute.
    options : keyword options. Keys read here are ``shape_type``
        (default 5), ``energy_range`` (default 10), ``max_num``
        (default 3), ``shape`` (default False), ``energy``
        (default False), ``dotbracket`` (default True) and
        ``split_components`` (default False).

    Yields
    ------
    If ``split_components`` is true, one graph per entry with id
    ``header + '_' + shape_str``; otherwise a single graph that is the
    disjoint union of all per-entry graphs, with id ``header``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    shape = options.get('shape', False)
    energy = options.get('energy', False)
    dotbracket = options.get('dotbracket', True)
    split_components = options.get('split_components', False)

    # The first element of the wrapper result is not used in this function.
    _seq_info, entries = rnashapes_wrapper(sequence,
                                           shape_type=shape_type,
                                           energy_range=energy_range,
                                           max_num=max_num)

    info_template = 'RNAshapes shape_type=%s energy_range=%s max_num=%s shape=%s energy=%s dotbracket=%s'
    info_values = (shape_type, energy_range, max_num, shape, energy, dotbracket)

    def _entry_graph(shape_str, energy_str, dotbracket_str):
        # Combine the graphs of the enabled annotation strings, in the
        # fixed order shape, energy, dot-bracket.
        combined = nx.Graph()
        selected = ((shape, shape_str),
                    (energy, energy_str),
                    (dotbracket, dotbracket_str))
        for enabled, text in selected:
            if enabled:
                combined = nx.disjoint_union(combined, seq_to_networkx('', text))
        return combined

    if split_components:
        for shape_str, energy_str, dotbracket_str in entries:
            single = _entry_graph(shape_str, energy_str, dotbracket_str)
            single.graph['id'] = header + '_' + shape_str
            single.graph['info'] = info_template % info_values
            single.graph['sequence'] = sequence
            yield single
        return

    union = nx.Graph()
    for shape_str, energy_str, dotbracket_str in entries:
        union = nx.disjoint_union(
            union, _entry_graph(shape_str, energy_str, dotbracket_str))
    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source; the attributes are assumed to be set once, after the loop -
    # confirm.
    union.graph['id'] = header
    union.graph['info'] = info_template % info_values
    union.graph['sequence'] = sequence
    yield union
def rnafold_to_eden(iterable=None, **options):
    """Yield one result per (header, sequence) pair, degrading gracefully on error.

    Parameters
    ----------
    iterable : iterable over (header_string, sequence_string) pairs.
    options : keyword options forwarded unchanged to
        ``string_to_networkx`` and, on failure, to ``seq_to_networkx``.

    Yields
    ------
    The value returned by ``string_to_networkx(header, seq, **options)``.
    If that call raises, the error is reported on stdout and the result
    of ``seq_to_networkx(header, seq, **options)`` is yielded instead.
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            graph = string_to_networkx(header, seq, **options)
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
        yield graph
def rnashapes_to_eden(iterable, **options):
    """Transforms sequences to graphs that encode secondary structure
    information according to the RNAShapes algorithm.

    If the conversion of a sequence raises, the error is reported on
    stdout and the graph returned by ``seq_to_networkx`` is yielded for
    that sequence instead; graphs already yielded for it are not withdrawn.

    Parameters
    ----------
    iterable : iterable
        iterable pairs of header and sequence strings

    rnashapes_version : int (default 2)
        The version of RNAshapes that is in the path.
        2   e.g. RNAshapes version 2.1.6
        3   e.g. RNAshapes version 3.3.0

    shape_type : int (default 5)
        Is the level of abstraction or dissimilarity which defines a
        different shape. In general, helical regions are depicted by a pair
        of opening and closing brackets and unpaired regions are represented
        as a single underscore. The differences of the shape types are due
        to whether a structural element (bulge loop, internal loop,
        multiloop, hairpin loop, stacking region and external loop)
        contributes to the shape representation:
        Five types are implemented.
        1   Most accurate - all loops and all unpaired [_[_[]]_[_[]_]]_
        2   Nesting pattern for all loop types and unpaired regions in
            external loop and multiloop [[_[]][_[]_]]
        3   Nesting pattern for all loop types but no unpaired regions
            [[[]][[]]]
        4   Helix nesting pattern in external loop and multiloop [[][[]]]
        5   Most abstract - helix nesting pattern and no unpaired regions
            [[][]]

    energy_range : float (default 10)
        Sets the energy range as percentage value of the minimum free
        energy. For example, when relative deviation is specified as 5.0,
        and the minimum free energy is -10.0 kcal/mol, the energy range is
        set to -9.5 to -10.0 kcal/mol. Relative deviation must be a positive
        floating point number; by default it is set to 10 %.

    max_num : int (default 3)
        Is the maximum number of structures that are generated.

    split_components : bool (default False)
        If True each structure is yielded as an independent graph.
        Otherwise all structures are part of the same graph that has
        therefore several disconnected components.

    example: transform a simple sequence using RNAshapes version 3+
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get edge types
        >>> [(start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    example: transform a simple sequence using RNAshapes version 3+, splitting components
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], split_components=True, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get dotbracket structure annotation
        >>> g.graph["structure"]
        '(((...))).'

        >>> # get edge types
        >>> [ (start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    test max_num parameter with RNAshapes version 3+
        >>> seq = "CGUCGUCGCAUCGUACGCAUGACUCAGCAUCAGACUACGUACGCAUACGUCAGCAUCAGUCAGCAUCAGCAUGCAUCACUAGCAUGCACCCCCGGGGGCACAUCGUACGUACGCUCAGUACACUGCAUGACUACGU"
        >>> graphs = rnashapes_to_eden([("ID", seq)], split_components=True, max_num=2, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # get dotbracket structure annotations
        >>> len([g.graph["structure"] for g in graphs])
        2
    """
    # This is a generator, so the check only runs once iteration starts.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Best effort: report the failure and fall back instead of
            # aborting the whole stream.
            # str(e) is printed instead of the deprecated ``e.message``,
            # which is only populated for single-argument exceptions and
            # does not exist on Python 3.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph
def rnashapes_to_eden(iterable, **options):
    """Transforms sequences to graphs that encode secondary structure
    information according to the RNAShapes algorithm.

    When converting a sequence raises, the error is printed and the graph
    returned by ``seq_to_networkx`` is yielded for that sequence; graphs
    that were already yielded for it stay in the output.

    Parameters
    ----------
    iterable : iterable
        iterable pairs of header and sequence strings

    rnashapes_version : int (default 2)
        The version of RNAshapes that is in the path.
        2   e.g. RNAshapes version 2.1.6
        3   e.g. RNAshapes version 3.3.0

    shape_type : int (default 5)
        Is the level of abstraction or dissimilarity which defines a
        different shape. In general, helical regions are depicted by a pair
        of opening and closing brackets and unpaired regions are represented
        as a single underscore. The differences of the shape types are due
        to whether a structural element (bulge loop, internal loop,
        multiloop, hairpin loop, stacking region and external loop)
        contributes to the shape representation:
        Five types are implemented.
        1   Most accurate - all loops and all unpaired [_[_[]]_[_[]_]]_
        2   Nesting pattern for all loop types and unpaired regions in
            external loop and multiloop [[_[]][_[]_]]
        3   Nesting pattern for all loop types but no unpaired regions
            [[[]][[]]]
        4   Helix nesting pattern in external loop and multiloop [[][[]]]
        5   Most abstract - helix nesting pattern and no unpaired regions
            [[][]]

    energy_range : float (default 10)
        Sets the energy range as percentage value of the minimum free
        energy. For example, when relative deviation is specified as 5.0,
        and the minimum free energy is -10.0 kcal/mol, the energy range is
        set to -9.5 to -10.0 kcal/mol. Relative deviation must be a positive
        floating point number; by default it is set to 10 %.

    max_num : int (default 3)
        Is the maximum number of structures that are generated.

    split_components : bool (default False)
        If True each structure is yielded as an independent graph.
        Otherwise all structures are part of the same graph that has
        therefore several disconnected components.

    example: transform a simple sequence using RNAshapes version 3+
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get edge types
        >>> [(start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    example: transform a simple sequence using RNAshapes version 3+, splitting components
        >>> graphs = rnashapes_to_eden([("ID", "CCCCCGGGGG")], split_components=True, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # extract sequence from graph nodes
        >>> "".join([ value["label"] for (key, value) in g.nodes(data=True)])
        'CCCCCGGGGG'
        >>> # get dotbracket structure annotation
        >>> g.graph["structure"]
        '(((...))).'

        >>> # get edge types
        >>> [ (start, end, g.edge[start][end]["type"]) for start, end in g.edges()]
        [(0, 8, 'basepair'), (0, 1, 'backbone'), (1, 2, 'backbone'), (1, 7, 'basepair'), (2, 3, 'backbone'), (2, 6, 'basepair'), (3, 4, 'backbone'), (4, 5, 'backbone'), (5, 6, 'backbone'), (6, 7, 'backbone'), (7, 8, 'backbone'), (8, 9, 'backbone')]

    test max_num parameter with RNAshapes version 3+
        >>> seq = "CGUCGUCGCAUCGUACGCAUGACUCAGCAUCAGACUACGUACGCAUACGUCAGCAUCAGUCAGCAUCAGCAUGCAUCACUAGCAUGCACCCCCGGGGGCACAUCGUACGUACGCUCAGUACACUGCAUGACUACGU"
        >>> graphs = rnashapes_to_eden([("ID", seq)], split_components=True, max_num=2, rnashapes_version=3)
        >>> g = graphs.next()
        >>> # get dotbracket structure annotations
        >>> len([g.graph["structure"] for g in graphs])
        2
    """
    # Generator body: the assertion fires on first iteration, not at call time.
    assert is_iterable(iterable), 'Not iterable'
    for header, seq in iterable:
        try:
            for graph in string_to_networkx(header, seq, **options):
                yield graph
        except Exception as e:
            # Report and degrade rather than stop processing the remaining
            # sequences. ``e.message`` is deprecated (empty unless the
            # exception was built with exactly one argument, missing on
            # Python 3), so the exception itself is printed.
            print(e.__doc__)
            print(e)
            print('Error in: %s' % seq)
            graph = seq_to_networkx(header, seq, **options)
            yield graph