def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the structures returned by ``rnasubopt_wrapper``.

    Parameters
    ----------
    header : value stored under ``graph['id']`` of the yielded graphs.
    sequence : passed to ``rnasubopt_wrapper`` and used as ``seq_info`` when
        calling ``sequence_dotbracket_to_graph``.
    **options : recognised keys are ``energy_range`` (default 10),
        ``max_num`` (default 3), ``max_num_subopts`` (default 100) and
        ``split_components`` (default False). The complete mapping is also
        forwarded to ``seq_to_networkx`` whenever the fallback is taken.

    Yields
    ------
    If ``split_components`` is truthy: one graph per (structure, energy) pair.
    Otherwise: exactly one graph, the disjoint union of all structure graphs.
    In both modes a graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)

    structures, energies = rnasubopt_wrapper(
        sequence,
        energy_range=energy_range,
        max_num=max_num,
        max_num_subopts=max_num_subopts)

    if not split_components:
        # Single-graph mode: accumulate every structure into one union graph.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                     seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            # The fallback graph replaces the union, including its attributes.
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    # Split mode: one graph per structure.
    for dotbracket, energy in zip(structures, energies):
        component = sequence_dotbracket_to_graph(seq_info=sequence,
                                                 seq_struct=dotbracket)
        # 'info' is written before the size check, so a fallback graph does
        # not receive this value.
        component.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
        if component.number_of_nodes() < 2:
            component = seq_to_networkx(header, sequence, **options)
        component.graph['id'] = header
        component.graph['sequence'] = sequence
        component.graph['structure'] = dotbracket
        yield component
def __init__(self, sequence_name, sequence, vectorizer, structure):
    """Store the inputs and build the expanded base and abstract graphs.

    Parameters
    ----------
    sequence_name : stored unchanged as ``self.sequence_name``.
    sequence : stored as ``self.sequence``; also the ``seq_info`` of the base graph.
    vectorizer : stored as ``self.vectorizer``; its ``_edge_to_vertex_transform``
        is applied to both the base graph and the abstract graph.
    structure : stored as ``self.structure``; also the ``seq_struct`` of the
        base graph and the input to ``forgi.get_abstr_graph``.
    """

    def _label_edge_nodes(graph, what=''):
        # Original author's note: a label has to be set "for eden reasons".
        for _, attrs in graph.nodes(data=True):
            if 'edge' in attrs:
                attrs['label'] = what

    def _ensure_contracted(graph):
        # Original author's note: every node of the abstract graph needs a
        # 'contracted' attribute; merging multiloops into one loop leaves
        # some nodes without it, so they get an empty set here.
        for _, attrs in graph.nodes(data=True):
            attrs.setdefault('contracted', set())

    self.sequence_name = sequence_name
    self.sequence = sequence
    self.vectorizer = vectorizer
    self.structure = structure

    # Base graph: built from the dot-bracket structure, then expanded.
    plain_base = conv.sequence_dotbracket_to_graph(seq_info=sequence,
                                                   seq_struct=structure)
    self.base_graph = vectorizer._edge_to_vertex_transform(plain_base)

    # Abstract graph: obtained from forgi, then expanded the same way.
    expanded_abstract = vectorizer._edge_to_vertex_transform(
        forgi.get_abstr_graph(structure))

    # Connect edges to nodes in the abstract graph.
    self.abstract_graph = edge_parent_finder(expanded_abstract, self.base_graph)

    _label_edge_nodes(self.abstract_graph)
    _ensure_contracted(self.abstract_graph)
def transform(self, sequences):
    """
    Fold every sequence and wrap the resulting digraph in an RnaWrapper.

    Parameters
    ----------
    sequences : iterable over rna sequences

    Returns
    -------
    list with one entry per input sequence, in input order: an RnaWrapper,
    or None when self.NNmodel.transform_single returned None as structure.
    """
    result = []
    for sequence in sequences:
        # transform_single receives a 2-tuple; 'fake' fills the first slot
        # (presumably a sequence name -- confirm against NNmodel).
        structure, energy = self.NNmodel.transform_single(('fake', sequence))

        # Identity test: None is a singleton, and `is` cannot be redirected
        # by a custom __eq__ on the returned structure.
        if structure is None:
            result.append(None)
            continue

        if self.structure_mod:
            structure, sequence = fix_structure(structure, sequence)

        base_graph = converter.sequence_dotbracket_to_graph(
            seq_info=sequence, seq_struct=structure)
        base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
        base_graph = expanded_rna_graph_to_digraph(base_graph)
        base_graph.graph['energy'] = energy

        result.append(
            RnaWrapper(sequence, structure, base_graph, self.vectorizer,
                       self.base_thickness_list,
                       include_base=self.include_base,
                       ignore_inserts=self.ignore_inserts))
    return result
def transform(self, sequences):
    """
    Turn sequences into (sequence, structure, base_graph, abstract_graph) tuples.

    Parameters
    ----------
    sequences : iterable; str items are folded and converted here, any other
        item is handed to self.re_transform_single unchanged.

    Returns
    -------
    list with one entry per input item, in input order. For str items the
    entry is a tuple (sequence, structure, base_graph, abstract_graph); for
    other items it is whatever self.re_transform_single returns.
    """
    result = []
    for sequence in sequences:
        if not isinstance(sequence, str):
            # Original author's note: hack to avoid overwriting the
            # postprocessor; "needs some changing obviously".
            result.append(self.re_transform_single(sequence))
            continue

        # Normal preprocessing case. The energy returned by the model is
        # not used in this method.
        structure, _energy, sequence = self.NNmodel.transform_single(
            ('fake', sequence))
        base_graph = converter.sequence_dotbracket_to_graph(
            seq_info=sequence, seq_struct=structure)
        base_graph.graph['sequence'] = sequence
        base_graph.graph['structure'] = structure

        # The abstract graph is derived from a copy taken before the base
        # graph is expanded and converted to a digraph.
        abstract_graph = self.make_abstract.abstract(base_graph.copy())
        base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
        base_graph = graphlearn.minor.rna.expanded_rna_graph_to_digraph(base_graph)
        result.append((sequence, structure, base_graph, abstract_graph))
    return result
def string_to_networkx(header, sequence, **options):
    """Return the graph of the structure produced by ``RNAfold_wrapper``.

    Parameters
    ----------
    header : stored under ``graph['id']``.
    sequence : passed to ``RNAfold_wrapper``; stored under ``graph['sequence']``.
    **options : forwarded unchanged to ``RNAfold_wrapper``.

    Returns
    -------
    The graph from ``sequence_dotbracket_to_graph`` with the graph attributes
    'info', 'sequence', 'structure' and 'id' set.
    """
    folded_info, folded_struct = RNAfold_wrapper(sequence, **options)
    graph = sequence_dotbracket_to_graph(seq_info=folded_info,
                                         seq_struct=folded_struct)
    # 'sequence' records the caller's input, not the seq_info returned by
    # the wrapper.
    graph.graph.update({
        'info': 'RNAfold',
        'sequence': sequence,
        'structure': folded_struct,
        'id': header,
    })
    return graph
def _seq_to_eden(self, header, sequence, struct, energy):
    """Build an annotated graph for one sequence/structure pair.

    Parameters
    ----------
    header : stored under ``graph['id']``.
    sequence : ``seq_info`` of the graph; stored under ``graph['sequence']``.
    struct : ``seq_struct`` handed to ``sequence_dotbracket_to_graph``.
    energy : stored under ``graph['energy']`` and formatted with three
        decimals into ``graph['info']``.

    Returns
    -------
    The structure graph, or ``seq_to_networkx(header, sequence)`` when the
    structure graph has fewer than two nodes; attributes are set either way.
    """
    result = sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=struct)
    too_small = result.number_of_nodes() < 2
    if too_small:
        result = seq_to_networkx(header, sequence)

    attributes = result.graph
    attributes['id'] = header
    attributes['info'] = 'muscle+RNAalifold energy=%.3f' % energy
    attributes['energy'] = energy
    attributes['sequence'] = sequence
    return result
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the structures returned by ``rnashapes_wrapper``.

    Parameters
    ----------
    header : basis of the ``graph['id']`` attribute.
    sequence : passed to ``rnashapes_wrapper``; stored under ``graph['sequence']``.
    **options : recognised keys are ``shape_type`` (default 5),
        ``energy_range`` (default 10), ``max_num`` (default 3),
        ``split_components`` (default False) and ``rnashapes_version``
        (default 2). The complete mapping is also forwarded to
        ``seq_to_networkx`` whenever the fallback is taken.

    Yields
    ------
    If ``split_components`` is truthy: one graph per structure, with id
    ``header + '_' + struct``; a graph with fewer than two nodes is replaced
    by the ``seq_to_networkx`` fallback, whose id is the plain header.
    Otherwise: exactly one graph, the disjoint union of all structure graphs,
    replaced by the fallback when it has fewer than two nodes.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)

    seq_info, dotbrackets, shapes = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))

    # Both modes annotate their graphs with the same description.
    description = 'RNAshapes shape_type=%s energy_range=%s max_num=%s' % (
        shape_type, energy_range, max_num)

    if not split_components:
        union = nx.Graph()
        union.graph['id'] = header
        union.graph['info'] = description
        union.graph['sequence'] = sequence
        for dotbracket in dotbrackets:
            part = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                seq_struct=dotbracket)
            union = nx.disjoint_union(union, part)
        if union.number_of_nodes() < 2:
            union = seq_to_networkx(header, sequence, **options)
        yield union
        return

    for dotbracket, shape in zip(dotbrackets, shapes):
        part = sequence_dotbracket_to_graph(seq_info=seq_info,
                                            seq_struct=dotbracket)
        part.graph['info'] = description
        part.graph['id'] = header + '_' + shape
        if part.number_of_nodes() < 2:
            # The fallback graph gets the plain header as id and does not
            # receive the description set above.
            part = seq_to_networkx(header, sequence, **options)
            part.graph['id'] = header
        part.graph['sequence'] = sequence
        part.graph['structure'] = dotbracket
        yield part
def string_to_networkx(header, sequence, constraint, **options):
    """Yield graph(s) from the structures ``rnasubopt_wrapper`` returns under a constraint.

    Parameters
    ----------
    header : value stored under ``graph['id']`` of the yielded graphs.
    sequence : passed to ``rnasubopt_wrapper`` and used as ``seq_info`` when
        calling ``sequence_dotbracket_to_graph``.
    constraint : passed as second positional argument to ``rnasubopt_wrapper``.
    **options : recognised keys are ``energy_range`` (default 10),
        ``max_num`` (default 3), ``max_num_subopts`` (default 100) and
        ``split_components`` (default False). The complete mapping is also
        forwarded to ``seq_to_networkx`` whenever the fallback is taken.

    Yields
    ------
    If ``split_components`` is truthy: one graph per (structure, energy) pair.
    Otherwise: exactly one graph, the disjoint union of all structure graphs.
    In both modes a graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)

    folded = rnasubopt_wrapper(sequence,
                               constraint,
                               energy_range=energy_range,
                               max_num=max_num,
                               max_num_subopts=max_num_subopts)
    dotbrackets, energies = folded

    if split_components:
        for dotbracket, energy in zip(dotbrackets, energies):
            single = sequence_dotbracket_to_graph(seq_info=sequence,
                                                  seq_struct=dotbracket)
            # Written before the size check: a fallback graph does not
            # receive this 'info' value.
            single.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (energy, max_num)
            if single.number_of_nodes() < 2:
                single = seq_to_networkx(header, sequence, **options)
            single.graph['id'] = header
            single.graph['sequence'] = sequence
            single.graph['structure'] = dotbracket
            yield single
        return

    combined = nx.Graph()
    combined.graph['id'] = header
    combined.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (energy_range, max_num)
    combined.graph['sequence'] = sequence
    for dotbracket in dotbrackets:
        combined = nx.disjoint_union(
            combined,
            sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=dotbracket))
    if combined.number_of_nodes() < 2:
        combined = seq_to_networkx(header, sequence, **options)
    yield combined
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the structures returned by ``rnashapes_wrapper``.

    Parameters
    ----------
    header : basis of the ``graph['id']`` attribute.
    sequence : passed to ``rnashapes_wrapper``; stored under ``graph['sequence']``.
    **options : recognised keys are ``shape_type`` (default 5),
        ``energy_range`` (default 10), ``max_num`` (default 3),
        ``split_components`` (default False) and ``rnashapes_version``
        (default 2). The complete mapping is also forwarded to
        ``seq_to_networkx`` whenever the fallback is taken.

    Yields
    ------
    If ``split_components`` is truthy: one graph per structure, with id
    ``header + '_' + struct``; a graph with fewer than two nodes is replaced
    by the ``seq_to_networkx`` fallback, whose id is the plain header.
    Otherwise: exactly one graph, the disjoint union of all structure graphs,
    replaced by the fallback when it has fewer than two nodes.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)
    version = options.get('rnashapes_version', 2)

    wrapper_output = rnashapes_wrapper(sequence,
                                       shape_type=shape_type,
                                       energy_range=energy_range,
                                       max_num=max_num,
                                       rnashapes_version=version)
    seq_info, seq_struct_list, struct_list = wrapper_output

    # The same description string annotates the graphs of both modes.
    info_text = 'RNAshapes shape_type=%s energy_range=%s max_num=%s' % (shape_type, energy_range, max_num)

    if split_components:
        for seq_struct, struct in zip(seq_struct_list, struct_list):
            current = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                   seq_struct=seq_struct)
            current.graph['info'] = info_text
            current.graph['id'] = header + '_' + struct
            if current.number_of_nodes() < 2:
                # Fallback graph: id is the plain header, and the 'info' set
                # above is not carried over.
                current = seq_to_networkx(header, sequence, **options)
                current.graph['id'] = header
            current.graph['sequence'] = sequence
            current.graph['structure'] = seq_struct
            yield current
        return

    everything = nx.Graph()
    everything.graph['id'] = header
    everything.graph['info'] = info_text
    everything.graph['sequence'] = sequence
    for seq_struct in seq_struct_list:
        current = sequence_dotbracket_to_graph(seq_info=seq_info,
                                               seq_struct=seq_struct)
        everything = nx.disjoint_union(everything, current)
    if everything.number_of_nodes() < 2:
        everything = seq_to_networkx(header, sequence, **options)
    yield everything
def _sequence_to_base_graph(self, sequence):
    '''
    Fold a sequence with self.NNmodel and convert the result to a graph.

    Parameters
    ----------
    sequence: rna sequence

    Returns
    -------
    nx.graph
        the value returned by converter.sequence_dotbracket_to_graph for the
        (structure, sequence) pair that self.NNmodel.transform_single produced
    '''
    folded = self.NNmodel.transform_single(sequence)
    # The model returns its own sequence next to the structure; that returned
    # sequence, not the argument, is used to build the graph.
    structure, folded_sequence = folded
    return converter.sequence_dotbracket_to_graph(seq_info=folded_sequence,
                                                  seq_struct=structure)
def transform(self, sequences):
    """
    Turn sequences into (sequence, structure, base_graph, abstract_graph) tuples.

    Parameters
    ----------
    sequences : iterable over rna sequences

    Returns
    -------
    list with one entry per input sequence, in input order: a tuple
    (sequence, structure, base_graph, abstract_graph), or None when no
    structure is available after fix_structure.
    """
    result = []
    for sequence in sequences:
        # A tuple input is presumably a (name, sequence) pair; only the
        # sequence is wanted. The item is still processed after the warning.
        if isinstance(sequence, tuple):
            logger.warning('YOUR INPUT IS A TUPPLE, GIVE ME A SEQUENCE, SINCERELY -- YOUR RNA PREPROCESSOR')

        # Get the structure; the model also returns its own sequence.
        structure, energy, sequence = self.NNmodel.transform_single(('fake', sequence))

        # Fix the structure.
        # NOTE(review): fix_structure runs before the None check below, so it
        # must cope with structure being None -- confirm against fix_structure.
        structure, sequence = fix_structure(structure, sequence)
        if structure is None:
            result.append(None)
            continue

        # Build the base graph.
        base_graph = converter.sequence_dotbracket_to_graph(seq_info=sequence,
                                                            seq_struct=structure)
        base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
        base_graph = expanded_rna_graph_to_digraph(base_graph)
        base_graph.graph['energy'] = energy
        base_graph.graph['sequence'] = sequence
        base_graph.graph['structure'] = structure

        result.append(
            (sequence, structure, base_graph, self.abstract_graph(base_graph)))
    return result
def transform(self, sequences):
    """
    Fold every sequence and wrap the resulting digraph in an RnaWrapper.

    Parameters
    ----------
    sequences : iterable over rna sequences

    Returns
    -------
    list with one entry per input sequence, in input order: an RnaWrapper,
    or None when self.NNmodel.transform_single returned None as structure.
    """
    result = []
    for sequence in sequences:
        # A tuple input is presumably a (name, sequence) pair; only the
        # sequence is wanted. The item is still processed after the warning.
        if isinstance(sequence, tuple):
            logger.warning(
                'YOUR INPUT IS A TUPPLE, GIVE ME A SEQUENCE, SINCERELY -- YOUR RNA PREPROCESSOR')

        # Get the structure.
        structure, energy = self.NNmodel.transform_single(('fake', sequence))
        if structure is None:
            result.append(None)
            continue

        # Original author's note: the consensus structure is not meant to be
        # used as a folding plan for individual sequences, so we do the best
        # we can to get a valid structure.
        if self.structure_mod:
            structure, sequence = fix_structure(structure, sequence)

        # Build the base graph.
        base_graph = converter.sequence_dotbracket_to_graph(seq_info=sequence,
                                                            seq_struct=structure)
        base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
        base_graph = expanded_rna_graph_to_digraph(base_graph)
        base_graph.graph['energy'] = energy

        result.append(
            RnaWrapper(sequence, structure, base_graph, self.vectorizer,
                       self.base_thickness_list,
                       include_base=self.include_base,
                       ignore_inserts=self.ignore_inserts))
    return result