示例#1
0
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the RNAsubopt structures of ``sequence``.

    Options read from ``options`` (defaults in parentheses):
    ``energy_range`` (10), ``max_num`` (3), ``max_num_subopts`` (100) and
    ``split_components`` (False).  The first three are forwarded to
    ``rnasubopt_wrapper``.

    When ``split_components`` is true, one graph is yielded per structure
    returned by the wrapper.  Otherwise the per-structure graphs are merged
    with ``nx.disjoint_union`` and the single merged graph is yielded.  In
    both modes a graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)
    structures, energies = rnasubopt_wrapper(
        sequence,
        energy_range=energy_range,
        max_num=max_num,
        max_num_subopts=max_num_subopts)

    if not split_components:
        # Single-graph mode: accumulate every structure into one graph.
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (
            energy_range, max_num)
        merged.graph['sequence'] = sequence
        for dotbracket in structures:
            component = sequence_dotbracket_to_graph(
                seq_info=sequence, seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, component)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    # Split mode: one graph per (structure, energy) pair.
    for dotbracket, energy in zip(structures, energies):
        graph = sequence_dotbracket_to_graph(
            seq_info=sequence, seq_struct=dotbracket)
        graph.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (
            energy, max_num)
        if graph.number_of_nodes() < 2:
            # The replacement graph does not receive the 'info' set above.
            graph = seq_to_networkx(header, sequence, **options)
        graph.graph['id'] = header
        graph.graph['sequence'] = sequence
        graph.graph['structure'] = dotbracket
        yield graph
    def __init__(self, sequence_name, sequence, vectorizer, structure):
        """Store the inputs and build the base and abstract graphs.

        Parameters
        ----------
        sequence_name : stored as ``self.sequence_name``
        sequence : stored as ``self.sequence`` and passed as ``seq_info``
        vectorizer : object providing ``_edge_to_vertex_transform``
        structure : stored as ``self.structure`` and passed as ``seq_struct``
        """
        self.sequence_name = sequence_name
        self.sequence = sequence
        self.vectorizer = vectorizer
        self.structure = structure

        # Base graph: built from sequence + structure, then expanded.
        plain_base = conv.sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=structure)
        self.base_graph = vectorizer._edge_to_vertex_transform(plain_base)

        # Abstract graph: built from the structure alone, expanded the same
        # way, then linked to the base graph by edge_parent_finder.
        expanded_abstract = vectorizer._edge_to_vertex_transform(
            forgi.get_abstr_graph(structure))
        self.abstract_graph = edge_parent_finder(expanded_abstract, self.base_graph)

        for _node, attrs in self.abstract_graph.nodes(data=True):
            # We are forced to set a label on edge nodes (eden requirement);
            # the label used here is the empty string.
            if 'edge' in attrs:
                attrs['label'] = ''
            # Every node needs a 'contracted' attribute.  Originally this
            # happens naturally, but since multiloops are made into one loop
            # some nodes are left out.
            if 'contracted' not in attrs:
                attrs['contracted'] = set()
示例#3
0
    def transform(self, sequences):
        """Build one ``RnaWrapper`` per input sequence.

        Parameters
        ----------
        sequences : iterable over rna sequences

        Returns
        -------
        list
            One entry per input sequence, in input order.  An entry is an
            ``RnaWrapper``, or ``None`` when
            ``self.NNmodel.transform_single`` returned ``None`` as the
            structure.
        """
        result = []
        for sequence in sequences:
            # transform_single is given a (name, sequence) pair; the name
            # passed here is the placeholder 'fake'.
            structure, energy = self.NNmodel.transform_single(
                ('fake', sequence))
            # Identity comparison: None is a singleton, and ``==`` would
            # defer to whatever __eq__ the structure object defines.
            if structure is None:
                result.append(None)
                continue
            if self.structure_mod:
                # fix_structure may return a modified sequence as well.
                structure, sequence = fix_structure(structure, sequence)
            base_graph = converter.sequence_dotbracket_to_graph(
                seq_info=sequence, seq_struct=structure)
            base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
            base_graph = expanded_rna_graph_to_digraph(base_graph)
            base_graph.graph['energy'] = energy
            result.append(
                RnaWrapper(sequence,
                           structure,
                           base_graph,
                           self.vectorizer,
                           self.base_thickness_list,
                           include_base=self.include_base,
                           ignore_inserts=self.ignore_inserts))
        return result
示例#4
0
    def transform(self, sequences):
        """Preprocess each item of ``sequences``.

        Parameters
        ----------
        sequences : iterable over rna sequences

        Returns
        -------
        list
            One entry per input item, in input order.  For a ``str`` item
            the entry is the tuple
            ``(sequence, structure, base_graph, abstract_graph)``; for any
            other item it is the return value of
            ``self.re_transform_single(item)``.
        """
        result = []
        for sequence in sequences:
            if not isinstance(sequence, str):
                # Hack to avoid overwriting the postprocessor: non-string
                # items are handed to re_transform_single unchanged.
                # Needs some changing obviously.
                result.append(self.re_transform_single(sequence))
                continue

            # Normal preprocessing case.  The energy returned by
            # transform_single is not used by this method.
            structure, _energy, sequence = self.NNmodel.transform_single(
                ('fake', sequence))

            base_graph = converter.sequence_dotbracket_to_graph(
                seq_info=sequence, seq_struct=structure)
            base_graph.graph['sequence'] = sequence
            base_graph.graph['structure'] = structure

            # The abstract graph is derived from a copy, taken before the
            # base graph is expanded below.
            abstract_graph = self.make_abstract.abstract(base_graph.copy())

            base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
            base_graph = graphlearn.minor.rna.expanded_rna_graph_to_digraph(base_graph)

            result.append((sequence, structure, base_graph, abstract_graph))
        return result
示例#5
0
def string_to_networkx(header, sequence, **options):
    """Return the graph for the RNAfold result of ``sequence``.

    ``options`` is forwarded unchanged to ``RNAfold_wrapper``.  The returned
    graph carries the graph attributes ``info`` ('RNAfold'), ``sequence``
    (the input sequence), ``structure`` (the structure from the wrapper)
    and ``id`` (``header``).
    """
    folded_seq, dotbracket = RNAfold_wrapper(sequence, **options)
    graph = sequence_dotbracket_to_graph(seq_info=folded_seq,
                                         seq_struct=dotbracket)
    graph.graph.update({
        'info': 'RNAfold',
        'sequence': sequence,
        'structure': dotbracket,
        'id': header,
    })
    return graph
示例#6
0
文件: rnafold.py 项目: bgruening/EDeN
def string_to_networkx(header, sequence, **options):
    """Fold ``sequence`` via ``RNAfold_wrapper`` and return it as a graph.

    All keyword ``options`` go straight to ``RNAfold_wrapper``.  Four graph
    attributes are set on the result: ``info``, ``sequence``, ``structure``
    and ``id``.
    """
    wrapper_seq, wrapper_struct = RNAfold_wrapper(sequence, **options)
    result = sequence_dotbracket_to_graph(seq_info=wrapper_seq,
                                          seq_struct=wrapper_struct)
    annotations = (
        ('info', 'RNAfold'),
        ('sequence', sequence),  # the caller's sequence, not wrapper_seq
        ('structure', wrapper_struct),
        ('id', header),
    )
    for key, value in annotations:
        result.graph[key] = value
    return result
示例#7
0
 def _seq_to_eden(self, header, sequence, struct, energy):
     """Return an annotated graph for ``sequence`` folded as ``struct``.

     A graph with fewer than two nodes is replaced by
     ``seq_to_networkx(header, sequence)``.  The returned graph has the
     graph attributes ``id``, ``info``, ``energy`` and ``sequence`` set.
     """
     folded = sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=struct)
     if folded.number_of_nodes() >= 2:
         result = folded
     else:
         result = seq_to_networkx(header, sequence)
     attributes = result.graph
     attributes['id'] = header
     attributes['info'] = 'muscle+RNAalifold energy=%.3f' % (energy)
     attributes['energy'] = energy
     attributes['sequence'] = sequence
     return result
示例#8
0
def string_to_networkx(header, sequence, **options):
    """Yield graph(s) built from the RNAshapes structures of ``sequence``.

    Options read from ``options`` (defaults in parentheses):
    ``shape_type`` (5), ``energy_range`` (10), ``max_num`` (3),
    ``split_components`` (False) and ``rnashapes_version`` (2).

    When ``split_components`` is true, one graph is yielded per structure,
    with id ``header + '_' + <shape>``.  Otherwise all per-structure graphs
    are merged with ``nx.disjoint_union`` and the merged graph is yielded.
    A graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)
    seq_info, dotbrackets, shapes = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))

    if not split_components:
        merged = nx.Graph()
        merged.graph['id'] = header
        merged.graph['info'] = (
            'RNAshapes shape_type=%s energy_range=%s max_num=%s'
            % (shape_type, energy_range, max_num))
        merged.graph['sequence'] = sequence
        for dotbracket in dotbrackets:
            piece = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                 seq_struct=dotbracket)
            merged = nx.disjoint_union(merged, piece)
        if merged.number_of_nodes() < 2:
            merged = seq_to_networkx(header, sequence, **options)
        yield merged
        return

    for dotbracket, shape in zip(dotbrackets, shapes):
        graph = sequence_dotbracket_to_graph(seq_info=seq_info,
                                             seq_struct=dotbracket)
        graph.graph['info'] = (
            'RNAshapes shape_type=%s energy_range=%s max_num=%s'
            % (shape_type, energy_range, max_num))
        graph.graph['id'] = header + '_' + shape
        if graph.number_of_nodes() < 2:
            # The replacement graph gets the plain header as id.
            graph = seq_to_networkx(header, sequence, **options)
            graph.graph['id'] = header
        graph.graph['sequence'] = sequence
        graph.graph['structure'] = dotbracket
        yield graph
def string_to_networkx(header, sequence, constraint, **options):
    """Yield graph(s) from the constrained RNAsubopt folds of ``sequence``.

    ``constraint`` is passed to ``rnasubopt_wrapper`` as its second
    positional argument.  Options read from ``options`` (defaults in
    parentheses): ``energy_range`` (10), ``max_num`` (3),
    ``max_num_subopts`` (100) and ``split_components`` (False).

    With ``split_components`` true, one graph per returned structure is
    yielded; otherwise a single graph, the ``nx.disjoint_union`` of all of
    them, is yielded.  A graph with fewer than two nodes is replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    max_num_subopts = options.get('max_num_subopts', 100)
    split_components = options.get('split_components', False)
    fold_kwargs = {
        'energy_range': energy_range,
        'max_num': max_num,
        'max_num_subopts': max_num_subopts,
    }
    structures, energies = rnasubopt_wrapper(sequence, constraint,
                                             **fold_kwargs)
    if split_components:
        for dotbracket, energy in zip(structures, energies):
            current = sequence_dotbracket_to_graph(seq_info=sequence,
                                                   seq_struct=dotbracket)
            current.graph['info'] = 'RNAsubopt energy=%s max_num=%s' % (
                energy, max_num)
            if current.number_of_nodes() < 2:
                current = seq_to_networkx(header, sequence, **options)
            current.graph['id'] = header
            current.graph['sequence'] = sequence
            current.graph['structure'] = dotbracket
            yield current
    else:
        union = nx.Graph()
        union.graph['id'] = header
        union.graph['info'] = 'RNAsubopt energy_range=%s max_num=%s' % (
            energy_range, max_num)
        union.graph['sequence'] = sequence
        for dotbracket in structures:
            current = sequence_dotbracket_to_graph(seq_info=sequence,
                                                   seq_struct=dotbracket)
            union = nx.disjoint_union(union, current)
        if union.number_of_nodes() < 2:
            union = seq_to_networkx(header, sequence, **options)
        yield union
示例#10
0
def string_to_networkx(header, sequence, **options):
    """Generate graph(s) for ``sequence`` from its RNAshapes structures.

    Reads ``shape_type`` (default 5), ``energy_range`` (10), ``max_num``
    (3), ``split_components`` (False) and ``rnashapes_version`` (2) from
    ``options``.

    In split mode each structure yields its own graph, with id
    ``header + '_' + <shape>``.  Otherwise one graph, the
    ``nx.disjoint_union`` of all structure graphs, is yielded.  Graphs with
    fewer than two nodes are replaced by
    ``seq_to_networkx(header, sequence, **options)``.
    """
    shape_type = options.get('shape_type', 5)
    energy_range = options.get('energy_range', 10)
    max_num = options.get('max_num', 3)
    split_components = options.get('split_components', False)
    wrapper_output = rnashapes_wrapper(
        sequence,
        shape_type=shape_type,
        energy_range=energy_range,
        max_num=max_num,
        rnashapes_version=options.get('rnashapes_version', 2))
    seq_info, structure_list, shape_list = wrapper_output

    if split_components:
        for structure, shape in zip(structure_list, shape_list):
            single = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                  seq_struct=structure)
            single.graph['info'] = (
                'RNAshapes shape_type=%s energy_range=%s max_num=%s'
                % (shape_type, energy_range, max_num))
            single.graph['id'] = header + '_' + shape
            if single.number_of_nodes() < 2:
                # Replacement graph: id is the bare header.
                single = seq_to_networkx(header, sequence, **options)
                single.graph['id'] = header
            single.graph['sequence'] = sequence
            single.graph['structure'] = structure
            yield single
    else:
        combined = nx.Graph()
        combined.graph['id'] = header
        combined.graph['info'] = (
            'RNAshapes shape_type=%s energy_range=%s max_num=%s'
            % (shape_type, energy_range, max_num))
        combined.graph['sequence'] = sequence
        for structure in structure_list:
            single = sequence_dotbracket_to_graph(seq_info=seq_info,
                                                  seq_struct=structure)
            combined = nx.disjoint_union(combined, single)
        if combined.number_of_nodes() < 2:
            combined = seq_to_networkx(header, sequence, **options)
        yield combined
示例#11
0
    def _sequence_to_base_graph(self, sequence):
        '''
        Build the base graph for one sequence.

        Parameters
        ----------
        sequence: rna sequence, passed to ``self.NNmodel.transform_single``

        Returns
        -------
        the result of ``converter.sequence_dotbracket_to_graph`` for the
        (structure, sequence) pair returned by ``transform_single``
        '''
        folded = self.NNmodel.transform_single(sequence)
        # transform_single yields (structure, sequence); the returned
        # sequence replaces the one passed in.
        structure, sequence = folded
        return converter.sequence_dotbracket_to_graph(seq_info=sequence,
                                                      seq_struct=structure)
示例#12
0
    def transform(self, sequences):
        """Preprocess each sequence into a graph tuple.

        Parameters
        ----------
        sequences : iterable over rna sequences

        Returns
        -------
        list
            One entry per input sequence, in input order.  An entry is the
            tuple ``(sequence, structure, base_graph, abstract_graph)``, or
            ``None`` when no structure is available after
            ``fix_structure``.
        """
        result = []
        for sequence in sequences:

            # A tuple is assumed to be a (name, sequence) pair; only a bare
            # sequence is wanted, so warn.  Processing continues regardless.
            if isinstance(sequence, tuple):
                logger.warning('YOUR INPUT IS A TUPPLE, GIVE ME A SEQUENCE, SINCERELY -- YOUR RNA PREPROCESSOR')

            # Get structure; 'fake' is a placeholder name.
            structure, energy, sequence = self.NNmodel.transform_single(('fake', sequence))
            # Fix the structure.
            # NOTE(review): fix_structure runs before the None check below,
            # so it must cope with whatever transform_single returned --
            # confirm that this ordering is intended.
            structure, sequence = fix_structure(structure, sequence)
            # Identity comparison: None is a singleton.
            if structure is None:
                result.append(None)
                continue

            # Build base_graph.
            base_graph = converter.sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=structure)
            base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
            base_graph = expanded_rna_graph_to_digraph(base_graph)
            base_graph.graph['energy'] = energy
            base_graph.graph['sequence'] = sequence
            base_graph.graph['structure'] = structure
            result.append(
                (sequence, structure, base_graph, self.abstract_graph(base_graph))
            )

        return result
示例#13
0
    def transform(self, sequences):
        """Build one ``RnaWrapper`` per input sequence.

        Parameters
        ----------
        sequences : iterable over rna sequences

        Returns
        -------
        list
            One entry per input sequence, in input order.  An entry is an
            ``RnaWrapper``, or ``None`` when
            ``self.NNmodel.transform_single`` returned ``None`` as the
            structure.
        """
        result = []
        for sequence in sequences:

            # A tuple is assumed to be a (name, sequence) pair; only a bare
            # sequence is wanted, so warn.  Processing continues regardless.
            if isinstance(sequence, tuple):
                logger.warning('YOUR INPUT IS A TUPPLE, GIVE ME A SEQUENCE, SINCERELY -- YOUR RNA PREPROCESSOR')

            # Get structure; 'fake' is a placeholder name.
            structure, energy = self.NNmodel.transform_single(('fake', sequence))
            # Identity comparison: None is a singleton.
            if structure is None:
                result.append(None)
                continue

            # The consensus structure is not meant to be used as a folding
            # plan for individual sequences, so we do the best we can to get
            # a valid structure.
            if self.structure_mod:
                structure, sequence = fix_structure(structure, sequence)

            # Build base_graph.
            base_graph = converter.sequence_dotbracket_to_graph(seq_info=sequence, seq_struct=structure)
            base_graph = self.vectorizer._edge_to_vertex_transform(base_graph)
            base_graph = expanded_rna_graph_to_digraph(base_graph)
            base_graph.graph['energy'] = energy
            result.append(
                RnaWrapper(sequence,
                           structure,
                           base_graph,
                           self.vectorizer,
                           self.base_thickness_list,
                           include_base=self.include_base,
                           ignore_inserts=self.ignore_inserts))
        return result