Exemplo n.º 1
0
    def test_sequence_to_eden_id_attribute(self):
        """Check that the graph id is set from the fasta header.

        The expected id still contains the ``center:25`` annotation, i.e. the
        header annotation is kept as part of the id and not split off.
        """
        fa_fn = "test/test_fasta_to_sequence_with_center_annotation.fa"
        graphs = sequence_to_eden(fasta_to_sequence(fa_fn))
        # The builtin next() works on Python 2.6+ and Python 3 alike, whereas
        # the .next() method only exists on Python 2 iterators.
        graph = next(graphs)
        assert graph.graph["id"] == "ID0 center:25"
def run_rnashape(sequence):
    """Run ``./RNAshapes`` on ``sequence`` and return its encoded structure.

    The sequence is fed to ``./RNAshapes -t 5 -c 10 -# 1`` on standard input.
    The second whitespace-separated field of the selected output line is
    stored as the ``structure`` attribute of a graph built from the sequence;
    the graph is annotated with ``annotate_single`` and the upper-cased
    ``entity_short`` value of every node is concatenated into the result.

    Args:
        sequence: the sequence to process.

    Returns:
        A string with one upper-cased ``entity_short`` code per graph node.

    Raises:
        OSError: if ``./RNAshapes`` cannot be executed.
        IndexError: if the tool prints fewer lines or fields than expected.
    """
    import subprocess

    # Pass the arguments as a list and the sequence on stdin instead of
    # interpolating it into a shell command line: characters in `sequence`
    # that are special to the shell can then no longer alter the command.
    cmd = ['./RNAshapes', '-t', '5', '-c', '10', '-#', '1']
    proc = subprocess.Popen(cmd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    # A trailing newline is appended, matching what `echo` used to send.
    out, _ = proc.communicate('%s\n' % sequence)
    text = out.splitlines()

    # When the last line is the "configured to print" notice, the structure
    # line is the one just before it; otherwise it is the second output line.
    if 'configured to print' in text[-1]:
        struct_text = text[-2]
    else:
        struct_text = text[1]
    structur = struct_text.split()[1]

    # Encode the structure through the annotated graph.
    graph = next(sequence_to_eden([("ID", sequence)]))
    graph.graph['structure'] = structur
    annotate_single(graph)
    encode_struct = ''.join(
        x["entity_short"].upper() for x in graph.node.values())
    gc.collect()
    return encode_struct
Exemplo n.º 3
0
 def _serial_graph_motif(self, seqs, placeholder=None):
     """Return the motif strings extracted from the graphs built from ``seqs``.

     The sequences are converted to graphs, annotated by ``self.vectorizer``
     using ``self.estimator``, and every max-subarray reported for each
     annotated graph contributes its ``'subarray_string'`` to the result.
     ``placeholder`` is accepted but not used.
     """
     annotated = self.vectorizer.annotate(sequence_to_eden(seqs),
                                          estimator=self.estimator)
     # One flat list: for each annotated graph, every subarray string found
     # within the configured size bounds.
     return [hit['subarray_string']
             for annotated_graph in annotated
             for hit in compute_max_subarrays(
                 graph=annotated_graph,
                 min_subarray_size=self.min_subarray_size,
                 max_subarray_size=self.max_subarray_size)]
Exemplo n.º 4
0
 def test_symmetric_reweighting_no_annotation(self):
     """Reweighting must fail when the header lacks a "center:int" annotation.

     The sequence id used here is the bare string "ID", so an AssertionError
     is expected; the test fails if none is raised.
     """
     graphs = sequence_to_eden([("ID", "ACGUACGUAC")])
     try:
         graphs = va.symmetric_trapezoidal_reweighting(graphs,
                                                       high_weight=1,
                                                       low_weight=0,
                                                       radius_high=1,
                                                       distance_high2low=2)
         # Fetch one graph and read its weights so the reweighting is
         # actually exercised. The builtin next() is used because the
         # .next() method only exists on Python 2 iterators.
         [x["weight"] for x in next(graphs).node.values()]
     except AssertionError:
         pass
     else:
         raise Exception('ExpectedException not thrown')
 def test_symmetric_reweighting_no_annotation(self):
     """Expect an AssertionError when no center annotation is present.

     The reweighting relies on a "center:int" annotation in the fasta header;
     the input here only carries the id "ID", so the call chain must raise.
     """
     graphs = sequence_to_eden([("ID", "ACGUACGUAC")])
     try:
         reweighted = va.symmetric_trapezoidal_reweighting(
             graphs,
             high_weight=1,
             low_weight=0,
             radius_high=1,
             distance_high2low=2)
         # next() instead of .next(): the method form is Python-2-only.
         first_graph = next(reweighted)
         [x["weight"] for x in first_graph.node.values()]
     except AssertionError:
         pass
     else:
         raise Exception('ExpectedException not thrown')
Exemplo n.º 6
0
 def pre_processor(seqs, **args):
     """Mark each sequence at three relative positions and convert to graphs.

     ``mark_modifier`` is applied through ``seq_to_seq`` three times, in this
     order: position 0.5 with "%", position 0.0 with "@", position 1.0 with
     "*". The marked sequences are then passed to ``sequence_to_eden``.
     Extra keyword arguments are accepted but not used.
     """
     # (relative position, mark symbol) pairs, applied in order.
     markers = ((0.5, "%"), (0.0, "@"), (1.0, "*"))
     for where, symbol in markers:
         seqs = seq_to_seq(seqs,
                           modifier=mark_modifier,
                           position=where,
                           mark=symbol)
     return sequence_to_eden(seqs)
Exemplo n.º 7
0
 def _serial_graph_motif(self, seqs, placeholder=None):
     """Extract motif strings from ``seqs``, processing the graphs serially.

     Sequences are turned into graphs and annotated via ``self.vectorizer``
     with ``self.estimator``; for each annotated graph the
     ``'subarray_string'`` of every max-subarray within the configured size
     limits is appended to the returned list. ``placeholder`` is unused.
     """
     found = []
     eden_graphs = sequence_to_eden(seqs)
     for scored in self.vectorizer.annotate(eden_graphs,
                                            estimator=self.estimator):
         hits = compute_max_subarrays(
             graph=scored,
             min_subarray_size=self.min_subarray_size,
             max_subarray_size=self.max_subarray_size)
         found.extend(hit['subarray_string'] for hit in hits)
     return found
Exemplo n.º 8
0
 def pre_processor(seqs, **args):
     """Insert three marks into every sequence, then build graphs from them.

     ``seq_to_seq`` is chained with ``mark_modifier`` for position 0.5 ('%'),
     then 0.0 ('@'), then 1.0 ('*'); the result is handed to
     ``sequence_to_eden``. Extra keyword arguments are ignored.
     """
     middle_marked = seq_to_seq(
         seqs, modifier=mark_modifier, position=0.5, mark='%')
     start_marked = seq_to_seq(
         middle_marked, modifier=mark_modifier, position=0.0, mark='@')
     fully_marked = seq_to_seq(
         start_marked, modifier=mark_modifier, position=1.0, mark='*')
     return sequence_to_eden(fully_marked)
Exemplo n.º 9
0
def test_fasta_to_sequence_graph():
    """Smoke test: chain the fasta reader into the graph converter.

    The converter's return value is discarded and never iterated, so this
    only checks that the two calls themselves do not raise.
    """
    sequence_to_eden(fasta_to_sequence("test/test_fasta_to_sequence.fa"))
Exemplo n.º 10
0
 def pre_process_graph(iterator):
     """Convert the items of ``iterator`` to graphs with ``sequence_to_eden``."""
     # Imported locally, as in the original, so the dependency is only
     # resolved when the function is called.
     from eden.converter.fasta import sequence_to_eden
     return sequence_to_eden(iterator)