def test_sequence_to_eden_id_attribute(self):
    """Check that the graph id is set to the complete fasta header.

    The header annotation ("center:25") is expected to stay part of the
    graph id, i.e. the id must equal the full header "ID0 center:25".
    """
    fa_fn = "test/test_fasta_to_sequence_with_center_annotation.fa"
    graphs = sequence_to_eden(fasta_to_sequence(fa_fn))
    # Use the builtin next() rather than the Python-2-only ``.next()``
    # method; it behaves the same on Python 2 and also works on Python 3.
    graph = next(graphs)
    assert graph.graph["id"] == "ID0 center:25"
def run_rnashape(sequence):
    """Run ``./RNAshapes`` on ``sequence`` and return its encoded structure.

    The program is started with the options ``-t 5 -c 10 -# 1`` and receives
    the sequence on standard input. The structure is the second
    whitespace-separated token of the selected output line; it is stored
    under ``graph.graph['structure']`` of a graph built from the sequence,
    the graph is passed to ``annotate_single`` and the upper-cased
    "entity_short" attributes of all nodes are concatenated.

    Args:
        sequence: the sequence to fold.

    Returns:
        A string with one upper-cased "entity_short" value per graph node.

    Raises:
        OSError: if ``./RNAshapes`` cannot be executed.
        IndexError: if the program prints fewer lines or tokens than
            expected.
    """
    import subprocess

    # Pass an argument list and feed the sequence through stdin instead of
    # interpolating it into an ``echo "..." | ...`` shell pipeline: no shell
    # is involved, so characters such as quotes, ``$`` or backticks in the
    # sequence can neither break the command nor be executed.
    proc = subprocess.Popen(
        ['./RNAshapes', '-t', '5', '-c', '10', '-#', '1'],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        universal_newlines=True)
    # communicate() reads all output, waits for the process and closes the
    # pipes, even when reading fails part-way. The trailing newline mirrors
    # what ``echo`` used to send.
    out, _ = proc.communicate('{0}\n'.format(sequence))
    text = out.splitlines()

    # If the last line contains 'configured to print', the structure line is
    # the one right before it; otherwise it is the second output line.
    if 'configured to print' in text[-1]:
        struct_text = text[-2]
    else:
        struct_text = text[1]
    # The second whitespace-separated token is used as the structure.
    structur = struct_text.split()[1]

    # Encode the structure through the graph annotation.
    graph = next(sequence_to_eden([("ID", sequence)]))
    graph.graph['structure'] = structur
    annotate_single(graph)
    encode_struct = ''.join(
        x["entity_short"].upper() for x in graph.node.values())
    gc.collect()
    return encode_struct
def _serial_graph_motif(self, seqs, placeholder=None):
    """Collect the max-subarray strings of all graphs built from ``seqs``.

    The sequences are converted with ``sequence_to_eden``, annotated by
    ``self.vectorizer.annotate`` using ``self.estimator`` and then scanned
    with ``compute_max_subarrays`` using the size limits configured on the
    instance.

    Args:
        seqs: sequences handed to ``sequence_to_eden``.
        placeholder: accepted but not used by this method.

    Returns:
        A list with the 'subarray_string' entry of every subarray found,
        in graph order.
    """
    annotated_graphs = self.vectorizer.annotate(
        sequence_to_eden(seqs), estimator=self.estimator)
    # One pass over all graphs, flattening the subarrays of each graph.
    return [
        hit['subarray_string']
        for annotated_graph in annotated_graphs
        for hit in compute_max_subarrays(
            graph=annotated_graph,
            min_subarray_size=self.min_subarray_size,
            max_subarray_size=self.max_subarray_size)
    ]
def test_symmetric_reweighting_no_annotation(self):
    """Check that reweighting raises when the center annotation is missing.

    ``va.symmetric_trapezoidal_reweighting`` always expects the center
    position to be annotated in the fasta header using the format
    "center:int". The sequence used here only has the plain id "ID", so an
    AssertionError is expected.
    """
    graphs = sequence_to_eden([("ID", "ACGUACGUAC")])
    try:
        graphs = va.symmetric_trapezoidal_reweighting(
            graphs,
            high_weight=1,
            low_weight=0,
            radius_high=1,
            distance_high2low=2)
        # Fetch the first graph and read its weights; presumably needed
        # because the reweighting is evaluated lazily. The builtin next()
        # replaces the Python-2-only ``.next()`` method.
        [x["weight"] for x in next(graphs).node.values()]
    except AssertionError:
        pass
    else:
        raise Exception('ExpectedException not thrown')
def pre_processor(seqs, **args):
    """Mark the sequences three times and convert them to graphs.

    ``seq_to_seq`` is applied with ``mark_modifier`` for each of the
    (position, mark) pairs (0.5, "%"), (0.0, "@") and (1.0, "*"), in that
    order; the result is passed to ``sequence_to_eden``.

    Args:
        seqs: the input sequences.
        **args: accepted but ignored.

    Returns:
        Whatever ``sequence_to_eden`` returns for the marked sequences.
    """
    for position, mark in ((0.5, "%"), (0.0, "@"), (1.0, "*")):
        seqs = seq_to_seq(
            seqs, modifier=mark_modifier, position=position, mark=mark)
    return sequence_to_eden(seqs)
def _serial_graph_motif(self, seqs, placeholder=None):
    """Return the motif strings extracted from the graphs of ``seqs``.

    Graphs are created with ``sequence_to_eden`` and annotated through
    ``self.vectorizer.annotate`` with ``self.estimator``; for each annotated
    graph ``compute_max_subarrays`` is called with the instance's
    ``min_subarray_size`` and ``max_subarray_size``.

    Args:
        seqs: sequences handed to ``sequence_to_eden``.
        placeholder: accepted but not used by this method.

    Returns:
        A list holding the 'subarray_string' value of every subarray.
    """
    annotated = self.vectorizer.annotate(
        sequence_to_eden(seqs), estimator=self.estimator)

    found = []
    for current in annotated:
        hits = compute_max_subarrays(
            graph=current,
            min_subarray_size=self.min_subarray_size,
            max_subarray_size=self.max_subarray_size)
        # Only the string representation of each subarray is kept.
        found.extend(hit['subarray_string'] for hit in hits)
    return found
def pre_processor(seqs, **args):
    """Apply three marking passes to ``seqs`` and build graphs from them.

    Each pass calls ``seq_to_seq`` with ``mark_modifier``: first with
    position 0.5 and mark '%', then position 0.0 and mark '@', finally
    position 1.0 and mark '*'. The marked sequences are converted with
    ``sequence_to_eden``.

    Args:
        seqs: the input sequences.
        **args: accepted but ignored.

    Returns:
        The result of ``sequence_to_eden`` on the marked sequences.
    """
    passes = (
        dict(position=0.5, mark='%'),
        dict(position=0.0, mark='@'),
        dict(position=1.0, mark='*'),
    )
    marked = seqs
    for settings in passes:
        marked = seq_to_seq(marked, modifier=mark_modifier, **settings)
    graphs = sequence_to_eden(marked)
    return graphs
def test_fasta_to_sequence_graph():
    """Feed the sequences read from the test fasta file to sequence_to_eden.

    The test makes no assertion; it only checks that both calls complete
    without raising.
    """
    # NOTE(review): the return value is never consumed; if sequence_to_eden
    # is lazy, no graph is actually built here - confirm.
    sequence_to_eden(fasta_to_sequence("test/test_fasta_to_sequence.fa"))
def pre_process_graph(iterator):
    """Convert the sequences in ``iterator`` with ``sequence_to_eden``.

    Args:
        iterator: the sequences to convert.

    Returns:
        The result of ``eden.converter.fasta.sequence_to_eden``.
    """
    # Imported locally, so eden is only loaded when this function is called.
    from eden.converter.fasta import sequence_to_eden
    return sequence_to_eden(iterator)