def get_predictions_from_seqdict(self, seqdic, site):
    """Score every candidate position of every sequence in ``seqdic``.

    For each sequence a single test-features object is built that covers
    all candidate positions of that sequence at once. According to the
    original author this is necessary to compute the splice outputs
    efficiently (fast, low memory).

    Args:
        seqdic: iterable of sequence records. Each record must expose
            ``seq`` and ``preds[site]``, the latter providing a
            ``positions`` sequence and a ``set_scores`` method.
        site: index/key used to select the entry of each record's
            ``preds``.

    Returns:
        None. The outputs are stored on each record through
        ``preds[site].set_scores(scores)``.
    """
    # The window length is the same for every sequence; compute it once.
    seqlen = self.window_right + self.window_left + 2

    for s in seqdic:
        positions = s.preds[site].positions

        # Translate each candidate position into the start offset of its
        # window inside the sequence.
        position_list = DynamicIntArray()
        for pos in positions:
            position_list.append_element(pos - self.offset - self.window_left)

        t = StringCharFeatures([s.seq], DNA)
        t.obtain_by_position_list(seqlen, position_list)

        self.wd_kernel.init(self.traindat, t)
        self.wd_kernel.io.enable_progress()
        # NOTE(review): depending on the Shogun version, get_labels() may
        # return thresholded labels rather than real-valued outputs;
        # confirm whether get_values() is wanted here.
        l = self.svm.apply().get_labels()
        self.wd_kernel.cleanup()
        sys.stdout.write("\n...done...\n")

        # One output per candidate position, in the same order.
        scores = [l[k] for k in range(len(positions))]
        s.preds[site].set_scores(scores)
def get_predictions_from_seqdict(self, seqdic, site):
    """Compute and store SVM output values for all positions in ``seqdic``.

    One test-features object is generated per sequence, containing every
    location listed for that sequence. The original author states that
    this is necessary to compute the splice outputs efficiently
    (i.e. fast and with low memory).

    Args:
        seqdic: iterable of sequence records; every record needs a ``seq``
            attribute and a ``preds[site]`` entry offering ``positions``
            and ``set_scores``.
        site: selector passed to ``preds[...]`` on every record.

    Returns:
        None. Results are written back with
        ``preds[site].set_scores(scores)``.
    """
    seqlen = self.window_right + self.window_left + 2
    # Every position is shifted by the same amount to obtain the start of
    # its window.
    shift = self.offset + self.window_left

    for s in seqdic:
        site_preds = s.preds[site]
        positions = site_preds.positions

        position_list = DynamicIntArray()
        for pos in positions:
            position_list.append_element(pos - shift)

        t = StringCharFeatures([s.seq], DNA)
        t.obtain_by_position_list(seqlen, position_list)

        self.wd_kernel.init(self.traindat, t)
        self.wd_kernel.io.enable_progress()
        l = self.svm.apply().get_values()
        self.wd_kernel.cleanup()
        sys.stdout.write("\n...done...\n")

        # Copy exactly one value per position into a plain Python list.
        site_preds.set_scores([l[k] for k in range(len(positions))])
def get_predictions(self, sequence, positions):
    """Return the SVM outputs for the given positions of one sequence.

    Args:
        sequence: the sequence handed to ``StringCharFeatures`` (with the
            ``DNA`` alphabet).
        positions: iterable of candidate positions within ``sequence``.

    Returns:
        Whatever ``self.svm.classify().get_labels()`` yields for the
        extracted windows (presumably one entry per position; verify
        against the Shogun version in use).
    """
    seqlen = self.window_right + self.window_left + 2

    # Convert each candidate position into the start offset of its window.
    position_list = DynamicIntArray()
    for pos in positions:
        position_list.append_element(pos - self.offset - self.window_left)

    t = StringCharFeatures([sequence], DNA)
    t.obtain_by_position_list(seqlen, position_list)

    self.wd_kernel.init(self.traindat, t)
    # Drop the local reference as soon as the kernel has been initialised.
    del t

    self.wd_kernel.io.enable_progress()
    l = self.svm.classify().get_labels()
    self.wd_kernel.cleanup()
    sys.stdout.write("\n...done...\n")
    return l
def get_predictions(self, sequence, positions):
    """Compute SVM output values for selected positions of ``sequence``.

    Args:
        sequence: the sequence wrapped into a ``StringCharFeatures``
            object using the ``DNA`` alphabet.
        positions: iterable of candidate positions inside ``sequence``.

    Returns:
        The result of ``self.svm.apply().get_values()`` for the windows
        extracted at ``positions`` (presumably one value per position;
        verify against the Shogun version in use).
    """
    seqlen = self.window_right + self.window_left + 2
    # Constant shift that maps a position to the start of its window.
    shift = self.offset + self.window_left

    position_list = DynamicIntArray()
    for pos in positions:
        position_list.append_element(pos - shift)

    t = StringCharFeatures([sequence], DNA)
    t.obtain_by_position_list(seqlen, position_list)
    self.wd_kernel.init(self.traindat, t)
    # The local name is not needed after kernel initialisation.
    del t

    self.wd_kernel.io.enable_progress()
    l = self.svm.apply().get_values()
    self.wd_kernel.cleanup()
    sys.stdout.write("\n...done...\n")
    return l
def features_string_sliding_window_modular(strings):
    """Demonstrate window extraction on Shogun string features.

    Args:
        strings: a single DNA string (despite the plural name); it is
            wrapped into a one-element list for ``StringCharFeatures``.
            Presumably it must be long enough for a window of size 8 at
            position 25 (at least 33 characters); confirm against Shogun.

    Returns:
        The ``StringCharFeatures`` object after the final
        ``obtain_by_position_list(8, positions)`` call.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Library import DynamicIntArray

    f = StringCharFeatures([strings], DNA)

    # Slide a window of length 5 over the features
    # (memory efficient, does not copy strings).
    f.obtain_by_sliding_window(5, 1)

    # Slide a window of length 4 over the features
    # (memory efficient, does not copy strings).
    f.obtain_by_sliding_window(4, 1)

    # Load the input string again before extracting by position list.
    # Use the function's own argument here rather than a name that is not
    # defined inside this function.
    f.set_features([strings])

    # Extract string windows of size 4 at positions 0, 6, 16 and 25
    # (memory efficient, does not copy strings).
    positions = DynamicIntArray()
    for start in (0, 6, 16, 25):
        positions.append_element(start)
    f.obtain_by_position_list(4, positions)

    # Now extract windows of size 8 from the same position list.
    f.obtain_by_position_list(8, positions)

    return f
def features_string_sliding_window_modular(strings):
    """Run the sliding-window and position-list extraction examples.

    Args:
        strings: one DNA string (the name is plural, but the value is
            placed in a one-element list when the features are built).
            Presumably needs at least 33 characters so that the size-8
            window at position 25 fits; confirm against Shogun.

    Returns:
        The ``StringCharFeatures`` instance, left in the state produced by
        the last extraction (windows of size 8 at the listed positions).
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Library import DynamicIntArray

    window_starts = (0, 6, 16, 25)

    f = StringCharFeatures([strings], DNA)

    # Sliding windows of length 5, then of length 4, each with step 1
    # (memory efficient, does not copy strings).
    for width in (5, 4):
        f.obtain_by_sliding_window(width, 1)

    # Set the features from the caller's string again before switching to
    # position-list extraction. This must use the ``strings`` argument:
    # no other string is defined within this function.
    f.set_features([strings])

    positions = DynamicIntArray()
    for start in window_starts:
        positions.append_element(start)

    # Windows of size 4 first, then windows of size 8 from the same
    # position list (memory efficient, does not copy strings).
    for width in (4, 8):
        f.obtain_by_position_list(width, positions)

    return f