示例#1
0
    def get_predictions_from_seqdict(self, seqdic, site):
        """Score all candidate positions of every sequence in ``seqdic``.

        For each sequence, a single test-features object covering all of
        its candidate positions is built, so the SVM outputs for that
        sequence are computed in one pass (the original author notes this
        is necessary to compute the splice outputs fast and with low
        memory).

        Args:
            seqdic: iterable of sequence records. Each record ``s`` is read
                through ``s.seq`` and ``s.preds[site].positions`` and is
                updated through ``s.preds[site].set_scores``.
            site: key selecting which entry of ``s.preds`` to score.

        Returns:
            None. One score per candidate position is stored on each record.
        """
        seqlen = self.window_right + self.window_left + 2

        for s in seqdic:
            site_preds = s.preds[site]
            positions = site_preds.positions

            # Shift every candidate position by the offset and the left
            # window size (presumably yielding the window start) before
            # handing it to obtain_by_position_list.
            position_list = DynamicIntArray()
            for pos in positions:
                position_list.append_element(
                    pos - self.offset - self.window_left)

            t = StringCharFeatures([s.seq], DNA)
            t.obtain_by_position_list(seqlen, position_list)
            self.wd_kernel.init(self.traindat, t)

            self.wd_kernel.io.enable_progress()
            # NOTE(review): on newer Shogun releases get_labels() may return
            # thresholded labels rather than raw outputs - confirm that this
            # is what set_scores() should receive.
            outputs = self.svm.apply().get_labels()
            self.wd_kernel.cleanup()
            sys.stdout.write("\n...done...\n")

            # Copy exactly one output per candidate position.
            scores = [outputs[j] for j in range(len(positions))]
            site_preds.set_scores(scores)
	def get_predictions_from_seqdict(self, seqdic, site):
		"""Score all candidate positions of every sequence in ``seqdic``.

		For each sequence, a single test-features object covering all of
		its candidate positions is built, so the SVM outputs for that
		sequence are computed in one pass (the original author notes this
		is necessary to compute the splice outputs fast and with low
		memory).

		Args:
			seqdic: iterable of sequence records. Each record ``s`` is read
				through ``s.seq`` and ``s.preds[site].positions`` and is
				updated through ``s.preds[site].set_scores``.
			site: key selecting which entry of ``s.preds`` to score.

		Returns:
			None. One score per candidate position is stored on each record.
		"""
		seqlen = self.window_right + self.window_left + 2

		for s in seqdic:
			site_preds = s.preds[site]
			positions = site_preds.positions

			# Shift every candidate position by the offset and the left
			# window size (presumably yielding the window start) before
			# handing it to obtain_by_position_list.
			position_list = DynamicIntArray()
			for pos in positions:
				position_list.append_element(
					pos - self.offset - self.window_left)

			t = StringCharFeatures([s.seq], DNA)
			t.obtain_by_position_list(seqlen, position_list)
			self.wd_kernel.init(self.traindat, t)

			self.wd_kernel.io.enable_progress()
			outputs = self.svm.apply().get_values()
			self.wd_kernel.cleanup()
			sys.stdout.write("\n...done...\n")

			# Copy exactly one output per candidate position.
			scores = [outputs[j] for j in range(len(positions))]
			site_preds.set_scores(scores)
示例#3
0
    def get_predictions(self, sequence, positions):
        """Return the SVM outputs for the given positions of ``sequence``.

        Args:
            sequence: the sequence string; it is wrapped in a one-element
                list and turned into DNA string features.
            positions: iterable of candidate positions within ``sequence``.
                Each one is shifted by ``self.offset`` and
                ``self.window_left`` before a window is extracted.

        Returns:
            Whatever ``self.svm.classify().get_labels()`` yields for the
            extracted windows (presumably one value per position).
        """
        seqlen = self.window_right + self.window_left + 2

        position_list = DynamicIntArray()
        for pos in positions:
            position_list.append_element(
                pos - self.offset - self.window_left)

        t = StringCharFeatures([sequence], DNA)
        t.obtain_by_position_list(seqlen, position_list)
        self.wd_kernel.init(self.traindat, t)
        # Drop the local reference once the kernel has been initialised.
        del t

        self.wd_kernel.io.enable_progress()
        outputs = self.svm.classify().get_labels()
        self.wd_kernel.cleanup()
        sys.stdout.write("\n...done...\n")
        return outputs
	def get_predictions(self, sequence, positions):
		"""Return the SVM outputs for the given positions of ``sequence``.

		Args:
			sequence: the sequence string; it is wrapped in a one-element
				list and turned into DNA string features.
			positions: iterable of candidate positions within ``sequence``.
				Each one is shifted by ``self.offset`` and
				``self.window_left`` before a window is extracted.

		Returns:
			Whatever ``self.svm.apply().get_values()`` yields for the
			extracted windows (presumably one value per position).
		"""
		seqlen = self.window_right + self.window_left + 2

		position_list = DynamicIntArray()
		for pos in positions:
			position_list.append_element(
				pos - self.offset - self.window_left)

		t = StringCharFeatures([sequence], DNA)
		t.obtain_by_position_list(seqlen, position_list)
		self.wd_kernel.init(self.traindat, t)
		# Drop the local reference once the kernel has been initialised.
		del t

		self.wd_kernel.io.enable_progress()
		outputs = self.svm.apply().get_values()
		self.wd_kernel.cleanup()
		sys.stdout.write("\n...done...\n")
		return outputs
示例#5
0
def features_string_sliding_window_modular(strings):
    """Demonstrate window extraction on Shogun string features.

    Builds DNA string features from ``strings``, applies sliding windows of
    length 5 and then 4 (step 1), resets the features to the input string
    and finally extracts windows of size 4 and then 8 at the fixed
    positions 0, 6, 16 and 25.

    Args:
        strings: a single DNA string (it is wrapped in a one-element list).
            Presumably it must be at least 33 characters long so that the
            size-8 window at position 25 fits - confirm against Shogun.

    Returns:
        The ``StringCharFeatures`` object in its final state (windows of
        size 8 at the fixed positions).
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Library import DynamicIntArray

    f = StringCharFeatures([strings], DNA)

    # Slide a window of length 5, then of length 4, over the features
    # (memory efficient, does not copy strings). To inspect a result use
    # f.get_num_vectors(), f.get_vector_length(i) or f.get_features().
    f.obtain_by_sliding_window(5, 1)
    f.obtain_by_sliding_window(4, 1)

    # Reset the features to the input string. The original referenced an
    # undefined name here, which raised NameError.
    f.set_features([strings])

    # Extract string windows at positions 0, 6, 16, 25
    # (memory efficient, does not copy strings).
    positions = DynamicIntArray()
    for start in (0, 6, 16, 25):
        positions.append_element(start)

    f.obtain_by_position_list(4, positions)

    # Now extract windows of size 8 from the same position list.
    f.obtain_by_position_list(8, positions)
    return f
def features_string_sliding_window_modular(strings):
    """Show sliding-window and position-list extraction on string features.

    DNA string features are created from ``strings``; windows of length 5
    and then 4 are slid over them with step 1; the features are reset to
    the input string; finally windows of size 4 and then 8 are extracted
    at the fixed positions 0, 6, 16 and 25.

    Args:
        strings: a single DNA string (wrapped in a one-element list before
            being passed to ``StringCharFeatures``). Presumably it needs at
            least 33 characters for the size-8 window at position 25 -
            confirm against Shogun.

    Returns:
        The ``StringCharFeatures`` object after the last extraction.
    """
    from shogun.Features import StringCharFeatures, DNA
    from shogun.Library import DynamicIntArray

    window_positions = (0, 6, 16, 25)

    f = StringCharFeatures([strings], DNA)

    # Sliding windows of length 5 and 4 (memory efficient, does not copy
    # strings). Results can be inspected with f.get_num_vectors(),
    # f.get_vector_length(i) and f.get_features().
    f.obtain_by_sliding_window(5, 1)
    f.obtain_by_sliding_window(4, 1)

    # Restore the un-windowed input. This previously used an undefined
    # name, which raised NameError.
    f.set_features([strings])

    # Extract string windows at the fixed positions
    # (memory efficient, does not copy strings).
    positions = DynamicIntArray()
    for start in window_positions:
        positions.append_element(start)

    f.obtain_by_position_list(4, positions)

    # Now extract windows of size 8 from the same position list.
    f.obtain_by_position_list(8, positions)
    return f