Exemplo n.º 1
0
    def from_file(self, file):
        """Load an ARTS model definition from an open file object.

        Parses the bias ('b'), the alpha vector ('alphas') and the kernel
        count ('num_kernels') from the header, then reads one Sensor per
        kernel and assembles the combined kernel and KernelMachine on self.

        Returns None immediately if the first line is not the exact ARTS
        version stamp; returns nothing on success or after logging a
        load error.
        """
        sys.stderr.write('loading model file')
        l = file.readline()

        # First line must be the exact version stamp, newline included.
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                # Each parse_* presumably returns None when the line does
                # not carry the requested key, so scanning continues.
                if bias is None: bias = parse_float(l, 'b')
                if alphas is None: alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None:
                    num_kernels = parse_int(l, 'num_kernels')

                # BUGFIX: compare bias against None explicitly -- a
                # legitimate bias of 0.0 is falsy, and the old truthiness
                # test kept scanning past the sensor definitions.
                if num_kernels and bias is not None and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        # Grow the global window to cover every sensor.
                        # NOTE(review): the right edge reads s.window[2],
                        # suggesting sensor windows are (left, offset,
                        # right) triples -- confirm against Sensor.
                        self.window = (min(self.window[0], s.window[0]),
                                       max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(
                        self.kernel, alphas,
                        numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(
                        self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')
Exemplo n.º 2
0
    def from_file(self, file):
        """Load an ARTS model definition from an open file object.

        Reads the bias ('b'), alpha vector ('alphas') and kernel count
        ('num_kernels') from the header, then builds one Sensor per kernel
        and assembles the combined kernel and KernelMachine on self.

        Returns None on a bad version stamp; returns nothing on success
        or after logging a load error.
        """
        sys.stderr.write('loading model file')
        l = file.readline()

        # First line must be the exact version stamp, newline included.
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None: bias = parse_float(l, 'b')
                if alphas is None: alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None: num_kernels = parse_int(l, 'num_kernels')

                # BUGFIX: compare bias against None explicitly -- a
                # legitimate bias of 0.0 is falsy, and the old truthiness
                # test kept scanning past the sensor definitions.
                if num_kernels and bias is not None and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        # NOTE(review): the right edge reads s.window[2];
                        # sensor windows look like (left, offset, right)
                        # triples -- confirm against Sensor.
                        self.window = (min(self.window[0], s.window[0]),
                                max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(self.kernel, alphas,
                            numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')
Exemplo n.º 3
0
class svm_splice_model(object):
    def __init__(self, order, traindat, alphas, b, (window_left, offset,
                                                    window_right), consensus):

        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stderr()

        self.svm = KernelMachine(wd_kernel, alphas,
                                 numpy.arange(len(alphas), dtype=numpy.int32),
                                 b)
        self.svm.io.set_target_to_stderr()
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(False)
        self.svm.set_batch_computation_enabled(False)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        self.offset = offset
Exemplo n.º 4
0
class SignalSensor(object):
    """
    A collection of sensors combined into one kernel machine.

    Sensors, their kernels and support-vector features are loaded from an
    ARTS model file; predictions over whole contigs are computed chunk-wise.
    """
    def __init__(self):
        # Individual Sensor objects, one per kernel in the model file.
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        # (left, right) extent; seeded with sentinels so min/max updates
        # from real sensor windows always win.
        self.window = (+100000, -1000000)

    def from_file(self, file):
        """Load an ARTS model definition from an open file object.

        Parses bias, alphas and kernel count, then one Sensor per kernel,
        and assembles the combined kernel and KernelMachine on self.
        Returns None on a bad version stamp; returns nothing on success
        or after logging a load error.
        """
        sys.stderr.write('loading model file')
        l = file.readline()

        # First line must be the exact version stamp, newline included.
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None: bias = parse_float(l, 'b')
                if alphas is None: alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None:
                    num_kernels = parse_int(l, 'num_kernels')

                # BUGFIX: compare bias against None explicitly -- a
                # legitimate bias of 0.0 is falsy, and the old truthiness
                # test kept scanning past the sensor definitions.
                if num_kernels and bias is not None and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        # Grow the global window to cover every sensor.
                        # NOTE(review): right edge reads s.window[2];
                        # sensor windows look like (left, offset, right)
                        # triples -- confirm against Sensor.
                        self.window = (min(self.window[0], s.window[0]),
                                       max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(
                        self.kernel, alphas,
                        numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(
                        self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')

    def predict(self, seq, chunk_size=int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size

        Returns a list the same length as seq: SVM outputs for the inner
        positions, padded at both ends with the sentinel value -42 where
        the window prevents prediction.
        """

        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert (num_chunks > 0)

        sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):

            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert (start < stop)
            chunk = seq[start:stop]

            assert (len(self.sensors) > 0)
            # Rebuild the test-side features for every sensor.
            tf = CombinedFeatures()
            for i in xrange(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            # Left side stays the support vectors; right side is the chunk.
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            self.svm.set_kernel(self.kernel)
            lab_out = self.svm.apply().get_values()

            assert (len(lab_out) > 0)
            out.extend(lab_out)

            # increment chunk
            start = stop
            stop = min(stop + chunk_size, seq_len)

        # Pad both ends with -42 sentinels so the output aligns with seq.
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]

        # concatenate
        ret = l + out + r

        assert (len(ret) == len(seq))

        return ret
Exemplo n.º 5
0
class SignalSensor(object):
    """
    A collection of sensors combined into one kernel machine.

    Sensors, their kernels and support-vector features are loaded from an
    ARTS model file; predictions over whole contigs are computed chunk-wise.
    """
    def __init__(self):
        # Individual Sensor objects, one per kernel in the model file.
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        # (left, right) extent; seeded with sentinels so min/max updates
        # from real sensor windows always win.
        self.window = (+100000, -1000000)

    def from_file(self, file):
        """Load an ARTS model definition from an open file object.

        Parses bias, alphas and kernel count, then one Sensor per kernel,
        and assembles the combined kernel and KernelMachine on self.
        Returns None on a bad version stamp; returns nothing on success
        or after logging a load error.
        """
        sys.stderr.write('loading model file')
        l = file.readline()

        # First line must be the exact version stamp, newline included.
        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None: bias = parse_float(l, 'b')
                if alphas is None: alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None: num_kernels = parse_int(l, 'num_kernels')

                # BUGFIX: compare bias against None explicitly -- a
                # legitimate bias of 0.0 is falsy, and the old truthiness
                # test kept scanning past the sensor definitions.
                if num_kernels and bias is not None and alphas is not None:
                    for i in xrange(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        # NOTE(review): right edge reads s.window[2];
                        # sensor windows look like (left, offset, right)
                        # triples -- confirm against Sensor.
                        self.window = (min(self.window[0], s.window[0]),
                                max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(self.kernel, alphas,
                            numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')

    def predict(self, seq, chunk_size=int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size

        Returns a list the same length as seq: SVM outputs for the inner
        positions, padded at both ends with the sentinel value -42 where
        the window prevents prediction.
        """

        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert (num_chunks > 0)

        # WHITESPACE FIX: this line and a nearby blank line were
        # tab-indented in the original, which trips Python's tab rules.
        sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):

            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert (start < stop)
            chunk = seq[start:stop]

            assert (len(self.sensors) > 0)
            # Rebuild the test-side features for every sensor.
            tf = CombinedFeatures()
            for i in xrange(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            # Left side stays the support vectors; right side is the chunk.
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            self.svm.set_kernel(self.kernel)
            lab_out = self.svm.apply().get_values()

            assert (len(lab_out) > 0)
            out.extend(lab_out)

            # increment chunk
            start = stop
            stop = min(stop + chunk_size, seq_len)

        # Pad both ends with -42 sentinels so the output aligns with seq.
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]

        # concatenate
        ret = l + out + r

        assert (len(ret) == len(seq))

        return ret