Example #1
    def predict(self, seq, chunk_size = int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size
        """

        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert(num_chunks > 0)

	sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):

            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert (start < stop)
            chunk = seq[start:stop]

            assert(len(self.sensors) > 0)
            tf = CombinedFeatures()
            for i in range(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            self.svm.set_kernel(self.kernel)
            lab_out = self.svm.apply().get_values()

            assert(len(lab_out) > 0)
            out.extend(lab_out)

            # increment chunk
            start = stop
            stop = min(stop+chunk_size, seq_len)


        # pad the flanking positions that the scoring window cannot cover with
        # sentinel value -42
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]

        # concatenate: left padding + chunk predictions + right padding
        ret = l + out + r

        assert(len(ret) == len(seq))

        return ret
Example #2
def mkl_multiclass_1(fm_train_real, fm_test_real, label_train_multiclass, C,
                     num_threads, mkl_epsilon):
    from shogun import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun import CombinedKernel, GaussianKernel, MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    for i in range(-10, 11):
        subkfeats_train = RealFeatures(fm_train_real)
        subkfeats_test = RealFeatures(fm_test_real)
        subkernel = GaussianKernel(pow(2, i + 1))
        feats_train.append_feature_obj(subkfeats_train)
        feats_test.append_feature_obj(subkfeats_test)
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, kernel, labels)

    mkl.set_epsilon(1e-2)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(1)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
Example #3
def kernel_combined_custom_poly(train_fname=traindat,
                                test_fname=testdat,
                                train_label_fname=label_traindat):
    from shogun import CombinedFeatures, RealFeatures, BinaryLabels
    from shogun import CombinedKernel, PolyKernel, CustomKernel
    from shogun import LibSVM, CSVFile

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()

    tfeats = RealFeatures(CSVFile(train_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_train = RealFeatures(CSVFile(train_fname))
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = BinaryLabels(CSVFile(train_label_fname))
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(CSVFile(test_fname))
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(CSVFile(test_fname))
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()
    km_train = kernel.get_kernel_matrix()
    return km_train, kernel
Example #4
def create_combined_kernel(kname, kparam, examples, train_mode, preproc):
    """A wrapper for creating combined kernels.

    kname, kparam and examples are lists.

    """
    num_kernels = len(kname)
    feats = {}
    feats['combined'] = CombinedFeatures()
    kernel = CombinedKernel()

    for kix in range(num_kernels):
        cur_kname = '%s%d' % (kname[kix], kix)
        (cur_feats, cur_preproc) = create_features(kname[kix], examples[kix],
                                                   kparam[kix], train_mode,
                                                   preproc)
        feats[cur_kname] = cur_feats
        cur_kernel = create_kernel(kname[kix], kparam[kix], cur_feats)
        kernel.append_kernel(cur_kernel)

    return (feats, kernel)
def construct_features(features):
    """
    makes a list
    """

    feat_all = [inst for inst in features]
    feat_lhs = [inst[0:15] for inst in features]
    feat_rhs = [inst[15:] for inst in features]

    feat_wd = get_wd_features(feat_all)
    feat_spec_1 = get_spectrum_features(feat_lhs, order=3)
    feat_spec_2 = get_spectrum_features(feat_rhs, order=3)

    feat_comb = CombinedFeatures()
    feat_comb.append_feature_obj(feat_wd)
    feat_comb.append_feature_obj(feat_spec_1)
    feat_comb.append_feature_obj(feat_spec_2)

    return feat_comb
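The helpers get_wd_features and get_spectrum_features are project-specific and not included in this listing. A minimal sketch of what they could look like is given below, modeled on the spectrum-feature branch of the create_features example further down this page; the function names, the DNA alphabet, and the default order are assumptions, not the original implementation.

from shogun import StringCharFeatures, StringWordFeatures, SortWordString, DNA

def get_wd_features_sketch(strings):
    # weighted-degree string kernels operate directly on char features (sketch)
    return StringCharFeatures(strings, DNA)

def get_spectrum_features_sketch(strings, order=3):
    # spectrum (k-mer count) features over DNA strings (sketch)
    char_feats = StringCharFeatures(strings, DNA)
    word_feats = StringWordFeatures(char_feats.get_alphabet())
    # map each sequence to its overlapping k-mers of length `order`
    word_feats.obtain_from_char(char_feats, order - 1, order, 0, False)
    # sort the k-mers so a CommWordStringKernel can be evaluated efficiently
    preproc = SortWordString()
    preproc.init(word_feats)
    word_feats.add_preprocessor(preproc)
    word_feats.apply_preprocessor()
    return word_feats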
Example #7
def mkl_multiclass(fm_train_real, fm_test_real, label_train_multiclass, width,
                   C, epsilon, num_threads, mkl_epsilon, mkl_norm):

    from shogun import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun import MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, kernel, labels)

    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
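A minimal call sketch for mkl_multiclass with random toy data; the feature dimension, class count, and hyperparameter values below are illustrative assumptions rather than values from the original example.

import numpy

numpy.random.seed(17)
fm_train = numpy.random.randn(2, 60)    # 2-D features, 60 training points
fm_test = numpy.random.randn(2, 30)     # 30 test points
train_labels = numpy.array([i % 3 for i in range(60)], dtype=numpy.float64)  # 3 classes

predictions = mkl_multiclass(fm_train, fm_test, train_labels,
                             width=2.1, C=1.0, epsilon=1e-5, num_threads=1,
                             mkl_epsilon=0.001, mkl_norm=1.0)
print(predictions)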
Example #8
def mkl_multiclass (fm_train_real, fm_test_real, label_train_multiclass,
	width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):

	from shogun import CombinedFeatures, RealFeatures, MulticlassLabels
	from shogun import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel
	from shogun import MKLMulticlass

	kernel = CombinedKernel()
	feats_train = CombinedFeatures()
	feats_test = CombinedFeatures()

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = GaussianKernel(10, width)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = LinearKernel()
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = PolyKernel(10,2)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	kernel.init(feats_train, feats_train)

	labels = MulticlassLabels(label_train_multiclass)

	mkl = MKLMulticlass(C, kernel, labels)

	mkl.set_epsilon(epsilon)
	mkl.parallel.set_num_threads(num_threads)
	mkl.set_mkl_epsilon(mkl_epsilon)
	mkl.set_mkl_norm(mkl_norm)

	mkl.train()

	kernel.init(feats_train, feats_test)

	out =  mkl.apply().get_labels()
	return out
Example #9
def create_features(kname, examples, kparam, train_mode, preproc, seq_source,
                    nuc_con):
    """Converts numpy arrays or sequences into shogun features"""

    if kname == 'gauss' or kname == 'linear' or kname == 'poly':
        examples = numpy.array(examples)
        feats = RealFeatures(examples)

    elif kname == 'wd' or kname == 'localalign' or kname == 'localimprove':
        if seq_source == 'dna':
            examples = non_atcg_convert(examples, nuc_con)
            feats = StringCharFeatures(examples, DNA)
        elif seq_source == 'protein':
            examples = non_aminoacid_converter(examples, nuc_con)
            feats = StringCharFeatures(examples, PROTEIN)
        else:
            sys.stderr.write("Sequence source -" + seq_source +
                             "- is invalid. select [dna|protein]\n")
            sys.exit(-1)

    elif kname == 'spec' or kname == 'cumspec':
        if seq_source == 'dna':
            examples = non_atcg_convert(examples, nuc_con)
            feats = StringCharFeatures(examples, DNA)
        elif seq_source == 'protein':
            examples = non_aminoacid_converter(examples, nuc_con)
            feats = StringCharFeatures(examples, PROTEIN)
        else:
            sys.stderr.write("Sequence source -" + seq_source +
                             "- is invalid. select [dna|protein]\n")
            sys.exit(-1)

        wf = StringUlongFeatures(feats.get_alphabet())
        wf.obtain_from_char(feats, kparam['degree'] - 1, kparam['degree'], 0,
                            kname == 'cumspec')
        del feats

        if train_mode:
            preproc = SortUlongString()
            preproc.init(wf)
        wf.add_preprocessor(preproc)
        ret = wf.apply_preprocessor()
        #assert(ret)

        feats = wf
    elif kname == 'spec2' or kname == 'cumspec2':
        # spectrum kernel on two sequences
        feats = {}
        feats['combined'] = CombinedFeatures()

        reversed = kname == 'cumspec2'

        (ex0, ex1) = zip(*examples)

        f0 = StringCharFeatures(list(ex0), DNA)
        wf = StringWordFeatures(f0.get_alphabet())
        wf.obtain_from_char(f0, kparam['degree'] - 1, kparam['degree'], 0,
                            reversed)
        del f0

        if train_mode:
            preproc = SortWordString()
            preproc.init(wf)
        wf.add_preprocessor(preproc)
        ret = wf.apply_preprocessor()
        assert (ret)
        feats['combined'].append_feature_obj(wf)
        feats['f0'] = wf

        f1 = StringCharFeatures(list(ex1), DNA)
        wf = StringWordFeatures(f1.get_alphabet())
        wf.obtain_from_char(f1, kparam['degree'] - 1, kparam['degree'], 0,
                            reversed)
        del f1

        if train_mode:
            preproc = SortWordString()
            preproc.init(wf)
        wf.add_preprocessor(preproc)
        ret = wf.apply_preprocessor()
        assert (ret)
        feats['combined'].append_feature_obj(wf)
        feats['f1'] = wf

    else:
        print('Unknown kernel %s' % kname)

    return (feats, preproc)
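A small usage sketch for the simple 'gauss' branch of create_features, where the examples are only wrapped in RealFeatures; kparam, preproc, seq_source and nuc_con are unused on that path and passed as placeholders (toy data, illustrative only).

import numpy

toy_examples = numpy.random.randn(2, 20)   # 2-D real-valued toy data, 20 examples
feats, preproc = create_features('gauss', toy_examples, kparam={},
                                 train_mode=True, preproc=None,
                                 seq_source=None, nuc_con=None)
print(feats.get_num_vectors())             # 20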
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    from shogun import CrossValidation, CrossValidationResult
    from shogun import ParameterObserverCV
    from shogun import ContingencyTableEvaluation, ACCURACY
    from shogun import StratifiedCrossValidationSplitting
    from shogun import BinaryLabels
    from shogun import RealFeatures, CombinedFeatures
    from shogun import GaussianKernel, CombinedKernel
    from shogun import LibSVM, MKLClassification

    # training data, combined features all on same data
    features=RealFeatures(traindat)
    comb_features=CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels=BinaryLabels(label_traindat)

    # kernel, different Gaussians combined
    kernel=CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create MKL classifier on top of LibSVM (interleaved optimization is
    # disabled to work around a memory bug)
    svm=MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold cross-validation (stratified splitting is
    # preferable for classification; the plain "CrossValidationSplitting" is
    # also available)
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation=CrossValidation(svm, comb_features, labels,
        splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # attach the cross-validation parameter observer that records the MKL weights
    mkl_storage=ParameterObserverCV()
    cross_validation.subscribe_to_parameters(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result=cross_validation.evaluate()

    # print mkl weights
    weights = []
    for obs_index in range(mkl_storage.get_num_observations()):
        obs = mkl_storage.get_observation(obs_index)
        for fold_index in range(obs.get_num_folds()):
            fold = obs.get_fold(fold_index)
            machine = MKLClassification.obtain_from_generic(fold.get_trained_machine())
            w = machine.get_kernel().get_subkernel_weights()
            weights.append(w)

    print("mkl weights during cross--validation")
    print(weights)
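The traindat/label_traindat defaults are module-level variables not shown in this listing; the following sketch runs the demo on random toy data instead (sizes and the balanced ±1 labels are arbitrary choices).

import numpy

numpy.random.seed(3)
toy_feats = numpy.random.randn(2, 60)                              # 2-D features, 60 points
toy_labels = numpy.concatenate((-numpy.ones(30), numpy.ones(30)))  # balanced -1/+1 labels
evaluation_cross_validation_mkl_weight_storage(toy_feats, toy_labels)  # prints per-fold MKL weights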
def evaluation_cross_validation_multiclass_storage(traindat=traindat,
                                                   label_traindat=label_traindat):
    from shogun import CrossValidation, CrossValidationResult
    from shogun import ParameterObserverCV
    from shogun import MulticlassAccuracy, F1Measure
    from shogun import StratifiedCrossValidationSplitting
    from shogun import MulticlassLabels
    from shogun import RealFeatures, CombinedFeatures
    from shogun import GaussianKernel, CombinedKernel
    from shogun import MKLMulticlass
    from shogun import Statistics, MSG_DEBUG, Math
    from shogun import ROCEvaluation

    Math.init_random(1)

    # training data, combined features all on same data
    features=RealFeatures(traindat)
    comb_features=CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels=MulticlassLabels(label_traindat)

    # kernel, different Gaussians combined
    kernel=CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # multiclass MKL machine on the combined kernel
    svm=MKLMulticlass(1.0, kernel, labels)
    svm.set_kernel(kernel)

    # splitting strategy for 3-fold cross-validation (stratified splitting is
    # preferable for classification; the plain "CrossValidationSplitting" is
    # also available)
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 3)

    # evaluation method
    evaluation_criterium=MulticlassAccuracy()

    # cross-validation instance
    cross_validation=CrossValidation(svm, comb_features, labels,
        splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross validation parameter observer
    multiclass_storage=ParameterObserverCV()
    cross_validation.subscribe_to_parameters(multiclass_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result=cross_validation.evaluate()

    # get first observation and first fold
    obs = multiclass_storage.get_observations()[0]
    fold = obs.get_folds_results()[0]

    # get fold ROC for first class
    eval_ROC = ROCEvaluation()
    pred_lab_binary = MulticlassLabels.obtain_from_generic(fold.get_test_result()).get_binary_for_class(0)
    true_lab_binary = MulticlassLabels.obtain_from_generic(fold.get_test_true_result()).get_binary_for_class(0)
    eval_ROC.evaluate(pred_lab_binary, true_lab_binary)
    print(eval_ROC.get_ROC())

    # get fold evaluation result
    acc_measure = F1Measure()
    print(acc_measure.evaluate(pred_lab_binary, true_lab_binary))
Example #12
def combined_kernel(file_type, data_name, operate_type):
    if file_type == '4':
        X, y = loadFromMat(data_name)
    elif file_type == '5':
        X, y = loadFromLibsvm(data_name)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_size)
    if type(X_train) == scipy.sparse.csr.csr_matrix and type(
            X_test) == scipy.sparse.csr.csr_matrix:
        X_train = X_train.todense()
        X_test = X_test.todense()
    X_train = X_train.T
    X_test = X_test.T
    y_train = y_train.reshape(y_train.size, ).astype('float64')
    y_test = y_test.reshape(y_test.size, ).astype('float64')

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    subkfeats_train = RealFeatures(X_train)
    subkfeats_test = RealFeatures(X_test)
    for i in range(-10, 11):
        subkernel = GaussianKernel(pow(2, i + 1))
        feats_train.append_feature_obj(subkfeats_train)
        feats_test.append_feature_obj(subkfeats_test)
        kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_train)
    tmp_train_csv = NamedTemporaryFile(suffix=data_name + '_combined.csv')

    import time
    start = time.time()
    if operate_type == 'save':
        km_train = kernel.get_kernel_matrix()
        f = CSVFile(tmp_train_csv.name, "w")
        kernel.save(f)
    elif operate_type == 'load':
        f = CSVFile(tmp_train_csv.name, "r")
        kernel.load(f)
    end = time.time()
    print('time to save or load the kernel: ' + str(end - start))

    labels = MulticlassLabels(y_train)

    mkl = MKLMulticlass(C, kernel, labels)

    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    start = time.time()
    mkl.train()
    end = time.time()
    print('training time: ' + str(end - start))

    kernel.init(feats_train, feats_test)
    out = mkl.apply().get_labels()
    print(out.shape)
    print(sum(out == y_test) / float(len(out)))
Example #13
    def __init__(self):
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        self.window = (+100000, -1000000)
Example #14
class SignalSensor(object):
    """
    A collection of sensors
    """
    def __init__(self):
        self.sensors = list()
        self.kernel = CombinedKernel()
        self.svs = CombinedFeatures()
        self.svm = None
        self.window = (+100000, -1000000)

    def from_file(self, file):
        sys.stderr.write('loading model file')
        l = file.readline()

        if l != '%arts version: 1.0\n':
            sys.stderr.write("\nfile not an arts definition file\n")
            return None

        bias = None
        alphas = None
        num_kernels = None

        while l:
            # skip comment or empty line
            if not (l.startswith('%') or l.startswith('\n')):
                if bias is None: bias = parse_float(l, 'b')
                if alphas is None: alphas = parse_vector(l, file, 'alphas')
                if num_kernels is None: num_kernels = parse_int(l, 'num_kernels')

                if num_kernels and bias and alphas is not None:
                    for i in range(num_kernels):
                        s = Sensor()
                        (k, f) = s.from_file(file, i + 1)
                        k.io.enable_progress()
                        self.window = (min(self.window[0], s.window[0]),
                                max(self.window[1], s.window[2]))
                        self.sensors.append(s)
                        self.kernel.append_kernel(k)
                        self.svs.append_feature_obj(f)

                    self.kernel.init(self.svs, self.svs)
                    self.svm = KernelMachine(self.kernel, alphas,
                            numpy.arange(len(alphas), dtype=numpy.int32), bias)
                    self.svm.io.set_target_to_stderr()
                    self.svm.io.enable_progress()
                    self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
                    sys.stderr.write('done\n')
                    return

            l = file.readline()

        sys.stderr.write('error loading model file\n')


    def predict(self, seq, chunk_size = int(10e6)):
        """
        predicts on whole contig, splits up sequence in chunks of size chunk_size
        """

        seq_len = len(seq)
        num_chunks = int(numpy.ceil(float(seq_len) / float(chunk_size)))
        assert(num_chunks > 0)

	sys.stderr.write("number of chunks for contig: %i\n" % (num_chunks))

        start = 0
        stop = min(chunk_size, seq_len)

        out = []

        # iterate over chunks
        for chunk_idx in range(num_chunks):

            sys.stderr.write("processing chunk #%i\n" % (chunk_idx))

            assert (start < stop)
            chunk = seq[start:stop]

            assert(len(self.sensors) > 0)
            tf = CombinedFeatures()
            for i in range(len(self.sensors)):
                f = self.sensors[i].get_test_features(chunk, self.window)
                tf.append_feature_obj(f)

            sys.stderr.write("initialising kernel...")
            self.kernel.init(self.svs, tf)
            sys.stderr.write("..done\n")

            self.svm.set_kernel(self.kernel)
            lab_out = self.svm.apply().get_values()

            assert(len(lab_out) > 0)
            out.extend(lab_out)

            # increment chunk
            start = stop
            stop = min(stop+chunk_size, seq_len)


        # pad the flanking positions that the scoring window cannot cover with
        # sentinel value -42
        l = (-self.window[0]) * [-42]
        r = self.window[1] * [-42]

        # concatenate: left padding + chunk predictions + right padding
        ret = l + out + r

        assert(len(ret) == len(seq))

        return ret
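A hedged sketch of how SignalSensor is meant to be driven: load a trained model in the '%arts version: 1.0' format expected by from_file, then score a contig with predict. The model file name and the toy contig are hypothetical; Sensor, KernelMachine and the parse_* helpers come from the same original script.

sensor = SignalSensor()
with open('splice_model.arts') as model_file:   # hypothetical model file in arts 1.0 format
    sensor.from_file(model_file)

contig = 'ACGT' * 250000                        # toy contig sequence
scores = sensor.predict(contig)
print(len(scores) == len(contig))               # predict pads the flanks, so lengths match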
Example #15
def mkl_binclass(fm_train_real=traindat, fm_test_real=testdat,
                 fm_label_twoclass=label_traindat):

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10,2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # other norm choices: 2, 3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)


    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel
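A minimal call sketch for mkl_binclass with random toy data standing in for the traindat/testdat/label_traindat defaults defined elsewhere in the original script; sizes and the ±1 labels are illustrative assumptions.

import numpy

numpy.random.seed(7)
toy_train = numpy.random.randn(2, 40)
toy_test = numpy.random.randn(2, 20)
toy_labels = numpy.concatenate((-numpy.ones(20), numpy.ones(20)))   # BinaryLabels expects -1/+1

predictions, combined_kernel = mkl_binclass(toy_train, toy_test, toy_labels)
print(predictions.get_labels())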
Example #16
def kernel_combined (fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ):
	from shogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
	from shogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

	kernel=CombinedKernel()
	feats_train=CombinedFeatures()
	feats_test=CombinedFeatures()

	subkfeats_train=RealFeatures(fm_train_real)
	subkfeats_test=RealFeatures(fm_test_real)
	subkernel=GaussianKernel(10, 1.1)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train=StringCharFeatures(fm_train_dna, DNA)
	subkfeats_test=StringCharFeatures(fm_test_dna, DNA)
	degree=3
	subkernel=FixedDegreeStringKernel(10, degree)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train=StringCharFeatures(fm_train_dna, DNA)
	subkfeats_test=StringCharFeatures(fm_test_dna, DNA)
	subkernel=LocalAlignmentStringKernel(10)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	kernel.init(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
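A usage sketch for kernel_combined with toy inputs, assuming the DNA arguments are plain Python lists of equal-length strings (the form StringCharFeatures accepts) and that each list has as many entries as the corresponding real feature matrix has columns; the actual traindat/testdat defaults are not reproduced here.

import numpy

toy_train_real = numpy.random.randn(2, 5)
toy_test_real = numpy.random.randn(2, 4)
toy_train_dna = ['ACGTACGTAC', 'TTGGAACCTT', 'GGGTTTAAAC', 'CATGCATGCA', 'AACCGGTTAA']
toy_test_dna = ['ACGTTGCAAC', 'TTAACCGGTA', 'CCCGGGTTTA', 'GATTACAGAT']

km_train, km_test, kernel = kernel_combined(toy_train_real, toy_test_real,
                                            toy_train_dna, toy_test_dna)
print(km_train.shape, km_test.shape)   # (5, 5) and (5, 4)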
Example #18
def mkl_binclass(fm_train_real=traindat,
                 fm_test_real=testdat,
                 fm_label_twoclass=label_traindat):

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10, 3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1)  # other norm choices: 2, 3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    out = mkl.apply()
    return out, kernel