示例#1
0
def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Classify new samples with the pre-trained MKL model stored under ``models/``.

    Each of the four feature files is read with ``read_feature_data`` and
    rescaled with ``get_normalized_given_max_min`` against its stored bounds
    file.  The matching serialized training features are loaded, one Gaussian
    sub-kernel is created per feature group, and the serialized
    ``MKLClassification`` model is applied to the combined kernel.

    Args:
        graph_file: path of the extracted graph features.
        cons_file: path of the extracted "cons" features.
        tri_file: path of the extracted "tri" features.
        other_feature_file: path of the remaining ("alu") features.

    Returns:
        The labels produced by ``MKLClassification.apply().get_labels()``.

    Raises:
        IOError: if any serialized feature file or the model file cannot be
            loaded (previously the failure status was silently ignored).
    """
    # One row per feature group:
    # (extracted feature file, normalisation bounds file, serialized train features)
    # NOTE(review): 'grtaph' looks like a typo but is a runtime path; it is kept
    # byte-for-byte -- verify against the file name actually shipped on disk.
    feature_sources = (
        (graph_file, 'models/grtaph_max_size', 'models/graph.dat'),
        (cons_file, 'models/cons_max_size', 'models/cons.dat'),
        (tri_file, 'models/tri_max_size', 'models/tri.dat'),
        (other_feature_file, 'models/alu_max_size', 'models/alu.dat'),
    )
    width = 0.5  # width passed to every Gaussian sub-kernel

    print('reading extracted features')
    normalized_features = []
    for feature_file, bounds_file, _ in feature_sources:
        raw_feature = read_feature_data(feature_file)
        normalized_features.append(
            get_normalized_given_max_min(raw_feature, bounds_file))

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Sub-features and sub-kernels must be appended in the same order so that
    # the i-th kernel is paired with the i-th train/test feature object.
    for feature, (_, _, train_file) in zip(normalized_features, feature_sources):
        subkfeats_test = RealFeatures(np.transpose(np.array(feature)))
        feats_test.append_feature_obj(subkfeats_test)

        subkfeats_train = RealFeatures()
        fstream = SerializableAsciiFile(train_file, "r")
        if not subkfeats_train.load_serializable(fstream):
            raise IOError('could not load training features from %s' % train_file)
        feats_train.append_feature_obj(subkfeats_train)

        # First argument is presumably the kernel cache size -- TODO confirm.
        kernel.append_kernel(GaussianKernel(10, width))

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print('downloading model file')
        # NOTE(review): fetched over plain HTTP without any integrity check.
        url_add = 'http://rth.dk/resources/mirnasponge/data/mkl.dat'
        urllib.urlretrieve(url_add, model_file)

    print('loading trained model')
    fstream = SerializableAsciiFile(model_file, "r")
    new_mkl = MKLClassification()
    if not new_mkl.load_serializable(fstream):
        raise IOError('could not load trained model from %s' % model_file)

    print('model predicting')
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    return new_mkl.apply().get_labels()
示例#2
0
def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat):
    """Train a two-class MKL machine on a combined kernel and classify test data.

    The combined kernel has two sub-kernels: a ``CustomKernel`` wrapping a
    precomputed ``PolyKernel(10, 3)`` matrix, and a ``PolyKernel(10, 2)``
    evaluated on the real-valued features.

    Args:
        fm_train_real: training feature matrix handed to ``RealFeatures``.
        fm_test_real: test feature matrix handed to ``RealFeatures``.
        fm_label_twoclass: training labels handed to ``BinaryLabels``.

    Returns:
        A tuple ``(predictions, kernel)`` where ``predictions`` is the result
        of ``mkl.apply()`` on the test kernel and ``kernel`` is that combined
        test kernel.
    """

    ##################################
    # set up and train

    # precompute poly kernel matrices (train x train, train x test) that are
    # later wrapped in CustomKernel objects
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10,2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1) #2,3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()

    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel (train vs. test)
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify; apply() is evaluated once (the original ran it twice and
    # discarded the first result)
    mkl.set_kernel(kernel)
    return mkl.apply(),kernel
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """
    Run an MKL SVM over several spectrum kernels.

    One ``CommWordStringKernel`` is added for every ``i`` in
    ``range(K1, K2, -1)``; the matching sorted word features are built from
    the DNA strings with ``obtain_from_char(..., i - 1, i, GAP, False)``.

    Args:
        train_xt: sequence of training strings.
        train_lt: training labels, aligned with ``train_xt``.
        test_xt: sequence of test strings.

    Returns:
        ``(out1, out2, train_lt)``: predicted labels on the (shuffled)
        training set, predicted labels on the test set, and the training
        labels in the same shuffled order as ``out1``.
    """

    ##################################################

    # Take all examples, in random order.  The previous
    # np.random.randint(1, 14000, 14000) hard-coded the dataset size, never
    # selected index 0 and sampled with replacement, so it neither used every
    # example nor worked for datasets with fewer than 14000 entries.
    idxs = np.random.permutation(len(train_xt))
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # build sorted word features for the training data
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()
        # build word features for the testing data, reusing the preprocessor
        # that was initialised on the training features
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()
        # append the WORD features: CommWordStringKernel operates on word
        # features, so appending the raw char features (as before) paired the
        # kernel with the wrong feature type and left the features built
        # above unused
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)
        # append spectrum kernel
        # NOTE(review): the second constructor argument is presumably
        # `use_sign`, not the k-mer order -- confirm that passing `i` is intended.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    # Alternative: multiple weighted degree kernels.  Uncomment this block and
    # comment out the spectrum kernel loop above to use it.
    #
    # for i in range(K1, K2, -1):
    #     # append training data to combined feature object
    #     charfeat_train = StringCharFeatures(list(train_xt), DNA)
    #     # append testing data to combined feature object
    #     charfeat_test = StringCharFeatures(test_xt, DNA)
    #     # append features
    #     feats_train.append_feature_obj(charfeat_train)
    #     feats_test.append_feature_obj(charfeat_test)
    #     # setup weighted degree kernel
    #     kernel1 = WeightedDegreePositionStringKernel(10, i)
    #     kernel1.io.set_loglevel(MSG_DEBUG)
    #     kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
    #     kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
    #     kernel.append_kernel(kernel1)

    ##################### Training #########################

    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  #1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()

    return out1, out2, train_lt
def runShogunSVMMultipleKernels(train_xt, train_lt, test_xt):
    """
    Run an MKL SVM over several spectrum kernels.

    One ``CommWordStringKernel`` is added for every ``i`` in
    ``range(K1, K2, -1)``; the matching sorted word features are built from
    the DNA strings with ``obtain_from_char(..., i - 1, i, GAP, False)``.

    Args:
        train_xt: sequence of training strings.
        train_lt: training labels, aligned with ``train_xt``.
        test_xt: sequence of test strings.

    Returns:
        ``(out1, out2, train_lt)``: predicted labels on the (shuffled)
        training set, predicted labels on the test set, and the training
        labels in the same shuffled order as ``out1``.
    """
    # Indentation is normalised to four spaces throughout; the original mixed
    # tabs and spaces, which Python 3 rejects with a TabError.

    ##################################################

    # Take all examples, in random order.  The previous
    # np.random.randint(1, 14000, 14000) hard-coded the dataset size, never
    # selected index 0 and sampled with replacement, so it neither used every
    # example nor worked for datasets with fewer than 14000 entries.
    idxs = np.random.permutation(len(train_xt))
    train_xt = np.array(train_xt)[idxs]
    train_lt = np.array(train_lt)[idxs]

    # Initialize kernel and features
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()
    labels = BinaryLabels(train_lt)

    ##################### Multiple Spectrum Kernels #########################
    for i in range(K1, K2, -1):
        # build sorted word features for the training data
        charfeat_train = StringCharFeatures(list(train_xt), DNA)
        feats_train_k1 = StringWordFeatures(DNA)
        feats_train_k1.obtain_from_char(charfeat_train, i - 1, i, GAP, False)
        preproc = SortWordString()
        preproc.init(feats_train_k1)
        feats_train_k1.add_preprocessor(preproc)
        feats_train_k1.apply_preprocessor()
        # build word features for the testing data, reusing the preprocessor
        # that was initialised on the training features
        charfeat_test = StringCharFeatures(test_xt, DNA)
        feats_test_k1 = StringWordFeatures(DNA)
        feats_test_k1.obtain_from_char(charfeat_test, i - 1, i, GAP, False)
        feats_test_k1.add_preprocessor(preproc)
        feats_test_k1.apply_preprocessor()
        # append the WORD features: CommWordStringKernel operates on word
        # features, so appending the raw char features (as before) paired the
        # kernel with the wrong feature type and left the features built
        # above unused
        feats_train.append_feature_obj(feats_train_k1)
        feats_test.append_feature_obj(feats_test_k1)
        # append spectrum kernel
        # NOTE(review): the second constructor argument is presumably
        # `use_sign`, not the k-mer order -- confirm that passing `i` is intended.
        kernel1 = CommWordStringKernel(10, i)
        kernel1.io.set_loglevel(MSG_DEBUG)
        kernel.append_kernel(kernel1)

    # Alternative: multiple weighted degree kernels.  Uncomment this block and
    # comment out the spectrum kernel loop above to use it.
    #
    # for i in range(K1, K2, -1):
    #     # append training data to combined feature object
    #     charfeat_train = StringCharFeatures(list(train_xt), DNA)
    #     # append testing data to combined feature object
    #     charfeat_test = StringCharFeatures(test_xt, DNA)
    #     # append features
    #     feats_train.append_feature_obj(charfeat_train)
    #     feats_test.append_feature_obj(charfeat_test)
    #     # setup weighted degree kernel
    #     kernel1 = WeightedDegreePositionStringKernel(10, i)
    #     kernel1.io.set_loglevel(MSG_DEBUG)
    #     kernel1.set_shifts(SHIFT*np.ones(len(train_xt[0]), dtype=np.int32))
    #     kernel1.set_position_weights(np.ones(len(train_xt[0]), dtype=np.float64))
    #     kernel.append_kernel(kernel1)

    ##################### Training #########################

    print("Starting MKL training..")
    mkl = MKLClassification()
    mkl.set_mkl_norm(3)  #1,2,3
    mkl.set_C(SVMC, SVMC)
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)
    mkl.train(feats_train)

    print("Making predictions!")
    out1 = mkl.apply(feats_train).get_labels()
    out2 = mkl.apply(feats_test).get_labels()

    return out1, out2, train_lt
示例#5
0
def _load_serialized(target, path):
    """Fill *target* from the serialized ASCII file at *path*; raise IOError on failure."""
    fstream = SerializableAsciiFile(path, "r")
    if not target.load_serializable(fstream):
        raise IOError("could not load serialized data from %s" % path)
    return target


def predict_new_data(graph_file, cons_file, tri_file, other_feature_file):
    """Classify new samples with the pre-trained MKL model stored under ``models/``.

    Each of the four feature files is read with ``read_feature_data`` and
    rescaled with ``get_normalized_given_max_min`` against its stored bounds
    file.  The matching serialized training features are loaded, one Gaussian
    sub-kernel is created per feature group, and the serialized
    ``MKLClassification`` model is applied to the combined kernel.

    Args:
        graph_file: path of the extracted graph features.
        cons_file: path of the extracted "cons" features.
        tri_file: path of the extracted "tri" features.
        other_feature_file: path of the remaining ("alu") features.

    Returns:
        The labels produced by ``MKLClassification.apply().get_labels()``.

    Raises:
        IOError: if any serialized feature file or the model file cannot be
            loaded (previously the failure status was silently ignored).
    """
    # One row per feature group:
    # (extracted feature file, normalisation bounds file, serialized train features)
    # NOTE(review): "grtaph" looks like a typo but is a runtime path; it is kept
    # byte-for-byte -- verify against the file name actually shipped on disk.
    feature_sources = (
        (graph_file, "models/grtaph_max_size", "models/graph.dat"),
        (cons_file, "models/cons_max_size", "models/cons.dat"),
        (tri_file, "models/tri_max_size", "models/tri.dat"),
        (other_feature_file, "models/alu_max_size", "models/alu.dat"),
    )
    width = 0.5  # width passed to every Gaussian sub-kernel

    print("reading extracted features")
    normalized_features = [
        get_normalized_given_max_min(read_feature_data(feature_file), bounds_file)
        for feature_file, bounds_file, _ in feature_sources
    ]

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Sub-features and sub-kernels must be appended in the same order so that
    # the i-th kernel is paired with the i-th train/test feature object.
    for feature, (_, _, train_file) in zip(normalized_features, feature_sources):
        feats_test.append_feature_obj(
            RealFeatures(np.transpose(np.array(feature))))
        feats_train.append_feature_obj(
            _load_serialized(RealFeatures(), train_file))
        # First argument is presumably the kernel cache size -- TODO confirm.
        kernel.append_kernel(GaussianKernel(10, width))

    model_file = "models/mkl.dat"
    if not os.path.exists(model_file):
        print("downloading model file")
        # NOTE(review): fetched over plain HTTP without any integrity check.
        url_add = "http://rth.dk/resources/mirnasponge/data/mkl.dat"
        urllib.urlretrieve(url_add, model_file)

    print("loading trained model")
    new_mkl = _load_serialized(MKLClassification(), model_file)

    print("model predicting")
    kernel.init(feats_train, feats_test)
    new_mkl.set_kernel(kernel)
    return new_mkl.apply().get_labels()