    def _union_train(self, prepared_data, param):
        """
        perform union training: all tasks are pooled by setting every pairwise task similarity to 1.0
        """

        normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums)

        # set similarity
        for task_name_lhs in prepared_data.get_task_names():
            for task_name_rhs in prepared_data.get_task_names():

                similarity = 1.0

                normalizer.set_task_similarity(
                    prepared_data.name_to_id(task_name_lhs),
                    prepared_data.name_to_id(task_name_rhs), similarity)

        lab = shogun_factory.create_labels(prepared_data.labels)

        print "creating empty kernel"
        kernel = shogun_factory.create_kernel(prepared_data.examples, param)

        print "setting normalizer"
        kernel.set_normalizer(normalizer)
        kernel.init_normalizer()

        svm = shogun_factory.create_svm(param, kernel, lab)
        svm.set_linadd_enabled(False)
        svm.set_batch_computation_enabled(False)

        # train SVM
        svm.train()

        return svm
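
A note on the all-ones similarity used above: the multitask normalizer scales each kernel entry by the similarity of the two examples' tasks, so a constant similarity of 1.0 leaves the base kernel unchanged and the SVM effectively trains on the pooled data of all tasks. A minimal numpy sketch of that identity (toy kernel values and task ids are assumptions, not project data):

import numpy

# toy base gram matrix for 4 examples (hypothetical values)
K_base = numpy.array([[4.0, 1.0, 0.5, 0.2],
                      [1.0, 3.0, 0.3, 0.6],
                      [0.5, 0.3, 2.0, 1.1],
                      [0.2, 0.6, 1.1, 5.0]])

task_of = [0, 0, 1, 1]      # task id of each example
sim = numpy.ones((2, 2))    # union: every task pair fully similar

# multitask kernel entry = task similarity * base kernel entry
K_mt = numpy.array([[sim[task_of[i], task_of[j]] * K_base[i, j]
                     for j in range(4)] for i in range(4)])

assert numpy.allclose(K_mt, K_base)  # all-ones similarity == pooled model
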
def train_splice_predictor(examples, labels, param):
    """

	@param examples: list of strings
	@param labels: list of integers {-1,1}
	"""

    ##########################
    #   build classifier
    ##########################

    feat_train = create_hashed_features_wdk(param.flags, examples)
    lab = create_labels(labels)
    svm = create_svm(param, feat_train, lab)

    svm.train()

    return svm
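
A usage sketch for train_splice_predictor; the toy sequences and labels are made up, and param stands in for whatever ParameterSvm-style object the surrounding project builds (its flags must carry the WDK/hashing settings expected by create_hashed_features_wdk):

# hypothetical toy data: fixed-length windows around candidate splice sites
examples = ["AAACGTACGT", "CCGTTAGGAT", "TTACGGATCC", "GGCATCGTAA"]
labels = [1, -1, 1, -1]

svm = train_splice_predictor(examples, labels, param)  # param: see note above
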
def init_predictor(examples, labels, param, w):
    """

	@param examples: list of strings
	@param labels: list of integers {-1,1}
	@param w: weight vector of trained svm
	"""

    ##########################
    #   build classifier
    ##########################

    feat_train = create_hashed_features_wdk(param.flags, examples)
    lab = create_labels(labels)
    svm = create_svm(param, feat_train, lab)

    svm.set_w(w)

    return svm
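
init_predictor mirrors train_splice_predictor but skips training: it rebuilds the feature space and injects a stored weight vector via set_w. A hedged round-trip sketch (it assumes the classifier is linear and exposes get_w as the counterpart of set_w):

# train once and keep only the learned weight vector
svm = train_splice_predictor(examples, labels, param)
w = svm.get_w()

# later: restore an equivalent predictor without retraining
predictor = init_predictor(examples, labels, param, w)
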
    def _inner_train(self, prepared_data, param):
        """
        perform inner training by processing the tree
        """


        # init seq handler 
        pseudoseqs = SequencesHandler()

        
        classifiers = []


        for pocket in self.get_pockets(param.flags["all_positions"]):

            print "creating normalizer"
            #import pdb
            #pdb.set_trace()
            
            normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums)
            
            from method_mhc_rbf import SequencesHandlerRbf
            
            task_kernel = SequencesHandlerRbf(1, param.base_similarity, prepared_data.get_task_names(), param.flags["wdk_rbf_on"])
            print "processing pocket", pocket
            
            M = prepared_data.get_num_tasks()
            save_sim_p = numpy.zeros((M,M))
            save_sim_t = numpy.zeros((M,M))

            # set similarity
            for task_name_lhs in prepared_data.get_task_names():
                for task_name_rhs in prepared_data.get_task_names():

                    similarity = 0.0

                    for pseudo_seq_pos in pocket:
                        similarity += float(pseudoseqs.get_similarity(
                            task_name_lhs, task_name_rhs, pseudo_seq_pos - 1))

                    # normalize by pocket size
                    similarity = similarity / float(len(pocket))

                    similarity_task = task_kernel.get_similarity(
                        task_name_lhs, task_name_rhs)

                    print "pocket %s (%s, %s) = %f" % (
                        str(pocket), task_name_lhs, task_name_rhs, similarity)

                    normalizer.set_task_similarity(
                        prepared_data.name_to_id(task_name_lhs),
                        prepared_data.name_to_id(task_name_rhs), similarity)

                    save_sim_p[prepared_data.name_to_id(task_name_lhs),
                               prepared_data.name_to_id(task_name_rhs)] = similarity
                    save_sim_t[prepared_data.name_to_id(task_name_lhs),
                               prepared_data.name_to_id(task_name_rhs)] = similarity_task
            
            
            #from IPython.Shell import IPShellEmbed
            #IPShellEmbed([])()
            
            lab = shogun_factory.create_labels(prepared_data.labels)
            
            print "creating empty kernel"
            kernel = shogun_factory.create_kernel(prepared_data.examples, param)
            
            print "setting normalizer"
            kernel.set_normalizer(normalizer)
            kernel.init_normalizer()

            print "training SVM for pocket", pocket
            svm = shogun_factory.create_svm(param, kernel, lab)
            svm.set_linadd_enabled(False)
            svm.set_batch_computation_enabled(False)
    
            # train SVM
            svm.train()
            
            #import pdb
            #pdb.set_trace()

            classifiers.append(svm)



        return classifiers
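
The pocket similarity computed inside the loop above is just the mean per-position pseudo-sequence similarity over the pocket. Pulled out as a standalone helper for clarity (hypothetical name; positions are 1-based as in the source):

def pocket_similarity(pseudoseqs, task_lhs, task_rhs, pocket):
    """Mean pseudo-sequence similarity of two tasks over a pocket of 1-based positions."""
    total = 0.0
    for pos in pocket:
        total += float(pseudoseqs.get_similarity(task_lhs, task_rhs, pos - 1))
    return total / float(len(pocket))
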
    def _train(self, train_data, param):
        """
        training procedure using training examples and labels
        
        @param train_data: Data relevant to SVM training
        @type train_data: dict<str, list<instances> >
        @param param: Parameters for the training procedure
        @type param: ParameterSvm
        """


        assert(param.base_similarity >= 1)
        
        # merge data sets
        data = PreparedMultitaskData(train_data, shuffle=False)
        
        
        # create shogun data objects
        base_wdk = shogun_factory.create_kernel(data.examples, param)
        lab = shogun_factory.create_labels(data.labels)


        # create normalizer
        normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

        # load hard-coded task-similarity
        task_similarity = helper.load("/fml/ag-raetsch/home/cwidmer/svn/projects/alt_splice_code/src/task_sim_tis.bz2")


        # set similarity
        similarities = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))
        
        for (i, task_name_lhs) in enumerate(data.get_task_names()):
            
            #max_value_row = max(task_similarity.get_row(task_name_lhs))
            max_value_row = 1.0
            
            for (j, task_name_rhs) in enumerate(data.get_task_names()):
                
                similarity = task_similarity.get_value(task_name_lhs, task_name_rhs) / max_value_row
                normalizer.set_task_similarity(i, j, similarity)
                similarities[i,j] = similarity
                
        
        pprint.pprint(similarities)
        
        # set normalizer
        #print "WARNING MTK disabled!!!!!!!!!!!!!!!!!!!!!"                
        base_wdk.set_normalizer(normalizer)
        base_wdk.init_normalizer()
        
        
        # set up svm
        param.flags["svm_type"] = "svmlight" #fix svm type
        
        svm = shogun_factory.create_svm(param, base_wdk, lab)
        
        # make sure these parameters are set correctly
        #print "WARNING MTK WONT WORK WITH THESE SETTINGS!!!!!!!!!!!!!!!!!!!!!"
        svm.set_linadd_enabled(False)
        svm.set_batch_computation_enabled(False)
        

        assert svm.get_linadd_enabled() == False, "linadd should be disabled"
        assert svm.get_batch_computation_enabled() == False, "batch compute should be disabled"
        
        # start training
        svm.train()
        
        
        # save additional information
        self.additional_information["svm objective"] = svm.get_objective()
        self.additional_information["num sv"] = svm.get_num_support_vectors()
        #self.additional_information["distances"] = distances
        self.additional_information["similarities"] = similarities
        
        
        # wrap up predictors
        svms = {}
        
        # use a reference to the same svm several times
        for task_name in data.get_task_names():
            
            task_num = data.name_to_id(task_name)
            
            # save svm and task_num
            svms[task_name] = (task_num, svm)

        return svms
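
Because one jointly trained SVM serves all tasks here, the returned dict maps every task name to the same model together with its numeric task id. A lookup sketch (call site and task name are hypothetical):

svms = self._train(train_data, param)

task_num, svm = svms["toy_task"]   # hypothetical task name
print "task %i: %i support vectors" % (task_num, svm.get_num_support_vectors())
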
    def _inner_train(self, prepared_data, param):
        """
        perform inner training by processing the tree
        """

        # init seq handler

        classifiers = []

        #################
        # mtk
        normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums)

        from method_mhc_rbf import SequencesHandlerRbf
        task_kernel = SequencesHandlerRbf(1, param.base_similarity,
                                          prepared_data.get_task_names(),
                                          param.flags["wdk_rbf_on"])

        # set similarity
        for task_name_lhs in prepared_data.get_task_names():
            for task_name_rhs in prepared_data.get_task_names():

                similarity = task_kernel.get_similarity(
                    task_name_lhs, task_name_rhs)

                normalizer.set_task_similarity(
                    prepared_data.name_to_id(task_name_lhs),
                    prepared_data.name_to_id(task_name_rhs), similarity)

        lab = shogun_factory.create_labels(prepared_data.labels)

        print "creating empty kernel"
        kernel = shogun_factory.create_kernel(prepared_data.examples, param)

        print "setting normalizer"
        kernel.set_normalizer(normalizer)
        kernel.init_normalizer()

        svm = shogun_factory.create_svm(param, kernel, lab)
        svm.set_linadd_enabled(False)
        svm.set_batch_computation_enabled(False)

        # train SVM
        svm.train()

        classifiers.append(svm)

        #################
        # dirac
        #import pdb
        #pdb.set_trace()

        svm_dirac = self._dirac_train(prepared_data, param)

        classifiers.append(svm_dirac)

        ##
        #union

        #svm_union = self._union_train(prepared_data, param)

        #classifiers.append(svm_union)

        return classifiers
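
The returned list has a fixed layout: index 0 is the SVM trained with the RBF task-similarity normalizer, index 1 the dirac-coupled SVM from self._dirac_train, and the union variant stays commented out. A small unpacking sketch (call site assumed):

classifiers = self._inner_train(prepared_data, param)

svm_mtk = classifiers[0]     # multitask-kernel SVM (RBF task similarity)
svm_dirac = classifiers[1]   # SVM from self._dirac_train
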