def _union_train(self, prepared_data, param): """ perform inner training by processing the tree """ normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums) # set similarity for task_name_lhs in prepared_data.get_task_names(): for task_name_rhs in prepared_data.get_task_names(): similarity = 1.0 normalizer.set_task_similarity(prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs), similarity) lab = shogun_factory.create_labels(prepared_data.labels) print "creating empty kernel" kernel = shogun_factory.create_kernel(prepared_data.examples, param) print "setting normalizer" kernel.set_normalizer(normalizer) kernel.init_normalizer() svm = shogun_factory.create_svm(param, kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # train SVM svm.train() return svm
def _union_train(self, prepared_data, param): """ perform inner training by processing the tree """ normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums) # set similarity for task_name_lhs in prepared_data.get_task_names(): for task_name_rhs in prepared_data.get_task_names(): similarity = 1.0 normalizer.set_task_similarity( prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs), similarity) lab = shogun_factory.create_labels(prepared_data.labels) print "creating empty kernel" kernel = shogun_factory.create_kernel(prepared_data.examples, param) print "setting normalizer" kernel.set_normalizer(normalizer) kernel.init_normalizer() svm = shogun_factory.create_svm(param, kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # train SVM svm.train() return svm
def train_splice_predictor(examples, labels, param):
    """
    Build and train an SVM on hashed WDK features.

    @param examples: list of strings
    @param labels: list of integers {-1,1}
    @param param: parameter object; param.flags configures the feature hashing
    @return: trained SVM
    """

    ##########################
    # build classifier
    ##########################

    features = create_hashed_features_wdk(param.flags, examples)
    label_obj = create_labels(labels)

    svm = create_svm(param, features, label_obj)
    svm.train()

    return svm
def init_predictor(examples, labels, param, w):
    """
    Build an SVM predictor and install a pre-trained weight vector.

    Unlike train_splice_predictor, no training takes place: the weight
    vector of an already-trained SVM is set directly.

    @param examples: list of strings
    @param labels: list of integers {-1,1}
    @param param: parameter object; param.flags configures the feature hashing
    @param w: weight vector of trained svm
    @return: SVM with its weight vector set to w
    """

    ##########################
    # build classifier
    ##########################

    features = create_hashed_features_wdk(param.flags, examples)
    label_obj = create_labels(labels)

    svm = create_svm(param, features, label_obj)
    svm.set_w(w)

    return svm
def _inner_train(self, prepared_data, param): """ perform inner training by processing the tree """ # init seq handler classifiers = [] ################# # mtk normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums) from method_mhc_rbf import SequencesHandlerRbf task_kernel = SequencesHandlerRbf(1, param.base_similarity, prepared_data.get_task_names(), param.flags["wdk_rbf_on"]) # set similarity for task_name_lhs in prepared_data.get_task_names(): for task_name_rhs in prepared_data.get_task_names(): similarity = task_kernel.get_similarity(task_name_lhs, task_name_rhs) normalizer.set_task_similarity(prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs), similarity) lab = shogun_factory.create_labels(prepared_data.labels) print "creating empty kernel" kernel = shogun_factory.create_kernel(prepared_data.examples, param) print "setting normalizer" kernel.set_normalizer(normalizer) kernel.init_normalizer() svm = shogun_factory.create_svm(param, kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # train SVM svm.train() classifiers.append(svm) ################# # dirac #import pdb #pdb.set_trace() svm_dirac = self._dirac_train(prepared_data, param) classifiers.append(svm_dirac) ## #union #svm_union = self._union_train(prepared_data, param) #classifiers.append(svm_union) return classifiers
def _inner_train(self, prepared_data, param): """ perform inner training by processing the tree """ # init seq handler pseudoseqs = SequencesHandler() classifiers = [] for pocket in self.get_pockets(param.flags["all_positions"]): print "creating normalizer" #import pdb #pdb.set_trace() normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums) from method_mhc_rbf import SequencesHandlerRbf task_kernel = SequencesHandlerRbf(1, param.base_similarity, prepared_data.get_task_names(), param.flags["wdk_rbf_on"]) print "processing pocket", pocket M = prepared_data.get_num_tasks() save_sim_p = numpy.zeros((M,M)) save_sim_t = numpy.zeros((M,M)) # set similarity for task_name_lhs in prepared_data.get_task_names(): for task_name_rhs in prepared_data.get_task_names(): similarity = 0.0 for pseudo_seq_pos in pocket: similarity += float(pseudoseqs.get_similarity(task_name_lhs, task_name_rhs, pseudo_seq_pos-1)) # normalize similarity = similarity / float(len(pocket)) similarity_task = task_kernel.get_similarity(task_name_lhs, task_name_rhs) print "pocket %s (%s, %s) = %f" % (str(pocket), task_name_lhs, task_name_rhs, similarity) normalizer.set_task_similarity(prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs), similarity) save_sim_p[prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs)] = similarity save_sim_t[prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs)] = similarity_task #from IPython.Shell import IPShellEmbed #IPShellEmbed([])() lab = shogun_factory.create_labels(prepared_data.labels) print "creating empty kernel" kernel = shogun_factory.create_kernel(prepared_data.examples, param) print "setting normalizer" kernel.set_normalizer(normalizer) kernel.init_normalizer() print "training SVM for pocket", pocket svm = shogun_factory.create_svm(param, kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # train SVM svm.train() #import pdb 
#pdb.set_trace() classifiers.append(svm) return classifiers
def _train(self, train_data, param):
    """
    Training procedure using training examples and labels.

    @param train_data: Data relevant to SVM training
    @type train_data: dict<str, list<instances> >
    @param param: Parameters for the training procedure
    @type param: ParameterSvm

    @return: dict mapping each task name to a (task_num, svm) tuple;
             the same trained SVM object is shared by all tasks
    """

    assert param.base_similarity >= 1

    # merge data sets
    data = PreparedMultitaskData(train_data, shuffle=False)

    # create shogun data objects
    base_wdk = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)

    # create normalizer
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)

    # load hard-coded task-similarity
    # NOTE(review): absolute path only exists on the original host —
    # consider making this a parameter
    task_similarity = helper.load("/fml/ag-raetsch/home/cwidmer/svn/projects/alt_splice_code/src/task_sim_tis.bz2")

    # set similarity
    similarities = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))

    for (i, task_name_lhs) in enumerate(data.get_task_names()):

        #max_value_row = max(task_similarity.get_row(task_name_lhs))
        max_value_row = 1.0

        for (j, task_name_rhs) in enumerate(data.get_task_names()):
            similarity = task_similarity.get_value(task_name_lhs, task_name_rhs) / max_value_row
            normalizer.set_task_similarity(i, j, similarity)
            similarities[i, j] = similarity

    # BUG FIX: was "pprint.pprint similarities" (invalid syntax — function
    # written as a statement); call it properly
    pprint.pprint(similarities)

    # set normalizer
    #print "WARNING MTK disabled!!!!!!!!!!!!!!!!!!!!!"
    base_wdk.set_normalizer(normalizer)
    base_wdk.init_normalizer()

    # set up svm
    param.flags["svm_type"] = "svmlight"  # fix svm type
    svm = shogun_factory.create_svm(param, base_wdk, lab)

    # make sure these parameters are set correctly
    #print "WARNING MTK WONT WORK WITH THESE SETTINGS!!!!!!!!!!!!!!!!!!!!!"
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)

    assert svm.get_linadd_enabled() == False, "linadd should be disabled"
    # BUG FIX: was "svm.get_batch_computation_enabled == False" — comparing
    # the bound method to False, which is always falsy, so the assert
    # always fired; the call parentheses were missing
    assert svm.get_batch_computation_enabled() == False, "batch compute should be disabled"

    # start training
    svm.train()

    # save additional information
    self.additional_information["svm objective"] = svm.get_objective()
    self.additional_information["num sv"] = svm.get_num_support_vectors()
    #self.additional_information["distances"] = distances
    self.additional_information["similarities"] = similarities

    # wrap up predictors
    svms = {}

    # use a reference to the same svm several times
    for task_name in data.get_task_names():
        task_num = data.name_to_id(task_name)
        # save svm and task_num
        svms[task_name] = (task_num, svm)

    return svms
def _inner_train(self, prepared_data, param): """ perform inner training by processing the tree """ # init seq handler classifiers = [] ################# # mtk normalizer = MultitaskKernelNormalizer(prepared_data.task_vector_nums) from method_mhc_rbf import SequencesHandlerRbf task_kernel = SequencesHandlerRbf(1, param.base_similarity, prepared_data.get_task_names(), param.flags["wdk_rbf_on"]) # set similarity for task_name_lhs in prepared_data.get_task_names(): for task_name_rhs in prepared_data.get_task_names(): similarity = task_kernel.get_similarity( task_name_lhs, task_name_rhs) normalizer.set_task_similarity( prepared_data.name_to_id(task_name_lhs), prepared_data.name_to_id(task_name_rhs), similarity) lab = shogun_factory.create_labels(prepared_data.labels) print "creating empty kernel" kernel = shogun_factory.create_kernel(prepared_data.examples, param) print "setting normalizer" kernel.set_normalizer(normalizer) kernel.init_normalizer() svm = shogun_factory.create_svm(param, kernel, lab) svm.set_linadd_enabled(False) svm.set_batch_computation_enabled(False) # train SVM svm.train() classifiers.append(svm) ################# # dirac #import pdb #pdb.set_trace() svm_dirac = self._dirac_train(prepared_data, param) classifiers.append(svm_dirac) ## #union #svm_union = self._union_train(prepared_data, param) #classifiers.append(svm_union) return classifiers