def set_data_set(self, data_set):
    """Select and configure the active data set.

    Supports 20-newsgroups (DATA_NG) and two synthetic
    hypothesis-transfer configurations; any other id trips the assert.
    NOTE(review): a second `set_data_set` definition appears later in this
    file and (if in the same class) shadows this one — confirm which is live.
    """
    self.data_set = data_set
    if data_set == bc.DATA_NG:
        self.set_data_set_defaults('20ng-2000')
        # Classification task: 0/1 error for both reporting and CV.
        self.loss_function = loss_function.ZeroOneError()
        self.cv_loss_function = loss_function.ZeroOneError()
        #self.num_labels = [5, 10, 20, 40]
        # NOTE(review): CR and ST are label groupings defined elsewhere in
        # this file — presumably newsgroup category blocks; confirm.
        self.target_labels = CR[0]
        self.source_labels = np.vstack((CR[1], ST[1]))
        self.oracle_labels = CR[1]
    elif data_set == bc.DATA_SYNTHETIC_HYP_TRANS_1_1:
        self.set_data_set_defaults(
            'synthetic_hyp_trans_class500-50-1.0-0.3-1-1')
        self.num_labels = [10, 20, 40]
        # Synthetic sets carry no explicit label split; oracle data set
        # ids select the source data instead.
        self.target_labels = None
        self.source_labels = None
        self.oracle_data_set_ids = np.asarray([1])
    elif data_set == bc.DATA_SYNTHETIC_HYP_TRANS_2_2:
        self.set_data_set_defaults(
            'synthetic_hyp_trans_class500-50-1.0-0.3-2-2')
        self.num_labels = [10, 20, 40]
        self.target_labels = None
        self.source_labels = None
        self.oracle_data_set_ids = np.asarray([1, 2])
    else:
        assert False
# NOTE(review): stray triple-quote from the original file — it appears to
# open (or close) a disabled region; its pairing is ambiguous in this chunk.
# Preserved verbatim; verify against the full file.
'''
def set_synthetic_classification(self):
    """Configure the synthetic classification data set.

    Sets data locations, the target/source label split, and the loss
    functions (0/1 error for reporting, log loss for CV selection).
    """
    # Data locations and result directory for this data set.
    self.data_dir = 'data_sets/synthetic_classification'
    self.data_name = 'synthetic_classification'
    self.data_set_file_name = 'split_data.pkl'
    self.results_dir = 'synthetic_classification'
    # Labels 1-2 form the target task; labels 3-4 the source task.
    self.target_labels = np.asarray([1, 2])
    # disabled: self.target_labels = array_functions.vec_to_2d(self.target_labels).T
    # Source labels are stored as a 1xN row (vec_to_2d(...).T).
    self.source_labels = array_functions.vec_to_2d(np.asarray([3, 4])).T
    # 0/1 error for evaluation; log loss for cross-validation.
    self.loss_function = loss_function.ZeroOneError()
    self.cv_loss_function = loss_function.LogLoss()
def set_ng_transfer(self):
    """Configure the 20-newsgroups transfer task.

    Chooses the target/source/oracle label groups and the loss functions
    (0/1 error for evaluation, log loss for CV model selection).
    """
    self.loss_function = loss_function.ZeroOneError()
    self.set_ng()
    # Disabled alternative label split, preserved verbatim:
    '''
    self.target_labels = np.asarray([1,2])
    S1 = np.asarray([7,8])
    S2 = np.asarray([12,13])
    self.source_labels = np.vstack((S1,S2))
    '''
    # NOTE(review): CR and ST are label groupings defined elsewhere in this
    # file; CR[0] is the target group and CR[1]/ST[1] are stacked as source
    # groups, with CR[1] also serving as the oracle — confirm against their
    # definitions.
    self.target_labels = CR[0]
    #self.source_labels = CR[1]
    self.source_labels = np.vstack((CR[1], ST[1]))
    self.oracle_labels = CR[1]
    #self.source_labels = ST[1]
    #self.oracle_labels = np.empty(0)
    #self.cv_loss_function = loss_function.ZeroOneError()
    # Log loss (not 0/1 error) drives cross-validation selection here.
    self.cv_loss_function = loss_function.LogLoss()
def set_data_set(self, data_set):
    """Configure this experiment for the given transfer data set id.

    Applies the per-data-set defaults, sets `num_labels` and the
    target/source label (or domain) ids, then derives
    `labels_to_not_sample` and `labels_to_keep` from them.

    NOTE(review): for the branches that pass source_labels/target_labels
    to set_data_set_defaults, the `.size`/`.ravel()` calls below assume
    that helper stores them as numpy arrays — confirm.
    """
    self.data_set = data_set
    if data_set == bc.DATA_SYNTHETIC_PIECEWISE:
        self.set_data_set_defaults('synthetic_piecewise')
        self.num_labels = np.asarray([num_starting_labels])
        self.target_labels = np.zeros([1])
        self.source_labels = np.ones([1])
    elif data_set == bc.DATA_MNIST:
        self.set_data_set_defaults('mnist')
        # Integer division: the original "/" yields a float label count
        # under Python 3; "//" is identical for ints under Python 2 and
        # keeps the count integral under Python 3.
        self.num_labels = [num_starting_labels // 2]
        self.target_labels = np.asarray([1, 3])
        self.source_labels = np.asarray([7, 8])
        # MNIST is classification: 0/1 error for both reporting and CV.
        self.loss_function = loss_function.ZeroOneError()
        self.cv_loss_function = loss_function.ZeroOneError()
    elif data_set == bc.DATA_BOSTON_HOUSING:
        self.set_data_set_defaults('boston_housing-13(transfer)')
        self.num_labels = [num_starting_labels]
        self.target_labels = np.asarray([0])
        self.source_labels = np.asarray([1])
    elif data_set == bc.DATA_WINE:
        self.set_data_set_defaults('wine-small-11')
        self.num_labels = [num_starting_labels]
        self.target_labels = np.asarray([0])
        self.source_labels = np.asarray([1])
    elif data_set == bc.DATA_CONCRETE:
        self.set_data_set_defaults('concrete-7')
        self.num_labels = [num_starting_labels]
        self.target_labels = np.asarray([1])
        self.source_labels = np.asarray([3])
    elif data_set == bc.DATA_CLIMATE_MONTH:
        self.set_data_set_defaults('climate-month', source_labels=[0],
                                   target_labels=[4], is_regression=True)
        self.num_labels = np.asarray([num_starting_labels])
    elif data_set == bc.DATA_IRS:
        self.set_data_set_defaults('irs-income', source_labels=[0],
                                   target_labels=[1], is_regression=True)
        self.num_labels = np.asarray([num_starting_labels])
    elif data_set == bc.DATA_KC_HOUSING:
        self.set_data_set_defaults('kc-housing-spatial-floors',
                                   source_labels=[0], target_labels=[1],
                                   is_regression=True)
        self.num_labels = np.asarray([num_starting_labels])
    elif data_set == bc.DATA_ZILLOW:
        self.set_data_set_defaults('zillow-traffic', source_labels=[1],
                                   target_labels=[0], is_regression=True)
        #self.set_data_set_defaults('zillow', source_labels=[1], target_labels=[0], is_regression=True)
        self.num_labels = np.asarray([num_starting_labels])
    elif data_set == bc.DATA_TAXI:
        #self.set_data_set_defaults('taxi2-20', source_labels=[1], target_labels=[0], is_regression=True)
        #self.set_data_set_defaults('taxi2-50', source_labels=[1], target_labels=[0], is_regression=True)
        #self.set_data_set_defaults('taxi2', source_labels=[0], target_labels=[1], is_regression=True)
        #self.set_data_set_defaults('taxi3', source_labels=[1], target_labels=[0], is_regression=True)
        self.set_data_set_defaults('taxi', source_labels=[1],
                                   target_labels=[0], is_regression=True)
        #self.num_labels = np.asarray([5, 10, 20, 40, 100, 200, 400, 800])
        self.num_labels = np.asarray([num_starting_labels])
    else:
        assert False, 'unknown transfer data set'
    # Every branch must leave non-empty source and target labels behind.
    assert self.source_labels.size > 0
    assert self.target_labels.size > 0
    # Source labels are excluded from sampling; ravel once and reuse it
    # (the original computed source_labels.ravel() twice).
    flat_source = self.source_labels.ravel()
    self.labels_to_not_sample = flat_source
    self.labels_to_keep = np.concatenate((self.target_labels, flat_source))
# Build train/test splits for `data` and configure a stacking-transfer
# learner over them.
# NOTE(review): `DataSplitter`, `data`, `data_lib`, `method`,
# `transfer_methods`, `loss_function`, and `deepcopy` come from the
# enclosing file/scope; this fragment appears to continue past the
# visible chunk (the regression branch is cut off) — confirm.
from configs import base_configs

data_splitter = DataSplitter()
data_splitter.data = data
splits = data_splitter.generate_splits(data.y)
split_data = data_lib.SplitData(data, splits)
use_transfer = True
use_regression = False
m = base_configs.MethodConfigs()
m.use_validation = True
if use_transfer:
    # Transfer path is classification-only here.
    assert not use_regression
    m.loss_function = loss_function.ZeroOneError()
    m.cv_loss_function = loss_function.ZeroOneError()
    # Stacking transfer: a base learner combines source and target
    # learners; each gets its own deep copy of the configs.
    transfer_learner = transfer_methods.StackingTransfer(deepcopy(m))
    transfer_learner.base_learner = method.SKLLogisticRegression(deepcopy(m))
    #transfer_learner.source_learner = method.SKLLogisticRegression(deepcopy(m))
    transfer_learner.source_learner = method.SKLKNN(deepcopy(m))
    # The source learner skips validation (only the target side validates).
    transfer_learner.source_learner.configs.use_validation = False
    transfer_learner.use_all_source = True
    #transfer_learner.target_learner = method.SKLLogisticRegression(deepcopy(m))
    transfer_learner.target_learner = method.SKLKNN(deepcopy(m))
#learner = method.SKLKNN(deepcopy(m))
#learner = method.SKLLogisticRegression(deepcopy(m))
#learner = method.SKLRidgeClassification()
if use_regression:
    learner = method.SKLKNNRegression(deepcopy(m))