def set_drosophilia(self):
    """Configure paths and MSE losses for the drosophilia data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('drosophilia')
def set_boston_housing(self):
    """Configure paths and MSE losses for the boston housing data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('boston_housing')
def set_adience_aligned_cnn_1(self):
    """Configure paths and MSE losses for the adience aligned CNN
    (one-per-instance-id) data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('adience_aligned_cnn_1_per_instance_id')
def __init__(self, data_set=None):
    """Project-wide configuration defaults.

    :param data_set: optional identifier of the data set to use; kept
        on ``self.data_set`` so callers (e.g. the visualization
        configs, which read ``pc.data_set``) can see what was passed.
    """
    super(ProjectConfigs, self).__init__()
    self._num_labels = None
    # Bug fix: the original stored the parameter here and then clobbered
    # it with ``self.data_set = None`` near the end of __init__, so the
    # constructor argument was silently ignored.
    self.data_set = data_set
    self.project_dir = 'base'
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    self.data_dir = ''
    self.data_name = ''
    self.data_set_file_name = ''
    self.results_dir = ''
    self.include_name_in_results = False
    self.labels_to_use = None
    self.target_labels = None
    self.source_labels = None
    self.oracle_labels = None
    # Numbers of labeled instances to evaluate at: 40, 80, ..., 200.
    self.num_labels = range(40, 201, 40)
    #self.num_labels = range(40,81,40)
    # Default data set; callers switch via the set_* methods.
    self.set_boston_housing()
    self.num_splits = 30
    self.labels_to_keep = None
    # Final value is a dict; the earlier ``= None`` default the
    # original also assigned was a dead store and has been dropped.
    self.labels_to_not_sample = {}
    self.use_pool = False
    self.pool_size = 2
    self.method_results_class = results_lib.MethodResults
    self.oracle_data_set_ids = None
def set_synthetic_linear_reg(self):
    """Configure paths and MSE losses for the synthetic linear
    regression (500-50-1) data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('synthetic_linear_reg500-50-1')
def set_data_path_results(self, name):
    """Point all data/results fields at the data set called *name* and
    use mean squared error for both training and cross-validation."""
    mse = loss_function.MeanSquaredError
    self.loss_function = mse()
    self.cv_loss_function = mse()
    self.data_name = name
    self.results_dir = name
    self.data_dir = 'data_sets/' + name
    self.data_set_file_name = 'split_data.pkl'
def set_concrete(self):
    """Configure paths and MSE losses for the concrete data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('concrete')
def set_wine_red(self):
    """Configure paths and MSE losses for the red wine data set.

    Delegates to the generic path setter; the original repeated the
    same six field assignments inline (copy-paste duplication).
    """
    self.set_data_path_results('wine-red')
def set_synthetic_regression(self, name):
    """Configure a synthetic regression data set called *name* with
    target label 0 and source label 1."""
    self.loss_function = loss_function.MeanSquaredError()
    # Bug fix: the original assigned ``self.loss_function`` twice and
    # never set ``cv_loss_function`` (copy-paste error -- every sibling
    # setter configures both losses).
    self.cv_loss_function = loss_function.MeanSquaredError()
    self.target_labels = np.zeros(1)
    self.source_labels = np.ones(1)
    self.data_dir = 'data_sets/' + name
    self.data_name = name
    self.results_dir = name
    self.data_set_file_name = 'split_data.pkl'
def set_bike_sharing(self):
    """Configure the single-feature bike sharing transfer data set
    (target label 1, source label 0) with MSE losses."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    # Only the 1-d variant of this data set is supported here.
    assert self.use_1d_data == True
    data_set_name = 'bike_sharing-feat=1'
    self.data_dir = 'data_sets/' + data_set_name
    self.data_name = data_set_name
    self.results_dir = data_set_name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([1])
    self.source_labels = np.asarray([0])
def set_pollution(self, id, size):
    """Configure the normalized pollution data set identified by *id*
    and *size* (target label 0, source label 1), with MSE losses."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    #assert self.use_1d_data == True
    data_set_name = 'pollution-%d-%d-norm' % (id, size)
    self.data_name = data_set_name
    self.results_dir = data_set_name
    self.data_dir = 'data_sets/' + data_set_name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([0])
    self.source_labels = np.asarray([1])
def __init__(self, configs=MethodConfigs()):
    """Kernel density estimation method using MSE losses.

    NOTE(review): the mutable default ``configs=MethodConfigs()`` is a
    single instance shared across all default-constructed KDEs; kept
    as-is for interface compatibility (sibling classes use the same
    pattern) -- confirm this sharing is intended.
    """
    super(KDE, self).__init__(configs)
    # Cross-validation is effectively disabled: the original built a
    # sigma grid (10**-4 ... 10**4) and then immediately replaced it
    # with an empty dict, so only the empty dict ever took effect.
    # The dead first assignment has been removed.
    self.cv_params = {}
    self.is_classifier = False
    self._estimated_error = None
    self.quiet = True
    self.best_params = None
    self.model = None
    self.configs.loss_function = loss_function.MeanSquaredError()
    self.configs.cv_loss_function = loss_function.MeanSquaredError()
def __init__(self):
    """Method-level configuration, seeded from the project configs."""
    super(MethodConfigs, self).__init__()
    pc = create_project_configs()
    self.z_score = False
    self.quiet = False
    # Take both loss functions straight from the project configs.  The
    # original also pre-assigned fresh MeanSquaredError instances that
    # were immediately overwritten by these two lines (dead stores);
    # they have been removed.
    self.cv_loss_function = pc.cv_loss_function
    self.loss_function = pc.loss_function
    self.use_validation = False
    self.metric = 'euclidean'
    self.use_saved_cv_output = False
def set_data_set(self, name, target_labels, source_labels, is_regression):
    """Configure the transfer data set *name* with the given target and
    source label sets; only regression is supported."""
    assert is_regression
    mse = loss_function.MeanSquaredError
    self.loss_function = mse()
    self.cv_loss_function = mse()
    self.data_name = name
    self.results_dir = name
    self.data_dir = 'data_sets/' + name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray(target_labels)
    self.source_labels = np.asarray(source_labels)
def set_pollution(self):
    """Configure the normalized pollution data set with explicit
    target/source domain orderings and MSE losses."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    data_set_name = 'pollution-[3 4]-500-norm'
    #data_set_name = 'pollution-[60 71]-500-norm'
    self.data_name = data_set_name
    self.results_dir = data_set_name
    self.data_dir = 'data_sets/' + data_set_name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([0])
    self.source_labels = np.asarray([1, 2, 3])
    self.target_domain_order = np.asarray([1, 0])
    self.source_domain_order = np.asarray([3, 2])
def set_boston_housing_transfer(self):
    """Configure the boston housing transfer data set, selecting the
    1-d or 13-feature variant from ``self.use_1d_data``."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    if self.use_1d_data:
        name = 'boston_housing'
        self.data_dir = 'data_sets/boston_housing(transfer)'
    else:
        name = 'boston_housing-13'
        self.data_dir = 'data_sets/boston_housing-13(transfer)'
    self.data_name = name
    self.results_dir = name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([0])
    self.source_labels = np.asarray([1])
def set_concrete_transfer(self):
    """Configure the concrete transfer data set, selecting the 1-d or
    7-feature variant from ``self.use_1d_data``."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    name = 'concrete-feat=0' if self.use_1d_data else 'concrete-7'
    self.data_dir = 'data_sets/' + name
    self.data_name = name
    self.results_dir = name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([1])
    self.source_labels = np.asarray([3])
def set_wine(self):
    """Configure the small wine transfer data set, selecting the 1-d or
    11-feature variant from ``self.use_1d_data``."""
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    name = 'wine-small-feat=1' if self.use_1d_data else 'wine-small-11'
    self.data_dir = 'data_sets/' + name
    self.data_name = name
    self.results_dir = name
    self.data_set_file_name = 'split_data.pkl'
    self.target_labels = np.asarray([0])
    self.source_labels = np.asarray([1])
def set_synthetic_step_linear_transfer(self):
    """Configure the synthetic step/linear transfer data set
    (target label 0, source label 1)."""
    self.loss_function = loss_function.MeanSquaredError()
    # Consistency fix: every other data-set setter in this file also
    # configures the cross-validation loss; this one previously left
    # ``cv_loss_function`` untouched.
    self.cv_loss_function = loss_function.MeanSquaredError()
    self.data_dir = 'data_sets/synthetic_step_linear_transfer'
    self.data_name = 'synthetic_step_linear_transfer'
    self.data_set_file_name = 'split_data.pkl'
    self.results_dir = 'synthetic_step_linear_transfer'
    self.target_labels = np.asarray([0])
    self.source_labels = np.asarray([1])
def set_data_set_defaults(self, data_set_name, target_labels=None, source_labels=None, is_regression=True):
    """Default configuration for the data set *data_set_name*.

    :param target_labels: optional labels for the target domain;
        converted with ``np.asarray`` when given, else left ``None``.
    :param source_labels: optional labels for the source domain, same
        conversion rule.
    :param is_regression: only regression is supported.
    """
    assert is_regression
    self.loss_function = loss_function.MeanSquaredError()
    self.cv_loss_function = loss_function.MeanSquaredError()
    self.data_dir = 'data_sets/' + data_set_name
    self.data_name = data_set_name
    self.results_dir = data_set_name
    self.data_set_file_name = 'split_data.pkl'
    # The original assigned the raw arguments and then conditionally
    # re-assigned converted arrays; collapsed to one assignment each.
    self.target_labels = None if target_labels is None else np.asarray(target_labels)
    self.source_labels = None if source_labels is None else np.asarray(source_labels)
def __init__(self, configs=base_configs.MethodConfigs()):
    # NOTE(review): mutable default argument -- one shared MethodConfigs
    # instance across every default-constructed object; confirm intended.
    super(ScipyOptNonparametricHypothesisTransfer, self).__init__(configs)
    # Cross-validation grid for the regularization parameter C:
    # 10^-4 ... 10^3 as float64.
    self.cv_params['C'] = 10**np.asarray(range(-4, 4), dtype='float64')
    # Nadaraya-Watson estimator used as g, trained quietly and with no
    # target/source label restrictions; its CV loss is forced to MSE.
    self.g_nw = method.NadarayaWatsonMethod(configs)
    self.g_nw.configs.target_labels = None
    self.g_nw.configs.source_labels = None
    self.g_nw.configs.cv_loss_function = loss_function.MeanSquaredError()
    self.g_nw.quiet = True
    self.k = 3
    self.metric = configs.metric
    self.bias = 0
    # NOTE(review): ``use_huber`` is neither a parameter nor a local
    # here -- it must resolve at module level, otherwise this line
    # raises NameError. Verify against the rest of the file.
    self.use_huber = use_huber
def __init__(self, data_set=None, **kwargs):
    # Build plotting configuration on top of the project configs for
    # *data_set*; the chosen loss decides which result features are
    # compared and how the y axis is labeled.
    super(VisualizationConfigs, self).__init__(data_set, **kwargs)
    # ``max_rows``, ``pc_fields_to_copy``, ``loss_to_use``, the LOSS_*
    # constants, ``bc`` and ``classification_data_sets`` are module-level
    # names not visible in this chunk -- presumably defined at file top.
    self.max_rows = max_rows
    pc = ProjectConfigs(data_set)
    self.copy_fields(pc, pc_fields_to_copy)
    self.data_set_to_use = pc.data_set
    self.title = bc.data_name_dict.get(self.data_set_to_use, 'Unknown Data Set')
    self.show_legend_on_all = True
    self.x_axis_string = 'Number of labeled instances'
    self.ylims = None
    self.generate_file_names(pc)
    # Default loss for visualization; overridden per-branch below.
    viz_loss_function = loss_function.MeanSquaredError()
    self.always_show_y_label = True
    is_regression = not self.data_set_to_use in classification_data_sets
    instance_subset = 'is_train'
    # Respect a ``loss_to_use`` set earlier (e.g. by a subclass); fall
    # back to the module-level default otherwise.
    if not hasattr(self, 'loss_to_use'):
        self.loss_to_use = loss_to_use
    if self.loss_to_use == LOSS_Y:
        # Prediction error on y; the features compared are identical in
        # both sub-branches -- only the axis label changes.
        if pc.use_var:
            results_features = ['y', 'true_y']
            self.y_axis_string = 'Variance Error'
        else:
            results_features = ['y', 'true_y']
            self.y_axis_string = 'Prediction Error'
    elif self.loss_to_use == LOSS_P:
        results_features = ['p', 'true_p']
        self.y_axis_string = 'P(X) Error'
    elif self.loss_to_use == LOSS_NOISY:
        # Overlap between noisy and selected instances.
        results_features = ['is_noisy', 'is_selected']
        viz_loss_function = loss_function.LossAnyOverlap()
        self.y_axis_string = 'Noisy Error'
    elif self.loss_to_use == LOSS_ENTROPY:
        # Entropy of the selection distribution, restricted to the
        # selected instances; compares y_orig against itself.
        instance_subset = 'is_selected'
        results_features = ['y_orig', 'y_orig']
        viz_loss_function = loss_function.LossSelectedEntropy(
            is_regression=is_regression)
        self.y_axis_string = 'Selection Distribution Error'
    else:
        # Unknown loss selector -- fail loudly.
        assert False
    self.instance_subset = instance_subset
    self.results_features = results_features
    self.loss_function = viz_loss_function
value_housing = pricing_data[I, 1] value_housing /= value_housing.max() if apply_log: value_housing = np.log(value_housing) data = combine_data(loc_traffic, value_traffic, loc_housing, value_housing) else: I &= np.isfinite(locations[:, 0]) data = create_transfer_data(locations, pricing_data, I, apply_log) print 'n: ' + str(I.sum()) # pricing_data[:] = 1 if run_state_tests: m = base_configs.MethodConfigs() m.cv_loss_function = loss_function.MeanSquaredError() m.loss_function = loss_function.MeanSquaredError() loss = loss_function.MeanSquaredError() m.use_validation = True m.target_labels = np.asarray([1]) m.source_labels = np.asarray([0]) stacking_transfer = transfer_methods.StackingTransfer(deepcopy((m))) m.just_target = True target_learner = far_transfer_methods.GraphTransfer(deepcopy(m)) m.just_target = False m.just_transfer = True source_learner = far_transfer_methods.GraphTransfer(deepcopy(m)) num_splits = 10 errors = np.zeros((all_states.size, 3)) for state_idx, s in enumerate(all_states):