def create_synthetic_flip_transfer(file_dir="", dim=1):
    """Generate and save a two-domain "flip" regression data set.

    Inputs are drawn uniformly with ``dim`` features. The first 100 rows get
    data set id 0 and the remaining 100 rows get id 1. Labels depend only on
    the first feature: id 0 rows are labeled 1 at or below 0.5 and 2 above
    it, id 1 rows get the opposite labels. Gaussian noise with standard
    deviation 0.2 is then added, and the data is written with
    ``helper_functions.save_object``.

    Args:
        file_dir: Optional directory prepended (with "/") to the file name.
        dim: Number of input features. The data is plotted only when it is 1.
    """
    n_target = 100
    n_source = 100
    n_total = n_target + n_source
    noise_std = 0.2

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n_total, dim))
    domain_ids = np.zeros(n_total)
    domain_ids[n_target:] = 1
    data.data_set_ids = domain_ids

    first_feature = data.x[:, 0]
    in_target = data.data_set_ids == 0
    in_source = data.data_set_ids == 1
    upper_half = first_feature >= 0.5
    lower_half = first_feature <= 0.5

    labels = np.zeros(n_total)
    # Rules are applied in order, so the "<= 0.5" rules win at exactly 0.5.
    label_rules = (
        (in_target & upper_half, 2),
        (in_source & upper_half, 1),
        (in_target & lower_half, 1),
        (in_source & lower_half, 2),
    )
    for mask, value in label_rules:
        labels[mask] = value
    labels += np.random.normal(0, noise_std, n_total)
    data.y = labels

    data.set_train()
    data.set_true_y()
    data.is_regression = True
    if dim == 1:
        array_functions.plot_2d(data.x, data.y, data.data_set_ids)

    # NOTE(review): the multi-dimensional case uses the "step kd" file name
    # template rather than a flip-specific one -- confirm this is intended.
    if dim > 1:
        file_name = synthetic_step_kd_transfer_file % dim
    else:
        file_name = synthetic_flip_file
    if file_dir != "":
        file_name = file_dir + "/" + file_name
    helper_functions.save_object(file_name, data)
def plot_g(self):
    """Plot ``1 / (1 + g)`` over a grid on [0, 1].

    ``g`` is obtained from ``self.g_learner.predict_g`` evaluated on the
    default ``np.linspace(0, 1)`` grid after conversion with
    ``array_functions.vec_to_2d``.
    """
    grid = array_functions.vec_to_2d(np.linspace(0, 1))
    raw_g = self.g_learner.predict_g(grid)
    transformed_g = 1 / (1 + raw_g)
    array_functions.plot_2d(grid, transformed_g)
def create_synthetic_piecewise_transfer(file_dir='', dim=1):
    """Generate and save a two-domain piecewise-constant regression data set.

    Inputs are drawn uniformly with ``dim`` features; the first 150 rows get
    data set id 0 and the last 150 rows id 1. Labels are step functions of
    the first feature with breakpoints at .33 and .66 (levels 2, 0, 1 for
    id 0 and 1, 2, 0 for id 1, from left to right), plus Gaussian noise with
    standard deviation .2. The data is written with
    ``helper_functions.save_object``.

    Args:
        file_dir: Optional directory prepended (with '/') to the file name.
        dim: Number of input features. The data is plotted only when it is 1.
    """
    n_target = 150
    n_source = 150
    n_total = n_target + n_source
    noise_std = .2

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n_total, dim))
    domain_ids = np.zeros(n_total)
    domain_ids[n_target:] = 1
    data.data_set_ids = domain_ids

    first_feature = data.x[:, 0]
    labels = np.zeros(n_total)
    # Each cascade goes from the widest threshold to the narrowest, so later
    # (smaller) thresholds overwrite the levels set by earlier ones.
    cascades = (
        (0, ((1, 1), (.66, 0), (.33, 2))),
        (1, ((1, 0), (.66, 2), (.33, 1))),
    )
    for domain_id, steps in cascades:
        in_domain = data.data_set_ids == domain_id
        for upper_bound, level in steps:
            labels[in_domain & (first_feature <= upper_bound)] = level
    labels += np.random.normal(0, noise_std, n_total)
    data.y = labels

    data.set_train()
    data.set_true_y()
    data.is_regression = True
    if dim == 1:
        array_functions.plot_2d(data.x, data.y, data.data_set_ids)

    file_name = synthetic_piecewise_file
    if dim > 1:
        # NOTE(review): this only works if synthetic_piecewise_file contains
        # a format specifier -- verify against its definition.
        file_name = synthetic_piecewise_file % dim
    if file_dir != '':
        file_name = file_dir + '/' + file_name
    helper_functions.save_object(file_name, data)
def create_synthetic_delta_linear_transfer():
    """Build, plot and save a linear transfer data set with a constant offset.

    The target function is ``5 * x`` and the source function is
    ``5 * x + 4``. Both are handed to
    ``create_synthetic_regression_transfer``; the result is plotted and then
    saved under ``synthetic_delta_linear_file``.
    """
    slope = 5

    def target_fun(x):
        return slope * x

    def source_fun(x):
        return slope * x + 4

    data = create_synthetic_regression_transfer(target_fun, source_fun)
    array_functions.plot_2d(
        data.x,
        data.y,
        data.data_set_ids,
        title="Linear Delta Data Set",
    )
    helper_functions.save_object(synthetic_delta_linear_file, data)
def plot_target(self):
    """Plot the target learner's predictions over a grid on [0, 1].

    A ``data_lib.Data`` object is built from the default ``np.linspace(0, 1)``
    grid with NaN placeholder labels, passed to ``self.target_learner.predict``
    and the ``y`` field of the returned object is plotted against the grid.
    """
    grid = array_functions.vec_to_2d(np.linspace(0, 1))

    query = data_lib.Data()
    query.x = grid
    # Labels are unknown for the query grid, so fill them with NaN.
    query.y = np.nan * np.ones(grid.shape[0])
    query.is_regression = True

    prediction = self.target_learner.predict(query)
    array_functions.plot_2d(grid, prediction.y)
def create_digits():
    """Plot the digit label against each normalized feature of the digits set.

    Loads the data with ``datasets.load_digits`` and produces one
    ``array_functions.plot_2d`` call per feature column, drawn with
    ``alpha=.01``. Nothing is returned or saved.
    """
    digits = datasets.load_digits()
    features = digits.data
    labels = digits.target
    n_features = features.shape[1]
    for col in range(n_features):
        normalized_col = array_functions.normalize(features[:, col])
        array_functions.plot_2d(normalized_col, labels, alpha=.01)
def create_diabetes():
    """Plot the normalized target against each normalized diabetes feature.

    Loads the data with ``datasets.load_diabetes`` and produces one
    ``array_functions.plot_2d`` call per feature column.

    Raises:
        AssertionError: Always, after the plotting loop finishes.
    """
    diabetes = datasets.load_diabetes()
    features = diabetes.data
    targets = diabetes.target
    for col in range(features.shape[1]):
        array_functions.plot_2d(
            array_functions.normalize(features[:, col]),
            array_functions.normalize(targets),
        )
    # NOTE(review): the unconditional failure looks like a marker that this
    # function is unfinished (nothing is saved) -- confirm before relying on it.
    assert False
def create_digits():
    """Visualize how each digits feature relates to the digit label.

    Every feature column returned by ``datasets.load_digits`` is normalized
    with ``array_functions.normalize`` and plotted against the raw labels
    with ``alpha=0.01``. Nothing is returned or saved.
    """
    loaded = datasets.load_digits()
    x = loaded.data
    y = loaded.target
    # Iterating over the transpose yields one feature column at a time.
    for column in x.T:
        array_functions.plot_2d(array_functions.normalize(column), y, alpha=0.01)
def create_synthetic_delta_linear_transfer():
    """Create the "linear delta" synthetic transfer data set.

    Target and source share the slope 5; the source is additionally shifted
    up by 4. The data produced by ``create_synthetic_regression_transfer`` is
    plotted and saved to ``synthetic_delta_linear_file``.
    """
    slope = 5
    source_offset = 4
    data = create_synthetic_regression_transfer(
        lambda x: slope * x,
        lambda x: slope * x + source_offset,
    )
    plot_title = 'Linear Delta Data Set'
    array_functions.plot_2d(data.x, data.y, data.data_set_ids, title=plot_title)
    helper_functions.save_object(synthetic_delta_linear_file, data)
def vis_data():
    """Plot the labels against every feature of the pickled raw data set.

    The data is loaded from ``data_sets/<data_file_dir>/raw_data.pkl``. Each
    plot is titled with the feature's name, or with a placeholder when the
    data set carries no feature names.
    """
    raw_path = 'data_sets/' + data_file_dir + '/raw_data.pkl'
    data = helper_functions.load_object(raw_path)
    features, labels = data.x, data.y
    for col in range(data.p):
        if data.feature_names is None:
            plot_title = 'Feature Names Missing'
        else:
            plot_title = data.feature_names[col]
        array_functions.plot_2d(
            features[:, col],
            labels,
            data_set_ids=data.data_set_ids,
            title=plot_title,
        )
def create_synthetic_multitask_transfer():
    """Build, plot and save a synthetic data set with two target functions.

    The source function is ``8 * x``; the two target functions are
    ``4 * x + 3`` and ``4.5 * x + 8``. The list of target functions and the
    source function are handed to ``create_synthetic_regression_transfer``;
    the result is plotted and saved under ``synthetic_slant_multitask``.
    """
    slope_source = 8
    target_slope1 = 4
    target_slope2 = 4.5

    def source_func(x):
        return slope_source * x

    def first_target(x):
        return target_slope1 * x + 3

    def second_target(x):
        return target_slope2 * x + 8

    data = create_synthetic_regression_transfer(
        [first_target, second_target],
        source_func,
    )
    array_functions.plot_2d(
        data.x,
        data.y,
        data.data_set_ids,
        title='Multitask Slant',
    )
    helper_functions.save_object(synthetic_slant_multitask, data)
def create_synthetic_classification(file_dir="", local=True):
    """Generate and save a one-dimensional two-domain classification set.

    400 inputs are drawn uniformly; the first 200 rows get data set id 0 and
    the rest id 1. The input range is split into quarters via
    ``array_functions.in_range``: id 0 rows are labeled 1, 2, 1, 2 across the
    quarters and id 1 rows 3, 4, 3, 4. When ``local`` is true the labels of
    the last two quarters of id 1 are swapped (to 4, 3).

    Args:
        file_dir: Optional directory prepended (with "/") to the file name.
        local: Selects the label swap described above and the
            ``synthetic_classification_local_file`` output name.
    """
    dim = 1
    n_target = 200
    n_source = 200
    n_total = n_target + n_source

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n_total, dim))
    data.data_set_ids = np.zeros(n_total)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n_total)

    features, domain_ids = data.x, data.data_set_ids
    quarter_1 = array_functions.in_range(features, 0, 0.25)
    quarter_2 = array_functions.in_range(features, 0.25, 0.5)
    quarter_3 = array_functions.in_range(features, 0.5, 0.75)
    quarter_4 = array_functions.in_range(features, 0.75, 1)
    is_target = domain_ids == 0
    is_source = domain_ids == 1

    # Applied strictly in order; later entries overwrite earlier ones.
    assignments = [
        (quarter_1 & is_target, 1),
        (quarter_2 & is_target, 2),
        (quarter_3 & is_target, 1),
        (quarter_4 & is_target, 2),
        (quarter_1 & is_source, 3),
        (quarter_2 & is_source, 4),
        (quarter_3 & is_source, 3),
        (quarter_4 & is_source, 4),
    ]
    if local:
        assignments.append((quarter_3 & is_source, 4))
        assignments.append((quarter_4 & is_source, 3))
    for mask, label in assignments:
        data.y[mask] = label

    data.set_true_y()
    data.set_train()
    data.is_regression = False

    noise_rate = 0
    data.add_noise(noise_rate, is_target, np.asarray([1, 2]))
    data.add_noise(noise_rate, is_source, np.asarray([3, 4]))

    if local:
        file_name = synthetic_classification_local_file
    else:
        file_name = synthetic_classification_file

    # Only the rows with data set id 1 are plotted.
    array_functions.plot_2d(data.x[is_source, :], data.y[is_source])

    if file_dir != "":
        file_name = file_dir + "/" + file_name
    helper_functions.save_object(file_name, data)
def create_synthetic_classification(file_dir='', local=True):
    """Create the synthetic two-domain classification data set and save it.

    Inputs are one-dimensional uniform samples (200 rows with data set id 0
    followed by 200 rows with id 1). Labels alternate over the four quarters
    of the input range: 1/2/1/2 for id 0 and 3/4/3/4 for id 1. With ``local``
    set, the id 1 labels of the third and fourth quarter become 4 and 3.

    Args:
        file_dir: Optional directory prepended (with '/') to the file name.
        local: Enables the label swap and switches the output file name to
            ``synthetic_classification_local_file``.
    """
    dim = 1
    n_target = 200
    n_source = 200
    n = n_target + n_source

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)

    x, ids = data.x, data.data_set_ids
    quarters = (
        array_functions.in_range(x, 0, .25),
        array_functions.in_range(x, .25, .5),
        array_functions.in_range(x, .5, .75),
        array_functions.in_range(x, .75, 1),
    )
    target_rows = ids == 0
    source_rows = ids == 1

    # One label per quarter, id 0 rows first and then id 1 rows.
    labels_by_domain = (
        (target_rows, (1, 2, 1, 2)),
        (source_rows, (3, 4, 3, 4)),
    )
    for domain_rows, quarter_labels in labels_by_domain:
        for in_quarter, label in zip(quarters, quarter_labels):
            data.y[in_quarter & domain_rows] = label
    if local:
        # Swap the labels of the upper two quarters for the id 1 rows.
        data.y[quarters[2] & source_rows] = 4
        data.y[quarters[3] & source_rows] = 3

    data.set_true_y()
    data.set_train()
    data.is_regression = False

    noise_rate = 0
    for domain_rows, classes in ((target_rows, [1, 2]), (source_rows, [3, 4])):
        data.add_noise(noise_rate, domain_rows, np.asarray(classes))

    out_name = synthetic_classification_file
    if local:
        out_name = synthetic_classification_local_file

    # Only the rows with data set id 1 are plotted.
    array_functions.plot_2d(data.x[source_rows, :], data.y[source_rows])

    if file_dir != '':
        out_name = file_dir + '/' + out_name
    helper_functions.save_object(out_name, data)
def create_synthetic_step_linear_transfer(file_dir=""):
    """Generate and save a linear data set whose source domain has a step.

    200 one-dimensional inputs are drawn uniformly; the first 100 rows get
    data set id 0 and the rest id 1. Labels are ``5 * x``; id 1 rows with
    ``x >= 0.5`` are shifted up by 4. Gaussian noise with standard deviation
    0.5 is added before the data is plotted and saved.

    Args:
        file_dir: Optional directory prepended (with "/") to the file name.
    """
    n_target = 100
    n_source = 100
    n_total = n_target + n_source
    noise_std = 0.5

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n_total, 1))
    domain_ids = np.zeros(n_total)
    domain_ids[n_target:] = 1
    data.data_set_ids = domain_ids

    # Flatten the single feature column into a 1-D label vector.
    labels = (data.x * 5).reshape(data.x.shape[0])
    stepped_rows = (data.data_set_ids == 1) & (data.x[:, 0] >= 0.5)
    labels[stepped_rows] += 4
    labels += np.random.normal(0, noise_std, n_total)
    data.y = labels

    data.set_defaults()
    data.is_regression = True
    array_functions.plot_2d(
        data.x,
        data.y,
        data.data_set_ids,
        title="Linear Step Data Set",
    )

    file_name = synthetic_step_linear_transfer_file
    if file_dir != "":
        file_name = file_dir + "/" + file_name
    helper_functions.save_object(file_name, data)
def create_synthetic_step_linear_transfer(file_dir=''):
    """Create the "linear step" synthetic transfer data set and save it.

    Both domains follow ``y = 5 * x`` on uniformly drawn one-dimensional
    inputs (100 rows with data set id 0, then 100 rows with id 1). The id 1
    rows at or above ``x = .5`` are raised by 4, and Gaussian noise with
    standard deviation .5 is added to every label.

    Args:
        file_dir: Optional directory prepended (with '/') to the file name.
    """
    n_target = 100
    n_source = 100
    n = n_target + n_source
    sigma = .5

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, 1))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1

    feature = data.x[:, 0]
    y = feature * 5
    in_source = data.data_set_ids == 1
    past_step = feature >= .5
    y[in_source & past_step] += 4
    y += np.random.normal(0, sigma, n)
    data.y = y

    data.set_defaults()
    data.is_regression = True
    array_functions.plot_2d(data.x, data.y, data.data_set_ids,
                            title='Linear Step Data Set')

    if file_dir != '':
        destination = file_dir + '/' + synthetic_step_linear_transfer_file
    else:
        destination = synthetic_step_linear_transfer_file
    helper_functions.save_object(destination, data)
def train_g_nonparametric(self, target_data):
    """Fit the per-sample mixing weight ``g`` and store it in ``self.g``.

    For the labeled rows of ``target_data`` a convex problem is solved for a
    vector ``g`` (constrained to [0, .5]) that minimizes the squared error of
    ``y_s * g + y_t * (1 - g)`` against ``y_true`` plus ``self.C`` times a
    smoothness regularizer built from a kNN Laplacian (quadratic form, or a
    fused lasso when ``self.use_fused_lasso`` is set). The solved values are
    then used as regression targets for a Nadaraya-Watson learner, whose
    predictions on all of ``target_data`` become ``self.g``; the labeled
    entries are overwritten with the solved values.

    If solving fails with an exception, ``g`` falls back to all zeros and a
    diagnostic message is printed.

    Args:
        target_data: Data object providing ``x``, ``is_labeled`` and
            ``get_subset``. Its ``is_regression`` flag is set to True as a
            side effect.

    Raises:
        AssertionError: If the problem is not DCP, if the labeled subset's
            ``y`` shape differs from the solution's, or if the resulting
            ``self.g`` contains NaN.
    """
    # presumably target prediction, source prediction and ground truth, one
    # row per labeled sample -- verify against get_predictions.
    y_t, y_s, y_true = self.get_predictions(target_data)

    is_labeled = target_data.is_labeled
    labeled_inds = is_labeled.nonzero()[0]
    n_labeled = len(labeled_inds)
    g = cvx.Variable(n_labeled)

    # Disabled alternative (uniform-radius Laplacian):
    # L = array_functions.make_laplacian_uniform(target_data.x[labeled_inds,:],self.radius,metric) \
    #     + .0001*np.identity(n_labeled)
    # The small multiple of the identity is added to the Laplacian diagonal.
    L = array_functions.make_laplacian_kNN(target_data.x[labeled_inds, :], self.k, self.metric) \
        + .0001 * np.identity(n_labeled)
    if self.use_fused_lasso:
        reg = cvx_functions.create_fused_lasso(-L, g)
    else:
        reg = cvx.quad_form(g, L)

    # NOTE(review): sum_entries / mul_elemwise look like the pre-1.0 cvxpy
    # names -- confirm the pinned cvxpy version before upgrading.
    loss = cvx.sum_entries(
        cvx.power(
            cvx.mul_elemwise(y_s[:, 0], g) + cvx.mul_elemwise(y_t[:, 0], (1 - g)) - y_true[:, 0],
            2
        )
    )
    constraints = [g >= 0, g <= .5]
    # constraints += [g[0] == .5, g[-1] == 0]
    obj = cvx.Minimize(loss + self.C * reg)
    prob = cvx.Problem(obj, constraints)

    assert prob.is_dcp()
    try:
        prob.solve()
        # Kept inside the try: a failed solve can leave g.value unusable, in
        # which case the reshape raises and the fallback below applies.
        g_value = np.reshape(np.asarray(g.value), n_labeled)
    except Exception:
        # Deliberate best-effort fallback. Catching Exception (rather than a
        # bare except) lets KeyboardInterrupt / SystemExit propagate.
        k = 0
        # assert prob.status is None
        print('CVX problem: setting g = ' + str(k))
        print('\tsigma=' + str(self.sigma))
        print('\tC=' + str(self.C))
        print('\tradius=' + str(self.radius))
        g_value = k * np.ones(n_labeled)

    if self.should_plot_g and enable_plotting and target_data.x.shape[1] == 1:
        array_functions.plot_2d(target_data.x[labeled_inds, :], g_value)

    # Train a Nadaraya-Watson regressor on (x_labeled, g_value) so that g can
    # be predicted for the remaining rows.
    labeled_train_data = target_data.get_subset(labeled_inds)
    assert labeled_train_data.y.shape == g_value.shape
    g_nw = method.NadarayaWatsonMethod(copy.deepcopy(self.configs))
    labeled_train_data.is_regression = True
    labeled_train_data.y = g_value
    labeled_train_data.true_y = g_value
    g_nw.configs.loss_function = loss_function.MeanSquaredError()

    g_nw.tune_loo(labeled_train_data)
    g_nw.train(labeled_train_data)

    # Disabled debugging output:
    # a =np.hstack((g_value[labeled_train_data.x.argsort(0)], np.sort(labeled_train_data.x,0)))
    # print str(a)
    # print 'g_nw sigma: ' + str(g_nw.sigma)
    # print 'C:' + str(self.C)

    target_data.is_regression = True
    self.g = g_nw.predict(target_data).fu
    # Labeled rows keep the values from the convex problem.
    self.g[labeled_inds] = g_value
    assert not np.any(np.isnan(self.g))
def vis_data():
    """Visualize the raw data set stored for ``data_file_dir``.

    Loads ``data_sets/<data_file_dir>/raw_data.pkl`` and, depending on
    module-level flags, draws one of three kinds of figures:

    * ``plot_features``: the labels against every feature column.
    * ``plot_gradients`` or ``plot_values``: an image per selected data set
      id, built from the output of ``estimate_gradients`` and rescaled to
      [0, 1].
    * otherwise: a heatmap of the rows of each selected data set id,
      optionally drawn over a terrain image when ``plot_climate`` is set.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-collapsed source; the placement of ``move_fig`` after the
    ``plot_climate`` check and of the final ``plt.show`` should be confirmed
    against the original file.
    """
    s = 'data_sets/' + data_file_dir + '/raw_data.pkl'
    data = helper_functions.load_object(s)
    x = data.x
    y = data.y
    # Defaults: untitled plots for data set ids 0 and 1.
    titles = ['', '']
    label_idx = [0, 1]
    if plot_climate:
        # Hard-coded, machine-specific path to the background image.
        img_path = 'C:/PythonFramework/far_transfer/figures/climate-terrain.png'
        image = imread(img_path)
        label_idx = [0, 4]
    # Per-data-set figure titles (and, for climate, the ids to show).
    if data_file_dir == 'climate-month':
        titles = [
            'Max Temperature Gradient: January',
            'Max Temperature Gradient: April'
        ]
        label_idx = [0, 4]
    elif data_file_dir == 'irs-income':
        titles = ['Income', 'Household Size']
    elif data_file_dir == 'zillow-traffic':
        titles = ['Morning Taxi Pickups', 'Housing Prices']
    elif data_file_dir == 'kc-housing-spatial-floors':
        titles = ['House Prices: 1 Floor', 'House Prices: 2 or More Floors']

    if plot_features:
        # One plot per feature column, titled with the feature name if known.
        for i in range(data.p):
            xi = x[:, i]
            title = 'Feature Names Missing'
            if data.feature_names is not None:
                title = data.feature_names[i]
            array_functions.plot_2d(xi, y, data_set_ids=data.data_set_ids, title=title)
    else:
        for i, title in zip(label_idx, titles):
            #plt.close()
            # Boolean mask of the rows that belong to data set id i.
            I = data.data_set_ids == i
            if plot_gradients or plot_values:
                # presumably g is a gradient image and v a value image --
                # verify against estimate_gradients.
                g, v = estimate_gradients(x, y, I)
                if plot_values:
                    g = v
                #g = np.log(g)
                #g -= g.min()
                #g += g.max()/10.0
                #g /= g.max()
                # Every branch below min-max rescales g in place; they differ
                # only in the extra transform applied afterwards.
                if data_file_dir == 'zillow-traffic':
                    if i == 0:
                        pass
                        g -= g.min()
                        g /= g.max()
                        #g **= .5
                    else:
                        pass
                        g -= g.min()
                        g /= g.max()
                        #g **= .5
                else:
                    if i == 0:
                        g -= g.min()
                        g /= g.max()
                        # Square root applied only for the first data set id.
                        g = np.sqrt(g)
                    else:
                        g -= g.min()
                        g /= g.max()
                        # Exponent 1 leaves the values unchanged.
                        g **= 1
                #array_functions.plot_heatmap(g, sizes=dot_sizes, fig=fig, title=title)
                fig = plt.figure(i)
                plt.title(title)
                plt.axis('off')
                plt.imshow(g)
                array_functions.move_fig(fig, 750, 400)
                #plt.show(block=False)
            else:
                # Figure number 4 is reused for every id in this branch.
                fig = plt.figure(4)
                array_functions.plot_heatmap(x[I, :], y[I], sizes=dot_sizes, fig=fig, title=title)
                if plot_climate:
                    # Draw the terrain image behind the heatmap (zorder=0).
                    # presumably the extent is a lon/lat bounding box -- TODO confirm.
                    plt.imshow(image, zorder=0, extent=[-90, -78, 33.5, 38])
                array_functions.move_fig(fig, 1400, 600)
        # Blocking call.
        plt.show(block=True)
    pass