def create_synthetic_flip_transfer(file_dir="", dim=1):
    """Generate, plot and save the synthetic "flip" transfer data set.

    Draws 100 target points (data set id 0) and 100 source points (id 1)
    uniformly at random with ``dim`` features each.  Along the first feature
    the target label is 1 up to 0.5 and 2 above it; the source labels are
    the mirror image.  Gaussian noise with standard deviation 0.2 is added
    to the labels.  The data is plotted when ``dim == 1`` and always written
    with ``helper_functions.save_object``.

    Args:
        file_dir: optional directory prepended (with "/") to the file name.
        dim: number of feature columns to sample.
    """
    n_target = 100
    n_source = 100
    n = n_target + n_source
    noise_std = 0.2

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1

    # Label is 1 when "first feature <= 0.5" and "is a target point" agree,
    # and 2 when they disagree.  A point at exactly 0.5 counts as lower half.
    in_lower_half = data.x[:, 0] <= 0.5
    is_target = data.data_set_ids == 0
    data.y = np.where(in_lower_half == is_target, 1.0, 2.0)
    data.y += np.random.normal(0, noise_std, n)

    data.set_train()
    data.set_true_y()
    data.is_regression = True
    if dim == 1:
        array_functions.plot_2d(data.x, data.y, data.data_set_ids)

    # NOTE(review): for dim > 1 the flip data is saved under the *step kd*
    # file template -- looks like a copy-paste leftover; confirm the name.
    out_path = synthetic_flip_file
    if dim > 1:
        out_path = synthetic_step_kd_transfer_file % dim
    if file_dir != "":
        out_path = file_dir + "/" + out_path
    helper_functions.save_object(out_path, data)
 def plot_g(self):
     """Plot ``1 / (1 + g)`` of the g learner over an even grid on [0, 1]."""
     # np.linspace(0, 1) yields its default number of evenly spaced samples.
     grid = array_functions.vec_to_2d(np.linspace(0, 1))
     raw_g = self.g_learner.predict_g(grid)
     array_functions.plot_2d(grid, 1 / (1 + raw_g))
def create_synthetic_piecewise_transfer(file_dir='', dim=1):
    """Generate, plot and save a piecewise-constant transfer data set.

    Draws 150 target points (data set id 0) and 150 source points (id 1)
    uniformly at random with ``dim`` features each.  Along the first
    feature the noiseless levels are

    * target: 2 for x <= .33, 0 for .33 < x <= .66, 1 for .66 < x <= 1
    * source: 1 for x <= .33, 2 for .33 < x <= .66, 0 for .66 < x <= 1

    Gaussian noise with standard deviation .2 is added afterwards.  The
    data is plotted when ``dim == 1`` and always written with
    ``helper_functions.save_object``.

    Args:
        file_dir: optional directory prepended (with '/') to the file name.
        dim: number of feature columns to sample.
    """
    n_target = 150
    n_source = 150
    n = n_target + n_source
    noise_std = .2

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)

    first_feature = data.x[:, 0]
    is_target = data.data_set_ids == 0
    is_source = data.data_set_ids == 1
    # (upper bound, target level, source level).  Bounds are applied from
    # widest to narrowest so each narrower interval overwrites the wider one.
    levels = (
        (1, 1, 0),
        (.66, 0, 2),
        (.33, 2, 1),
    )
    for upper, target_level, source_level in levels:
        below = first_feature <= upper
        data.y[is_target & below] = target_level
        data.y[is_source & below] = source_level
    data.y += np.random.normal(0, noise_std, n)

    data.set_train()
    data.set_true_y()
    data.is_regression = True
    if dim == 1:
        array_functions.plot_2d(data.x, data.y, data.data_set_ids)

    # NOTE(review): the dim > 1 branch %-formats the same template used for
    # dim == 1; confirm the constant really contains a placeholder.
    out_path = synthetic_piecewise_file
    if dim > 1:
        out_path = synthetic_piecewise_file % dim
    if file_dir != '':
        out_path = file_dir + '/' + out_path
    helper_functions.save_object(out_path, data)
def create_synthetic_delta_linear_transfer():
    """Build, plot and save the "linear delta" synthetic transfer data set.

    Target and source share a slope of 5; the source function is offset by
    a constant +4.  Sampling is delegated to
    ``create_synthetic_regression_transfer``.
    """
    slope = 5

    def target_fun(x):
        return slope * x

    def source_fun(x):
        return slope * x + 4

    data = create_synthetic_regression_transfer(target_fun, source_fun)
    array_functions.plot_2d(
        data.x,
        data.y,
        data.data_set_ids,
        title="Linear Delta Data Set",
    )
    helper_functions.save_object(synthetic_delta_linear_file, data)
 def plot_target(self):
     """Plot the target learner's predictions over an even grid on [0, 1]."""
     grid = array_functions.vec_to_2d(np.linspace(0, 1))
     query = data_lib.Data()
     query.x = grid
     # Labels are unknown for the query grid, so fill them with NaN.
     query.y = np.full(grid.shape[0], np.nan)
     query.is_regression = True
     prediction = self.target_learner.predict(query)
     array_functions.plot_2d(grid, prediction.y)
def create_digits():
    """Plot every normalized feature of the digits data set against its label.

    Nothing is returned or saved; the function only produces plots.
    """
    digits = datasets.load_digits()
    features, labels = digits.data, digits.target
    # Iterating the transpose walks the feature columns one at a time.
    for column in features.T:
        array_functions.plot_2d(array_functions.normalize(column), labels, alpha=.01)
def create_diabetes():
    """Plot every normalized diabetes feature against the normalized target.

    Raises:
        AssertionError: always, once all plots have been produced.
    """
    diabetes = datasets.load_diabetes()
    features, target = diabetes.data, diabetes.target
    # Iterating the transpose walks the feature columns one at a time.
    for column in features.T:
        array_functions.plot_2d(
            array_functions.normalize(column),
            array_functions.normalize(target),
        )
    # NOTE(review): unconditional failure after plotting -- presumably a
    # deliberate "stop here" while exploring the data; confirm before reuse.
    assert False
def create_digits():
    """Plot each normalized digits feature against the digit label.

    Purely a visualization helper: nothing is returned or written to disk.
    """
    loaded = datasets.load_digits()
    feature_matrix = loaded.data
    digit_labels = loaded.target
    # Each row of the transpose is one feature column of the original matrix.
    for feature_column in feature_matrix.T:
        scaled = array_functions.normalize(feature_column)
        array_functions.plot_2d(scaled, digit_labels, alpha=0.01)
def create_diabetes():
    """Plot each normalized diabetes feature against the normalized target.

    Raises:
        AssertionError: always, after the last plot has been drawn.
    """
    loaded = datasets.load_diabetes()
    feature_matrix = loaded.data
    response = loaded.target
    # Each row of the transpose is one feature column of the original matrix.
    for feature_column in feature_matrix.T:
        scaled_feature = array_functions.normalize(feature_column)
        scaled_response = array_functions.normalize(response)
        array_functions.plot_2d(scaled_feature, scaled_response)
    # NOTE(review): the function always ends by failing this assertion --
    # presumably an intentional halt during exploration; confirm.
    assert False
def create_synthetic_delta_linear_transfer():
    """Create, plot and save a transfer set whose tasks differ by a constant.

    Both functions are lines with slope 5; the source line is shifted up by
    4.  ``create_synthetic_regression_transfer`` does the actual sampling.
    """
    slope = 5

    def target_fun(x):
        return slope * x

    def source_fun(x):
        return slope * x + 4

    data = create_synthetic_regression_transfer(target_fun, source_fun)
    plot_title = 'Linear Delta Data Set'
    array_functions.plot_2d(data.x, data.y, data.data_set_ids, title=plot_title)
    helper_functions.save_object(synthetic_delta_linear_file, data)
# Example #11
# 0
def vis_data():
    """Plot every feature of the pickled raw data set against the labels.

    The data is loaded from ``data_sets/<data_file_dir>/raw_data.pkl``,
    where ``data_file_dir`` is a module-level setting.  Each plot is titled
    with the feature's name when names are available.
    """
    raw_path = 'data_sets/' + data_file_dir + '/raw_data.pkl'
    data = helper_functions.load_object(raw_path)
    features, labels = data.x, data.y
    for column in range(data.p):
        if data.feature_names is not None:
            title = data.feature_names[column]
        else:
            title = 'Feature Names Missing'
        array_functions.plot_2d(
            features[:, column],
            labels,
            data_set_ids=data.data_set_ids,
            title=title,
        )
def create_synthetic_multitask_transfer():
    """Build, plot and save the "multitask slant" synthetic data set.

    One source line (slope 8, no offset) and two target lines (slope 4 with
    offset 3, slope 4.5 with offset 8) are handed to
    ``create_synthetic_regression_transfer``; the list of target functions
    is passed first, the source function second.
    """
    slope_source = 8
    target_slope1 = 4
    target_slope2 = 4.5

    def source_func(x):
        return slope_source * x

    def first_target(x):
        return target_slope1 * x + 3

    def second_target(x):
        return target_slope2 * x + 8

    target_funcs = [first_target, second_target]
    data = create_synthetic_regression_transfer(target_funcs, source_func)
    array_functions.plot_2d(data.x, data.y, data.data_set_ids, title='Multitask Slant')
    helper_functions.save_object(synthetic_slant_multitask, data)
def create_synthetic_classification(file_dir="", local=True):
    """Generate, plot and save a 1-D synthetic classification transfer set.

    Draws 200 target points (data set id 0) and 200 source points (id 1)
    uniformly at random.  Over the four quarter ranges of the feature the
    target classes are 1, 2, 1, 2.  The source classes are 3, 4, 3, 4, or
    3, 4, 4, 3 when ``local`` is true (the upper two quarters are swapped).
    Only the source points are plotted.

    Args:
        file_dir: optional directory prepended (with "/") to the file name.
        local: selects the swapped source labelling and the "local" file name.
    """
    dim = 1
    n_target = 200
    n_source = 200
    n = n_target + n_source

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)

    x, ids = data.x, data.data_set_ids
    quarter_masks = [
        array_functions.in_range(x, 0, 0.25),
        array_functions.in_range(x, 0.25, 0.5),
        array_functions.in_range(x, 0.5, 0.75),
        array_functions.in_range(x, 0.75, 1),
    ]
    is_target = ids == 0
    is_source = ids == 1

    target_classes = (1, 2, 1, 2)
    source_classes = (3, 4, 4, 3) if local else (3, 4, 3, 4)
    # Quarters are written in ascending order so that, should two ranges
    # share a boundary point, the later quarter wins.
    for mask, label in zip(quarter_masks, target_classes):
        data.y[mask & is_target] = label
    for mask, label in zip(quarter_masks, source_classes):
        data.y[mask & is_source] = label

    data.set_true_y()
    data.set_train()
    data.is_regression = False

    # add_noise is called with rate 0 and a separate class pair per data set.
    noise_rate = 0
    data.add_noise(noise_rate, is_target, np.asarray([1, 2]))
    data.add_noise(noise_rate, is_source, np.asarray([3, 4]))

    out_path = synthetic_classification_file
    if local:
        out_path = synthetic_classification_local_file
    array_functions.plot_2d(data.x[is_source, :], data.y[is_source])
    if file_dir != "":
        out_path = file_dir + "/" + out_path
    helper_functions.save_object(out_path, data)
def create_synthetic_classification(file_dir='', local=True):
    """Create, plot and save a one-feature classification transfer data set.

    200 target points (data set id 0) take classes 1, 2, 1, 2 over the four
    quarter ranges of the feature; 200 source points (id 1) take classes
    3, 4, 3, 4, with the last two quarters swapped to 4, 3 when ``local``
    is true.  Only the source points are plotted before saving.

    Args:
        file_dir: optional directory prepended (with '/') to the file name.
        local: choose the swapped source labelling and the "local" file name.
    """
    dim = 1
    n_target = 200
    n_source = 200
    n = n_target + n_source

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, dim))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1
    data.y = np.zeros(n)

    x, ids = data.x, data.data_set_ids
    bounds = ((0, .25), (.25, .5), (.5, .75), (.75, 1))
    quarters = [array_functions.in_range(x, low, high) for low, high in bounds]
    from_target = ids == 0
    from_source = ids == 1

    if local:
        source_classes = (3, 4, 4, 3)
    else:
        source_classes = (3, 4, 3, 4)
    # Ascending quarter order: a later quarter overrides an earlier one on
    # any point both ranges happen to include.
    for quarter, target_class in zip(quarters, (1, 2, 1, 2)):
        data.y[quarter & from_target] = target_class
    for quarter, source_class in zip(quarters, source_classes):
        data.y[quarter & from_source] = source_class

    data.set_true_y()
    data.set_train()
    data.is_regression = False

    # Noise rate is 0; each data set is given its own pair of classes.
    noise_rate = 0
    data.add_noise(noise_rate, from_target, np.asarray([1, 2]))
    data.add_noise(noise_rate, from_source, np.asarray([3, 4]))

    file_name = synthetic_classification_file
    if local:
        file_name = synthetic_classification_local_file
    array_functions.plot_2d(data.x[from_source, :], data.y[from_source])
    if file_dir != '':
        file_name = file_dir + '/' + file_name
    helper_functions.save_object(file_name, data)
def create_synthetic_step_linear_transfer(file_dir=""):
    """Generate, plot and save the "linear step" synthetic transfer data set.

    Draws 100 target points (data set id 0) and 100 source points (id 1)
    uniformly at random in one dimension.  Every label starts as ``5 * x``;
    source points with ``x >= 0.5`` are shifted up by 4, and Gaussian noise
    with standard deviation 0.5 is added to all labels.

    Args:
        file_dir: optional directory prepended (with "/") to the file name.
    """
    n_target = 100
    n_source = 100
    n = n_target + n_source
    noise_std = 0.5

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, 1))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1

    # The single feature column, scaled by the shared slope, as a 1-D vector.
    data.y = data.x[:, 0] * 5
    shifted = (data.data_set_ids == 1) & (data.x[:, 0] >= 0.5)
    data.y[shifted] += 4
    data.y += np.random.normal(0, noise_std, n)

    data.set_defaults()
    data.is_regression = True
    array_functions.plot_2d(
        data.x,
        data.y,
        data.data_set_ids,
        title="Linear Step Data Set",
    )

    out_path = synthetic_step_linear_transfer_file
    if file_dir != "":
        out_path = file_dir + "/" + out_path
    helper_functions.save_object(out_path, data)
def create_synthetic_step_linear_transfer(file_dir=''):
    """Create, plot and save a linear data set with a step in the source task.

    100 target points (data set id 0) and 100 source points (id 1) are drawn
    uniformly at random in one dimension.  Labels follow ``5 * x``; the
    source labels gain +4 wherever ``x >= .5``.  Gaussian noise with
    standard deviation .5 is added to every label.

    Args:
        file_dir: optional directory prepended (with '/') to the file name.
    """
    n_target = 100
    n_source = 100
    n = n_target + n_source
    sigma = .5

    data = data_class.Data()
    data.x = np.random.uniform(0, 1, (n, 1))
    data.data_set_ids = np.zeros(n)
    data.data_set_ids[n_target:] = 1

    feature = data.x[:, 0]
    is_source = data.data_set_ids == 1
    # Flattened copy of the scaled feature column: one label per sample.
    data.y = feature * 5
    data.y[is_source & (feature >= .5)] += 4
    data.y += np.random.normal(0, sigma, n)

    data.set_defaults()
    data.is_regression = True
    plot_title = 'Linear Step Data Set'
    array_functions.plot_2d(data.x, data.y, data.data_set_ids, title=plot_title)

    file_name = synthetic_step_linear_transfer_file
    if file_dir != '':
        file_name = file_dir + '/' + file_name
    helper_functions.save_object(file_name, data)
    def train_g_nonparametric(self, target_data):
        """Estimate the per-sample mixing weight ``g`` and store it in ``self.g``.

        Steps:

        1. For the labeled samples of ``target_data``, solve a convex problem
           for ``g`` in [0, .5] minimizing the squared error of
           ``g * y_s + (1 - g) * y_t`` against ``y_true`` plus ``self.C``
           times a graph regularizer.  The regularizer is built from
           ``array_functions.make_laplacian_kNN`` and is either a quadratic
           form or, when ``self.use_fused_lasso`` is set, the penalty from
           ``cvx_functions.create_fused_lasso``.
        2. Fit a ``method.NadarayaWatsonMethod`` regressor on the solved
           weights (tuned with ``tune_loo``) and predict ``g`` for every
           sample in ``target_data``.
        3. Overwrite the labeled entries of the prediction with the solved
           values and assert that no entry is NaN.

        If solving fails with an ordinary exception, ``g`` falls back to all
        zeros on the labeled samples and the current hyperparameters are
        printed.  ``KeyboardInterrupt`` and ``SystemExit`` are not caught.

        Args:
            target_data: data object exposing ``x``, ``is_labeled`` and
                ``get_subset``.  Its ``is_regression`` attribute is set to
                ``True`` as a side effect.
        """
        # presumably target predictions, source predictions and true labels
        # for the labeled samples -- verify against get_predictions
        y_t, y_s, y_true = self.get_predictions(target_data)

        labeled_inds = target_data.is_labeled.nonzero()[0]
        n_labeled = len(labeled_inds)
        g = cvx.Variable(n_labeled)
        # Alternative kept for reference: a uniform-radius Laplacian,
        #   array_functions.make_laplacian_uniform(
        #       target_data.x[labeled_inds,:], self.radius, metric)
        # A small multiple of the identity is added to the Laplacian.
        L = array_functions.make_laplacian_kNN(target_data.x[labeled_inds,:],self.k,self.metric) \
            + .0001*np.identity(n_labeled)
        if self.use_fused_lasso:
            reg = cvx_functions.create_fused_lasso(-L, g)
        else:
            reg = cvx.quad_form(g,L)
        loss = cvx.sum_entries(
            cvx.power(
                cvx.mul_elemwise(y_s[:,0], g) + cvx.mul_elemwise(y_t[:,0], (1-g)) - y_true[:,0],
                2
            )
        )
        constraints = [g >= 0, g <= .5]
        obj = cvx.Minimize(loss + self.C*reg)
        prob = cvx.Problem(obj,constraints)

        assert prob.is_dcp()
        try:
            prob.solve()
            g_value = np.reshape(np.asarray(g.value),n_labeled)
        except Exception:
            # Best-effort fallback: any solver failure (including a missing
            # solution that cannot be reshaped) yields a constant g.  Only
            # ordinary exceptions are handled so Ctrl-C still interrupts.
            k = 0
            # Single-argument print(...) emits the same text on Python 2 and 3.
            print('CVX problem: setting g = ' + str(k))
            print('\tsigma=' + str(self.sigma))
            print('\tC=' + str(self.C))
            print('\tradius=' + str(self.radius))
            g_value = k*np.ones(n_labeled)
        if self.should_plot_g and enable_plotting and target_data.x.shape[1] == 1:
            array_functions.plot_2d(target_data.x[labeled_inds,:],g_value)

        labeled_train_data = target_data.get_subset(labeled_inds)
        assert labeled_train_data.y.shape == g_value.shape
        # Fit the smoother on a deep copy of the configs so the loss-function
        # override below does not touch self.configs.
        g_nw = method.NadarayaWatsonMethod(copy.deepcopy(self.configs))
        labeled_train_data.is_regression = True
        labeled_train_data.y = g_value
        labeled_train_data.true_y = g_value
        g_nw.configs.loss_function = loss_function.MeanSquaredError()

        g_nw.tune_loo(labeled_train_data)
        g_nw.train(labeled_train_data)
        # Debugging aid kept for reference: print the solved g values sorted
        # by x, together with g_nw.sigma and self.C.
        target_data.is_regression = True
        self.g = g_nw.predict(target_data).fu
        # Labeled samples keep the exactly solved weights, not the smoothed ones.
        self.g[labeled_inds] = g_value
        assert not np.any(np.isnan(self.g))
# Example #18
# 0
def vis_data():
    """Visualize the pickled raw data set selected by ``data_file_dir``.

    Behaviour is driven by module-level settings:

    * ``plot_features``: plot every feature column against the labels and
      return without calling ``plt.show``.
    * otherwise, for each selected data set id draw either an image of the
      estimated gradients (``plot_gradients``) or values (``plot_values``,
      which takes precedence), or a heat map of the raw labels, then block
      on ``plt.show``.
    * ``plot_climate``: load a terrain image, use data set ids 0 and 4, and
      draw the image under the heat map.

    Titles depend on ``data_file_dir``; unknown directories get empty titles.
    """
    raw_path = 'data_sets/' + data_file_dir + '/raw_data.pkl'
    data = helper_functions.load_object(raw_path)
    features = data.x
    labels = data.y

    if plot_climate:
        img_path = 'C:/PythonFramework/far_transfer/figures/climate-terrain.png'
        image = imread(img_path)

    titles_by_dir = {
        'climate-month': [
            'Max Temperature Gradient: January',
            'Max Temperature Gradient: April'
        ],
        'irs-income': ['Income', 'Household Size'],
        'zillow-traffic': ['Morning Taxi Pickups', 'Housing Prices'],
        'kc-housing-spatial-floors': [
            'House Prices: 1 Floor', 'House Prices: 2 or More Floors'
        ],
    }
    titles = titles_by_dir.get(data_file_dir, ['', ''])
    # Both the climate overlay and the climate-month data use ids 0 and 4.
    if plot_climate or data_file_dir == 'climate-month':
        label_idx = [0, 4]
    else:
        label_idx = [0, 1]

    if plot_features:
        for column in range(data.p):
            if data.feature_names is not None:
                title = data.feature_names[column]
            else:
                title = 'Feature Names Missing'
            array_functions.plot_2d(features[:, column],
                                    labels,
                                    data_set_ids=data.data_set_ids,
                                    title=title)
        return

    for set_id, title in zip(label_idx, titles):
        in_set = data.data_set_ids == set_id
        if not (plot_gradients or plot_values):
            # Raw labels as a heat map; every data set is drawn on figure 4.
            fig = plt.figure(4)
            array_functions.plot_heatmap(features[in_set, :],
                                         labels[in_set],
                                         sizes=dot_sizes,
                                         fig=fig,
                                         title=title)
            if plot_climate:
                plt.imshow(image, zorder=0, extent=[-90, -78, 33.5, 38])
                array_functions.move_fig(fig, 1400, 600)
            continue

        gradients, values = estimate_gradients(features, labels, in_set)
        surface = values if plot_values else gradients
        # Shift so the minimum is 0, then divide by the new maximum (in place).
        surface -= surface.min()
        surface /= surface.max()
        # Only the first data set of a non-zillow directory is additionally
        # square-root scaled.
        if data_file_dir != 'zillow-traffic' and set_id == 0:
            surface = np.sqrt(surface)

        fig = plt.figure(set_id)
        plt.title(title)
        plt.axis('off')
        plt.imshow(surface)
        array_functions.move_fig(fig, 750, 400)
    plt.show(block=True)