def predict(self, data):
        # d = data_lib.Data(np.expand_dims(data.source_y_pred, 1), data.y)
        y_pred_source = data.source_y_pred
        I = np.arange(y_pred_source.size)
        if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
            I = np.random.choice(y_pred_source.size, self.predict_sample, replace=False)
        #L = array_functions.make_laplacian(y_pred_source[I], self.sigma_tr)
        #W = array_functions.make_rbf(self.transform.transform(self.x), self.sigma_nw, x2=self.transform.transform(data.x[I,:])).T
        k_L = int(self.sigma_tr*I.size)
        L = array_functions.make_laplacian_kNN(y_pred_source[I], k_L)
        k_W = int(self.sigma_nw*self.x.shape[0])
        W = array_functions.make_knn(self.transform.transform(data.x[I, :]), k_W, x2=self.transform.transform(self.x))
        S = array_functions.make_smoothing_matrix(W)

        A = np.eye(I.size) + self.C*L
        try:
            f = np.linalg.lstsq(A, S.dot(self.y))[0]
        except Exception:
            print('GraphTransferNW:predict failed, returning mean')
            f = self.y.mean() * np.ones(data.true_y.shape)

        o = results.Output(data)
        if self.predict_sample is not None:
            nw_data = data_lib.Data(data.x[I,:], f)
            self.nw_learner.train_and_test(nw_data)
            nw_output = self.nw_learner.predict(data)
            o.y = nw_output.y
            o.fu = nw_output.y
        else:
            o.y = f
            o.fu = f

        return o
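
# The lstsq step in predict() above amounts to the closed-form solution of the
# graph-regularized smoothing problem  min_f ||f - S y||^2 + C * f' L f,
# namely f = (I + C L)^{-1} S y.  Below is a self-contained numpy sketch of
# that solve; building S (a row-stochastic kNN smoother) and L (a graph
# Laplacian over the source predictions) is left to the project's
# array_functions helpers, so this function is only illustrative.
import numpy as np

def graph_regularized_smoothing(S, L, y, C):
    # S: (n, m) smoothing matrix mapping the m training labels onto n points.
    # L: (n, n) symmetric PSD graph Laplacian; C: regularization strength.
    A = np.eye(L.shape[0]) + C * L        # positive definite, so solvable
    return np.linalg.solve(A, S.dot(y))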
Example #2
def compute_predictions_nonparametric(self, X, Y, estimate=False, learner_reg=None):
    if learner_reg is None:
        learner_reg = self.learner_reg
    if not estimate:
        return Y.copy()
    W = self.target_learner.compute_kernel(X, X, bandwidth=learner_reg)
    #np.fill_diagonal(W, 0)
    S = array_functions.make_smoothing_matrix(W)
    return S.dot(Y)
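
# array_functions.make_smoothing_matrix is not shown in these examples; from
# the way it is used (S.dot(Y) as a locally weighted, Nadaraya-Watson style
# estimate), it presumably row-normalizes a kernel/weight matrix.  A minimal
# sketch under that assumption (the project's version may treat empty rows or
# sparse inputs differently):
import numpy as np

def make_smoothing_matrix_sketch(W, eps=1e-12):
    # Divide each row by its sum so that S.dot(Y) averages Y with weights W.
    row_sums = np.maximum(W.sum(axis=1, keepdims=True), eps)
    return W / row_sums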
Example #3
    def evaluate_selection(self, W_p, W_y, I, y_true, p_true):
        # Score a candidate index set I: how well the selected points
        # reconstruct y_true through the smoothing matrix S, plus how well
        # their summed kernel weights match p_true, traded off by C.
        p_pred = W_p[:, I].sum(1)
        S = array_functions.make_smoothing_matrix(W_y[:, I])
        y_pred = S.dot(y_true[I])
        norm_order = 2
        if self.use_l1_loss:
            norm_order = 1
        error = norm(y_pred - y_true, ord=norm_order) + self.C * norm(p_pred - p_true, ord=norm_order)
        return error
    def train(self, data):
        assert data.is_regression
        y_s, y_true = self.get_predictions(data)
        I = data.is_target & data.is_labeled
        #y_s = y_s[I]
        y_s = data.y[data.is_source]
        y_true = data.true_y[I]

        x_s = data.x[data.is_source]
        x_s = array_functions.append_column(x_s, data.y[data.is_source])
        x_s = array_functions.standardize(x_s)
        x_t = data.x[I]
        x_t = array_functions.append_column(x_t, data.y[I])
        x_t = array_functions.standardize(x_t)
        Wrbf = array_functions.make_rbf(x_t, self.sigma, self.metric, x2=x_s)
        S = array_functions.make_smoothing_matrix(Wrbf)
        w = cvx.Variable(x_s.shape[0])
        constraints = [w >= 0]
        reg = cvx.norm(w)**2
        loss = cvx.sum_entries(
            cvx.power(
                S*cvx.diag(w)*y_s - y_true,2
            )
        )
        obj = cvx.Minimize(loss + self.C*reg)
        prob = cvx.Problem(obj,constraints)
        assert prob.is_dcp()
        try:
            prob.solve()
            #g_value = np.reshape(np.asarray(g.value),n_labeled)
            w_value = w.value
        except Exception:
            k = 0
            #assert prob.status is None
            print('CVX problem: setting w = ' + str(k))
            print('\tsigma=' + str(self.sigma))
            print('\tC=' + str(self.C))
            w_value = k * np.ones(x_s.shape[0])

        all_data = data.get_transfer_subset(self.configs.labels_to_keep,include_unlabeled=True)
        all_data.instance_weights = np.ones(all_data.n)
        all_data.instance_weights[all_data.is_source] = w_value
        self.instance_weights = all_data.instance_weights
        self.target_learner.train_and_test(all_data)

        self.x = all_data.x[all_data.is_source]
        self.w = all_data.instance_weights[all_data.is_source]
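
# Standalone sketch (not part of the class above) of the instance-weighting
# problem that train() solves:
#   min_{w >= 0}  ||S diag(w) y_s - y_t||^2 + C ||w||^2
# written against the current cvxpy API (sum_squares, @) rather than the
# legacy sum_entries / * syntax used above; names and shapes are illustrative.
import numpy as np
import cvxpy as cp

def solve_instance_weights(S, y_s, y_t, C):
    # S: (n_target, n_source) smoothing matrix, y_s: source labels,
    # y_t: target labels, C: ridge penalty on the instance weights.
    w = cp.Variable(y_s.shape[0], nonneg=True)
    residual = S @ cp.multiply(w, y_s) - y_t      # equals S diag(w) y_s - y_t
    prob = cp.Problem(cp.Minimize(cp.sum_squares(residual) + C * cp.sum_squares(w)))
    prob.solve()
    return np.asarray(w.value)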
    def predict(self, data):
        # d = data_lib.Data(np.expand_dims(data.source_y_pred, 1), data.y)
        y_pred_source = data.source_y_pred
        I = np.arange(y_pred_source.size)
        if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
            I = np.random.choice(y_pred_source.size,
                                 self.predict_sample,
                                 replace=False)
        if self.use_rbf:
            #L = array_functions.make_laplacian(y_pred_source[I], self.sigma_tr)
            W_source_pred = array_functions.make_rbf(y_pred_source[I],
                                                     self.sigma_tr)
            if self.oracle_guidance is not None:
                # Oracle guidance: overwrite a randomly sampled subset of the
                # prediction-graph entries with similarities derived from the
                # true target labels.
                y = data.true_y[I]

                n_y = y.size
                num_to_sample = math.ceil(self.oracle_guidance * n_y**2)
                rand_index1 = np.random.choice(n_y,
                                               int(num_to_sample),
                                               replace=True)
                rand_index2 = np.random.choice(n_y,
                                               int(num_to_sample),
                                               replace=True)
                if self.oracle_guidance_binary:
                    target_distances = array_functions.make_graph_distance(y)
                    distance_threshold = .2 * (y.max() - y.min())
                    W_source_pred[rand_index1, rand_index2] = target_distances[
                        rand_index1, rand_index2] <= distance_threshold
                    W_source_pred[rand_index2, rand_index1] = target_distances[
                        rand_index2, rand_index1] <= distance_threshold
                else:
                    y_scaled = array_functions.normalize(y) * (
                        y_pred_source.max() - y_pred_source.min())
                    W_oracle_pred = array_functions.make_rbf(
                        y_scaled, self.sigma_tr)
                    W_source_pred[rand_index1,
                                  rand_index2] = W_oracle_pred[rand_index1,
                                                               rand_index2]
                    W_source_pred[rand_index2,
                                  rand_index1] = W_oracle_pred[rand_index2,
                                                               rand_index1]
            W = array_functions.make_rbf(self.transform.transform(self.x),
                                         self.sigma_nw,
                                         x2=self.transform.transform(
                                             data.x[I, :])).T

        else:
            assert self.oracle_guidance is None
            k_L = int(self.sigma_tr * I.size)
            #L = array_functions.make_laplacian_kNN(y_pred_source[I], k_L)
            W_source_pred = array_functions.make_knn(y_pred_source[I], k_L)
            k_W = int(self.sigma_nw * self.x.shape[0])
            W = array_functions.make_knn(self.transform.transform(
                data.x[I, :]),
                                         k_W,
                                         x2=self.transform.transform(self.x))
        sparsify_prediction_graph = False
        if self.use_prediction_graph_radius:
            sparsify_prediction_graph = True
            W_sparse = array_functions.make_graph_radius(
                self.transform.transform(data.x[I, :]),
                radius=self.radius,
            )
        if self.use_prediction_graph_sparsification:
            sparsify_prediction_graph = True
            W_sparse = array_functions.make_knn(self.transform.transform(
                data.x[I, :]),
                                                self.k_sparsification,
                                                normalize_entries=False)
            #W_L = array_functions.make_knn(y_pred_source[I], k_L)
        if sparsify_prediction_graph:
            W_source_pred = W_source_pred * W_sparse
        S = array_functions.make_smoothing_matrix(W)
        timing_test = False
        # Rescale C by the training-set size relative to the total edge weight
        # of the prediction graph.
        C = self.C * self.x.shape[0] / W_source_pred.sum()
        # Nystrom + Woodbury branch: approximate (I + C*L)^{-1} S y without a full solve.
        if (self.nystrom_percentage is not None and self.nystrom_percentage > 0) or timing_test:
            if timing_test:
                tic()
            Sy = S.dot(self.y)
            if C != 0:
                lamb = 1 / float(C)
                f = None
                tic()
                inv_approx, _ = array_functions.nystrom_woodbury_laplacian(
                    W_source_pred, lamb, self.nystrom_percentage)
                self.predict_time = toc()
                #_, f2 = array_functions.nystrom_woodbury_laplacian(W_source_pred, lamb, self.nystrom_percentage, v=Sy)
                if f is not None:
                    f *= lamb
                else:
                    inv_approx *= lamb
                    f = inv_approx.dot(Sy)
            else:
                f = Sy
            if timing_test:
                toc()
        # Exact branch: solve (I + C*L) f = S y with a direct least-squares solve.
        if self.nystrom_percentage == 0 or self.nystrom_percentage is None or timing_test:
            if timing_test:
                tic()
            L = array_functions.make_laplacian_with_W(W_source_pred,
                                                      normalized=False)
            A = np.eye(I.size) + C * L
            try:
                tic()
                f = np.linalg.lstsq(A, S.dot(self.y))[0]
                self.predict_time = toc()
            except Exception:
                print('GraphTransferNW:predict failed, returning mean')
                f = self.y.mean() * np.ones(data.true_y.shape)
            if timing_test:
                toc()
        if timing_test:
            A_inv = np.linalg.inv(A)
            print('approx error: ' + str(norm(inv_approx - A_inv) / norm(A_inv)))
        o = results.Output(data)
        if self.predict_sample is not None:
            nw_data = data_lib.Data(data.x[I, :], f)
            self.nw_learner.train_and_test(nw_data)
            nw_output = self.nw_learner.predict(data)
            o.y = nw_output.y
            o.fu = nw_output.y
        else:
            o.y = f
            o.fu = f

        return o
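
# The nystrom_percentage branch of predict() avoids forming and inverting the
# full n x n matrix (I + C*L).  Based on how its output is used ("inv_approx
# *= lamb" with lamb = 1/C), array_functions.nystrom_woodbury_laplacian is
# assumed to approximate (lam*I + L)^{-1} with L = D - W, via a Nystrom
# low-rank factorization of W plus the Woodbury identity.  A self-contained
# sketch under that assumption (the project's helper may differ in details
# such as column sampling or pseudo-inverse handling):
import numpy as np

def nystrom_woodbury_laplacian_sketch(W, lam, percentage, seed=0):
    n = W.shape[0]
    m = max(1, int(np.ceil(percentage * n)))
    idx = np.random.RandomState(seed).choice(n, m, replace=False)
    # Nystrom: W ~= U V with U = W[:, idx] and V = pinv(W[idx, idx]) U^T.
    U = W[:, idx]
    V = np.linalg.pinv(W[np.ix_(idx, idx)]).dot(U.T)
    # A = lam*I + D is diagonal, so its inverse is cheap.
    a_inv = 1.0 / (lam + W.sum(axis=1))
    # Woodbury: (A - U V)^{-1} = A^{-1} + A^{-1} U (I - V A^{-1} U)^{-1} V A^{-1}.
    inner = np.eye(m) - V.dot(a_inv[:, None] * U)
    correction = (a_inv[:, None] * U).dot(np.linalg.solve(inner, V * a_inv[None, :]))
    return np.diag(a_inv) + correction

# Multiplying the returned matrix by lam recovers (I + C*L)^{-1}, matching the
# "inv_approx *= lamb" step in predict() above.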