    def get_predictions(self, target_data):
        # Leave-one-out predictions from the target learner.
        o = self.target_learner.predict_loo(target_data)
        # Source predictions: either leave-one-out or a plain forward pass.
        if self.source_loo:
            o_source = self.source_learner.train_predict_loo(target_data)
        else:
            o_source = self.source_learner.predict(target_data)
        is_labeled = target_data.is_labeled

        target_labels = self.configs.target_labels
        if self.use_estimated_f:
            # Re-estimate the target predictions using only the labeled subset.
            o = self.target_learner.predict_loo(target_data.get_subset(is_labeled))
        if target_data.is_regression:
            y_t = array_functions.vec_to_2d(o.fu)
            if self.source_loo:
                y_s = array_functions.vec_to_2d(o_source.fu)
            else:
                y_s = array_functions.vec_to_2d(o_source.fu[is_labeled])
            y_true = array_functions.vec_to_2d(o.true_y)
        else:
            # Classification path is disabled until it is updated for the current label format.
            assert False, 'Update this?'
            y_t = o.fu[:,target_labels]
            y_s = o_source.fu[:,target_labels]
            y_s = y_s[is_labeled,:]
            y_true = array_functions.make_label_matrix(o.true_y)[:,target_labels]
            y_true = array_functions.try_toarray(y_true)
        return (y_t, y_s, y_true)
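For intuition, here is a minimal self-contained sketch of what a leave-one-out predictor like predict_loo is assumed to return. The loo_predict helper is illustrative only (plain Nadaraya-Watson smoothing with numpy), not the codebase's implementation:

import numpy as np

def loo_predict(x, y, sigma=0.1):
    # Leave-one-out Nadaraya-Watson estimates: each y[i] is predicted
    # from every other point, never from itself.
    d2 = (x[:, None] - x[None, :]) ** 2   # pairwise squared distances
    K = np.exp(-d2 / (2 * sigma ** 2))
    np.fill_diagonal(K, 0.0)              # exclude each point from its own estimate
    return K.dot(y) / K.sum(axis=1)

x = np.linspace(0, 1, 20)
y = np.sin(2 * np.pi * x)
print(np.abs(loo_predict(x, y) - y).mean())  # per-point estimates built without the point itself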
Example #2
    def train(self, data):
        # Debugging overrides, kept for reference:
        #   self.C = 1; self.C2 = 10; self.k = 1
        #   self.C = 100
        #   self.configs.use_fused_lasso = False
        g_max = 2
        # Oracle initialization: g is g_max where x < 0.5 and 0 elsewhere.
        g0_oracle = np.zeros(data.n)
        g0_oracle[data.x[:, 0] < .5] = g_max
        f = self.create_eval(data, self.C)      # objective f(g)
        g = self.create_gradient(data, self.C)  # analytic gradient, passed as jac below
        # Box constraints: each g_i is confined to [0, g_max].
        bounds = list((0, g_max) for i in range(data.n))
        if self.include_bias:
            # The first variable is an unconstrained bias term.
            bounds = [(None, None)] + bounds
        else:
            bounds = [(0, 0)] + bounds
        # One extra variable for the bias, which occupies index 0.
        n = data.n + 1
        g0 = np.zeros(n)
        g0[1:] = g0_oracle
        x = data.x
        y_s = np.squeeze(data.y_s[:, 0])
        y_t = np.squeeze(data.y_t[:, 0])
        y = data.y
        # Negating the kNN graph Laplacian recovers the adjacency weights used below.
        W = -array_functions.make_laplacian_kNN(data.x, self.k,
                                                self.configs.metric)
        #W = array_functions.make_graph_radius(data.x, self.radius, self.configs.metric)
        #W = array_functions.make_graph_adjacent(data.x, self.configs.metric)
        W = array_functions.try_toarray(W)
        if not data.is_regression:
            # Reduce classification to regression on the first class indicator.
            y = array_functions.make_label_matrix(
                data.y)[:, data.classes].toarray()
            y = y[:, 0]
        reg = self.create_reg(data.x)
        reg2 = self.create_reg2(data.x)
        if self.configs.use_fused_lasso:
            # SLSQP supports the nonlinear inequality constraint fused_lasso(g, W) <= C,
            # written here as C - fused_lasso(g, W) >= 0.
            method = 'SLSQP'
            max_iter = 10000
            maxfun = 10000
            fused_lasso = ScipyOptNonparametricHypothesisTransfer.fused_lasso
            lasso = lambda x: self.C - fused_lasso(x, W)
            constraints = [{'type': 'ineq', 'fun': lasso}]
            if self.configs.no_reg:
                constraints = ()
            # C is passed as 0 because the fused-lasso penalty is enforced as a constraint instead.
            args = (x, y, y_s, y_t, 0, reg, self.C2, reg2)
        else:
            # Without the fused-lasso constraint, box-constrained L-BFGS-B suffices.
            method = 'L-BFGS-B'
            max_iter = np.inf
            maxfun = np.inf
            constraints = ()
            args = (x, y, y_s, y_t, self.C, reg, self.C2, reg2)

        if self.g_supervised:
            # For 1-D data, pin g at the domain extremes; the +1 skips the bias at index 0.
            x = np.squeeze(data.x)
            assert x.ndim == 1
            min_i = x.argmin()
            max_i = x.argmax()
            bounds[min_i + 1] = (1, None)
            bounds[max_i + 1] = (0, 0)

        options = {
            'disp': False,
            'maxiter': max_iter,
            'maxfun': maxfun,  # honored by L-BFGS-B; other methods ignore it with a warning
        }
        results = optimize.minimize(
            f,
            g0,
            method=method,
            bounds=bounds,
            jac=g,
            options=options,
            constraints=constraints,
            args=args,
        )
        compare_results = False
        if compare_results or not results.success:
            # Retry without the analytic gradient; when jac is omitted,
            # minimize falls back to a finite-difference approximation.
            results2 = optimize.minimize(f,
                                         g0,
                                         method=method,
                                         bounds=bounds,
                                         options=options,
                                         constraints=constraints,
                                         args=args)
        if compare_results:
            err = results.x - results2.x
            if norm(results2.x[1:]) == 0:
                print('All zeros - using absolute error')
                print('Abs Error - g: ' + str(norm(err[1:])))
            else:
                print('Rel Error - g: ' + str(
                    norm(err[1:]) / norm(results2.x[1:])))
            if not self.include_bias:
                if norm(results2.x[0]) == 0:
                    print('Abs Error - b: ' + str(norm(err[0])))
                else:
                    print('Rel Error - b: ' + str(
                        norm(err[0]) / norm(results2.x[0])))
            rel_error = norm(results.fun - results2.fun) / norm(results2.fun)
            print('Rel Error - f(g*): ' + str(rel_error))
            if rel_error > .001 and norm(results2.x) > 0:
                print('Big error: C=' + str(self.C) + ' C2=' + str(self.C2))
        if not results.success:
            results = results2
        self.g = results.x[1:]
        self.bias = results.x[0]
        if not results.success:
            # Both solves failed: fall back to g = 0 (no transfer).
            self.g[:] = 0
            self.bias = 0
        # Debugging aid, kept for reference:
        #   I = data.arg_sort()
        #   x = data.x[I, :]
        #   g = array_functions.vec_to_2d(results.x[I])
        #   print(np.hstack((x, g)))
        s = 'C=' + str(self.C) + ',C2=' + str(self.C2) + ',k=' + str(
            self.k) + '-'
        if not results.success:
            s += 'Opt failed - '
        # g should stay within its [0, g_max] bounds; flag violations beyond tolerance.
        has_neg = (self.g < -1e-6).any()
        if has_neg:
            s += 'Negative g - min value: ' + str(self.g.min())
        if not results.success or has_neg:
            print(s + ': ' + results.message)
            self.g[:] = 0
        # Fit the nonparametric smoother g_nw to the learned transfer function g(x).
        g_data = data_lib.Data()
        g_data.x = data.x
        g_data.y = results.x[1:]
        g_data.is_regression = True
        g_data.set_train()
        g_data.set_target()
        g_data.set_true_y()
        self.g_nw.train_and_test(g_data)
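To make the optimizer setup above concrete, here is a minimal self-contained sketch of the same scipy pattern on a toy quadratic (all names here are illustrative, not from the codebase): L-BFGS-B handles the box bounds, and SLSQP handles a nonlinear inequality constraint written as a nonnegative function, like C - fused_lasso(g, W) above.

import numpy as np
from scipy import optimize

n = 5
target = np.linspace(0, 2, n)

def f(g):                      # toy objective: squared distance to a target vector
    return 0.5 * np.sum((g - target) ** 2)

def grad(g):                   # its analytic gradient, passed via jac=
    return g - target

bounds = [(0, 2)] * n          # box constraints, as in the code above
res = optimize.minimize(f, np.zeros(n), method='L-BFGS-B', jac=grad, bounds=bounds)
print(res.x)

# SLSQP variant with an inequality constraint sum(g) <= 3, expressed
# as a function that must stay nonnegative at the solution.
cons = [{'type': 'ineq', 'fun': lambda g: 3.0 - np.sum(g)}]
res2 = optimize.minimize(f, np.zeros(n), method='SLSQP', jac=grad,
                         bounds=bounds, constraints=cons)
print(res2.x, res2.success)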
Example #3
    def train(self, data):
        y_s = np.squeeze(data.y_s[:, 0])
        y_t = np.squeeze(data.y_t[:, 0])
        y = data.y
        if self.constant_b:
            # Constant offset: the correction is just the mean target-source gap.
            self.g = (y_t - y_s).mean()
            return

        is_labeled = data.is_labeled
        labeled_inds = is_labeled.nonzero()[0]
        n_labeled = len(labeled_inds)
        # Restrict targets to labeled rows so shapes match the labeled-only
        # variables below (a no-op when every row is labeled).
        y = y[is_labeled]
        y_s = y_s[is_labeled]
        y_t = y_t[is_labeled]

        plot_lasso_path = False
        if plot_lasso_path:
            # Diagnostic: trace the lasso path of the source residual y_s - y.
            y_tilde = y_s - y
            x = self.transform.fit_transform(data.x[is_labeled, :])
            from sklearn import linear_model
            import matplotlib.pyplot as plt
            regs, _, coefs = linear_model.lars_path(
                x,
                y_tilde,
                method='lasso',
            )
            xx = np.sum(np.abs(coefs.T), axis=1)
            xx /= xx[-1]
            plt.plot(xx, coefs.T)
            ymin, ymax = plt.ylim()
            plt.vlines(xx, ymin, ymax, linestyle='dashed')
            plt.xlabel('Normalized Norm of Coefficients')
            plt.ylabel('Coefficients')
            plt.title('LASSO Path for Boston Housing Data')
            plt.legend(data.feature_names)
            plt.axis('tight')
            xmin, xmax = plt.xlim()
            plt.xlim(xmin, xmax + .3)
            plt.show()

        if self.linear_b:
            # Parametric variant: the correction is linear, g(x) = x * w + b.
            g = cvx.Variable(data.p)
            b = cvx.Variable(1)
            x = self.transform.fit_transform(data.x[is_labeled, :])
            err = self.C3 * y_t + (1 - self.C3) * (y_s + x @ g + b) - y
            reg = cvx.sum_squares(g)  # ridge penalty on the linear coefficients
        else:
            # Nonparametric variant: one correction value per labeled point,
            # smoothed over a similarity graph.
            g = cvx.Variable(n_labeled)
            if self.use_radius:
                W = array_functions.make_graph_radius(data.x[is_labeled, :],
                                                      self.radius,
                                                      self.configs.metric)
            else:
                W = array_functions.make_rbf(data.x[is_labeled, :], self.sigma,
                                             self.configs.metric)
            W = array_functions.try_toarray(W)
            # Symmetrize and normalize the graph weights.
            W = .5 * (W + W.T)
            if W.sum() > 0:
                W = W / W.sum()
            reg = 0
            if W.any():
                if self.use_fused_lasso:
                    reg = cvx_functions.create_fused_lasso(W, g)
                else:
                    # Laplacian smoothness penalty g' L g; the small ridge keeps L positive definite.
                    L = array_functions.make_laplacian_with_W(W)
                    L += 1e-6 * np.eye(L.shape[0])
                    reg = cvx.quad_form(g, L)
            # Convex combination of target predictions and corrected source predictions.
            err = self.C3 * y_t + (1 - self.C3) * (y_s + g) - y
        loss = cvx.sum_squares(err)
        if not self.use_l2:
            reg = cvx.norm(g, 1)  # lasso penalty instead of the quadratic regularizer
        if self.linear_b:
            constraints = [f(g, b, x) for f in self.configs.constraints]
        else:
            constraints = [f(g) for f in self.configs.constraints]
        obj = cvx.Minimize(loss + self.C * reg)
        prob = cvx.Problem(obj, constraints)

        assert prob.is_dcp()
        try:
            prob.solve()
            if self.linear_b:
                b_value = b.value
                g_value = np.reshape(np.asarray(g.value), data.p)
            else:
                g_value = np.reshape(np.asarray(g.value), n_labeled)
        except Exception:
            # Solver failure: fall back to a constant correction g = k.
            k = 0
            print('CVX problem: setting g = ' + str(k))
            g_value = k * np.ones(n_labeled)
            if self.linear_b:
                g_value = k * np.ones(data.p)
                b_value = 0
            print('\tC=' + str(self.C))
            print('\tC2=' + str(self.C2))
            print('\tC3=' + str(self.C3))
        if self.linear_b:
            self.g = g_value
            self.b = b_value
            g_pred = x.dot(g_value)
            self.g_min = g_pred.min()
            self.g_max = g_pred.max()
            return
        # Fit the nonparametric smoother to the learned per-point corrections.
        training_data = data.get_subset(data.is_train)
        assert training_data.y.shape == g_value.shape
        training_data.is_regression = True
        training_data.y = g_value
        training_data.true_y = g_value

        self.g_nw.train_and_test(training_data)
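The convex program above pairs a squared loss with a graph-Laplacian smoothness penalty. Here is a minimal self-contained sketch of that construction on a chain graph (synthetic data and illustrative names, using the current cvxpy API):

import numpy as np
import cvxpy as cvx

n = 10
y = np.linspace(0, 1, n) + 0.1 * np.random.randn(n)

# Chain-graph weights and the corresponding Laplacian L = D - W.
W = np.zeros((n, n))
for i in range(n - 1):
    W[i, i + 1] = W[i + 1, i] = 1.0
L = np.diag(W.sum(axis=1)) - W
L += 1e-6 * np.eye(n)                  # small ridge, as in the code above

g = cvx.Variable(n)
loss = cvx.sum_squares(g - y)          # squared error term
reg = cvx.quad_form(g, L)              # smoothness over the graph
prob = cvx.Problem(cvx.Minimize(loss + 5.0 * reg))
assert prob.is_dcp()
prob.solve()
print(np.round(g.value, 3))            # a smoothed version of y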