Example #1
File: bigsvm.py  Project: vd114/galatea
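Note: these snippets are Python 2 / Theano-era code (xrange, integer floor division of batch counts). They rely on names defined elsewhere in the module; a best-guess import preamble is sketched below, with the caveat that the exact import paths used in vd114/galatea are assumptions, not shown by the snippets:

import numpy as np
import theano
from theano import tensor, config
from theano.tensor.shared_randomstreams import RandomStreams  # Example #3 only
import pylearn.gd.sgd  # old LISA-lab pylearn, provides sgd_updates
# LogisticRegression, multi_hinge_margin and sharedX come from the
# surrounding project; their definitions are not shown in these snippets.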
    def fit(self, X_train, y_train, X_valid, y_valid):
        batchsize = self.batchsize

        n_train = X_train.shape[0]

        # decrease to a multiple of batchsize
        while n_train % batchsize:
            n_train -= 1
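        # (equivalent to the one-liner: n_train -= n_train % batchsize)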

        assert not self.center_and_normalize
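        # given this assertion, the center_and_normalize branch further down
        # is unreachable in this variant of fit()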

        train_features = X_train
        valid_features = X_valid
        train_labels = y_train
        valid_labels = y_valid

        x_i = tensor.matrix(dtype=X_train.dtype)
        y_i = tensor.vector(dtype=y_train.dtype)
        lr = tensor.scalar(dtype=X_train.dtype)

        feature_logreg = LogisticRegression.new(x_i,
                n_in=train_features.shape[1], n_out=self.n_classes,
                dtype=x_i.dtype)

        if self.loss_fn == 'log':
            traincost = feature_logreg.nll(y_i).sum()
        elif self.loss_fn == 'hinge':
            raw_output = tensor.dot(feature_logreg.input, feature_logreg.w) + feature_logreg.b
            traincost = multi_hinge_margin(raw_output, y_i).sum()
        else:
            raise NotImplementedError(self.loss_fn)
        traincost = traincost + abs(feature_logreg.w).sum() * self.l1_regularization
        traincost = traincost + (feature_logreg.w**2).sum() * self.l2_regularization
        train_logreg_fn = theano.function([x_i, y_i, lr],
                [feature_logreg.nll(y_i).mean(),
                    feature_logreg.errors(y_i).mean()],
                updates=pylearn.gd.sgd.sgd_updates(
                    params=feature_logreg.params,
                    grads=tensor.grad(traincost, feature_logreg.params),
                    stepsizes=[lr/batchsize, lr/(10*batchsize)]))
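        # stepsizes pairs positionally with params; assuming params == [w, b],
        # the weights step at lr/batchsize and the bias at a 10x smaller rate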

        test_logreg_fn = theano.function([x_i, y_i],
                feature_logreg.errors(y_i))

        if self.center_and_normalize:
            feature_logreg_test = LogisticRegression(
                    (x_i - self.X_mean_)/self.X_std_,
                    feature_logreg.w,
                    feature_logreg.b)
            self.predict_fn_ = theano.function([x_i], feature_logreg_test.argmax)
        else:
            self.predict_fn_ = theano.function([x_i], feature_logreg.argmax)

        best_epoch = -1
        best_epoch_valid = -1
        best_epoch_train = -1
        best_epoch_test = -1
        valid_rate = -1
        test_rate = -1
        train_rate = -1

        for epoch in xrange(self.n_epochs):
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
            e_lr = np.float32(self.learnrate / max(1.0, np.floor(max(1.,
                (epoch+1)/float(self.anneal_epoch))-2)))
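            # schedule: e_lr stays at learnrate while (epoch+1) < 4*anneal_epoch,
            # then decays as learnrate/floor((epoch+1)/anneal_epoch - 2);
            # e.g. learnrate=0.1, anneal_epoch=20 -> 0.1 through epoch 78,
            # 0.05 for epochs 79-98, 0.1/3 for epochs 99-118, ...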

            if True:  # this variant receives a validation set, so always validate
                n_valid = X_valid.shape[0]
                l01s = []
                for i in xrange(n_valid/batchsize):
                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]

                    # test_logreg_fn has no updates, so evaluating it cannot
                    # change the model -- safe on the validation set
                    l01 = test_logreg_fn(x_i, y_i)
                    l01s.append(l01)
                valid_rate = 1-np.mean(l01s)
                #print('Epoch %i validation accuracy: %f'%(epoch, valid_rate))

                if valid_rate > best_epoch_valid:
                    best_epoch = epoch
                    best_epoch_test = test_rate
                    best_epoch_valid = valid_rate
                    best_epoch_train = train_rate

                print('Epoch=%i best epoch %i valid %f test %f best train %f current train %f'%(
                    epoch, best_epoch, best_epoch_valid, best_epoch_test, best_epoch_train, train_rate))
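                # early stop: once past anneal_epoch, quit after running twice
                # as many epochs as it took to reach the best validation score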
                if epoch > self.anneal_epoch and epoch > 2*best_epoch:
                    break
            else:
                print('Epoch=%i current train %f' % (epoch, train_rate))

            #train
            l01s = []
            nlls = []
            for i in xrange(n_train/batchsize):
                x_i = train_features[i*batchsize:(i+1)*batchsize]
                y_i = train_labels[i*batchsize:(i+1)*batchsize]
                nll, l01 = train_logreg_fn(x_i, y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            train_rate = 1-np.mean(l01s)
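For orientation, a minimal usage sketch. The class name and constructor signature below are guesses inferred from the attributes this fit() reads; the snippets never show the class definition:

# Hypothetical usage -- "BigSVM" and every constructor argument are assumptions:
clf = BigSVM(batchsize=100, n_classes=10, loss_fn='hinge',
             l1_regularization=0.0, l2_regularization=1e-4,
             learnrate=0.1, anneal_epoch=20, n_epochs=100,
             center_and_normalize=False)
clf.fit(X_train, y_train, X_valid, y_valid)
y_pred = clf.predict_fn_(X_test)  # predicted class index per row, assuming
                                  # LogisticRegression.argmax is that symbol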
Example #2
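Same routine as Example #1, but it carves a validation set out of X internally and supports optional feature centering and normalization.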
    def fit(self, X, y):
        batchsize = self.batchsize

        n_valid = int(min(self.validset_max_examples, self.validset_fraction * X.shape[0]))
        # increase to a multiple of batchsize
        while n_valid % batchsize:
            n_valid += 1

        n_train = X.shape[0] - n_valid

        # decrease to a multiple of batchsize
        while n_train % batchsize:
            n_train -= 1

        if self.center_and_normalize and self.copy_X:
            X = X.copy()

        train_features = X[:n_train]
        valid_features = X[n_train:]
        train_labels = y[:n_train]
        valid_labels = y[n_train:]
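        # note: valid_features can hold slightly more than n_valid rows
        # (n_train was rounded down); the validation loop below only ever
        # reads the first n_valid of them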

        if self.center_and_normalize:
            print("Computing mean and std.dev")

            #this loop seems more memory efficient than numpy
            m = np.zeros(train_features.shape[1])
            msq = np.zeros(train_features.shape[1])
            for i in xrange(train_features.shape[0]):
                alpha = 1.0 / (i+1)
                v = train_features[i]
                m = alpha * v + (1-alpha)*m
                msq = alpha * v*v + (1-alpha)*msq
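            # with alpha = 1/(i+1) these are exact running means of v and v*v,
            # so sqrt(msq - m*m) below is the biased per-feature std.dev --
            # equivalent to train_features.mean(axis=0) / .std(axis=0), just
            # without numpy's large temporaries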

            self.X_mean_ = theano.shared(m.astype(X.dtype))
            self.X_std_ = theano.shared(
                    np.maximum(
                        self.min_feature_std,
                        np.sqrt(msq - m*m)).astype(X.dtype))

            X -= self.X_mean_.get_value()
            X /= self.X_std_.get_value()

        x_i = tensor.matrix(dtype=X.dtype)
        y_i = tensor.vector(dtype=y.dtype)
        lr = tensor.scalar(dtype=X.dtype)

        feature_logreg = LogisticRegression.new(x_i,
                n_in=train_features.shape[1], n_out=self.n_classes,
                dtype=x_i.dtype)

        if self.loss_fn == 'log':
            traincost = feature_logreg.nll(y_i).sum()
        elif self.loss_fn == 'hinge':
            raw_output = tensor.dot(feature_logreg.input, feature_logreg.w) + feature_logreg.b
            traincost = multi_hinge_margin(raw_output, y_i).sum()
        else:
            raise NotImplementedError(self.loss_fn)
        traincost = traincost + abs(feature_logreg.w).sum() * self.l1_regularization
        traincost = traincost + (feature_logreg.w**2).sum() * self.l2_regularization
        train_logreg_fn = theano.function([x_i, y_i, lr],
                [feature_logreg.nll(y_i).mean(),
                    feature_logreg.errors(y_i).mean()],
                updates=pylearn.gd.sgd.sgd_updates(
                    params=feature_logreg.params,
                    grads=tensor.grad(traincost, feature_logreg.params),
                    stepsizes=[lr/batchsize, lr/(10*batchsize)]))

        test_logreg_fn = theano.function([x_i, y_i],
                feature_logreg.errors(y_i))

        if self.center_and_normalize:
            feature_logreg_test = LogisticRegression(
                    (x_i - self.X_mean_)/self.X_std_,
                    feature_logreg.w,
                    feature_logreg.b)
            self.predict_fn_ = theano.function([x_i], feature_logreg_test.argmax)
        else:
            self.predict_fn_ = theano.function([x_i], feature_logreg.argmax)
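        # when center_and_normalize is set, X was scaled in place above, so
        # training batches arrive pre-scaled; predict_fn_ instead applies
        # (x - mean)/std symbolically and therefore accepts raw inputs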

        best_epoch = -1
        best_epoch_valid = -1
        best_epoch_train = -1
        best_epoch_test = -1
        valid_rate = -1
        test_rate = -1
        train_rate = -1

        for epoch in xrange(self.n_epochs):
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
            e_lr = np.float32(self.learnrate / max(1.0, np.floor(max(1.,
                (epoch+1)/float(self.anneal_epoch))-2)))

            if n_valid:
                l01s = []
                for i in xrange(n_valid/batchsize):
                    x_i = valid_features[i*batchsize:(i+1)*batchsize]
                    y_i = valid_labels[i*batchsize:(i+1)*batchsize]

                    # test_logreg_fn has no updates, so evaluating it cannot
                    # change the model -- safe on the validation set
                    l01 = test_logreg_fn(x_i, y_i)
                    l01s.append(l01)
                valid_rate = 1-np.mean(l01s)
                #print('Epoch %i validation accuracy: %f'%(epoch, valid_rate))

                if valid_rate > best_epoch_valid:
                    best_epoch = epoch
                    best_epoch_test = test_rate
                    best_epoch_valid = valid_rate
                    best_epoch_train = train_rate

                print('Epoch=%i best epoch %i valid %f test %f best train %f current train %f'%(
                    epoch, best_epoch, best_epoch_valid, best_epoch_test, best_epoch_train, train_rate))
                if epoch > self.anneal_epoch and epoch > 2*best_epoch:
                    break
            else:
                print('Epoch=%i current train %f' % (epoch, train_rate))

            #train
            l01s = []
            nlls = []
            for i in xrange(n_train/batchsize):
                x_i = train_features[i*batchsize:(i+1)*batchsize]
                y_i = train_labels[i*batchsize:(i+1)*batchsize]
                nll, l01 = train_logreg_fn(x_i, y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            train_rate = 1-np.mean(l01s)
Example #3
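Like Example #2, plus an optional stochastic 'prob_max_pool' feature-pooling front end in place of raw features.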
    def fit(self, X, y):
        batchsize = self.batchsize

        n_valid = int(
            min(self.validset_max_examples,
                self.validset_fraction * X.shape[0]))
        # increase to a multiple of batchsize
        while n_valid % batchsize:
            n_valid += 1

        n_train = X.shape[0] - n_valid

        # decrease to a multiple of batchsize
        while n_train % batchsize:
            n_train -= 1

        if self.center_and_normalize and self.copy_X:
            X = X.copy()

        train_features = X[:n_train]
        valid_features = X[n_train:]
        train_labels = y[:n_train]
        valid_labels = y[n_train:]

        if self.center_and_normalize:
            print("Computing mean and std.dev")

            #this loop seems more memory efficient than numpy
            m = np.zeros(train_features.shape[1])
            msq = np.zeros(train_features.shape[1])
            for i in xrange(train_features.shape[0]):
                alpha = 1.0 / (i + 1)
                v = train_features[i]
                m = alpha * v + (1 - alpha) * m
                msq = alpha * v * v + (1 - alpha) * msq

            self.X_mean_ = theano.shared(m.astype(X.dtype))
            self.X_std_ = theano.shared(
                np.maximum(self.min_feature_std,
                           np.sqrt(msq - m * m)).astype(X.dtype))

            X -= self.X_mean_.get_value()
            X /= self.X_std_.get_value()

        x_i = tensor.matrix(dtype=X.dtype)

        if self.prob_max_pool:
            theano_rng = RandomStreams(85)
            I = sharedX(np.ones((1, train_features.shape[1], self.n_pools)))
            assert batchsize == 1
            M = theano_rng.binomial(size=I.shape, n=1, p=I, dtype=I.dtype)
            prod = x_i.dimshuffle((0, 1, 'x')) * M
            z_i = prod.max(axis=1)
            assert z_i.ndim == 2
            logreg_n_in = self.n_pools
        else:
            z_i = x_i
            logreg_n_in = train_features.shape[1]
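        # prob_max_pool: I holds per-(feature, pool) inclusion probabilities,
        # M is one Bernoulli sample of that mask, and each pool outputs the
        # max over its sampled features. batchsize must be 1, presumably so a
        # fresh mask is drawn per example; note the validation/prediction
        # graphs built on z_i also resample M, so their outputs are stochastic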

        y_i = tensor.vector(dtype=y.dtype)
        lr = tensor.scalar(dtype=X.dtype)

        feature_logreg = LogisticRegression.new(z_i,
                                                n_in=logreg_n_in,
                                                n_out=self.n_classes,
                                                dtype=z_i.dtype)

        if self.loss_fn == 'log':
            traincost = feature_logreg.nll(y_i).sum()
        elif self.loss_fn == 'hinge':
            raw_output = tensor.dot(feature_logreg.input,
                                    feature_logreg.w) + feature_logreg.b
            traincost = multi_hinge_margin(raw_output, y_i).sum()
        else:
            raise NotImplementedError(self.loss_fn)
        traincost = traincost + abs(feature_logreg.w).sum() * self.l1_regularization
        traincost = traincost + (feature_logreg.w ** 2).sum() * self.l2_regularization

        params = feature_logreg.params
        grads = tensor.grad(traincost, params)

        updates = pylearn.gd.sgd.sgd_updates(
            params=params,
            grads=grads,
            stepsizes=[lr / batchsize, lr / (10 * batchsize)])

        if self.prob_max_pool:
            zero = np.cast[config.floatX](0.0)
            one = np.cast[config.floatX](1.0)
            two = np.cast[config.floatX](2.0)
            #approximation based on one sample
            assert two.dtype == I.dtype
            assert M.dtype == I.dtype
            assert one.dtype == I.dtype
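            # score-function (REINFORCE-style) single-sample estimate of
            # d(cost)/dI: the fraction equals d(log P(M|I))/dI elementwise
            # (1/I where M == 1, -1/(1-I) where M == 0)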
            approx_grad = traincost * (two * M - one) / (I * M + (one - I) *
                                                         (one - M))
            #traincost doesn't respect dtype
            approx_grad = tensor.cast(approx_grad, config.floatX)
            step_size = tensor.cast(
                lr, config.floatX) / np.cast[config.floatX](batchsize)
            assert step_size.dtype == I.dtype
            assert approx_grad.dtype == I.dtype
            update = tensor.clip(I - step_size * approx_grad, zero, one)
            updates.append((I, update))

        train_logreg_fn = theano.function(
            [x_i, y_i, lr],
            [feature_logreg.nll(y_i).mean(),
             feature_logreg.errors(y_i).mean()],
            updates=updates)

        test_logreg_fn = theano.function([x_i, y_i],
                                         feature_logreg.errors(y_i))

        if self.center_and_normalize:
            feature_logreg_test = LogisticRegression(
                (x_i - self.X_mean_) / self.X_std_, feature_logreg.w,
                feature_logreg.b)
            self.predict_fn_ = theano.function([x_i],
                                               feature_logreg_test.argmax)
        else:
            self.predict_fn_ = theano.function([x_i], feature_logreg.argmax)

        best_epoch = -1
        best_epoch_valid = -1
        best_epoch_train = -1
        best_epoch_test = -1
        valid_rate = -1
        test_rate = -1
        train_rate = -1

        for epoch in xrange(self.n_epochs):
            # validate
            # Marc'Aurelio, you crazy!!
            # the division by batchsize is done in the cost function
            e_lr = np.float32(self.learnrate / max(
                1.0,
                np.floor(max(1., (epoch + 1) / float(self.anneal_epoch)) - 2)))

            if n_valid:
                l01s = []
                for i in xrange(n_valid / batchsize):
                    x_i = valid_features[i * batchsize:(i + 1) * batchsize]
                    y_i = valid_labels[i * batchsize:(i + 1) * batchsize]

                    # test_logreg_fn has no updates, so evaluating it cannot
                    # change the model -- safe on the validation set
                    l01 = test_logreg_fn(x_i, y_i)
                    l01s.append(l01)
                valid_rate = 1 - np.mean(l01s)
                #print('Epoch %i validation accuracy: %f'%(epoch, valid_rate))

                if valid_rate > best_epoch_valid:
                    best_epoch = epoch
                    best_epoch_test = test_rate
                    best_epoch_valid = valid_rate
                    best_epoch_train = train_rate

                print(
                    'Epoch=%i best epoch %i valid %f test %f best train %f current train %f'
                    % (epoch, best_epoch, best_epoch_valid, best_epoch_test,
                       best_epoch_train, train_rate))
                if epoch > self.anneal_epoch and epoch > 2 * best_epoch:
                    break
            else:
                print('Epoch=%i current train %f' % (epoch, train_rate))

            #train
            l01s = []
            nlls = []
            for i in xrange(n_train / batchsize):
                x_i = train_features[i * batchsize:(i + 1) * batchsize]
                y_i = train_labels[i * batchsize:(i + 1) * batchsize]
                nll, l01 = train_logreg_fn(x_i, y_i, e_lr)
                nlls.append(nll)
                l01s.append(l01)
            train_rate = 1 - np.mean(l01s)