Example #1
    def fit(self,
            train,
            test=None,
            validate_every=100,
            show_norms=False,
            show_output=False,
            error_logging=True):
        """ Fit model

        Pass in test to compute test error and report during
        training.

        train : ndarray (T x n_in)

        validate_every : int
            in terms of number of epochs

        """
        if test is not None:
            self.interactive = True
            test_set = self.shared_dataset(test)
        else:
            self.interactive = False

        train_set = self.shared_dataset(train)

        # compute number of minibatches for training
        # note that cases are along the first dimension here
        n_train = train_set.get_value(borrow=True).shape[0]
        n_train_batches = int(np.ceil(1.0 * n_train / self.batch_size))
        if self.interactive:
            n_test = test_set.get_value(borrow=True).shape[0]
            n_test_batches = int(np.ceil(1.0 * n_test / self.batch_size))

        #validate_every is specified in terms of epochs
        validation_frequency = validate_every * n_train_batches

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        index = T.lscalar('index')  # index to a [mini]batch
        n_ex = T.lscalar('n_ex')  # total number of examples
        # learning rate (may change)
        l_r = T.scalar('l_r', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        # Proper implementation of variable-batch size evaluation
        # Note that classifier.errors() returns the mean error
        # But the last batch may be a smaller size
        # So we keep around the effective_batch_size (whose last element may
        # be smaller than the rest)
        # And weight the reported error by the batch_size when we average
        # Also, by keeping batch_start and batch_stop as symbolic variables,
        # we make the theano function easier to read
        batch_start = index * self.batch_size
        batch_stop = T.minimum(n_ex, (index + 1) * self.batch_size)
        effective_batch_size = batch_stop - batch_start

        get_batch_size = theano.function(inputs=[index, n_ex],
                                         outputs=effective_batch_size)
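
        # Worked illustration of the weighting described above (hypothetical
        # numbers, not from the original code): with batch_size = 100 and
        # n_ex = 250 the effective batch sizes are [100, 100, 50], so
        #     np.average([m0, m1, m2], weights=[100, 100, 50])
        #         = (100*m0 + 100*m1 + 50*m2) / 250
        # which is exactly the mean loss over all 250 examples, whereas an
        # unweighted mean of the per-batch means would over-weight the short
        # final batch.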

        compute_train_error = []
        compute_each_train_error = []
        compute_test_error = []
        train_model = []
        for da in self.estimator.dA_layers:
            loss = da.loss
            updates = da.get_updates(l_r, mom)

            f_ctrain = theano.function(
                inputs=[index, n_ex],
                outputs=loss,
                givens={self.x: train_set[batch_start:batch_stop]},
                mode=mode)
            compute_train_error.append(f_ctrain)

            f_cetrain = theano.function(
                inputs=[index, n_ex],
                outputs=da.each_loss,
                givens={self.x: train_set[batch_start:batch_stop]},
                mode=mode)
            compute_each_train_error.append(f_cetrain)

            if self.interactive:
                f_ctest = theano.function(
                    inputs=[index, n_ex],
                    outputs=loss,
                    givens={self.x: test_set[batch_start:batch_stop]},
                    mode=mode)
                compute_test_error.append(f_ctest)

            # compiling a Theano function `train_model` that returns the
            # cost, but in the same time updates the parameter of the
            # model based on the rules defined in `updates`
            f_train = theano.function(
                inputs=[index, n_ex, l_r, mom],
                outputs=loss,
                updates=updates,
                givens={self.x: train_set[batch_start:batch_stop]},
                mode=mode)

            train_model.append(f_train)

        ###############
        # TRAIN MODEL #
        ###############
        keyMonitoringThread = Threads.KeyMonitoringThread()
        keyMonitoringThread.start()
        initial_learning_rate = self.learning_rate
        t0 = time.time()
        for n in xrange(self.estimator.n_layers):
            logger.info('... training dA layer[%d]' % n)
            epoch = 0
            this_train_loss = np.inf
            stopFlg = False
            t0_l = time.time()

            self.learning_rate = initial_learning_rate
            self.errorlog.append([])

            while (epoch < self.n_epochs) and (
                    this_train_loss > self.t_error) and (stopFlg is False):
                epoch = epoch + 1
                effective_momentum = self.final_momentum \
                                     if epoch > self.momentum_switchover \
                                     else self.initial_momentum

                for minibatch_idx in xrange(n_train_batches):
                    minibatch_avg_cost = train_model[n](minibatch_idx, n_train,
                                                        self.learning_rate,
                                                        effective_momentum)

                    # iteration number (how many weight updates have we made?)
                    # epoch is 1-based, index is 0 based
                    iter = (epoch - 1) * n_train_batches + minibatch_idx + 1

                    if iter % validation_frequency == 0:
                        # compute loss on training set
                        train_losses = [
                            compute_train_error[n](i, n_train)
                            for i in xrange(n_train_batches)
                        ]
                        train_batch_sizes = [
                            get_batch_size(i, n_train)
                            for i in xrange(n_train_batches)
                        ]

                        this_train_loss = np.average(train_losses,
                                                     weights=train_batch_sizes)

                        # compute each output unit loss on training set
                        if error_logging is True:
                            train_each_losses = np.array([
                                compute_each_train_error[n](i, n_train)
                                for i in xrange(n_train_batches)
                            ])
                            train_batch_sizes_for_each = []
                            for i in xrange(train_each_losses.shape[1]):
                                train_batch_sizes_for_each.append(
                                    train_batch_sizes)

                            this_train_each_loss = np.average(
                                train_each_losses.T,
                                weights=train_batch_sizes_for_each,
                                axis=1)
                            el = np.r_[np.array([epoch]), this_train_each_loss]
                            self.errorlog[n] = np.vstack((self.errorlog[n], el)) \
                                                       if len(self.errorlog[n]) != 0 \
                                                       else np.array([el])
                            # save the error history as a PNG image
                            self.save_errorlog_png()


                            # self.save_errorlog_png(fname='dALayer%d' % n)

                        if self.interactive:
                            test_losses = [
                                compute_test_error[n](i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            test_batch_sizes = [
                                get_batch_size(i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            this_test_loss = np.average(
                                test_losses, weights=test_batch_sizes)

                            logger.info(
                                '*** dA layer[%d] *** epoch %i, mb %i/%i, tr loss %f'
                                ' te loss %f lr: %f mom: %f' %
                                (n, epoch, minibatch_idx + 1, n_train_batches,
                                 this_train_loss, this_test_loss,
                                 self.learning_rate, effective_momentum))

                        else:
                            logger.info(
                                '*** dA layer[%d] *** epoch %i, mb %i/%i, train loss %f'
                                ' lr: %f mom: %f' %
                                (n, epoch, minibatch_idx + 1, n_train_batches,
                                 this_train_loss, self.learning_rate,
                                 effective_momentum))

                        self.optional_output(train_set, show_norms,
                                             show_output)

                    self.learning_rate *= self.learning_rate_decay

                # save a snapshot of the parameters partway through training
                if self.snapshot_every is not None:
                    if (epoch + 1) % self.snapshot_every == 0:
                        date_obj = datetime.datetime.now()
                        date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
                        class_name = self.__class__.__name__
                        fname = '%s.%s-snapshot-%d' % (class_name, date_str,
                                                       epoch + 1)
                        self.save(fpath=self.snapshot_path, fname=fname)

                # receive command input from a separate thread during training
                var = keyMonitoringThread.GetInput()
                # if 'q' was entered, stop training early
                if var == 'q':
                    stopFlg = True

            h, m = divmod(time.time() - t0_l, 3600)
            m, s = divmod(m, 60)
            print "*** dA layer[%d] *** Elapsed time: %d hour %d min %f sec" % (
                n, int(h), int(m), s)

        h, m = divmod(time.time() - t0, 3600)
        m, s = divmod(m, 60)
        print "Elapsed time: %d hour %d min %f sec" % (int(h), int(m), s)

        # stop the command-input thread
        # (when training ends normally by meeting the stopping criteria, rather
        # than being cut short by a 'q' input, the thread must be stopped
        # explicitly)
        keyMonitoringThread.Stop()
        print 'Press any key...'
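
A minimal usage sketch for the fit() method above. The `model` object, its
construction, and the toy data are illustrative assumptions only; the fit()
signature and its behavior (per-layer dA training, 'q' to stop early) come
from the example itself.

    import numpy as np

    # toy data: T examples of dimensionality n_in, as in the docstring
    rng = np.random.RandomState(0)
    train = rng.rand(1000, 20).astype('float32')   # (T x n_in)
    test = rng.rand(200, 20).astype('float32')

    # `model` is assumed to be an already-constructed instance of the class
    # that defines fit() above
    model.fit(train,
              test=test,             # optional: also report test error
              validate_every=10,     # validate every 10 epochs
              error_logging=True)    # log per-unit errors and save PNG plots
    # typing 'q' while training runs stops the current layer early
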
Example #2
    def fit(self, X_train, Y_train, X_test=None, Y_test=None,
            validate_every=100, optimizer='sgd', compute_zero_one=False,
            show_norms=True, show_output=True, error_logging=True):
        """ Fit model

        Pass in X_test, Y_test to compute test error and report during
        training.

        X_train : ndarray (T x n_in)
        Y_train : ndarray (T x n_out)

        validate_every : int
            in terms of number of epochs

        optimizer : string
            Optimizer type.
            Possible values:
                'sgd'  : batch stochastic gradient descent
                'cg'   : nonlinear conjugate gradient algorithm
                         (scipy.optimize.fmin_cg)
                'bfgs' : quasi-Newton method of Broyden, Fletcher, Goldfarb,
                         and Shanno (scipy.optimize.fmin_bfgs)
                'l_bfgs_b' : Limited-memory BFGS (scipy.optimize.fmin_l_bfgs_b)

        compute_zero_one : bool
            in the case of binary output, compute zero-one error in addition to
            cross-entropy error
        show_norms : bool
            Show L2 norms of individual parameter groups while training.
        show_output : bool
            Show the model output on first training case while training.
        """
        if X_test is not None:
            assert(Y_test is not None)
            self.interactive = True
            test_set_x, test_set_y = self.shared_dataset((X_test, Y_test))
        else:
            self.interactive = False

        train_set_x, train_set_y = self.shared_dataset((X_train, Y_train))

        # compute number of minibatches for training
        # note that cases are the second dimension, not the first
        n_train = train_set_x.get_value(borrow=True).shape[1]
        n_train_batches = int(np.ceil(1.0 * n_train / self.batch_size))
        if self.interactive:
            n_test = test_set_x.get_value(borrow=True).shape[1]
            n_test_batches = int(np.ceil(1.0 * n_test / self.batch_size))

        #validate_every is specified in terms of epochs
        validation_frequency = validate_every * n_train_batches

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        index = T.lscalar('index')    # index to a [mini]batch
        n_ex = T.lscalar('n_ex')      # total number of examples
        # learning rate (may change)
        l_r = T.scalar('l_r', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.estimator.loss(self.y) \
            + self.L1_reg * self.estimator.L1 \
            + self.L2_reg * self.estimator.L2_sqr

        # Proper implementation of variable-batch size evaluation
        # Note that classifier.errors() returns the mean error
        # But the last batch may be a smaller size
        # So we keep around the effective_batch_size (whose last element may
        # be smaller than the rest)
        # And weight the reported error by the batch_size when we average
        # Also, by keeping batch_start and batch_stop as symbolic variables,
        # we make the theano function easier to read
        batch_start = index * self.batch_size
        batch_stop = T.minimum(n_ex, (index + 1) * self.batch_size)
        effective_batch_size = batch_stop - batch_start

        get_batch_size = theano.function(inputs=[index, n_ex],
                                          outputs=effective_batch_size)

        compute_train_error = theano.function(inputs=[index, n_ex],
            outputs=self.estimator.loss(self.y),
            givens={self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]},
            mode=mode)

        compute_train_each_error = theano.function(inputs=[index, n_ex],
            outputs=self.estimator.each_loss(self.y),
            givens={self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]},
            mode=mode)

        if self.interactive:
            compute_test_error = theano.function(inputs=[index, n_ex],
                outputs=self.estimator.loss(self.y),
                givens={self.x: test_set_x[:, batch_start:batch_stop],
                        self.y: test_set_y[:, batch_start:batch_stop]},
                mode=mode)

        self.get_norms = {}
        for param in self.estimator.params:
            self.get_norms[param] = theano.function(inputs=[],
                    outputs=self.estimator.l2_norms[param], mode=mode)

        # compute the gradient of cost with respect to theta using BPTT
        gtheta = T.grad(cost, self.estimator.theta)

        if optimizer == 'sgd':

            updates = {}
            theta = self.estimator.theta
            theta_update = self.estimator.theta_update
            # careful here, update to the shared variable
            # cannot depend on an updated other shared variable
            # since updates happen in parallel
            # so we need to be explicit
            upd = mom * theta_update - l_r * gtheta
            updates[theta_update] = upd
            updates[theta] = theta + upd

            # compiling a Theano function `train_model` that returns the
            # cost, but in the same time updates the parameter of the
            # model based on the rules defined in `updates`
            train_model = theano.function(inputs=[index, n_ex, l_r, mom],
                outputs=cost,
                updates=updates,
                givens={self.x: train_set_x[:, batch_start:batch_stop],
                        self.y: train_set_y[:, batch_start:batch_stop]},
                mode=mode)

            ###############
            # TRAIN MODEL #
            ###############
            logger.info('... training')
            epoch = 0
            this_train_loss = np.inf
            stopFlg = False
            keyMonitoringThread = Threads.KeyMonitoringThread()
            keyMonitoringThread.start()
            t0 = time.time()

            while (epoch < self.n_epochs) and (this_train_loss > self.t_error) and (stopFlg is False):
                epoch = epoch + 1
                effective_momentum = self.final_momentum \
                                     if epoch > self.momentum_switchover \
                                     else self.initial_momentum

                for minibatch_idx in xrange(n_train_batches):
                    minibatch_avg_cost = train_model(minibatch_idx, n_train,
                                                     self.learning_rate,
                                                     effective_momentum)

                    # iteration number (how many weight updates have we made?)
                    # epoch is 1-based, index is 0 based
                    iter = (epoch - 1) * n_train_batches + minibatch_idx + 1

                    if iter % validation_frequency == 0:
                        # compute loss on training set
                        train_losses = [compute_train_error(i, n_train)
                                        for i in xrange(n_train_batches)]
                        train_batch_sizes = [get_batch_size(i, n_train)
                                             for i in xrange(n_train_batches)]

                        this_train_loss = np.average(train_losses,
                                                     weights=train_batch_sizes)

                        # compute each output unit loss on training set
                        if error_logging is True:
                            train_each_losses = np.array([compute_train_each_error(i, n_train)
                                            for i in xrange(n_train_batches)])
                            train_batch_sizes_for_each = []
                            for i in xrange(self.n_out):
                                train_batch_sizes_for_each.append(train_batch_sizes)

                            this_train_each_loss = np.average(train_each_losses.T,
                                                         weights=train_batch_sizes_for_each, axis=1)
                            el = np.r_[np.array([epoch]), this_train_each_loss]
                            self.errorlog = np.vstack((self.errorlog, el)) \
                                                       if len(self.errorlog) != 0 \
                                                       else np.array([el])
                            # save the error history as a PNG image
                            self.save_errorlog_png()

                        if self.interactive:
                            test_losses = [compute_test_error(i, n_test)
                                            for i in xrange(n_test_batches)]

                            test_batch_sizes = [get_batch_size(i, n_test)
                                            for i in xrange(n_test_batches)]

                            this_test_loss = np.average(test_losses,
                                                    weights=test_batch_sizes)

                            logger.info(
                                'epoch %i, mb %i/%i, tr loss %f'
                                ' te loss %f lr: %f mom: %f' %
                                (epoch, minibatch_idx + 1, n_train_batches,
                                 this_train_loss, this_test_loss,
                                 self.learning_rate, effective_momentum))

                        else:
                            logger.info('epoch %i, mb %i/%i, train loss %f'
                                    ' lr: %f mom: %f' % (epoch,
                                                     minibatch_idx + 1,
                                                     n_train_batches,
                                                     this_train_loss,
                                                     self.learning_rate,
                                                     effective_momentum))

                        self.optional_output(train_set_x, show_norms,
                                             show_output)

                self.learning_rate *= self.learning_rate_decay

                # save a snapshot of the parameters partway through training
                if self.snapshot_every is not None:
                    if (epoch + 1) % self.snapshot_every == 0:
                        date_obj = datetime.datetime.now()
                        date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
                        class_name = self.__class__.__name__
                        fname = '%s.%s-snapshot-%d' % (class_name, date_str,
                                                       epoch + 1)
                        self.save(fpath=self.snapshot_path, fname=fname)
                
                # receive command input from a separate thread during training
                var = keyMonitoringThread.GetInput()
                # if 'q' was entered, stop training early
                if var == 'q':
                    stopFlg = True
                    keyMonitoringThread.Stop()

            h, m = divmod(time.time() - t0, 3600)
            m, s = divmod(m, 60)
            print "Elapsed time: %d hour %d min %f sec" % (int(h), int(m), s)

            # stop the command-input thread
            # (when training ends normally by meeting the stopping criteria,
            # rather than being cut short by a 'q' input, the thread must be
            # stopped explicitly)
            keyMonitoringThread.Stop()
            print 'Press any key...'

        elif optimizer == 'cg' or optimizer == 'bfgs' or optimizer == 'l_bfgs_b':
            # compile a theano function that returns the cost of a minibatch
            batch_cost = theano.function(inputs=[index, n_ex],
                outputs=cost,
                givens={self.x: train_set_x[:, batch_start:batch_stop],
                        self.y: train_set_y[:, batch_start:batch_stop]},
                mode=mode, name="batch_cost")

            # compile a theano function that returns the gradient of the
            # minibatch with respect to theta
            batch_grad = theano.function(inputs=[index, n_ex],
                outputs=T.grad(cost, self.estimator.theta),
                givens={self.x: train_set_x[:, batch_start:batch_stop],
                        self.y: train_set_y[:, batch_start:batch_stop]},
                mode=mode, name="batch_grad")

            # creates a function that computes the average cost on the training
            # set
            def train_fn(theta_value):
                theta_value=np.array(theta_value, dtype=theano.config.floatX)
                self.estimator.theta.set_value(theta_value, borrow=True)
                train_losses = [batch_cost(i, n_train)
                                for i in xrange(n_train_batches)]
                train_batch_sizes = [get_batch_size(i, n_train)
                                     for i in xrange(n_train_batches)]
                return np.average(train_losses, weights=train_batch_sizes)

            # creates a function that computes the average gradient of cost
            # with respect to theta
            def train_fn_grad(theta_value):
                theta_value=np.array(theta_value, dtype=theano.config.floatX)
                self.estimator.theta.set_value(theta_value, borrow=True)

                train_grads = [batch_grad(i, n_train)
                                for i in xrange(n_train_batches)]
                train_batch_sizes = [get_batch_size(i, n_train)
                                     for i in xrange(n_train_batches)]

                return np.average(train_grads, weights=train_batch_sizes,
                                  axis=0)

            # validation function, prints useful output after each iteration
            def callback(theta_value):
                self.epoch += 1
                if (self.epoch) % validate_every == 0:
                    theta_value=np.array(theta_value, dtype=theano.config.floatX)
                    self.estimator.theta.set_value(theta_value, borrow=True)
                    # compute loss on training set
                    train_losses = [compute_train_error(i, n_train)
                                    for i in xrange(n_train_batches)]
                    train_batch_sizes = [get_batch_size(i, n_train)
                                         for i in xrange(n_train_batches)]

                    this_train_loss = np.average(train_losses,
                                                    weights=train_batch_sizes)

                    # compute each output unit loss on training set
                    if error_logging is True:
                        train_each_losses = np.array([compute_train_each_error(i, n_train)
                                        for i in xrange(n_train_batches)])
                        train_batch_sizes_for_each = []
                        for i in xrange(self.n_out):
                            train_batch_sizes_for_each.append(train_batch_sizes)

                        this_train_each_loss = np.average(train_each_losses.T,
                                                     weights=train_batch_sizes_for_each, axis=1)
                        el = np.r_[np.array([self.epoch]), this_train_each_loss]
                        self.errorlog = np.vstack((self.errorlog, el)) \
                                                   if len(self.errorlog) != 0 \
                                                   else np.array([el])
                        # save the error history as a PNG image
                        self.save_errorlog_png(fname=optimizer)

                    if self.interactive:
                        test_losses = [compute_test_error(i, n_test)
                                        for i in xrange(n_test_batches)]

                        test_batch_sizes = [get_batch_size(i, n_test)
                                        for i in xrange(n_test_batches)]

                        this_test_loss = np.average(test_losses,
                                                    weights=test_batch_sizes)

                        logger.info('epoch %i, tr loss %f, te loss %f' %
                                    (self.epoch, this_train_loss,
                                     this_test_loss))

                    else:
                        logger.info('epoch %i, train loss %f ' % \
                                    (self.epoch, this_train_loss))

                    self.optional_output(train_set_x, show_norms, show_output)

            ###############
            # TRAIN MODEL #
            ###############
            logger.info('... training')
            # using scipy conjugate gradient optimizer
            import scipy.optimize
            if optimizer == 'cg':
                of = scipy.optimize.fmin_cg
            elif optimizer == 'bfgs':
                of = scipy.optimize.fmin_bfgs
            elif optimizer == 'l_bfgs_b':
                of = scipy.optimize.fmin_l_bfgs_b
            logger.info("Optimizing using %s..." % of.__name__)
            start_time = time.clock()

            # keep track of epochs externally
            # these get updated through callback
            self.epoch = 0

            # interface to l_bfgs_b is different than that of cg, bfgs
            # however, this will be changed in scipy 0.11
            # unified under scipy.optimize.minimize
            if optimizer == 'cg' or optimizer == 'bfgs':
                best_theta = of(
                    f=train_fn,
                    x0=self.estimator.theta.get_value(),
                    #x0=np.zeros(self.estimator.theta.get_value().shape,
                    #             dtype=theano.config.floatX),
                    fprime=train_fn_grad,
                    callback=callback,
                    disp=1,
                    retall=1,
                    maxiter=self.n_epochs)
            elif optimizer == 'l_bfgs_b':
                best_theta, f_best_theta, info = of(
                    func=train_fn,
                    x0=self.estimator.theta.get_value(),
                    fprime=train_fn_grad,
                    iprint=validate_every,
                    maxfun=self.n_epochs)  # max number of feval

            end_time = time.clock()

            h, m = divmod(end_time - start_time, 3600)
            m, s = divmod(m, 60)
            print "Optimization time: %d hour %d min %f sec" % (int(h), int(m), s)

        else:
            raise NotImplementedError
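
For comparison, a hedged sketch of how this second fit() variant might be
called. Only the fit() signature and the documented optimizer values come from
the example; `model`, its construction, and the toy data are assumptions.

    import numpy as np

    # toy supervised data, shaped as in the docstring: (T x n_in), (T x n_out)
    rng = np.random.RandomState(0)
    X_train = rng.rand(1000, 20).astype('float32')
    Y_train = rng.rand(1000, 5).astype('float32')

    # `model` is assumed to be an already-constructed instance of the class
    # that defines fit() above
    model.fit(X_train, Y_train,
              validate_every=5,        # report every 5 epochs
              optimizer='l_bfgs_b',    # one of 'sgd', 'cg', 'bfgs', 'l_bfgs_b'
              show_norms=False,
              show_output=False,
              error_logging=True)
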
Example #3
    global IS_GET_OBJECT_SIZE
    global CONTROL_TIME

    # load the pre-trained network
    model = PrepNetwork('NN_D204060_MSPTS_Short_batch100_f')  # 14/10/01 BEST!
    #     model = PrepNetwork('NN_D204060_MSPTS_batch100f_m500_lr9999')   # No Good
    #     model = PrepNetwork('NN_D204060_MSPT_Short_batch100_f')

    # create an instance that communicates with the Host
    com = HostCommunication()

    # start the thread that communicates with the Host
    com.start()

    keyMonitoringThread = Threads.KeyMonitoringThread()
    keyMonitoringThread.start()
    rtcFlag = False
    print "####################################"
    print "# Command                          #"
    print "####################################"
    print "#  'i': Set Initial Grasping Pose  #"
    print "#  's': Start Real Time Control    #"
    print "#  'p': Pause Real Time Control    #"
    print "#  'q': Quit Program               #"
    print "####################################"
    while True:
        # loop until communication with the Host ends
        if com.CompleteFlag:
            break