Example #1
    def evaluate_set(self, data_x, data_y):

        args = self.args

        # compile prediction function
        eval_acc = theano.function(
            inputs=[self.x],
            outputs=self.pred,
            allow_input_downcast=True
        )

        # create batches by grouping sentences of the same length together
        batches_x, batches_y = create_batches(
            range(len(data_x)),
            data_x,
            data_y,
            args.batch
        )

        # evaluate on the data set
        dropout_prob = np.float64(args.dropout_rate).astype(theano.config.floatX)
        self.dropout.set_value(0.0)
        accuracy = self.evaluate_batches(batches_x, batches_y, eval_acc)
        self.dropout.set_value(dropout_prob)
        return accuracy
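
The helper create_batches is called above but not defined in these snippets. Based on the comment "create batches by grouping sentences of the same length together", here is a minimal sketch of what it might do; the pad id 0 and the (length, batch) array layout are assumptions, not the repo's actual implementation.

    import numpy as np

    def create_batches(perm, data_x, data_y, batch_size):
        # sort indices by sentence length so each mini-batch holds
        # sentences of (nearly) the same length
        perm = sorted(perm, key=lambda i: len(data_x[i]))
        batches_x, batches_y = [], []
        for start in range(0, len(perm), batch_size):
            ids = perm[start:start + batch_size]
            max_len = max(len(data_x[i]) for i in ids)
            # pad every sentence in the batch to max_len (pad id 0 assumed),
            # then stack so that columns correspond to sentences
            bx = np.array([
                list(data_x[i]) + [0] * (max_len - len(data_x[i]))
                for i in ids
            ]).T
            by = np.array([data_y[i] for i in ids])
            batches_x.append(bx)
            batches_y.append(by)
        return batches_x, batches_y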
Example #2
    def train(self, train, dev, test):
        args = self.args
        trainx, trainy = train
        batch_size = args.batch_size

        if dev:
            dev_batches_x, dev_batches_y = create_batches(
                range(len(dev[0])), dev[0], dev[1], batch_size)

        if test:
            test_batches_x, test_batches_y = create_batches(
                range(len(test[0])), test[0], test[1], batch_size)

        cost = self.nll_loss + self.l2_sqr

        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args.learning_rate,
            method=args.learning)[:3]

        train_model = theano.function(inputs=[self.x, self.y],
                                      outputs=[cost, gnorm],
                                      updates=updates,
                                      allow_input_downcast=True)

        eval_acc = theano.function(inputs=[self.x],
                                   outputs=self.pred,
                                   allow_input_downcast=True)

        unchanged = 0
        best_dev = 0.0
        dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
        rnn_dropout_prob = np.float64(args.rnn_dropout).astype(
            theano.config.floatX)

        start_time = time.time()

        perm = range(len(trainx))

        # log the L2 norm of each parameter matrix before training starts
        say(
            str([
                "%.2f" % np.linalg.norm(x.get_value(borrow=True))
                for x in self.params
            ]) + "\n")
        for epoch in xrange(args.max_epochs):
            unchanged += 1
            if unchanged > 20: return
            train_loss = 0.0

            random.shuffle(perm)
            batches_x, batches_y = create_batches(perm, trainx, trainy,
                                                  batch_size)

            N = len(batches_x)
            for i in xrange(N):

                if i % 100 == 0:
                    sys.stdout.write("\r%d" % i)
                    sys.stdout.flush()

                x = batches_x[i]
                y = batches_y[i]

                va, grad_norm = train_model(x, y)
                train_loss += va

                # debug: abort and dump the offending batch if the loss becomes NaN
                if math.isnan(va):
                    print ""
                    print i - 1, i
                    print x
                    print y
                    return

                # end of the epoch: disable dropout before evaluating on dev/test
                if i == N - 1:
                    self.dropout.set_value(0.0)
                    self.rnn_dropout.set_value(0.0)

                    say("\n")
                    say("Epoch %.1f\tlr=%.6f\tloss=%.4f\t|g|=%s  [%.2fm]\n" %
                        (epoch + (i + 1) / (N + 0.0),
                         float(lr.get_value(borrow=True)), train_loss /
                         (i + 1), float(grad_norm),
                         (time.time() - start_time) / 60.0))
                    say(
                        str([
                            "%.2f" % np.linalg.norm(x.get_value(borrow=True))
                            for x in self.params
                        ]) + "\n")

                    if dev:
                        preds = [eval_acc(x) for x in dev_batches_x]
                        nowf_dev = self.eval_accuracy(preds, dev_batches_y)
                        if nowf_dev > best_dev:
                            unchanged = 0
                            best_dev = nowf_dev
                            if args.save:
                                self.save_model(args.save, args)

                        say("\tdev accuracy=%.4f\tbest=%.4f\n" %
                            (nowf_dev, best_dev))
                        if args.test and nowf_dev == best_dev:
                            preds = [eval_acc(x) for x in test_batches_x]
                            nowf_test = self.eval_accuracy(
                                preds, test_batches_y)
                            say("\ttest accuracy=%.4f\n" % (nowf_test, ))

                        if best_dev > nowf_dev + 0.05:
                            return

                    self.dropout.set_value(dropout_prob)
                    self.rnn_dropout.set_value(rnn_dropout_prob)

                    start_time = time.time()

            if args.lr_decay > 0:
                assert args.lr_decay < 1
                lr.set_value(np.float32(lr.get_value() * args.lr_decay))
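
All of these snippets turn dropout off for evaluation by calling set_value(0.0) on a shared variable and then restoring the configured rate. A minimal sketch of the pattern that makes this work, assuming an inverted-dropout layer driven by the shared rate (the names, seed, and rate below are illustrative, not the repo's actual code):

    import numpy as np
    import theano
    from theano.tensor.shared_randomstreams import RandomStreams

    srng = RandomStreams(seed=1234)
    # the training loop flips this between the configured rate and 0.0
    dropout = theano.shared(np.float64(0.3).astype(theano.config.floatX))

    def apply_dropout(x):
        # inverted dropout: when the shared rate is 0.0 the mask is all ones
        # and the rescaling is a no-op, so the same compiled graph evaluates
        # the network with no units dropped
        mask = srng.binomial(n=1, p=1.0 - dropout, size=x.shape,
                             dtype=theano.config.floatX)
        return x * mask / (1.0 - dropout)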
Example #3
    def train(self, args, train, dev, test=None):
        embedding_layer = self.layers[-2]

        dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
        rnn_dropout_prob = np.float64(args["rnn_dropout"]).astype(
            theano.config.floatX)
        batch_size = args["batch_size"]
        unroll_size = args["unroll_size"]

        train = create_batches(train, embedding_layer.map_to_ids, batch_size)

        dev = create_batches(dev, embedding_layer.map_to_ids, 1)

        if test is not None:
            test = create_batches(test, embedding_layer.map_to_ids, 1)

        cost = T.sum(self.nll) / self.idxs.shape[1]
        updates, lr, gnorm = create_optimization_updates(
            cost=cost,
            params=self.params,
            lr=args["learning_rate"],
            eps=args["eps"],
            method=args["learning"])[:3]

        train_func = theano.function(inputs=[self.idxs, self.idys] +
                                     self.init_state,
                                     outputs=[cost, gnorm] + self.last_state,
                                     updates=updates)
        eval_func = theano.function(
            inputs=[self.idxs, self.idys] + self.init_state,
            outputs=[self.nll] + self.last_state,
        )

        N = (len(train[0]) - 1) / unroll_size + 1
        say(" train: {} tokens, {} mini-batches\n".format(
            len(train[0].ravel()), N))
        say(" dev: {} tokens\n".format(len(dev[0].ravel())))

        say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        decay_epoch = args["lr_decay_epoch"]
        decay_rate = args["lr_decay"]
        lr_0 = args["learning_rate"]
        iter_cnt = 0

        depth = args["depth"]
        unchanged = 0
        best_dev = 1e+10
        start_time = 0
        max_epoch = args["max_epoch"]
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > 20: break

            if decay_epoch > 0 and epoch >= decay_epoch:
                lr.set_value(np.float32(lr.get_value() * decay_rate))

            start_time = time.time()

            # zero the recurrent states (two per layer) at the start of each epoch
            prev_state = [
                np.zeros((batch_size, self.n_d), dtype=theano.config.floatX)
                for i in xrange(depth * 2)
            ]

            train_loss = 0.0
            for i in xrange(N):
                # get current batch
                x = train[0][i * unroll_size:(i + 1) * unroll_size]
                y = train[1][i * unroll_size:(i + 1) * unroll_size]

                iter_cnt += 1
                ret = train_func(x, y, *prev_state)
                cur_loss, grad_norm, prev_state = ret[0], ret[1], ret[2:]
                train_loss += cur_loss / len(x)

                if i % 10 == 0:
                    say("\r{}".format(i))

                # end of the epoch: evaluate on dev with dropout disabled
                if i == N - 1:
                    self.dropout.set_value(0.0)
                    self.rnn_dropout.set_value(0.0)
                    dev_preds = self.evaluate(eval_func, dev, 1, unroll_size)
                    dev_loss = evaluate_average(predictions=dev_preds,
                                                masks=None)
                    dev_ppl = np.exp(dev_loss)
                    self.dropout.set_value(dropout_prob)
                    self.rnn_dropout.set_value(rnn_dropout_prob)

                    say("\r\n")
                    say( ( "Epoch={}  lr={:.4f}  train_loss={:.3f}  train_ppl={:.1f}  " \
                        +"dev_loss={:.3f}  dev_ppl={:.1f}\t|g|={:.3f}\t[{:.1f}m]\n" ).format(
                            epoch,
                            float(lr.get_value(borrow=True)),
                            train_loss/N,
                            np.exp(train_loss/N),
                            dev_loss,
                            dev_ppl,
                            float(grad_norm),
                            (time.time()-start_time)/60.0
                        ))
                    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

                    if dev_ppl < best_dev:
                        best_dev = dev_ppl
                        if test is None: continue
                        self.dropout.set_value(0.0)
                        self.rnn_dropout.set_value(0.0)
                        test_preds = self.evaluate(eval_func, test, 1,
                                                   unroll_size)
                        test_loss = evaluate_average(predictions=test_preds,
                                                     masks=None)
                        test_ppl = np.exp(test_loss)
                        self.dropout.set_value(dropout_prob)
                        self.rnn_dropout.set_value(rnn_dropout_prob)
                        say("\tbest_dev={:.1f}  test_loss={:.3f}  test_ppl={:.1f}\n"
                            .format(best_dev, test_loss, test_ppl))
                        if best_dev < 200: unchanged = 0

        say("\n")