def test_object_sequence_model(self):
  """Test the model class."""
  core = snt.RelationalMemory(
      mem_slots=2, head_size=4, num_heads=1, num_blocks=1, gate_style="unit")
  final_mlp = snt.nets.MLP(output_sizes=(5,), activate_final=True)
  model = rmc_nth_farthest.SequenceModel(
      core=core, target_size=self._num_objects, final_mlp=final_mlp)
  logits = model(
      tf.zeros((self._batch_size, self._num_objects, self._feature_size)))
  self.assertAllEqual(logits.shape,
                      (self._batch_size, self._num_objects))
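This test method references fixture attributes (self._batch_size, self._num_objects, self._feature_size) that are defined outside the snippet. A minimal sketch of the assumed harness; the class name and the sizes are illustrative assumptions, not from the original:

class ObjectSequenceModelTest(tf.test.TestCase):

  def setUp(self):
    super(ObjectSequenceModelTest, self).setUp()
    # Small sizes keep the graph cheap to build; exact values are assumed.
    self._batch_size = 2
    self._num_objects = 3
    self._feature_size = 4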
Example #2
def test_object_sequence_model(self):
  """Test the model class."""
  core = snt.RelationalMemory(
      mem_slots=2, head_size=4, num_heads=1, num_blocks=1, gate_style="unit")
  final_mlp = snt.nets.MLP(output_sizes=(5,), activate_final=True)
  model = rmc_learn_to_execute.SequenceModel(
      core=core, target_size=self._feature_size, final_mlp=final_mlp)
  dummy_in = tf.zeros(
      (self._seq_sz_in, self._batch_size, self._feature_size))
  dummy_out = tf.zeros(
      (self._seq_sz_out, self._batch_size, self._feature_size))
  sizes = tf.ones((self._batch_size,))
  logits = model(dummy_in, dummy_out, sizes, sizes)
  self.assertAllEqual(
      logits.shape, (self._seq_sz_out, self._batch_size, self._feature_size))
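Unlike the nth-farthest test above, this variant is time-major: the leading dimension of dummy_in and dummy_out is sequence length, not batch. An illustrative setUp for the fixture attributes it assumes (attribute names from the test; the class name and values are assumptions):

class LearnToExecuteModelTest(tf.test.TestCase):

  def setUp(self):
    super(LearnToExecuteModelTest, self).setUp()
    self._seq_sz_in = 5    # encoder sequence length (assumed)
    self._seq_sz_out = 3   # decoder sequence length (assumed)
    self._batch_size = 2
    self._feature_size = 4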
Example #3
def build_and_train(iterations, log_stride, test=False):
    """Construct the data, model, loss and optimizer then train."""

    # Test mode settings.
    batch_size = 2 if test else FLAGS.batch_size
    num_mems = 2 if test else FLAGS.num_mems
    num_heads = 1 if test else FLAGS.num_heads
    num_blocks = 1 if test else FLAGS.num_blocks
    head_size = 4 if test else FLAGS.head_size
    num_objects = 2 if test else FLAGS.num_objects
    num_features = 4 if test else FLAGS.num_features
    mlp_size = (20,) if test else (256, 256, 256, 256)

    with tf.Graph().as_default():
        t0 = time.time()

        # Initialize the dataset.
        dataset = dataset_nth_farthest.NthFarthest(batch_size, num_objects,
                                                   num_features)

        # Create the model.
        core = snt.RelationalMemory(mem_slots=num_mems,
                                    head_size=head_size,
                                    num_heads=num_heads,
                                    num_blocks=num_blocks,
                                    gate_style=FLAGS.gate_style)

        final_mlp = snt.nets.MLP(output_sizes=mlp_size, activate_final=True)

        model = SequenceModel(core=core,
                              target_size=num_objects,
                              final_mlp=final_mlp)

        tf.logging.info("Instantiated models ({:3f})".format(time.time() - t0))

        # Get train and test data.
        inputs_train, labels_train = dataset.get_batch()
        inputs_test, labels_test = dataset.get_batch()

        # Define target accuracy.
        def compute_accuracy(logits, targets, name="accuracy"):
            correct_pred = tf.cast(
                tf.equal(tf.cast(targets, tf.int64), tf.argmax(logits, 1)),
                tf.float32)
            return tf.reduce_mean(correct_pred, name=name)

        # Define the loss & accuracy.
        def loss_fn(inputs, labels):
            """Creates the loss and the exports."""
            logits = model(inputs)
            labels = tf.cast(labels, tf.int32)
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=labels))
            accuracy = compute_accuracy(logits, labels)
            return loss, accuracy

        # Get training step counter.
        global_step = tf.get_variable(name="global_step",
                                      shape=[],
                                      dtype=tf.int64,
                                      initializer=tf.zeros_initializer(),
                                      trainable=False,
                                      collections=[
                                          tf.GraphKeys.GLOBAL_VARIABLES,
                                          tf.GraphKeys.GLOBAL_STEP
                                      ])
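        # Registering the variable in the GLOBAL_STEP collection lets helpers
        # such as tf.train.get_global_step() discover it later.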

        # Create the optimizer.
        learning_rate_op = tf.reduce_max([
            tf.train.exponential_decay(FLAGS.learning_rate,
                                       global_step,
                                       decay_steps=FLAGS.epochs // 100,
                                       decay_rate=0.9,
                                       staircase=False),
            FLAGS.min_learning_rate
        ])
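        # tf.reduce_max keeps the rate from decaying below the floor: the
        # effective rate is max(decayed_rate, FLAGS.min_learning_rate), with
        # the decayed term shrinking by 10% every FLAGS.epochs // 100 steps.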
        optimizer = tf.train.AdamOptimizer(learning_rate_op)
        train_loss, _ = loss_fn(inputs_train, labels_train)
        step_op = optimizer.minimize(train_loss, global_step=global_step)

        # Compute test accuracy
        logits_test = model(inputs_test)
        labels_test = tf.cast(labels_test, tf.int32)
        test_acc = compute_accuracy(logits_test, labels_test)

        tf.logging.info(
            "Created losses and optimizers ({:3f})".format(time.time() - t0))

        # Begin Training.
        t0 = time.time()
        train_losses = []
        steps = []
        test_accs = []
        tf.logging.info("Starting training ({:3f})".format(time.time() - t0))
        with tf.train.SingularMonitoredSession() as sess:
            for it in six.moves.range(iterations):
                sess.run([step_op, learning_rate_op])
                if it % log_stride == 0:
                    loss_v, acc_v = sess.run([train_loss, test_acc])
                    elapsed = time.time() - t0
                    tf.logging.info(
                        "iter: {:2d}, train loss {:3f}; test acc {:3f} ({:3f})"
                        .format(it, loss_v, acc_v, elapsed))
                    train_losses.append(loss_v)
                    steps.append(it)
                    test_accs.append(acc_v)
    return steps, train_losses, test_accs
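A minimal driver for this function, following the usual tf.app pattern; the main wrapper and the iteration counts below are illustrative assumptions, not from the original file:

def main(unused_argv):
  # Short run; the counts here are illustrative.
  steps, train_losses, test_accs = build_and_train(
      iterations=10000, log_stride=100)
  tf.logging.info("final test accuracy: {:3f}".format(test_accs[-1]))


if __name__ == "__main__":
  tf.app.run(main)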
Example #4
def build_and_train(iterations, log_stride, test=False):
    """Construct the data, model, loss and optimizer then train."""

    # Test mode settings.
    batch_size = 2 if test else FLAGS.batch_size
    num_mems = 2 if test else FLAGS.num_mems
    num_heads = 1 if test else FLAGS.num_heads
    num_blocks = 1 if test else FLAGS.num_blocks
    head_size = 4 if test else FLAGS.head_size
    max_length = 3 if test else FLAGS.max_length
    max_nest = 2 if test else FLAGS.max_nest
    mlp_size = (20,) if test else (256, 256, 256, 256)

    with tf.Graph().as_default():
        t0 = time.time()

        # Initialize the dataset.
        lte_train = learn_to_execute.LearnToExecute(batch_size, max_length,
                                                    max_nest)
        lte_test = learn_to_execute.LearnToExecute(
            batch_size, max_length, max_nest, mode=learn_to_execute.Mode.TEST)
        train_data_iter = lte_train.make_one_shot_iterator().get_next()
        test_data_iter = lte_test.make_one_shot_iterator().get_next()
        output_size = lte_train.state.vocab_size

        # Create the model.
        core = snt.RelationalMemory(mem_slots=num_mems,
                                    head_size=head_size,
                                    num_heads=num_heads,
                                    num_blocks=num_blocks,
                                    gate_style=FLAGS.gate_style)
        final_mlp = snt.nets.MLP(output_sizes=mlp_size, activate_final=True)
        model = SequenceModel(core=core,
                              target_size=output_size,
                              final_mlp=final_mlp)
        tf.logging.info("Instantiated models ({:3f})".format(time.time() - t0))

        # Define the loss & accuracy.
        def loss_fn(inputs, targets, input_sequence_length,
                    output_sequence_length):
            """Creates the loss and the exports."""
            logits = model(inputs, targets, input_sequence_length,
                           output_sequence_length)
            targets = tf.cast(targets, tf.int32)
            sq_sz_out_max = targets.shape[0].value

            # Create a mask to ignore accuracy on buffer characters.
            sequence_sizes = tf.cast(output_sequence_length, tf.float32)
            lengths_transposed = tf.expand_dims(sequence_sizes, 1)
            range_row = tf.expand_dims(
                tf.range(0, sq_sz_out_max, 1, dtype=tf.float32), 0)
            mask = tf.cast(
                tf.transpose(tf.less(range_row, lengths_transposed)),
                tf.float32)
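            # Illustrative example: with sq_sz_out_max = 4 and
            # output_sequence_length = [2, 3], mask is [time, batch]:
            #   [[1., 1.],
            #    [1., 1.],
            #    [0., 1.],
            #    [0., 0.]]
            # so positions past each sequence's length contribute nothing.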

            # Compute token accuracy and solved.
            correct = tf.equal(tf.argmax(logits, 2), tf.argmax(targets, 2))
            solved = tf.reduce_all(tf.boolean_mask(correct, tf.squeeze(mask)),
                                   axis=0)
            token_acc = tf.reduce_sum(tf.cast(correct, tf.float32) * mask)
            token_acc /= tf.reduce_sum(sequence_sizes)

            # Compute Loss.
            mask = tf.cast(
                tf.tile(tf.expand_dims(mask, 2), (1, 1, logits.shape[2])),
                tf.float32)
            masked_logits = logits * mask
            masked_target = tf.cast(targets, tf.float32) * mask
            logits_flat = tf.reshape(masked_logits,
                                     [sq_sz_out_max * batch_size, -1])
            target_flat = tf.reshape(masked_target,
                                     [sq_sz_out_max * batch_size, -1])
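            # Flattening [time, batch, vocab] to [time * batch, vocab] yields
            # one cross-entropy term per (step, example) pair; masked rows
            # have all-zero labels, so their term is zero (though they still
            # count in the denominator of the mean below).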
            xent = tf.nn.softmax_cross_entropy_with_logits(logits=logits_flat,
                                                           labels=target_flat)
            loss = tf.reduce_mean(xent)
            return loss, token_acc, solved

        # Get training step counter.
        global_step = tf.train.get_or_create_global_step()

        # Create the optimizer.
        learning_rate_op = tf.reduce_max([
            tf.train.exponential_decay(FLAGS.learning_rate,
                                       global_step,
                                       decay_steps=FLAGS.epochs // 100,
                                       decay_rate=0.9,
                                       staircase=False),
            FLAGS.min_learning_rate
        ])
        optimizer = tf.train.AdamOptimizer(learning_rate_op)

        # Compute loss, accuracy & the step op.
        inputs, targets, _, input_lengths, output_lengths = train_data_iter
        train_loss, train_acc, train_sol = loss_fn(inputs, targets,
                                                   input_lengths,
                                                   output_lengths)
        step_op = optimizer.minimize(train_loss, global_step=global_step)
        inputs, targets, _, input_lengths, output_lengths = test_data_iter
        _, test_acc, test_sol = loss_fn(inputs, targets, input_lengths,
                                        output_lengths)
        tf.logging.info(
            "Created losses and optimizers ({:3f})".format(time.time() - t0))

        # Begin Training.
        t0 = time.time()
        tf.logging.info("Starting training ({:3f})".format(time.time() - t0))
        with tf.train.SingularMonitoredSession() as sess:
            for it in six.moves.range(iterations):
                sess.run([step_op, learning_rate_op])
                if it % log_stride == 0:
                    loss_v, train_acc_v, test_acc_v, train_sol_v, test_sol_v = sess.run(
                        [train_loss, train_acc, test_acc, train_sol, test_sol])
                    elapsed = time.time() - t0
                    tf.logging.info(
                        "iter: {:2d}, train loss {:3f}; train acc {:3f}; test acc {:3f};"
                        " train solved {:3f}; test solved {:3f}; ({:3f})".
                        format(it, loss_v, train_acc_v, test_acc_v,
                               train_sol_v, test_sol_v, elapsed))
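To check the masking logic in isolation, here is a small self-contained sketch (TF1-style; the helper name and the sizes are assumptions) that reproduces the mask construction used in loss_fn above:

import tensorflow as tf

def length_mask(lengths, max_len):
  """Returns a [max_len, batch] float mask that is 1.0 inside each sequence."""
  lengths = tf.cast(lengths, tf.float32)
  lengths_transposed = tf.expand_dims(lengths, 1)        # [batch, 1]
  range_row = tf.expand_dims(
      tf.range(0, max_len, 1, dtype=tf.float32), 0)      # [1, max_len]
  return tf.cast(
      tf.transpose(tf.less(range_row, lengths_transposed)), tf.float32)

with tf.Graph().as_default(), tf.Session() as sess:
  print(sess.run(length_mask([2, 3], 4)))
  # -> [[1. 1.], [1. 1.], [0. 1.], [0. 0.]]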