Example no. 1
    def testStep(self):
        agent = self._get_agent_instance()
        bs_ph = tf.placeholder_with_default(B, ())

        init_state = agent.initial_state(bs=bs_ph)

        step_type = np.zeros((B, ), dtype=np.int32)
        reward = np.zeros((B, ), dtype=np.float32)
        obs = dict(features=np.zeros((B, N_NODES), dtype=np.float32),
                   graph_features=self._get_graph_features_step(),
                   node_mask=np.ones((B, N_NODES), dtype=np.int32))
        prev_state = init_state  # hack that works for now!

        step_type, reward, obs, prev_state = agent.step_preprocess(
            step_type, reward, obs, prev_state)

        def f(np_arr):
            return tf.constant(np_arr)

        with tf.variable_scope('step', reuse=tf.AUTO_REUSE):
            step_output = agent.step(nest.map_structure(f, step_type),
                                     nest.map_structure(f, reward),
                                     nest.map_structure(f, obs), prev_state)

        sess = self.session()
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for _ in range(100):
            sess.run(step_output)
            print('.', end='')
        print('')
        print('Done!')
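The test above leans on nest.map_structure to turn every leaf of the (possibly nested) observation dict into a graph tensor while preserving the dict structure. A minimal sketch of that conversion, assuming TF 1.x graph mode and hypothetical shapes:

import numpy as np
import tensorflow as tf
from tensorflow.python.util import nest

obs = dict(features=np.zeros((4, 8), dtype=np.float32),
           node_mask=np.ones((4, 8), dtype=np.int32))
# Each numpy leaf becomes a tf.constant; the dict keys and nesting are kept.
obs_t = nest.map_structure(tf.constant, obs)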
Example no. 2
def dynamic_decode(decoder_cell, max_iter):
    max_iter = tf.convert_to_tensor(max_iter, dtype=tf.int32)

    # TensorArray: wrap dynamic-sized, per-time-step, write-once Tensor arrays
    def create_tensor_array(d):
        # initial size = 0
        return tf.TensorArray(dtype=d, size=0, dynamic_size=True)

    time_index = tf.constant(0, dtype=tf.int32)
    # nest.map_structure: applies func to each entry in structure
    output_tensor_arrays = nest.map_structure(
        create_tensor_array, decoder_cell.output_dtype)

    cell_states, inputs, decode_finished = decoder_cell.initialize()

    # tf.while_loop(cond, body, vars): Repeat body while condition cond is true
    def condition(time_index, output_ta, cell_states, inputs, decode_finished):
        """
            if all "decode_finished" are True, return "False"
        """
        return tf.logical_not(tf.reduce_all(decode_finished))

    def body(time_index, output_ta, cell_states, inputs, decode_finished):
        sts = decoder_cell.step(time_index, cell_states, inputs,
                                decode_finished)
        new_output, new_cell_states, new_inputs, new_decode_finished = sts

        # TensorArray.write(index, value): register value and returns new TAs
        output_ta = nest.map_structure(
            lambda ta, out: ta.write(time_index, out),
            output_ta, new_output)

        new_decode_finished = tf.logical_or(
            tf.greater_equal(time_index, max_iter),
            new_decode_finished)

        return (time_index + 1, output_ta, new_cell_states, new_inputs,
                new_decode_finished)

    with tf.variable_scope("decoding"):

        res = tf.while_loop(
            condition,
            body,
            loop_vars=[time_index, output_tensor_arrays, cell_states,
                       inputs, decode_finished],
            back_prop=False)

    # get final outputs and states
    final_output_ta, final_cell_states = res[1], res[2]
    # TA.stack(): stack all tensors in TensorArray, [max_iter+1, batch_size, _]
    final_outputs = nest.map_structure(lambda ta: ta.stack(), final_output_ta)

    # finalize the computation from the decoder cell
    final_outputs = decoder_cell.finalize(final_outputs, final_cell_states)

    # transpose the final output
    final_outputs = nest.map_structure(transpose_batch_time, final_outputs)

    return final_outputs, final_cell_states
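Stripped of the decoder cell, the core while_loop/TensorArray pattern that dynamic_decode relies on looks like this (a minimal sketch, TF 1.x graph mode assumed):

import tensorflow as tf

max_iter = tf.constant(5, dtype=tf.int32)
ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

def cond(t, ta):
    return t < max_iter

def body(t, ta):
    # TensorArray.write returns a new handle that must be carried forward
    return t + 1, ta.write(t, tf.cast(t, tf.float32))

_, final_ta = tf.while_loop(cond, body, [tf.constant(0), ta])
outputs = final_ta.stack()  # shape [max_iter] once the loop terminates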
Example no. 3
    def write(self, values, step=None):
        if step is None:
            step = self._step

        if self._tracker.track_increment():
            # Note that this is #write calls % period
            # and not necessarily step % period

            # compute the to_write value
            if self._is_average:
                if self._history is None:
                    self._history = nest.map_structure(self._make_history,
                                                       values)
                else:
                    self._history = nest.map_structure(self._add_to_history,
                                                       self._history, values)

                to_write = nest.map_structure(np.mean, self._history)
            else:
                to_write = values

            # write to base now
            for logger in self._base_loggers:
                logger.write(to_write, step=step)

        self._step += 1
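When averaging is enabled, the logger reduces every history leaf with np.mean while keeping the structure of `values`. Structure-wise the reduction is just the following (hypothetical leaves):

import numpy as np
from tensorflow.python.util import nest

history = dict(loss=np.array([1.0, 2.0, 3.0]),
               reward=np.array([0.0, 10.0]))
to_write = nest.map_structure(np.mean, history)
# {'loss': 2.0, 'reward': 5.0}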
Example no. 4
def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
    """Convert to tensor and possibly mask `memory`.

    Args:
      memory: `Tensor`, shaped `[batch_size, max_time, ...]`.
      memory_sequence_length: `int32` `Tensor`, shaped `[batch_size]`.
      check_inner_dims_defined: Python boolean.  If `True`, the `memory`
        argument's shape is checked to ensure all but the two outermost
        dimensions are fully defined.

    Returns:
      A (possibly masked), checked, new `memory`.

    Raises:
      ValueError: If `check_inner_dims_defined` is `True` and not
        `memory.shape[2:].is_fully_defined()`.
    """
    memory = nest.map_structure(
        lambda m: tf.convert_to_tensor(m, name="memory"), memory)
    if memory_sequence_length is not None:
        memory_sequence_length = tf.convert_to_tensor(
            memory_sequence_length, name="memory_sequence_length")
    if check_inner_dims_defined:
        def _check_dims(m):
            if not m.get_shape()[2:].is_fully_defined():
                raise ValueError("Expected memory %s to have fully defined inner dims, "
                                 "but saw shape: %s" % (m.name, m.get_shape()))

        nest.map_structure(_check_dims, memory)
    if memory_sequence_length is None:
        seq_len_mask = None
    else:
        seq_len_mask = tf.sequence_mask(
            memory_sequence_length,
            maxlen=tf.shape(nest.flatten(memory)[0])[1],
            dtype=nest.flatten(memory)[0].dtype)
        seq_len_batch_size = (
                tf.dimension_value(memory_sequence_length.shape[0])
                or tf.shape(memory_sequence_length)[0])

    def _maybe_mask(m, seq_len_mask):
        rank = m.get_shape().ndims
        rank = rank if rank is not None else tf.rank(m)
        extra_ones = tf.ones(rank - 2, dtype=tf.int32)
        m_batch_size = tf.dimension_value(
            m.shape[0]) or tf.shape(m)[0]
        if memory_sequence_length is not None:
            message = ("memory_sequence_length and memory tensor batch sizes do not "
                       "match.")
            with tf.control_dependencies([
                tf.assert_equal(
                    seq_len_batch_size, m_batch_size, message=message)]):
                seq_len_mask = tf.reshape(
                    seq_len_mask,
                    tf.concat((tf.shape(seq_len_mask), extra_ones), 0))
                return m * seq_len_mask
        else:
            return m

    return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
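The `_maybe_mask` step broadcasts a [batch, max_time] sequence mask over memory entries that carry extra feature dimensions. A minimal numeric sketch of that masking, with hypothetical shapes (TF 1.x):

import tensorflow as tf

memory = tf.ones([2, 4, 3])                 # [batch, max_time, depth]
lengths = tf.constant([2, 4])
mask = tf.sequence_mask(lengths, maxlen=4, dtype=memory.dtype)  # [2, 4]
masked = memory * tf.expand_dims(mask, -1)  # padded time steps become zero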
Example no. 5
    def _get_graph_features_update(self):
        # get timestep stacked and batched graph features
        def f(*l):
            return np.stack(l, axis=0)

        graph_features = nest.map_structure(f, *[GRAPH_FEATURES] * B)
        return nest.map_structure(f, *[graph_features] * (T + 1))
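Passing B copies of one structure makes map_structure call `f` with B aligned leaves, so every leaf gains a leading batch axis. A small numpy-only illustration, with a hypothetical stand-in for the module's GRAPH_FEATURES:

import numpy as np
from tensorflow.python.util import nest

GRAPH_FEATURES = dict(nodes=np.zeros((3, 2)), edges=np.zeros((5, 4)))
B = 4

batched = nest.map_structure(lambda *l: np.stack(l, axis=0),
                             *[GRAPH_FEATURES] * B)
# batched['nodes'].shape == (4, 3, 2), batched['edges'].shape == (4, 5, 4)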
Example no. 6
def loop_tf(loop_fn,
            inputs,
            persistent_initializer,
            transient_initializer,
            n=None,
            time_major=False):
    def create_tensor_array(initial_tensor: tf.Tensor):
        return tf.TensorArray(initial_tensor.dtype,
                              size=n,
                              element_shape=initial_tensor.get_shape())

    tensor_arrays = nest.map_structure(create_tensor_array,
                                       persistent_initializer)

    def while_fn(*args):
        current_iteration = args[0]
        persistent_values = args[1]
        transient_values = args[2]
        current_tensor_arrays = args[3]
        if time_major:
            input_values = inputs[current_iteration]
        else:
            input_values = inputs[:, current_iteration]

        new_persistent, new_transient = loop_fn(input_values,
                                                persistent_values,
                                                transient_values)
        flat_new_persistent = nest.flatten(new_persistent)
        flat_tensor_arrays = nest.flatten(current_tensor_arrays)
        flat_written_tensor_arrays = [
            ta.write(current_iteration, a)
            for ta, a in zip(flat_tensor_arrays, flat_new_persistent)
        ]
        new_tensor_arrays = nest.pack_sequence_as(current_tensor_arrays,
                                                  flat_written_tensor_arrays)
        return current_iteration + 1, new_persistent, new_transient, new_tensor_arrays

    def while_cond(*args):
        seq_len = tf.shape(inputs)[0] if time_major else tf.shape(inputs)[1]
        return tf.less(args[0], seq_len)

    _, final_persistent, final_transient, final_tensor_arrays = \
        tf.while_loop(while_cond, while_fn, (0, persistent_initializer, transient_initializer, tensor_arrays))

    final_sequence_tensors = nest.map_structure(lambda x: x.stack(),
                                                final_tensor_arrays)

    def make_batch_major(tensor):
        permutation = np.arange(len(tensor.get_shape()))
        permutation[:2] = permutation[:2][::-1]
        return tf.transpose(tensor, permutation)

    if not time_major:
        final_sequence_tensors = nest.map_structure(make_batch_major,
                                                    final_sequence_tensors)

    return final_sequence_tensors
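A hypothetical usage sketch of loop_tf: accumulating a running sum over the time axis of a batch-major [batch, time, feature] input, with n matching the time dimension:

import tensorflow as tf

inputs = tf.random_normal([2, 5, 3])   # [batch, time, feature]
init_sum = tf.zeros([2, 3])            # persistent accumulator, one row per batch item

def step(x_t, running_sum, unused_transient):
    return running_sum + x_t, unused_transient

# outputs[:, t] is the accumulator after step t, i.e. a prefix sum over time
outputs = loop_tf(step, inputs, init_sum, tf.constant(0), n=5)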
Example no. 7
    def _get_graph_features_update(self, large_graph=True):
        # get timestep stacked and batched graph features
        def f(*l):
            return np.stack(l, axis=0)

        if large_graph:
            graph_features = self._large_graph()
        else:
            graph_features = GRAPH_FEATURES
        graph_features = nest.map_structure(f, *[graph_features] * B)
        return nest.map_structure(f, *[graph_features] * (T + 1))
Example no. 8
    def testUpdate(self):
        agent = self._get_agent_instance()
        bs_ph = tf.placeholder_with_default(B, ())
        sess = self.session()

        init_state = agent.initial_state(bs=bs_ph)
        init_state_val = sess.run(init_state)

        step_type = np.zeros((T + 1, B), dtype=np.int32)
        reward = np.zeros((T + 1, B), dtype=np.float32)
        discount = np.zeros((T + 1, B), dtype=np.float32)

        var_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        constraint_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        obj_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        var_type_mask[:, :, 0] = 1
        constraint_type_mask[:, :, 1] = 1
        obj_type_mask[:, :, 2] = 1

        obs = dict(features=np.zeros((T + 1, B, N_NODES), dtype=np.float32),
                   graph_features=self._get_graph_features_update(),
                   node_mask=np.ones(((T + 1), B, N_NODES), dtype=np.int32),
                   var_type_mask=var_type_mask,
                   constraint_type_mask=constraint_type_mask,
                   obj_type_mask=obj_type_mask)

        step_output = StepOutput(action=np.zeros((T, B), dtype=np.int32),
                                 logits=np.zeros((T, B, N_NODES),
                                                 dtype=np.float32),
                                 next_state=np.zeros_like(
                                     np.vstack([init_state_val] * T)))

        step_output, _, step_type, reward, obs, discount = agent.update_preprocess(
            step_output, None, step_type, reward, obs, discount)

        def f(np_arr):
            return tf.constant(np_arr)

        with tf.variable_scope('update', reuse=tf.AUTO_REUSE):
            agent.build_update_ops(
                nest.map_structure(f, step_output),
                tf.zeros_like(np.vstack([init_state_val] * (T + 1))),
                nest.map_structure(f, step_type),
                nest.map_structure(f, reward), nest.map_structure(f, obs),
                nest.map_structure(f, discount))

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for _ in range(3):
            agent.update(sess, {}, {})
            print('.', end='')
        print('')
        print('Done!')
Example no. 9
def unstack_data_dict(stacked_data_dict):
    """
    stacked_data_dict is a data_dict with all the features stacked.
    globals =>
  """
    def f(arr):
        if arr is None:
            return arr
        l = np.split(arr, arr.shape[0])
        # remove the leading dimension
        l = list(map(lambda k: np.squeeze(k, axis=0), l))
        return l

    d = nest.map_structure(f, stacked_data_dict)
    bs = len(d['n_node'])

    data_dicts = [{} for _ in range(bs)]
    for k, l in d.items():
        for i in range(bs):
            data_dicts[i][k] = l[i]

    unstacked_data_dicts = []
    for d in data_dicts:
        if d['n_node'].ndim > 0:
            # d is a stacked data dict
            unstacked_data_dicts.extend(unstack_data_dict(d))
        else:
            unstacked_data_dicts.append(d)
    return unstacked_data_dicts
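A hypothetical usage sketch: a data_dict whose leaves are stacked along a leading batch axis comes back as one dict per example, with the leading dimension removed.

import numpy as np

stacked = dict(n_node=np.array([3, 5]),
               globals=np.zeros((2, 7), dtype=np.float32))
dicts = unstack_data_dict(stacked)
# len(dicts) == 2 and dicts[0]['globals'].shape == (7,)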
Example no. 10
 def initialize(batch_size, time, emit_input):
     for w in ["SOS"]:
         idx = tf.reshape(tf.constant(word2idx[w], dtype=tf.int32), [-1])
         idx = tf.tile(idx, [batch_size])
         emit_input = nest.map_structure(
             lambda ta, em: ta.write(time, em), emit_input, idx)
         time += 1
     return emit_input, time
Example no. 11
def rnn_model(inputs, shape, embedding_matrix):
    """make an unrolled RNN over the inputs. Not optimised for GPU"""
    with tf.variable_scope('rnn'):
        inputs = tf.nn.embedding_lookup(embedding_matrix, inputs)
        input_shape = inputs.get_shape().as_list()
        vocab_size = embedding_matrix.get_shape()[0].value

        cells = [tf.nn.rnn_cell.GRUCell(n) for n in shape]
        cell = tf.nn.rnn_cell.MultiRNNCell(cells)
        # won't work with LSTMs
        initial_state = tuple(
            tf.get_variable('state_{}'.format(i),
                            shape=[input_shape[0], c.state_size],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer())
            for i, c in enumerate(cells))
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs,
                                                 initial_state=initial_state)
        # we're always going to roll this over every time the output is
        # evaluated
        state_updates = nest.map_structure(tf.assign, initial_state,
                                           final_state)
        state_updates = nest.flatten(state_updates)
        with tf.control_dependencies(state_updates):
            outputs = tf.reshape(outputs, [-1, shape[-1]])
            outputs = tf.layers.dense(outputs, vocab_size, activation=None)
            outputs = tf.reshape(
                outputs, [input_shape[0] or -1, input_shape[1], vocab_size])
        return outputs
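The state_updates line pairs each state variable with the matching final-state tensor and builds one assign op per leaf. The same idiom in isolation (a sketch with hypothetical names and shapes):

import tensorflow as tf
from tensorflow.python.util import nest

state_vars = (tf.get_variable('s0', [2, 4], initializer=tf.zeros_initializer()),
              tf.get_variable('s1', [2, 8], initializer=tf.zeros_initializer()))
new_state = (tf.ones([2, 4]), tf.ones([2, 8]))
# One assign op per (variable, value) pair, flattened into a list of ops.
updates = nest.flatten(nest.map_structure(tf.assign, state_vars, new_state))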
Example no. 12
    def _traj_spec(self):
        def expand_spec(spec):
            spec = copy.deepcopy(spec)
            spec = np.expand_dims(spec, axis=0)
            return spec

        return nest.map_structure(expand_spec, self._get_graph_features())
Example no. 13
    def zero_state(self, batch_size, dtype):
        with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
            if self._initial_cell_state is not None:
                cell_state = self._initial_cell_state
            else:
                cell_state = self._cell.zero_state(batch_size, dtype)
            error_message = (
                    "When calling zero_state of AttentionWrapper %s: " % self._base_name +
                    "Non-matching batch sizes between the memory "
                    "(encoder output) and the requested batch size.    Are you using "
                    "the BeamSearchDecoder?    If so, make sure your encoder output has "
                    "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
                    "the batch_size= argument passed to zero_state is "
                    "batch_size * beam_width.")
            with tf.control_dependencies(
                    self._batch_size_checks(batch_size, error_message)):
                cell_state = nest.map_structure(
                        lambda s: tf.identity(s, name="checked_cell_state"),
                        cell_state)

            return AttentionWrapperState(
                    cell_state=cell_state,
                    time=tf.zeros([], dtype=tf.int32),
                    attention=_zero_state_tensors(self._attention_layer_size, batch_size, dtype),
                    alignments=self._item_or_tuple(
                            attention_mechanism.initial_alignments(batch_size, dtype)
                            for attention_mechanism in self._attention_mechanisms),
                    alignment_history=self._item_or_tuple(
                            tf.TensorArray(dtype=dtype, size=0, dynamic_size=True)
                            if self._alignment_history else ()
                            for _ in self._attention_mechanisms))
Example no. 14
def distill_loss_old(student_logits, teacher_logits, masks):
    """ Distillation loss.

    Args:
      student_logits: structured logits compatible with nest.map_structure.
      teacher_logits:

    Returns:
      final_distill_loss: total loss.
      head_distill_loss: per-head loss. The same structure as inputs.
  """
    def _compute_kl(logits, o_logits, masks):
        a0 = logits - tf.reduce_max(logits, axis=-1, keep_dims=True)
        a1 = o_logits - tf.reduce_max(o_logits, axis=-1, keep_dims=True)
        ea0 = tf.exp(a0)
        ea1 = tf.exp(a1)
        z0 = tf.reduce_sum(ea0, axis=-1, keep_dims=True)
        z1 = tf.reduce_sum(ea1, axis=-1, keep_dims=True)
        p0 = ea0 / z0
        return tf.reduce_sum(p0 * (a0 - tf.log(z0) - a1 + tf.log(z1)),
                             axis=-1) * masks

    head_distill_loss = nest.map_structure(_compute_kl, student_logits,
                                           teacher_logits, masks)
    return head_distill_loss
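Because map_structure accepts several parallel structures, `_compute_kl` runs head-by-head over matching leaves. A hypothetical two-head usage of the function above:

import tensorflow as tf

# two hypothetical heads, each with [batch, num_actions] logits
student = dict(policy=tf.zeros([2, 5]), aux=tf.zeros([2, 3]))
teacher = dict(policy=tf.ones([2, 5]), aux=tf.ones([2, 3]))
masks = dict(policy=tf.ones([2]), aux=tf.ones([2]))

head_losses = distill_loss_old(student, teacher, masks)
# head_losses keeps the dict structure: one [batch]-shaped KL tensor per head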
Example no. 15
    def step_output_spec(self):
        def mk_spec(tensor):
            return ArraySpec(dtype=tensor.dtype.as_numpy_dtype,
                             shape=tensor.shape,
                             name=tensor.name)

        return dict(nest.map_structure(mk_spec, self._step_output)._asdict())
Example no. 16
    def _mk_phs(self, traj_spec):
        def mk_ph(spec):
            return tf.placeholder(dtype=spec.dtype,
                                  shape=spec.shape,
                                  name='learner/' +
                                  spec.name.replace(':', '_') + '_ph')

        self._traj_phs = nest.map_structure(mk_ph, traj_spec)
Example no. 17
def _zero_state_tensors(state_size, batch_size, dtype):
    """Create tensors of zeros based on state_size, batch_size, and dtype."""
    def get_state_shape(s):
        """Combine s with batch_size to get a proper tensor shape."""
        c = _concat(batch_size, s)
        size = tf.zeros(c, dtype=dtype)
        return size

    return nest.map_structure(get_state_shape, state_size)
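A hypothetical usage sketch, assuming the `_concat` shape helper the snippet references (TF's rnn_cell_impl._concat) is in scope:

import tensorflow as tf

cell = tf.nn.rnn_cell.LSTMCell(8)
# cell.state_size is LSTMStateTuple(c=8, h=8); every leaf becomes a
# [batch_size, 8] zero tensor and the tuple structure is preserved.
state = _zero_state_tensors(cell.state_size, batch_size=4, dtype=tf.float32)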
Example no. 18
    def _get_graph_features(self):
        # get timestep stacked and batched graph features
        def f(*l):
            return np.stack(l, axis=0)

        graph_features = self._large_graph()
        graph_features = nest.map_structure(f, *[graph_features] * B)
        # return nest.map_structure(f, *[graph_features] * (T + 1))
        return graph_features
Example no. 19
        def body(time, outputs_ta, parents):
            # get ids, logits and parents predicted at time step by decoder
            input_t = nest.map_structure(lambda t: t[time], final_outputs)

            # extract the entries corresponding to parents
            new_state = nest.map_structure(
                lambda t: gather_helper(t, parents, self._batch_size,
                                        self._beam_size),
                input_t)

            # create new output
            new_output = DecoderOutput(logits=new_state.logits,
                    ids=new_state.ids)

            # write beam ids
            outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                    outputs_ta, new_output)

            return (time + 1), outputs_ta, parents
Example no. 20
 def zero_state(self, batch_size, dtype):
     """Return an initial (zero) state tuple for this `AttentionWrapper`.
     **NOTE** Please see the initializer documentation for details of how
     to call `zero_state` if using an `AttentionWrapper` with a
     `BeamSearchDecoder`.
     Args:
       batch_size: `0D` integer tensor: the batch size.
       dtype: The internal state data type.
     Returns:
       An `AttentionWrapperState` tuple containing zeroed out tensors and,
       possibly, empty `TensorArray` objects.
     Raises:
       ValueError: (or, possibly at runtime, InvalidArgument), if
         `batch_size` does not match the output size of the encoder passed
         to the wrapper object at initialization time.
     """
     with tf.name_scope(type(self).__name__ + "ZeroState",
                        values=[batch_size]):
         if self._initial_cell_state is not None:
             cell_state = self._initial_cell_state
         else:
             cell_state = self._cell.zero_state(batch_size, dtype)
         error_message = (
             "When calling zero_state of AttentionWrapper %s: " %
             self._base_name +
             "Non-matching batch sizes between the memory "
             "(encoder output) and the requested batch size.  Are you using "
             "the BeamSearchDecoder?  If so, make sure your encoder output has "
             "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
             "the batch_size= argument passed to zero_state is "
             "batch_size * beam_width.")
         with tf.control_dependencies(
                 self._batch_size_checks(batch_size, error_message)):
             cell_state = nest.map_structure(
                 lambda s: tf.identity(s, name="checked_cell_state"),
                 cell_state)
         initial_alignments = [
             attention_mechanism.initial_alignments(batch_size, dtype)
             for attention_mechanism in self._attention_mechanisms
         ]
         return AttentionWrapperState(
             cell_state=cell_state,
             time=tf.zeros([], dtype=tf.int32),
             attention=_zero_state_tensors(self._attention_layer_size,
                                           batch_size, dtype),
             alignments=self._item_or_tuple(initial_alignments),
             attention_state=self._item_or_tuple(
                 attention_mechanism.initial_state(batch_size, dtype)
                 for attention_mechanism in self._attention_mechanisms),
              alignment_history=self._item_or_tuple(
                  tf.TensorArray(dtype, size=0, dynamic_size=True,
                                 element_shape=alignment.shape)
                  if self._alignment_history else ()
                  for alignment in initial_alignments))
Example no. 21
 def __init__(
     self,
     obs_spec,
     step_output_spec,
 ):
     self._trajs = None
     # Don't use shape in the spec since it's unknown
     self._traj_spec = dict(
         step_type=ArraySpec(dtype=np.int8,
                             shape=(None, None),
                             name='traj_step_type_spec'),
         reward=ArraySpec(dtype=np.float32,
                          shape=(None, None),
                          name='traj_reward_spec'),
         discount=ArraySpec(dtype=np.float32,
                            shape=(None, None),
                            name='traj_discount_spec'),
         observation=nest.map_structure(expand_spec, obs_spec),
         step_output=nest.map_structure(expand_spec, step_output_spec))
Example no. 22
    def __init__(self,
                 inputs,
                 labels,
                 keep_prob,
                 time_sizes=[300, 300],
                 note_sizes=[100, 50]):
        self.inputs = inputs  # input shape (batch, time, note, feature)
        self.labels = labels  # label shape (batch, time, note, out)
        self.batch_size = tf.shape(self.inputs)[0]
        self.keep_prob = keep_prob
        self.time_sizes = time_sizes
        self.note_sizes = note_sizes

        self.time_lstm_cell = tf.contrib.rnn.MultiRNNCell([
            tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.LSTMCell(sz, state_is_tuple=True),
                output_keep_prob=self.keep_prob) for sz in self.time_sizes
        ],
                                                          state_is_tuple=True)
        self.time_state = nest.map_structure(
            lambda x: tf.placeholder_with_default(x, x.shape, x.op.name),
            self.time_lstm_cell.zero_state(self.batch_size * NOTE_LEN,
                                           tf.float32))
        for tensor in nest.flatten(self.time_state):
            tf.add_to_collection('time_state_input', tensor)

        self.note_lstm_cell = tf.contrib.rnn.MultiRNNCell([
            tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.LSTMCell(sz, state_is_tuple=True),
                output_keep_prob=self.keep_prob) for sz in self.note_sizes
        ],
                                                          state_is_tuple=True)
        self.note_state = nest.map_structure(
            lambda x: tf.placeholder_with_default(x, x.shape, x.op.name),
            self.note_lstm_cell.zero_state(self.batch_size * SEQ_LEN,
                                           tf.float32))
        for tensor in nest.flatten(self.note_state):
            tf.add_to_collection('note_state_input', tensor)

        self.final_time_state, self.final_note_state, self.prediction \
            = self.forward_pass()
        self.loss = self.loss_function()
        self.optimize = self.optimizer()
Example no. 23
    def _initial_state(self):
        # t: [batch_size, num_units]
        cell_states = nest.map_structure(
            lambda t: tile_beam(t, self._beam_size), self._dec_init_states)

        # another "log_probs" initial states: accumulative log_prob!
        log_probs = tf.zeros([self._batch_size, self._beam_size],
                             dtype=self._dtype)

        return BeamDecoderCellStates(cell_states, log_probs)
Example no. 24
        def body(f_time_index, output_ta, f_parents):
            # get ids, logits and parents predicted at this time step
            input_t = nest.map_structure(lambda t: t[f_time_index],
                                         final_outputs)

            # parents: reversed version shows the next position to go
            new_beam_state = nest.map_structure(
                lambda t: gather_helper(t, f_parents, self._batch_size,
                                        self._beam_size),
                input_t)

            # create new output
            new_output = DecoderOutput(logits=new_beam_state.logits,
                                       ids=new_beam_state.ids)

            # write beam ids
            output_ta = nest.map_structure(
                lambda ta, out: ta.write(f_time_index, out), output_ta,
                new_output)

            return (f_time_index + 1), output_ta, input_t.parents
Example no. 25
def _weight_mean_with_function(f, args, max_batch):
    batch_size = len(args[0])
    # chunk sizes: max_batch for each full chunk, plus the remainder chunk if any
    weight = np.array(
        [max_batch for _ in range(math.floor(batch_size / max_batch))] +
        ([] if batch_size % max_batch == 0 else [batch_size % max_batch]))
    weight_sum = np.sum(weight)
    outputs = _calculate_function_with_batch_size(args, f, max_batch)

    def _weight_mean(*args):
        return sum(w * a for w, a in zip(weight, args)) / weight_sum

    outputs = nest.map_structure(_weight_mean, outputs)
    return outputs
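A numeric sketch of the weighting: with a hypothetical batch_size of 10 and max_batch of 4, the chunks have sizes 4, 4 and 2, and each chunk's statistic is weighted by its size.

import numpy as np

weight = np.array([4, 4, 2])
chunk_means = [1.0, 2.0, 3.0]        # hypothetical per-chunk outputs
weighted = sum(w * a for w, a in zip(weight, chunk_means)) / weight.sum()
# (4*1.0 + 4*2.0 + 2*3.0) / 10 = 1.8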
Example no. 26
    def body(time, outputs_ta, state, inputs, finished):
        new_output, new_state, new_inputs, new_finished = decoder_cell.step(
            time, state, inputs, finished)

        outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                                      outputs_ta, new_output)

        new_finished = tf.logical_or(
            tf.greater_equal(time, maximum_iterations),
            new_finished)

        return (time + 1, outputs_ta, new_state, new_inputs, new_finished)
Example no. 27
File: data.py Project: cappzxw/nmt
def get_training_dataset(features_file,
                         labels_file,
                         features_vocab_file,
                         labels_vocab_file,
                         batch_size,
                         batch_type="examples",
                         share_vocab=False,
                         intercept=False,
                         shuffle_buffer_size=None,
                         bucket_width=None,
                         maximum_features_length=None,
                         maximum_labels_length=None,
                         single_pass=False):
  features_dataset = tf.data.TextLineDataset(features_file)
  features_vocab = Vocab(vocabulary_file=features_vocab_file)
  features_vocab = features_vocab.vocabulary_lookup()
  features_dataset = features_dataset.map(
    lambda args: make_features(args, vocabulary=features_vocab))

  labels_dataset = tf.data.TextLineDataset(labels_file)
  if share_vocab:
    labels_vocab = features_vocab
  else:
    labels_vocab = Vocab(vocabulary_file=labels_vocab_file)
    labels_vocab = labels_vocab.vocabulary_lookup()
  labels_dataset = labels_dataset.map(
    lambda args: make_labels(args, vocabulary=labels_vocab))


  dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))

  if shuffle_buffer_size is not None:
    dataset = dataset.shuffle(shuffle_buffer_size)

  dataset = dataset.apply(filter_length(maximum_features_length=maximum_features_length,
                                        maximum_labels_length=maximum_labels_length,
                                        intercept=intercept))
  padded_shapes = nest.map_structure(lambda shape: shape.as_list(), dataset.output_shapes)

  dataset = dataset.apply(
    batch_pad_dataset(batch_size=batch_size, 
                      padded_shapes=padded_shapes,
                      batch_type=batch_type,
                      bucket_width=bucket_width))
  if not single_pass:
    dataset = dataset.repeat()

  return dataset
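The padded_shapes line maps over the nested output_shapes of the zipped dataset, turning every TensorShape leaf into a plain list. The same idiom in isolation (TF 1.x assumed, hypothetical features):

import tensorflow as tf
from tensorflow.python.util import nest

ds = tf.data.Dataset.from_tensor_slices(
    dict(tokens=tf.constant([[1, 2], [3, 4]]), length=tf.constant([2, 2])))
padded_shapes = nest.map_structure(lambda s: s.as_list(), ds.output_shapes)
# {'tokens': [2], 'length': []}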
Example no. 28
    def _build_rnn(self, x, log=no_op):
        log('- build-rnn -')
        with tf.name_scope('build_rnn', [x]):
            log('rnn-input', x)
            bs = tf.unstack(tf.shape(x))[0] # figure out dynamic batch size
            with slim.arg_scope(self._arg_scope()):
                lstms = [tf.nn.rnn_cell.LSTMCell(cfg.LSTM_SIZE) for _ in range(cfg.NUM_LSTM)]
                cell = tf.nn.rnn_cell.MultiRNNCell(lstms)
                #print('c0',cell.zero_state(cfg.BATCH_SIZE, tf.float32))
                state0 = nest.map_structure(
                        lambda x : tf.placeholder_with_default(x, [None] + list(x.shape)[1:], x.op.name),
                        cell.zero_state(self.batch_size_, tf.float32))
                #with tf.variable_scope('rnn_state'):
                #    state_variables = []
                #    for state_c, state_h in cell.zero_state(bs, tf.float32):
                #        state_variables.append(tf.nn.rnn_cell.LSTMStateTuple(
                #            tf.Variable(state_c, trainable=False, validate_shape=False),
                #            tf.Variable(state_h, trainable=False, validate_shape=False)))
                #    state0 = tuple(state_variables)
                with tf.variable_scope('rnn', reuse=self.reuse_):
                    output, state1 = tf.nn.dynamic_rnn(
                            cell=cell,
                            inputs=x,
                            initial_state=state0,
                            time_major=False,
                            dtype=tf.float32)
                #log('rnn-output', output.shape)
                #with tf.name_scope('rnn_keep'):
                #    # for stateful LSTM (during runtime)
                #    keep_ops = []
                #    for (s0c,s0h), (s1c,s1h) in zip(state0, state1):
                #        # Assign the new state to the state variables on this layer
                #        # for both (c,h)
                #        keep_ops.extend([s0c.assign(s1c), s0h.assign(s1h)])
                #    rnn_keep_op = tf.group(keep_ops)
                #with tf.name_scope('rnn_reset'):
                #    # for stateless LSTM (during training)
                #    # Define an op to reset the hidden state to zeros
                #    reset_ops = []
                #    for (s0c,s0h) in state0:
                #        # Assign the new state to the state variables on this layer
                #        # for both (c,h)
                #        reset_ops.extend([
                #            s0c.assign(tf.zeros_like(s0c)),
                #            s0h.assign(tf.zeros_like(s0h))])
                #    rnn_reset_op = tf.group(reset_ops)

        log('-------------')
        return output, state1, state0
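The state0 construction gives every LSTM state tensor a feedable placeholder that defaults to zeros, so inference can carry state across session.run calls while training starts from a clean state. A minimal sketch of the pattern (TF 1.x, hypothetical sizes):

import tensorflow as tf
from tensorflow.python.util import nest

cell = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.LSTMCell(16) for _ in range(2)])
zero = cell.zero_state(batch_size=1, dtype=tf.float32)
# Each state tensor becomes a placeholder defaulting to zeros; feed the
# previous step's state to make the RNN stateful at inference time.
state0 = nest.map_structure(
    lambda x: tf.placeholder_with_default(x, x.shape), zero)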
Example no. 29
    def __init__(self, obs_spec, step_output_spec, batch_size, discount_factor,
                 traj_length):
        self._batch_size = batch_size
        self._traj_len = traj_length
        self._discount_factor = discount_factor
        # Don't use shape in the spec since it's unknown
        self._traj_spec = dict(
            step_type=ArraySpec(dtype=np.int8,
                                shape=(None, None),
                                name='traj_step_type_spec'),
            reward=ArraySpec(dtype=np.float32,
                             shape=(None, None),
                             name='traj_reward_spec'),
            discount=ArraySpec(dtype=np.float32,
                               shape=(None, None),
                               name='traj_discount_spec'),
            observation=nest.map_structure(expand_spec, obs_spec),
            step_output=nest.map_structure(expand_spec, step_output_spec))

        # self._trajs[i] = trajectory of the ith experience in the batch.
        self._trajs = None
        # list of timesteps that have been backtracked
        # and ready to be split into chunks to be shipped out.
        # _finished_timesteps[i] = Finished timesteps for the ith item of the batch.
        self._finished_timesteps = None
        # used to chop the trajectory into chunks.

        obs_spec2 = copy.deepcopy(obs_spec)
        obs_spec2['bootstrap_value'] = ArraySpec(dtype=np.float32,
                                                 shape=(None, ),
                                                 name='bootstrap_value_spec')
        self._chopping_trajs = [
            BaseTrajectory(obs_spec2, step_output_spec)
            for _ in range(batch_size)
        ]
        self._len = 0
Example no. 30
    def body(time_index, output_ta, cell_states, inputs, decode_finished):
        sts = decoder_cell.step(time_index, cell_states, inputs,
                                decode_finished)
        new_output, new_cell_states, new_inputs, new_decode_finished = sts

        # TensorArray.write(index, value): register value and returns new TAs
        output_ta = nest.map_structure(
            lambda ta, out: ta.write(time_index, out),
            output_ta, new_output)

        new_decode_finished = tf.logical_or(
            tf.greater_equal(time_index, max_iter),
            new_decode_finished)

        return (time_index + 1, output_ta, new_cell_states, new_inputs,
                new_decode_finished)