Example #1
    def __init__(self,
                 num_units,
                 norm=True,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 forget_bias=1,
                 activation=None,
                 reuse=None):
        super(WeightNormLSTMCell, self).__init__(_reuse=reuse)

        self._scope = 'wn_lstm_cell'
        self._num_units = num_units
        self._norm = norm
        self._initializer = initializer
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._activation = activation or math_ops.tanh
        self._forget_bias = forget_bias

        self._weights_variable_name = "kernel"
        self._bias_variable_name = "bias"

        if num_proj:
            self._state_size = LSTMStateTuple(num_units, num_proj)
            self._output_size = num_proj
        else:
            self._state_size = LSTMStateTuple(num_units, num_units)
            self._output_size = num_units
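A minimal usage sketch for the cell above (my own illustration, not part of the original example); it assumes WeightNormLSTMCell exposes the standard RNNCell interface, so the final state comes back as the LSTMStateTuple configured in __init__:

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMStateTuple

cell = WeightNormLSTMCell(num_units=128, norm=True)
inputs = tf.placeholder(tf.float32, [None, 50, 64])  # batch x time x features

outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
assert isinstance(final_state, LSTMStateTuple)  # the (c, h) pair set up above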
Example #2
    def loop_fn(time, cell_output, cell_state, loop_state):
        # check whether this is the initial condition
        if cell_output is None:  # time == 0
            next_cell_state = cell.zero_state(batch_size, tf.float32)
        else:
            next_cell_state = cell_state
        #check whether finished
        elements_finished = (time >= tf.cast(sequence_length, tf.int32))
        finished = tf.reduce_all(elements_finished)

        #read given inputs
        next_input = tf.cond(
            finished, lambda: tf.zeros(
                [batch_size, rnn_inputs.get_shape()[-1]], dtype=tf.float32),
            lambda: inputs_ta.read(time))

        switch_embed_flag = tf.cast(tf.reduce_max(tf.abs(next_input), axis=1),
                                    tf.bool,
                                    name='switch_embed_flag')

        if cell_output is not None:  # time > 0
            if len(cell_state) > 1:  #LSTM case
                tilde_cell_state = tf.where(switch_embed_flag,
                                            tf.zeros_like(next_input),
                                            next_cell_state[0])
                tilde_cell_output = tf.where(switch_embed_flag,
                                             tf.zeros_like(next_cell_state[1]),
                                             next_cell_state[1])
                next_cell_state = LSTMStateTuple(tilde_cell_state,
                                                 tilde_cell_output)
            else:  #GRU case
                next_cell_state = tf.where(switch_embed_flag,
                                           tf.zeros_like(next_input),
                                           next_cell_state)

        #generate reconstructed features
        if cell_output is None:
            next_cell_state = LSTMStateTuple(next_input, next_cell_state[1])
            with tf.variable_scope('linear_transform'):
                w_o = tf.get_variable('weights', [cell.output_size, 39],\
                  initializer=tf.truncated_normal_initializer(stddev=0.1))
                b_o = tf.get_variable('bias', [39],\
                  initializer=tf.constant_initializer(0.1))
        else:
            with tf.variable_scope('linear_transform', reuse=True):
                w_o = tf.get_variable('weights', [cell.output_size, 39],\
                  initializer=tf.truncated_normal_initializer(stddev=0.1))
                b_o = tf.get_variable('bias', [39],\
                  initializer=tf.constant_initializer(0.1))

        emit_output = cell_output
        if cell_output is None:  # time == 0
            next_loop_state = loop_state_ta
        else:
            y = tf.add(tf.matmul(cell_output, w_o), b_o, name='reconstruction')
            next_loop_state = loop_state.write(time - 1, y)

        #next_input = tf.zeros_like(next_input)
        return (elements_finished, next_input, next_cell_state, emit_output,
                next_loop_state)
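For context, a loop_fn like this is meant to be passed to tf.nn.raw_rnn, which calls it once before the loop and once per step. A rough sketch of the wiring, assuming cell, rnn_inputs, max_time, batch_size and sequence_length are defined as the snippet implies:

inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time).unstack(
    tf.transpose(rnn_inputs, [1, 0, 2]))  # time-major reads for loop_fn
loop_state_ta = tf.TensorArray(dtype=tf.float32, size=max_time)

emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(cell, loop_fn)
reconstructions = final_loop_state.stack()  # the y written at each step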
Example #3
    def call(self, inputs, state):
        output, new_state = self._cell(inputs, state)

        if isinstance(self.state_size, LSTMStateTuple):
            c, h = state
            new_c, new_h = new_state
            zoneout_prob_c, zoneout_prob_h = self._zoneout_prob

            if self.is_training:
                # Rescales the output of dropout (tf.nn.dropout scales its
                # output by a factor of 1 / keep_prob).
                new_c = (1 - zoneout_prob_c) * tf.nn.dropout(
                    new_c - c, (1 - zoneout_prob_c)) + c
                new_h = (1 - zoneout_prob_h) * tf.nn.dropout(
                    new_h - h, (1 - zoneout_prob_h)) + h
                new_state = LSTMStateTuple(c=new_c, h=new_h)
            else:
                # Uses expectation at test time.
                new_c = zoneout_prob_c * c + (1 - zoneout_prob_c) * new_c
                new_h = zoneout_prob_h * h + (1 - zoneout_prob_h) * new_h
                new_state = LSTMStateTuple(c=new_c, h=new_h)
            return new_h, new_state
        else:
            if self.is_training:
                new_state = state + (1 - self._zoneout_prob) * tf.nn.dropout(
                    new_state - state, (1 - self._zoneout_prob))
            else:
                new_state = self._zoneout_prob * state + (
                    1 - self._zoneout_prob) * new_state
            return new_state, new_state
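This call implements zoneout (Krueger et al., 2016): during training each unit keeps its previous state value with the zoneout probability (via the dropout mask on the state delta), and at test time the stochastic update is replaced by its expectation. A quick NumPy check of the test-time branch (illustration only):

import numpy as np

zoneout_prob_c = 0.1
c = np.array([1.0, 2.0])      # previous cell state
new_c = np.array([3.0, 4.0])  # candidate update

# expectation used at test time: keep the old value with probability 0.1
expected = zoneout_prob_c * c + (1 - zoneout_prob_c) * new_c
print(expected)  # [2.8 3.8]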
Example #4
def custom_rnn_seq2seq(encoder_inputs,
                       decoder_inputs,
                       enc_cell,
                       dec_cell,
                       dtype=dtypes.float32,
                       initial_state=None,
                       use_previous=False,
                       scope=None,
                       num_units=0):

    with variable_scope.variable_scope(scope or "custom_rnn_seq2seq"):
        _, enc_state = core_rnn.static_rnn(enc_cell,
                                           encoder_inputs,
                                           dtype=dtype,
                                           scope=scope,
                                           initial_state=initial_state)
        print(enc_state.get_shape())
        c = tf.tanh(
            tf.matmul(tf.get_variable("v", [dim_hidden, dim_hidden]),
                      enc_state))
        h_prime_init = tf.tanh(
            tf.matmul(tf.get_variable("v_prime", [dim_hidden, dim_hidden]), c))
        if not use_previous:
            return seq2seq.rnn_decoder(decoder_inputs,
                                       LSTMStateTuple(c, h_prime_init),
                                       dec_cell,
                                       scope=scope)
        return infer(LSTMStateTuple(c, h_prime_init), dec_cell, num_units)
Example #5
    def _construct_decoder_initial_state(self):

        if self._video_output is not None:
            video_state = self._video_output.final_state
        else:
            zero_slice = [
                tf.zeros(shape=tf.shape(self._audio_output.final_state[0].c),
                         dtype=self._hparams.dtype)
                for _ in range(len(self._audio_output.final_state[0]))
            ]

            video_state = tuple([
                LSTMStateTuple(c=zero_slice[0], h=zero_slice[1])
                for _ in range(len(self._hparams.encoder_units_per_layer))
            ])

        if self._audio_output is not None:
            audio_state = self._audio_output.final_state
        else:
            zero_slice = [
                tf.zeros(shape=tf.shape(self._video_output.final_state[0].c),
                         dtype=self._hparams.dtype)
                for _ in range(len(self._video_output.final_state[0]))
            ]
            audio_state = tuple([
                LSTMStateTuple(c=zero_slice[0], h=zero_slice[1])
                for _ in range(len(self._hparams.encoder_units_per_layer))
            ])

        # exact type check on purpose: LSTMStateTuple is itself a tuple
        # subclass and must not be unpacked as a multi-layer state here
        if type(video_state) == tuple:
            final_video_state = video_state[-1]
        else:
            final_video_state = video_state

        if type(audio_state) == tuple:
            final_audio_state = audio_state[-1]
        else:
            final_audio_state = audio_state

        state_tuple = (
            final_video_state,
            final_audio_state,
        )

        self._decoder_initial_state = _project_state_tuple(
            state_tuple,
            num_units=self._hparams.decoder_units_per_layer[0],
            cell_type=self._hparams.cell_type)

        dec_layers = len(self._hparams.decoder_units_per_layer)

        if dec_layers > 1:
            self._decoder_initial_state = [
                self._decoder_initial_state,
            ]
            zero_state = self._decoder_cells.zero_state(
                self._batch_size, self._hparams.dtype)
            for j in range(dec_layers - 1):
                self._decoder_initial_state.append(zero_state[j + 1])
            self._decoder_initial_state = tuple(self._decoder_initial_state)
Example #6
    def _skipEncoderOutput(self):
        with tf.variable_scope("Decoder") as scope:
            batch_size = self._hparams.batch_size[0 if self._mode ==
                                                  'train' else 1]

            if 'skip' in self._cell_type:
                pre_decoder_cells, pre_initial_state = build_rnn_layers(
                    cell_type=self._cell_type,
                    num_units_per_layer=[
                        self._hparams.decoder_units_per_layer[-1]
                    ],
                    use_dropout=self._hparams.use_dropout,
                    dropout_probability=self._hparams.
                    decoder_dropout_probability,
                    mode=self._mode,
                    batch_size=batch_size,
                    dtype=self._hparams.dtype,
                )

                print('Decoder_pre_decoder_cells', pre_decoder_cells)
                print('Decoder_pre_initial_state', pre_initial_state)
                print('Decoder _encoder_output', self._encoder_output)
                pre_decoder_cells = MultiRNNCell([
                    pre_decoder_cells,
                ])
                out = tf.nn.dynamic_rnn(
                    cell=pre_decoder_cells,
                    inputs=self._encoder_output.outputs,
                    sequence_length=self._encoder_features_len,
                    parallel_iterations=self._hparams.
                    batch_size[0 if self._mode == 'train' else 1],
                    swap_memory=False,
                    dtype=self._hparams.dtype,
                    initial_state=pre_initial_state,
                    scope=scope)
                new_encoder_output, new_encoder_final_state = out

                cell_state = new_encoder_final_state
                try:
                    cell_state = [
                        LSTMStateTuple(cs.c, cs.h) for cs in cell_state
                    ]
                except (AttributeError, TypeError):
                    # single-layer state: not a list of LSTMStateTuples
                    cell_state = LSTMStateTuple(cell_state.c, cell_state.h)

                new_encoder_output, updated_states = new_encoder_output
                print("Decoder_new_encoder_output", new_encoder_output)
                print("Decoder_updated_states", updated_states)
                cost_per_sample = self._hparams.cost_per_sample[2]
                budget_loss = tf.reduce_mean(
                    tf.reduce_sum(cost_per_sample * updated_states, 1), 0)
                meanUpdates = tf.reduce_mean(tf.reduce_sum(updated_states, 1),
                                             0)
                self.skip_infos = SkipInfoTuple(updated_states, meanUpdates,
                                                budget_loss)
                self._encoder_output = self.get_data(new_encoder_output,
                                                     cell_state)
                print('Encoder_Output in Decoder after skip',
                      self._encoder_output)
Example #7
 def do_validation(loss, curr_epoch):
     curr_epoch = int(curr_epoch)
     j = 0
     val_losses = []
     val_max = 0
     val_norm_max = 0
     for val in epoch_val:
         j += 1
         if j >= 2:
             break
         print("Running validation...")
         if model == "LSTM":
             val_state = tuple([
                 LSTMStateTuple(np.zeros((nb_v, n_hidden), dtype=np.float64),
                                np.zeros((nb_v, n_hidden), dtype=np.float64))
                 for _ in range(n_layers)
             ])
         elif model == "HyperDRUM":
             val_state = tuple([
                 LSTMStateTuple(
                     np.zeros((nb_v, n_hyper_hidden), dtype=np.float64),
                     np.zeros((nb_v, n_hyper_hidden + n_hidden),
                              dtype=np.float64)) for _ in range(n_layers)
             ])
         elif model == "FSRUM":
             val_state = tuple([
                 tuple([
                     tuple([
                         np.zeros((nb_v, fast_size), dtype=np.float64),
                         np.zeros((nb_v, fast_size), dtype=np.float64)
                     ]),
                     np.zeros((nb_v, slow_size), dtype=np.float64)
                 ]) for _ in range(n_layers)
             ])
         else:
             val_state = tuple([
                 np.zeros((nb_v, n_hidden), dtype=np.float64)
                 for _ in range(n_layers)
             ])
         for stepb, (X_val, Y_val) in enumerate(val):
             val_batch_x = X_val
             val_batch_y = Y_val
             val_dict = {x: val_batch_x, y: val_batch_y, i_s: val_state}
             val_acc, val_loss, val_state = sess.run(
                 [accuracy, cost, states], feed_dict=val_dict)
             val_losses.append(val_loss)
     print("Validations:", )
     validation_losses.append(sum(val_losses) / len(val_losses))
     print("Validation Loss= " + "{:.6f}".format(validation_losses[-1]))
     test_loss = do_test()
     lr = [v for v in tf.global_variables()
           if v.name == "learning_rate:0"][0]
     lr = sess.run(lr)
     f.write(
         "Step: %d\t TrLoss: %f\t TestLoss: %f\t ValLoss: %f\t Epoch: %d\t Learning rate: %f\n"
         % (t, loss, test_loss, validation_losses[-1], curr_epoch, lr))
     f.flush()
Example #8
    def __init__(self,
                 num_units,
                 is_training,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=orthogonal_initializer(),
                 num_proj=None,
                 proj_clip=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=tf.tanh):
        """Initialize the parameters for an LSTM cell.
        Args:
          num_units: int, The number of units in the LSTM cell.
          is_training: bool, set True when training.
          use_peepholes: bool, set True to enable diagonal/peephole
            connections.
          cell_clip: (optional) A float value, if provided the cell state
            is clipped by this value prior to the cell output activation.
          initializer: (optional) The initializer to use for the weight
            matrices.
          num_proj: (optional) int, The output dimensionality for
            the projection matrices.  If None, no projection is performed.
          proj_clip: (optional) A float value.  If `num_proj > 0` and
            `proj_clip` is provided, the projected values are clipped
            elementwise to within `[-proj_clip, proj_clip]`.
          forget_bias: Biases of the forget gate are initialized by default
            to 1 in order to reduce the scale of forgetting at the beginning of
            the training.
          state_is_tuple: If True, accepted and returned states are 2-tuples of
            the `c_state` and `m_state`.  If False, they are concatenated
            along the column axis.
          activation: Activation function of the inner states.
        """
        if not state_is_tuple:
            tf.logging.log_first_n(
                tf.logging.WARN,
                "%s: Using a concatenated state is slower and "
                " will soon be deprecated.  Use state_is_tuple=True.", 1, self)

        self.num_units = num_units
        self.is_training = is_training
        self.use_peepholes = use_peepholes
        self.cell_clip = cell_clip
        self.num_proj = num_proj
        self.proj_clip = proj_clip
        self.initializer = initializer
        self.forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self.state_is_tuple = state_is_tuple
        self.activation = activation

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
Example #9
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None,
                 normalize_in_to_hidden=False,
                 normalize_in_together=True,
                 normalize_cell=False,
                 normalize_config=None,
                 name=None):
        super(BNLSTMCell, self).__init__(_reuse=reuse, name=name)
        if not state_is_tuple:
            logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True.", self)
        if num_unit_shards is not None or num_proj_shards is not None:
            logging.warn(
                "%s: The num_unit_shards and proj_unit_shards parameters are "
                "deprecated and will be removed in Jan 2017.  "
                "Use a variable scope with a partitioner instead.", self)

        # Inputs must be 2-dimensional.
        self.input_spec = base_layer.InputSpec(ndim=2)

        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh
        self._normalize_in_to_hidden = normalize_in_to_hidden
        self._normalize_in_together = normalize_in_to_hidden and normalize_in_together
        self._normalize_cell = normalize_cell
        self._normalize_config = normalize_config

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
Example #10
    def forward(self, X):
        """ Inspired in part by https://github.com/areiner222/MDLSTM/blob/master/md_lstm.py """
        """ X: batch_size X height X width X channels """

        """ create H*W arrays """
        with tf.variable_scope(self.scope):
            _, H, W, C = X.get_shape().as_list()
            N = tf.shape(X)[0]

            X = tf.reshape(tf.transpose(X, [1,2,0,3]), [-1, C])
            X = tf.split(X, H*W, axis=0)

            """ create dynamic-sized arrays with timesteps = H*W """
            inputs = tf.TensorArray(dtype=tf.float32, size=H*W).unstack(X)
            states = tf.TensorArray(dtype=tf.float32, size=H*W+1, clear_after_read=False)
            outputs = tf.TensorArray(dtype=tf.float32, size=H*W)

            """ initialiaze states to zero  """
            states = states.write(H*W, LSTMStateTuple(tf.zeros([N, self.hidden_dim], tf.float32),
                                                      tf.zeros([N, self.hidden_dim], tf.float32)))

            """ define counter """
            t = tf.constant(0)

            """ define operations at each time step """
            def body(t_, outputs_, states_):"""TODO: check if first state should use tf.less instead of tf.less_equal"""
                states_1 = tf.cond(tf.less_equal(t_, tf.constant(W)),
                                   lambda: states_.read(H*W),
                                   lambda: states_.read(t_ - tf.constant(W)))
                states_2 = tf.cond(tf.equal(t_ % W, tf.constant(0)),
                                   lambda: states_.read(H*W),
                                   lambda: states_.read(t_ - tf.constant(1)))
     
                prev_hidden_states = LSTMStateTuple(states_1[0], states_2[0])
                prev_cell_states = LSTMStateTuple(states_1[1], states_2[1])

                out, state = self.step_forward(inputs.read(t_), prev_hidden_states, prev_cell_states)
                outputs_ = outputs_.write(t_, out)
                states_ = states_.write(t_, state)

                return t_+1, outputs_, states_

            """ define condition for while loop """
            def condition(t_, outputs_, states_):
                return tf.less(t_, tf.constant(H*W))

            """ run while loop """
            _, outputs, states = tf.while_loop(condition, body, [t, outputs, states], parallel_iterations=1)

            """ stack outputs and states to get tensor and reshape outputs appropriately """
            outputs = outputs.stack()
            states = states.stack()

            outputs = tf.transpose(tf.reshape(outputs, [H, W, -1, self.hidden_dim]), [2,0,1,3])

            return outputs
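The two reads in body implement the 2-D recurrence over the row-major flattened grid: the neighbor above sits W steps back, the left neighbor one step back, and slot H*W holds the shared zero state for the top row and left column. A plain-Python sketch of that index arithmetic (using a strict less-than, which is what the TODO above questions):

H, W = 3, 4
ZERO = H * W  # extra slot holding the zero state

def neighbor_slots(t):
    above = ZERO if t < W else t - W      # top row: no neighbor above
    left = ZERO if t % W == 0 else t - 1  # left column: no left neighbor
    return above, left

print(neighbor_slots(0))  # (12, 12): top-left corner uses the zero state twice
print(neighbor_slots(5))  # (1, 4): one row up, one cell left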
Example #11
 def initial_state(self):
     if self.encoder is None:
         batch_shape = [self.hparams.batch_size, self.hparams.dec_units]
         return LSTMStateTuple(
             c=tf.zeros(batch_shape, dtype=tf.float32),
             h=tf.zeros(batch_shape, dtype=tf.float32),
         )
     else:
         return LSTMStateTuple(
             c=tf.zeros_like(self.encoder.state.c),
             h=tf.zeros_like(self.encoder.state.h),
         )
Example #12
    def add(self, values, _struct=None):
        """
        Adds single experience frame to rollout.

        Args:
            values:    [nested] dictionary of values.
        """
        if _struct is None:
            # Top level:
            _struct = self
            self.size += 1
            top = True

        else:
            top = False

        try:
            if isinstance(values, dict):
                for key, value in values.items():
                    if key not in _struct.keys():
                        _struct[key] = {}
                    _struct[key] = self.add(value, _struct[key])

            elif isinstance(values, tuple):
                if not isinstance(_struct, tuple):
                    _struct = ['empty' for entry in values]
                _struct = tuple(
                    [self.add(*pair) for pair in zip(values, _struct)])

            elif isinstance(values, LSTMStateTuple):
                if not isinstance(_struct, LSTMStateTuple):
                    _struct = LSTMStateTuple(0, 0)
                c = self.add(values[0], _struct[0])
                h = self.add(values[1], _struct[1])
                _struct = LSTMStateTuple(c, h)

            else:
                if isinstance(_struct, list):
                    _struct += [values]

                else:
                    _struct = [values]

        except Exception:
            print('values:\n', values)
            print('_struct:\n', _struct)
            raise RuntimeError

        if not top:
            return _struct
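A hedged usage sketch of the accumulator above, assuming the surrounding class (called Rollout here purely for illustration) is a dict subclass with a size attribute: after two adds, every leaf has grown into a list of length two, and LSTMStateTuple leaves stay LSTMStateTuples.

import numpy as np

rollout = Rollout()  # assumed dict subclass defining add() as above
frame = {
    'observation': np.zeros(4),
    'lstm_state': LSTMStateTuple(np.zeros(8), np.zeros(8)),
}
rollout.add(frame)
rollout.add(frame)

assert len(rollout['observation']) == 2   # one entry per frame
assert len(rollout['lstm_state'].c) == 2  # c and h accumulated separately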
Example #13
    def pass_messages(self):
        with tf.name_scope('pass_messages') as scope:
            denom = tf.sqrt(tf.cast(self.opts.d, tf.float32))

            L_output = tf.tile(tf.div(self.L_init, denom), [self.n_lits, 1])
            C_output = tf.tile(tf.div(self.C_init, denom), [self.n_clauses, 1])

            L_state = LSTMStateTuple(h=L_output, c=tf.zeros([self.n_lits, self.opts.d]))
            C_state = LSTMStateTuple(h=C_output, c=tf.zeros([self.n_clauses, self.opts.d]))

            _, L_state, C_state = tf.while_loop(self.while_cond, self.while_body, [0, L_state, C_state])

        self.final_lits = L_state.h
        self.final_clauses = C_state.h
Example #14
    def create_architecture(self):
        self.vars.sequence_length = tf.placeholder(tf.int64, [1],
                                                   name="sequence_length")

        fc_input = self.get_input_layers()

        fc1 = fully_connected(fc_input,
                              num_outputs=self.fc_units_num,
                              scope=self._name_scope + "/fc1")

        fc1_reshaped = tf.reshape(fc1, [1, -1, self.fc_units_num])
        self.recurrent_cells = self.ru_class(self._recurrent_units_num)
        state_c = tf.placeholder(tf.float32,
                                 [1, self.recurrent_cells.state_size.c],
                                 name="initial_lstm_state_c")
        state_h = tf.placeholder(tf.float32,
                                 [1, self.recurrent_cells.state_size.h],
                                 name="initial_lstm_state_h")
        self.vars.initial_network_state = LSTMStateTuple(state_c, state_h)
        rnn_outputs, self.ops.network_state = tf.nn.dynamic_rnn(
            self.recurrent_cells,
            fc1_reshaped,
            initial_state=self.vars.initial_network_state,
            sequence_length=self.vars.sequence_length,
            time_major=False,
            scope=self._name_scope)
        reshaped_rnn_outputs = tf.reshape(rnn_outputs,
                                          [-1, self._recurrent_units_num])

        self.reset_state()
        self.ops.pi, self.ops.frameskip_pi, self.ops.v = self.policy_value_frameskip_layer(
            reshaped_rnn_outputs)
Example #15
def bidirectional_LSTM(inputs, scope, training):

    with tf.variable_scope(scope):
        outputs, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
            # tf.nn.rnn_cell.LSTMCell(hp.enc_units),
            # tf.nn.rnn_cell.LSTMCell(hp.enc_units),
            ZoneoutLSTMCell(
                hp.enc_units,
                training,
                zoneout_factor_cell=hp.z_drop,
                zoneout_factor_output=hp.z_drop,
            ),
            ZoneoutLSTMCell(
                hp.enc_units,
                training,
                zoneout_factor_cell=hp.z_drop,
                zoneout_factor_output=hp.z_drop,
            ),
            inputs,
            dtype=tf.float32)

        #Concatenate c states and h states from forward
        #and backward cells
        encoder_final_state_c = tf.concat((fw_state.c, bw_state.c), 1)
        encoder_final_state_h = tf.concat((fw_state.h, bw_state.h), 1)

        #Get the final state to pass as initial state to decoder
        final_state = LSTMStateTuple(c=encoder_final_state_c,
                                     h=encoder_final_state_h)

    return tf.concat(
        outputs, axis=2
    ), final_state  # Concat forward + backward outputs and final states
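Since c and h from the two directions are concatenated, the returned LSTMStateTuple is 2 * hp.enc_units wide, so a decoder initialized from it needs a matching cell size. A hedged sketch (dec_inputs assumed to exist):

enc_outputs, enc_state = bidirectional_LSTM(inputs, 'encoder', training=True)

# the fw/bw concat above doubles the state width
dec_cell = tf.nn.rnn_cell.LSTMCell(2 * hp.enc_units)
dec_outputs, dec_state = tf.nn.dynamic_rnn(dec_cell, dec_inputs,
                                           initial_state=enc_state,
                                           dtype=tf.float32)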
Example #16
    def encoder(self, inputs, seq_len, keep_prob=0.9):
        batch_size = tf.shape(inputs)[0]
        with tf.variable_scope('encoder'):
            encoder_cell_fw = self.add_encoder_cell(self.hidden_size,
                                                    self.cell_type,
                                                    self.num_layers, keep_prob)
            encoder_cell_bw = self.add_encoder_cell(self.hidden_size,
                                                    self.cell_type,
                                                    self.num_layers, keep_prob)

            initial_state = encoder_cell_fw.zero_state(batch_size,
                                                       dtype=tf.float32)
            encoder_outputs_, encoder_states_ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=encoder_cell_fw,
                cell_bw=encoder_cell_bw,
                inputs=inputs,
                sequence_length=seq_len,
                initial_state_fw=initial_state,
                initial_state_bw=initial_state,
                dtype=tf.float32,
                swap_memory=True)
            encoder_outputs = tf.concat(encoder_outputs_, axis=-1)
            encoder_states = []
            for i in range(self.num_layers):
                c_fw, h_fw = encoder_states_[0][i]
                c_bw, h_bw = encoder_states_[1][i]
                # c_s = tf.concat([c_fw, c_bw], axis=-1)
                # h_s = tf.concat([h_fw, h_bw], axis=-1)
                c_s = tf.add(c_fw, c_bw)
                h_s = tf.add(h_fw, h_bw)
                encoder_states.append(LSTMStateTuple(c_s, h_s))
            encoder_states = tuple(encoder_states)

            return encoder_outputs, encoder_states
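Worth noting: the commented-out tf.concat variant would double the state width to 2 * hidden_size, while the tf.add used here keeps each LSTMStateTuple at hidden_size so a same-sized decoder cell can consume it directly. A NumPy shape check of the two choices:

import numpy as np

hidden_size = 64
c_fw = np.zeros((32, hidden_size))
c_bw = np.zeros((32, hidden_size))

print(np.concatenate([c_fw, c_bw], axis=-1).shape)  # (32, 128): concat doubles width
print((c_fw + c_bw).shape)                          # (32, 64): add preserves width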
Example #17
    def _init_bidirectional_encoder(self):
        with tf.variable_scope("BidirectionalEncoder") as scope:
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = (tf.nn.bidirectional_dynamic_rnn(
                  cell_fw=self.encoder_cell,
                  cell_bw=self.encoder_cell,
                  inputs=self.encoder_inputs_embedded,
                  sequence_length=self.encoder_inputs_length,
                  time_major=True,
                  dtype=tf.float32))

            # concatenates tensors along one dimension.
            self.encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)

            # isinstance() treats a subclass instance as an instance of its
            # parent class, i.e. the check is inheritance-aware
            if isinstance(encoder_fw_state, LSTMStateTuple):
                encoder_state_c = tf.concat(
                    (encoder_fw_state.c, encoder_bw_state.c),
                    1,
                    name='bidirectional_concat_c')

                encoder_state_h = tf.concat(
                    (encoder_fw_state.h, encoder_bw_state.h),
                    1,
                    name='bidirectional_concat_h')

                self.encoder_state = LSTMStateTuple(c=encoder_state_c,
                                                    h=encoder_state_h)
            elif isinstance(encoder_fw_state, tf.Tensor):
                self.encoder_state = tf.concat(
                    (encoder_fw_state, encoder_bw_state),
                    1,
                    name='bidirectional_concat')
Example #18
def BiLSTM(x, seqlen, weights, biases):
    cell = LSTMCell(n_hidden)
    cell = tf.nn.rnn_cell.DropoutWrapper(
        cell, output_keep_prob=0.5)  # reduce overfitting
    # cell = tf.nn.rnn_cell.MultiRNNCell([cell] * self.num_layers, state_is_tuple=True)
    # cell_bw = tf.nn.rnn_cell.MultiRNNCell([self.encoder_cell] * self.num_layers, state_is_tuple=True)
    ((encoder_fw_outputs, encoder_bw_outputs),
     (encoder_fw_state, encoder_bw_state)) = (
         tf.nn.bidirectional_dynamic_rnn(
             cell_fw=cell,
             cell_bw=cell,
             inputs=x,
             # sequence_length=seqlen,
             time_major=True,
             dtype=tf.float32))
    encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

    if isinstance(encoder_fw_state, LSTMStateTuple):
        encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c),
                                    1,
                                    name='bidirectional_concat_c')
        encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h),
                                    1,
                                    name='bidirectional_concat_h')
        encoder_state = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

    elif isinstance(encoder_fw_state, tf.Tensor):
        encoder_state = tf.concat((encoder_fw_state, encoder_bw_state),
                                  1,
                                  name='bidirectional_concat')
    return tf.matmul(encoder_outputs, weights['out']) + biases['out']
Example #19
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM)."""
        with _checked_scope(self,
                            scope or "basic_lstm_cell",
                            reuse=self._reuse):
            # Parameters of gates are concatenated into one multiply for efficiency.
            if self._state_is_tuple:
                c, h = state
            else:
                c, h = tf.split(value=state, num_or_size_splits=2, axis=1)

            all_inputs = tf.concat([inputs, h], 1)

            concat = tf.nn.bias_add(tf.matmul(all_inputs, self.weight),
                                    self.bias)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1)

            new_c = (c * tf.sigmoid(f + self._forget_bias) +
                     tf.sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * tf.sigmoid(o)

            if self._state_is_tuple:
                new_state = LSTMStateTuple(new_c, new_h)
            else:
                new_state = tf.concat([new_c, new_h], 1)
            return new_h, new_state
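The update above is the textbook LSTM cell. As a sanity check, the same arithmetic for a single unit in NumPy, with hand-picked pre-activations (illustration only):

import numpy as np

sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
c, i, j, f, o = 0.5, 0.0, 1.0, 0.0, 0.0
forget_bias = 1.0

new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
new_h = np.tanh(new_c) * sigmoid(o)
print(round(new_c, 4), round(new_h, 4))  # 0.7463 0.3165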
Example #20
    def _encoder(self):
        word_embeddings = self._get_embeddings(self.input_placeholder)
        expanded_answer_position = tf.expand_dims(self.answer_position, 2)
        word_embeddings_answer_position = tf.concat(
            (word_embeddings, expanded_answer_position), 2)
        encoder_lstm_cell = LSTMCell(
            num_units=self.config.encoder_hidden_state_size)
        ((encoder_fw_outputs, encoder_bw_outputs),
         (encoder_fw_final_state,
          encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
              cell_fw=encoder_lstm_cell,
              cell_bw=encoder_lstm_cell,
              inputs=word_embeddings_answer_position,
              sequence_length=self.input_length_placeholder,
              dtype=tf.float32)
        encoder_output = tf.concat([encoder_fw_outputs, encoder_bw_outputs], 2)

        encoder_final_state_c = tf.concat(
            (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)

        encoder_final_state_h = tf.concat(
            (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

        encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                             h=encoder_final_state_h)

        # decoder_lstm_cell = LSTMCell(decoder_hidden_state_size)

        # eos_step_embedded = self.get_embeddings(self.eos_time_slice)

        # pad_step_embedded = self.get_embeddings(self.pad_time_slice)

        return encoder_final_state
Example #21
    def __call__(self, inputs, state, scope=None):
        """Long short-term memory cell (LSTM).
        @param: inputs (batch,n)
        @param state: the states and hidden unit of the two cells
        """
        with tf.variable_scope(scope or type(self).__name__):
            c1, c2, h1, h2 = state

            # change bias argument to False since LN will add bias via shift
            concat = _linear([inputs, h1, h2], 5 * self._num_units, False)

            i, j, f1, f2, o = tf.split(value=concat,
                                       num_or_size_splits=5,
                                       axis=1)

            # add layer normalization to each gate
            i = ln(i, scope='i/')
            j = ln(j, scope='j/')
            f1 = ln(f1, scope='f1/')
            f2 = ln(f2, scope='f2/')
            o = ln(o, scope='o/')

            new_c = (c1 * tf.nn.sigmoid(f1 + self._forget_bias) +
                     c2 * tf.nn.sigmoid(f2 + self._forget_bias) +
                     tf.nn.sigmoid(i) * self._activation(j))

            # add layer_normalization in calculation of new hidden state
            new_h = self._activation(ln(new_c,
                                        scope='new_h/')) * tf.nn.sigmoid(o)
            new_state = LSTMStateTuple(new_c, new_h)

            return new_h, new_state
Example #22
    def __call__(self, inputs, state, scope=None):

        sigmoid = math_ops.sigmoid
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)

        with tf.variable_scope("Weight",
                               initializer=tf.orthogonal_initializer()):
            weight_matrix = _linear([inputs, h], 5 * self._num_units, True)
        with tf.variable_scope("transform_input",
                               initializer=tf.orthogonal_initializer()):
            trans_input = _linear([inputs], self._num_units, True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate batch_size * dim
        i, j, f, o, t = tf.split(weight_matrix, num_or_size_splits=5, axis=1)
        i = ln(i, scope='i_LN')
        j = ln(j, scope='j_LN')
        f = ln(f, scope='f_LN')
        o = ln(o, scope='o_LN')
        t = ln(t, scope='t_LN')
        new_c = (c * sigmoid(f + self._forget_bias) +
                 sigmoid(i) * self._activation(j))
        new_h = self._activation(ln(new_c, scope='new_c_LN')) * sigmoid(o)
        high_h = sigmoid(t) * new_h + \
                 (1.0 - sigmoid(t)) * self._activation(ln(trans_input, scope='new_input_LN'))

        if self._state_is_tuple:
            new_state = LSTMStateTuple(new_c, high_h)
        else:
            new_state = tf.concat([new_c, high_h], 1)
        return high_h, new_state
Example #23
def build_bidirectional_encoder(encoder_cell,
                                num_layers,
                                encoder_inputs,
                                encoder_inputs_length,
                                scope=None):
    assert num_layers > 0
    with tf.variable_scope(scope or 'basic_encoder'):
        current_inputs = encoder_inputs
        for layer_id in range(num_layers):
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = (tf.nn.bidirectional_dynamic_rnn(
                  cell_fw=encoder_cell,
                  cell_bw=encoder_cell,
                  inputs=current_inputs,
                  sequence_length=encoder_inputs_length,
                  time_major=False,
                  dtype=tf.float32,
                  scope='encoder_l' + str(layer_id)))
            encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs),
                2)  # [batch_size, enc_seq_len, 2 * enc_hid_size]
            current_inputs = encoder_outputs

        encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c),
                                    1,
                                    name='bidirectional_concat_c')
        encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h),
                                    1,
                                    name='bidirectional_concat_h')
        encoder_final_state = LSTMStateTuple(
            c=encoder_state_c,
            h=encoder_state_h)  # [batch_size, enc_hid_size*2] for c,h

    return encoder_outputs, encoder_final_state
Example #24
    def _encoder(self, input_seq, input_seq_length, name="", reuse=False):
        with tf.variable_scope("Encoder") as scope:
            if reuse:
                tf.get_variable_scope().reuse_variables()
            cell_fw = BasicLSTMCell(self.h_dim, state_is_tuple=True, reuse=reuse)
            cell_fw = SwitchableDropoutWrapper(cell_fw, self.is_train, input_keep_prob = self.config.input_keep_prob)

            cell_bw = BasicLSTMCell(self.h_dim, state_is_tuple=True, reuse=reuse)
            cell_bw = SwitchableDropoutWrapper(cell_bw, self.is_train, input_keep_prob = self.config.input_keep_prob)

            (encoder_outputs, encoder_state) = tf.nn.bidirectional_dynamic_rnn(cell_fw, 
                cell_bw, 
                inputs=input_seq,
                sequence_length=input_seq_length,
                dtype=tf.float32,
                scope='enc')

            # Join outputs since we are using a bidirectional RNN
            encoder_outputs = tf.concat(encoder_outputs, 2)

            if isinstance(encoder_state[0], LSTMStateTuple):

                encoder_state_c = tf.concat(
                    (encoder_state[0].c, encoder_state[1].c), 1, name='bidirectional_concat_c')
                encoder_state_h = tf.concat(
                    (encoder_state[0].h, encoder_state[1].h), 1, name='bidirectional_concat_h')
                encoder_state = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)                

        return encoder_outputs, encoder_state
Example #25
    def concatenate_state(fw_state, bw_state):
        if isinstance(fw_state, LSTMStateTuple):
            state_c = tf.concat((fw_state.c, bw_state.c),
                                1,
                                name='bidirectional_concat_c')
            state_h = tf.concat((fw_state.h, bw_state.h),
                                1,
                                name='bidirectional_concat_h')
            state = LSTMStateTuple(c=state_c, h=state_h)
            return state
        elif isinstance(fw_state, tf.Tensor):
            state = tf.concat((fw_state, bw_state),
                              1,
                              name='bidirectional_concat')
            return state
        elif (isinstance(fw_state, tuple) and isinstance(bw_state, tuple)
              and len(fw_state) == len(bw_state)):
            # multilayer
            state = tuple(
                concatenate_state(fw, bw)
                for fw, bw in zip(fw_state, bw_state))
            return state

        else:
            raise ValueError('unknown state type: {}'.format(
                (fw_state, bw_state)))
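The tuple branch makes this work for stacked cells, whose final state is a per-layer tuple of LSTMStateTuples; concatenate_state then recurses layer by layer. A hedged usage sketch (the per-layer tensors are assumed to exist):

# per-layer states from a 2-layer bidirectional encoder (assumed tensors)
fw_state = (LSTMStateTuple(c_fw0, h_fw0), LSTMStateTuple(c_fw1, h_fw1))
bw_state = (LSTMStateTuple(c_bw0, h_bw0), LSTMStateTuple(c_bw1, h_bw1))

merged = concatenate_state(fw_state, bw_state)
# merged[i].c and merged[i].h are twice as wide as their per-direction inputs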
Example #26
File: model.py Project: kpsc/nlp
    def decode_onestep(self, sess, last_tokens, dec_pre_state, encoder_outputs,
                       source_len):
        '''
        Args:
            last_tokens: tokens to be fed as input into the decoder for this timestep
            encoder_outputs: [beam_size, seq_len, hidden_size]
            dec_pre_state: list of beam_size LSTMStateTuples from the previous timestep
        return:
        '''
        beam_size = len(dec_pre_state)
        c = [np.expand_dims(state.c, axis=0) for state in dec_pre_state]
        h = [np.expand_dims(state.h, axis=0) for state in dec_pre_state]
        new_c = np.concatenate(c, axis=0)
        new_h = np.concatenate(h, axis=0)
        dec_pre_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)

        feed_dict = {
            self.decoder_input_train: np.transpose(np.array([last_tokens])),
            self.dec_inp_state: dec_pre_state,
            self.encoder_outputs: encoder_outputs,
            self.source_len: source_len,
            self.keep_prob: 1.0
        }
        output = {
            'idx': self.topk_idx,
            'probs': self.topk_log_prob,
            'states': self.dec_out_state
        }
        output = sess.run(output, feed_dict=feed_dict)
        dec_states = [
            LSTMStateTuple(output['states'].c[i, :], output['states'].h[i, :])
            for i in range(beam_size)
        ]

        return output['idx'], output['probs'], dec_states
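In a beam-search decode loop this would be called once per timestep, splitting the batched output state back into per-hypothesis LSTMStateTuples (which decode_onestep already does). A rough sketch, with names and shapes assumed rather than taken from the project:

states = [LSTMStateTuple(np.zeros(hidden_size), np.zeros(hidden_size))
          for _ in range(beam_size)]
last_tokens = [start_id] * beam_size

for step in range(max_dec_steps):
    idx, log_probs, states = model.decode_onestep(
        sess, last_tokens, states, encoder_outputs, source_len)
    last_tokens = [row[0] for row in idx]  # greedy: top candidate per hypothesis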
Example #27
    def _init_bidirectional_encoder(self):
        '''
        Bidirectional LSTM encoder
        '''
        with tf.variable_scope("BidirectionalEncoder") as scope:
            ((encoder_fw_outputs,
              encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = (
                tf.nn.bidirectional_dynamic_rnn(cell_fw=self.encoder_cell,
                                                cell_bw=self.encoder_cell,
                                                inputs=self.encoder_inputs_embedded,
                                                sequence_length=self.encoder_inputs_length,
                                                time_major=self.time_major,
                                                dtype=tf.float32)
                )

            self.encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

            if isinstance(encoder_fw_state, LSTMStateTuple):

                encoder_state_c = tf.concat(
                    (encoder_fw_state.c, encoder_bw_state.c), 1, name='bidirectional_concat_c')
                encoder_state_h = tf.concat(
                    (encoder_fw_state.h, encoder_bw_state.h), 1, name='bidirectional_concat_h')
                self.encoder_state = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)

            elif isinstance(encoder_fw_state, tf.Tensor):
                self.encoder_state = tf.concat((encoder_fw_state, encoder_bw_state), 1, name='bidirectional_concat')
Example #28
    def create_architecture(self):
        self.vars.sequence_length = tf.placeholder(tf.int64, [1],
                                                   name="sequence_length")
        fc_input = self.get_input_layers()

        fc1 = layers.fully_connected(fc_input,
                                     self.fc_units_num,
                                     scope=self._name_scope + "/fc1")
        fc1_reshaped = tf.reshape(fc1, [1, -1, self.fc_units_num])

        self.recurrent_cells = self._get_ru_class()(self._recurrent_units_num)
        state_c = tf.placeholder(tf.float32,
                                 [1, self.recurrent_cells.state_size.c],
                                 name="initial_lstm_state_c")
        state_h = tf.placeholder(tf.float32,
                                 [1, self.recurrent_cells.state_size.h],
                                 name="initial_lstm_state_h")
        self.vars.initial_network_state = LSTMStateTuple(state_c, state_h)
        rnn_outputs, self.ops.network_state = tf.nn.dynamic_rnn(
            self.recurrent_cells,
            fc1_reshaped,
            initial_state=self.vars.initial_network_state,
            sequence_length=self.vars.sequence_length,
            scope=self._name_scope)
        reshaped_rnn_outputs = tf.reshape(rnn_outputs,
                                          [-1, self._recurrent_units_num])
        q = layers.linear(reshaped_rnn_outputs,
                          num_outputs=self.actions_num,
                          scope=self._name_scope + "/q")
        self.reset_state()
        return q
Example #29
def build_cudnn_encoder(encoder_cell,
                        num_layers,
                        encoder_inputs,
                        encoder_inputs_length,
                        time_major=False,
                        scope=None):
    assert num_layers > 0
    batch_size = encoder_inputs.get_shape()[0]
    num_units = encoder_cell.output_size
    input_size = encoder_inputs.get_shape()[-1]
    with tf.variable_scope(scope):
        model = CudnnLSTM(num_layers, num_units, input_size)
        params_size_t = calc_cudnn_num_params(num_layers, num_units,
                                              input_size)
        #params_size_t = model.params_size()
        if not time_major:
            encoder_inputs = tf.transpose(encoder_inputs, [1, 0, 2])
        input_h = tf.zeros([num_layers, batch_size, num_units])
        input_c = tf.zeros([num_layers, batch_size, num_units])
        params = tf.Variable(tf.random_normal([params_size_t]))
        output, output_h, output_c = model(is_training=True,
                                           input_data=encoder_inputs,
                                           input_h=input_h,
                                           input_c=input_c,
                                           params=params)
        print("output", output)
        print("output_h", output_h)

        if not time_major:
            output = tf.transpose(output, [1, 0, 2])

    return output, LSTMStateTuple(c=output_c[0], h=output_h[0])
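calc_cudnn_num_params is not shown; a plausible reconstruction follows from the cuDNN LSTM layout (four gates, separate input and recurrent weight matrices, and two bias vectors per gate). This is my own sketch, not necessarily the project's implementation:

def calc_cudnn_num_params(num_layers, num_units, input_size):
    total = 0
    for layer in range(num_layers):
        in_dim = input_size if layer == 0 else num_units
        weights = 4 * num_units * (in_dim + num_units)  # W and R for i, f, g, o
        biases = 4 * num_units * 2                      # cuDNN keeps b_W and b_R
        total += weights + biases
    return total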
Example #30
def DynRNN(cell_model,
           num_units,
           num_layers,
           emb_inps,
           enc_lens,
           keep_prob=1.0,
           bidi=False,
           name_scope="encoder",
           dtype=tf.float32):
    """A Dynamic RNN Creator"
		Take embedding inputs and make dynamic rnn process 
	"""
    with tf.name_scope(name_scope):
        if bidi:
            cell_fw = CreateMultiRNNCell(cell_model,
                                         num_units,
                                         num_layers,
                                         keep_prob,
                                         name_scope="cell_fw")
            cell_bw = CreateMultiRNNCell(cell_model,
                                         num_units,
                                         num_layers,
                                         keep_prob,
                                         name_scope="cell_bw")
            enc_outs, enc_states = bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=emb_inps,
                sequence_length=enc_lens,
                dtype=dtype,
                parallel_iterations=16,
                scope=name_scope)
            fw_s, bw_s = enc_states
            enc_states = []
            for f, b in zip(fw_s, bw_s):
                if isinstance(f, LSTMStateTuple):
                    enc_states.append(
                        LSTMStateTuple(tf.concat([f.c, b.c], axis=1),
                                       tf.concat([f.h, b.h], axis=1)))
                else:
                    enc_states.append(tf.concat([f, b], 1))

            enc_outs = tf.concat([enc_outs[0], enc_outs[1]], axis=2)
            mem_size = 2 * num_units
            enc_state_size = 2 * num_units
        else:
            cell = CreateMultiRNNCell(cell_model,
                                      num_units,
                                      num_layers,
                                      keep_prob,
                                      name_scope="cell")
            enc_outs, enc_states = dynamic_rnn(cell=cell,
                                               inputs=emb_inps,
                                               sequence_length=enc_lens,
                                               parallel_iterations=16,
                                               dtype=dtype,
                                               scope=name_scope)
            mem_size = num_units
            enc_state_size = num_units
    return enc_outs, enc_states, mem_size, enc_state_size
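A hedged usage sketch for DynRNN, assuming CreateMultiRNNCell accepts these arguments as used above and that the embedded inputs and lengths already exist:

enc_outs, enc_states, mem_size, enc_state_size = DynRNN(
    cell_model='LSTMCell',
    num_units=256,
    num_layers=2,
    emb_inps=embedded_inputs,  # [batch, time, emb_dim], assumed
    enc_lens=input_lengths,    # [batch], assumed
    keep_prob=0.8,
    bidi=True)
# with bidi=True each layer's LSTMStateTuple is fw/bw-concatenated (width 512)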