Пример #1
0
    def get_initial_state(self, x):
        input_shape = self.input_spec[0].shape
        init_nb_row = input_shape[self.row_axis]
        init_nb_col = input_shape[self.column_axis]

        base_initial_state = K.zeros_like(
            x)  # (samples, timesteps) + image_shape
        non_channel_axis = -1 if self.data_format == 'channels_first' else -2
        for _ in range(2):
            base_initial_state = K.sum(base_initial_state,
                                       axis=non_channel_axis)
        base_initial_state = K.sum(base_initial_state,
                                   axis=1)  # (samples, nb_channels)

        initial_states = []
        states_to_pass = ['r', 'c', 'e']
        nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
        if self.extrap_start_time is not None:
            states_to_pass.append(
                'ahat'
            )  # pass prediction in states so can use as actual for t+1 when extrapolating
            nlayers_to_pass['ahat'] = 1
        for u in states_to_pass:
            for l in range(nlayers_to_pass[u]):
                ds_factor = 2**l
                nb_row = init_nb_row // ds_factor
                nb_col = init_nb_col // ds_factor
                if u in ['r', 'c']:
                    stack_size = self.R_stack_sizes[l]
                elif u == 'e':
                    stack_size = 2 * self.stack_sizes[l]
                elif u == 'ahat':
                    stack_size = self.stack_sizes[l]
                output_size = stack_size * nb_row * nb_col  # flattened size

                reducer = K.zeros((input_shape[self.channel_axis],
                                   output_size))  # (nb_channels, output_size)
                initial_state = K.dot(base_initial_state,
                                      reducer)  # (samples, output_size)
                if self.data_format == 'channels_first':
                    output_shp = (-1, stack_size, nb_row, nb_col)
                else:
                    output_shp = (-1, nb_row, nb_col, stack_size)
                initial_state = K.reshape(initial_state, output_shp)
                initial_states += [initial_state]

        if K._BACKEND == 'theano':
            from theano import tensor as T
            # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
            # In our case, this is a problem when training on grayscale images, and the below line fixes it.
            initial_states = [
                T.unbroadcast(init_state, 0, 1)
                for init_state in initial_states
            ]

        if self.extrap_start_time is not None:
            initial_states += [
                K.variable(0, int if K.backend() != 'tensorflow' else 'int32')
            ]  # the last state will correspond to the current timestep
        return initial_states
Пример #2
0
 def step(self, inputs, states):
     states = list(states)
     if self.teacher_force:
         readout = states.pop()
         ground_truth = states.pop()
         assert K.ndim(ground_truth) == 3, K.ndim(ground_truth)
         counter = states.pop()
         if K.backend() == 'tensorflow':
             with tf.control_dependencies(None):
                 zero = K.cast(K.zeros((1, ))[0], 'int32')
                 one = K.cast(K.zeros((1, ))[0], 'int32')
         else:
             zero = K.cast(K.zeros((1, ))[0], 'int32')
             one = K.cast(K.zeros((1, ))[0], 'int32')
         slices = [
             slice(None), counter[0] - K.switch(counter[0], one, zero)
         ] + [slice(None)] * (K.ndim(ground_truth) - 2)
         ground_truth_slice = ground_truth[slices]
         readout = K.in_train_phase(
             K.switch(counter[0], ground_truth_slice, readout), readout)
         states.append(readout)
     if self.decode:
         model_input = states
     else:
         model_input = [inputs] + states
     shapes = []
     for x in model_input:
         if hasattr(x, '_keras_shape'):
             shapes.append(x._keras_shape)
             del x._keras_shape  # Else keras internals will get messed up.
     model_output = _to_list(self.model.call(model_input))
     for x, s in zip(model_input, shapes):
         setattr(x, '_keras_shape', s)
     if self.decode:
         model_output.insert(1, model_input[0])
     for tensor in model_output:
         tensor._uses_learning_phase = self.uses_learning_phase
     states = model_output[1:]
     output = model_output[0]
     if self.readout:
         states += [output]
         if self.teacher_force:
             states.insert(-1, counter + 1)
             states.insert(-1, ground_truth)
     return output, states
Пример #3
0
 def get_initial_state(self, inputs):
     if type(self.model.input) is not list:
         return []
     try:
         batch_size = K.int_shape(inputs)[0]
     except:
         batch_size = None
     state_shapes = list(map(K.int_shape, self.model.input[1:]))
     states = []
     if self.readout:
         state_shapes.pop()
         # default value for initial_readout is handled in call()
     for shape in state_shapes:
         if None in shape[1:]:
             raise Exception(
                 'Only the batch dimension of a state can be left unspecified. Got state with shape '
                 + str(shape))
         if shape[0] is None:
             ndim = K.ndim(inputs)
             z = K.zeros_like(inputs)
             slices = [slice(None)] + [0] * (ndim - 1)
             z = z[slices]  # (batch_size,)
             state_ndim = len(shape)
             z = K.reshape(z, (-1, ) + (1, ) * (state_ndim - 1))
             z = K.tile(z, (1, ) + tuple(shape[1:]))
             states.append(z)
         else:
             states.append(K.zeros(shape))
     state_initializer = self.state_initializer
     if state_initializer:
         # some initializers don't accept symbolic shapes
         for i in range(len(state_shapes)):
             if state_shapes[i][0] is None:
                 if hasattr(self, 'batch_size'):
                     state_shapes[i] = (
                         self.batch_size, ) + state_shapes[i][1:]
             if None in state_shapes[i]:
                 state_shapes[i] = K.shape(states[i])
         num_state_init = len(state_initializer)
         num_state = self.num_states
         assert num_state_init == num_state, 'RNN has ' + str(
             num_state) + ' states, but was provided ' + str(
                 num_state_init) + ' state initializers.'
         for i in range(len(states)):
             init = state_initializer[i]
             shape = state_shapes[i]
             try:
                 if not isinstance(init, initializers.Zeros):
                     states[i] = init(shape)
             except:
                 raise Exception(
                     'Seems the initializer ' + init.__class__.__name__ +
                     ' does not support symbolic shapes(' + str(shape) +
                     '). Try providing the full input shape (include batch dimension) for you RecurrentModel.'
                 )
     return states
Пример #4
0
 def __init__(self, name=None, **kwargs):
     if not name:
         prefix = 'optional_input_placeholder'
         name = prefix + '_' + str(K.get_uid(prefix))
     kwargs['batch_input_shape'] = (2, )
     super(_OptionalInputPlaceHolder, self).__init__(**kwargs)
     self.tensor = K.zeros(shape=(2, ))
     self.tensor._keras_shape = (2, )
     self.tensor._uses_learning_phase = False
     self.tensor._keras_history = (self, 0, 0)
     Node(self,
          inbound_layers=[],
          node_indices=[],
          tensor_indices=[],
          input_tensors=[],
          output_tensors=[self.tensor],
          input_masks=[None],
          output_masks=[None],
          input_shapes=[],
          output_shapes=[(2, )])
     self.build((2, ))
Пример #5
0
    def call(self,
             inputs,
             initial_state=None,
             initial_readout=None,
             ground_truth=None,
             mask=None,
             training=None):
        # input shape: `(samples, time (padded with zeros), input_dim)`
        # note that the .build() method of subclasses MUST define
        # self.input_spec and self.state_spec with complete input shapes.
        if type(mask) is list:
            mask = mask[0]
        if self.model is None:
            raise Exception('Empty RecurrentModel.')
        num_req_states = self.num_states
        if self.readout:
            num_actual_states = num_req_states - 1
        else:
            num_actual_states = num_req_states
        if type(inputs) is list:
            inputs_list = inputs[:]
            inputs = inputs_list.pop(0)
            initial_states = inputs_list[:num_actual_states]
            if len(initial_states) > 0:
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)
            inputs_list = inputs_list[num_actual_states:]
            if self.readout:
                initial_readout = inputs_list.pop(0)
                if self.teacher_force:
                    ground_truth = inputs_list.pop()
        else:
            if initial_state is not None:
                if not isinstance(initial_state, (list, tuple)):
                    initial_states = [initial_state]
                else:
                    initial_states = list(initial_state)
                if self._is_optional_input_placeholder(initial_states[0]):
                    initial_states = self.get_initial_state(inputs)

            elif self.stateful:
                initial_states = self.states
            else:
                initial_states = self.get_initial_state(inputs)
        if self.readout:
            if initial_readout is None or self._is_optional_input_placeholder(
                    initial_readout):
                output_shape = K.int_shape(_to_list((self.model.output))[0])
                output_ndim = len(output_shape)
                input_ndim = K.ndim(inputs)
                initial_readout = K.zeros_like(inputs)
                slices = [slice(None)] + [0] * (input_ndim - 1)
                initial_readout = initial_readout[slices]  # (batch_size,)
                initial_readout = K.reshape(initial_readout,
                                            (-1, ) + (1, ) * (output_ndim - 1))
                initial_readout = K.tile(initial_readout,
                                         (1, ) + tuple(output_shape[1:]))
            initial_states.append(initial_readout)
            if self.teacher_force:
                if ground_truth is None or self._is_optional_input_placeholder(
                        ground_truth):
                    raise Exception(
                        'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                    )
                if K.backend() == 'tensorflow':
                    with tf.control_dependencies(None):
                        counter = K.zeros((1, ))
                else:
                    counter = K.zeros((1, ))
                counter = K.cast(counter, 'int32')
                initial_states.insert(-1, counter)
                initial_states[-2]
                initial_states.insert(-1, ground_truth)
                num_req_states += 2
        if len(initial_states) != num_req_states:
            raise ValueError('Layer requires ' + str(num_req_states) +
                             ' states but was passed ' +
                             str(len(initial_states)) + ' initial states.')
        input_shape = K.int_shape(inputs)
        if self.unroll and input_shape[1] is None:
            raise ValueError('Cannot unroll a RNN if the '
                             'time dimension is undefined. \n'
                             '- If using a Sequential model, '
                             'specify the time dimension by passing '
                             'an `input_shape` or `batch_input_shape` '
                             'argument to your first layer. If your '
                             'first layer is an Embedding, you can '
                             'also use the `input_length` argument.\n'
                             '- If using the functional API, specify '
                             'the time dimension by passing a `shape` '
                             'or `batch_shape` argument to your Input layer.')
        preprocessed_input = self.preprocess_input(inputs, training=None)
        constants = self.get_constants(inputs, training=None)
        if self.decode:
            initial_states.insert(0, inputs)
            preprocessed_input = K.zeros((1, self.output_length, 1))
            input_length = self.output_length
        else:
            input_length = input_shape[1]
        if self.uses_learning_phase:
            with learning_phase_scope(0):
                last_output_test, outputs_test, states_test, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)
            with learning_phase_scope(1):
                last_output_train, outputs_train, states_train, updates = rnn(
                    self.step,
                    preprocessed_input,
                    initial_states,
                    go_backwards=self.go_backwards,
                    mask=mask,
                    constants=constants,
                    unroll=self.unroll,
                    input_length=input_length)

            last_output = K.in_train_phase(last_output_train,
                                           last_output_test,
                                           training=training)
            outputs = K.in_train_phase(outputs_train,
                                       outputs_test,
                                       training=training)
            states = []
            for state_train, state_test in zip(states_train, states_test):
                states.append(
                    K.in_train_phase(state_train,
                                     state_test,
                                     training=training))

        else:
            last_output, outputs, states, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        states = list(states)
        if self.decode:
            states.pop(0)
        if self.readout:
            states.pop()
            if self.teacher_force:
                states.pop()
                states.pop()
        if len(updates) > 0:
            self.add_update(updates)
        if self.stateful:
            updates = []
            for i in range(len(states)):
                updates.append((self.states[i], states[i]))
            self.add_update(updates, inputs)

        # Properly set learning phase
        if 0 < self.dropout + self.recurrent_dropout:
            last_output._uses_learning_phase = True
            outputs._uses_learning_phase = True

        if self.return_sequences:
            y = outputs
        else:
            y = last_output
        if self.return_states:
            return [y] + states
        else:
            return y
Пример #6
0
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr *= (1. /
                   (1. +
                    self.decay * K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
            1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            # if a weight tensor (len > 1) use weight normalized parameterization
            # this is the only part changed w.r.t. keras.optimizers.Adam
            ps = K.get_variable_shape(p)
            if len(ps) > 1:

                # get weight normalization parameters
                V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads(
                    p, g)

                # Adam containers for the 'g' parameter
                V_scaler_shape = K.get_variable_shape(V_scaler)
                m_g = K.zeros(V_scaler_shape)
                v_g = K.zeros(V_scaler_shape)

                # update g parameters
                m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
                v_g_t = (self.beta_2 *
                         v_g) + (1. - self.beta_2) * K.square(grad_g)
                new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) +
                                                        self.epsilon)
                self.updates.append(K.update(m_g, m_g_t))
                self.updates.append(K.update(v_g, v_g_t))

                # update V parameters
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
                new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                # if there are constraints we apply them to V, not W
                # if p in constraints:
                #     c = constraints[p]
                #     new_V_param = c(new_V_param)

                # wn param updates --> W updates
                add_weightnorm_param_updates(self.updates, new_V_param,
                                             new_g_param, p, V_scaler)

            else:  # do optimization normally
                m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
                v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

                self.updates.append(K.update(m, m_t))
                self.updates.append(K.update(v, v_t))

                new_p = p_t
                # apply constraints
                # if p in constraints:
                #     c = constraints[p]
                #     new_p = c(new_p)
                self.updates.append(K.update(p, new_p))
        return self.updates