def extract_image_patches(x, ksizes, ssizes, padding='same', data_format='tf'):
    ''' Extract the patches from an image

    # Parameters
        x : The input image
        ksizes : 2-d tuple with the kernel size
        ssizes : 2-d tuple with the strides size
        padding : 'same' or 'valid'
        data_format : 'channels_last' or 'channels_first'

    # Returns
        The (k_w,k_h) patches extracted
        TF ==> (batch_size,w,h,k_w,k_h,c)
        TH ==> (batch_size,w,h,c,k_w,k_h)
    '''
    # NOTE(review): the default data_format='tf' matches neither branch below,
    # so with the default neither pre- nor post-permutation runs and the result
    # stays in (b, w, h, c, k_w, k_h) order — confirm callers always pass
    # 'channels_first' or 'channels_last' explicitly.
    kernel = [1, ksizes[0], ksizes[1], 1]
    strides = [1, ssizes[0], ssizes[1], 1]
    padding = _preprocess_padding(padding)
    if data_format == 'channels_first':
        # tf.extract_image_patches expects NHWC input.
        x = KTF.permute_dimensions(x, (0, 2, 3, 1))
    bs_i, w_i, h_i, ch_i = KTF.int_shape(x)
    patches = tf.extract_image_patches(x, kernel, strides, [1, 1, 1, 1],
                                       padding)
    # Reshaping to fit Theano
    # The patch axis of size k_w*k_h*ch is split into (k_w*k_h, ch), the two
    # factors swapped, and then the kernel factor unfolded into (k_w, k_h).
    # -1 lets TF infer the (possibly unknown) batch dimension.
    bs, w, h, ch = KTF.int_shape(patches)
    patches = tf.reshape(
        tf.transpose(
            tf.reshape(patches, [-1, w, h, tf.floordiv(ch, ch_i), ch_i]),
            [0, 1, 2, 4, 3]), [-1, w, h, ch_i, ksizes[0], ksizes[1]])
    if data_format == 'channels_last':
        patches = KTF.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def padding(x):
    """Zero-pad ``x`` so that an odd spatial dimension becomes even.

    Adds one row at the top when the height (axis 1) is odd; otherwise
    adds one column at the left when the width (axis 2) is odd.  At most
    one of the two paddings is applied (``elif``), matching the original
    control flow.

    # Parameters
        x : 4-D image tensor ``(batch, height, width, channels)``
            (channels_last assumed — TODO confirm against callers).

    # Returns
        The (possibly) padded tensor.
    """
    # Bug fix: the original inspected the *builtin* ``input`` instead of the
    # argument ``x`` (a NameError/TypeError at graph-build time) and used a
    # Python-2 ``print`` statement.
    if ktf.int_shape(x)[1] % 2 != 0:
        x = ZeroPadding2D(padding=((1, 0), (0, 0)))(x)
    elif ktf.int_shape(x)[2] % 2 != 0:
        x = ZeroPadding2D(padding=((0, 0), (1, 0)))(x)
    # Debug trace of the resulting static shape (kept from the original).
    print(ktf.int_shape(x))
    return x
def attention_single_input(tensors):
    """Target-specific attention over a sequence of hidden states.

    ``tensors = [inputs, trg_seq]`` with
    ``inputs.shape = (batch_size, time_steps, input_dim)``.  Computes a
    softmax attention weight per timestep w.r.t. the target and returns
    ``[attended_sequence, attention_weights]``.

    (Shape intuition: (None, 32) -> RepeatVector(3) -> (None, 3, 32).)
    """
    # Must import inside the lambda function, otherwise the model won't load.
    from keras.layers import Dense, Input, Flatten, Activation, Average, Permute, RepeatVector, Lambda, Multiply, Subtract, concatenate, merge, Masking
    import keras.backend.tensorflow_backend as K
    from attention import get_target_hidden_states, get_inputs

    sequence = tensors[0]
    target_seq = tensors[1]
    feature_dim = int(sequence.shape[2])
    steps = K.int_shape(sequence)[1]

    # Tile the target's hidden state across every timestep.
    target_states = get_target_hidden_states(sequence, target_seq)
    target_tiled = RepeatVector(steps)(target_states)

    # Pair each input step with the repeated target state.
    step_inputs = get_inputs(sequence, target_seq)
    context = concatenate([step_inputs, target_tiled], axis=2)

    # One scalar score per step -> softmax over time -> per-feature weights.
    weights = Dense(1, activation='tanh')(context)
    weights = Flatten()(weights)
    weights = Activation('softmax')(weights)
    weights = RepeatVector(feature_dim)(weights)
    weights = Permute([2, 1], name='attention_vec')(weights)

    attended = merge([step_inputs, weights], name='attention_mul', mode='mul')
    return [attended, weights]
def cnn_classifier(x, input_channel, units, kernels, strides, paddings, activations, bn=True):
    """1-D CNN classifier head.

    Applies ``len(units) - 1`` Conv1D layers (with optional BatchNorm +
    ELU when the per-layer activation is ``'elu'``), then flattens and
    finishes with a Dense layer sized/activated by the last entries of
    ``units`` / ``activations``.

    # Parameters
        x : input tensor (flat feature vector when ``len(units) > 1``).
        input_channel : channels used to fold ``x`` into (length, channels).
        units, kernels, strides, paddings, activations : per-layer configs.
        bn : insert BatchNormalization before ELU activations.

    # Returns
        Output tensor of the final Dense layer.
    """
    if len(units) > 1:
        # Fold the flat feature vector into (length, channels) for Conv1D.
        x = Reshape(
            (int(KTF.int_shape(x)[1] / input_channel), input_channel))(x)
    # Idiom cleanup: range(n) instead of range(0, n); negative indexing
    # instead of units[len(units) - 1].
    for i in range(len(units) - 1):
        if activations[i] == 'elu':
            # ELU is applied separately so BatchNorm can sit between the
            # linear convolution and the activation.
            x = Conv1D(units[i],
                       kernel_size=kernels[i],
                       strides=strides[i],
                       padding=paddings[i])(x)
            if bn:
                x = BatchNormalization(axis=-1)(x)
            x = keras.layers.advanced_activations.ELU(alpha=1.0)(x)
        else:
            x = Conv1D(units[i],
                       activation=activations[i],
                       kernel_size=kernels[i],
                       strides=strides[i],
                       padding=paddings[i])(x)
    x = Flatten()(x)
    x = Dense(units[-1],
              activation=activations[-1],
              kernel_initializer='glorot_uniform',
              bias_initializer='glorot_uniform')(x)
    return x
def state_shape(self):
    """Shape(s) of the wrapped model's state inputs.

    Returns ``None`` when the model has a single (non-list) input, the
    one state's shape when there is exactly one state input, otherwise a
    list of shapes for every state input.
    """
    model_in = self.model.input
    if type(model_in) is not list:
        # Single-tensor input: no explicit state tensors.
        return None
    if len(model_in) == 2:
        # Exactly one state input — return its shape directly.
        return K.int_shape(model_in[1])
    return [K.int_shape(state) for state in model_in[1:]]
def get_initial_state(self, inputs):
    """Build initial state tensors for the wrapped model.

    States default to zeros; when the batch dimension is unknown they are
    derived symbolically from ``inputs`` so they track the runtime batch
    size.  ``self.state_initializer`` entries (if any) override the zeros.
    """
    if type(self.model.input) is not list:
        # Model has no explicit state inputs.
        return []
    try:
        batch_size = K.int_shape(inputs)[0]
    except:
        # NOTE(review): `batch_size` is never read below — dead local.
        batch_size = None
    state_shapes = list(map(K.int_shape, self.model.input[1:]))
    states = []
    if self.readout:
        # The readout state is not built here;
        # default value for initial_readout is handled in call()
        state_shapes.pop()
    for shape in state_shapes:
        if None in shape[1:]:
            raise Exception(
                'Only the batch dimension of a state can be left unspecified. Got state with shape '
                + str(shape))
        if shape[0] is None:
            # Unknown batch dim: slice a (batch_size,) zeros vector out of
            # `inputs`, then reshape/tile it up to the full state shape so
            # the state follows the symbolic batch size.
            ndim = K.ndim(inputs)
            z = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (ndim - 1)
            z = z[slices]  # (batch_size,)
            state_ndim = len(shape)
            z = K.reshape(z, (-1, ) + (1, ) * (state_ndim - 1))
            z = K.tile(z, (1, ) + tuple(shape[1:]))
            states.append(z)
        else:
            states.append(K.zeros(shape))
    state_initializer = self.state_initializer
    if state_initializer:
        # some initializers don't accept symbolic shapes
        for i in range(len(state_shapes)):
            if state_shapes[i][0] is None:
                if hasattr(self, 'batch_size'):
                    state_shapes[i] = (
                        self.batch_size, ) + state_shapes[i][1:]
                if None in state_shapes[i]:
                    # Fall back to the fully symbolic shape tensor.
                    state_shapes[i] = K.shape(states[i])
        num_state_init = len(state_initializer)
        num_state = self.num_states
        assert num_state_init == num_state, 'RNN has ' + str(
            num_state) + ' states, but was provided ' + str(
                num_state_init) + ' state initializers.'
        for i in range(len(states)):
            init = state_initializer[i]
            shape = state_shapes[i]
            try:
                # Zeros states were already built above; only non-zero
                # initializers need to re-create the tensor.
                if not isinstance(init, initializers.Zeros):
                    states[i] = init(shape)
            except:
                raise Exception(
                    'Seems the initializer ' + init.__class__.__name__ +
                    ' does not support symbolic shapes(' + str(shape) +
                    '). Try providing the full input shape (include batch dimension) for you RecurrentModel.'
                )
    return states
def _build_extracter(self):
    """Build the convolutional feature extracter.

    Two conv(5x5, 'same', relu) + maxpool(2) stages followed by a
    flatten; the flattened feature count is recorded in
    ``self.n_features``.  Returns a Keras ``Model`` named "extracter".
    """
    image_in = Input(shape=self.img_shape)
    feat = layers.Conv2D(32,
                         kernel_size=5,
                         strides=1,
                         padding='same',
                         activation='relu')(image_in)
    feat = layers.MaxPooling2D(2, strides=2)(feat)
    feat = layers.Conv2D(48,
                         kernel_size=5,
                         strides=1,
                         padding='same',
                         activation='relu')(feat)
    feat = layers.MaxPooling2D(2, strides=2)(feat)
    feat = layers.Flatten()(feat)
    # Record the flat feature dimension for downstream heads.
    self.n_features = K.int_shape(feat)[-1]
    return Model(image_in, feat, name="extracter")
def extract_image_patches(X, ksizes, ssizes, border_mode="same", dim_ordering="tf"):
    ''' Extract the patches from an image
    Parameters
    ----------
    X : The input image
    ksizes : 2-d tuple with the kernel size
    ssizes : 2-d tuple with the strides size
    border_mode : 'same' or 'valid'
    dim_ordering : 'tf' or 'th'

    Returns
    -------
    The (k_w,k_h) patches extracted
    TF ==> (batch_size,w,h,k_w,k_h,c)
    TH ==> (batch_size,w,h,c,k_w,k_h)
    '''
    kernel = [1, ksizes[0], ksizes[1], 1]
    strides = [1, ssizes[0], ssizes[1], 1]
    padding = _preprocess_border_mode(border_mode)
    if dim_ordering == "th":
        # tf.extract_image_patches expects NHWC input.
        X = KTF.permute_dimensions(X, (0, 2, 3, 1))
    bs_i, w_i, h_i, ch_i = KTF.int_shape(X)
    patches = tf.extract_image_patches(X, kernel, strides, [1, 1, 1, 1],
                                       padding)
    # Reshaping to fit Theano
    bs, w, h, ch = KTF.int_shape(patches)
    # Bug fix: `bs` is None whenever the batch size is not statically known,
    # which makes tf.reshape fail.  Use -1 for the batch dimension (as the
    # data_format variant of this helper already does) and make the channel
    # split explicit since tf.reshape allows only one inferred dimension.
    patches = tf.reshape(
        tf.transpose(
            tf.reshape(patches, [-1, w, h, ch // ch_i, ch_i]),
            [0, 1, 2, 4, 3]), [-1, w, h, ch_i, ksizes[0], ksizes[1]])
    if dim_ordering == "tf":
        patches = KTF.permute_dimensions(patches, [0, 1, 2, 4, 5, 3])
    return patches
def tc_lstm_input(tensors):
    """Lambda body building the TC-LSTM input sequence.

    ``tensors = [inputs, trg_seq]`` with
    ``inputs.shape = (batch_size, time_steps, input_dim)``.  Every
    timestep of the input sequence is concatenated with the (repeated)
    target embedding.
    """
    # Imports live inside the lambda body so the model can be reloaded.
    from keras.layers import Permute, RepeatVector, Lambda, Multiply, Subtract, concatenate, merge, Masking
    import keras.backend.tensorflow_backend as K
    from tc_lstm import get_target_embeddings, get_inputs

    sequence = tensors[0]
    target_seq = tensors[1]
    steps = K.int_shape(sequence)[1]

    # Tile the target embedding across all timesteps.
    target_emb = get_target_embeddings(sequence, target_seq)
    target_tiled = RepeatVector(steps)(target_emb)

    step_inputs = get_inputs(sequence, target_seq)
    return concatenate([step_inputs, target_tiled], axis=2)
def enforced_regression_classifier(x, input_channel, units, kernels, strides, paddings):
    """Conv1D regression head: (Conv1D + BN + ELU) stack, flatten, tanh Dense.

    The first ``len(units) - 1`` entries configure the conv layers; the
    last entry of ``units`` sizes the final Dense output.
    """
    if len(units) > 1:
        # Fold the flat feature vector into (length, channels) for Conv1D.
        length = int(KTF.int_shape(x)[1] / input_channel)
        x = Reshape((length, input_channel))(x)
    num_conv = len(units) - 1
    for idx in range(num_conv):
        x = Conv1D(filters=units[idx],
                   kernel_size=kernels[idx],
                   strides=strides[idx],
                   padding=paddings[idx],
                   kernel_initializer='glorot_uniform',
                   bias_initializer='glorot_uniform')(x)
        x = BatchNormalization(axis=-1)(x)
        x = keras.layers.advanced_activations.ELU(alpha=1.0)(x)
    x = Flatten()(x)
    x = Dense(units[num_conv],
              activation="tanh",
              kernel_initializer='glorot_uniform',
              bias_initializer='glorot_uniform')(x)
    return x
def compute_output_shape(self, input_shape):
    """Compute the layer's output shape by delegating to the wrapped model.

    The recurrent wrapper's time dimension is stripped before delegation
    (except when decoding, where the input carries none) and re-inserted
    afterwards when ``return_sequences`` is set.
    """
    if not self.decode:
        if type(input_shape) is list:
            # NOTE(review): mutates the caller-provided list in place.
            input_shape[0] = self._remove_time_dim(input_shape[0])
        else:
            input_shape = self._remove_time_dim(input_shape)
    input_shape = _to_list(input_shape)
    # Replace any caller-supplied state shapes with the shapes of the
    # model's actual state inputs.
    input_shape = [input_shape[0]] + [
        K.int_shape(state) for state in self.model.input[1:]
    ]
    output_shape = self.model.compute_output_shape(input_shape)
    if type(output_shape) is list:
        output_shape = output_shape[0]
    if self.return_sequences:
        # Re-insert the time dimension: output_length when decoding,
        # otherwise the input's own time dimension.
        if self.decode:
            output_shape = output_shape[:1] + (
                self.output_length, ) + output_shape[1:]
        else:
            output_shape = output_shape[:1] + (
                self.input_spec.shape[1], ) + output_shape[1:]
    if self.return_states and len(self.states) > 0:
        # Append per-state shapes when states are also returned.
        output_shape = [output_shape] + list(
            map(K.int_shape, self.model.output[1:]))
    return output_shape
def build(self, input_shape):
    """Wire the cells of this RecurrentSequential into a step ``Model``.

    Builds symbolic Input placeholders for the step input and each state,
    threads them through every cell (state-synchronized or not), and
    stores the resulting ``Model([input] + states, [output] + states)``
    in ``self.model``.  Optionally wraps it with readout merging.
    """
    if hasattr(self, 'model'):
        # Rebuilding: drop the stale step model.
        del self.model
    # Try and get batch size for initializer
    if not hasattr(self, 'batch_size'):
        if hasattr(self, 'batch_input_shape'):
            batch_size = self.batch_input_shape[0]
            if batch_size is not None:
                self.batch_size = batch_size
    if self.state_sync:
        # All cells share the same state tensors.
        if type(input_shape) is list:
            # States' shapes were passed alongside the input shape.
            x_shape = input_shape[0]
            if not self.decode:
                # Remove the time dim from the step input; remember the
                # (possibly concrete) input length in the input spec.
                input_length = x_shape.pop(1)
                if input_length is not None:
                    shape = list(self.input_spec.shape)
                    shape[1] = input_length
                    self.input_spec = InputSpec(shape=tuple(shape))
            input = Input(batch_shape=x_shape)
            initial_states = [
                Input(batch_shape=shape) for shape in input_shape[1:]
            ]
        else:
            if not self.decode:
                input_length = input_shape[1]
                if input_length is not None:
                    shape = list(self.input_spec.shape)
                    shape[1] = input_length
                    self.input_spec = InputSpec(shape=tuple(shape))
                input = Input(batch_shape=input_shape[:1] + input_shape[2:])
            else:
                input = Input(batch_shape=input_shape)
            initial_states = []
        output = input
        final_states = initial_states[:]
        for cell in self.cells:
            if _is_rnn_cell(cell):
                if not initial_states:
                    # First RNN cell defines the shared state placeholders.
                    cell.build(K.int_shape(output))
                    initial_states = [
                        Input(batch_shape=shape)
                        for shape in _to_list(cell.state_shape)
                    ]
                    final_states = initial_states[:]
                cell_out = cell([output] + final_states)
                if type(cell_out) is not list:
                    cell_out = [cell_out]
                output = cell_out[0]
                # Shared states are overwritten by each RNN cell in turn.
                final_states = cell_out[1:]
            else:
                output = cell(output)
    else:
        # Each RNN cell owns its own slice of the state list.
        if type(input_shape) is list:
            x_shape = input_shape[0]
            if not self.decode:
                input_length = x_shape.pop(1)
                if input_length is not None:
                    shape = list(self.input_spec.shape)
                    shape[1] = input_length
                    self.input_spec = InputSpec(shape=tuple(shape))
            input = Input(batch_shape=x_shape)
            initial_states = [
                Input(batch_shape=shape) for shape in input_shape[1:]
            ]
            output = input
            final_states = []
            for cell in self.cells:
                if _is_rnn_cell(cell):
                    # Consume this cell's states from the provided list.
                    cell_initial_states = initial_states[
                        len(final_states):len(final_states) +
                        cell.num_states]
                    cell_in = [output] + cell_initial_states
                    cell_out = _to_list(cell(cell_in))
                    output = cell_out[0]
                    final_states += cell_out[1:]
                else:
                    output = cell(output)
        else:
            if not self.decode:
                input_length = input_shape[1]
                if input_length is not None:
                    shape = list(self.input_spec.shape)
                    shape[1] = input_length
                    self.input_spec = InputSpec(shape=tuple(shape))
                input = Input(batch_shape=input_shape[:1] + input_shape[2:])
            else:
                input = Input(batch_shape=input_shape)
            output = input
            initial_states = []
            final_states = []
            for cell in self.cells:
                if _is_rnn_cell(cell):
                    # Create fresh state placeholders per RNN cell.
                    cell.build(K.int_shape(output))
                    state_inputs = [
                        Input(batch_shape=shape)
                        for shape in _to_list(cell.state_shape)
                    ]
                    initial_states += state_inputs
                    cell_in = [output] + state_inputs
                    cell_out = _to_list(cell(cell_in))
                    output = cell_out[0]
                    final_states += cell_out[1:]
                else:
                    output = cell(output)
    self.model = Model([input] + initial_states, [output] + final_states)
    self.states = [None] * len(initial_states)
    if self.readout:
        # Wrap the step model so the previous output (readout) is merged
        # into the input before each step.
        readout_input = Input(batch_shape=K.int_shape(output),
                              name='readout_input')
        if self.readout_activation.__name__ == 'linear':
            # Identity Lambda keeps the graph node distinct from its input.
            readout = Lambda(lambda x: x + 0.,
                             output_shape=lambda s: s)(readout_input)
        else:
            readout = Activation(self.readout_activation)(readout_input)
        input = Input(batch_shape=K.int_shape(input))
        if self.readout in [True, 'add']:
            input_readout_merged = add([input, readout])
        elif self.readout in ['mul', 'multiply']:
            input_readout_merged = multiply([input, readout])
        elif self.readout in ['avg', 'average']:
            input_readout_merged = average([input, readout])
        elif self.readout in ['max', 'maximum']:
            input_readout_merged = maximum([input, readout])
        elif self.readout == 'readout_only':
            input_readout_merged = readout
        initial_states = [
            Input(batch_shape=K.int_shape(s)) for s in initial_states
        ]
        output = _to_list(
            self.model([input_readout_merged] + initial_states))
        final_states = output[1:]
        output = output[0]
        self.model = Model([input] + initial_states + [readout_input],
                           [output] + final_states)
        # One extra state slot tracks the readout.
        self.states.append(None)
    super(RecurrentSequential, self).build(input_shape)
def to_model(self, input_shape, name="default_for_op", kernel_regularizer_l2=0.01):
    """Convert this layer-DAG into a Keras ``Model``.

    Walks the graph in topological order, instantiates a Keras layer per
    node, and threads tensors along the edges.  Merge nodes (Add /
    Concatenate) first down-sample / project their inputs to a common
    spatial shape and channel count.
    """
    # with graph.as_default():
    # with tf.name_scope(name) as scope:
    graph_helper = self.copy()
    assert nx.is_directed_acyclic_graph(graph_helper)
    topo_nodes = nx.topological_sort(graph_helper)
    input_tensor = Input(shape=input_shape)
    for node in topo_nodes:
        pre_nodes = graph_helper.predecessors(node)
        suc_nodes = graph_helper.successors(node)
        if node.type not in ['Concatenate', 'Add', 'Multiply']:
            # Single-input node: its tensor comes from the lone predecessor
            # edge (or the model input for source nodes).
            if len(pre_nodes) == 0:
                layer_input_tensor = input_tensor
            else:
                assert len(pre_nodes) == 1
                layer_input_tensor = graph_helper[pre_nodes[0]][node]['tensor']
            if node.type == 'Conv2D':
                kernel_size = node.config.get('kernel_size', 3)
                filters = node.config['filters']
                layer = Conv2D(kernel_size=kernel_size,
                               filters=filters,
                               name=node.name,
                               padding='same',
                               kernel_regularizer=regularizers.l2(kernel_regularizer_l2))
            elif node.type == 'Conv2D_Pooling':
                kernel_size = node.config.get('kernel_size', 3)
                filters = node.config['filters']
                layer = self.conv_pooling_layer(name=node.name,
                                                kernel_size=kernel_size,
                                                filters=filters,
                                                kernel_regularizer_l2=kernel_regularizer_l2)
            elif node.type == 'Group':
                layer = self.group_layer(name=node.name,
                                         group_num=node.config['group_num'],
                                         filters=node.config['filters'],
                                         kernel_regularizer_l2=kernel_regularizer_l2)
            elif node.type == 'GlobalMaxPooling2D':
                layer = keras.layers.GlobalMaxPooling2D(name=node.name)
            elif node.type == 'MaxPooling2D':
                layer = keras.layers.MaxPooling2D(name=node.name)
            elif node.type == 'AveragePooling2D':
                layer = keras.layers.AveragePooling2D(name=node.name)
            elif node.type == 'Activation':
                activation_type = node.config['activation_type']
                layer = Activation(activation=activation_type, name=node.name)
            layer_output_tensor = layer(layer_input_tensor)
            if node.type in ['Conv2D', 'Conv2D_Pooling', 'Group']:
                # NOTE(review): tuple-expression calling two updates — looks
                # like bookkeeping side effects; confirm intent.
                self.update(), graph_helper.update()
                if node.type == 'Conv2D':
                    layer_output_tensor = PReLU()(layer_output_tensor)
                # MAX_DP, MIN_DP = .35, .01
                # ratio_dp = -(MAX_DP - MIN_DP) / self.max_depth * node.depth + MAX_DP
                # use fixed drop out ratio
                ratio_dp = 0.30
                layer_output_tensor = keras.layers.Dropout(ratio_dp)(layer_output_tensor)
                # logger.debug('layer {} ratio of dropout {}'.format(node.name, ratio_dp))
                # for test, use batch norm
                # layer_output_tensor = keras.layers.BatchNormalization(axis=3)(layer_output_tensor)
        else:
            # Merge node: gather every predecessor's tensor.
            layer_input_tensors = [graph_helper[pre_node][node]['tensor']
                                   for pre_node in pre_nodes]
            if node.type == 'Add':
                # todo also test multiply
                assert K.image_data_format() == 'channels_last'
                # Reduce all inputs to the smallest spatial shape and the
                # smallest channel count before element-wise addition.
                ori_shapes = [ktf.int_shape(layer_input_tensor)[1:3]
                              for layer_input_tensor in layer_input_tensors]
                ori_shapes = np.array(ori_shapes)
                new_shape = ori_shapes.min(axis=0)
                ori_chnls = [ktf.int_shape(layer_input_tensor)[3]
                             for layer_input_tensor in layer_input_tensors]
                ori_chnls = np.array(ori_chnls)
                new_chnl = ori_chnls.min()
                for ind, layer_input_tensor, ori_shape in \
                        zip(range(len(layer_input_tensors)), layer_input_tensors, ori_shapes):
                    diff_shape = ori_shape - new_shape
                    if diff_shape.any():
                        # pool_size = diff + 1 with stride 1 shrinks the
                        # spatial dims by exactly `diff`.
                        diff_shape += 1
                        layer_input_tensors[ind] = \
                            keras.layers.MaxPool2D(pool_size=diff_shape,
                                                   strides=1,
                                                   name=node.name + '_maxpool2d')(layer_input_tensor)
                    if ori_chnls[ind] > new_chnl:
                        # 1x1 conv projects down to the common channel count.
                        layer_input_tensors[ind] = \
                            Conv2D(filters=new_chnl,
                                   kernel_size=1,
                                   padding='same',
                                   name=node.name + '_conv2d')(layer_input_tensor)
                layer = keras.layers.Add(name=node.name)
                # logger.debug('In graph to_model add a Add layer with name {}'.format(node.name))
            if node.type == 'Concatenate':
                logger.critical('Concatenate is decrapted!!!')
                if K.image_data_format() == "channels_last":
                    (width_ind, height_ind, chn_ind) = (1, 2, 3)
                else:
                    (width_ind, height_ind, chn_ind) = (2, 3, 1)
                ori_shapes = [
                    ktf.int_shape(layer_input_tensor)[width_ind:height_ind + 1]
                    for layer_input_tensor in layer_input_tensors
                ]
                ori_shapes = np.array(ori_shapes)
                new_shape = ori_shapes.min(axis=0)
                for ind, layer_input_tensor, ori_shape in \
                        zip(range(len(layer_input_tensors)), layer_input_tensors, ori_shapes):
                    diff_shape = ori_shape - new_shape
                    # NOTE(review): uses .all() here but .any() in the Add
                    # branch — confirm the asymmetry is intentional.
                    if diff_shape.all():
                        diff_shape += 1
                        layer_input_tensors[ind] = \
                            keras.layers.MaxPool2D(pool_size=diff_shape,
                                                   strides=1)(layer_input_tensor)
                # todo custom div layer
                # def div2(x):
                #     return x / 2.
                # layer_input_tensors = [keras.layers.Lambda(div2)(tensor) for tensor in layer_input_tensors]
                layer = keras.layers.Concatenate(axis=chn_ind, name=node.name)
            try:
                layer_output_tensor = layer(layer_input_tensors)
            except:
                # Best-effort: log and continue (original behavior kept).
                print("create intput output layer error!")
                # embed()
        graph_helper.add_node(node, layer=layer)
        if len(suc_nodes) == 0:
            # Sink node: its tensor becomes the model output.
            output_tensor = layer_output_tensor
        else:
            for suc_node in suc_nodes:
                graph_helper.add_edge(node, suc_node, tensor=layer_output_tensor)
    # assert tf.get_default_graph() == graph, "should be same"
    # tf.train.export_meta_graph('tmp.pbtxt', graph_def=tf.get_default_graph().as_graph_def())
    assert 'output_tensor' in locals()
    import time
    tic = time.time()
    model = Model(inputs=input_tensor, outputs=output_tensor)
    logger.info('Consume Time(Just Build model: {}'.format(time.time() - tic))
    return model
def compute_output_shape(self, input_shape):
    """Infer the output shape by tracing a throwaway Conv2D + MaxPooling2D graph."""
    probe = Input(batch_shape=input_shape)
    conv_out = Conv2D(self.output_dim, self.kernel_size)(probe)
    pooled = MaxPooling2D()(conv_out)
    return ktf.int_shape(pooled)
def deep_model(input_shape=(None, None, 6), n1=16):
    """Build and compile a small encoder-decoder CNN with additive skips.

    Two down-sampling stages, a bottleneck, and two transpose-conv
    up-sampling stages; feature maps from the encoder are added back at
    matching resolutions.  Output is a 3-channel relu conv.  Compiled
    with Adam(1e-4), ``my_mse`` loss and ``loss2acc`` metric.
    """
    net_in = Input(shape=input_shape)
    pad = 'same'

    # Encoder.
    t = Conv2D(filters=n1, kernel_size=3, padding=pad, activation='relu',
               name='conv1')(net_in)
    t = Conv2D(filters=n1, kernel_size=3, padding=pad, activation='relu',
               name='conv2')(t)
    skip1 = t
    t = MaxPool2D(pool_size=2, strides=2)(t)
    t = Conv2D(filters=n1 * 2, kernel_size=3, padding=pad,
               activation='relu')(t)
    skip2 = t
    t = MaxPool2D(pool_size=2, strides=2)(t)

    # Bottleneck.
    t = Conv2D(filters=n1 * 4, kernel_size=3, padding=pad,
               activation='relu')(t)

    # Decoder stage 1 (+ skip from the second encoder stage).
    t = Conv2DTranspose(filters=n1 * 4, kernel_size=2, strides=2,
                        padding=pad, activation='relu')(t)
    t = Conv2D(filters=n1 * 4, kernel_size=3, padding=pad,
               activation='relu')(t)
    t = Conv2D(filters=n1 * 2, kernel_size=3, padding=pad,
               activation='relu')(t)
    assert ktf.int_shape(t)[-1] == ktf.int_shape(skip2)[-1]
    t = Add()([t, skip2])

    # Decoder stage 2 (+ skip from the first encoder stage).
    t = Conv2DTranspose(filters=n1 * 2, kernel_size=2, strides=2,
                        padding=pad, activation='relu')(t)
    t = Conv2D(filters=n1, kernel_size=3, padding=pad, activation='relu')(t)
    t = Conv2D(filters=n1, kernel_size=3, padding=pad, activation='relu')(t)
    assert ktf.int_shape(t)[-1] == ktf.int_shape(skip1)[-1]
    t = Add()([t, skip1])

    net_out = Conv2D(filters=3, kernel_size=3, padding=pad,
                     activation='relu')(t)

    model = keras.models.Model(inputs=net_in, outputs=net_out)
    optimizer = keras.optimizers.Adam(lr=1e-4)
    model.compile(optimizer=optimizer, loss=[my_mse], metrics=[loss2acc])
    return model
def call(self, inputs, initial_state=None, initial_readout=None, ground_truth=None, mask=None, training=None):
    """Run the wrapped step model over the time dimension of ``inputs``.

    Resolves initial states (explicit list, stateful, or zeros), wires
    optional readout / teacher-forcing auxiliary states, then drives the
    backend ``rnn`` loop (twice under learning-phase scopes when needed).
    """
    # input shape: `(samples, time (padded with zeros), input_dim)`
    # note that the .build() method of subclasses MUST define
    # self.input_spec and self.state_spec with complete input shapes.
    if type(mask) is list:
        mask = mask[0]
    if self.model is None:
        raise Exception('Empty RecurrentModel.')
    num_req_states = self.num_states
    if self.readout:
        # One of num_states is the readout slot, not a "real" state.
        num_actual_states = num_req_states - 1
    else:
        num_actual_states = num_req_states
    if type(inputs) is list:
        # Inputs packed as [x, state_1..state_n, (readout), (ground_truth)].
        inputs_list = inputs[:]
        inputs = inputs_list.pop(0)
        initial_states = inputs_list[:num_actual_states]
        if len(initial_states) > 0:
            if self._is_optional_input_placeholder(initial_states[0]):
                # Placeholder means "not provided": fall back to defaults.
                initial_states = self.get_initial_state(inputs)
        inputs_list = inputs_list[num_actual_states:]
        if self.readout:
            initial_readout = inputs_list.pop(0)
            if self.teacher_force:
                ground_truth = inputs_list.pop()
    else:
        if initial_state is not None:
            if not isinstance(initial_state, (list, tuple)):
                initial_states = [initial_state]
            else:
                initial_states = list(initial_state)
            if self._is_optional_input_placeholder(initial_states[0]):
                initial_states = self.get_initial_state(inputs)
        elif self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_state(inputs)
    if self.readout:
        if initial_readout is None or self._is_optional_input_placeholder(
                initial_readout):
            # Build a zeros readout whose batch dim tracks `inputs`:
            # slice a (batch_size,) vector, then reshape/tile to the
            # model's output shape.
            output_shape = K.int_shape(_to_list((self.model.output))[0])
            output_ndim = len(output_shape)
            input_ndim = K.ndim(inputs)
            initial_readout = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (input_ndim - 1)
            initial_readout = initial_readout[slices]  # (batch_size,)
            initial_readout = K.reshape(initial_readout,
                                        (-1, ) + (1, ) * (output_ndim - 1))
            initial_readout = K.tile(initial_readout,
                                     (1, ) + tuple(output_shape[1:]))
        initial_states.append(initial_readout)
        if self.teacher_force:
            if ground_truth is None or self._is_optional_input_placeholder(
                    ground_truth):
                raise Exception(
                    'ground_truth must be provided for RecurrentModel with teacher_force=True.'
                )
            if K.backend() == 'tensorflow':
                # Create the counter variable outside any control-flow
                # context.
                with tf.control_dependencies(None):
                    counter = K.zeros((1, ))
            else:
                counter = K.zeros((1, ))
            counter = K.cast(counter, 'int32')
            # Timestep counter and ground truth ride along as extra states
            # (inserted before the readout slot).
            initial_states.insert(-1, counter)
            # NOTE(review): the following bare subscript is a no-op
            # expression statement — looks like leftover debugging.
            initial_states[-2]
            initial_states.insert(-1, ground_truth)
            num_req_states += 2
    if len(initial_states) != num_req_states:
        raise ValueError('Layer requires ' + str(num_req_states) +
                         ' states but was passed ' +
                         str(len(initial_states)) + ' initial states.')
    input_shape = K.int_shape(inputs)
    if self.unroll and input_shape[1] is None:
        raise ValueError('Cannot unroll a RNN if the '
                         'time dimension is undefined. \n'
                         '- If using a Sequential model, '
                         'specify the time dimension by passing '
                         'an `input_shape` or `batch_input_shape` '
                         'argument to your first layer. If your '
                         'first layer is an Embedding, you can '
                         'also use the `input_length` argument.\n'
                         '- If using the functional API, specify '
                         'the time dimension by passing a `shape` '
                         'or `batch_shape` argument to your Input layer.')
    preprocessed_input = self.preprocess_input(inputs, training=None)
    constants = self.get_constants(inputs, training=None)
    if self.decode:
        # Decoder mode: the whole input becomes state 0 and a dummy
        # sequence of length `output_length` drives the loop.
        initial_states.insert(0, inputs)
        preprocessed_input = K.zeros((1, self.output_length, 1))
        input_length = self.output_length
    else:
        input_length = input_shape[1]
    if self.uses_learning_phase:
        # Trace the loop twice (test / train phase) and select per phase.
        with learning_phase_scope(0):
            last_output_test, outputs_test, states_test, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        with learning_phase_scope(1):
            last_output_train, outputs_train, states_train, updates = rnn(
                self.step,
                preprocessed_input,
                initial_states,
                go_backwards=self.go_backwards,
                mask=mask,
                constants=constants,
                unroll=self.unroll,
                input_length=input_length)
        last_output = K.in_train_phase(last_output_train,
                                       last_output_test,
                                       training=training)
        outputs = K.in_train_phase(outputs_train,
                                   outputs_test,
                                   training=training)
        states = []
        for state_train, state_test in zip(states_train, states_test):
            states.append(
                K.in_train_phase(state_train, state_test,
                                 training=training))
    else:
        last_output, outputs, states, updates = rnn(
            self.step,
            preprocessed_input,
            initial_states,
            go_backwards=self.go_backwards,
            mask=mask,
            constants=constants,
            unroll=self.unroll,
            input_length=input_length)
    states = list(states)
    # Strip the auxiliary states that were threaded through the loop.
    if self.decode:
        states.pop(0)
    if self.readout:
        states.pop()
        if self.teacher_force:
            states.pop()
            states.pop()
    if len(updates) > 0:
        self.add_update(updates)
    if self.stateful:
        updates = []
        for i in range(len(states)):
            updates.append((self.states[i], states[i]))
        self.add_update(updates, inputs)
    # Properly set learning phase
    if 0 < self.dropout + self.recurrent_dropout:
        last_output._uses_learning_phase = True
        outputs._uses_learning_phase = True
    if self.return_sequences:
        y = outputs
    else:
        y = last_output
    if self.return_states:
        return [y] + states
    else:
        return y
def __call__(self, inputs, initial_state=None, initial_readout=None, ground_truth=None, **kwargs):
    """Keras ``Layer.__call__`` override that packs optional tensors.

    Normalizes ``inputs`` to the full list
    ``[x, states..., (readout), (ground_truth)]`` — substituting optional
    input placeholders for anything not supplied — then performs the
    standard Keras build / call / inbound-node bookkeeping.
    """
    req_num_inputs = 1 + self.num_states
    inputs = _to_list(inputs)
    inputs = inputs[:]
    if len(inputs) == 1:
        # Only `x` given: append states (real or placeholder).
        if initial_state is not None:
            if type(initial_state) is list:
                inputs += initial_state
            else:
                inputs.append(initial_state)
        else:
            if self.readout:
                # One of num_states is the readout slot, handled below.
                initial_state = self._get_optional_input_placeholder(
                    'initial_state', self.num_states - 1)
            else:
                initial_state = self._get_optional_input_placeholder(
                    'initial_state', self.num_states)
            inputs += _to_list(initial_state)
        if self.readout:
            if initial_readout is None:
                initial_readout = self._get_optional_input_placeholder(
                    'initial_readout')
            inputs.append(initial_readout)
        if self.teacher_force:
            req_num_inputs += 1
            if ground_truth is None:
                ground_truth = self._get_optional_input_placeholder(
                    'ground_truth')
            inputs.append(ground_truth)
    assert len(inputs) == req_num_inputs, "Required " + str(
        req_num_inputs) + " inputs, received " + str(len(inputs)) + "."
    with K.name_scope(self.name):
        if not self.built:
            self.build(K.int_shape(inputs[0]))
            if self._initial_weights is not None:
                # Apply weights that were provided before build.
                self.set_weights(self._initial_weights)
                del self._initial_weights
                self._initial_weights = None
        # Mask is taken from the primary input only.
        previous_mask = _collect_previous_mask(inputs[:1])
        user_kwargs = kwargs.copy()
        if not _is_all_none(previous_mask):
            if 'mask' in inspect.getargspec(self.call).args:
                if 'mask' not in kwargs:
                    kwargs['mask'] = previous_mask
        input_shape = _collect_input_shape(inputs)
        output = self.call(inputs, **kwargs)
        output_mask = self.compute_mask(inputs[0], previous_mask)
        output_shape = self.compute_output_shape(input_shape[0])
        # Register this call in the Keras graph so (de)serialization and
        # shape/mask propagation work.
        self._add_inbound_node(input_tensors=inputs,
                               output_tensors=output,
                               input_masks=previous_mask,
                               output_masks=output_mask,
                               input_shapes=input_shape,
                               output_shapes=output_shape,
                               arguments=user_kwargs)
        if hasattr(self, 'activity_regularizer'
                   ) and self.activity_regularizer is not None:
            regularization_losses = [
                self.activity_regularizer(x) for x in _to_list(output)
            ]
            self.add_loss(regularization_losses, _to_list(inputs))
        return output
def __init__(self, input, output, initial_states=None, final_states=None, readout_input=None, teacher_force=False, decode=False, output_length=None, return_states=False, state_initializer=None, **kwargs):
    """Wrap a step graph (``input`` -> ``output``) as a RecurrentModel.

    ``initial_states`` / ``final_states`` are matching lists of state
    tensors threaded through each timestep; ``readout_input`` enables
    feeding the previous output back in; ``decode`` turns the layer into
    a fixed-length decoder; ``state_initializer`` overrides zero states.
    """
    inputs = [input]
    outputs = [output]
    state_spec = None
    if initial_states is not None:
        if type(initial_states) not in [list, tuple]:
            initial_states = [initial_states]
        state_spec = [
            InputSpec(shape=K.int_shape(state)) for state in initial_states
        ]
        if final_states is None:
            raise Exception('Missing argument : final_states')
        else:
            self.states = [None] * len(initial_states)
        inputs += initial_states
    else:
        self.states = []
        state_spec = []
    if final_states is not None:
        if type(final_states) not in [list, tuple]:
            final_states = [final_states]
        assert len(initial_states) == len(
            final_states
        ), 'initial_states and final_states should have same number of tensors.'
        if initial_states is None:
            raise Exception('Missing argument : initial_states')
        outputs += final_states
    self.decode = decode
    self.output_length = output_length
    if decode:
        if output_length is None:
            raise Exception(
                'output_length should be specified for decoder')
        # A decoder always emits the full sequence.
        kwargs['return_sequences'] = True
    self.return_states = return_states
    if readout_input is not None:
        self.readout = True
        # NOTE(review): this appends an Input *tensor* to a list of
        # InputSpec objects — looks inconsistent; confirm downstream use.
        state_spec += [Input(batch_shape=K.int_shape(outputs[0]))]
        self.states += [None]
        inputs += [readout_input]
    else:
        self.readout = False
    if teacher_force and not self.readout:
        raise Exception('Readout should be enabled for teacher forcing.')
    self.teacher_force = teacher_force
    self.model = Model(inputs, outputs)
    super(RecurrentModel, self).__init__(**kwargs)
    # The wrapper adds a time dimension at axis 1 (except when decoding).
    input_shape = list(K.int_shape(input))
    if not decode:
        input_shape.insert(1, None)
    self.input_spec = InputSpec(shape=tuple(input_shape))
    self.state_spec = state_spec
    self._optional_input_placeholders = {}
    if state_initializer:
        # Broadcast a single initializer to all states, or pad a short
        # list with None (-> zeros) for the remaining states.
        if type(state_initializer) not in [list, tuple]:
            state_initializer = [state_initializer] * self.num_states
        else:
            state_initializer += [None] * (self.num_states -
                                           len(state_initializer))
        state_initializer = [
            initializers.get(init) if init else initializers.get('zeros')
            for init in state_initializer
        ]
    self.state_initializer = state_initializer