def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
    """Build the per-token projection network used before attention scoring.

    Two Dense layers of width ``nr_hidden`` (with Dropout in between) are
    stacked in a Sequential model and wrapped in TimeDistributed; the
    wrapper is stored on ``self.model``.

    Args:
        max_length: Stored on the instance as ``self.max_length``.
        nr_hidden: Input width and output width of both Dense layers.
        dropout: Rate of the Dropout layer between the two Dense layers.
        L2: Strength of the L2 weight regularizer on both Dense layers.
        activation: Activation used by both Dense layers.  This argument
            used to be ignored in favour of a hard-coded ``'relu'``; the
            default keeps the previous behaviour.
    """
    self.max_length = max_length

    projection = Sequential()
    projection.add(
        Dense(nr_hidden, name='attend1', init='he_normal',
              W_regularizer=l2(L2), input_shape=(nr_hidden,),
              activation=activation))
    projection.add(Dropout(dropout))
    projection.add(
        Dense(nr_hidden, name='attend2', init='he_normal',
              W_regularizer=l2(L2), activation=activation))

    # Wrap the stack in TimeDistributed so one set of weights is shared.
    self.model = TimeDistributed(projection)
def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
    """Assemble the comparison feed-forward stack.

    The stack is Dropout -> Dense -> ReLU -> Dropout -> Dense -> ReLU,
    takes inputs of width ``nr_hidden * 2`` and is wrapped in
    TimeDistributed before being stored on ``self.model``.

    Args:
        words: Stored unchanged on ``self.words``.
        nr_hidden: Output width of both Dense layers.
        L2: Strength of the L2 weight regularizer on both Dense layers.
        dropout: Rate used by both Dropout layers.
    """
    self.words = words

    stack = Sequential()
    for layer in (
            Dropout(dropout, input_shape=(nr_hidden*2,)),
            Dense(nr_hidden, name='compare1', init='he_normal',
                  W_regularizer=l2(L2)),
            Activation('relu'),
            Dropout(dropout),
            Dense(nr_hidden, name='compare2', W_regularizer=l2(L2),
                  init='he_normal'),
            Activation('relu'),
    ):
        stack.add(layer)

    self.model = TimeDistributed(stack)
class _Attention(object):
    """Pairwise attention scores between two projected sequences.

    Both inputs go through the same TimeDistributed projection; the scores
    are a batched dot product of the two projections, produced through a
    Keras ``merge`` with a custom mode function.
    """

    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
        """Build the shared projection network.

        Args:
            max_length: Used for the declared ``output_shape`` of the merge
                in ``__call__`` (``(max_length, max_length)``).
            nr_hidden: Input width and output width of both Dense layers.
            dropout: Rate of both Dropout layers.
            L2: Strength of the L2 weight regularizer on both Dense layers.
            activation: Activation of both Dense layers.  This argument
                used to be ignored in favour of a hard-coded ``'relu'``;
                the default keeps the previous behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
        self.model.add(
            Dense(nr_hidden, name='attend1', init='he_normal',
                  W_regularizer=l2(L2), input_shape=(nr_hidden,),
                  activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(nr_hidden, name='attend2', init='he_normal',
                  W_regularizer=l2(L2), activation=activation))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the merged attention tensor for ``sent1`` and ``sent2``."""
        def _outer(AB):
            # AB is the [projected sent1, projected sent2] list given to
            # merge().  Dot sent2 against transposed sent1, then swap the
            # last two axes of the result.
            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
            return K.permute_dimensions(att_ji, (0, 2, 1))

        return merge(
            [self.model(sent1), self.model(sent2)],
            mode=_outer,
            output_shape=(self.max_length, self.max_length))
class _Attention(object):
    """Pairwise attention scores between two projected sequences (Theano ops).

    Both inputs go through the same TimeDistributed projection; the scores
    are a batched dot product of the two projections, produced through a
    Keras ``merge`` with a custom mode function.
    """

    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
        """Build the shared projection network.

        Args:
            max_length: Used for the declared ``output_shape`` of the merge
                in ``__call__`` (``(max_length, max_length)``).
            nr_hidden: Input width and output width of both Dense layers.
            dropout: Rate of the Dropout layer between the Dense layers.
            L2: Strength of the L2 weight regularizer on both Dense layers.
            activation: Activation of both Dense layers.  This argument
                used to be ignored in favour of a hard-coded ``'relu'``;
                the default keeps the previous behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(
            Dense(nr_hidden, name='attend1', init='he_normal',
                  W_regularizer=l2(L2), input_shape=(nr_hidden,),
                  activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(nr_hidden, name='attend2', init='he_normal',
                  W_regularizer=l2(L2), activation=activation))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the merged attention tensor for ``sent1`` and ``sent2``."""
        def _outer(AB):
            # Tuple parameters (``def _outer((A, B))``) are a syntax error
            # on Python 3; unpacking inside the body works on 2 and 3.
            A, B = AB
            att_ji = T.batched_dot(B, A.dimshuffle((0, 2, 1)))
            return att_ji.dimshuffle((0, 2, 1))

        return merge(
            [self.model(sent1), self.model(sent2)],
            mode=_outer,
            output_shape=(self.max_length, self.max_length))
trainable=False, )(sent_ints) sent_wv_dr = Dropout(drop_rate)(sent_wv) sent_wa = bidir_gru(sent_wv_dr, sent_n_units, is_GPU) sent_att_vec = AttentionWithContext()(sent_wa) sent_att_vec_dr = Dropout(drop_rate)(sent_att_vec) # skip connection sent_added = SkipConnection()([sent_att_vec_dr, sent_wv_dr]) sent_encoder = Model(sent_ints, sent_added) doc_ints = Input(shape=( docs_train.shape[1], docs_train.shape[2], )) sent_att_vecs_dr = TimeDistributed(sent_encoder)(doc_ints) doc_sa = bidir_gru(sent_att_vecs_dr, doc_n_units, is_GPU) doc_att_vec = AttentionWithContext()(doc_sa) doc_att_vec_dr = Dropout(drop_rate)(doc_att_vec) doc_att_vec_dr = LeakyReLU(alpha=0.01)(doc_att_vec_dr) doc_att_vec_dr = LeakyReLU(alpha=0.01)(doc_att_vec_dr) preds = Dense(units=1)(doc_att_vec_dr) model = Model(doc_ints, preds) model.compile(loss='mean_squared_error', optimizer=my_optimizer, metrics=['mae']) print('model compiled')
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None,
            input_length=None, hidden_dim=None, depth=1,
            broadcast_state=True, unroll=False, stateful=False,
            inner_broadcast_state=True, teacher_force=False, peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every
    timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
        Defaults to output_dim when not given.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.

    The input shape is taken from the first of batch_input_shape,
    input_shape (prefixed with batch_size) or input_dim (with input_length,
    or None when the length is not given) that is provided.

    Returns a Model whose inputs are the source sequence and, when
    teacher_force is set, a second ground-truth tensor. The encoder and
    decoder are attached as model.encoder and model.decoder.

    Raises TypeError when none of batch_input_shape, input_shape or
    input_dim is given.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the full batch shape:
    # batch_input_shape = (batch_size, timesteps, data_dim).
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Seq2Seq needs one of batch_input_shape, '
                        'input_shape or input_dim to determine the '
                        'input shape.')

    if hidden_dim is None:
        hidden_dim = output_dim

    # Encoder stack; with return_states set it also returns its states so
    # they can be handed to the decoder.
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # dense1 projects every timestep of the input to hidden_dim;
    # dense2 projects the encoder output to output_dim.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    # Readout feeds the RNN output of the previous timestep back in at the
    # current timestep.
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True, output_length=output_length,
                                  unroll=unroll, stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout,
                            batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states the encoder yields [output, ..., states]; the
        # last two entries are passed on as the decoder's initial state.
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length,
                                          output_dim))
        # Fixed: this used to read ``Trueoutput_dim``, which raised
        # NameError as soon as teacher forcing was enabled.
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def create_contextual_attention_model(self, returnEpEh=False):
    """Build the premise/hypothesis attention classifier into ``self.model``.

    Pipeline: embed and project both inputs, encode them with a shared
    BiLSTM, score token pairs, softmax-normalise the score matrix in both
    directions, align and compress, encode again with a final BiLSTM, then
    classify with a 3-way softmax.

    Args:
        returnEpEh: When true the model also outputs the two normalised
            attention matrices, as ``[Ep, Eh, prediction]``.
    """
    drop_prob = self.DropProb
    l2_strength = self.L2Strength

    def regularised_dense(**extra):
        # 200-unit Dense with L2 on kernel and bias, as used throughout.
        return Dense(200,
                     kernel_regularizer=l2(l2_strength),
                     bias_regularizer=l2(l2_strength),
                     **extra)

    # 0. (Optional) let TensorFlow grow its GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    set_session(tf.Session(config=session_config))

    # 1. Embed both inputs and project the embeddings.
    premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
    hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
    prem_seq = self.Embed(premise)        # [batchsize, Psize, Embedsize]
    hypo_seq = self.Embed(hypothesis)     # [batchsize, Hsize, Embedsize]
    embed_projection = TimeDistributed(regularised_dense(activation='relu'))
    prem_seq = Dropout(drop_prob)(embed_projection(prem_seq))  # [batchsize, Psize, units]
    hypo_seq = Dropout(drop_prob)(embed_projection(hypo_seq))  # [batchsize, Hsize, units]

    # 2. Encode each word together with its surrounding context.
    context_encoder = Bidirectional(
        LSTM(units=200, dropout=drop_prob, return_sequences=True))
    prem_seq = context_encoder(prem_seq)
    hypo_seq = context_encoder(hypo_seq)

    # 3. Score each word (two shared projection rounds), then build the
    #    score matrix.
    prem_scored, hypo_scored = prem_seq, hypo_seq
    for _ in range(2):
        score_layer = TimeDistributed(regularised_dense(activation='relu'))
        prem_scored = Dropout(drop_prob)(score_layer(prem_scored))  # [batch_size, Psize, units]
        hypo_scored = Dropout(drop_prob)(score_layer(hypo_scored))  # [batch_size, Hsize, units]

    scores = keras.layers.Dot(axes=(2, 2))(
        [hypo_scored, prem_scored])                       # [batch_size, Hsize, Psize]
    Eh = Lambda(lambda x: keras.activations.softmax(x))(
        scores)                                           # [batch_size, Hsize, Psize]
    Ep = keras.layers.Permute((2, 1))(scores)             # [batch_size, Psize, Hsize]
    Ep = Lambda(lambda x: keras.activations.softmax(x))(
        Ep)                                               # [batch_size, Psize, Hsize]

    # 4. Use the normalised scores to align each sentence with the other,
    #    concatenate with the encoding and compress back to 200 units.
    prem_aligned = keras.layers.Dot((2, 1))([Ep, hypo_seq])
    hypo_aligned = keras.layers.Dot((2, 1))([Eh, prem_seq])
    prem_aligned = keras.layers.Concatenate()(
        [prem_seq, prem_aligned])                         # [batch_size, Psize, 2*unit]
    hypo_aligned = keras.layers.Concatenate()(
        [hypo_seq, hypo_aligned])                         # [batch_size, Hsize, 2*unit]
    compresser = TimeDistributed(regularised_dense(), name='Compresser')
    prem_aligned = compresser(prem_aligned)
    hypo_aligned = compresser(hypo_aligned)

    # 5. Final BiLSTM encoder and dense head.
    final_encoder = Bidirectional(LSTM(units=200, dropout=drop_prob),
                                  name='finaldecoer')     # [-1, 2*units]
    prem_vec = final_encoder(prem_aligned)
    hypo_vec = final_encoder(hypo_aligned)
    head = keras.layers.Concatenate()([prem_vec, hypo_vec])
    for idx in range(2):
        head = Dense(200, name='3dense_' + str(idx),
                     activation='relu')(head)
        head = Dropout(drop_prob)(head)
        head = BatchNormalization()(head)

    # 6. Prediction by softmax.
    head = Dense(3, activation='softmax', name='judge')(head)

    model_outputs = [Ep, Eh, head] if returnEpEh else head
    self.model = Model(inputs=[premise, hypothesis], outputs=model_outputs)
def _build_network(self, network_input, network_output, additional_network_outputs):
    """Assemble the clustering network on top of the embedding network.

    The embeddings of all inputs are reshaped, merged along axis 1 and
    processed by residual bidirectional LSTM blocks.  From the result two
    kinds of output are produced:

    * for every input and every candidate cluster count ``k`` a softmax
      distribution over ``k`` clusters, and
    * one softmax over the candidate cluster counts.

    Args:
        network_input: Inputs handed to ``self._get_embedding``; its length
            determines how many per-object slices are created.
        network_output: List that receives the output tensors.  The
            per-object softmax outputs are appended first (object by object,
            and inside each object in ``cluster_counts`` order), followed by
            the cluster-count output.
        additional_network_outputs: Dict that receives ``'clusters'``
            (nested ``input<i>`` -> ``cluster<k>`` tensors) and
            ``'cluster_count_output'``.

    Returns:
        Always ``True``.

    NOTE(review): the lambdas passed to ``self._s_layer`` close over loop
    variables, which is only safe if ``_s_layer`` invokes the builder
    immediately - confirm against its implementation.
    """
    cluster_counts = list(self.data_provider.get_cluster_counts())

    # First we get an embedding for the network inputs
    embeddings = self._get_embedding(network_input)

    # Reshape all embeddings to 1d vectors
    embedding_shape = embeddings[0].shape
    # The str() round-trip is kept from the original; presumably it turns a
    # framework dimension object into a plain int - TODO confirm.
    embedding_size = int(str(np.prod(embedding_shape[1:])))
    embedding_reshaper = self._s_layer(
        'embedding_reshape',
        lambda name: Reshape((1, embedding_size), name=name))
    embeddings_reshaped = [
        embedding_reshaper(embedding) for embedding in embeddings
    ]

    # Merge all embeddings to one tensor
    embeddings_merged = self._s_layer(
        'embeddings_merge',
        lambda name: Concatenate(axis=1, name=name))(embeddings_reshaped)

    processed = embeddings_merged
    for i in range(len(self.__lstm_block_state_sizes)):
        # Force an even size so it can be split over the two directions.
        lstm_block_state_size = self.__lstm_block_state_sizes[i] // 2 * 2
        if i > 0:
            # For the initial block we do not want a Dense layer (that
            # should be part of the embedding network).
            processed = BatchNormalization()(processed)
            processed = self._s_layer(
                'INTERNAL_STATE_CHANGE{}'.format(i),
                lambda name: TimeDistributed(
                    Dense(lstm_block_state_size)))(processed)
            processed = LeakyReLU()(processed)
        for j in range(self.__lstm_block_size):
            tmp = self._s_layer(
                'LSTM_proc_{}_{}'.format(i, j),
                lambda name: Bidirectional(
                    LSTM(lstm_block_state_size // 2, return_sequences=True),
                    name=name))(processed)
            # Residual connection around every BiLSTM.
            processed = Add()([processed, tmp])

    # Split the tensor into separate per-object layers
    embeddings_processed = [
        self._s_layer('slice_{}'.format(i),
                      lambda name: slice_layer(processed, i, name))
        for i in range(len(network_input))
    ]

    # Create now two outputs: the cluster count and, for each cluster count /
    # object combination, a softmax distribution. These outputs are
    # independent of each other, so the order of construction does not
    # matter. Start with the cluster count / object combinations.

    # First prepare some generally required layers
    layers = []
    for i in range(self.__output_dense_layers):
        layers += [
            self._s_layer(
                'output_dense{}'.format(i),
                lambda name: Dense(self.__output_dense_units, name=name)),
            # Fixed: the name used to be 'output_batch'.format(i) without a
            # placeholder, so every batch-norm layer got the same name.
            # Note this renames the layer to 'output_batch<i>'.
            self._s_layer(
                'output_batch{}'.format(i),
                lambda name: BatchNormalization(name=name)),
            LeakyReLU(),
        ]
    cluster_softmax = {
        k: self._s_layer(
            'softmax_cluster_{}'.format(k),
            lambda name: Dense(k, activation='softmax', name=name))
        for k in cluster_counts
    }

    # Create now the outputs
    clusters_output = additional_network_outputs['clusters'] = {}
    for i, embedding_proc in enumerate(embeddings_processed):

        # Add the required layers
        for layer in layers:
            embedding_proc = layer(embedding_proc)

        input_clusters_output = clusters_output['input{}'.format(i)] = {}
        for k in cluster_counts:

            # Create now the required softmax distributions
            output_classifier = cluster_softmax[k](embedding_proc)
            input_clusters_output['cluster{}'.format(k)] = output_classifier
            network_output.append(output_classifier)

    # Calculate the real cluster count
    assert self.__cluster_count_lstm_layers >= 1
    # Fixed: start from the merged embeddings and feed every LSTM the output
    # of the previous one. Before, each stacked LSTM was applied to
    # embeddings_merged (discarding the earlier ones) and, with exactly one
    # LSTM layer, cluster_count was read before it was ever assigned.
    cluster_count = embeddings_merged
    for i in range(self.__cluster_count_lstm_layers - 1):
        cluster_count = self._s_layer(
            'cluster_count_LSTM{}'.format(i),
            lambda name: Bidirectional(
                LSTM(self.__cluster_count_lstm_units,
                     return_sequences=True),
                name=name)(cluster_count))
        cluster_count = self._s_layer(
            'cluster_count_LSTM{}_batch'.format(i),
            lambda name: BatchNormalization(name=name))(cluster_count)
    cluster_count = self._s_layer(
        'cluster_count_LSTM_merge',
        lambda name: Bidirectional(
            LSTM(self.__cluster_count_lstm_units),
            name=name)(cluster_count))
    cluster_count = self._s_layer(
        'cluster_count_LSTM_merge_batch',
        lambda name: BatchNormalization(name=name))(cluster_count)
    for i in range(self.__cluster_count_dense_layers):
        cluster_count = self._s_layer(
            'cluster_count_dense{}'.format(i),
            lambda name: Dense(self.__cluster_count_dense_units,
                               name=name))(cluster_count)
        cluster_count = self._s_layer(
            'cluster_count_batch{}'.format(i),
            lambda name: BatchNormalization(name=name))(cluster_count)
        cluster_count = LeakyReLU()(cluster_count)

    # The next layer is an output layer, therefore the name must not be
    # formatted
    cluster_count = self._s_layer(
        'cluster_count_output',
        lambda name: Dense(len(cluster_counts), activation='softmax',
                           name=name),
        format_name=False)(cluster_count)
    additional_network_outputs['cluster_count_output'] = cluster_count

    network_output.append(cluster_count)

    return True
def melody_ResNet_joint_add(options):
    """Build the two-output melody model.

    A convolutional stem and three ``ResNet_Block`` stages feed a BiLSTM
    that produces the per-frame output named ``"output"``.  In parallel the
    pooled stage outputs are concatenated and passed through a second,
    smaller BiLSTM that produces a 2-way per-frame output; this is added to
    ``[p, 1 - p]`` (``p`` being channel 0 of ``"output"``) and soft-maxed
    into ``"output_V"``.

    Args:
        options: Object providing ``resolution``, ``input_size`` and
            ``num_spec``.

    Returns:
        A Model with outputs ``[output, output_V]``.
    """
    def _conv(filters, kernel, **extra):
        # Bias-free, he_normal, L2-regularised "same" convolution.
        return Conv2D(
            filters,
            kernel,
            padding="same",
            kernel_initializer="he_normal",
            use_bias=False,
            kernel_regularizer=l2(1e-5),
            **extra
        )

    n_frames = options.input_size
    num_output = int(45 * 2**(math.log(options.resolution, 2)) + 2)

    spec_in = Input(shape=(n_frames, options.num_spec, 1))

    # Stem: conv -> BN -> LeakyReLU -> conv.
    block_1 = _conv(64, (3, 3), name="conv1_1")(spec_in)
    block_1 = BatchNormalization()(block_1)
    block_1 = LeakyReLU(0.01)(block_1)
    block_1 = _conv(64, (3, 3), name="conv1_2")(block_1)

    # Residual stages.
    block_2 = ResNet_Block(input=block_1, block_id=2, filterNum=128)
    block_3 = ResNet_Block(input=block_2, block_id=3, filterNum=192)
    block_4 = ResNet_Block(input=block_3, block_id=4, filterNum=256)

    block_4 = BatchNormalization()(block_4)
    block_4 = LeakyReLU(0.01)(block_4)
    block_4 = MaxPooling2D((1, 4))(block_4)
    block_4 = Dropout(0.5)(block_4)

    # Main branch.
    numOutput_P = 2 * block_4.shape[3]
    output = Reshape((n_frames, numOutput_P))(block_4)
    main_rnn = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.3, dropout=0.3))
    output = main_rnn(output)
    output = TimeDistributed(Dense(num_output))(output)
    output = TimeDistributed(Activation("softmax"), name="output")(output)

    # Auxiliary branch: pool the earlier stages and join all four.
    block_1 = MaxPooling2D((1, 4**4))(block_1)
    block_2 = MaxPooling2D((1, 4**3))(block_2)
    block_3 = MaxPooling2D((1, 4**2))(block_3)

    joint = concatenate([block_1, block_2, block_3, block_4])
    joint = _conv(256, (1, 1))(joint)
    joint = BatchNormalization()(joint)
    joint = LeakyReLU(0.01)(joint)
    joint = Dropout(0.5)(joint)

    num_V = joint.shape[3] * 2
    output_V = Reshape((n_frames, num_V))(joint)
    aux_rnn = Bidirectional(
        LSTM(
            32,
            return_sequences=True,
            stateful=False,
            recurrent_dropout=0.3,
            dropout=0.3,
        ))
    output_V = aux_rnn(output_V)
    output_V = TimeDistributed(Dense(2))(output_V)
    output_V = TimeDistributed(Activation("softmax"))(output_V)

    # Channel 0 of the main output and its complement, stacked as 2 channels.
    output_NS = Lambda(lambda x: x[:, :, 0])(output)
    output_NS = Reshape((n_frames, 1))(output_NS)
    output_S = Lambda(lambda x: 1 - x[:, :, 0])(output)
    output_S = Reshape((n_frames, 1))(output_S)
    output_VV = concatenate([output_NS, output_S])

    output_V = add([output_V, output_VV])
    output_V = TimeDistributed(Activation("softmax"), name="output_V")(output_V)

    return Model(inputs=spec_in, outputs=[output, output_V])
# Build RNN model model = Sequential() # build LSTM RNN model.add( LSTM( batch_input_shape=(BATCH_SIZE, TIMES_STEPS, INPUT_SIZE), output_dim=CELL_SIZE, # default is false only output at last time step # however when True model return output each time step return_sequences=True, # true if batch is related to next batch stateful=True)) # output layer model.add(TimeDistributed(Dense(OUTPUT_SIZE))) # We add metrics to get more results you want to see adam = Adam(LR) model.compile( optimizer= adam, #can also use the default 'adam' with the quotes but cannot adjust learning rate loss='mse', ) # training print("training---------------") for step in range(4001): # batch processing slicing from X_Train and Y_Train X_batch, Y_batch, xs = get_batch() cost = model.train_on_batch(X_batch, Y_batch)
A_filt_sizes, Ahat_filt_sizes, R_filt_sizes, output_mode="error", return_sequences=True) layer_config_base = prednet_base_dynamic.get_config() layer_config_base["name"] = "prednet_dynamic" prednet_dynamic = PredNet_dynamic(**layer_config_base) errors_static = prednet_static( inputs_static) # errors will be (batch_size, nt, nb_layers) errors_dynamic = prednet_dynamic(inputs_dynamic) # Error_static errors_by_time_static = TimeDistributed( Dense(1, trainable=False), weights=[layer_loss_weights_static, np.zeros(1)], trainable=False)(errors_static) errors_by_time_static = Flatten()( errors_by_time_static) # will be (batch_size, nt) final_errors_static = Dense(1, weights=[time_loss_weights_static, np.zeros(1)], trainable=False)( errors_by_time_static) # weight errors by time # Error_dynamic errors_by_time_dynamic = TimeDistributed( Dense(1, trainable=False), weights=[layer_loss_weights_dynamic, np.zeros(1)], trainable=False)(errors_dynamic)
def acl_vgg(data, stateful):
    """Build the attention-augmented ConvLSTM head on top of ``dcn_vgg``.

    Every frame of ``data`` goes through the ``dcn_vgg`` backbone.  A small
    convolutional stack turns the backbone features into a one-channel
    sigmoid attention map, which is broadcast over 512 channels and applied
    to the features residually (``outs + outs * attention``).  The result is
    fed to a ConvLSTM2D, a 1x1 sigmoid convolution and an upsampling layer.

    Args:
        data: Input tensor passed to ``TimeDistributed(dcn_vgg())``.
        stateful: Forwarded to ``ConvLSTM2D``.

    Returns:
        ``[outs, outs, outs, attention, attention, attention]`` - the final
        map three times followed by the (further upsampled) attention map
        three times.
    """
    backbone = dcn_vgg()
    outs = TimeDistributed(backbone)(data)

    # Attention branch: applied in order, each layer wrapped per frame.
    attention_stack = [
        MaxPooling2D((2, 2), strides=(2, 2), padding='same'),
        Conv2D(64, (1, 1), padding='same', activation='relu'),
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        MaxPooling2D((2, 2), strides=(2, 2), padding='same'),
        Conv2D(64, (1, 1), padding='same', activation='relu'),
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        Conv2D(1, (1, 1), padding='same', activation='sigmoid'),
        UpSampling2D(4),
    ]
    attention = outs
    for inner in attention_stack:
        attention = TimeDistributed(inner)(attention)

    # Broadcast the single attention channel over 512 feature channels:
    # flatten, repeat 512 times, move the repeat axis last, reshape back.
    f_attention = attention
    for inner in (Flatten(), RepeatVector(512), Permute((2, 1)),
                  Reshape((32, 40, 512))):  # original inline note: "30"
        f_attention = TimeDistributed(inner)(f_attention)

    # Residual attention: outs + outs * attention.
    m_outs = Multiply()([outs, f_attention])
    outs = Add()([outs, m_outs])

    temporal = ConvLSTM2D(filters=256,
                          kernel_size=(3, 3),
                          padding='same',
                          return_sequences=True,
                          stateful=stateful,
                          dropout=0.4)
    outs = temporal(outs)

    outs = TimeDistributed(
        Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(outs)
    outs = TimeDistributed(UpSampling2D(4))(outs)
    attention = TimeDistributed(UpSampling2D(2))(attention)

    return [outs] * 3 + [attention] * 3
class _Comparison(object):
    """Compare each token with its aligned counterpart and pool the result.

    The per-token comparison network is a TimeDistributed two-layer ReLU
    stack; its output is average- and max-pooled, the two pooled vectors
    are merged and the result is batch-normalised.
    """

    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
        """Build the comparison stack.

        Args:
            words: Stored on ``self.words`` and passed as ``mask`` to the
                pooling layers in ``__call__``.
            nr_hidden: Output width of both Dense layers; the stack expects
                inputs of width ``nr_hidden * 2``.
            L2: Strength of the L2 weight regularizer on both Dense layers.
            dropout: Rate used by both Dropout layers.
        """
        self.words = words

        stack = Sequential()
        stack.add(Dropout(dropout, input_shape=(nr_hidden * 2, )))
        stack.add(Dense(nr_hidden, name='compare1', init='he_normal',
                        W_regularizer=l2(L2)))
        stack.add(Activation('relu'))
        stack.add(Dropout(dropout))
        stack.add(Dense(nr_hidden, name='compare2', W_regularizer=l2(L2),
                        init='he_normal'))
        stack.add(Activation('relu'))

        self.model = TimeDistributed(stack)

    def __call__(self, sent, align, **kwargs):
        """Return the pooled, normalised comparison of ``sent`` and ``align``."""
        compared = self.model(merge([sent, align], mode='concat'))  # Shape: (i, n)
        avged = GlobalAveragePooling1D()(compared, mask=self.words)
        maxed = GlobalMaxPooling1D()(compared, mask=self.words)
        pooled = merge([avged, maxed])
        return BatchNormalization()(pooled)
# You can treat any model as if it were a layer, by calling it on a tensor.
# (`model` is the model defined earlier; calling the `Model` class itself on
# a tensor, as this line used to do, does not apply the existing model.)
x = Input(shape=(784, ))
y = model(x)

# This makes it possible, for instance, to quickly create models that can
# process *sequences* of inputs. You could turn an image classification model
# into a video classification model in just one line.
from keras.layers import TimeDistributed

# Input tensor for sequences of 20 timesteps,
# each containing a 784-dimensional vector.
input_sequences = Input(shape=(20, 784))

# This applies our previous model to every timestep in the input sequences.
# The output of the previous model was a 10-way softmax,
# so the output of the layer below will be a sequence of 20 vectors of size 10.
processed_sequences = TimeDistributed(model)(input_sequences)

## Multi-input and multi-output models
# The main input will receive the headline, as a sequence of integers (each
# integer encodes a word). The integers will be between 1 and 10,000 (a
# vocabulary of 10,000 words) and the sequences will be 100 words long.
from keras.layers import Input, Embedding, LSTM, Dense, merge
from keras.models import Model

# Headline input: meant to receive sequences of 100 integers, between 1 and
# 10000. Note that we can name any layer by passing it a "name" argument.
main_input = Input(shape=(100, ), dtype='int32', name='main_input')
########################
# One-hot templates for the tokens and the items
tokens_6 = [0, 0, 0, 0, 0, 1]
items_26 = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1
]

# Feature widths shared by both models: token vectors carry one extra slot.
_token_width = len(tokens_6) + 1
_item_width = len(items_26)

# Botvinick and Plaut model: token sequence in, token distribution out
sr_input = Input(shape=(None, _token_width), name="sr_input")
sr_lstm = LSTM(units=50, return_sequences=True, name="sr_lstm")(sr_input)
_sr_readout = Dense(
    units=_token_width,
    activation="softmax",
    name="sr_output",
)
sr_output = TimeDistributed(_sr_readout)(sr_lstm)
sr_model = Model(inputs=sr_input, outputs=[sr_output])

# Binding pool model: item and token sequences in, item distribution out
bp_item_input = Input(shape=(None, _item_width), name="bp_item_input")
bp_token_input = Input(shape=(None, _token_width), name="bp_token_input")
bp_all_inputs = keras.layers.concatenate([bp_item_input, bp_token_input])
bp_lstm = LSTM(units=50, return_sequences=True,
               name="bp_lstm")(bp_all_inputs)
_bp_readout = Dense(units=_item_width,
                    activation="softmax",
                    name="bp_output")
bp_output = TimeDistributed(_bp_readout)(bp_lstm)
bp_model = Model(inputs=[bp_item_input, bp_token_input],
                 outputs=[bp_output])
print(len(train_X)) print(len(test_X)) train_X = np.array(train_X) test_X = np.array(test_X) train_label = np.array(train_label) test_label = np.array(test_label) # valid_label = np.array(valid_label) # valid_X = np.array(valid_X) train_X = train_X.reshape(train_X.shape[0], 1, 50, 1) test_X = test_X.reshape(test_X.shape[0], 1, 50, 1) model = Sequential() #add model layers model.add( TimeDistributed( Conv1D(128, kernel_size=1, activation='relu', input_shape=(None, 50, 1)))) model.add(TimeDistributed(MaxPooling1D(2))) model.add(TimeDistributed(Conv1D(256, kernel_size=1, activation='relu'))) model.add(TimeDistributed(MaxPooling1D(2))) model.add(TimeDistributed(Conv1D(512, kernel_size=1, activation='relu'))) model.add(TimeDistributed(MaxPooling1D(2))) model.add(TimeDistributed(Flatten())) model.add(Bidirectional(LSTM(200, return_sequences=True))) model.add(Dropout(0.25)) model.add(Bidirectional(LSTM(200, return_sequences=False))) model.add(Dropout(0.5)) model.add(Dense(1, activation='linear')) model.compile(optimizer='RMSprop', loss='mse') model.fit(train_X,
def start_training(self):
    """Train the stacked-LSTM token model, save it and print two samples.

    Steps:
      1. Draw random START/END/UNK token vectors and save them as ``.npy``
         files under ``self.BASIC_PERSISTENCE_DIR``.
      2. Call ``self._split_count_data()``.
      3. Build Embedding -> LSTM -> Dropout -> LSTM -> Dropout ->
         TimeDistributed(softmax Dense) and compile it.
      4. Train with ``fit_generator`` on ``self.serve_batch()`` using
         TensorBoard and checkpoint callbacks, then save to
         ``self.model_file``.
      5. Print decoded predictions for ``self.input_texts[0]`` and
         ``self.input_texts[10000]``.

    Reads ``self.params``, ``self.embedding_matrix``, ``self.num_samples``,
    ``self.input_texts`` and ``self.word_index``, so these must be set
    before this method is called.
    """
    # np.math was only an alias of the stdlib module and no longer exists in
    # current NumPy releases; import the real module locally instead.
    import math

    def _print_prediction(sample):
        """Print ``sample``, the raw per-token predictions and the decoded sentence."""
        print(sample)
        prediction = M.predict(sample)
        predicted_sentence = ''
        for sentence in prediction:
            for token in sentence:
                print(token)
                print(token.shape)
                max_idx = np.argmax(token)
                print(max_idx)
                if max_idx == 0:
                    print("id of max token = 0")
                    # Index 0 won: fall back to the best of the remaining
                    # entries. argmax over token[1:] is relative to the
                    # shortened array, so add 1 to get back to the original
                    # index. (The previous np.delete/argmax lookup was off by
                    # one.)
                    fallback_idx = int(np.argmax(token[1:])) + 1
                    predicted_sentence += reverse_word_index[fallback_idx]
                else:
                    print(reverse_word_index[max_idx])
                    predicted_sentence += reverse_word_index[max_idx]
        print(predicted_sentence)

    embedding_dim = self.params['EMBEDDING_DIM']
    self.START_TOKEN_VECTOR = np.random.rand(embedding_dim)
    self.END_TOKEN_VECTOR = np.random.rand(embedding_dim)
    self.UNK_TOKEN_VECTOR = np.random.rand(embedding_dim)
    np.save(self.BASIC_PERSISTENCE_DIR + '/start_token_vector.npy',
            self.START_TOKEN_VECTOR)
    np.save(self.BASIC_PERSISTENCE_DIR + '/end_token_vector.npy',
            self.END_TOKEN_VECTOR)
    np.save(self.BASIC_PERSISTENCE_DIR + '/unk_token_vector.npy',
            self.UNK_TOKEN_VECTOR)

    self._split_count_data()

    # The vocabulary is MAX_WORDS plus three extra ids.
    vocab_size = self.params['MAX_WORDS'] + 3
    dropout_rate = self.params['P_DENSE_DROPOUT']

    M = Sequential()
    M.add(Embedding(vocab_size, embedding_dim,
                    weights=[self.embedding_matrix], mask_zero=True,
                    trainable=False))
    M.add(LSTM(self.params['latent_dim'], return_sequences=True))
    M.add(Dropout(dropout_rate))
    # The second LSTM is widened by 1 / dropout_rate.
    # NOTE(review): a dropout rate of 0 raises ZeroDivisionError here.
    M.add(LSTM(self.params['latent_dim'] * int(1 / dropout_rate),
               return_sequences=True))
    M.add(Dropout(dropout_rate))
    M.add(TimeDistributed(
        Dense(vocab_size,
              input_shape=(None, self.params['num_tokens'], vocab_size),
              activation='softmax')))

    print('compiling')
    M.compile(optimizer='Adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
    M.summary()
    print('compiled')

    # Each Keras "epoch" is only `steps` batches long, so the epoch count is
    # scaled up to cover the data params['epochs'] times.
    steps = 4
    mod_epochs = math.floor(self.num_samples / self.params['batch_size'] /
                            steps * self.params['epochs'])
    tbCallBack = callbacks.TensorBoard(log_dir=self.GRAPH_DIR,
                                       histogram_freq=0, write_graph=True,
                                       write_images=True)
    modelCallback = callbacks.ModelCheckpoint(
        self.MODEL_CHECKPOINT_DIR + '/model.{epoch:02d}-{loss:.2f}.hdf5',
        monitor='loss', verbose=1, save_best_only=False,
        save_weights_only=True, mode='auto',
        period=mod_epochs / self.params['epochs'])
    M.fit_generator(self.serve_batch(), steps, epochs=mod_epochs, verbose=2,
                    max_queue_size=15,
                    callbacks=[tbCallBack, modelCallback])
    M.save(self.model_file)

    reverse_word_index = dict(
        (i, word) for word, i in self.word_index.items())

    print('\n\n Test prediction:')
    _print_prediction(self.input_texts[0])
    print("\n\n")
    # NOTE(review): assumes more than 10000 input texts are loaded.
    _print_prediction(self.input_texts[10000])
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, y1, n1, y2, n2, y3, n3, y4, n4, y5, n5, y6, n6, hidden_size, num_layers):
    """Build and compile the seven-input, seven-output tagging model.

    Inputs are the current word plus three right and three left context
    words, all embedded with one shared Embedding layer and each summarised
    by its own BiLSTM.

    * Output 0 is a per-timestep softmax over ``y_vocab_len``; it is
      computed from the current word's embedding concatenated with its
      repeated BiLSTM summary.
    * Outputs 1-6 are softmax heads of sizes ``n1`` .. ``n6``; they are
      computed from the current word's embedding concatenated with the
      repeated summaries of all seven words.

    Args:
        X_vocab_len: Input vocabulary size for the embedding layer.
        X_max_len: Length of every input sequence.
        y_vocab_len: Size of the per-timestep output softmax.
        y_max_len: Repeat count for the encoded summary vectors.
        n1 .. n6: Sizes of the six softmax heads.
        num_layers: Number of stacked BiLSTM layers in front of the
            per-timestep output.
        y1 .. y6, hidden_size: Accepted for interface compatibility; not
            used in the body.

    Returns:
        The compiled Keras Model.

    Reads the module-level names ``EMBEDDING_DIM`` and ``dropout``.
    """
    def smart_merge(vectors, **kwargs):
        # merge() needs at least two tensors; pass a single one through.
        return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)

    current_word = Input(shape=(X_max_len,), dtype='int32')

    right_word1 = Input(shape=(X_max_len,), dtype='int32')
    right_word2 = Input(shape=(X_max_len,), dtype='int32')
    right_word3 = Input(shape=(X_max_len,), dtype='int32')

    left_word1 = Input(shape=(X_max_len,), dtype='int32')
    left_word2 = Input(shape=(X_max_len,), dtype='int32')
    left_word3 = Input(shape=(X_max_len,), dtype='int32')

    emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM,
                          input_length=X_max_len,
                          mask_zero=True)

    current_word_embedding = emb_layer(current_word)  # POSITION of layer

    right_word_embedding1 = emb_layer(right_word1)  # these are the left shifted X by 1
    right_word_embedding2 = emb_layer(right_word2)  # left shifted by 2
    right_word_embedding3 = emb_layer(right_word3)

    # these are the right shifted X by 1, i.e. the left word is at current position
    left_word_embedding1 = emb_layer(left_word1)
    left_word_embedding2 = emb_layer(left_word2)
    left_word_embedding3 = emb_layer(left_word3)

    # One summarising BiLSTM per word (no weight sharing between them).
    BidireLSTM_curr = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(current_word_embedding)
    BidireLSTM_right1 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding1)
    BidireLSTM_right2 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding2)
    BidireLSTM_right3 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding3)
    BidireLSTM_left1 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding1)
    BidireLSTM_left2 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding2)
    BidireLSTM_left3 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding3)

    RepLayer = RepeatVector(y_max_len)
    RepVec = RepLayer(BidireLSTM_curr)
    Emb_plus_repeat = smart_merge([current_word_embedding, RepVec],
                                  mode='concat')

    # Fixed: every new layer used to be applied to Emb_plus_repeat again, so
    # only the last one was part of the model and num_layers had no effect
    # (and num_layers == 0 raised NameError). Each layer now consumes the
    # output of the previous one; with num_layers == 1 nothing changes.
    temp = Emb_plus_repeat
    for _ in range(num_layers):
        LtoR_LSTM = Bidirectional(LSTM(40, dropout=dropout, return_sequences=True))
        temp = LtoR_LSTM(temp)

    # for each time step in the input, we intend to output |y_vocab_len| time steps
    time_dist_layer = TimeDistributed(Dense(y_vocab_len))(temp)
    outputs = Activation('softmax')(time_dist_layer)

    # Only for the tags prediction, will we be requiring the context words
    concatenated_encodings = smart_merge(
        [BidireLSTM_curr,
         BidireLSTM_left1, BidireLSTM_right1,
         BidireLSTM_left2, BidireLSTM_right2,
         BidireLSTM_left3, BidireLSTM_right3],
        mode='concat')

    RepVec = RepLayer(concatenated_encodings)
    Emb_plus_repeat = smart_merge([current_word_embedding, RepVec],
                                  mode='concat')

    BidireLSTM_vector = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(Emb_plus_repeat)

    out1 = Dense(n1, activation='softmax')(BidireLSTM_vector)
    out2 = Dense(n2, activation='softmax')(BidireLSTM_vector)
    out3 = Dense(n3, activation='softmax')(BidireLSTM_vector)
    out4 = Dense(n4, activation='softmax')(BidireLSTM_vector)
    out5 = Dense(n5, activation='softmax')(BidireLSTM_vector)
    out6 = Dense(n6, activation='softmax')(BidireLSTM_vector)

    all_inputs = [current_word, right_word1, right_word2, right_word3,
                  left_word1, left_word2, left_word3]
    all_outputs = [outputs, out1, out2, out3, out4, out5, out6]

    model = Model(input=all_inputs, output=all_outputs)
    # The model has always been compiled with 'rmsprop'; the Adam instance
    # that used to be created here was never used and has been removed.
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['accuracy'],
                  loss_weights=[1., 1., 1., 1., 1., 1., 1.])

    return model
class _Comparison(object):
    """Time-distributed two-layer ReLU network followed by a masked sum.

    The wrapped network is applied at every time step to the concatenation
    of a sentence tensor and its alignment tensor; the per-step results are
    then reduced by ``_GlobalSumPooling1D`` using ``words`` as the mask.
    """

    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
        """Build the per-step comparison network.

        words: tensor handed to the pooling layer as ``mask``.
        nr_hidden: width of both dense layers; the first layer expects
            ``nr_hidden * 2`` input features.
        L2: weight-regularisation strength for both dense layers.
        dropout: dropout rate applied after each ReLU.
        """
        self.words = words
        stack = Sequential()
        layers = (
            Dense(nr_hidden, name='compare1', init='he_normal',
                  W_regularizer=l2(L2), input_shape=(nr_hidden * 2,)),
            Activation('relu'),
            Dropout(dropout),
            Dense(nr_hidden, name='compare2', W_regularizer=l2(L2),
                  init='he_normal'),
            Activation('relu'),
            Dropout(dropout),
        )
        for layer in layers:
            stack.add(layer)
        # Wrap so the same network runs independently on every time step.
        self.model = TimeDistributed(stack)

    def __call__(self, sent, align, **kwargs):
        """Compare ``sent`` with ``align`` and return the pooled result."""
        paired = merge([sent, align], mode='concat')
        compared = self.model(paired)  # Shape: (i, n)
        return _GlobalSumPooling1D()(compared, mask=self.words)
activation=None, padding='same', dilation_rate=16)(od7) bd8 = BatchNormalization()(cd8) ad8 = Activation('relu')(bd8) dd8 = Dropout(conv_dropout)(ad8) od8 = concatenate([od7, dd8]) # Self-Attention Layer # atn = MultiHead(AttentionDilated(attn_units, dilation_rate=attn_dilation), layer_num=attn_heads)(od4) # dat = Dropout(attn_dropout)(atn) # fat = TimeDistributed(Flatten())(dat) # oat = concatenate([fat, od4]) out = TimeDistributed(Dense(3, activation='relu'))(od8) # define model model = Model(inputs=seqs, outputs=out) seq_forward = Input(shape=(None, 4)) seq_revcomp = Lambda(rev_comp)(seq_forward) output_forward = model(seq_forward) output_revcomp = model(seq_revcomp) output_back = Lambda(lambda x: K.reverse(x, -2))(output_revcomp) model_bi = Model(inputs=seq_forward, outputs=[output_forward, output_back]) learning_rate = 0.002 beta_1 = 0.97
# Use a bidirectional LSTM in the model and place a CRF layer as the
# model's output layer.
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from keras_contrib.layers import CRF

# The CRF layer is kept in its own variable so it can be referenced later.
crf = CRF(n_labels)
model = Sequential([
    Embedding(input_dim=n_words,
              output_dim=20,
              input_length=max_len,
              mask_zero=True),
    Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1)),
    TimeDistributed(Dense(50, activation="relu")),
    crf,
])

# In[30]:
from keras.utils import np_utils

y_train2 = np_utils.to_categorical(y_train)  # one-hot encoding

# In[31]:
pd.set_option('max_colwidth', 800)

# In[32]:
y_train2[0]
words = Embedding(input_dim=wordEmbeddings.shape[0], output_dim=wordEmbeddings.shape[1], weights=[wordEmbeddings], trainable=False)(words_input) casing_input = Input(shape=(None, ), dtype='int32', name='casing_input') casing = Embedding(output_dim=caseEmbeddings.shape[1], input_dim=caseEmbeddings.shape[0], weights=[caseEmbeddings], trainable=False)(casing_input) character_input = Input(shape=( None, 52, ), name='char_input') embed_char_out = TimeDistributed(Embedding( len(char2Idx), 30, embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)), name='char_embedding')(character_input) dropout = Dropout(0.5)(embed_char_out) conv1d_out = TimeDistributed( Conv1D(kernel_size=3, filters=30, padding='same', activation='tanh', strides=1))(dropout) maxpool_out = TimeDistributed(MaxPooling1D(52))(conv1d_out) char = TimeDistributed(Flatten())(maxpool_out) char = Dropout(0.5)(char) output = concatenate([words, casing, char]) output = Bidirectional( LSTM(200, return_sequences=True, dropout=0.50,
strides=30, padding='valid', activation='relu')(main_input) #main_path = Conv1D(filters=7, kernel_size=2, strides = 2, padding='valid', activation = 'relu')(main_input) main_path = concatenate([main_path, status_input], axis=-1) main_path = LSTM(units=30, return_sequences=True)(main_path) main_path = LSTM(units=15, return_sequences=True)(main_path) #s path s_path = Dense(units=30, activation='relu')(s_input) s_path = Dense(units=15, activation='relu')(s_path) #d path d_path = d_input d_path = concatenate([d_path, status_input], axis=-1) d_path = TimeDistributed(Dense(units=30, activation='relu'))(d_path) d_path = TimeDistributed(Dense(units=15, activation='relu'))(d_path) #merge path s_path = Reshape((1, K.int_shape(s_path)[1]))(s_path) s_paths = concatenate([s_path] * 24, axis=1) merge_path = concatenate([main_path, s_paths, d_path], axis=-1) merge_path = TimeDistributed( Dense(units=30, kernel_initializer='he_normal', activation='relu'))(merge_path) merge_path = TimeDistributed( Dense(units=15, kernel_initializer='he_normal', activation='relu'))(merge_path) output = TimeDistributed(
len(it_test) / (Nframes + 1))) print('') print('{} sequences = samples per batch'.format(batch_size)) print('{} batches per epoch'.format(spe)) print('{} epochs'.format(epochs)) #Defining the model #ENCODER-DECODER + LSTM - TIME_DISTRIBUTED #Initializing the CNN model = Sequential() #Adding layers. #First CNV layer model.add( TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'), input_shape=(None, 128, 128, 1))) model.add(BatchNormalization()) #Pooling model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) ##Second CNV layer model.add( TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same'))) model.add(BatchNormalization()) ##Pooling model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2)))) ##Third CNV layer model.add( TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same'))) model.add(BatchNormalization())
# Two input series and a target series that is their element-wise sum.
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
out_seq = in_seq1 + in_seq2

# Turn each series into a single column: [rows, 1].
in_seq1 = in_seq1.reshape(-1, 1)
in_seq2 = in_seq2.reshape(-1, 1)
out_seq = out_seq.reshape(-1, 1)

# Stack the three columns side by side.
dataset = hstack((in_seq1, in_seq2, out_seq))

# Window sizes: steps fed in, steps predicted.
n_steps_in, n_steps_out = 3, 2

# Convert into input/output samples.
X, y = split_sequences(dataset, n_steps_in, n_steps_out)

# The feature count is read from the last axis of the windowed input.
n_features = X.shape[2]

# Encoder LSTM -> repeated context vector -> decoder LSTM -> per-step Dense.
model = Sequential([
    LSTM(200, activation='relu', input_shape=(n_steps_in, n_features)),
    RepeatVector(n_steps_out),
    LSTM(200, activation='relu', return_sequences=True),
    TimeDistributed(Dense(n_features)),
])
model.compile(optimizer='adam', loss='mse')

# Fit the model.
model.fit(X, y, epochs=300, verbose=0)

# Demonstrate prediction on two samples of n_steps_in rows each.
x_input = array([
    [60, 65, 125],
    [70, 75, 145],
    [80, 85, 165],
    [70, 75, 145],
    [80, 85, 165],
    [90, 95, 185],
]).reshape((2, n_steps_in, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
def create_model(self):
    """Build, compile and return the conv + GRU model trained with CTC loss.

    Side effects: calls ``self._set_model_params()``, stores the compiled
    model on ``self.model`` and a backend function mapping the image input
    to the softmax output on ``self._predictor``. If ``self.weight_file``
    is set, weights are loaded from it before compilation.
    """
    self._set_model_params()
    activation = 'relu'
    conv_kwargs = dict(border_mode='same', activation=activation)
    pool_product = self.pool_size_1 * self.pool_size_2

    input_data = Input(name='the_input',
                       shape=self.input_shape,
                       dtype='float32')

    # Two conv + max-pool stages.
    features = Convolution2D(self.conv_num_filters, self.filter_size,
                             self.filter_size, name='conv1',
                             **conv_kwargs)(input_data)
    features = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1),
                            name='max1')(features)
    features = Convolution2D(self.conv_num_filters, self.filter_size,
                             self.filter_size, name='conv2',
                             **conv_kwargs)(features)
    features = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2),
                            name='max2')(features)

    # Collapse the conv output to two dimensions, then swap them before
    # the recurrent layers.
    conv_to_rnn_dims = (
        int((self.img_h / pool_product) * self.conv_num_filters),
        int(self.img_w / pool_product),
    )
    features = Reshape(target_shape=conv_to_rnn_dims,
                       name='reshape')(features)
    features = Permute(dims=(2, 1), name='permute')(features)

    # Cut down the input size going into the RNN.
    features = TimeDistributed(
        Dense(self.time_dense_size, activation=activation,
              name='dense1'))(features)

    # Two levels of paired forward/backward GRUs: the first pair is summed,
    # the second pair is concatenated.
    forward_1 = GRU(self.rnn_size, return_sequences=True,
                    name='gru1')(features)
    backward_1 = GRU(self.rnn_size, return_sequences=True,
                     go_backwards=True, name='gru1_b')(features)
    level_1 = merge([forward_1, backward_1], mode='sum')
    forward_2 = GRU(self.rnn_size, return_sequences=True,
                    name='gru2')(level_1)
    backward_2 = GRU(self.rnn_size, return_sequences=True,
                     go_backwards=True)(level_1)

    # Map the RNN output to character activations.
    to_characters = TimeDistributed(Dense(self.output_size, name='dense2'))
    level_2 = merge([forward_2, backward_2], mode='concat')
    y_pred = Activation('softmax', name='softmax')(to_characters(level_2))

    # Extra inputs consumed only by the CTC lambda.
    labels = Input(name='the_labels',
                   shape=[self.absolute_max_string_len],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # The loss needs extra parameters, so it is computed inside a Lambda
    # layer and exposed as the model output.
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name="ctc")(ctc_inputs)

    learning_rate = 0.03
    # Original author's note: clipnorm seems to speed up convergence.
    clip_norm = 5
    optimizer = SGD(lr=learning_rate,
                    decay=3e-7,
                    momentum=0.9,
                    nesterov=True,
                    clipnorm=clip_norm)

    model = Model(input=[input_data, labels, input_length, label_length],
                  output=[loss_out])

    if self.weight_file is not None:
        model.load_weights(self.weight_file)

    # The real loss is already the 'ctc' output, so the compiled loss just
    # passes the prediction through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer=optimizer)

    self.model = model
    self._predictor = K.function([input_data], [y_pred])
    return model
# Sequence lengths are taken from the first training sample
# (the original author noted the values 24 and 1 here).
n_in_seq_length = len(list(X_train[0]))
n_out_seq_length = len(list(y_train[0]))
print(np.shape(X_train), np.shape(y_train))

# create LSTM
model = Sequential()
# Encoder: 150 is the number of hidden units (= output dimension),
# encoded_length is the input dimension, n_in_seq_length is the number of
# input time steps.
model.add(
    LSTM(150, batch_input_shape=(None, n_in_seq_length, encoded_length)))
model.add(Dropout(0.2))
# Repeat the encoder output once per output time step.
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(150, return_sequences=True))  # decoder
model.add(Dropout(0.2))
model.add(LSTM(150, return_sequences=True))  # decoder
model.add(Dropout(0.3))
model.add(TimeDistributed(Dense(encoded_length, activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# show model
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None" line).
model.summary()

# train LSTM
history = model.fit(X_train,
                    y_train,
                    epochs=50,
                    batch_size=50,
                    validation_split=0.05,
                    shuffle=False,
                    verbose=2)

# save model
model.save('../model/seq2seq_code.h5')
from keras_contrib.layers import CRF words_input = Input(shape=(None, ), dtype='int32', name='words_input') words = Embedding(input_dim=wordEmbeddings.shape[0], output_dim=wordEmbeddings.shape[1], weights=[wordEmbeddings], trainable=False)(words_input) output = Bidirectional( LSTM(200, return_sequences=True, dropout=0.50, recurrent_dropout=0.25))(words) outputx = Bidirectional( LSTM(200, return_sequences=True, dropout=0.50, recurrent_dropout=0.25))(output) outputxx = TimeDistributed(Dense(50, activation="relu"))( outputx) # a dense layer as suggested by neuralNer crf = CRF(9) # CRF layer out = crf(outputxx) model = Model(inputs=words_input, outputs=out) model.compile(optimizer="adam", loss=crf.loss_function, metrics=[crf.accuracy]) model.summary() def compute_f1(predictions, correct, idx2Label): label_pred = [] for sentence in predictions: label_pred.append([idx2Label[element] for element in sentence]) label_correct = [] for sentence in correct:
conv_model_single_image = mlp_block( conv_model_single_image, number_of_neurons_per_layer_convolutional_model) #Decide whether to add an Average Layer on top of convolutional model to effectively have #the model predict optical flow (Mutually exclusive to dense output above!): flag_use_average_layer_on_top_of_convolutional_model = 0 if flag_use_average_layer_on_top_of_convolutional_model == 1: conv_model_single_image = Average(conv_model_single_image) #TimeDistributed: #(1). take Conv2D model and actually make it a "Model" according to the functional API conv_model_single_image_as_model = Model(inputs=[image_input], outputs=[conv_model_single_image]) #(2). make it time distributed or bidirectional(TimeDistributed) conv_model_time_distributed = TimeDistributed( conv_model_single_image_as_model)(image_inputs) #(3). after TimeDistributed we have a tensor of shape (batch_size,number_of_time_steps,single_model_output) # so i need to add a top layer to output something of desired shape: conv_model_time_distributed = Flatten()(conv_model_time_distributed) conv_model_time_distributed = Dense(2)(conv_model_time_distributed) #(3). make the whole thing, after TimeDistributed, a Model according to the functional API: #K.set_learning_phase(0) conv_model_time_distributed = Model(inputs=[image_inputs], outputs=[conv_model_time_distributed]) conv_model_time_distributed._uses_learning_phase = True #for learning=True, for testing = False #Visualize Model: if flag_plot_model == 1: keras.utils.plot_model(conv_model_single_image_as_model) keras.utils.vis_utils.plot_model(conv_model_single_image_as_model) from IPython.display import SVG
return word2ind[c] all_x,X,y =createData(f_test) X_test_enc = [[checkForUnk(c) for c in x] for x in X] X_test = pad_sequences(X_test_enc, maxlen=maxlen) # ------------------------------- defining the model ------------------------------------------------------------------- max_features = len(word2ind) embedding_size = 500 hidden_size = 150 out_size = len(label2ind) + 1 model = Sequential() model.add(Embedding(max_features, embedding_size, input_length=maxlen, mask_zero=True)) model.add(Bidirectional(LSTM(hidden_size, return_sequences=True))) model.add(TimeDistributed(Dense(out_size))) model.add(Activation('softmax')) model.compile(loss='binary_crossentropy', optimizer='adam') # ------------------------------- training and saving the model ------------------------------------------------------------------- batch_size = 32 model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, validation_data=(X_val,y_val)) model.save("../model/bilstm",True,True) # ------------------------------- evaluating the model ------------------------------------------------------------------- def score(yh, pr): coords = [np.where(yhh > 0)[0][0] for yhh in yh] yh = [yhh[co:] for yhh, co in zip(yh, coords)] ypr = [prr[co:] for prr, co in zip(pr, coords)] fyh = [c for row in yh for c in row]
def model_init(self, args):
    """ Build a deep network for speech.

    Reads its configuration from ``self.args`` (the ``args`` parameter is
    accepted for interface compatibility but not read). Sets
    ``self.inputs`` and ``self.model_1``.

    Architecture selection:
      * ``is_brnn == '123'``: stacked bidirectional GRU or LSTM layers.
      * otherwise with ``'gru'``: stacked unidirectional GRU layers.
      * otherwise with ``'lstm'``: delegated to
        ``self.make_residual_lstm_layers``.

    Raises:
        ValueError: if ``rnn_celltype`` is neither ``'gru'`` nor ``'lstm'``,
            or if a stacked architecture is requested with fewer than one
            layer. (Previously these cases failed later with an unbound
            local variable.)
    """
    cfg = self.args
    cell_type = cfg.rnn_celltype
    bidirectional = cfg.is_brnn == '123'

    if cell_type not in ('gru', 'lstm'):
        raise ValueError(
            "Unsupported rnn_celltype %r; expected 'gru' or 'lstm'" %
            (cell_type, ))
    use_residual_lstm = (not bidirectional) and cell_type == 'lstm'
    if not use_residual_lstm and cfg.num_layers < 1:
        raise ValueError(
            "num_layers must be at least 1, got %r" % (cfg.num_layers, ))

    def make_recurrent_layer(index):
        # One recurrent layer of the configured kind for stack position `index`.
        if bidirectional and cell_type == 'gru':
            return Bidirectional(GRU(cfg.hidden_size,
                                     activation=cfg.activation,
                                     return_sequences=True,
                                     implementation=2,
                                     name='bidir' + str(index)),
                                 merge_mode='concat')
        if bidirectional:
            return Bidirectional(
                LSTM(cfg.hidden_size,
                     return_sequences=True,
                     name='bidir' + str(index)))
        return GRU(cfg.hidden_size,
                   return_sequences=True,
                   name='gru' + str(index))

    # Main acoustic input
    self.inputs = Input(name='the_input', shape=(None, cfg.num_features))

    # Specify the layers in your network
    if use_residual_lstm:
        bidir_rnn = self.make_residual_lstm_layers(self.inputs,
                                                   cfg.hidden_size,
                                                   cfg.num_layers,
                                                   cfg.keep_prob)
    else:
        layer_input = self.inputs
        for i in range(cfg.num_layers):
            bidir_rnn = make_recurrent_layer(i)(layer_input)
            bn_rnn = BatchNormalization()(bidir_rnn)
            # NOTE(review): `keep_prob` is passed as Dropout's `rate`, which
            # Keras treats as the fraction to DROP -- confirm intent.
            layer_input = Dropout(rate=cfg.keep_prob)(bn_rnn)

    # NOTE(review): as in the original, the classifier reads the raw output
    # of the last recurrent layer, so the final BatchNormalization/Dropout
    # pair is built but not connected to the model output -- confirm intent.
    time_dense = TimeDistributed(Dense(cfg.num_classes))(bidir_rnn)
    # Add softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    self.model_1 = Model(inputs=self.inputs, outputs=y_pred)
    # Output length equals input length.
    self.model_1.output_length = lambda x: x
def create_standard_attention_model(self, test_mode=False):
    '''
    Build the attention classifier and store it on ``self.model``.

    This model is largely based on [A Decomposable Attention Model,
    Ankur et al.].

    test_mode: when true the model outputs
        ``[premise attention, hypothesis attention, prediction]``;
        otherwise only the prediction.
    '''

    def dense_relu():
        # 200-unit ReLU layer with L2 on both kernel and bias.
        return Dense(200,
                     activation='relu',
                     kernel_regularizer=l2(self.L2Strength),
                     bias_regularizer=l2(self.L2Strength))

    def softmax_layer():
        return Lambda(lambda x: keras.activations.softmax(x))

    # 0, (Optional) cap the fraction of GPU memory this process may use
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.per_process_gpu_memory_fraction = 0.2
    set_session(tf.Session(config=gpu_config))

    # 1, Embed both inputs and project the embeddings with a shared layer
    premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
    hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
    prem_embedded = self.Embed(premise)
    hypo_embedded = self.Embed(hypothesis)
    project = TimeDistributed(dense_relu())
    prem_embedded = Dropout(self.DropProb)(project(prem_embedded))
    hypo_embedded = Dropout(self.DropProb)(project(hypo_embedded))

    # 2, Two shared scoring layers, then the pairwise score matrix
    prem_scored, hypo_scored = prem_embedded, hypo_embedded
    for _ in range(2):
        attend = TimeDistributed(dense_relu())
        prem_scored = Dropout(self.DropProb)(attend(prem_scored))
        hypo_scored = Dropout(self.DropProb)(attend(hypo_scored))
    scores = keras.layers.Dot(axes=(2, 2))([prem_scored, hypo_scored])

    # 3, Softmax the score matrix in both orientations and align
    prem_weights = softmax_layer()(scores)
    hypo_weights = softmax_layer()(keras.layers.Permute((2, 1))(scores))
    prem_aligned = keras.layers.Dot((2, 1))([prem_weights, hypo_embedded])
    hypo_aligned = keras.layers.Dot((2, 1))([hypo_weights, prem_embedded])

    # 4, Concatenate each side with its alignment and compare
    prem_pair = keras.layers.concatenate([prem_embedded, prem_aligned])
    hypo_pair = keras.layers.concatenate([hypo_embedded, hypo_aligned])
    for _ in range(2):
        compare = TimeDistributed(dense_relu())
        prem_pair = compare(prem_pair)
        hypo_pair = compare(hypo_pair)
        prem_pair = Dropout(self.DropProb)(prem_pair)
        hypo_pair = Dropout(self.DropProb)(hypo_pair)

    # 5, Sum over axis 1 (reshaped to (-1, 200)) and classify the pair
    sum_over_words = Lambda(
        lambda X: K.reshape(K.sum(X, axis=1, keepdims=True), (-1, 200)))
    prem_vector = sum_over_words(prem_pair)
    hypo_vector = sum_over_words(hypo_pair)
    merged = keras.layers.concatenate([prem_vector, hypo_vector])
    for _ in range(2):
        merged = dense_relu()(merged)
        merged = Dropout(self.DropProb)(merged)
        merged = BatchNormalization()(merged)

    # 6, Three-way softmax prediction
    prediction = Dense(3, activation='softmax')(merged)
    if test_mode:
        outputs = [prem_weights, hypo_weights, prediction]
    else:
        outputs = prediction
    self.model = Model(inputs=[premise, hypothesis], outputs=outputs)
def final_model(input_dim,
                filters=50,
                kernel_size=5,
                units=200,
                output_dim=29,
                recur_layers=3,
                activation='relu',
                dropout_rate=0.1):
    """ Build a deep network for speech.

    Three Conv1D + BatchNormalization stages (kernel sizes 1, 3, 1) feed a
    stack of bidirectional LSTMs, each preceded by dropout and followed by
    batch normalisation; a time-distributed dense layer and a softmax
    produce the per-step output.

    Args:
        input_dim: number of features per input time step.
        filters: number of filters in each convolution.
        kernel_size: accepted for interface compatibility; the body does
            not read it (kernel sizes are fixed at 1, 3, 1).
        units: LSTM units per direction.
        output_dim: size of the per-step softmax output.
        recur_layers: number of bidirectional LSTM layers.
        activation: accepted for interface compatibility; the body does
            not read it (convolutions use 'relu', LSTMs use 'tanh').
        dropout_rate: dropout applied to the input of each recurrent layer.

    Returns:
        The Keras model, with an ``output_length`` attribute that maps an
        input length to itself.
    """
    conv_stride = 1
    conv_border_mode = 'same'

    # Main acoustic input
    input_data = Input(name='the_input', shape=(None, input_dim))

    # Original author's rationale: convolving lets the network model
    # spectral variance due to speaker change while preserving the order
    # of frequencies, unlike a fully connected layer.
    rnn_input = input_data
    for conv_kernel in (1, 3, 1):
        rnn_input = Conv1D(filters,
                           conv_kernel,
                           strides=conv_stride,
                           padding=conv_border_mode,
                           activation='relu')(rnn_input)
        rnn_input = BatchNormalization()(rnn_input)

    for num in range(recur_layers):
        rnn_name = 'rnn_{}'.format(num)
        # Dropout on the input of each recurrent layer.
        rnn_input = Dropout(dropout_rate)(rnn_input)
        # TODO: try 'elu' or a clipped activation instead of 'tanh'.
        simp_rnn = LSTM(units,
                        activation='tanh',
                        return_sequences=True,
                        implementation=2,
                        name=rnn_name)
        rnn = Bidirectional(simp_rnn)(rnn_input)
        # The normalised output becomes the next layer's input, chaining
        # the recurrent layers.
        rnn_input = BatchNormalization()(rnn)

    # Per-time-step projection to the output dimension.
    time_dense = TimeDistributed(Dense(output_dim))(rnn_input)
    y_pred = Activation('softmax', name='softmax')(time_dense)

    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    # Output length equals input length.
    model.output_length = lambda x: x
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print() (that would emit a stray "None" line).
    model.summary()
    return model
def create_enhanced_attention_model(self):
    """Build the BiLSTM attention classifier and store it on ``self.model``.

    Both inputs are embedded and encoded by a shared bidirectional LSTM,
    soft-aligned against each other, combined with their alignments
    (original, aligned, difference, product), compressed, re-encoded by a
    second shared bidirectional LSTM, pooled (mean and max over axis 1) and
    classified by a dense stack ending in a 3-way softmax.

    Side effect: installs a TensorFlow session with ``allow_growth``
    enabled via ``set_session``.
    """
    # 0, (Optional) let GPU memory grow on demand
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    # 1, Embed the inputs
    premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
    hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
    embed_p = self.Embed(premise)  # [batchsize, Psize, Embedsize]
    embed_h = self.Embed(hypothesis)  # [batchsize, Hsize, Embedsize]

    # 2, Encode words with their surrounding context (shared encoder)
    Encoder = Bidirectional(
        LSTM(units=300, dropout=self.DropProb, return_sequences=True))
    embed_p = Encoder(embed_p)
    embed_h = Encoder(embed_h)

    # 3, Score every word pair and normalise in both orientations
    F_p, F_h = embed_p, embed_h
    Eph = keras.layers.Dot(axes=(2, 2))([F_h, F_p])  # [batch_size, Hsize, Psize]
    Eh = Lambda(lambda x: keras.activations.softmax(x))(
        Eph)  # [batch_size, Hsize, Psize]
    Ep = keras.layers.Permute((2, 1))(Eph)  # [batch_size, Psize, Hsize]
    Ep = Lambda(lambda x: keras.activations.softmax(x))(
        Ep)  # [batch_size, Psize, Hsize]

    # 4, Align each side against the other
    PremAlign = keras.layers.Dot((2, 1))([Ep, embed_h])  # [-1, Psize, dim]
    HypoAlign = keras.layers.Dot((2, 1))([Eh, embed_p])  # [-1, Hsize, dim]
    mm_1 = keras.layers.Multiply()([embed_p, PremAlign])
    mm_2 = keras.layers.Multiply()([embed_h, HypoAlign])
    # Both operands are passed as layer inputs. The earlier form closed
    # over PremAlign/HypoAlign, which are rebound below, so any later
    # re-invocation of the Lambda would have subtracted the wrong tensor.
    sb_1 = Lambda(lambda pair: tf.subtract(pair[0], pair[1]))(
        [embed_p, PremAlign])
    sb_2 = Lambda(lambda pair: tf.subtract(pair[0], pair[1]))(
        [embed_h, HypoAlign])

    # Concatenate original, alignment, difference and product.
    PremAlign = keras.layers.Concatenate()([embed_p, PremAlign, sb_1, mm_1])
    HypoAlign = keras.layers.Concatenate()([embed_h, HypoAlign, sb_2, mm_2])
    PremAlign = Dropout(self.DropProb)(PremAlign)
    HypoAlign = Dropout(self.DropProb)(HypoAlign)

    # Shared per-step projection down to 300 units.
    Compresser = TimeDistributed(Dense(
        300,
        kernel_regularizer=l2(self.L2Strength),
        bias_regularizer=l2(self.L2Strength)),
                                 name='Compresser')
    PremAlign = Compresser(PremAlign)
    HypoAlign = Compresser(HypoAlign)

    # 5, Final shared BiLSTM encoder + softmax classifier.
    # The layer name is kept byte-for-byte so saved weights still match.
    final_encoder = Bidirectional(LSTM(units=300,
                                       dropout=self.DropProb,
                                       return_sequences=True),
                                  name='finaldecoer')
    final_p = final_encoder(PremAlign)
    final_h = final_encoder(HypoAlign)

    AveragePooling = Lambda(lambda x: K.mean(x, axis=1))  # outs [-1, dim]
    MaxPooling = Lambda(lambda x: K.max(x, axis=1))  # outs [-1, dim]
    avg_p = AveragePooling(final_p)
    avg_h = AveragePooling(final_h)
    max_p = MaxPooling(final_p)
    max_h = MaxPooling(final_h)

    Final = keras.layers.Concatenate()([avg_p, max_p, avg_h, max_h])
    Final = Dropout(self.DropProb)(Final)
    Final = Dense(512, name='dense512', activation='relu')(Final)
    Final = Dropout(self.DropProb)(Final)
    Final = Dense(256, name='dense256', activation='relu')(Final)
    Final = Dropout(self.DropProb)(Final)
    Final = Dense(3, activation='softmax', name='judge256')(Final)
    self.model = Model(inputs=[premise, hypothesis], outputs=Final)
def Seq2Seq(output_dim,
            output_length,
            rnncell_type,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument).
    Also, in deep models (depth > 1), the hidden state is propagated
    throughout the LSTM stack (specified by the inner_broadcast_state
    argument). You can switch between the [1] based model and the [2]
    based model using the peek argument (peek = True for [2],
    peek = False for [1]). When peek = True, the decoder gets a 'peek' at
    the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the
               LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    output_length : Length of the required output sequence.
    rnncell_type : Key into ``rnncell_list`` selecting the encoder cell
        class.
    batch_input_shape : Full input shape including the batch dimension.
        Takes precedence over the other shape arguments.
    input_shape : Input shape without the batch dimension; ``batch_size``
        is prepended. Used when ``batch_input_shape`` is not given.
    batch_size : Batch dimension used with ``input_shape`` or
        ``input_dim``.
    input_dim : Feature dimension; used when neither shape above is given.
    input_length : Sequence length used with ``input_dim`` (``None`` when
        omitted).
    hidden_dim : The dimension of the internal representations of the
        model. Defaults to ``output_dim``.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    unroll, stateful : Forwarded to both recurrent containers.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    teacher_force : When true, the model takes a second input holding the
        ground-truth sequence, which is passed to the decoder.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.

    Returns:
        The model, with ``encoder`` and ``decoder`` attributes attached.

    Raises:
        TypeError: if none of ``batch_input_shape``, ``input_shape`` or
            ``input_dim`` is provided.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)

    # Resolve the full batch shape from whichever description was given.
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        # tuple() so a list-valued input_shape also works.
        shape = (batch_size, ) + tuple(input_shape)
    elif input_dim:
        shape = (batch_size, input_length if input_length else None,
                 input_dim)
    else:
        raise TypeError(
            'Seq2Seq needs an input shape: pass batch_input_shape, '
            'input_shape or input_dim')

    if hidden_dim is None:
        hidden_dim = output_dim

    rnncell = rnncell_list[rnncell_type]

    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            rnncell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    # Projects the input to hidden_dim at every time step before encoding.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    # Projects the encoder output to output_dim before decoding.
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0],
                                                        output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states enabled the encoder returns [output, states...];
        # the last two entries are handed to the decoder as initial state.
        assert isinstance(encoded_seq, list)
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)

    inputs = [_input]
    truth_tensor = None
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length,
                                          output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs.append(truth_tensor)

    decoded_seq = decoder(encoded_seq,
                          ground_truth=truth_tensor,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1,
            broadcast_state=True, inner_broadcast_state=True,
            teacher_force=False, peek=False, dropout=0., **kwargs):
    '''
    Seq2seq model based on [1] and [2].

    This model has the ability to transfer the encoder hidden state to the
    decoder's hidden state (specified by the broadcast_state argument). Also,
    in deep models (depth > 1), the hidden state is propagated throughout the
    LSTM stack (specified by the inner_broadcast_state argument). You can
    switch between the [1] based model and the [2] based model using the peek
    argument (peek = True for [2], peek = False for [1]). When peek = True,
    the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); The context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the
        context vector from the encoder.

    Arguments:

    output_dim : Required output dimension.
    output_length : Length of the required output sequence.
    hidden_dim : The dimension of the internal representations of the model.
        Falls back to output_dim when not given.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
        there will be 3 LSTMs on the encoding side and 3 LSTMs on the
        decoding side. You can also specify depth as a tuple. For example,
        if depth = (4, 5), 4 LSTMs will be added to the encoding side and
        5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder
        should be transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be
        propagated throughout the LSTM stack in deep models.
    teacher_force : If True, the model takes a second input of shape
        (batch, output_length, output_dim) that is handed to the decoder
        as 'ground_truth'.
    peek : Specifies if the decoder should be able to peek at the context
        vector at every timestep.
    dropout : Dropout probability in between layers.
    **kwargs : The input shape must be given here as one of
        batch_input_shape, input_shape, or input_dim (optionally together
        with input_length); they are checked in that order and only the
        first match is consumed. unroll and stateful (both defaulting to
        False) are consumed as well. Whatever remains is forwarded to every
        LSTMCell and LSTMDecoderCell.

    Returns:

    A Model whose `encoder` and `decoder` attributes reference the two
    recurrent containers it was built from.

    Raises:

    TypeError : If no input shape information is supplied.
    '''
    # isinstance (instead of an exact type comparison) also accepts int
    # subclasses.
    if isinstance(depth, int):
        depth = [depth, depth]

    # Resolve the batch input shape; only the keys of the branch that is
    # taken are removed from kwargs, everything else is forwarded to the cells.
    if 'batch_input_shape' in kwargs:
        shape = kwargs.pop('batch_input_shape')
    elif 'input_shape' in kwargs:
        shape = (None,) + tuple(kwargs.pop('input_shape'))
    elif 'input_dim' in kwargs:
        # A missing input_length leaves the time dimension unspecified (None).
        shape = (None, kwargs.pop('input_length', None), kwargs.pop('input_dim'))
    else:
        # Fail early with a clear message instead of an UnboundLocalError
        # on `shape` further down.
        raise TypeError('Seq2Seq requires one of the keyword arguments '
                        'batch_input_shape, input_shape or input_dim.')

    unroll = kwargs.pop('unroll', False)
    stateful = kwargs.pop('stateful', False)

    if not hidden_dim:
        hidden_dim = output_dim

    encoder = RecurrentContainer(readout=True,
                                 state_sync=inner_broadcast_state,
                                 input_length=shape[1],
                                 unroll=unroll,
                                 stateful=stateful,
                                 return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim,
                             batch_input_shape=(shape[0], hidden_dim),
                             **kwargs))
        encoder.add(Dropout(dropout))

    # dense1 projects every input timestep to hidden_dim before the encoder;
    # dense2 maps the encoder output to output_dim for the decoder.
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentContainer(readout='add' if peek else 'readout_only',
                                 state_sync=inner_broadcast_state,
                                 output_length=output_length,
                                 unroll=unroll,
                                 stateful=stateful,
                                 decode=True,
                                 input_length=shape[1])
    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim,
                                    hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim),
                                    **kwargs))

    input_tensor = Input(batch_shape=shape)
    input_tensor._keras_history[0].supports_masking = True

    encoded_seq = dense1(input_tensor)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        # With return_states enabled the encoder output is a sequence: the
        # first entry is the encoding, the last two entries are the states.
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = [None] * 2
    encoded_seq = dense2(encoded_seq)

    inputs = [input_tensor]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder({'input': encoded_seq,
                           'ground_truth': inputs[1] if teacher_force else None,
                           'initial_readout': encoded_seq,
                           'states': states})

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
mask_zero=True, input_length=step_length)(hash_index_input) pos_input = Input(shape=(step_length, pos_length)) chunk_input = Input(shape=(step_length, chunk_length)) gazetteer_input = Input(shape=(step_length, gazetteer_length)) senna_hash_pos_chunk_gazetteer_merge = merge( [embedding, encoder_embedding, pos_input, chunk_input, gazetteer_input], mode='concat') input_mask = Masking(mask_value=0)(senna_hash_pos_chunk_gazetteer_merge) dp_1 = Dropout(0.5)(input_mask) hidden_1 = Bidirectional(LSTM(128, return_sequences=True))(dp_1) hidden_2 = Bidirectional(LSTM(64, return_sequences=True))(hidden_1) dp_2 = Dropout(0.5)(hidden_2) output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2) model = Model(input=[ embed_index_input, hash_index_input, pos_input, chunk_input, gazetteer_input ], output=output) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy']) print(model.summary()) number_of_train_batches = int(math.ceil(float(train_samples) / batch_size)) number_of_dev_batches = int(math.ceil(float(dev_samples) / batch_size))