# --- End-to-end memory network: answer module ---
# Attention between the story memory encoding and the question encoding.
# NOTE(review): assumes input_encoded_m is (samples, story_maxlen, d) and
# question_encoded is (samples, query_maxlen, d) so axes=2 dots the embedding
# axes — TODO confirm against the encoder definitions earlier in the file.
match = dot([input_encoded_m, question_encoded], axes=2)
print("match", match.get_shape())
# Normalise the attention scores over the story sentences.
match = Activation('softmax')(match)

# add the match matrix with the second input vector sequence
response = dot([match, input_encoded_c], axes=1)  # shape: (samples, story_maxlen, query_maxlen)
print("response", response.get_shape())

# Residual-style combination of the attended response and the question.
question_encoded = add([response, question_encoded])
print(question_encoded.get_shape())

# the original paper uses a matrix multiplication for this reduction step.
# Here an LSTM reduces the combined sequence to a single 32-dim vector.
final_ans = LSTM(32)(question_encoded)
final_ans = Dropout(0.3)(final_ans)
print(final_ans.get_shape())
final_ans = Dense(vocab_size)(final_ans)  # (samples, vocab_size)
# we output a probability distribution over the vocabulary
final_ans = Activation('softmax')(final_ans)

# build the final model
model = Model([input_sequence, question], final_ans)
# sparse_categorical_crossentropy: answers_train holds integer class ids,
# not one-hot vectors.
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# train
# NOTE(review): this fit(...) call is continued in the next chunk of the file
# (the argument list is still open here).
model.fit([inputs_train, queries_train], answers_train, batch_size=32,
###############
# --- Multi-channel text encoder: two embedding channels + BiLSTM channel,
# --- stacked and fed to parallel 2-D convolutions of several filter heights.
word_input = Input(shape=(sequence_length,))

# Channel 1: first embedding, reshaped to (seq, dim, 1) for 2-D convolution.
model1 = embedding_layer1(word_input)
model1 = Dropout(0.5)(model1)
model1 = Reshape((sequence_length, embedding_dim, 1))(model1)

# Channel 2: second embedding, same treatment.
model2 = embedding_layer2(word_input)
model2 = Dropout(0.5)(model2)
model2 = Reshape((sequence_length, embedding_dim, 1))(model2)

# BiLSTM channel runs over channel 1's embedding (2-D view).
biLSTM_Input = Reshape((sequence_length, embedding_dim))(model1)
# FIX: return_sequences was the string 'True' — truthy, so it happened to
# work, but it is the wrong type; pass the boolean.
# NOTE(review): input_shape/input_length are ignored when a layer is called
# on an existing tensor; the hard-coded (40, 300) presumably equals
# (sequence_length, embedding_dim) — confirm.
left_branch = LSTM(300, input_shape=(40, 300), return_sequences=True,
                   input_length=40)(biLSTM_Input)
# NOTE(review): go_backwards emits outputs in reversed time order, so the
# element-wise average below pairs forward step t with backward step T-1-t —
# confirm this alignment is intended.
right_branch = LSTM(300, input_shape=(40, 300), return_sequences=True,
                    input_length=40, go_backwards=True)(biLSTM_Input)
# FIX: converted Python-2-only print statements to print() calls for
# consistency with the rest of the file (and Python 3 compatibility).
print("left_branch.get_shape()", left_branch.get_shape())
print("right_branch.get_shape()", right_branch.get_shape())

# Average the two directions, then add a channel axis.
# NOTE(review): merge(mode=...) is the legacy Keras 1 API — consider
# average()/concatenate() when the file's imports are modernised.
lstm_merged = merge([left_branch, right_branch], mode='ave')
lstm_merged = Reshape([40, 300, 1])(lstm_merged)
lstm_merged = Dropout(0.2)(lstm_merged)

# Stack the three channels along the last axis: (seq, dim, 3).
graph_in_temp = merge([model1, model2, lstm_merged], mode='concat',
                      concat_axis=-1)
graph_in = Reshape((40, 300, 3))(graph_in_temp)
print(graph_in.get_shape())

# Parallel convolutions with different filter heights (n-gram widths).
conv_11 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[0],
                        nb_col=col_size, border_mode='valid',
                        activation='relu')(graph_in)
conv_22 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[1],
                        nb_col=col_size, border_mode='valid',
                        activation='relu')(graph_in)
conv_33 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[2],
                        nb_col=col_size, border_mode='valid',
                        activation='relu')(graph_in)

# Global max-pool: pool over the full remaining spatial extent.
conv_11 = MaxPooling2D(pool_size=(int(conv_11.get_shape()[1]),
                                  int(conv_11.get_shape()[2])))(conv_11)