Example #1
# compute a 'match' between the story encoding and the question encoding
# shape: (samples, story_maxlen, query_maxlen)
match = dot([input_encoded_m, question_encoded], axes=2)
print("match", match.get_shape())
match = Activation('softmax')(match)

# reduce the story with the match weights: a weighted sum over the memory
# shape: (samples, query_maxlen, embedding_dim)
response = dot([match, input_encoded_c], axes=1)
print("response", response.get_shape())

# add the reduced response to the question encoding
question_encoded = add([response, question_encoded])
print(question_encoded.get_shape())

# the original paper uses a matrix multiplication for this reduction step;
# here an LSTM performs the reduction instead
final_ans = LSTM(32)(question_encoded)
final_ans = Dropout(0.3)(final_ans)
print(final_ans.get_shape())
final_ans = Dense(vocab_size)(final_ans)  # (samples, vocab_size)

# we output a probability distribution over the vocabulary
final_ans = Activation('softmax')(final_ans)

# build the final model
model = Model([input_sequence, question], final_ans)
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# train
model.fit([inputs_train, queries_train],
          answers_train,
          batch_size=32,
          epochs=120,
          # held-out arrays assumed to parallel the training ones
          validation_data=([inputs_test, queries_test], answers_test))
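The snippet above follows the pattern of the Keras babi_memnn memory-network example and assumes the two Input placeholders and the three encoders are defined earlier. A minimal sketch of those definitions, with hypothetical sizes (story_maxlen, query_maxlen and vocab_size depend on the dataset; the shared 64-dimensional embedding width is an assumption chosen so the dot/add shapes line up):

from keras.models import Model
from keras.layers import (Input, Embedding, Dropout, Activation, Dense,
                          LSTM, dot, add)

story_maxlen, query_maxlen, vocab_size = 68, 4, 22  # hypothetical dataset sizes

input_sequence = Input(shape=(story_maxlen,))
question = Input(shape=(query_maxlen,))

# story encoder "m": (samples, story_maxlen, 64)
input_encoded_m = Dropout(0.3)(Embedding(vocab_size, 64)(input_sequence))
# story encoder "c": same width as the question encoder so that
# add([response, question_encoded]) is well-defined
input_encoded_c = Dropout(0.3)(Embedding(vocab_size, 64)(input_sequence))
# question encoder: (samples, query_maxlen, 64)
question_encoded = Dropout(0.3)(Embedding(vocab_size, 64)(question))

After training, a predicted answer is decoded by taking the argmax of the softmax output, e.g. model.predict([story, query])[0].argmax() gives the index of the predicted word in the vocabulary.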
Example #2
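This example references several names defined elsewhere in its original source. A minimal sketch of plausible definitions: sequence_length = 40 and embedding_dim = 300 are implied by the hard-coded reshapes below, while the remaining values and the two embedding channels are hypothetical placeholders.

from keras.layers import (Input, Embedding, Dropout, Reshape, LSTM, average,
                          concatenate, Conv2D, MaxPooling2D)

sequence_length = 40      # implied by Reshape((40, 300, ...)) below
embedding_dim = 300       # implied by the same reshapes
vocab_size = 20000        # hypothetical vocabulary size
feature_map = 100         # hypothetical number of filters per convolution
filter_sizes = [3, 4, 5]  # hypothetical convolution window heights
col_size = embedding_dim  # assumed window width spanning the full embedding

# two embedding channels, e.g. a frozen pretrained one and a trainable one
embedding_layer1 = Embedding(vocab_size, embedding_dim, trainable=False)
embedding_layer2 = Embedding(vocab_size, embedding_dim, trainable=True)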
word_input = Input(shape=(sequence_length,))

# first embedding channel, reshaped to (sequence_length, embedding_dim, 1)
model1 = embedding_layer1(word_input)
model1 = Dropout(0.5)(model1)
model1 = Reshape((sequence_length, embedding_dim, 1))(model1)

# second embedding channel, same shape
model2 = embedding_layer2(word_input)
model2 = Dropout(0.5)(model2)
model2 = Reshape((sequence_length, embedding_dim, 1))(model2)

# bidirectional LSTM over the first channel: a forward pass and a backward
# pass (go_backwards=True), averaged; note that go_backwards returns its
# output sequence in reversed order, which this code does not undo
biLSTM_input = Reshape((sequence_length, embedding_dim))(model1)
left_branch = LSTM(300, return_sequences=True)(biLSTM_input)
right_branch = LSTM(300, return_sequences=True, go_backwards=True)(biLSTM_input)

print("left_branch", left_branch.get_shape())
print("right_branch", right_branch.get_shape())

lstm_merged = average([left_branch, right_branch])
lstm_merged = Reshape((sequence_length, 300, 1))(lstm_merged)
lstm_merged = Dropout(0.2)(lstm_merged)

# stack the two embedding channels and the LSTM features as three "image" channels
graph_in_temp = concatenate([model1, model2, lstm_merged], axis=-1)
graph_in = Reshape((sequence_length, embedding_dim, 3))(graph_in_temp)

print(graph_in.get_shape())

# three parallel convolutions with different window heights
conv_11 = Conv2D(feature_map, (filter_sizes[0], col_size), padding='valid', activation='relu')(graph_in)
conv_22 = Conv2D(feature_map, (filter_sizes[1], col_size), padding='valid', activation='relu')(graph_in)
conv_33 = Conv2D(feature_map, (filter_sizes[2], col_size), padding='valid', activation='relu')(graph_in)

# pool over the entire remaining feature map (global max pooling per filter)
conv_11 = MaxPooling2D(pool_size=(int(conv_11.get_shape()[1]), int(conv_11.get_shape()[2])))(conv_11)
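Presumably the remaining two branches are pooled the same way and the three results merged; a hedged sketch of that continuation (the concatenate/flatten step is an assumption, not taken from the original):

from keras.layers import Flatten

conv_22 = MaxPooling2D(pool_size=(int(conv_22.get_shape()[1]), int(conv_22.get_shape()[2])))(conv_22)
conv_33 = MaxPooling2D(pool_size=(int(conv_33.get_shape()[1]), int(conv_33.get_shape()[2])))(conv_33)

# hypothetical: concatenate the three pooled feature vectors and flatten
merged = concatenate([conv_11, conv_22, conv_33], axis=-1)
merged = Flatten()(merged)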