import json

import numpy as np
import keras
from keras.models import Model, Sequential
from keras.layers import (Activation, Concatenate, Dense, Dropout, Embedding,
                          GRU, Input, LSTM, Reshape, TimeDistributed)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

import embedding  # project-local module: embedding.load(...) returns a weight matrix


def model(words_per_sequence, vocab_size,
          embedding_weights_path="weights/embedding.npy"):
    embedding_weights = embedding.load(embedding_weights_path).squeeze()

    embedding_layer = Embedding(embedding_weights.shape[0],
                                embedding_weights.shape[1],
                                input_length=words_per_sequence,
                                weights=[embedding_weights])
    # set_weights() cannot be called before the layer is built, so the
    # pretrained matrix is passed through the (largely undocumented)
    # `weights` argument instead -- and it must be wrapped in a list.
    # embedding_layer.set_weights(embedding_weights)  # <- raises; do not use

    layer_input = Input((words_per_sequence, ))
    x = embedding_layer(layer_input)                   # (batch, seq, emb_dim)
    x = LSTM(embedding_weights.shape[-1], return_sequences=True)(x)
    x = LSTM(embedding_weights.shape[-1], return_sequences=True)(x)
    x = Dropout(0.5)(x)
    x = TimeDistributed(Dense(vocab_size))(x)          # (batch, seq, vocab)
    x = Activation("softmax", name="softmax")(x)
    return keras.Model(inputs=[layer_input], outputs=[x])
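# --- Usage sketch (illustrative, not part of the original code) ---
# Assumes weights/embedding.npy exists; words_per_sequence=40 and
# vocab_size=10000 are placeholder values chosen for illustration.
lm = model(words_per_sequence=40, vocab_size=10000)
lm.compile(optimizer="adam", loss="categorical_crossentropy")
lm.summary()
# Input:  (batch, 40) integer token ids
# Output: (batch, 40, 10000) per-timestep softmax over the vocabulary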
# Keras 1.x version: `Merge` was removed in Keras 2; a functional rewrite
# using Concatenate follows below.
def vis_lstm_2():
    embedding_matrix = embedding.load()

    # Text branch: frozen pretrained embeddings
    embedding_model = Sequential()
    embedding_model.add(
        Embedding(embedding_matrix.shape[0],
                  embedding_matrix.shape[1],
                  weights=[embedding_matrix],
                  trainable=False))

    # Two image branches: 4096-d CNN features projected into the
    # embedding space and reshaped into a single "timestep"
    image_model_1 = Sequential()
    image_model_1.add(
        Dense(embedding_matrix.shape[1], input_dim=4096, activation='linear'))
    image_model_1.add(Reshape((1, embedding_matrix.shape[1])))

    image_model_2 = Sequential()
    image_model_2.add(
        Dense(embedding_matrix.shape[1], input_dim=4096, activation='linear'))
    image_model_2.add(Reshape((1, embedding_matrix.shape[1])))

    main_model = Sequential()
    main_model.add(
        Merge([image_model_1, embedding_model, image_model_2],
              mode='concat', concat_axis=1))
    main_model.add(LSTM(1001))
    main_model.add(Dropout(0.5))
    main_model.add(Dense(1001, activation='softmax'))
    return main_model
# Functional-API (Keras 2) rewrite of vis_lstm_2 above; if both versions
# live in the same module, this definition shadows the Sequential one.
def vis_lstm_2():
    embedding_matrix = embedding.load()

    # Text branch: token ids -> frozen pretrained embeddings
    inpx0 = Input(shape=(None,), dtype='int32')
    x0 = Embedding(embedding_matrix.shape[0],
                   embedding_matrix.shape[1],
                   weights=[embedding_matrix],
                   trainable=False)(inpx0)

    # First image branch: 4096-d features -> embedding space, one timestep
    inpx1 = Input(shape=(4096,))
    x1 = Dense(embedding_matrix.shape[1], activation='linear')(inpx1)
    x1 = Reshape((1, embedding_matrix.shape[1]))(x1)

    # Second image branch, same projection
    inpx2 = Input(shape=(4096,))
    x2 = Dense(embedding_matrix.shape[1], activation='linear')(inpx2)
    x2 = Reshape((1, embedding_matrix.shape[1]))(x2)

    # Concatenate along the time axis (functional replacement for Merge)
    x3 = Concatenate(axis=1)([x1, x0, x2])
    x3 = LSTM(1001)(x3)
    x3 = Dropout(0.5)(x3)
    x3 = Dense(1001, activation='softmax')(x3)

    # The model's inputs are the three Input tensors, not the merged tensor
    main_model = Model([inpx1, inpx0, inpx2], x3)
    return main_model
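# --- Smoke test for the functional rewrite (illustrative values only) ---
# Assumes the embedding vocabulary has at least 1000 entries; batch size,
# caption length, and the random data below are placeholders.
m = vis_lstm_2()
m.compile(optimizer='rmsprop', loss='categorical_crossentropy')
img1 = np.random.rand(32, 4096).astype('float32')  # first image branch
toks = np.random.randint(0, 1000, size=(32, 16))   # token-id sequence
img2 = np.random.rand(32, 4096).astype('float32')  # second image branch
preds = m.predict([img1, toks, img2])              # -> shape (32, 1001)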
# Keras 1.x API throughout (Convolution3D/border_mode/Merge);
# MAX_SEQUENCE_LENGTH is a module-level constant defined elsewhere.
def vis_lstm_100():
    embedding_matrix = embedding.load()

    embedding_model = Sequential()
    embedding_model.add(
        Embedding(embedding_matrix.shape[0],
                  embedding_matrix.shape[1],
                  weights=[embedding_matrix],
                  trainable=False))
    embedding_model.add(Dense(10))
    print(embedding_model.summary())

    image_model1 = Sequential()
    image_model1.add(
        Convolution3D(1, 10, 3, 300,
                      border_mode='valid',
                      input_shape=(10, MAX_SEQUENCE_LENGTH, 300, 1),
                      activation='relu'))
    image_model1.add(MaxPooling3D((1, 2, 1), strides=(1, 1, 1)))
    image_model1.add(Reshape((1, 10)))
    print(image_model1.summary())

    spatial_model = Sequential()
    spatial_model.add(Dense(1, input_shape=(1, 10, 10)))
    spatial_model.add(Reshape((1, 10)))
    print(spatial_model.summary())

    main_model = Sequential()
    main_model.add(
        Merge([image_model1, embedding_model, spatial_model],
              mode='concat', concat_axis=1))
    main_model.add(LSTM(101))
    main_model.add(Dropout(0.5))
    main_model.add(Dense(1001, activation='softmax'))
    return main_model
features, images, texts = load('annotations.10k.txt',
                               'resnet50-features.10k.npy')

tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
captions = pad_sequences(sequences, maxlen=16)

vocab = tokenizer.word_index
vocab['<eos>'] = 0  # add word with id 0

with open('vocab1.json', 'w') as fp:  # save the vocab
    fp.write(json.dumps(vocab))

embedding_weights = embedding.load(vocab, 100,
                                   'glove.twitter.27B.100d.filtered.txt')

image_input = Input(shape=(2048, ))
caption_input = Input(shape=(16, ))
noise_input = Input(shape=(16, ))

caption_embedding = Embedding(len(vocab), 100, input_length=16,
                              weights=[embedding_weights])
caption_rnn = GRU(256)
image_dense = Dense(256, activation='tanh')

# The embedding and GRU layer objects are shared, so the real-caption and
# noise branches are encoded with identical weights.
image_pipeline = image_dense(image_input)
caption_pipeline = caption_rnn(caption_embedding(caption_input))
noise_pipeline = caption_rnn(caption_embedding(noise_input))
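# --- Weight-sharing check (illustrative; not in the original snippet) ---
# Because caption_embedding and caption_rnn are single layer objects called
# twice, the caption and noise branches share weights by construction.
probe = Model(inputs=[caption_input, noise_input],
              outputs=[caption_pipeline, noise_pipeline])
cap = np.random.randint(0, len(vocab), size=(4, 16))
enc_a, enc_b = probe.predict([cap, cap])
assert np.allclose(enc_a, enc_b)  # same layers, same input -> same encoding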