def add_discriminator_block(self, cur_block, n_input_layers=3):
    old_model = self.model
    filters = [512, 512, 512, 512, 256, 128, 64, 32]
    f = filters[cur_block - 1]
    # get shape of existing model
    in_shape = list(old_model.input.shape)
    # define new input shape as double the size
    input_shape = (in_shape[-2] * 2, in_shape[-2] * 2, in_shape[-1])
    in_image = Input(shape=input_shape)
    # define new input processing layer
    if cur_block > 3:
        d = Conv2DEQ(int(f / 2), (1, 1), padding='same', name='d_conv_' + str(cur_block) + '_1')(in_image)
    else:
        d = Conv2DEQ(f, (1, 1), padding='same', name='d_conv_' + str(cur_block) + '_1')(in_image)
    d = LeakyReLU(alpha=0.2, name='d_relu_' + str(cur_block) + '_1')(d)
    # define new block
    if cur_block > 3:
        d = Conv2DEQ(int(f / 2), (3, 3), padding='same', name='d_conv_' + str(cur_block) + '_2')(d)
    else:
        d = Conv2DEQ(f, (3, 3), padding='same', name='d_conv_' + str(cur_block) + '_2')(d)
    d = LeakyReLU(alpha=0.2, name='d_relu_' + str(cur_block) + '_2')(d)
    d = Conv2DEQ(f, (3, 3), padding='same', name='d_conv_' + str(cur_block) + '_3')(d)
    d = LeakyReLU(alpha=0.2, name='d_relu_' + str(cur_block) + '_3')(d)
    d = AveragePooling2D(name='d_avgpool_' + str(cur_block) + '_1')(d)
    block_new = d
    # skip the input, 1x1 and activation for the old model
    for i in range(n_input_layers, len(old_model.layers)):
        d = old_model.layers[i](d)
    # define straight-through model
    model1 = Model(in_image, d)
    # downsample the new larger image
    downsample = AveragePooling2D(name='d_avgpool_' + str(cur_block) + '_2')(in_image)
    # connect old input processing to downsampled new input
    block_old = old_model.layers[1](downsample)
    block_old = old_model.layers[2](block_old)
    # fade in output of old model input layer with new input
    d = WeightedSum(name='d_wsum_' + str(cur_block) + '_1')([block_old, block_new])
    # skip the input, 1x1 and activation for the old model
    for i in range(n_input_layers, len(old_model.layers)):
        d = old_model.layers[i](d)
    # define fade-in model
    model2 = Model(in_image, d)
    self.normal = model1
    # set cur model to fade in
    self.model = model2
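# The fade-in above relies on a WeightedSum merge layer that is not defined in
# this snippet (and is distinct from the attention-weighted WeightedSum used in
# the text models below). A minimal sketch under the standard progressive-growing
# assumption -- output = (1 - alpha) * old + alpha * new, with alpha annealed
# from 0 to 1 during the fade-in phase -- could look like the following; the
# attribute `alpha` and the helper `update_fadein` are illustrative, not
# necessarily what this repository uses.
import tensorflow as tf
from tensorflow.keras.layers import Layer


class WeightedSum(Layer):
    """Hypothetical fade-in merge: blends the old and new pathways."""

    def __init__(self, alpha=0.0, **kwargs):
        super().__init__(**kwargs)
        # non-trainable blending coefficient, updated externally during training
        self.alpha = tf.Variable(alpha, trainable=False, dtype=tf.float32, name='ws_alpha')

    def call(self, inputs):
        old, new = inputs
        return (1.0 - self.alpha) * old + self.alpha * new


def update_fadein(models, step, n_steps):
    # linearly anneal alpha from 0 to 1 across the fade-in phase
    alpha = step / float(n_steps - 1)
    for model in models:
        for layer in model.layers:
            if isinstance(layer, WeightedSum):
                layer.alpha.assign(alpha)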
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        # orthogonal regularization for aspect embedding matrix
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    # Inputs
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    # Construct word embedding layer
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    # Compute sentence representation
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    # Compute representations of negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    # Reconstruction
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    # Loss
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    # Word embedding and aspect embedding initialization
    if args.emb_path:
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(
                vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of k-means clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))

    return model
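# The ortho_reg term above pushes the rows of the aspect embedding matrix toward
# (near-)orthonormality: each row is L2-normalized, and the squared Frobenius
# distance between W_n W_n^T and the identity is penalized. A small standalone
# NumPy sketch of the same computation (with an illustrative coefficient in
# place of args.ortho_reg) makes the effect concrete:
import numpy as np


def ortho_penalty(W, coef=0.1):
    # L2-normalize each row, then penalize deviation of the Gram matrix from identity
    W_n = W / (1e-7 + np.sqrt((W ** 2).sum(axis=-1, keepdims=True)))
    gram = W_n @ W_n.T
    return coef * np.sum((gram - np.eye(W.shape[0])) ** 2)


W_orthogonal = np.array([[1.0, 0.0, 0.0],
                         [0.0, 2.0, 0.0]])   # orthogonal rows (distinct aspects)
W_redundant = np.array([[1.0, 0.0, 0.0],
                        [2.0, 0.0, 0.0]])    # parallel rows (redundant aspects)

print(ortho_penalty(W_orthogonal))  # ~0.0: no penalty for distinct aspects
print(ortho_penalty(W_redundant))   # 0.2: redundant aspects are penalized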
def add_generator_block(self, cur_block):
    old_model = self.model
    filters = [512, 512, 512, 256, 128, 64, 32, 16]
    f = filters[cur_block - 1]
    # get the end of the last block
    block_end = old_model.layers[-2].output
    # upsample, and define new block
    upsampling = UpSampling2D(name='g_up2d_' + str(cur_block))(block_end)
    g = Conv2DEQ(f, (3, 3), padding='same', name='g_conv_' + str(cur_block) + '_1')(upsampling)
    g = LeakyReLU(alpha=0.2, name='g_relu_' + str(cur_block) + '_1')(g)
    g = PixelNormalization(name='g_pxnorm_' + str(cur_block) + '_1')(g)
    g = Conv2DEQ(f, (3, 3), padding='same', name='g_conv_' + str(cur_block) + '_2')(g)
    g = LeakyReLU(alpha=0.2, name='g_relu_' + str(cur_block) + '_2')(g)
    g = PixelNormalization(name='g_pxnorm_' + str(cur_block) + '_2')(g)
    # add new output layer
    out_image = Conv2DEQ(3, (1, 1), padding='same', name='g_conv_' + str(cur_block) + '_3')(g)
    # define model
    model1 = Model(old_model.input, out_image)
    # get the output layer from old model
    out_old = old_model.layers[-1]
    # connect the upsampling to the old output layer
    out_image2 = out_old(upsampling)
    # define new output image as the weighted sum of the old and new models
    merged = WeightedSum(name='g_wsum_' + str(cur_block) + '_1')([out_image2, out_image])
    # define fade-in model
    model2 = Model(old_model.input, merged)
    self.normal = model1
    # set cur model to fade in
    self.model = model2
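# PixelNormalization is another custom layer assumed by this generator block.
# In the progressive-growing GAN paper it rescales each spatial position's
# channel vector to roughly unit L2 norm. A minimal tf.keras sketch under that
# assumption (the actual implementation used here may differ):
import tensorflow as tf
from tensorflow.keras.layers import Layer


class PixelNormalization(Layer):
    """Hypothetical pixel-wise feature-vector normalization (Karras et al. style)."""

    def call(self, inputs):
        # mean of squared activations across channels, per spatial position
        mean_square = tf.reduce_mean(tf.square(inputs), axis=-1, keepdims=True)
        return inputs / tf.sqrt(mean_square + 1.0e-8)

    def compute_output_shape(self, input_shape):
        return input_shape

# With layers like these in place, add_generator_block / add_discriminator_block
# are presumably called once per resolution step, training the fade-in model
# (self.model) first and then the straight-through model (self.normal).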
encoded_passage = passage_bidir_encoder(passage_embedding)
encoded_question = passage_bidir_encoder(question_embedding)

# PART 2:
# Now we compute a similarity between the passage words and the question words, and
# normalize the matrix in a couple of different ways for input into some more layers.
matrix_attention_layer = MatrixAttention(name='passage_question_similarity')
# Shape: (batch_size, num_passage_words, num_question_words)
passage_question_similarity = matrix_attention_layer(
    [encoded_passage, encoded_question])

# Shape: (batch_size, num_passage_words, num_question_words), normalized over question
# words for each passage word.
passage_question_attention = MaskedSoftmax()(passage_question_similarity)
# Shape: (batch_size, num_passage_words, embedding_dim * 2)
weighted_sum_layer = WeightedSum(name="passage_question_vectors", use_masking=False)
passage_question_vectors = weighted_sum_layer(
    [encoded_question, passage_question_attention])

# Min's paper finds, for each document word, the most similar question word to it, and
# computes a single attention over the whole document using these max similarities.
# Shape: (batch_size, num_passage_words)
question_passage_similarity = Max(axis=-1)(passage_question_similarity)
# Shape: (batch_size, num_passage_words)
question_passage_attention = MaskedSoftmax()(question_passage_similarity)
# Shape: (batch_size, embedding_dim * 2)
weighted_sum_layer = WeightedSum(name="question_passage_vector", use_masking=False)
# question_passage_vector = weighted_sum_layer([encoded_passage, question_passage_attention])
question_passage_vector = Lambda(
    lambda x: K.sum(K.expand_dims(x[0], axis=-1) * x[1], -2))(