Пример #1
0
 def add_discriminator_block(self, cur_block, n_input_layers=3):
     """Grow the discriminator by one block on a doubled-size input.

     Two models sharing the same new input are built: a straight-through
     model (new block followed by the old layers), stored in
     ``self.normal``, and a fade-in model that blends the old input path
     with the new block through ``WeightedSum``, stored in ``self.model``.

     Args:
         cur_block: block number; ``cur_block - 1`` indexes the filter
             table and the number is embedded in every new layer name.
         n_input_layers: count of leading layers of the old model that
             are bypassed when chaining the old layers after the new block.
     """

     def _name(kind, idx):
         # e.g. 'd_conv_4_1'
         return 'd_' + kind + '_' + str(cur_block) + '_' + str(idx)

     previous = self.model
     widths = [512, 512, 512, 512, 256, 128, 64, 32]
     out_filters = widths[cur_block - 1]
     # from the 4th block on, the first two convolutions use half the filters
     head_filters = int(out_filters / 2) if cur_block > 3 else out_filters

     # new input: doubled spatial size, same last dimension as the old input
     old_dims = list(previous.input.shape)
     doubled = old_dims[-2] * 2
     image_in = Input(shape=(doubled, doubled, old_dims[-1]))

     # new input processing layer (1x1 conv + activation)
     x = Conv2DEQ(head_filters, (1, 1),
                  padding='same',
                  name=_name('conv', 1))(image_in)
     x = LeakyReLU(alpha=0.2, name=_name('relu', 1))(x)

     # new block: two 3x3 convolutions with activations, then pooling
     x = Conv2DEQ(head_filters, (3, 3),
                  padding='same',
                  name=_name('conv', 2))(x)
     x = LeakyReLU(alpha=0.2, name=_name('relu', 2))(x)
     x = Conv2DEQ(out_filters, (3, 3),
                  padding='same',
                  name=_name('conv', 3))(x)
     x = LeakyReLU(alpha=0.2, name=_name('relu', 3))(x)
     x = AveragePooling2D(name=_name('avgpool', 1))(x)
     new_branch = x

     # straight-through model: new block feeds the old layers, skipping the
     # old model's own input-processing layers
     for layer in previous.layers[n_input_layers:]:
         x = layer(x)
     straight_through = Model(image_in, x)

     # fade-in path: pool the larger image and run it through the old
     # model's input-processing layers (indices 1 and 2)
     pooled_in = AveragePooling2D(name=_name('avgpool', 2))(image_in)
     old_branch = previous.layers[1](pooled_in)
     old_branch = previous.layers[2](old_branch)

     # blend old and new paths, then continue through the old layers
     x = WeightedSum(name=_name('wsum', 1))([old_branch, new_branch])
     for layer in previous.layers[n_input_layers:]:
         x = layer(x)
     fade_in = Model(image_in, x)

     self.normal = straight_through
     # the current model becomes the fade-in variant
     self.model = fade_in
Пример #2
0
def create_model(args, maxlen, vocab):
    """Build the model whose output is the ``MaxMargin`` layer.

    Args:
        args: options object; this function reads ``ortho_reg``,
            ``neg_size``, ``emb_dim``, ``aspect_size`` and ``emb_path``.
        maxlen: length of the token-id axis of each input sentence.
        vocab: vocabulary; ``len(vocab)`` sizes the word embedding, and it
            is handed to the embedding reader when ``args.emb_path`` is set.

    Returns:
        A ``Model`` taking ``[sentence_input, neg_input]`` and producing the
        output of the ``max_margin`` layer. When ``args.emb_path`` is
        truthy, the ``word_emb`` and ``aspect_emb`` weights are overwritten
        with values obtained from ``EmbReader``.
    """

    def ortho_reg(weight_matrix):
        # orthogonal regularization for aspect embedding matrix:
        # normalise each row, then penalise the squared difference between
        # the row-by-row product matrix and the identity.
        row_norm = K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True))
        w_n = weight_matrix / K.cast(K.epsilon() + row_norm, K.floatx())
        # K.int_shape returns plain Python ints, so this works whether the
        # backend exposes shape entries as TF1 `Dimension` objects or as
        # ints (where the previous `.shape[0].value` would fail).
        n_rows = K.int_shape(w_n)[0]
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(n_rows)))

        return args.ortho_reg * reg

    vocab_size = len(vocab)

    # Inputs
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    # Construct word embedding layer (shared by positive and negative inputs)
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    # Compute sentence representation
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    # Compute representations of negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    # Reconstruction
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    # Loss
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    # Word embedding and aspect embedding initialization
    if args.emb_path:
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)

        logger.info('Initializing word embedding matrix')
        word_weights = model.get_layer('word_emb').embeddings
        K.set_value(
            word_weights,
            emb_reader.get_emb_matrix_given_vocab(vocab, K.get_value(word_weights)))

        logger.info('Initializing aspect embedding matrix as centroid of kmean clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))

    return model
Пример #3
0
 def add_generator_block(self, cur_block):
     """Grow the generator by one upsampling block.

     Two models sharing the old model's input are built: a straight-through
     model ending in the new output convolution, stored in ``self.normal``,
     and a fade-in model that blends the old output layer (applied to the
     upsampled features) with the new output through ``WeightedSum``,
     stored in ``self.model``.

     Args:
         cur_block: block number; ``cur_block - 1`` indexes the filter
             table and the number is embedded in every new layer name.
     """

     def _name(kind, idx):
         # e.g. 'g_conv_4_1'
         return 'g_' + kind + '_' + str(cur_block) + '_' + str(idx)

     previous = self.model
     widths = [512, 512, 512, 256, 128, 64, 32, 16]
     n_filters = widths[cur_block - 1]

     # features feeding the old output layer (second-to-last layer's output)
     last_features = previous.layers[-2].output

     # upsample, then two conv / activation / pixel-norm stages
     upsampled = UpSampling2D(name='g_up2d_' + str(cur_block))(last_features)
     x = upsampled
     for stage in (1, 2):
         x = Conv2DEQ(n_filters, (3, 3),
                      padding='same',
                      name=_name('conv', stage))(x)
         x = LeakyReLU(alpha=0.2, name=_name('relu', stage))(x)
         x = PixelNormalization(name=_name('pxnorm', stage))(x)

     # new 3-filter 1x1 output layer
     new_image = Conv2DEQ(3, (1, 1),
                          padding='same',
                          name=_name('conv', 3))(x)
     # straight-through model
     straight_through = Model(previous.input, new_image)

     # reuse the old model's output layer on the upsampled features
     old_output_layer = previous.layers[-1]
     old_image = old_output_layer(upsampled)

     # new output image as the weighted sum of the old and new paths
     blended = WeightedSum(name=_name('wsum', 1))([old_image, new_image])
     fade_in = Model(previous.input, blended)

     self.normal = straight_through
     # the current model becomes the fade-in variant
     self.model = fade_in
Пример #4
0
# Encode the passage and question embeddings.
# NOTE(review): the question is run through `passage_bidir_encoder` as well, i.e. the
# same layer object is applied to both inputs — presumably intentional weight
# sharing; confirm against where the encoder is defined.
encoded_passage = passage_bidir_encoder(passage_embedding)
encoded_question = passage_bidir_encoder(question_embedding)

# PART 2:
# Now we compute a similarity between the passage words and the question words, and
# normalize the matrix in a couple of different ways for input into some more layers.
matrix_attention_layer = MatrixAttention(name='passage_question_similarity')
# Shape: (batch_size, num_passage_words, num_question_words)
passage_question_similarity = matrix_attention_layer(
    [encoded_passage, encoded_question])

# Shape: (batch_size, num_passage_words, num_question_words), normalized over question
# words for each passage word.
passage_question_attention = MaskedSoftmax()(passage_question_similarity)
# Shape: (batch_size, num_passage_words, embedding_dim * 2)
# For each passage word: the encoded question combined using the attention above.
weighted_sum_layer = WeightedSum(name="passage_question_vectors",
                                 use_masking=False)
passage_question_vectors = weighted_sum_layer(
    [encoded_question, passage_question_attention])

# Min's paper finds, for each document word, the most similar question word to it, and
# computes a single attention over the whole document using these max similarities.
# Shape: (batch_size, num_passage_words)
question_passage_similarity = Max(axis=-1)(passage_question_similarity)
# Shape: (batch_size, num_passage_words)
question_passage_attention = MaskedSoftmax()(question_passage_similarity)
# Shape: (batch_size, embedding_dim * 2)
# NOTE(review): `weighted_sum_layer` is rebound to a new layer here, but its only call
# is the commented-out line that follows; the vector is computed by a Lambda instead,
# so this layer appears to be unused in the visible code.
weighted_sum_layer = WeightedSum(name="question_passage_vector",
                                 use_masking=False)
# question_passage_vector = weighted_sum_layer([encoded_passage, question_passage_attention])
question_passage_vector = Lambda(
    lambda x: K.sum(K.expand_dims(x[0], axis=-1) * x[1], -2))(