示例#1
0
 def call(self, x):
     """Return softmax weights for ``x`` reshaped to ``(-1, 15, 1)``.

     ``x`` is multiplied by ``self.kernel``, the product is summed over its
     last axis, and the sums are normalised with a softmax.
     """
     projected = K.dot(x, self.kernel)
     scores = K.sum(projected, axis=-1)
     weights = K.softmax(scores)
     # The middle dimension of the result is hard-coded to 15 entries.
     return K.reshape(weights, (-1, 15, 1))
 def call(self, x, mask=None):
     """Reduce ``x`` along axis 1 with softmax weights from ``self.feedforward``.

     ``mask`` is accepted for interface compatibility but is not used.
     """
     # Drop the trailing axis of the feed-forward output before normalising.
     scores = K.squeeze(self.feedforward(x), axis=-1)
     attention = K.softmax(scores)
     return K.batch_dot(x, attention, axes=1)
示例#3
0
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Reduce RNN/Bi-RNN outputs over time with a learned attention vector.

    The idea was proposed by Z. Yang et al., "Hierarchical Attention Networks
    for Document Classification", 2016
    (http://www.aclweb.org/anthology/N16-1174); the variable names follow
    that article.

    Args:
        inputs: The RNN outputs (not the final state).
            For a unidirectional RNN this is a single `Tensor` shaped
            `[batch_size, max_time, cell.output_size]`
            (or `[max_time, batch_size, cell.output_size]` when
            `time_major` is true).
            For a bidirectional RNN this is a tuple
            `(outputs_fw, outputs_bw)` of such tensors; they are
            concatenated along the last axis.
        attention_size: Linear size of the attention weights.
        time_major: Layout of `inputs`. If true the tensors are
            `[max_time, batch_size, depth]` and are transposed to
            batch-major here; if false (default) they are already
            `[batch_size, max_time, depth]`.
        return_alphas: If true, also return the attention coefficients
            (useful for visualisation).

    Returns:
        The attention output `Tensor` shaped `[batch_size, depth]`, where
        `depth` is `cell.output_size` for an RNN or
        `cell_fw.output_size + cell_bw.output_size` for a Bi-RNN.
        When `return_alphas` is true, a tuple `(output, alphas)` with
        `alphas` shaped `[batch_size, max_time]`.
    """

    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        # `tf.transpose` is the public API; `tf.array_ops` is an internal
        # module that is not exported from the `tf` namespace.
        inputs = tf.transpose(inputs, [1, 0, 2])

    # D value - hidden size of the RNN layer
    hidden_size = inputs.shape[2].value

    # Trainable parameters
    w_omega = tf.Variable(
        tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = K.softmax(vu)  # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = K.sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output
示例#4
0
# Build a symbolic planner: rewards go in, a softmax policy and Q-values come out.
# NOTE(review): `r`, `matmap` and `mattrans` must already be defined before
# this point; `r` is only (re)assigned further down in the visible code.
rk = K.placeholder(len(r))
# Multiply `matmap` by the reward vector (as a column) ...
rfk = K.dot(K.constant(matmap), K.reshape(rk, (-1, 1)))
# ... and keep the result as a column vector for the update below.
rffk = K.reshape(rfk, (-1, 1))

# Initial value estimate is the mapped reward itself.
v = K.reshape(rfk, (-1, 1))
gamma = 0.90  # weight applied to the backed-up value in each iteration
beta = 10.0  # scale applied to Q before the softmax

# 50 unrolled iterations: one Q column per entry of `mattrans` (five are
# used), a softmax over those columns, then a softmax-weighted backup of Q.
for _ in range(50):
    q0 = K.dot(K.constant(mattrans[0]), v)
    q1 = K.dot(K.constant(mattrans[1]), v)
    q2 = K.dot(K.constant(mattrans[2]), v)
    q3 = K.dot(K.constant(mattrans[3]), v)
    q4 = K.dot(K.constant(mattrans[4]), v)
    Q = K.concatenate([q0, q1, q2, q3, q4])
    pi = K.softmax(beta * Q)
    v = rffk + gamma * K.reshape(K.sum(Q * pi, axis=1), (-1, 1))

# Compile the graph: feeding a reward vector returns the last `pi` and `Q`.
planner = K.function([rk], [pi, Q])

# Evaluate the planner for one concrete reward vector.
r = np.array([0, -1, -1, -1, 10])
piout, Qout = planner([r])


def findpol(grid, pi, r, c):
    """Write the most probable action for cell ``(r, c)`` into ``grid``.

    Only cells currently holding the value 6 are updated; any other cell is
    left untouched. The policy row for the cell is ``pi[r * ncols + c]``.
    Among the first five actions, the last one whose probability equals the
    row maximum is stored; if none matches, 6 is stored.
    """
    if grid[r][c] != 6:
        return
    row = r * ncols + c
    best = max(pi[row, :])
    chosen = 6
    for action in range(5):
        if pi[row, action] == best:
            chosen = action
    grid[r][c] = chosen
示例#5
0
 def compute_loss(self, inputs):
     """Return the mean categorical cross-entropy for ``(y_true, y_pred)``.

     ``inputs`` is unpacked into the targets and the raw predictions; the
     predictions are passed through a softmax before the cross-entropy.
     """
     targets, raw_pred = inputs
     probabilities = K.softmax(raw_pred)
     return K.mean(K.categorical_crossentropy(targets, probabilities))
def get_target_ranks(num_users=200,
                     num_words=5000,
                     mask=False,
                     user_data_ratio=0.,
                     save_probs=False):
    """Save rank results of the target dialogue model for train and test users.

    Loads the Cornell movie data split by user, restores the trained target
    model, wraps its softmax output in a backend function and passes that
    function to ``save_users_rank_results`` once for the training users
    (``member_label=1``) and once for the test users (``member_label=0``).

    Args:
        num_users: Number of users; also part of the model and output names.
        num_words: Vocabulary size for both source and target.
        mask: Forwarded to ``build_dialogue_model``.
        user_data_ratio: When strictly between 0 and 1, the held-out data of
            each training user is appended and the ``_dr`` names are used.
        save_probs: Forwarded to ``save_users_rank_results``.
    """
    (user_src_texts, user_trg_texts, test_user_src_texts,
     test_user_trg_texts, src_vocabs, trg_vocabs) = load_cornell_movie_by_user(
         num_users,
         num_words,
         test_on_user=True,
         user_data_ratio=user_data_ratio)

    train_users = sorted(user_src_texts.keys())
    test_users = sorted(test_user_src_texts.keys())

    # True only when a proper fraction of each user's data was used.
    partial_data = 0. < user_data_ratio < 1.

    dir_suffix = '_dr' if partial_data else ''
    save_dir = OUTPUT_PATH + 'target_{}{}/'.format(num_users, dir_suffix)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    model_path = 'cornell_movie_dialog'
    if partial_data:
        model_path += '_dr{}'.format(user_data_ratio)
        heldout_src_texts, heldout_trg_texts = load_train_users_heldout_data(
            train_users, src_vocabs, trg_vocabs)
        # Add each training user's held-out texts to that user's data.
        for user in train_users:
            user_src_texts[user] += heldout_src_texts[user]
            user_trg_texts[user] += heldout_trg_texts[user]

    model = build_dialogue_model(Vs=num_words,
                                 Vt=num_words,
                                 mask=mask,
                                 drop_p=0.)
    model.load_weights(MODEL_PATH + '{}_{}.h5'.format(model_path, num_users))

    src_input_var, trg_input_var = model.inputs
    trg_label_var = K.placeholder((None, None), dtype='float32')

    # The model output is normalised with a softmax before being exposed.
    prediction = K.softmax(model.output)
    fn_inputs = [
        src_input_var, trg_input_var, trg_label_var,
        K.learning_phase()
    ]
    prob_fn = K.function(fn_inputs, [prediction])

    # Arguments that are identical for the member and non-member runs.
    shared_kwargs = dict(save_probs=save_probs,
                         src_vocabs=src_vocabs,
                         trg_vocabs=trg_vocabs,
                         cross_domain=False,
                         prob_fn=prob_fn,
                         save_dir=save_dir)

    save_users_rank_results(users=train_users,
                            user_src_texts=user_src_texts,
                            user_trg_texts=user_trg_texts,
                            member_label=1,
                            **shared_kwargs)
    save_users_rank_results(users=test_users,
                            user_src_texts=test_user_src_texts,
                            user_trg_texts=test_user_trg_texts,
                            member_label=0,
                            **shared_kwargs)
示例#7
0
    model = Model(inputs=inp1, outputs=[x1, x2])
    model.compile(optimizer=opt,
                  loss=losses,
                  loss_weights=lossWeights,
                  metrics=["accuracy", "mse"])
    history = model.fit(x_train, {
        "recon": x_train,
        "classacc": y_train
    },
                        validation_data=(x_test, {
                            "recon": x_test,
                            "classacc": y_test
                        }),
                        epochs=num_epochs,
                        verbose=1)
    probabilities = K.get_value(K.softmax(
        model.get_layer('tinyLayerE').logits))
    dl = np.zeros(model.get_layer('tinyLayerE').logits.shape)
    p = K.get_value(model.get_layer('tinyLayerE').logits)
    for j in range(dl.shape[0]):
        ind = np.argmax(p, axis=None)
        x = ind // dl.shape[1]
        y = ind % dl.shape[1]
        dl[x][y] = 1
        p[x] = -np.ones(dl.shape[1])
        p[:, y] = -np.ones(dl.shape[0])

    indices = K.get_value(K.argmax(dl))

    hist_df = pd.DataFrame(history.history)
    hist_csv_file = rd + ds + "_" + str(nfeat) + "_" + str(ii) + "_history.csv"
    with open(hist_csv_file, mode='w') as f:
示例#8
0
 def __call__(self, tensor):
     """Return the softmax of ``tensor`` divided by ``self.temperature``."""
     scaled = tensor / self.temperature
     return K.softmax(scaled)
 def identity_loss_v3(y_true, y_pred):
     """Cross-entropy between group-averaged targets and predictions.

     Both tensors are reshaped to ``(-1, select, 30)`` and averaged over
     axis 1; the averaged predictions go through a softmax. The mean
     cross-entropy is then added to ``y_pred * 0``, so the returned value
     is the scalar loss combined with a zeroed copy of ``y_pred``.
     """
     group_shape = (-1, select, 30)
     true_mean = K.mean(K.reshape(y_true, group_shape), axis=1)
     pred_mean = K.mean(K.reshape(y_pred, group_shape), axis=1)
     pred_probs = K.softmax(pred_mean)
     # Argument order (predictions first) is kept exactly as in the original.
     xent = K.categorical_crossentropy(pred_probs, true_mean)
     return K.mean(xent) + y_pred * 0
def calculate_attention_weight(confidences, att_preds):
    """Sum the softmaxed element-wise products of paired tensors.

    For every pair taken in lockstep from ``confidences`` and ``att_preds``
    a ``Lambda`` layer computes ``softmax(confidence * att_pred)``; the
    results are combined with an ``Add`` layer.
    """
    weighted = [
        Lambda(lambda pair: K.softmax(pair[0] * pair[1]))([conf, pred])
        for conf, pred in zip(confidences, att_preds)
    ]
    return Add()(weighted)
示例#11
0
 def call(self, x):
     """Return the softmax of ``x`` times ``self.W_s`` plus ``self.b_s``."""
     logits = K.dot(x, self.W_s) + self.b_s
     return K.softmax(logits)
示例#12
0
文件: main.py 项目: take5v/Learning
 def temperature_softmax(x):
     """Return the softmax of ``x`` divided by the temperature ``T``."""
     scaled = x / T
     return K.softmax(scaled)
def main():
    """Train an autoencoder with categorical latents on MNIST and plot samples.

    Builds a convolutional encoder that outputs ``num_latents`` groups of
    ``classes_per_latent`` logits, a decoder made of dense and transposed
    convolution layers, trains the combined model with a reconstruction
    term plus a negative-entropy term, and finally shows a 15x15 grid of
    digits decoded from random one-hot latent codes.
    """
    # Load the MNIST dataset
    (x_train, y_train_), (x_test, y_test_) = mnist.load_data()

    # Add a channel axis and scale pixel values to [0, 1].
    image_size = x_train.shape[1]
    x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
    x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # Network parameters
    input_shape = (image_size, image_size, 1)
    batch_size = 100
    kernel_size = 3
    filters = 16
    num_latents = 32
    classes_per_latent = 10  # the latent is assumed to be num_latents variables, each with classes_per_latent categories
    epochs = 30

    x_in = Input(shape=input_shape)
    x = x_in

    # Encoder: two stride-2 convolutions, doubling the filter count each time.
    for i in range(2):
        filters *= 2
        x = Conv2D(filters=filters,
                   kernel_size=kernel_size,
                   activation='relu',
                   strides=2,
                   padding='same')(x)

    # Remember the current shape; it is needed when building the decoder
    shape = K.int_shape(x)

    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    logits = Dense(num_latents * classes_per_latent)(x)
    logits = Reshape((num_latents, classes_per_latent))(logits)

    class GumbelSoftmax(Layer):
        """Gumbel-softmax reparameterisation layer.

        ``tau`` is stored as a backend variable so that a callback can
        change it during training.
        """
        def __init__(self, tau=1., **kwargs):
            super(GumbelSoftmax, self).__init__(**kwargs)
            self.tau = K.variable(tau)

        def call(self, inputs):
            # The Gumbel-noise sampling below is disabled; as written, the
            # layer applies a plain softmax over the last axis and
            # ``self.tau`` is not used in the forward pass.
            # epsilon = K.random_uniform(shape=K.shape(inputs))
            # epsilon = - K.log(epsilon + K.epsilon())
            # epsilon = - K.log(epsilon + K.epsilon())
            # outputs = inputs + epsilon
            # outputs = K.softmax(outputs / self.tau, -1)

            outputs = K.softmax(inputs, -1)
            return outputs

    gumbel_softmax = GumbelSoftmax()
    z_sample = gumbel_softmax(logits)

    # Decoder, i.e. the generator part.
    # It is first built as a standalone model and then called on the sample.
    latent_inputs = Input(shape=(num_latents, classes_per_latent))
    x = Reshape((num_latents * classes_per_latent, ))(latent_inputs)
    x = Dense(32, activation='relu')(x)
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(x)
    x = Reshape((shape[1], shape[2], shape[3]))(x)

    # Mirror of the encoder: two stride-2 transposed convolutions, halving
    # the filter count after each one.
    for i in range(2):
        x = Conv2DTranspose(filters=filters,
                            kernel_size=kernel_size,
                            activation='relu',
                            strides=2,
                            padding='same')(x)
        filters //= 2

    outputs = Conv2DTranspose(filters=1,
                              kernel_size=kernel_size,
                              activation='sigmoid',
                              padding='same')(x)

    # Build the decoder as a standalone model
    decoder = Model(latent_inputs, outputs)

    x_out = decoder(z_sample)

    # Build the full model
    vae = Model(x_in, x_out)

    # xent_loss is the reconstruction loss, kl_loss is the KL loss
    xent_loss = K.sum(K.binary_crossentropy(x_in, x_out), axis=[1, 2, 3])
    # Clip the probabilities away from 0 and 1 before taking the log.
    p = K.clip(K.softmax(logits, -1), K.epsilon(), 1 - K.epsilon())
    # With a uniform prior the KL term reduces to the negative entropy
    kl_loss = K.sum(p * K.log(p), axis=[1, 2])
    vae_loss = K.mean(xent_loss + kl_loss)

    # add_loss attaches the loss tensor directly to the model, so compile()
    # is called without a loss argument
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    vae.summary()

    class Trainer(Callback):
        """Callback that decays the ``tau`` variable of ``gumbel_softmax``."""
        # NOTE(review): ``Callback.__init__`` is not called here.
        def __init__(self):
            self.max_tau = 1.
            self.min_tau = 0.01
            self._tau = self.max_tau - self.min_tau

        def on_batch_begin(self, batch, logs=None):
            # tau starts at max_tau and moves towards min_tau: the part
            # above min_tau is multiplied by 0.999 after every batch.
            tau = self.min_tau + self._tau
            K.set_value(gumbel_softmax.tau, tau)
            self._tau *= 0.999

        def on_epoch_begin(self, epoch, logs=None):
            # Report the current tau at the start of every epoch.
            tau = K.eval(gumbel_softmax.tau)
            print('epoch: %s, tau: %.5f' % (epoch + 1, tau))

    trainer = Trainer()
    # No targets are passed: the loss was attached with add_loss above.
    vae.fit(x_train,
            shuffle=True,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None),
            callbacks=[trainer])

    # Decode random one-hot latent codes to see what the decoder generates
    n = 15  # figure with 15x15 digits
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))

    for i in range(n):
        for j in range(n):
            # One random category per latent variable, encoded one-hot.
            z_sample = np.zeros((1, num_latents, classes_per_latent))
            for iz in range(num_latents):
                jz = np.random.choice(classes_per_latent)
                z_sample[0, iz, jz] = 1
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            # Place the decoded digit into cell (i, j) of the grid image.
            figure[i * digit_size:(i + 1) * digit_size,
                   j * digit_size:(j + 1) * digit_size] = digit

    plt.figure(figsize=(10, 10))
    plt.imshow(figure, cmap='Greys_r')
    plt.show()
示例#14
0
# Context embedding with fixed (non-trainable) pretrained weights.
# NOTE(review): `context_input`, `question_input`, `questionEmbd`,
# `embedding_matrix` and the size constants are defined before this excerpt.
contextEmbd = Embedding(output_dim=EMBEDDING_DIM, input_dim=vocab_size,
                         weights=[embedding_matrix],
                         input_length=context_maxlen, trainable=False)(context_input)  # mask_zero=True is left disabled

# Encode question and context as sequences with bidirectional GRUs.
Q = Bidirectional(GRU(128, return_sequences=True))(questionEmbd)
D = Bidirectional(GRU(128, return_sequences=True))(contextEmbd)

# Summarise the question sequence into a single vector.
Q1 = Bidirectional(GRU(160, return_sequences=False))(Q)

# Repeat the question summary for every context position and concatenate it
# with the context encoding before the attention layer.
Qh1 = RepeatVector(context_maxlen)(Q1)
DQ = merge([Qh1, D], mode='concat', name='merge1')
D1 = SimpleAttention2(320, 320, return_sequences=True)(DQ)

# Answer-start pointer: one sigmoid score per position, then a softmax over
# the context_maxlen positions.
output1 = TimeDistributed(Dense(1, activation='sigmoid'))(D1) # batchsize, len, 1
output1reshap = Reshape((context_maxlen,))(output1)
answerPtrBegin_output = Lambda(lambda x: K.softmax(x))(output1reshap) # batchsize, len


# Answer-end pointer: the start distribution is repeated for every position
# and concatenated with the attention output.
D1merge = merge([D1, RepeatVector(context_maxlen)(answerPtrBegin_output)], \
	mode='concat', name='merge2')

output2 = TimeDistributed(Dense(1, activation='sigmoid'))(D1merge)
output2reshape = Reshape((context_maxlen,))(output2)
answerPtrEnd_output = Lambda(lambda x: K.softmax(x))(output2reshape)

# Two-input, two-output model trained with categorical cross-entropy on both
# pointers, each weighted 0.04.
model = Model(input=[context_input, question_input], output=[answerPtrBegin_output, answerPtrEnd_output])
rms = optimizers.RMSprop(lr=0.0001)
model.compile(optimizer=rms, loss='categorical_crossentropy',
              loss_weights=[.04, 0.04], metrics=['accuracy'])
model.summary()
# checkpoint
示例#15
0
 def call(self, inputs):
     """Return ``inputs`` summed over axis 1 with softmax attention weights.

     Scores come from ``tanh(inputs . W + b)`` multiplied by ``self.V``
     and are normalised with a softmax over axis 1.
     """
     # NOTE: ``self.V`` is rebound to its column-vector reshape on every call.
     self.V = K.reshape(self.V, (-1, 1))
     hidden = K.tanh(K.dot(inputs, self.W) + self.b)
     attention = K.softmax(K.dot(hidden, self.V), axis=1)
     return K.sum(attention * inputs, axis=1)
    def gradient_descent(self, sess, models):
        """Build the attack graph and return a function that runs it.

        The graph optimises an additive ``modifier`` on a starting image.
        ``loss1`` sums one margin term per model in ``models``; ``loss2``
        penalises every per-pixel difference from the original image that
        exceeds ``tau``.

        Args:
            sess: TensorFlow session used to run the graph.
            models: Non-empty sequence of callables mapping an image tensor
                to class scores. The scores of ``models[0]`` are the ones
                returned to the caller.

        Returns:
            A function ``doit(oimgs, labs, starts, tt, CONST)``. It returns
            ``(scores, origscores, nimg, CONST)``, or ``None`` when
            ``loss2`` is already at or above ``0.1 * self.EPS`` after the
            first constant tried.
        """
        def compare(outputs, labels):
            y = np.argmax(labels)
            pred = np.argmax(outputs)

            if self.TARGETED:
                return (pred == y)
            return (pred != y)

        shape = (1, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS)

        # the variable to optimize over
        modifier = tf.Variable(np.zeros(shape, dtype=np.float32))

        tau = tf.placeholder(tf.float32, [])
        simg = tf.placeholder(tf.float32, shape)
        timg = tf.placeholder(tf.float32, shape)
        tlab = tf.placeholder(tf.float32, (1, FLAGS.NUM_CLASSES))
        const = tf.placeholder(tf.float32, [])

        newimg = tf.clip_by_value(simg + modifier, 0, 1)

        def margin_loss(scores):
            # Margin term for one model's scores on the perturbed image.
            real = tf.reduce_sum((tlab) * scores)
            other = tf.reduce_max((1 - tlab) * scores - (tlab * 10000))
            if self.TARGETED:
                # if targeted, optimize for making the other class most likely
                return tf.maximum(0.0, other - real + self.CONFIDENCE)
            # if untargeted, optimize for making this class least likely.
            return tf.maximum(0.0, real - other + self.CONFIDENCE)

        # The first model also provides the scores reported to the caller.
        output = models[0](newimg)
        preds = [K.softmax(output)]
        orig_output = models[0](timg)
        loss1 = margin_loss(output)

        # Every further model contributes its own margin term.
        for extra_model in models[1:]:
            extra_output = extra_model(newimg)
            preds.append(K.softmax(extra_output))
            loss1 += margin_loss(extra_output)

        # sum up the losses
        loss2 = tf.reduce_sum(tf.maximum(0.0, tf.abs(newimg - timg) - tau))
        loss = const * loss1 + loss2

        # setup the adam optimizer and keep track of variables we're creating
        start_vars = set(x.name for x in tf.global_variables())
        optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
        train = optimizer.minimize(loss, var_list=[modifier])

        end_vars = tf.global_variables()
        new_vars = [x for x in end_vars if x.name not in start_vars]
        init = tf.variables_initializer(var_list=[modifier] + new_vars)

        def doit(oimgs, labs, starts, tt, CONST):
            prev_scores = None

            imgs = np.array(oimgs)
            starts = np.array(starts)

            # initialize the variables
            sess.run(init)
            while CONST < self.LARGEST_CONST:
                # try solving for each value of the constant;
                # the feed is identical for every step of this constant
                feed_dict = {
                    timg: imgs,
                    tlab: labs,
                    tau: tt,
                    simg: starts,
                    const: CONST,
                    K.learning_phase(): 0
                }
                for step in range(self.MAX_ITERATIONS):
                    # perform the update step
                    _, works, linf = sess.run([train, loss, loss2],
                                              feed_dict=feed_dict)

                    # it worked
                    # NOTE(review): ``step == CONST - 1`` compares the step
                    # index with the constant; ``self.MAX_ITERATIONS - 1``
                    # may have been intended. Kept as in the original.
                    if works < .0001 * CONST and (self.ABORT_EARLY
                                                  or step == CONST - 1):
                        # The attack counts only if every model is fooled.
                        # (The original iterated over ``len(outputs)``,
                        # which raises TypeError.)
                        fooled_all = all(
                            compare(sess.run(pred, feed_dict=feed_dict), labs)
                            for pred in preds)
                        if fooled_all:
                            scores, origscores, nimg = sess.run(
                                (output, orig_output, newimg),
                                feed_dict=feed_dict)
                            return scores, origscores, nimg, CONST

                # we didn't succeed, increase constant and try again

                if linf >= 0.1 * self.EPS:
                    # perturbation is too large
                    if prev_scores is None:
                        return prev_scores
                    return prev_scores, prev_origscores, prev_nimg, CONST
                else:
                    # didn't reach target confidence
                    CONST *= self.const_factor

                prev_scores, prev_origscores, prev_nimg = sess.run(
                    (output, orig_output, newimg), feed_dict=feed_dict)

            scores, origscores, nimg = sess.run((output, orig_output, newimg),
                                                feed_dict=feed_dict)
            return scores, origscores, nimg, CONST

        return doit
def get_shadow_ranks(exp_id=0,
                     num_users=200,
                     num_words=5000,
                     mask=False,
                     cross_domain=False,
                     rnn_fn='lstm',
                     h=128,
                     emb_h=128,
                     rerun=False):
    """Save rank results of one shadow dialogue model for its users.

    Loads the list of shadow training users, the matching data set and the
    trained shadow model, wraps the model's softmax output in a backend
    function and passes it to ``save_users_rank_results`` once for the
    shadow training users (``member_label=1``) and once for the shadow test
    users (``member_label=0``).

    Args:
        exp_id: Shadow experiment index; part of the file names.
        num_users: Number of users; part of the file names.
        num_words: Vocabulary size for both source and target.
        mask: Forwarded to ``build_dialogue_model``.
        cross_domain: Selects the cross-domain data loader and model file.
        rnn_fn: Recurrent cell name; forwarded to ``build_dialogue_model``
            and part of the file names.
        h: Forwarded to ``build_dialogue_model`` as ``h``.
        emb_h: Forwarded to ``build_dialogue_model`` as ``demb``.
        rerun: Forwarded to ``save_users_rank_results``.
    """
    shadow_user_path = 'shadow_users{}_{}_{}_{}.npz'.format(
        exp_id, rnn_fn, num_users, 'cd' if cross_domain else '')
    shadow_train_users = np.load(MODEL_PATH + shadow_user_path)['arr_0']
    shadow_train_users = list(shadow_train_users)
    # A single pre-formatted argument prints the same text on Python 2 and 3;
    # the original Python 2 print statement is a syntax error on Python 3.
    print('{} {}'.format(shadow_user_path, shadow_train_users))

    save_dir = OUTPUT_PATH + 'shadow_exp{}_{}/'.format(exp_id, num_users)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    if cross_domain:
        user_src_texts, user_trg_texts, test_user_src_texts, test_user_trg_texts, src_vocabs, trg_vocabs \
            = load_cross_domain_shadow_user_data(shadow_train_users, num_users, num_words)
    else:
        user_src_texts, user_trg_texts, test_user_src_texts, test_user_trg_texts, src_vocabs, trg_vocabs \
            = load_shadow_user_data(shadow_train_users, num_users, num_words)
    shadow_test_users = sorted(test_user_src_texts.keys())

    model_path = '{}_shadow_exp{}_{}_{}.h5'.format(
        'ubuntu_dialog' if cross_domain else 'cornell_movie_dialog', exp_id,
        rnn_fn, num_users)

    model = build_dialogue_model(Vs=num_words,
                                 Vt=num_words,
                                 mask=mask,
                                 drop_p=0.,
                                 h=h,
                                 demb=emb_h,
                                 rnn_fn=rnn_fn)
    model.load_weights(MODEL_PATH + model_path)

    src_input_var, trg_input_var = model.inputs
    prediction = model.output
    trg_label_var = K.placeholder((None, None), dtype='float32')

    # The model output is normalised with a softmax before being exposed.
    prediction = K.softmax(prediction)
    prob_fn = K.function(
        [src_input_var, trg_input_var, trg_label_var,
         K.learning_phase()], [prediction])

    save_users_rank_results(users=shadow_train_users,
                            rerun=rerun,
                            user_src_texts=user_src_texts,
                            user_trg_texts=user_trg_texts,
                            src_vocabs=src_vocabs,
                            trg_vocabs=trg_vocabs,
                            cross_domain=cross_domain,
                            prob_fn=prob_fn,
                            save_dir=save_dir,
                            member_label=1)
    save_users_rank_results(users=shadow_test_users,
                            rerun=rerun,
                            user_src_texts=test_user_src_texts,
                            user_trg_texts=test_user_trg_texts,
                            src_vocabs=src_vocabs,
                            trg_vocabs=trg_vocabs,
                            cross_domain=cross_domain,
                            prob_fn=prob_fn,
                            save_dir=save_dir,
                            member_label=0)
示例#18
0
def softmax_by_string(t):
    """Apply a softmax to each of the first ``NUM_STRINGS`` slices of ``t``.

    Slice ``i`` is ``t[:, i, :]``; each slice is softmaxed on its own, given
    back its axis 1, and the slices are concatenated along that axis.
    """
    # The unused ``K.shape(t)`` local of the original was removed.
    per_string = [
        K.expand_dims(K.softmax(t[:, i, :]), axis=1)
        for i in range(NUM_STRINGS)
    ]
    return K.concatenate(per_string, axis=1)
示例#19
0
def memLstm_custom_model(hparams, context, context_mask, utterances):

    print("context_shape: ", context._keras_shape)
    print("utterances_shape: ", utterances._keras_shape)
    print("context_mask: ", context_mask._keras_shape)

    # Use embedding matrix pretrained by Gensim
    embeddings_W = np.load(hparams.embedding_path)
    print("embeddings_W: ", embeddings_W.shape)

    ################################## Define Regular Layers ##################################
    # Utterances Embedding (Output shape: NUM_OPTIONS(100) x BATCH_SIZE(?) x LEN_SEQ(160) x EMBEDDING_DIM(300))
    embedding_context_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_context_len,
        mask_zero=True,
        trainable=False)

    embedding_utterance_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_utterance_len,
        mask_zero=True,
        trainable=False)

    # Define LSTM Context encoder 1
    LSTM_A = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_context_len,
                               hparams.memn2n_embedding_dim + 2),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=True,
                  return_sequences=True)

    # Define LSTM Utterances encoder
    LSTM_B = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_utterance_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=False)
    '''
    # Define LSTM Context encoder 2
    LSTM_C = LSTM(hparams.memn2n_rnn_dim,
                        input_shape=(hparams.max_context_len, hparams.memn2n_embedding_dim+2),
                        unit_forget_bias=True,
                        return_state=False,
                        return_sequences=True)
    '''

    # Define Dense layer to transform utterances
    Dense_1 = Dense(hparams.memn2n_rnn_dim,
                    use_bias=False,
                    kernel_initializer=keras.initializers.TruncatedNormal(
                        mean=0.0, stddev=1.0, seed=None),
                    input_shape=(hparams.memn2n_rnn_dim, ))

    # Define Dense layer to do softmax
    Dense_2 = Dense(1,
                    use_bias=False,
                    kernel_initializer=keras.initializers.TruncatedNormal(
                        mean=0.0, stddev=1.0, seed=None),
                    input_shape=(hparams.memn2n_rnn_dim, ))

    ################################## Define Custom Layers ##################################
    # Define repeat element layer
    custom_repeat_layer = Lambda(
        lambda x: K.repeat_elements(x, hparams.max_context_len, 1))
    custom_repeat_layer2 = Lambda(
        lambda x: K.repeat_elements(x, hparams.num_utterance_options, 1))

    # Expand dimension layer
    expand_dim_layer = Lambda(lambda x: K.expand_dims(x, axis=1))

    # Amplify layer
    amplify_layer = Lambda(lambda x: x * hparams.amplify_val)

    # Define Softmax layer
    softmax_layer = Lambda(lambda x: K.softmax(Masking()(x), axis=-1))
    softmax_layer2 = Lambda(lambda x: K.softmax(Masking()(x), axis=1))

    # Define Stack & Concat layers
    Stack = Lambda(lambda x: K.stack(x, axis=1))

    # Naming tensors
    responses_dot_layer = Lambda(lambda x: x, name='responses_dot')
    responses_attention_layer = Lambda(lambda x: x, name='responses_attention')
    context_attention_layer = Lambda(lambda x: x, name='context_attention')

    # Concat = Lambda(lambda x: K.concatenate(x, axis=1))

    # Sum up last dimension
    Sum = Lambda(lambda x: K.sum(x, axis=-1))
    Sum2 = Lambda(lambda x: K.sum(x, axis=1))

    # Normalize layer
    Normalize = Lambda(lambda x: K.l2_normalize(x, axis=-1))

    # Define tensor slice layer
    GetFirstHalfTensor = Lambda(lambda x: x[:, :, :hparams.memn2n_rnn_dim])
    GetFirstTensor = Lambda(lambda x: x[:, 0, :])
    GetLastHalfTensor = Lambda(lambda x: x[:, :, hparams.memn2n_rnn_dim:])
    GetLastTensor = Lambda(lambda x: x[:, -1, :])

    GetReverseTensor = Lambda(lambda x: K.reverse(x, axes=1))

    ################################## Apply layers ##################################
    # Prepare Masks
    utterances_mask = Reshape((1, hparams.max_context_len))(context_mask)
    utterances_mask = custom_repeat_layer2(utterances_mask)
    context_mask = Reshape((hparams.max_context_len, 1))(context_mask)

    # Context Embedding: (BATCH_SIZE(?) x CONTEXT_LEN x EMBEDDING_DIM)
    context_embedded = embedding_context_layer(context)
    print("context_embedded: ", context_embedded.shape)
    print("context_embedded (history): ", context_embedded._keras_history,
          '\n')
    # Skip this?
    # context_embedded = Concatenate(axis=-1)([context_embedded, context_speaker])

    # Utterances Embedding: (BATCH_SIZE(?) x NUM_OPTIONS x UTTERANCE_LEN x EMBEDDING_DIM)
    utterances_embedded = TimeDistributed(
        embedding_utterance_layer,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len))(utterances)
    print("Utterances_embedded: ", utterances_embedded.shape)
    print("Utterances_embedded (history): ",
          utterances_embedded._keras_history, '\n')

    # Encode context A: (BATCH_SIZE(?) x CONTEXT_LEN x RNN_DIM)
    all_context_encoded_Forward,\
    all_context_encoded_Forward_h,\
    all_context_encoded_Forward_c = LSTM_A(context_embedded)


    all_context_encoded_Backward,\
    all_context_encoded_Backward_h,\
    all_context_encoded_Backward_c = LSTM_A(Masking()(GetReverseTensor(context_embedded)))#,
    #initial_state=[all_context_encoded_Forward_h, all_context_encoded_Forward_c])
    all_context_encoded_Backward = Masking()(
        GetReverseTensor(all_context_encoded_Backward))

    # print("context_encoded_A: ", len(context_encoded_A))
    print("all_context_encoded_Forward: ", all_context_encoded_Forward.shape)
    print("all_context_encoded_Forward (history): ",
          all_context_encoded_Forward._keras_history)
    print("all_context_encoded_Backward: ", all_context_encoded_Backward.shape)
    print("all_context_encoded_Backward (history): ",
          all_context_encoded_Backward._keras_history, '\n')

    # Define bi-directional
    all_context_encoded_Bidir = Add()(
        [all_context_encoded_Forward, all_context_encoded_Backward])

    # Encode utterances B: (BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
    all_utterances_encoded_B = TimeDistributed(
        LSTM_B,
        input_shape=(hparams.num_utterance_options, hparams.max_utterance_len,
                     hparams.memn2n_embedding_dim))(utterances_embedded)
    all_utterances_encoded_B = TimeDistributed(
        Dense_1,
        input_shape=(hparams.num_utterance_options,
                     hparams.memn2n_rnn_dim))(all_utterances_encoded_B)
    print("all_utterances_encoded_B: ", all_utterances_encoded_B.shape)
    print("all_utterances_encoded_B: (history)",
          all_utterances_encoded_B._keras_history, '\n')

    responses_attention = []
    responses_dot = []
    for i in range(hparams.hops):
        print(str(i + 1) + 'th hop:')
        # 1st Attention & Weighted Sum
        # between Utterances_B(NUM_OPTIONS x RNN_DIM) and Contexts_encoded_Forward(CONTEXT_LEN x RNN_DIM)
        # and apply Softmax
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        attention_Forward = Dot(axes=[2, 2])(
            [all_utterances_encoded_B, all_context_encoded_Forward])
        dot_Forward = attention_Forward
        attention_Forward = amplify_layer(attention_Forward)
        attention_Forward = Add()([attention_Forward, utterances_mask])
        attention_Forward = softmax_layer(attention_Forward)
        print("attention_Forward: ", attention_Forward.shape)
        print("attention_Forward: (history)", attention_Forward._keras_history)

        # between Attention(NUM_OPTIONS x CONTEXT_LEN) and Contexts_A(CONTEXT_LEN x RNN_DIM)
        # equivalent to weighted sum of Contexts_A according to Attention
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        weighted_sum_Forward = Dot(axes=[2, 1])(
            [attention_Forward, all_context_encoded_Bidir])
        print("weighted_sum: ", weighted_sum_Forward.shape)
        print("weighted_sum: (history)", weighted_sum_Forward._keras_history,
              '\n')

        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        all_utterances_encoded_B = Add()(
            [weighted_sum_Forward, all_utterances_encoded_B])

        # 2nd Attention & Weighted Sum
        # between Utterances_B(NUM_OPTIONS x RNN_DIM) and Contexts_encoded_Backward(CONTEXT_LEN x RNN_DIM)
        # and apply Softmax
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        attention_Backward = Dot(axes=[2, 2])(
            [all_utterances_encoded_B, all_context_encoded_Backward])
        dot_Backward = attention_Backward
        attention_Backward = amplify_layer(attention_Backward)
        attention_Backward = Add()([attention_Backward, utterances_mask])
        attention_Backward = softmax_layer(attention_Backward)

        print("attention_Backward: ", attention_Backward.shape)
        print("attention_Backward: (history)",
              attention_Backward._keras_history)

        # between Attention(NUM_OPTIONS x CONTEXT_LEN) and Contexts_A(CONTEXT_LEN x RNN_DIM)
        # equivalent to weighted sum of Contexts_A according to Attention
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        weighted_sum_Backward = Dot(axes=[2, 1])(
            [attention_Backward, all_context_encoded_Bidir])
        print("weighted_sum_Backward: ", weighted_sum_Backward.shape)
        print("weighted_sum_Backward: (history)",
              weighted_sum_Backward._keras_history, '\n')

        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        all_utterances_encoded_B = Add()(
            [weighted_sum_Backward, all_utterances_encoded_B])

        dot_Forward = Reshape((1, hparams.num_utterance_options,
                               hparams.max_context_len))(dot_Forward)
        dot_Backward = Reshape((1, hparams.num_utterance_options,
                                hparams.max_context_len))(dot_Backward)
        att_Forward = expand_dim_layer(attention_Forward)
        att_Backward = expand_dim_layer(attention_Backward)

        merge_dots = Concatenate(axis=1)([dot_Forward, dot_Backward])
        merge_responses = Concatenate(axis=1)([att_Forward, att_Backward])
        responses_dot.append(merge_dots)
        responses_attention.append(merge_responses)

        print("repsonses_attention[i]:", merge_responses._keras_shape)

        if i < hparams.hops - 1:
            continue
            '''
            temp = all_context_encoded_Forward
            all_context_encoded_Forward = all_context_encoded_Backward
            all_context_encoded_Backward = temp
            '''
        else:
            print("hop ended")

            ############# Attention to Context #############
            # (Output shape: ? x MAX_CONTEXT_LEN x 1)
            attention_Forward_wrt_context =\
            TimeDistributed(Dense_2,
                            input_shape=(hparams.max_context_len,
                                        hparams.memn2n_rnn_dim))(all_context_encoded_Forward)
            attention_Forward_wrt_context = amplify_layer(
                attention_Forward_wrt_context)
            attention_Forward_wrt_context = Add()(
                [attention_Forward_wrt_context, context_mask])
            attention_Forward_wrt_context = softmax_layer2(
                attention_Forward_wrt_context)

            # (Output shape: ? x 1 x RNN_DIM)
            weighted_sum_Forward_wrt_context = Dot(axes=[1, 1])(
                [attention_Forward_wrt_context, all_context_encoded_Bidir])

            # (Output shape: ? x MAX_CONTEXT_LEN x 1)
            attention_Backward_wrt_context =\
            TimeDistributed(Dense_2,
                            input_shape=(hparams.max_context_len,
                                        hparams.memn2n_rnn_dim))(all_context_encoded_Backward)
            attention_Backward_wrt_context = amplify_layer(
                attention_Backward_wrt_context)
            attention_Backward_wrt_context = Add()(
                [attention_Backward_wrt_context, context_mask])
            attention_Backward_wrt_context = softmax_layer2(
                attention_Backward_wrt_context)

            # (Output shape: ? x 1 x RNN_DIM)
            weighted_sum_Backward_wrt_context = Dot(axes=[1, 1])(
                [attention_Backward_wrt_context, all_context_encoded_Bidir])

            att_Forward_wrt_context = Reshape(
                (1, hparams.max_context_len))(attention_Forward_wrt_context)
            att_Backward_wrt_context = Reshape(
                (1, hparams.max_context_len))(attention_Backward_wrt_context)
            context_attention = Concatenate(axis=1)(
                [att_Forward_wrt_context, att_Backward_wrt_context])

            context_encoded_AplusC = Add()([
                weighted_sum_Forward_wrt_context,
                weighted_sum_Backward_wrt_context
            ])
            #context_encoded_A = Dense_1(context_encoded_A)
            context_encoded_AplusC = Reshape(
                (1, hparams.memn2n_rnn_dim))(context_encoded_AplusC)
            print("context_encoded_AplusC: ", context_encoded_AplusC.shape)
            print("context_encoded_AplusC: (history)",
                  context_encoded_AplusC._keras_history, '\n')

            # (Output shape: ? x 1 x NUM_OPTIONS(100))
            logits = Dot(axes=[2, 2])(
                [context_encoded_AplusC, all_utterances_encoded_B])
            logits = Reshape((hparams.num_utterance_options, ))(logits)
            print("logits: ", logits.shape)
            print("logits: (history)", logits._keras_history, '\n')

            # Softmax layer for probability of each of Dot products in previous layer
            # Softmaxing logits (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100))
            probs = Activation('softmax', name='probs')(logits)
            print("probs: ", probs.shape)
            print("final History: ", probs._keras_history, '\n')

    # Return probabilities(likelihoods) of each of utterances
    # Those will be used to calculate the loss ('sparse_categorical_crossentropy')
    if hparams.hops == 1:
        responses_dot = Reshape((1, 2, hparams.num_utterance_options,
                                 hparams.max_context_len))(responses_dot[0])
        responses_attention = Reshape(
            (1, 2, hparams.num_utterance_options,
             hparams.max_context_len))(responses_attention[0])

    else:
        responses_dot = Stack(responses_dot)
        responses_attention = Stack(responses_attention)

    responses_dot = responses_dot_layer(responses_dot)
    responses_attention = responses_attention_layer(responses_attention)
    context_attention = context_attention_layer(context_attention)
    print("repsonses_attention:", responses_attention._keras_shape)
    print("context_attention:", context_attention._keras_shape)
    return probs, context_attention, responses_attention, responses_dot
示例#20
0
 def soft_logloss(y_true, y_pred):
     """Log loss between temperature-softened targets and the soft predictions.

     Both tensors are sliced from column ``nb_classes`` onward. The target
     slice is divided by ``temperature`` and passed through a softmax before
     being handed, together with the prediction slice, to ``logloss``.
     """
     soft_targets = K.softmax(y_true[:, nb_classes:] / temperature)
     soft_predictions = y_pred[:, nb_classes:]
     return logloss(soft_targets, soft_predictions)
 def call(self, x):
     """Project ``x`` through ``self.kernel`` and softmax the projection."""
     projected = K.dot(x, self.kernel)
     return K.softmax(projected)
示例#22
0
    def call(self, inputs):
        """Compute multi-head attention for ``inputs = [q, k, v, (v_mask, (q_mask))]``.

        The first three entries are the query, key and value tensors. An
        optional fourth entry is the value mask and an optional fifth entry is
        the query mask; per the original author's note each mask has shape
        ``[batch_size, seq_len]`` or ``[batch_size, seq_len, 1]``.

        ``self.mask_right`` selects an additional attention mask:
        ``True`` builds a lower-triangular (band-part) mask, any other value
        except ``False``/``None`` is treated as an externally supplied 0/1
        matrix of shape ``[q_len, k_len]``, and ``False``/``None`` disables it.

        Side effects: stores the attention weights in ``self.a`` and, when an
        external mask matrix is used, the additive mask in ``self.mask``.

        Returns the attended output reshaped to ``(-1, q_len, self.out_dim)``
        after the query mask has been applied multiplicatively.
        """
        q, k, v = inputs[:3]
        v_mask, q_mask = None, None
        # Masks are optional trailing inputs: value mask first, then query mask.
        if len(inputs) > 3:
            v_mask = inputs[3]
            if len(inputs) > 4:
                q_mask = inputs[4]

        # Linear projections.
        qw = self.reuse(self.q_dense, q)
        kw = self.reuse(self.k_dense, k)
        vw = self.reuse(self.v_dense, v)

        # Split the projected features into heads.
        qw = K.reshape(qw, (-1, K.shape(qw)[1], self.heads, self.key_size))
        kw = K.reshape(kw, (-1, K.shape(kw)[1], self.heads, self.key_size))
        vw = K.reshape(vw,
                       (-1, K.shape(vw)[1], self.heads, self.size_per_head))

        # Move the head axis in front of the sequence axis.
        qw = K.permute_dimensions(qw, (0, 2, 1, 3))
        kw = K.permute_dimensions(kw, (0, 2, 1, 3))
        vw = K.permute_dimensions(vw, (0, 2, 1, 3))

        # Scaled dot-product scores.
        a = K.batch_dot(qw, kw, [3, 3]) / self.key_size**0.5

        # Swap axes 1 and 3 around to_mask (so the key axis sits at
        # position 1 while the value mask is added), then swap them back.
        a = K.permute_dimensions(a, (0, 3, 2, 1))
        a = to_mask(a, v_mask, 'add')
        a = K.permute_dimensions(a, (0, 3, 2, 1))

        # The original test used `or`, which is always true and sent
        # mask_right=False/None into the K.constant(...) branch below.
        if self.mask_right is not False and self.mask_right is not None:
            if self.mask_right is True:
                ones = K.ones_like(a[:1, :1])
                mask = (ones - K.tf.matrix_band_part(ones, -1, 0)) * 1e10
                a = a - mask
            else:
                # In this case mask_right is a 0/1 matrix passed in from
                # outside, with shape [q_len, k_len].
                mask = (1 - K.constant(self.mask_right)) * 1e10
                mask = K.expand_dims(K.expand_dims(mask, 0), 0)
                self.mask = mask
                a = a - mask

        a = K.softmax(a)
        self.a = a

        # Assemble the output.
        o = K.batch_dot(a, vw, [3, 2])
        o = K.permute_dimensions(o, (0, 2, 1, 3))
        o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
        o = to_mask(o, q_mask, 'mul')
        return o
def custom_softmax(x):
    """Softmax over the last axis of a 4-D tensor via a 2-D round trip.

    ``x`` is flattened to ``(d0 * d1 * d2, num_classes)`` using its dynamic
    shape, softmaxed, and reshaped back to ``(d0, d1, d2, num_classes)``.
    """
    dims = K.shape(x)
    d0, d1, d2 = dims[0], dims[1], dims[2]
    flat = K.reshape(x, (d0 * d1 * d2, num_classes))
    probs = K.softmax(flat)
    return K.reshape(probs, (d0, d1, d2, num_classes))
示例#24
0
def my_model(opt, word_index, embedding_matrix):
    """Assemble the sentence/aspect attention classifier and return it as a ``Model``.

    Args:
        opt: mapping of hyper-parameters; the keys read here are
            ``max_sentence_length``, ``lstm_hidden_size``, ``drop_rate``,
            ``dense_hidden_size``, ``reg_rate`` and ``leakyRelu_alpha``.
        word_index: mapping from a word to its embedding row, used to look up
            the aspect labels taken from ``aspect_label_id``.
        embedding_matrix: pre-trained embedding weights; its shape supplies the
            ``Embedding`` layer dimensions and the layer is frozen.

    Returns:
        A ``Model`` from the sentence input to the flattened per-aspect
        softmax output.
    """
    hidden = opt['lstm_hidden_size']
    drop = opt['drop_rate']

    def make_bilstm(layer_name):
        # Both encoders share the same hyper-parameters and differ only in name.
        return Bidirectional(LSTM(hidden,
                                  return_sequences=True,
                                  dropout=drop,
                                  recurrent_dropout=drop),
                             name=layer_name)

    # sentence_in = [batch, max_sentence_length]
    sentence_in = Input(shape=(opt['max_sentence_length'], ), name='Sentence')
    # aspect_ids = [1, N_ASPECT]; a constant built from the aspect label words
    aspect_ids = Lambda(
        lambda _: tf.constant([[word_index[w]
                                for w in aspect_label_id.keys()]]),
        name='Aspect')([])

    n_batch = K.shape(sentence_in)[0]

    # Embedding module (frozen, initialised from embedding_matrix)
    word_vectors = Embedding(*embedding_matrix.shape,
                             trainable=False,
                             embeddings_initializer=Constant(embedding_matrix),
                             name='WordVec')

    # BiLSTM module
    # sentence_enc = [batch_size, max_sentence_len, 2*lstm_hidden_size]
    sentence_enc = make_bilstm('BLSTM-Sen')(word_vectors(sentence_in))
    # aspect_enc = [1, N_ASPECT, 2*lstm_hidden_size]
    aspect_enc = make_bilstm('BLSTM-Asp')(word_vectors(aspect_ids))
    # Tile the single aspect encoding across the batch:
    # aspect_enc = [batch, N_ASPECT, 2*lstm_hidden_size]
    aspect_enc = Lambda(lambda t: tf.reshape(
        tf.tile(tf.reshape(t, (-1, )), [n_batch]),
        (n_batch, N_ASPECT, 2 * opt['lstm_hidden_size'])),
                        name='Repeat')(aspect_enc)

    # AOA module
    # scores = [batch_size, max_sentence_len, N_ASPECT]
    scores = Dot(-1, name='Project')([sentence_enc, aspect_enc])
    # weights = [batch_size, max_sentence_len, N_ASPECT] (column-wise softmax)
    weights = Softmax(1, name='Within-Aspect')(scores)
    # features = [batch_size, N_ASPECT, 2*lstm_hidden_size]
    features = Dot(1, name='Attention')([weights, sentence_enc])
    features = Dropout(drop, name='Dropout')(features)
    # features = [batch_size, N_ASPECT * 2 * lstm_hidden_size]
    features = Flatten(name='Flatten')(features)

    # Prediction module
    # features = [batch, dense_hidden_size]
    features = Dense(opt['dense_hidden_size'],
                     kernel_regularizer=regularizers.l2(opt['reg_rate']),
                     name='Asp-Senti-Clf-1')(features)
    features = LeakyReLU(alpha=opt['leakyRelu_alpha'],
                         name='LeakyReLU')(features)
    # features = [batch, N_SENTI * N_ASPECT]
    features = Dense(N_SENTI * N_ASPECT,
                     kernel_regularizer=regularizers.l2(opt['reg_rate']),
                     name='Asp-Senti-Clf-2')(features)
    # Softmax over sentiments within each aspect, then flatten again:
    # [batch, N_ASPECT, N_SENTI] -> [batch, N_SENTI * N_ASPECT]
    prediction = Lambda(lambda t: tf.reshape(
        K.softmax(tf.reshape(t, (n_batch, N_ASPECT, N_SENTI))),
        (n_batch, N_SENTI * N_ASPECT)),
                        name='Aspect-Softmax')(features)

    return Model(inputs=sentence_in, outputs=prediction)
示例#25
0
    def MultiHeadsAttModel(self,
                           In_agent,
                           In_neighbor,
                           l=5,
                           d=128,
                           dv=16,
                           dout=128,
                           nv=8,
                           suffix=-1):
        """Build one multi-head attention block over each agent's neighbors.

        Args:
            In_agent: agent features; reshaped here to
                ``(num_agents, 1, d)`` per sample.
            In_neighbor: neighbor-selection tensor that is batch-dotted with
                the tiled agent features (original note:
                ``[batch, agent, neighbor, agent]``).
            l: not used by this method.
            d: feature size of ``In_agent``.
            dv: per-head width.
            dout: width of the final dense layer.
            nv: number of heads.
            suffix: integer appended to the names of the dense layers.

        Returns:
            ``(out, att_record)`` where ``out`` is the output of the final
            ``Dense(dout)`` layer and ``att_record`` is the attention reshaped
            to ``(num_agents, nv, num_neighbors)`` per sample.
        """
        n_agents = self.num_agents
        head_width = dv * nv

        def dense_relu(layer_name, units):
            # Every dense layer in this block shares activation and initializer.
            return Dense(units,
                         activation='relu',
                         kernel_initializer='random_normal',
                         name=layer_name)

        def split_heads(tensor, rows):
            # [batch, agent, rows, dv*nv] -> [batch, agent, rows, dv, nv]
            #                             -> [batch, agent, nv, rows, dv]
            shaped = Reshape((n_agents, rows, dv, nv))(tensor)
            return Lambda(lambda t: K.permute_dimensions(
                t, (0, 1, 4, 2, 3)))(shaped)

        # --- agent representation ---
        # [batch, agent, dim] -> [batch, agent, 1, dim]
        agent_features = Reshape((n_agents, 1, d))(In_agent)

        # --- neighbor representation ---
        # [batch, agent, dim] -> (tile) [batch, agent, agent, dim]
        tiled_agents = RepeatVector3D(n_agents)(In_agent)
        # [batch, agent, neighbor, agent] x [batch, agent, agent, dim]
        #   -> [batch, agent, neighbor, dim]
        neighbor_features = Lambda(lambda pair: batch_dot(pair[0], pair[1]))(
            [In_neighbor, tiled_agents])

        # --- attention computation (multi-head) ---
        # [batch, agent, 1, dim] -> [batch, agent, nv, 1, dv]
        agent_heads = dense_relu('agent_repr_%d' % suffix,
                                 head_width)(agent_features)
        agent_heads = split_heads(agent_heads, 1)

        # [batch, agent, neighbor, dim] -> [batch, agent, nv, neighbor, dv]
        neighbor_heads = dense_relu('neighbor_repr_%d' % suffix,
                                    head_width)(neighbor_features)
        neighbor_heads = split_heads(neighbor_heads, self.num_neighbors)

        # [batch, agent, nv, 1, dv] x [batch, agent, nv, neighbor, dv]
        #   -> [batch, agent, nv, 1, neighbor]
        attention = Lambda(lambda pair: K.softmax(
            batch_dot(pair[0], pair[1], axes=[4, 4])))(
                [agent_heads, neighbor_heads])
        # [batch, agent, nv, 1, neighbor] -> [batch, agent, nv, neighbor]
        att_record = Reshape(
            (n_agents, nv, self.num_neighbors))(attention)

        # --- second embedding of the neighbors, used as attention values ---
        value_heads = dense_relu('neighbor_hidden_repr_%d' % suffix,
                                 head_width)(neighbor_features)
        value_heads = split_heads(value_heads, self.num_neighbors)

        # Weighted sum per head, then average over the head axis.
        pooled = Lambda(lambda pair: K.mean(
            batch_dot(pair[0], pair[1]), axis=2))([attention, value_heads])
        pooled = Reshape((n_agents, dv))(pooled)
        out = Dense(dout,
                    activation="relu",
                    kernel_initializer='random_normal',
                    name='MLP_after_relation_%d' % suffix)(pooled)
        return out, att_record
示例#26
0
def _softmax_last_axis(logits):
    """Return the softmax of *logits* along the last axis, computed in NumPy."""
    logits = np.asarray(logits)
    # Subtracting the per-row maximum keeps exp() from overflowing.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=-1, keepdims=True)


def predict_window_mulgpu(model, batch, imgs_test, img_deps, img_rows, img_cols, multiloss):
    """Predict a 3-D volume by sliding an overlapping window over it.

    Windows of size ``(img_deps, img_rows, img_cols)`` are moved with a stride
    of two thirds of the window size. Window origins that would run past the
    volume are clamped on every axis independently so each patch has the full
    window size. Patches are sent to ``model.predict`` in batches of exactly
    ``batch`` items; a final incomplete batch is zero-padded to ``batch`` and
    only its real patches are accumulated. Overlapping predictions are
    averaged.

    Assumes the volume is at least as large as the window on every axis.

    Args:
        model: object exposing ``predict(array, verbose=0)``.
        batch: number of patches per ``predict`` call.
        imgs_test: array whose first three axes are the volume to predict.
        img_deps, img_rows, img_cols: window size along each axis.
        multiloss: when true, ``predict`` returns a sequence and element
            ``[2]`` holds the logits to use.

    Returns:
        Array of shape ``imgs_test.shape[:3]`` with the averaged softmax
        probability of channel 1.
    """
    # Floor division keeps the strides integral on both Python 2 and 3.
    window_deps = (img_deps // 3) * 2
    window_rows = (img_rows // 3) * 2
    window_cols = (img_cols // 3) * 2

    x, y, z = imgs_test.shape[:3]
    score = np.zeros((x, y, z, 2), dtype='float32')
    score_num = np.zeros((x, y, z, 2), dtype='int16')

    def accumulate(box, origins):
        # Predict one (possibly zero-padded) batch and add the real patches.
        print('box:', box.shape)
        logits = model.predict(box, verbose=0)
        if multiloss:
            logits = logits[2]
        probs = _softmax_last_axis(logits)
        print('predict finish')
        for i, (d0, r0, c0) in enumerate(origins):
            region = (slice(d0, d0 + img_deps),
                      slice(r0, r0 + img_rows),
                      slice(c0, c0 + img_cols))
            score[region] += probs[i]
            score_num[region] += 1

    num = 0
    origins = []
    box_shape = (batch, img_deps, img_rows, img_cols, 1)
    box_test = np.zeros(box_shape, dtype="float32")

    for deps in range(0, x - img_deps + window_deps, window_deps):
        print(deps)
        # Clamp each axis on its own; the original elif chain fixed only one.
        d0 = min(deps, x - img_deps)
        for rows in range(0, y - img_rows + window_rows, window_rows):
            r0 = min(rows, y - img_rows)
            for cols in range(0, z - img_cols + window_cols, window_cols):
                c0 = min(cols, z - img_cols)
                box_test[len(origins), :, :, :, 0] = imgs_test[
                    d0:d0 + img_deps, r0:r0 + img_rows, c0:c0 + img_cols]
                origins.append((d0, r0, c0))
                if len(origins) == batch:
                    num += 1
                    print('num: ', num)
                    accumulate(box_test, origins)
                    origins = []
                    box_test = np.zeros(box_shape, dtype="float32")

    # Flush the trailing patches that did not fill a whole batch.
    if origins:
        num += 1
        print('num: ', num)
        accumulate(box_test, origins)

    score = score / score_num
    return score[:, :, :, 1]
示例#27
0
 def call(self, x):
     """Pool ``x`` over axis 1 using learned softmax attention weights.

     Scores are ``tanh(x . W + b)`` with the trailing axis squeezed away; their
     softmax is broadcast back over ``x`` and the weighted values are summed.
     """
     scores = K.tanh(K.dot(x, self.W) + self.b)
     scores = K.squeeze(scores, axis=-1)
     weights = K.expand_dims(K.softmax(scores), axis=-1)
     return K.sum(x * weights, axis=1)
示例#28
0
    def call(self, inputs):
        """Attend from a text tensor over the spatial positions of an image tensor.

        ``inputs`` is unpacked as ``(text, img, masks)``. The text is projected
        to queries with a dense layer, the image to keys and values with 1x1
        convolutions, and the query/key scores (optionally multiplied by the
        mask) are softmaxed and used to weight the flattened values.

        Side effects: stores ``masks`` on ``self.masks`` when it is not None,
        and records the static shapes of all intermediates on ``self.*_sh``.

        Returns the list ``[text, q, k, v, s, scores, beta, o]``.

        NOTE(review): the Dense/Conv2D layers are constructed inside ``call``
        rather than in ``build``/``__init__`` -- confirm their weights are
        tracked and reused as intended.
        NOTE(review): ``activation`` is not defined in this method; presumably
        a module-level name -- verify.
        """
        def hw_flatten(x):
            # Merge axes 1 and 2 of a 4-D tensor, keeping axis 3 as features.
            return kl.Reshape(target_shape=(int(x.shape[1]) * int(x.shape[2]),
                                            int(x.shape[3])))(x)
#             s = x.shape.as_list()
#             return K.reshape(x, shape=[-1,s[1]*s[2],s[3]])

        text, img, masks = inputs
        # NOTE(review): if masks is None, self.masks must already exist
        # (set elsewhere) or the check further down raises -- confirm.
        if masks is not None:
            self.masks = masks
#         self.text_input_shape = tuple(x1.shape[1:].as_list())
        # Queries from the text input.
        q = kl.Dense(self.filters_q, use_bias=True)(text)
        q = kl.Activation(activation)(q)
        #         q = kl.tanh(alpha=1.0)(q)
        # Keys from the image input (1x1 convolution).
        k = kl.Conv2D(filters=self.filters_k,
                      strides=(1, 1),
                      kernel_size=(1, 1),
                      padding='same')(img)
        k = kl.Activation(activation)(k)
        #         k = kl.tanh(alpha=1.0)(k)
        # Values from the image input (1x1 convolution).
        v = kl.Conv2D(filters=self.filters_v,
                      strides=(1, 1),
                      kernel_size=(1, 1),
                      padding='same')(img)
        v = kl.Activation(activation)(v)
        #         v = kl.tanh(alpha=1.0)(v)
        #         print('q.shape,k.shape,v.shape,',q.shape,k.shape,v.shape)
        # Query/key scores against the flattened, transposed keys.
        s = K.batch_dot(q, K.permute_dimensions(hw_flatten(k),
                                                (0, 2, 1)))  # # [bs, N, M]

        # NOTE(review): the mask is applied multiplicatively before the
        # softmax, so masked positions get a score of 0 rather than a large
        # negative value -- confirm this is intended.
        if self.masks is not None:
            beta = kl.Multiply()([s, self.masks])
        else:
            beta = s


#         print('s.shape:',s.shape)
        scores = K.softmax(beta, axis=-1)  # attention map

        #         self.beta_shape = tuple(beta.shape[1:].as_list())
        #         print('hw_flatten(v).shape:',hw_flatten(v).shape)
        # Attention-weighted sum of the flattened values.
        o = K.batch_dot(scores, hw_flatten(v))  # [bs, N, C]
        #         print('o.shape:',o.shape)
        #         o = K.reshape(o, shape=K.shape(x2))  # [bs, h, w, C]
        #         o = K.conv1d(o,
        #                      kernel=self.kernel_o,
        #                      strides=(1,), padding='same')
        #         o = K.bias_add(o, self.bias_o)
        #         o = kl.tanh(alpha=1.0)(o)
        #         print('o.shape:',o.shape)
        #         x_text = self.gamma1 * x1
        # #         print('x_text.shape:',x_text,x_text.shape)
        #         x_att = self.gamma2 * o
        # #         print('x_att.shape:',x_att,x_att.shape)
        #         x_out = K.concatenate([x_text,x_att],axis=-1) #kl.Concatenate()([x_text,x_att])
        #         print('x_out.shape:',x_out,x_out.shape)

        # Record the static shape of every intermediate on the instance.
        self.text_sh = tuple(text.shape.as_list())
        self.q_sh = tuple(q.shape.as_list())
        self.k_sh = tuple(k.shape.as_list())
        self.v_sh = tuple(v.shape.as_list())
        self.s_sh = tuple(s.shape.as_list())
        self.scores_sh = tuple(scores.shape.as_list())
        self.beta_sh = tuple(beta.shape.as_list())
        self.o_sh = tuple(o.shape.as_list())
        return [text, q, k, v, s, scores, beta, o]
示例#29
0
def yolo_head(feats, anchors, num_classes):
    """Decode the final convolutional features into box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last is assumed).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        Sigmoid box centers offset by the grid cell index and divided by the
        grid dimensions.
    box_wh : tensor
        Exponentiated box sizes scaled by the anchors and divided by the grid
        dimensions.
    box_conf : tensor
        Sigmoid objectness estimate for each box.
    box_class_pred : tensor
        Softmax over the class scores of each box.
    """
    num_anchors = len(anchors)
    # Anchors broadcast as batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # TODO: Remove or add option for static implementation.
    # Grid dimensions are read dynamically so the model can stay fully
    # convolutional.
    grid_dims = K.shape(feats)[1:3]  # assuming channels last
    grid_h, grid_w = grid_dims[0], grid_dims[1]

    # In YOLO the height index is the inner most iteration.
    height_index = K.arange(0, stop=grid_h)
    width_index = K.arange(0, stop=grid_w)
    height_index = K.tile(height_index, [grid_w])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    width_index = K.tile(K.expand_dims(width_index, 0), [grid_h, 1])
    width_index = K.flatten(K.transpose(width_index))

    cell_index = K.transpose(K.stack([height_index, width_index]))
    cell_index = K.reshape(cell_index, [1, grid_h, grid_w, 1, 2])
    cell_index = K.cast(cell_index, K.dtype(feats))

    # Split the channel axis into one slot per anchor.
    feats = K.reshape(feats,
                      [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    grid_dims = K.cast(K.reshape(grid_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    raw_xy = K.sigmoid(feats[..., :2])
    raw_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (raw_xy + cell_index) / grid_dims
    box_wh = raw_wh * anchors_tensor / grid_dims

    return box_xy, box_wh, box_confidence, box_class_probs
示例#30
0
文件: model.py 项目: luozm/Cdiscount
def DARC1(y_true, y_pred):
    """Categorical cross-entropy on softmaxed predictions plus a scaled penalty.

    The penalty is the maximum of the absolute values of ``y_pred`` summed
    over axis 0, weighted by a fixed factor of 0.001.
    """
    alpha = 0.001
    probabilities = K.softmax(y_pred)
    penalty = K.max(K.sum(K.abs(y_pred), axis=0))
    return K.categorical_crossentropy(y_true, probabilities) + alpha * penalty