Example #1
def ae(x, y, is_train, opt, epoch_t, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t: opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1

    res = {}
    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    z = layers.linear(H_enc,
                      num_outputs=opt.z_dim,
                      biases_initializer=biasInit,
                      scope='z')

    logits = discriminator_linear(z,
                                  opt,
                                  prefix='classify_',
                                  is_train=is_train)  # batch * 1
    prob = tf.nn.sigmoid(logits)

    correct_prediction = tf.equal(tf.round(prob), tf.round(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

    tf.summary.scalar('loss', loss)
    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)

    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'dis' in var.name]

    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        variables=d_vars,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=g,
            decay_rate=opt.decay_rate,
            decay_steps=int(epoch_t * opt.decay_ep)),
        learning_rate=opt.lr,
        summaries=summaries)

    return loss, train_op, accuracy
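
Note: the helper _clip_gradients_seperate_norm used in every layers.optimize_loss call on this page is not shown here. The sketch below is only a guess at its intent, based on its name and on the callable contract of optimize_loss (the clip_gradients callable receives a list of (gradient, variable) pairs and must return the same structure); it clips each gradient by its own norm rather than by a global norm, and is not the repository's actual code.

import tensorflow as tf

def _clip_gradients_seperate_norm(grads_and_vars, clip_gradients):
    # Sketch only: per-tensor norm clipping.
    gradients, variables = zip(*grads_and_vars)
    clipped = [None if g is None else tf.clip_by_norm(g, clip_gradients)
               for g in gradients]
    return list(zip(clipped, variables))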
Example #2
def ae(x, x_org, is_train, opt, epoch_t, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t: opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1

    res = {}
    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    z = layers.linear(H_enc,
                      num_outputs=opt.z_dim,
                      biases_initializer=biasInit,
                      scope='z')

    rec_loss, rec_sent_1, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t,
                                                     is_train)
    _, rec_sent_2, _ = lstm_decoder_embedding(z,
                                              x_org,
                                              W_norm,
                                              opt_t,
                                              is_train,
                                              feed_previous=True,
                                              is_reuse=True)

    res['rec_sents_feed_y'] = rec_sent_1
    res['rec_sents'] = rec_sent_2

    # compute total loss
    loss = rec_loss
    tf.summary.scalar('loss', loss)
    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)

    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=g,
            decay_rate=opt.decay_rate,
            decay_steps=int(epoch_t * opt.decay_ep)),
        learning_rate=opt.lr,
        summaries=summaries)

    return res, loss, train_op
Example #3
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)   # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)   # batch L emb 1
    res = {}
    # cnn encoder

    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    H_dec = H_enc

    if opt.model == 'rnn_rnn':
        loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
        _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)

        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2


    elif opt.model == 'cnn_rnn':
        # lstm decoder
        H_dec2 = tf.identity(H_dec)
        loss, rec_sent_1, _ = lstm_decoder(H_dec, x_org, opt)  #

        _, rec_sent_2, _ = lstm_decoder(H_dec, x_org, opt, feed_previous=True, is_reuse=True)

        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2

    else:

        # deconv decoder
        loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    tf.summary.scalar('loss', loss)
    summaries = [
                "learning_rate",
                "loss",
                "gradients",
                "gradient_norm",
                ]

    global_step = tf.Variable(0, trainable=False)


    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr,g: tf.train.exponential_decay(learning_rate=lr, global_step=g, decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries
        )
    return res, loss, train_op
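
None of these examples show how the graph-building function is actually driven. The fragment below is a purely illustrative TF 1.x wiring for the auto_encoder above; the placeholder shapes, the opt.maxlen field, and the load_batches iterator are hypothetical stand-ins, not part of the repository.

import tensorflow as tf

# Hypothetical placeholders; opt.maxlen and load_batches are invented names.
x = tf.placeholder(tf.int32, [opt.batch_size, opt.maxlen], name='x')
x_org = tf.placeholder(tf.int32, [opt.batch_size, opt.maxlen], name='x_org')

res, loss, train_op = auto_encoder(x, x_org, True, opt)  # is_train=True

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for x_batch, x_org_batch in load_batches():
        _, cur_loss = sess.run(
            [train_op, loss],
            feed_dict={x: x_batch, x_org: x_org_batch})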
Example #4
def vae(beta, x, x_org, is_train, opt, lr, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t: opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1

    res = {}
    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # infer latent variable z from H_enc
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    mu = layers.linear(H_enc, num_outputs=opt.z_dim, biases_initializer=biasInit, scope='mu')
    logvar = layers.linear(H_enc, num_outputs=opt.z_dim, biases_initializer=biasInit, scope='logvar')

    z = sample_z(mu, logvar)
    kl_loss = tf.reduce_mean(-0.5 * tf.reduce_sum(1 + logvar - tf.square(mu) - tf.exp(logvar), axis=-1))

    rec_loss, rec_sent_1, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train)
    _, rec_sent_2, _ = lstm_decoder_embedding(z, x_org, W_norm, opt_t, is_train, feed_previous=True, is_reuse=True)

    res['rec_sents_feed_y'] = rec_sent_1
    res['rec_sents'] = rec_sent_2

    # compute total loss
    loss = rec_loss + beta * kl_loss
    tf.summary.scalar('beta', beta)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('kl_loss', kl_loss)
    tf.summary.scalar('rec_loss', rec_loss)
    summaries = [
                "learning_rate",
                "loss",
                # "gradients",
                # "gradient_norm",
                ]
    global_step = tf.Variable(0, trainable=False)

    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        learning_rate=lr,
        summaries=summaries
        )

    return res, loss, rec_loss, kl_loss, train_op
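
sample_z is not defined on this page. Since mu and logvar parameterize a diagonal Gaussian and kl_loss is the standard closed-form KL term, it is presumably the usual reparameterization trick; the sketch below assumes exactly that and is not the repository's code.

import tensorflow as tf

def sample_z(mu, logvar):
    # Reparameterization: z = mu + sigma * eps with eps ~ N(0, I),
    # where sigma = exp(0.5 * logvar).
    eps = tf.random_normal(tf.shape(mu))
    return mu + tf.exp(0.5 * logvar) * eps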
Example #5
File: s2s.py  Project: dreasysnail/CoCon
def conditional_s2s(src, tgt, z, opt, opt_t=None, is_reuse_generator=None):
    if not opt_t: opt_t = opt
    res = {}
    if opt.use_tgt_z:
        W_norm_d = embedding_only(opt, prefix='d_', is_reuse=None)
        z, _ = encoder(tgt, W_norm_d, opt, l_temp=1, prefix='d_',
                       is_reuse=None, is_prob=None, is_padded=False)
    syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
        z, src, tgt, opt, is_reuse=is_reuse_generator, prefix='g_')

    if opt.global_feature:
        z_hat, _ = encoder(syn_one_hot, W_norm_d, opt, l_temp=1, prefix='d_',
                           is_reuse=True, is_prob=True, is_padded=False)
        z_loss = tf.reduce_sum(tf.square(z - z_hat)) / opt.batch_size / opt.n_hid
        res['z'] = z
        res['z_hat'] = z_hat
        res['z_loss'] = z_loss

    res['syn_sent'] = syn_sent
    g_cost = sup_loss + (z_loss * opt.lambda_z if opt.global_feature else 0)
    tf.summary.scalar('sup_loss', sup_loss)
    if opt.global_feature:
        tf.summary.scalar('z_loss', z_loss)
    summaries = [
        "learning_rate",
        "loss",
    ]
    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if 'g_' in var.name]
    train_op_g = layers.optimize_loss(
        g_cost,
        framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        variables=g_vars,
        learning_rate=opt.lr_g,
        summaries=summaries)
    return res, g_cost, train_op_g
Example #6
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    # print x.get_shape()  # batch L
    with tf.variable_scope("pretrain"):

        if not opt_t: opt_t = opt
        x_emb, W_norm = embedding(x, opt)  # batch L emb

        x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1

        res = {}

        H, res = conv_encoder(x_emb, is_train, opt, res)

        H_mean, H_log_sigma_sq = vae_classifier_2layer(H, opt)
        eps = tf.random_normal([opt.batch_size, opt.ef_dim],
                               0,
                               1,
                               dtype=tf.float32)
        H_dec = tf.add(H_mean, tf.multiply(tf.sqrt(tf.exp(H_log_sigma_sq)),
                                           eps))

        H_dec2 = tf.identity(H_dec)

        # print x_rec.get_shape()
        if opt.model == 'rnn_rnn':
            loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
            _, rec_sent_2, _ = seq2seq(x,
                                       x_org,
                                       opt,
                                       feed_previous=True,
                                       is_reuse=True)
            #res['logits'] = logits
            res['rec_sents_feed_y'] = rec_sent_1
            res['rec_sents'] = rec_sent_2

        elif opt.model == 'cnn_rnn':
            # lstm decoder
            if opt.rnn_share_emb:
                loss, rec_sent_1, _ = lstm_decoder_embedding(
                    H_dec2, x_org, W_norm, opt_t)
                # random_z = tf.random_normal([opt.batch_size, opt.ef_dim])
                _, rec_sent_2, _ = lstm_decoder_embedding(H_dec2,
                                                          x_org,
                                                          W_norm,
                                                          opt_t,
                                                          feed_previous=True,
                                                          is_reuse=True)

            else:
                loss, rec_sent_1, _ = lstm_decoder(H_dec2, x_org, opt_t)  #
                _, rec_sent_2, _ = lstm_decoder(H_dec2,
                                                x_org,
                                                opt_t,
                                                feed_previous=True,
                                                is_reuse=True)

            kl_loss = tf.reduce_mean(-0.5 * tf.reduce_mean(
                1 + H_log_sigma_sq - tf.square(H_mean) -
                tf.exp(H_log_sigma_sq), 1))
            loss += kl_loss

            res['rec_sents_feed_y'] = rec_sent_1
            res['rec_sents'] = rec_sent_2

        else:

            # deconv decoder
            H_dec2 = tf.expand_dims(tf.expand_dims(H_dec, 1), 1)
            loss, res = deconv_decoder(H_dec2, x_org, W_norm, is_train, opt_t,
                                       res)
            res['rec_sents'] = res['rec_sents'][:, (opt.filter_shape -
                                                    1):(opt.filter_shape - 1 +
                                                        opt.sentence)]

    # *tf.cast(tf.not_equal(x_temp,0), tf.float32)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('kl_loss', kl_loss)
    summaries = [
        "learning_rate",
        "loss",
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)

    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=g,
            decay_rate=opt.decay_rate,
            decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)

    # optimizer = tf.train.AdamOptimizer(learning_rate=opt.lr)  # Or another optimization algorithm.
    # train_op = optimizer.minimize(
    #     loss,
    #     aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)

    return res, loss, train_op  #, fake_gen
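
Example #4 and Example #6 both add a closed-form Gaussian KL term but reduce over the latent dimensions differently: Example #4 sums over dimensions (the standard KL divergence), while Example #6 averages over them, which rescales the term by one over the latent size. The standalone helper below is only meant to make the two variants explicit; it is not taken from the repository.

import tensorflow as tf

def gaussian_kl(mu, log_sigma_sq, sum_over_dims=True):
    # KL( N(mu, diag(sigma^2)) || N(0, I) ), reduced over latent dimensions
    # and then averaged over the batch.
    per_dim = -0.5 * (1.0 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq))
    reduce_fn = tf.reduce_sum if sum_over_dims else tf.reduce_mean
    return tf.reduce_mean(reduce_fn(per_dim, axis=-1))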
Example #7
def textGAN(x, opt):

    #res = {}
    res_ = {}

    with tf.variable_scope("pretrain"):
        #z = tf.random_uniform([opt.batch_size, opt.latent_size], minval=-1.,maxval=1.)
        z = tf.random_normal([opt.batch_size, opt.latent_size])
        W_norm = embedding_only(opt, is_reuse = None)
        _, syn_sent, logits = lstm_decoder_embedding(z, tf.ones_like(x), W_norm, opt, add_go = True, feed_previous=True, is_reuse=None, is_softargmax = True, is_sampling = False)
        prob = [tf.nn.softmax(l*opt.L) for l in logits]
        prob = tf.stack(prob,1)

        # _, syn_onehot, rec_sent, _ = lstm_decoder_embedding(z, x_org, W_norm, opt)
        # x_emb_fake = tf.tensordot(syn_onehot, W_norm, [[2],[0]])
        # x_emb_fake = tf.expand_dims(x_emb_fake, 3)

    with tf.variable_scope("d_net"):
        logits_real, H_real = discriminator(x, opt)




        ## Real Trail
        # x_emb, W_norm = embedding(x, opt, is_reuse = None)  # batch L emb
        # x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
        # H_enc, res = conv_encoder(x_emb, opt, res, is_reuse = None)

    with tf.variable_scope("d_net"):
        logits_fake, H_fake = discriminator(prob, opt, is_prob = True, is_reuse = True)


        # H_enc_fake, res_ = conv_encoder(x_emb_fake, is_train, opt, res_, is_reuse=True)
        # logits_real = discriminator_2layer(H_enc, opt)
        # logits_syn = discriminator_2layer(H_enc_fake, opt, is_reuse=True)

    res_['syn_sent'] = syn_sent
    res_['real_f'] = tf.squeeze(H_real)
    # Loss

    D_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.ones_like(logits_real), logits = logits_real)) + \
                 tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.zeros_like(logits_fake), logits = logits_fake))

    fake_mean = tf.reduce_mean(H_fake,axis=0)
    real_mean = tf.reduce_mean(H_real,axis=0)
    mean_dist = tf.sqrt(tf.reduce_mean((fake_mean - real_mean)**2))
    res_['mean_dist'] = mean_dist

    # cov_fake = acc_fake_xx - tensor.dot(acc_fake_mean.dimshuffle(0, 'x'), acc_fake_mean.dimshuffle(0, 'x').T)  +identity
    # cov_real = acc_real_xx - tensor.dot(acc_real_mean.dimshuffle(0, 'x'), acc_real_mean.dimshuffle(0, 'x').T)  +identity

    # cov_fake_inv = tensor.nlinalg.matrix_inverse(cov_fake)
    # cov_real_inv = tensor.nlinalg.matrix_inverse(cov_real)
    #tensor.nlinalg.trace(tensor.dot(cov_fake_inv,cov_real) + tensor.dot(cov_real_inv,cov_fake))

    GAN_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits_fake, labels=tf.ones_like(logits_fake))
    MMD_loss = compute_MMD_loss(tf.squeeze(H_fake), tf.squeeze(H_real), opt)
    G_loss = mean_dist #MMD_loss # + tf.reduce_mean(GAN_loss) # mean_dist #
    res_['mmd'] = MMD_loss
    res_['gan'] = tf.reduce_mean(GAN_loss)
    # *tf.cast(tf.not_equal(x_temp,0), tf.float32)
    tf.summary.scalar('D_loss', D_loss)
    tf.summary.scalar('G_loss', G_loss)
    summaries = [
                "learning_rate",
                #"G_loss",
                #"D_loss"
                # "gradients",
                # "gradient_norm",
                ]
    global_step = tf.Variable(0, trainable=False)

    all_vars = tf.trainable_variables()
    g_vars = [var for var in all_vars if
                  var.name.startswith('pretrain')]
    d_vars = [var for var in all_vars if
              var.name.startswith('d_')]
    print([g.name for g in g_vars])
    generator_op = layers.optimize_loss(
        G_loss,
        global_step = global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr,g: tf.train.exponential_decay(learning_rate=lr, global_step = g, decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        variables = g_vars,
        summaries = summaries
        )

    discriminator_op = layers.optimize_loss(
        D_loss,
        global_step = global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr,g: tf.train.exponential_decay(learning_rate=lr, global_step = g, decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        variables = d_vars,
        summaries = summaries
        )

    # optimizer = tf.train.AdamOptimizer(learning_rate=opt.lr)  # Or another optimization algorithm.
    # train_op = optimizer.minimize(
    #     loss,
    #     aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)


    return res_, G_loss, D_loss, generator_op, discriminator_op
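
compute_MMD_loss is referenced above but not shown on this page. The sketch below is one common way to compute a maximum mean discrepancy between the two feature batches, using a single-bandwidth RBF kernel; the repository may use a different kernel or a mixture of bandwidths.

import tensorflow as tf

def rbf_mmd(h_fake, h_real, sigma=1.0):
    # Squared MMD estimate: E[k(x,x')] + E[k(y,y')] - 2 E[k(x,y)].
    def sq_dists(a, b):
        aa = tf.expand_dims(tf.reduce_sum(tf.square(a), axis=1), 1)  # [n, 1]
        bb = tf.expand_dims(tf.reduce_sum(tf.square(b), axis=1), 0)  # [1, m]
        return aa - 2.0 * tf.matmul(a, b, transpose_b=True) + bb

    def kernel(a, b):
        return tf.exp(-sq_dists(a, b) / (2.0 * sigma**2))

    return (tf.reduce_mean(kernel(h_fake, h_fake)) +
            tf.reduce_mean(kernel(h_real, h_real)) -
            2.0 * tf.reduce_mean(kernel(h_fake, h_real)))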
Example #8
def dialog_gan(src, tgt, opt, opt_t=None):

    z = tf.random_uniform(shape=[opt.fake_size, opt.n_z],
                          minval=-1.,
                          maxval=1.)

    if opt.two_side:
        res_dict, gan_cost_d, gan_cost_g = conditional_gan(src,
                                                           tgt,
                                                           z,
                                                           opt,
                                                           opt_t=opt_t)
        # Backward direction: left-pad tgt with (filter_shape - 1) zeros so it
        # can serve as the source, and drop that prefix from src so it serves
        # as the target.
        src_rev = tf.concat([
            tf.cast(tf.zeros([opt.batch_size, (opt.filter_shape - 1)]),
                    tf.int32), tgt
        ], 1)
        tgt_rev = src[:, (opt.filter_shape - 1):]
        rev_res_dict, gan_cost_d_rev, gan_cost_g_rev = conditional_gan(
            src_rev,
            tgt_rev,
            z,
            opt,
            opt_t=opt_t,
            is_reuse_generator=tf.AUTO_REUSE)
        gan_cost_d += opt.lambda_backward * gan_cost_d_rev
        gan_cost_g += opt.lambda_backward * gan_cost_g_rev
    else:
        res_dict, gan_cost_d, gan_cost_g = conditional_gan(src,
                                                           tgt,
                                                           z,
                                                           opt,
                                                           opt_t=opt_t)

    t_vars = tf.trainable_variables()
    d_vars = [var for var in t_vars if 'd_' in var.name]
    if opt.g_fix:
        g_vars = [var for var in t_vars if 'g_g_' in var.name]
        print("Fix most G params, except" + " ".join([v.name for v in g_vars]))
    else:
        g_vars = [var for var in t_vars if 'g_' in var.name]

    tf.summary.scalar('loss_d', gan_cost_d)
    tf.summary.scalar('loss_g', gan_cost_g)

    summaries = [
        "learning_rate",
        "loss",
    ]
    global_step = tf.Variable(0, trainable=False)
    train_op_d = layers.optimize_loss(
        gan_cost_d,
        global_step=global_step,
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=g,
            decay_rate=opt.decay_rate,
            decay_steps=3000),
        variables=d_vars,
        learning_rate=opt.lr_d,
        summaries=summaries)

    train_op_g = layers.optimize_loss(
        gan_cost_g,
        global_step=global_step,
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=g,
            decay_rate=opt.decay_rate,
            decay_steps=3000),
        variables=g_vars,
        learning_rate=opt.lr_g,
        summaries=summaries)

    return res_dict, gan_cost_d, train_op_d, gan_cost_g, train_op_g
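
dialog_gan returns separate train ops for the discriminator and the generator; how they are interleaved is not shown on this page. The loop below is a purely illustrative TF 1.x pattern; the src/tgt placeholders, opt.sent_len, load_batches, and the 1:1 update ratio are all hypothetical.

import tensorflow as tf

# Hypothetical placeholders; opt.sent_len and load_batches are invented names.
src = tf.placeholder(tf.int32, [opt.batch_size, opt.sent_len], name='src')
tgt = tf.placeholder(tf.int32, [opt.batch_size, opt.sent_len], name='tgt')

res_dict, cost_d, train_op_d, cost_g, train_op_g = dialog_gan(src, tgt, opt)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for src_batch, tgt_batch in load_batches():
        feed = {src: src_batch, tgt: tgt_batch}
        sess.run([train_op_d, cost_d], feed_dict=feed)  # discriminator step
        sess.run([train_op_g, cost_g], feed_dict=feed)  # generator step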
Example #9
def semi_classifier(alpha, x, x_org, x_lab, y, dp_ratio, opt, opt_t=None):
    # print x.get_shape()  # batch L
    is_train = True
    if not opt_t: opt_t = opt
    x_lab_emb, W_norm = embedding(x_lab, opt)  # batch L emb
    x_emb = tf.nn.embedding_lookup(W_norm, x)
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    x_lab_emb = tf.expand_dims(x_lab_emb, 3)  # batch L emb 1
    x_lab_emb = tf.nn.dropout(x_lab_emb, dp_ratio)
    res = {}

    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)
    H_lab_enc, res = conv_encoder(x_lab_emb, is_train, opt, res, is_reuse=True)
    H_dec = H_enc

    #H_lab_enc = tf.nn.dropout(H_lab_enc, opt.dropout_ratio)
    logits = classifier_2layer(H_lab_enc,
                               opt,
                               dropout=dp_ratio,
                               prefix='classify',
                               is_reuse=None)
    dis_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))

    # calculate the accuracy
    prob = tf.nn.sigmoid(logits)

    # if opt.model == 'rnn_rnn':
    #     rec_loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
    #     _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)
    #     res['rec_sents_feed_y'] = rec_sent_1
    #     res['rec_sents'] = rec_sent_2

    # elif opt.model == 'cnn_rnn':
    #     # lstm decoder
    #     H_dec2 = tf.identity(H_dec)
    #     rec_loss, rec_sent_1, _ = lstm_decoder(H_dec, x_org, opt)  #

    #     _, rec_sent_2, _ = lstm_decoder(H_dec, x_org, opt, feed_previous=True, is_reuse=True)

    #     res['rec_sents_feed_y'] = rec_sent_1
    #     res['rec_sents'] = rec_sent_2

    # else:

    #     # deconv decoder
    rec_loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    correct_prediction = tf.equal(tf.round(prob), tf.round(y))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # calculate the total loss
    loss = alpha * rec_loss + (1 - alpha) * dis_loss

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('rec_loss', rec_loss)
    tf.summary.scalar('dis_loss', dis_loss)
    summaries = [
        # "learning_rate",
        "loss"
        # "gradients",
        # "gradient_norm",
    ]
    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(
        loss,
        global_step=global_step,
        # framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        #learning_rate_decay_fn=lambda lr, g: tf.train.exponential_decay(learning_rate=lr, global_step=g,
        #                                                                decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries=summaries)
    return res, dis_loss, rec_loss, loss, train_op, prob, accuracy
Example #10
def auto_encoder(x, x_org, is_train, opt, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t: opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1

    res = {}
    #res['W'] = W_norm
    # cnn encoder
    H_enc, res = conv_encoder(x_emb, is_train, opt, res)

    # H_dec = layers.relu(Y4, 200, biases_initializer=biasInit)
    H_dec = H_enc
    # print x_rec.get_shape()
    if opt.model == 'rnn_rnn':
        loss, rec_sent_1, _ = seq2seq(x, x_org, opt)
        _, rec_sent_2, _ = seq2seq(x, x_org, opt, feed_previous=True, is_reuse=True)
        #res['logits'] = logits
        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2


    elif opt.model == 'cnn_rnn':
        # lstm decoder
        H_dec2 = tf.identity(H_dec)
        if opt.rnn_share_emb:
            loss, rec_sent_1, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t)  #
            _, rec_sent_2, _ = lstm_decoder_embedding(H_dec2, x_org, W_norm, opt_t, feed_previous=True, is_reuse=True)
        else:
            loss, rec_sent_1, _ = lstm_decoder(H_dec2, x_org, opt_t)  #
            _, rec_sent_2, _ = lstm_decoder(H_dec2, x_org, opt_t, feed_previous=True, is_reuse=True)


        res['rec_sents_feed_y'] = rec_sent_1
        res['rec_sents'] = rec_sent_2
        # res['H1'],res['H2'],res['o1'],res['o2'] = H1, H2, o1, o2

    else:

        # deconv decoder
        loss, res = deconv_decoder(H_dec, x_org, W_norm, is_train, opt_t, res)

    # *tf.cast(tf.not_equal(x_temp,0), tf.float32)
    tf.summary.scalar('loss', loss)
    summaries = [
                "learning_rate",
                "loss",
                # "gradients",
                # "gradient_norm",
                ]
    global_step = tf.Variable(0, trainable=False)


    train_op = layers.optimize_loss(
        loss,
        global_step = global_step,
        #aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N,
        #framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad)) if opt.clip_grad else None,
        learning_rate_decay_fn=lambda lr,g: tf.train.exponential_decay(learning_rate=lr, global_step = g, decay_rate=opt.decay_rate, decay_steps=3000),
        learning_rate=opt.lr,
        summaries = summaries
        )

    # optimizer = tf.train.AdamOptimizer(learning_rate=opt.lr)  # Or another optimization algorithm.
    # train_op = optimizer.minimize(
    #     loss,
    #     aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)


    return res, loss, train_op
Example #11
def conditional_s2s(src,
                    tgt,
                    is_train,
                    opt,
                    opt_t=None,
                    is_reuse_generator=None):
    if not opt_t: opt_t = opt
    W_norm_d = embedding_only(opt, prefix='d_', is_reuse=None)
    res = {}
    z_all, z_tgt, loss_pred_z = feature_vector(src,
                                               tgt,
                                               is_train,
                                               W_norm_d,
                                               opt,
                                               prefix='d_')
    if opt.local_feature:
        z_all_l, z_tgt_l, loss_pred_z_l = feature_vector(src,
                                                         tgt,
                                                         is_train,
                                                         W_norm_d,
                                                         opt,
                                                         prefix='l_')  #  B Z
        z_all = tf.concat([z_all, z_all_l], axis=1)
        z_tgt = tf.concat([z_tgt, z_tgt_l], axis=1)
        loss_pred_z += loss_pred_z_l

    if opt.multiple_src:
        syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
            z_all, src, tgt, opt, is_reuse=is_reuse_generator, prefix='g_')
    else:
        syn_sent, syn_one_hot, H_dec, sup_loss, sample_loss, sup_loss_all = s2s(
            z_all, src[-1], tgt, opt, is_reuse=is_reuse_generator, prefix='g_')

    is_logit = (opt.z_loss != 'L2')
    if opt.global_feature:
        _, z_hat = encoder(syn_one_hot,
                           W_norm_d,
                           opt,
                           num_outputs=opt.n_z,
                           l_temp=1,
                           prefix='d_',
                           is_reuse=True,
                           is_prob=True,
                           is_padded=False,
                           is_logit=is_logit)
        if opt.local_feature:
            _, z_hat_l = encoder(syn_one_hot,
                                 W_norm_d,
                                 opt,
                                 num_outputs=opt.n_z,
                                 l_temp=1,
                                 prefix='l_',
                                 is_reuse=True,
                                 is_prob=True,
                                 is_padded=False,
                                 is_logit=is_logit)
            z_hat = tf.concat([z_hat, z_hat_l], axis=1)  #  B Z

        if opt.z_loss == 'L2':
            z_loss = tf.reduce_mean(tf.square(z_all - z_hat))
        else:
            z_loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=z_all,
                                                        logits=z_hat))
        res['z'] = z_all
        res['z_hat'] = z_hat
        res['z_tgt'] = z_tgt
        res['z_loss'] = z_loss
        res['z_loss_pred'] = loss_pred_z

    res['syn_sent'] = syn_sent

    g_cost = sup_loss + (z_loss * opt.lambda_z
                         if opt.global_feature else 0) + loss_pred_z

    tf.summary.scalar('sup_loss', sup_loss)
    if opt.global_feature:
        tf.summary.scalar('z_loss', z_loss)
    summaries = [
        "learning_rate",
        "loss",
    ]

    t_vars = tf.trainable_variables()
    g_vars = [var for var in t_vars if 'g_' in var.name]
    train_op_g = layers.optimize_loss(
        g_cost,
        framework.get_global_step(),
        optimizer=opt.optimizer,
        clip_gradients=(
            lambda grad: _clip_gradients_seperate_norm(grad, opt.clip_grad))
        if opt.clip_grad else None,
        variables=(t_vars if opt.relax_d else g_vars),
        learning_rate=opt.lr_g,
        summaries=summaries)

    return res, g_cost, train_op_g