Example #1
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2  # floor division: '/' yields a float in Python 3, which breaks the slices below
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_g, loss_d, loss_d0, loss_d1 = sess.run(
            [
                model.loss, model.loss_g, model.loss_d, model.loss_d0,
                model.loss_d1
            ],
            feed_dict=feed_dictionary(model, batch, args.rho, args.gamma_min))
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0' + '.tsf')
        write_sent(data1_tsf, out_path + '.1' + '.tsf')

    return losses
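
All of the examples on this page depend on helpers defined elsewhere in their repositories: get_batches, reorder, write_sent, a loss accumulator (Losses or Accumulator), and feed_dictionary. Since none of them appear here, a hedged sketch of each follows the example that uses it. First, reorder undoes the sorting that get_batches applies before batching. A minimal sketch, assuming order[i] holds the position the i-th batched sentence had in the original corpus:

def reorder(order, sents):
    # get_batches() typically sorts sentences by length to reduce padding;
    # order[i] records where the i-th sorted sentence came from, so scatter
    # the outputs back into that original order.
    restored = [None] * len(sents)
    for idx, sent in zip(order, sents):
        restored[idx] = sent
    return restored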
Example #2
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1,
        vocab.word2id, args.batch_size)

    #data0_rec, data1_rec = [], []
    data0_tsf, data1_tsf = [], []
    losses = Accumulator(len(batches), ['loss', 'rec', 'adv', 'd0', 'd1'])
    for batch in batches:
        rec, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2  # floor division: '/' yields a float in Python 3, which breaks the slices below
        #data0_rec += rec[:half]
        #data1_rec += rec[half:]
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_rec, loss_adv, loss_d0, loss_d1 = sess.run([model.loss,
            model.loss_rec, model.loss_adv, model.loss_d0, model.loss_d1],
            feed_dict=feed_dictionary(model, batch, args.rho, args.gamma_min))
        losses.add([loss, loss_rec, loss_adv, loss_d0, loss_d1])

    n0, n1 = len(data0), len(data1)
    #data0_rec = reorder(order0, data0_rec)[:n0]
    #data1_rec = reorder(order1, data1_rec)[:n1]
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        #write_sent(data0_rec, out_path+'.0'+'.rec')
        #write_sent(data1_rec, out_path+'.1'+'.rec')
        write_sent(data0_tsf, out_path+'.0'+'.tsf')
        write_sent(data1_tsf, out_path+'.1'+'.tsf')

    return losses
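
Example #2 (and Examples #4 and #6 below) swaps the fixed-field Losses class for a generic Accumulator constructed with a divisor and a list of metric names; add() takes the per-batch values in the same order as the names. A minimal sketch under those assumptions; the actual class in the repository may differ:

class Accumulator(object):
    # Running averages for a fixed set of named metrics.
    def __init__(self, div, names):
        self.div = div                      # number of batches to average over
        self.names = names
        self.values = [0.0] * len(names)

    def add(self, values):
        # values must arrive in the same order as self.names
        for i, v in enumerate(values):
            self.values[i] += v / self.div

    def output(self, prefix=''):
        return prefix + ' '.join('%s %.2f' % (n, v)
                                 for n, v in zip(self.names, self.values))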
Example #4
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0,
                                          data1,
                                          vocab.word2id,
                                          args.batch_size,
                                          max_seq_len=args.max_seq_length)

    # data0_rec, data1_rec = [], []
    data0_tsf, data1_tsf = [], []
    losses = Accumulator(
        len(batches),
        ['loss', 'rec', 'adv', 'd0', 'd1', 'loss_rec_cyc', 'loss_kld'])
    for batch in batches:
        rec, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2
        # data0_rec += rec[:half]
        # data1_rec += rec[half:]
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_rec, loss_adv, loss_d0, loss_d1, loss_rec_cyc, loss_kld = \
          sess.run([model.loss,
                    model.loss_rec, model.loss_adv, model.loss_d0, model.loss_d1,
                    model.loss_rec_cyc, model.kld_loss],
                   feed_dict=feed_dictionary(model=model,
                                             batch=batch,
                                             rho=args.rho,
                                             epsilon=args.epsilon,
                                             gamma=args.gamma_min,
                                             anneal=args.anneal,
                                             C=args.C))

        # feed_dict order: model, batch, rho, epsilon, gamma, dropout=1, learning_rate=None, anneal=1
        losses.add([
            loss, loss_rec, loss_adv, loss_d0, loss_d1, loss_rec_cyc, loss_kld
        ])

    n0, n1 = len(data0), len(data1)
    # data0_rec = reorder(order0, data0_rec)[:n0]
    # data1_rec = reorder(order1, data1_rec)[:n1]
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        # write_sent(data0_rec, out_path+'.0'+'.rec')
        # write_sent(data1_rec, out_path+'.1'+'.rec')
        write_sent(data0_tsf, out_path + 'formal' + '.tsf')
        write_sent(data1_tsf, out_path + 'informal' + '.tsf')

    return losses
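
Every example routes its hyperparameters through feed_dictionary. The positional calls in Examples #1, #2, and #6 imply a base signature of (model, batch, rho, gamma, ...), while Example #4 passes extra keywords (epsilon, anneal, C), so its variant evidently extends that signature, as the stray comment above also suggests. A sketch of the base form; the placeholder and batch-key names other than batch['size'] are assumptions for illustration:

def feed_dictionary(model, batch, rho, gamma, dropout=1, learning_rate=None):
    # Map batch arrays and scalar hyperparameters onto model placeholders.
    feed_dict = {model.dropout: dropout,
                 model.rho: rho,            # hyperparameters taken from args
                 model.gamma: gamma,
                 model.batch_size: batch['size'],
                 model.enc_inputs: batch['enc_inputs'],
                 model.dec_inputs: batch['dec_inputs'],
                 model.targets: batch['targets'],
                 model.weights: batch['weights'],
                 model.labels: batch['labels']}
    if learning_rate is not None:           # only fed during training
        feed_dict[model.learning_rate] = learning_rate
    return feed_dict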
Example #5
def transfer(model, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf, loss, loss_g, loss_d, loss_d0, loss_d1 = rewrite(
            model, sess, args, vocab, batch)
        half = batch['size'] // 2  # floor division: '/' yields a float in Python 3, which breaks the slices below
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0' + '.tsf')
        write_sent(data1_tsf, out_path + '.1' + '.tsf')

    return losses
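
Example #5 folds the sess.run into a module-level rewrite() that returns the losses alongside the outputs, but like Example #1 it accumulates them in a Losses object whose add() takes the five values positionally. A minimal sketch consistent with that interface (attribute names are assumptions):

class Losses(object):
    # Fixed-field counterpart of Accumulator: averages the total loss, the
    # generator loss, and the combined and per-style discriminator losses.
    def __init__(self, div):
        self.div = max(div, 1)
        self.loss = self.loss_g = self.loss_d = 0.0
        self.loss_d0 = self.loss_d1 = 0.0

    def add(self, loss, loss_g, loss_d, loss_d0, loss_d1):
        self.loss += loss / self.div
        self.loss_g += loss_g / self.div
        self.loss_d += loss_d / self.div
        self.loss_d0 += loss_d0 / self.div
        self.loss_d1 += loss_d1 / self.div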
Example #6
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    #data0_rec, data1_rec = [], []
    data0_tsf, data1_tsf = [], []
    losses = Accumulator(len(batches), ['loss', 'rec', 'adv', 'd0', 'd1'])
    for batch in batches:
        rec, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2  # floor division already yields an int
        #data0_rec += rec[:half]
        #data1_rec += rec[half:]
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_rec, loss_adv, loss_d0, loss_d1 = sess.run(
            [
                model.loss, model.loss_rec, model.loss_adv, model.loss_d0,
                model.loss_d1
            ],
            feed_dict=feed_dictionary(model, batch, args.rho, args.gamma_min))
        losses.add([loss, loss_rec, loss_adv, loss_d0, loss_d1])

    n0, n1 = len(data0), len(data1)
    #data0_rec = reorder(order0, data0_rec)[:n0]
    #data1_rec = reorder(order1, data1_rec)[:n1]
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        #write_sent(data0_rec, out_path+'.0'+'.rec')
        #write_sent(data1_rec, out_path+'.1'+'.rec')
        write_sent(data0_tsf, out_path + '.0' + '.tsf')
        write_sent(data1_tsf, out_path + '.1' + '.tsf')

    return losses
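
Finally, each example writes its transferred sentences with write_sent. Given that data0_tsf and data1_tsf hold tokenized sentences, a plausible one-sentence-per-line implementation (a sketch, not verified against the repositories):

def write_sent(sents, path):
    # sents: a list of sentences, each a list of token strings
    with open(path, 'w') as f:
        for sent in sents:
            f.write(' '.join(sent) + '\n')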