and (i + start_epoch + 1) % 5 == 0):
            filename = data_path + "LFT_result_%d.txt" % (i + start_epoch)
            
            polite_responses = run_seq2seq(
                sess, source_test_polite, target_test, "test", i + start_epoch)
            neutral_responses = run_seq2seq(
                sess, source_test_neutral, target_test, "test", i + start_epoch)
            rude_responses = run_seq2seq(
                sess, source_test_rude, target_test, "test", i + start_epoch)

            assert len(polite_responses) == len(neutral_responses) == len(rude_responses)

            num_responses = len(polite_responses)
            zipped = zip_lsts(
                [source_test[:num_responses], 
                 target_test[:num_responses],
                 polite_responses, 
                 neutral_responses, 
                 rude_responses])

            flattened = [decode2string(index2token, sent, remove_END_TOKEN=True) 
                         for tp in zipped for sent in tp]

            # now we mark sentences that are generated by our model
            marked_G = [("G: " + sent) 
                        if k % 5 == 1 else sent
                        for (k, sent) in enumerate(flattened)]
            marked_P = [("P: " + sent) 
                        if k % 5 == 2 else sent
                        for (k, sent) in enumerate(marked_G)]
            marked_N = [("N: " + sent) 
                        if k % 5 == 3 else sent
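
# Assumed helper (not defined in these snippets): decode2string maps a list of
# token ids back to a readable sentence via index2token, optionally stripping
# the end-of-sentence marker. A minimal sketch of the behaviour relied on above;
# the project's real implementation may differ.
def decode2string(index2token, ids, end_token=None, remove_END_TOKEN=False):
    tokens = [index2token[i] for i in ids]
    if remove_END_TOKEN:
        # assumption: the end marker decodes to "<end>" unless an id is supplied
        end_word = index2token[end_token] if end_token is not None else "<end>"
        tokens = [t for t in tokens if t != end_word]
    return " ".join(tokens)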
Example #2
        if (((i + start_epoch + 1) >= 10  # only test for later epochs
             and (i + start_epoch + 1) % 5 == 0) or infer_only and not get_PPL
            ):  # for getting perplexity of test data, use train branch
            responses = run_seq2seq(sess, source_test, target_test, "test",
                                    i + start_epoch)

            #             # need to store all inferred responses in a pickle file
            #             if infer_only:
            #                 dump_pickle(
            #                     "%sseq2seq_RL_result%s_%d_infer.pkl" % (data_path, extra_str, i + start_epoch),
            #                     responses)

            num_responses = len(responses)
            zipped = zip_lsts([
                source_test[:num_responses], target_test[:num_responses],
                responses
            ])
            flattened = [
                decode2string(index2token, sent, remove_END_TOKEN=True)
                for tp in zipped for sent in tp
            ]

            # now we mark sentences that are generated by our model
            marked_G = [("G: " + sent) if k % 3 == 1 else sent
                        for (k, sent) in enumerate(flattened)]

            marked_M = [("M: " + sent) if k % 3 == 2 else sent
                        for (k, sent) in enumerate(marked_G)]
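            # after marking, every group of 3 output lines reads:
            # source sentence, "G: " ground-truth reply, "M: " model reply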

            filename = "%sseq2seq_RL_result%s_%d.txt" % (data_path, extra_str,
                                                         i + start_epoch)
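
# Assumed helper (referenced in the commented-out block above and in the fusion
# snippet near the end): dump_pickle is taken to be a thin wrapper around the
# standard pickle module. Sketch only.
import pickle

def dump_pickle(path, obj):
    with open(path, "wb") as f:
        pickle.dump(obj, f)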
Example #3
def build_seq2seq(input_seqs, target_seqs, filtered_target_seqs,
                  input_seq_lengths, target_seq_lengths, is_training):

    with tf.variable_scope("seq2seq"):
        with tf.device('/cpu:0'):
            reuse = False

            if get_PPL:
                keep_prob = tf.convert_to_tensor(1.0)
            else:
                keep_prob = get_keep_prob(dropout_rate, is_training)

            sequence_mask = get_sequence_mask(target_seq_lengths)

            unk_mask = get_mask(target_seqs, unk_indices)
            decoder_mask = tf.logical_and(sequence_mask,
                                          tf.logical_not(unk_mask))
            decoder_mask_float = tf.cast(decoder_mask, tf.float32)

            # Embed inputs
            with tf.variable_scope("embedding"):
                embedding = create_embedding(embedding_word2vec_politeness,
                                             embedding_word2vec_movie,
                                             shared_vocab_size_politeness,
                                             shared_vocab_size_movie,
                                             new_vocab_size_politeness,
                                             new_vocab_size_movie, "seq2seq")
                embedded_input_seqs = tf.nn.embedding_lookup(
                    embedding, input_seqs)
                embedded_target_seqs = tf.nn.embedding_lookup(
                    embedding, target_seqs)

            # Optimizer
            optimizer = tf.train.AdamOptimizer(learning_rate)

            tower_grads = []
            if credit_assignment:
                tower_grads_polite = []
            sample_ids_lst = []
            final_lengths_lst = []
            sampled_sample_ids_lst = []
            sampled_final_lengths_lst = []
            reuse = False
            trainable_variables = []

            num_tokens_lst = []
            total_losses = []
    for i in range(num_gpus):
        with tf.device("/gpu:%d" % (gpu_start_index + i)):
            with tf.variable_scope("seq2seq"):
                if (i == 1):
                    reuse = True

                start = i * batch_size_per_gpu
                end = start + batch_size_per_gpu

                input_max_seq_length = tf.reduce_max(
                    input_seq_lengths[start:end])
                target_max_seq_length = tf.reduce_max(
                    target_seq_lengths[start:end])

                with tf.variable_scope("encoder", reuse=reuse):
                    cell_fw = create_MultiRNNCell([hidden_size_encoder] *
                                                  (num_layers_encoder // 2),
                                                  keep_prob,
                                                  num_proj=None,
                                                  reuse=reuse)
                    cell_bw = create_MultiRNNCell([hidden_size_encoder] *
                                                  (num_layers_encoder // 2),
                                                  keep_prob,
                                                  num_proj=None,
                                                  reuse=reuse)
                    (encoder_outputs_original, encoder_final_state_original
                     ) = bidirecitonal_dynamic_lstm(
                         cell_fw, cell_bw, embedded_input_seqs[
                             start:end, :input_max_seq_length, :],
                         input_seq_lengths[start:end])

                    [
                        encoder_outputs, encoder_seq_lengths,
                        encoder_final_state
                    ] = tf.cond(is_training, lambda: [
                        encoder_outputs_original, input_seq_lengths[start:end],
                        encoder_final_state_original
                    ], lambda: [
                        tf.contrib.seq2seq.tile_batch(encoder_outputs_original,
                                                      beam_width),
                        tf.contrib.seq2seq.tile_batch(
                            input_seq_lengths[start:end], beam_width),
                        tile_multi_cell_state(encoder_final_state_original)
                    ])  # only works for decoders with more than one layer!

                with tf.variable_scope("decoder", reuse=reuse):
                    decoder_cell = create_MultiRNNCell(
                        [hidden_size_decoder] * (num_layers_decoder),
                        keep_prob,
                        num_proj=vocab_size,
                        memory=encoder_outputs,
                        memory_seq_lengths=encoder_seq_lengths,
                        reuse=reuse)

                    decoder_zero_state = tf.cond(
                        is_training, lambda: decoder_cell.zero_state(
                            batch_size_per_gpu, tf.float32),
                        lambda: decoder_cell.zero_state(
                            batch_size_per_gpu * beam_width, tf.float32))

                    state_last = decoder_zero_state[-1].clone(
                        cell_state=encoder_final_state[-1])
                    state_previous = encoder_final_state[:-1]
                    decoder_initial_state = state_previous + (
                        state_last, )  # concat tuples

                    # training helper (for teacher forcing)
                    helper_train = tf.contrib.seq2seq.TrainingHelper(
                        embedded_target_seqs[
                            start:end, :target_max_seq_length -
                            1, :],  # get rid of end_token
                        target_seq_lengths[start:end] -
                        1)  # the length is thus decreased by 1

                    (decoder_outputs_train,
                     _) = decode(decoder_cell,
                                 helper_train,
                                 initial_state=decoder_initial_state)
                    (logits, _) = decoder_outputs_train

                    # Get trainable_variables
                    # (up to now we already have all the seq2seq trainable vars)
                    if trainable_variables == []:
                        trainable_variables = tf.get_collection(
                            tf.GraphKeys.TRAINABLE_VARIABLES, scope="seq2seq")

                    loss_ML = tf.contrib.seq2seq.sequence_loss(
                        logits,
                        target_seqs[
                            start:end,
                            1:target_max_seq_length],  # get rid of start_token
                        decoder_mask_float[start:end, 1:target_max_seq_length])
                    num_tokens = tf.reduce_sum(
                        decoder_mask_float[start:end, 1:target_max_seq_length])

                    num_tokens_lst.append(num_tokens)

                    total_loss = loss_ML * num_tokens
                    total_losses.append(total_loss)

                    if polite_training:
                        helper_sample = tf.contrib.seq2seq.SampleEmbeddingHelper(
                            embedding, start_tokens[start:end], end_token)
                        (decoder_outputs_sample,
                         final_lengths_sample) = decode(
                             decoder_cell, helper_sample,
                             decoder_initial_state)
                        (logits_sample,
                         sample_ids_sample) = decoder_outputs_sample
                        max_final_lengths_sample = tf.reduce_max(
                            final_lengths_sample)
                        sampled_sample_ids_lst.append(
                            pad_and_truncate(sample_ids_sample,
                                             final_lengths_sample))
                        sampled_final_lengths_lst.append(final_lengths_sample)

                        # Compute sampled sequence loss WITHOUT averaging (will do that later)
                        decoder_mask_sample = get_sequence_mask(
                            final_lengths_sample, dtype=tf.float32)
                        seq_losses_sample = tf.contrib.seq2seq.sequence_loss(
                            logits_sample,
                            sample_ids_sample,
                            decoder_mask_sample,
                            average_across_timesteps=False,
                            average_across_batch=False)

            if polite_training:
                with tf.variable_scope(
                        "classifier"):  # jump back to the classifier scope
                    # Filter out tokens that the classifier doesn't know
                    vocab_mask = tf.cast(
                        sample_ids_sample < vocab_size_politeness, tf.int32)
                    sample_ids_sample_classifier = sample_ids_sample * vocab_mask

                    # Feed sampled ids to classifier
                    (scores_RL, credit_weights_RL) = build_classifier(
                        sample_ids_sample_classifier, final_lengths_sample,
                        reuse)

                    # Stop gradients from propagating back
                    scores_RL_stop = tf.stop_gradient(scores_RL)
                    credit_weights_RL_stop = tf.stop_gradient(
                        credit_weights_RL)

                    if thresholding:
                        # Filter scores that are >= threshold and <= 1 - threshold
                        filtered_scores_RL = tf.map_fn(filter_with_threshold,
                                                       scores_RL_stop)
                    else:
                        filtered_scores_RL = scores_RL_stop

                with tf.variable_scope("seq2seq"):
                    with tf.variable_scope("decoder", reuse=reuse):
                        # Get valid mask for sampled sequence
                        decoder_mask_classifier = tf.cast(
                            tf.not_equal(sample_ids_sample, 0), tf.float32
                        )  # propagate back the whole sentence (including <end>)

                        tiled_scores = tf.tile(  # tile scores to 2D
                            tf.expand_dims(filtered_scores_RL - baseline,
                                           axis=1),
                            [1, max_final_lengths_sample])

                        if flip_polite:  # if we actually want a rude dialogue system
                            tiled_scores = -1.0 * tiled_scores

                        # Compute seq losses for polite-RL
                        seq_losses_classifier = (
                            beta * seq_losses_sample *
                            decoder_mask_classifier /
                            tf.reduce_sum(decoder_mask_classifier) *
                            tiled_scores)

                        if credit_assignment:
                            grads_polite = tf.gradients(
                                seq_losses_classifier,
                                trainable_variables,
                                grad_ys=credit_weights_RL_stop
                            )  # credit weights as initial gradients
                            grads_polite = zip_lsts(
                                [grads_polite, trainable_variables])
                            tower_grads_polite.append(grads_polite)
                        else:
                            loss_polite = tf.reduce_sum(seq_losses_classifier)
            else:
                credit_weights_RL_stop = None

            with tf.variable_scope("seq2seq"):
                with tf.variable_scope("decoder", reuse=reuse):
                    # Infer branch (beam search!)
                    beam_search_decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                        decoder_cell,
                        embedding,
                        start_tokens[start:end],
                        end_token,
                        decoder_initial_state,
                        beam_width,
                        length_penalty_weight=length_penalty_weight)
                    output_beam = tf.contrib.seq2seq.dynamic_decode(
                        beam_search_decoder,
                        #                     impute_finished=True, # cannot be used with Beamsearch
                        maximum_iterations=max_iterations,
                        swap_memory=True)
                    sample_ids = output_beam[0].predicted_ids[:, :, 0]
                    final_lengths = output_beam[2][:, 0]

                    sample_ids_lst.append(
                        pad_and_truncate(sample_ids, final_lengths))
                    final_lengths_lst.append(final_lengths)

        with tf.device("/gpu:%d" % (gpu_start_index + i)):
            with tf.variable_scope("seq2seq", reuse=reuse):
                # Compute loss
                loss = loss_ML

                if polite_training and not credit_assignment:
                    loss = loss + loss_polite

                # Compute tower gradients
                grads = compute_grads(loss, optimizer, trainable_variables)
                tower_grads.append(grads)

    with tf.device('/cpu:0'):
        with tf.variable_scope("seq2seq"):
            # Concat sample ids and their respective lengths
            batch_sample_ids = tf.concat(sample_ids_lst, axis=0)
            batch_final_lengths = tf.concat(final_lengths_lst, axis=0)

            if polite_training:
                batch_sampled_sample_ids = tf.concat(sampled_sample_ids_lst,
                                                     axis=0)

            batch_total_loss = tf.add_n(total_losses)
            batch_num_tokens = tf.add_n(num_tokens_lst)

            # Thus, the effective batch size is actually batch_size_per_gpu
            if polite_training and credit_assignment:
                apply_gradients_op = apply_multiple_grads(
                    optimizer, [tower_grads, tower_grads_polite])
            else:
                apply_gradients_op = apply_grads(optimizer, tower_grads)

    return (batch_sample_ids, batch_final_lengths, batch_total_loss,
            batch_num_tokens, apply_gradients_op, credit_weights_RL_stop,
            embedding)
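
# Assumed helpers (not shown here): compute_grads and apply_grads are taken to
# follow the standard TF 1.x multi-tower pattern, i.e. collect (gradient,
# variable) pairs per GPU and average them variable-by-variable before a single
# apply step. Sketch under that assumption; the dense averaging below ignores
# IndexedSlices gradients, which the project code may handle differently.
import tensorflow as tf

def compute_grads(loss, optimizer, var_list):
    return optimizer.compute_gradients(loss, var_list=var_list)

def apply_grads(optimizer, tower_grads):
    averaged = []
    for grads_and_vars in zip(*tower_grads):  # one tuple of pairs per variable
        grads = [g for (g, _) in grads_and_vars if g is not None]
        var = grads_and_vars[0][1]
        if grads:
            averaged.append((tf.reduce_mean(tf.stack(grads), axis=0), var))
    return optimizer.apply_gradients(averaged)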
Example #4
def run_vhred(model, sess, mode, epoch):
    training_flag = (mode == "train")
    norm_dialogues = norm_data_dict[mode]
    adv_dialogues = adv_data_dict[mode]

    generator = DataGenerator(norm_dialogues,
                              adv_dialogues=adv_dialogues,
                              feed_both_examples=feed_both_examples,
                              is_training=training_flag,
                              batch_size=batch_size,
                              max_dialogue_length=max_dialogue_length)
    batch = generator.batch_generator()
    print("Initialized data generator.")

    responses = []
    total_loss = 0.0
    adv_total_loss = 0.0
    total_num_tokens = 0.0
    batch_counter = 0
    if mode != "train":
        source_lst = []
        target_lst = []
        dialogue_indices_lst = []
        start_turn_indices_lst = []

    if use_max_margin:
        avg_margin = 0.0

    while True:
        next_batch = next(batch)
        if next_batch is None:
            break

        # if it's os, we always set start_turn_indices to 0
        (dialogue_indices, start_turn_indices, examples,
         turn_lengths_lst) = next_batch

        feed_dict_seqs = {
            model.dialogue: examples,
            model.turn_length: turn_lengths_lst,
            model.start_turn_index: start_turn_indices,
            model.start_tokens: [start_token] * batch_size
        }

        if mode == "train":
            if use_max_margin:
                fetches = [
                    model.batch_total_loss,
                    model.batch_num_tokens,
                    model.apply_gradients_op,
                    model.avg_margin_loss,  # testing
                    model.global_step
                ]
            else:
                fetches = [
                    model.batch_total_loss, model.batch_num_tokens,
                    model.apply_gradients_op, model.global_step
                ]
            feed_dict = {
                model.keep_prob: 1 - dropout_rate,
                model.is_training: training_flag
            }

            result = sess.run(fetches,
                              feed_dict={
                                  **feed_dict_seqs,
                                  **feed_dict
                              })

            if use_max_margin:
                avg_margin = (avg_margin * batch_counter +
                              result[-2]) / (batch_counter + 1)
                print(
                    "Avg margin (should get smaller, i.e. larger in absolute value, over time):",
                    avg_margin)

            if feed_both_examples:
                (loss, adv_loss) = result[0]
            else:
                loss = result[0]

            average_log_perplexity = loss / result[1]
            total_loss += loss
            total_num_tokens += result[1]
            print("Epoch (%s) %d, Batch %d, Global step %d:" %
                  (mode, epoch, batch_counter, result[-1]))
            print("Perplexity: %.2f" % exp(average_log_perplexity))
            print("Perplexity so far:", exp(total_loss / total_num_tokens))

            if feed_both_examples:
                adv_average_log_perplexity = adv_loss / result[1]
                adv_total_loss += adv_loss
                print("Adv-perplexity: %.2f" % exp(adv_average_log_perplexity))
                print("Adv-perplexity so far:",
                      exp(adv_total_loss / total_num_tokens))
        else:
            (source, target) = get_source_and_target(examples)
            source_lst.extend(source)
            target_lst.extend(target)

            dialogue_indices_lst.extend(dialogue_indices)
            start_turn_indices_lst.extend(start_turn_indices)

            feed_dict = {
                model.keep_prob: 1.0,
                model.is_training: training_flag
            }
            (ids, lengths) = sess.run(
                [model.batch_sample_ids_beam, model.batch_final_lengths_beam],
                feed_dict={
                    **feed_dict_seqs,
                    **feed_dict
                })

            batch_responses = [response[:length] for (response, length)
                               in zip(ids.tolist(), lengths.tolist())]
            responses.extend(batch_responses)
            print("Finished testing batch %d" % batch_counter)

        batch_counter += 1

    if mode == "train":
        epoch_perplexity = total_loss / total_num_tokens
        print("Epoch (%s) %d average perplexity: %.2f" %
              (mode, epoch, exp(epoch_perplexity)))

        if force_store_point == "":
            store_ckpt = os.path.join(ckpt_path, f"{model_extra_str}_{epoch}")
        else:
            store_ckpt = force_store_point
        saver_seq2seq.save(sess, store_ckpt)
        print(f"Checkpoint saved for epoch {epoch}.")
    else:
        zipped = zip_lsts([
            dialogue_indices_lst, start_turn_indices_lst, source_lst,
            target_lst, responses
        ])
        # sort on dialogue indices & start_turn_indices
        zipped.sort(key=lambda x: x[:2])
        zipped_responses = zip_lsts(unzip_lst(zipped)[2:])
        return zipped_responses
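
# Hypothetical driver showing how run_vhred above is typically called; the
# helper name, the epoch range, and the "valid" split key are assumptions, not
# part of the original code.
def train_and_validate(model, sess, start_epoch, num_epochs):
    zipped_responses = None
    for epoch in range(start_epoch, start_epoch + num_epochs):
        run_vhred(model, sess, "train", epoch)  # one pass over the training data
        zipped_responses = run_vhred(model, sess, "valid", epoch)  # decode validation set
    return zipped_responses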
def run_seq2seq(sess, mode, epoch, feed_score=1.0):
    """see if we need to append end_token"""    
    is_training = (mode == "train")
    
    if is_training:
        (source_lst, target_lst, score_lst) = unzip_lst(LFT_examples)        
    else:
        (source_lst, target_lst) = data_dict[mode]
        score_lst = [feed_score] * len(source_lst)
    
#     source_lst = source_lst[:batch_size * 2]
#     target_lst = target_lst[:batch_size * 2]
#     score_lst = score_lst[:batch_size * 2]
    
    num_examples = len(source_lst)
    assert num_examples >= batch_size
    num_batches = num_examples // batch_size
    
    keep_prob = (1 - dropout_rate) if is_training else 1.0
    start_tokens = [start_token] * batch_size
    
    total_loss = 0.0
    num_tokens = 0
    zipped_lst = []
    for i in range(num_batches):
        start = i * batch_size
        end = start + batch_size
        
        sources = source_lst[start:end]
        source_lengths = list(map(len, sources))
        targets = target_lst[start:end]
        target_lengths = list(map(len, targets))
        
        scores = score_lst[start:end]
        
        feed_dict = {
            model.source: pad(sources, source_lengths),
            model.source_length: source_lengths,
            model.target: pad(targets, target_lengths),
            model.target_length: target_lengths,
            model.start_tokens: start_tokens,
            model.keep_prob: keep_prob,
            model.is_training: is_training,
            model.score: scores}
        
        if is_training:
            fetches = [model.batch_total_loss, model.batch_num_tokens, model.apply_gradients_op]
        else:
            fetches = [model.batch_sample_ids_beam, model.batch_final_lengths_beam]
        
        result = sess.run(fetches, feed_dict=feed_dict)
        
        if is_training:
            total_loss += result[0]
            num_tokens += result[1]
            print("Epoch (%s) %d Batch %d perplexity: %.2f" % 
                  (mode, epoch, i, exp(result[0] / result[1])))
            print("Perplexity so far:", exp(total_loss / num_tokens))
        else:
            print("Finished testing batch %d" % i)
            responses = [response[:length] 
                         for (response, length) 
                         in zip(result[0].tolist(), result[1].tolist())]
            zipped = zip_lsts([sources, targets, responses])
            zipped_lst.extend(zipped)
                    
    if is_training:
        print("Epoch (%s) %d average perplexity: %.2f" % 
              (mode, epoch, exp(total_loss / num_tokens)))
        if not get_PPL:
            saver_seq2seq.save(sess, "%sseq2seq_RL%s_%d" % (ckpt_path, extra_str, epoch))
            print("Checkpoint saved for epoch %d." % epoch)
                    
    return zipped_lst
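
# Assumed helper (not shown): pad right-pads every sequence in the batch to the
# longest length so it can be fed as a dense tensor. The padding id of 0 is an
# assumption; the project may use a dedicated <pad> index.
def pad(sequences, lengths, pad_id=0):
    max_len = max(lengths)
    return [seq + [pad_id] * (max_len - length)
            for (seq, length) in zip(sequences, lengths)]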
def zip_remove_duplicates_unzip(lsts):
    zipped = zip_lsts(lsts)
    zipped_without_duplicates = remove_duplicates(zipped)    
    unzipped = unzip_lst(zipped_without_duplicates)
    return unzipped
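
# Assumed list utilities used throughout these snippets (defined elsewhere in
# the project). Minimal sketches consistent with how they are called above:
def zip_lsts(lsts):
    # [[a1, a2], [b1, b2]] -> [[a1, b1], [a2, b2]]
    return [list(tp) for tp in zip(*lsts)]

def unzip_lst(zipped):
    # inverse of zip_lsts
    return [list(tp) for tp in zip(*zipped)]

def remove_duplicates(zipped):
    # keep the first occurrence of each item, preserving order; items may
    # contain (unhashable) lists, so use their repr as the dedup key
    seen = set()
    kept = []
    for item in zipped:
        key = repr(item)
        if key not in seen:
            seen.add(key)
            kept.append(item)
    return kept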
        if infer_only and not get_PPL and (i + start_epoch - 1) % 5 == 0: # for getting perplexity of test data, use train branch
            print("Inferring on test set...")
            mode = "test"

            responses_lst = []
            source_lst = []
            target_lst = []
            score_range = list(np.arange(0.0, 1.1, 0.5))
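            # np.arange(0.0, 1.1, 0.5) yields [0.0, 0.5, 1.0]:
            # one inference pass per politeness score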
            for score in score_range:
                zipped_responses = run_seq2seq(
                    sess, mode, i + start_epoch, feed_score=score)
                (source_lst, target_lst, responses) = unzip_lst(zipped_responses)
                responses_lst.append(responses)
            num_responses = len(responses_lst[0])    

            zipped = zip_lsts([source_lst, target_lst] + responses_lst)
        
            flattened = [decode2string(index2token, sent, end_token=end_token, remove_END_TOKEN=True)
                         for tp in zipped for sent in tp]

            # now we mark sentences that are generated by our model
            num_lines = len(score_range) + 2
            marked_G = [("G: " + sent)
                        if k % num_lines == 1 else sent
                        for (k, sent) in enumerate(flattened)]

            marked_M = [("M: " + sent)
                        if k % num_lines in range(2, num_lines) else sent
                        for (k, sent) in enumerate(marked_G)]
        
        filename = ("%sseq2seq_RL_%s_result%s_%d.txt" % 
    # Run fusion model for different fusion rates
    responses_lst = []
    for fusion_rate in fusion_rate_candidates:
        responses = run_fusion(sess, fusion_rate, source_test)

        dump_pickle(
            data_path + "/fusion_%.1f_%d_infer.pkl" %
            (fusion_rate, seq2seq_epoch), responses)

        responses_lst.append(responses)

    num_responses = len(responses_lst[0])
    print("Generated %d responses for each fusion rate." % num_responses)

    # add in source sents and ground truths
    zipped_responses = zip_lsts([source_test[:num_responses]] +
                                [target_test[:num_responses]] + responses_lst)

    # Write results to file
    filename = data_path + "fusion_responses_%.1f.txt" % fusion_rate_candidates[
        0]

    text_zipped_responses = [[
        label + decode2string(index2token, response, remove_END_TOKEN=True)
        for (label, response) in
        zip(["", "G: "] +
            convert_list_to_str_list(fusion_rate_candidates), responses)
    ] for responses in zipped_responses]

    flattened_text_responses = [
        response for responses in text_zipped_responses
        for response in responses