Example #1
import os
import sys
from collections import namedtuple

import tensorflow as tf
from six.moves import cPickle  # cPickle on Python 2, pickle on Python 3

# FLAGS and the project-level helpers (Vocab, SummarizationModel, Batcher,
# get_config, get_ckpt_list, load_ckpt, eval) are assumed to be defined
# elsewhere in the repository.


def main(unused_argv):
    if len(unused_argv) != 1:
        raise Exception('Problem with flags: %s' % unused_argv)

    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        raise Exception('log directory %s does not exist.' % FLAGS.log_root)

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)

    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # .items() works on both Python 2 and 3
        if key in hparam_list:  # keep only the hyperparameters the model needs
            hps_dict[key] = val
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    model = SummarizationModel(hps, vocab)

    result_map = []
    model.build_graph()
    sess = tf.Session(config=get_config())
    trained_model_folder = os.path.join(FLAGS.log_root, 'train')

    evaluation_folder = os.path.join(FLAGS.log_root, 'eval')
    ckpt_list = get_ckpt_list(trained_model_folder,
                              max_ckpt_num=FLAGS.max_ckpt_num,
                              interval=FLAGS.interval)
    result_path = os.path.join(evaluation_folder, 'result.pkl')
    if os.path.exists(result_path):
        with open(result_path, 'rb') as f:
            result_map = cPickle.load(f)
        ckpt_list_included = [ckpt_file for ckpt_file, loss in result_map]
        ckpt_list = [c for c in ckpt_list if c not in ckpt_list_included]
        print('%d ckpts already included in the existing result.pkl, skipping ...'
              % len(ckpt_list_included))
    print('There are %d ckpts to evaluate' % len(ckpt_list))

    for idx, ckpt_file in enumerate(ckpt_list):
        print('Start analyzing checkpoint %d/%d' % (idx + 1, len(ckpt_list)))
        saver = tf.train.Saver(max_to_keep=3)
        load_ckpt(saver, sess, os.path.join(trained_model_folder, ckpt_file))
        batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=True)
        avg_loss = eval(model, batcher, vocab, sess)  # project-defined eval(), not the builtin
        print('checkpoint: %s, average loss on validation set: %.3f'
              % (ckpt_file, avg_loss))
        result_map.append([ckpt_file, avg_loss])
        if not os.path.exists(evaluation_folder):
            os.makedirs(evaluation_folder)
        with open(result_path, 'wb') as f:  # persist results after every checkpoint
            cPickle.dump(result_map, f)

    # Sort by average loss, descending; the key-based form works on both
    # Python 2 and Python 3 (the cmp-style call is Python 2 only).
    result_map = sorted(result_map, key=lambda x: x[1], reverse=True)
    print('==Summary==')
    for ckpt, avg_loss in result_map:
        print('checkpoint: %s, average loss: %.3f' % (ckpt, avg_loss))
    with open(result_path, 'wb') as f:
        cPickle.dump(result_map, f)
    print('results saved in %s' % result_path)
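Once Example #1 has finished, result.pkl holds [checkpoint, avg_loss] pairs, so picking the best checkpoint is a short script. A minimal sketch, assuming the file layout produced above (the experiment path is hypothetical):

from six.moves import cPickle

# Load the [checkpoint, avg_loss] pairs written by the evaluation loop.
with open('log/my_experiment/eval/result.pkl', 'rb') as f:  # hypothetical path
    result_map = cPickle.load(f)

# The checkpoint with the lowest validation loss is the one to decode with.
best_ckpt, best_loss = min(result_map, key=lambda x: x[1])
print('best checkpoint: %s (avg loss %.3f)' % (best_ckpt, best_loss))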
Example #2
def _load_model():
    # These imports are slow - lazy import.
    import tensorflow as tf
    from data import Vocab
    from model import Hps, Settings, SummarizationModel

    global _settings, _hps, _vocab, _sess, _model
    # _beam_size, _vocab_path, _vocab_size and _model_dir are module-level
    # constants assumed to be defined elsewhere in this file.

    # Define settings and hyperparameters
    _settings = Settings(
        embeddings_path='',
        log_root='',
        trace_path='',  # e.g. 'traces/traces_blog'
    )
    _hps = Hps(
        # parameters important for decoding
        attn_only_entities=False,
        batch_size=_beam_size,
        copy_only_entities=False,
        emb_dim=128,
        enc_hidden_dim=200,
        dec_hidden_dim=300,
        max_dec_steps=1,
        max_enc_steps=400,
        mode='decode',
        output_vocab_size=20000,
        restrictive_embeddings=False,
        save_matmul=False,
        tied_output=True,
        two_layer_lstm=True,
        # other parameters
        adagrad_init_acc=.1,
        adam_optimizer=True,
        copy_common_loss_wt=0.,
        cov_loss_wt=0.,
        high_attn_loss_wt=0.,
        lr=.15,
        max_grad_norm=2.,
        people_loss_wt=0.,
        rand_unif_init_mag=.02,
        scatter_loss_wt=0.,
        sharp_loss_wt=0.,
        trunc_norm_init_std=1e-4,
    )

    # Define model
    _vocab = Vocab(_vocab_path, _vocab_size)
    _model = SummarizationModel(_settings, _hps, _vocab)
    _model.build_graph()

    # Load model from disk
    saver = tf.train.Saver()
    config = tf.ConfigProto(
        allow_soft_placement=True,
        #intra_op_parallelism_threads=1,
        #inter_op_parallelism_threads=1,
    )
    _sess = tf.Session(config=config)
    ckpt_state = tf.train.get_checkpoint_state(_model_dir)
    saver.restore(_sess, ckpt_state.model_checkpoint_path)
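_load_model does all of its work through module globals so that the slow TensorFlow import and the graph build happen at most once per process. A minimal sketch of the same lazy-initialization idiom (get_model is our hypothetical wrapper, not part of the example):

_model = None

def get_model():
    # Build on first use only; later calls return the cached instance.
    global _model
    if _model is None:
        _load_model()  # pays the TensorFlow import / graph-build cost once
    return _model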
Example #3
def build_graph():
  tf.reset_default_graph()
  tf.logging.info('Building the model.')
  if hpm['decode'] or hpm['decode_using_prev']:
    hpm['max_dec_len'] = 1
  mod = SummarizationModel(hpm)
  tf.logging.info('Building the graph.')
  mod.add_placeholder()

  device = "/gpu:0" if tf.test.is_gpu_available() else "/cpu:0"
  with tf.device(device):
    mod.build_graph()
  if hpm['training'] or hpm['eval']:
    tf.logging.info('Adding training ops.')
    mod.add_loss()
    mod.add_train_op(device)
  if hpm['decode']:
    assert mod.hpm['batch_size'] == mod.hpm['beam_size']
    mod.add_top_k_likely_outputs()

  if hpm['decode_using_prev']:
    mod.add_loss()
    #mod.add_top_k_likely_outputs()
    #mod.add_prob_logits_samples()
  return mod
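build_graph reads its configuration from a module-level hpm dict rather than taking arguments. A minimal decode-mode usage sketch, assuming the flag names visible above (the values are illustrative):

hpm = {
    'decode': True, 'decode_using_prev': False,
    'training': False, 'eval': False,
    'batch_size': 4, 'beam_size': 4,  # must be equal in decode mode (see the assert)
}
mod = build_graph()  # sets hpm['max_dec_len'] = 1 itself in decode mode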
Example #4
def main(args):
    main_start = time.time()

    tf.set_random_seed(2019)
    random.seed(2019)
    np.random.seed(2019)

    if len(args) != 1:
        raise Exception('Problem with flags: %s' % args)

    # Correcting a few flags for test/eval mode.
    if FLAGS.mode != 'train':
        FLAGS.batch_size = FLAGS.beam_size
        FLAGS.bs_dec_steps = FLAGS.dec_steps

        if FLAGS.model.lower() != "tx":
            FLAGS.dec_steps = 1

    assert FLAGS.mode == 'train' or FLAGS.batch_size == FLAGS.beam_size, \
        "In test mode, batch size should be equal to beam size."

    assert FLAGS.mode == 'train' or FLAGS.dec_steps == 1 or FLAGS.model.lower() == "tx", \
        "In test mode, no. of decoder steps should be one."

    os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
    os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(
        str(gpu_id) for gpu_id in FLAGS.GPUs)

    if not os.path.exists(FLAGS.PathToCheckpoint):
        os.makedirs(FLAGS.PathToCheckpoint)

    if FLAGS.mode == "test" and not os.path.exists(FLAGS.PathToResults):
        os.makedirs(FLAGS.PathToResults)
        os.makedirs(FLAGS.PathToResults + 'predictions')
        os.makedirs(FLAGS.PathToResults + 'groundtruths')

    if FLAGS.mode == 'eval':
        eval_model(FLAGS.PathToResults)
    else:
        start = time.time()
        vocab = Vocab(max_vocab_size=FLAGS.vocab_size,
                      emb_dim=FLAGS.dim,
                      dataset_path=FLAGS.PathToDataset,
                      glove_path=FLAGS.PathToGlove,
                      vocab_path=FLAGS.PathToVocab,
                      lookup_path=FLAGS.PathToLookups)

        if FLAGS.model.lower() == "plain":
            print("Setting up the plain model.\n")
            data = DataGenerator(path_to_dataset=FLAGS.PathToDataset,
                                 max_inp_seq_len=FLAGS.enc_steps,
                                 max_out_seq_len=FLAGS.dec_steps,
                                 vocab=vocab,
                                 use_pgen=FLAGS.use_pgen,
                                 use_sample=FLAGS.sample)
            summarizer = SummarizationModel(vocab, data)

        elif FLAGS.model.lower() == "hier":
            print("Setting up the hier model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHier(vocab, data)

        elif FLAGS.model.lower() == "rlhier":
            print("Setting up the Hier RL model.\n")
            data = DataGeneratorHier(
                path_to_dataset=FLAGS.PathToDataset,
                max_inp_sent=FLAGS.max_enc_sent,
                max_inp_tok_per_sent=FLAGS.max_enc_steps_per_sent,
                max_out_tok=FLAGS.dec_steps,
                vocab=vocab,
                use_pgen=FLAGS.use_pgen,
                use_sample=FLAGS.sample)
            summarizer = SummarizationModelHierSC(vocab, data)

        else:
            raise ValueError(
                "model flag should be one of plain/hier/rlhier!! \n")

        end = time.time()
        print("Setting up vocab, data and model took {:.2f} sec.".format(
            end - start))

        summarizer.build_graph()

        if FLAGS.mode == 'train':
            summarizer.train()
        elif FLAGS.mode == "test":
            summarizer.test()
        else:
            raise ValueError("mode should be either train/test!! \n")

        main_end = time.time()
        print("Total time elapsed: %.2f \n" % (main_end - main_start))
Example #5
def main(unused_argv):
    print("unused_argv: ", unused_argv)
    if len(unused_argv) != 1:  # raise an error if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting seq2seq_attention in %s mode...', FLAGS.mode)

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (FLAGS.log_root))
    print("FLAGS.vocab_size: ", FLAGS.vocab_size)
    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a vocabulary
    print("vocab size: ", vocab.size())
    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to make a batch of these hypotheses.
    if FLAGS.mode == 'decode':
        FLAGS.batch_size = FLAGS.beam_size

    # If single_pass=True, check we're in decode mode
    if FLAGS.single_pass and FLAGS.mode != 'decode':
        raise Exception(
            "The single_pass flag should only be True in decode mode")

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = [
        'mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
        'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
        'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
        'cov_loss_wt', 'pointer_gen', 'fine_tune', 'train_size', 'subred_size',
        'use_doc_vec', 'use_multi_attn', 'use_multi_pgen', 'use_multi_pvocab',
        'create_ckpt'
    ]
    hps_dict = {}
    for key, val in FLAGS.__flags.items():  # in newer TF1 releases val is a Flag object, hence the .value accesses below
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(FLAGS.data_path,
                      vocab,
                      hps,
                      single_pass=FLAGS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    #   return

    if hps.mode.value == 'train':
        print("creating model...")
        model = SummarizationModel(hps, vocab)

        # -------------------------------------
        if hps.create_ckpt.value:
            step = 0

            model.build_graph()
            print("get value")
            pretrained_ckpt = '/home/cs224u/pointer/log/pretrained_model_tf1.2.1/train/model-238410'
            reader = pywrap_tensorflow.NewCheckpointReader(pretrained_ckpt)
            var_to_shape_map = reader.get_variable_to_shape_map()
            value = {}
            for key in var_to_shape_map:
                value[key] = reader.get_tensor(key)

            print("assign op")
            assign_op = []
            if hps.use_multi_pvocab.value:
                new_key = [
                    "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_0/Bias",
                    "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_1/Bias"
                ]
                for v in tf.trainable_variables():
                    key = v.name.split(":")[0]
                    if key in new_key:
                        origin_key = "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear/" + key.split(
                            "/")[-1]
                        a_op = v.assign(tf.convert_to_tensor(
                            value[origin_key]))
                    else:
                        a_op = v.assign(tf.convert_to_tensor(value[key]))
                    # if key == "seq2seq/embedding/embedding":
                    # a_op = v.assign(tf.convert_to_tensor(value[key]))
                    assign_op.append(a_op)
            else:
                for v in tf.trainable_variables():
                    key = v.name.split(":")[0]
                    if key == "seq2seq/embedding/embedding":
                        a_op = v.assign(tf.convert_to_tensor(value[key]))
                        assign_op.append(a_op)
            # ratio = 1
            # for v in tf.trainable_variables():
            #   key = v.name.split(":")[0]
            #   # embedding (50000, 128) -> (50000, 32)

            #   if key == "seq2seq/embedding/embedding":
            #       print (key)
            #       print (value[key].shape)
            #       d1 = value[key].shape[1]
            #       a_op = v.assign(tf.convert_to_tensor(value[key][:,:d1//ratio]))
            #   # kernel (384, 1024) -> (96, 256)
            #   # w_reduce_c (512, 256) -> (128, 64)
            #   elif key == "seq2seq/encoder/bidirectional_rnn/fw/lstm_cell/kernel" or \
            #   key == "seq2seq/encoder/bidirectional_rnn/bw/lstm_cell/kernel" or \
            #   key == "seq2seq/reduce_final_st/w_reduce_c" or \
            #   key == "seq2seq/reduce_final_st/w_reduce_h" or \
            #   key == "seq2seq/decoder/attention_decoder/Linear/Matrix" or \
            #   key == "seq2seq/decoder/attention_decoder/lstm_cell/kernel" or \
            #   key == "seq2seq/decoder/attention_decoder/Attention/Linear/Matrix" or \
            #   key == "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear/Matrix":
            #       print (key)
            #       print (value[key].shape)
            #       d0, d1 = value[key].shape[0], value[key].shape[1]
            #       a_op = v.assign(tf.convert_to_tensor(value[key][:d0//ratio, :d1//ratio]))
            #   # bias (1024,) -> (256,)
            #   elif key == "seq2seq/encoder/bidirectional_rnn/fw/lstm_cell/bias" or \
            #   key == "seq2seq/encoder/bidirectional_rnn/bw/lstm_cell/bias" or \
            #   key == "seq2seq/reduce_final_st/bias_reduce_c" or \
            #   key == "seq2seq/reduce_final_st/bias_reduce_h" or \
            #   key == "seq2seq/decoder/attention_decoder/lstm_cell/bias" or \
            #   key == "seq2seq/decoder/attention_decoder/v" or \
            #   key == "seq2seq/decoder/attention_decoder/Attention/Linear/Bias" or \
            #   key == "seq2seq/decoder/attention_decoder/Linear/Bias" or \
            #   key == "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear/Bias":
            #       print (key)
            #       print (value[key].shape)
            #       d0 = value[key].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[key][:d0//ratio]))
            #   # W_h (1, 1, 512, 512) -> (1, 1, 128, 128)
            #   elif key == "seq2seq/decoder/attention_decoder/W_h":
            #       print (key)
            #       print (value[key].shape)
            #       d2, d3 = value[key].shape[2], value[key].shape[3]
            #       a_op = v.assign(tf.convert_to_tensor(value[key][:,:,:d2//ratio,:d3//ratio]))
            #   # Matrix (1152, 1) -> (288, 1)
            #   elif key == "seq2seq/decoder/attention_decoder/calculate_pgen/Linear/Matrix" or \
            #   key == "seq2seq/output_projection/w":
            #       print (key)
            #       print (value[key].shape)
            #       d0 = value[key].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[key][:d0//ratio,:]))
            #   # Bias (1,) -> (1,)
            #   elif key == "seq2seq/output_projection/v" or \
            #   key == "seq2seq/decoder/attention_decoder/calculate_pgen/Linear/Bias":
            #       print (key)
            #       print (value[key].shape)
            #       a_op = v.assign(tf.convert_to_tensor(value[key]))

            #   # multi_attn
            #   if hps.use_multi_attn.value:
            #     if key == "seq2seq/decoder/attention_decoder/attn_0/v" or \
            #     key == "seq2seq/decoder/attention_decoder/attn_1/v":
            #     # key == "seq2seq/decoder/attention_decoder/attn_2/v":
            #       k = "seq2seq/decoder/attention_decoder/v"
            #       print (key)
            #       print (value[k].shape)
            #       d0 = value[k].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[k][:d0//ratio]))
            #     if key == "seq2seq/decoder/attention_decoder/Attention/Linear_0/Bias" or \
            #     key == "seq2seq/decoder/attention_decoder/Attention/Linear_1/Bias":
            #     # key == "seq2seq/decoder/attention_decoder/Attention/Linear_2/Bias":
            #       k = "seq2seq/decoder/attention_decoder/Attention/Linear/Bias"
            #       print (key)
            #       print (value[k].shape)
            #       d0 = value[k].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[k][:d0//ratio]))
            #   elif hps.use_multi_pgen.value:
            #     if key == "seq2seq/decoder/attention_decoder/Linear_0/Bias" or \
            #     key == "seq2seq/decoder/attention_decoder/Linear_1/Bias":
            #     # key == "seq2seq/decoder/attention_decoder/Linear_2/Bias":
            #       k = "seq2seq/decoder/attention_decoder/Linear/Bias"
            #       print (key)
            #       print (value[k].shape)
            #       d0 = value[k].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[k][:d0//ratio]))
            #     if key == "seq2seq/decoder/attention_decoder/calculate_pgen/Linear_0/Bias" or \
            #     key == "seq2seq/decoder/attention_decoder/calculate_pgen/Linear_1/Bias":
            #     # key == "seq2seq/decoder/attention_decoder/calculate_pgen/Linear_2/Bias":
            #       k = "seq2seq/decoder/attention_decoder/calculate_pgen/Linear/Bias"
            #       print (key)
            #       print (value[k].shape)
            #       a_op = v.assign(tf.convert_to_tensor(value[k]))
            #   elif hps.use_multi_pvocab.value:
            #     if key == "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_0/Bias" or \
            #     key == "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_1/Bias":
            #     # key == "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear_2/Bias":
            #       k = "seq2seq/decoder/attention_decoder/AttnOutputProjection/Linear/Bias"
            #       print (key)
            #       print (value[k].shape)
            #       d0 = value[k].shape[0]
            #       a_op = v.assign(tf.convert_to_tensor(value[k][:d0//ratio]))

            #    assign_op.append(a_op)

            # Add an op to initialize the variables.
            init_op = tf.global_variables_initializer()
            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()
            with tf.Session(config=util.get_config()) as sess:
                sess.run(init_op)
                # Do some work with the model.
                for a_op in assign_op:
                    a_op.op.run()

                for _ in range(0):  # range(0) makes this a no-op as written
                    batch = batcher.next_batch()
                    results = model.run_train_step(sess, batch)

                # Save the variables to disk.
                if hps.use_multi_attn.value:
                    ckpt_tag = "multi_attn_2_attn_proj"
                elif hps.use_multi_pgen.value:
                    ckpt_tag = "multi_attn_2_pgen_proj"
                elif hps.use_multi_pvocab.value:
                    ckpt_tag = "big_multi_attn_2_pvocab_proj"
                else:
                    ckpt_tag = "pointer_proj"

                ckpt_to_save = ('/home/cs224u/pointer/log/ckpt/' + ckpt_tag +
                                '/model.ckpt-' + str(step))
                save_path = saver.save(sess, ckpt_to_save)
                print("Model saved in path: %s" % save_path)

        # -------------------------------------
        else:
            setup_training(model, batcher, hps)

    elif hps.mode.value == 'eval':
        model = SummarizationModel(hps, vocab)
        run_eval(model, batcher, vocab)
    elif hps.mode.value == 'decode':
        # Hyperparameters for the decoder model. The model is configured with
        # max_dec_steps=1 because we only ever run one step of the decoder at
        # a time (to do beam search). Note that the batcher is initialized
        # with max_dec_steps equal to e.g. 100, because the batches need to
        # contain the full summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = SummarizationModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # Decode indefinitely (unless single_pass=True, in which case decode
        # the dataset exactly once).
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")