示例#1
0
def create_models(path,
                  en_vocab_size,
                  session,
                  forward_only,
                  beam_search,
                  beam_size=10,
                  attention=True):
    """Build a Seq2SeqModel and either restore it or initialize it fresh.

    The same vocabulary size is used for both vocabulary arguments of the
    model. Checkpoint state is looked up in ``path``; when a checkpoint file
    exists its parameters are restored into ``session``, otherwise all
    variables are initialized.

    Returns:
        The constructed model.
    """
    decoding_options = dict(forward_only=forward_only,
                            beam_search=beam_search,
                            beam_size=beam_size,
                            attention=attention)
    seq2seq = Seq2SeqModel(en_vocab_size, en_vocab_size, _buckets,
                           FLAGS.size, FLAGS.num_layers,
                           FLAGS.max_gradient_norm, FLAGS.batch_size,
                           FLAGS.learning_rate,
                           FLAGS.learning_rate_decay_factor,
                           **decoding_options)
    print("The training dir is: %s" % FLAGS.train_dir)

    checkpoint = tf.train.get_checkpoint_state(path)
    if not checkpoint or not tf.gfile.Exists(checkpoint.model_checkpoint_path):
        # Nothing to restore: start from freshly initialized variables.
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
        return seq2seq

    saved_path = checkpoint.model_checkpoint_path
    print("Reading model parameters from %s" % saved_path)
    seq2seq.saver.restore(session, saved_path)
    return seq2seq
示例#2
0
def create_model(session,
                 forward_only,
                 beam_search,
                 beam_size=10,
                 attention=True):
    """Create the translation model and load its parameters into ``session``.

    Unlike a training setup, this never falls back to fresh parameters: a
    missing checkpoint is treated as an error.

    Returns:
        The model with parameters restored from the checkpoint found in
        ``FLAGS.train_dir``.

    Raises:
        ValueError: if no checkpoint state (or no checkpoint path) is found
            in ``FLAGS.train_dir``.
    """
    model = Seq2SeqModel(FLAGS.en_vocab_size,
                         FLAGS.en_vocab_size,
                         _buckets,
                         FLAGS.size,
                         FLAGS.num_layers,
                         FLAGS.max_gradient_norm,
                         FLAGS.batch_size,
                         FLAGS.learning_rate,
                         FLAGS.learning_rate_decay_factor,
                         forward_only=forward_only,
                         beam_search=beam_search,
                         beam_size=beam_size,
                         attention=attention)
    print(FLAGS.train_dir)
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)

    if not (ckpt and ckpt.model_checkpoint_path):
        # ckpt may be None here, so the message must not dereference it;
        # report the directory that was searched instead.
        raise ValueError("model file not loaded correctly: {}".format(
            FLAGS.train_dir))

    print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
    model.saver.restore(session, ckpt.model_checkpoint_path)
    return model
示例#3
0
def main():
    """Build the model in its own graph and run ``pred`` on a sample question."""
    sample_question = '微信分享问题'
    tf_graph = tf.Graph()
    with tf_graph.as_default():
        model_args = (hidden_size, num_layers, batch_size, sequence_length,
                      embedding_size, learning_rate, num_encoder_symbols,
                      num_decoder_symbols, 'true')
        seq2seq = Seq2SeqModel(*model_args)
        with tf.Session(graph=tf_graph) as sess:
            seq2seq.pred(sess, sample_question)
示例#4
0
def load_model(session, config):
    """Build a decode-mode model and restore it from ``FLAGS.model_path``.

    Raises:
        ValueError: if no checkpoint exists at ``FLAGS.model_path``.
    """
    decoder = Seq2SeqModel(config, 'decode')
    if not tf.train.checkpoint_exists(FLAGS.model_path):
        raise ValueError('No such file:[{}]'.format(FLAGS.model_path))

    print('Reloading model parameters..')
    decoder.restore(session, FLAGS.model_path)
    return decoder
示例#5
0
def main():
    """Build the model in its own graph and train it for a fixed epoch count."""
    num_epochs = 5000
    tf_graph = tf.Graph()
    with tf_graph.as_default():
        model_args = (hidden_size, num_layers, batch_size, sequence_length,
                      embedding_size, learning_rate, num_encoder_symbols,
                      num_decoder_symbols, 'false')
        seq2seq = Seq2SeqModel(*model_args)
        with tf.Session(graph=tf_graph) as sess:
            seq2seq.train(sess, num_epochs)
示例#6
0
    def create_tf_model(self, tf_session, mode):
        """Assemble the model config, build the model and initialize variables.

        Settings are read from ``self.config`` (sections ``training``,
        ``model`` and ``decode``); the symbol counts come from the sizes of
        ``self.input_dict`` and ``self.output_dict``. ``self.batch_size`` is
        taken from the config in ``'train'`` mode and is 1 otherwise.

        Returns:
            The newly created model, also stored on ``self.model``.
        """
        cfg = self.config

        config = {}
        config['optimizer'] = cfg.get('training', 'optimizer')
        config['cell_type'] = cfg.get('model', 'cell_type')
        config['attention_type'] = cfg.get('model', 'attention_type')
        config['hidden_units'] = cfg.getint('model', 'hidden_units')
        config['depth'] = cfg.getint('model', 'depth')
        config['embedding_size'] = cfg.getint('model', 'embedding_size')
        # Symbol counts are derived from the dictionaries, not the config.
        config['num_encoder_symbols'] = len(self.input_dict)
        config['num_decoder_symbols'] = len(self.output_dict)
        config['use_residual'] = cfg.getboolean('model', 'use_residual')
        config['attn_input_feeding'] = cfg.getboolean('model',
                                                      'attn_input_feeding')
        config['use_dropout'] = cfg.getboolean('model', 'use_dropout')
        config['dropout_rate'] = cfg.getfloat('model', 'dropout_rate')
        config['learning_rate'] = cfg.getfloat('training', 'learning_rate')
        config['max_gradient_norm'] = cfg.getfloat('training',
                                                   'max_gradient_norm')
        config['use_fp16'] = cfg.getboolean('model', 'use_fp16')
        config['beam_width'] = cfg.getint('decode', 'beam_width')
        config['max_decode_step'] = cfg.getint('decode', 'max_decode_step')

        self.batch_size = (cfg.getint("training", "batch_size")
                           if mode == 'train' else 1)

        summary = (mode, config['depth'], config['hidden_units'])
        logging.info(
            "creating %s seq2seq model: %d layer(s) of %d units." % summary)

        self.model = Seq2SeqModel(config, mode)
        tf_session.run(tf.global_variables_initializer())
        return self.model
示例#7
0
def main():
    """Run the model's ``test`` on a sample question and print the reply."""
    sample_question = '明白了,我把那个售后更改成上门取件就能用那个退换无忧了对吧'
    tf_graph = tf.Graph()
    with tf_graph.as_default():
        model_args = (hidden_size, num_layers, batch_size, sequence_length,
                      embedding_size, learning_rate, num_encoder_symbols,
                      num_decoder_symbols, 'true')
        seq2seq = Seq2SeqModel(*model_args)

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        with tf.Session(graph=tf_graph, config=session_config) as sess:
            print(seq2seq.test(sess, sample_question, epoch=17))
示例#8
0
def parse_options():
    """Collect command-line options from every component and process them.

    Each component registers its own options on a shared parser; the ``-dev``
    and ``-test`` switches are added here. The parsed namespace is converted
    to a dict and handed to ``process_args``, whose result is returned.
    """
    parser = argparse.ArgumentParser()

    components = (Train, Encoder, AttnDecoder, Seq2SeqModel, LMModel,
                  BeamSearch)
    for component in components:
        component.add_parse_options(parser)

    switches = (
        ("-dev", "Get dev set results using the last saved model"),
        ("-test", "Get test results using the last saved model"),
    )
    for flag, description in switches:
        parser.add_argument(flag,
                            default=False,
                            action="store_true",
                            help=description)

    return process_args(vars(parser.parse_args()))
示例#9
0
def main():
    """Train the model in its own graph with GPU memory growth enabled."""
    num_epochs = 50
    tf_graph = tf.Graph()
    with tf_graph.as_default():
        model_args = (hidden_size, num_layers, batch_size, sequence_length,
                      embedding_size, learning_rate, num_encoder_symbols,
                      num_decoder_symbols, 'false')
        seq2seq = Seq2SeqModel(*model_args)

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        with tf.Session(graph=tf_graph, config=session_config) as sess:
            seq2seq.train(sess, num_epochs, epoch=16)
示例#10
0
文件: main.py 项目: wanyao1992/tf-s2s
def create_model(session, args, dicts):
    """Build the model, then restore it or initialize fresh parameters.

    When ``args.restore`` is set, parameters are restored from the checkpoint
    recorded in ``args.model_dir``. Otherwise ``args.model_dir`` is created
    if needed and all variables are initialized.

    Returns:
        The constructed model.

    Raises:
        ValueError: if ``args.restore`` is set but ``args.model_dir`` holds
            no checkpoint to restore from.
    """
    model = Seq2SeqModel(args, dicts)
    if args.restore:
        print('Reloading model parameters..')
        ckpt = tf.train.get_checkpoint_state(args.model_dir)
        # get_checkpoint_state yields None when there is no checkpoint;
        # fail with a clear message instead of an AttributeError.
        if not ckpt or not ckpt.model_checkpoint_path:
            raise ValueError(
                'No checkpoint found in [{}]'.format(args.model_dir))
        model.restore(session, ckpt.model_checkpoint_path)
    else:
        if not os.path.exists(args.model_dir):
            os.makedirs(args.model_dir)
        print('Created new model parameters..')
        session.run(tf.global_variables_initializer())

    return model
示例#11
0
 def __init__(self, vocab):
     """Build a non-training model and open a GPU session on its graph.

     A constant ``"ack"`` tensor is attached to the model as ``ping``.
     """
     self.model = Seq2SeqModel(vocab, training_mode=False)
     with self.model.graph.as_default():
         self.model.ping = tf.constant("ack")

     session_config = tf.ConfigProto(
         gpu_options=tf.GPUOptions(
             per_process_gpu_memory_fraction=mem_limit,
             allow_growth=True,
             visible_device_list='0'),
         allow_soft_placement=True)
     self.sess = tf.Session(graph=self.model.graph, config=session_config)
示例#12
0
    def ask_question(self, question):
        """Return the model's reply to ``question``.

        The question is cut to ``self.sequence_length`` when longer. A new
        graph, model and session are created on every call.
        """
        max_len = self.sequence_length
        if len(question) > max_len:
            question = question[:max_len]

        tf_graph = tf.Graph()
        with tf_graph.as_default():
            model_args = (self.hidden_size, self.num_layers, self.batch_size,
                          self.sequence_length, self.embedding_size,
                          self.learning_rate, self.num_encoder_symbols,
                          self.num_decoder_symbols, 'true')
            seq2seq = Seq2SeqModel(*model_args)

            session_config = tf.ConfigProto()
            session_config.gpu_options.allow_growth = True
            with tf.Session(graph=tf_graph, config=session_config) as sess:
                answer = seq2seq.test(sess, question, epoch=26)

        return answer
示例#13
0
def create_model(session, FLAGS):
    """Build the model in decode or train mode and load or initialize it.

    The mode is ``'decode'`` when ``FLAGS.decode`` is truthy, else
    ``'train'``. Parameters are restored from the checkpoint recorded in
    ``FLAGS.model_dir`` when one exists; otherwise all variables are
    initialized.

    Returns:
        The constructed model.
    """
    run_mode = 'decode' if FLAGS.decode else 'train'

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Running in "{}" mode'.format(run_mode.upper()))
    model = Seq2SeqModel(FLAGS, run_mode)

    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print('Reloading model parameters..')
        model.restore(session, ckpt.model_checkpoint_path)

    else:
        print('Created new model parameters..')
        session.run(tf.global_variables_initializer())

    return model
示例#14
0
def create_model(session, FLAGS):
    """Build a train-mode model from the flag values and load or initialize it.

    The model config is the flag dictionary sorted by key. Parameters are
    restored from the checkpoint recorded in ``FLAGS.model_dir`` when one
    exists; otherwise the directory is created if needed and all variables
    are initialized.

    Returns:
        The constructed model.
    """
    config = OrderedDict(sorted(FLAGS.flag_values_dict().items()))
    model = Seq2SeqModel(config, 'train')

    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print('Reloading model parameters..')
        model.restore(session, ckpt.model_checkpoint_path)

    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print('Created new model parameters..')
        session.run(tf.global_variables_initializer())

    return model
def create_model(session, srce_vocab_size, trgt_vocab_size, forward_only):
  """Build the translation model and restore its parameters into ``session``.

  The checkpoint is looked up under ``<data_dir>/checkpoint``.

  Raises:
    ValueError: if no checkpoint file is found there.
  """
  positional = (srce_vocab_size, trgt_vocab_size, _buckets,
                100, 74, 2, 5.0, 1,
                0.01, 0.95)
  translator = Seq2SeqModel(*positional,
                            keep_prob=0.8,
                            forward_only=forward_only)

  state = tf.train.get_checkpoint_state(os.path.join(data_dir, "checkpoint"))
  if not (state and gfile.Exists(state.model_checkpoint_path)):
    raise ValueError(" Invalid arguments! Fails on creating models! ")

  print("Reading model parameters from %s" % state.model_checkpoint_path)
  translator.saver.restore(session, state.model_checkpoint_path)
  return translator
示例#16
0
    def create_eval_model(self, dev_set, standalone=False):
        """Build a char-only, non-training model and wrap it in ``Eval``.

        The model is created inside the ``"model"`` variable scope, with
        ``reuse=None`` when ``standalone`` is truthy and ``reuse=True``
        otherwise. The result is stored on ``self.eval_model``.
        """
        scope_reuse = None if standalone else True
        with tf.variable_scope("model", reuse=scope_reuse):
            print("Creating dev model")

            # Work on a copy so the stored seq2seq params are not modified.
            char_params = copy.deepcopy(self.seq2seq_params)
            char_params.tasks = {'char'}
            char_params.num_layers = {'char': char_params.num_layers['char']}
            dev_model = Seq2SeqModel(dev_set.data_iter,
                                     isTraining=False,
                                     params=char_params)

            eval_params = Bunch()
            eval_params.best_model_dir = self.params.best_model_dir
            eval_params.vocab_dir = self.params.vocab_dir

            self.eval_model = Eval(dev_model, params=eval_params)
示例#17
0
def get_seq2seq_model(session, forward_only, dict_lengths,
                      max_sentence_lengths, model_dir):
    """Build the model with fixed hyperparameters and load or initialize it.

    ``dict_lengths[0]`` and ``dict_lengths[1]`` give the source and target
    vocabulary sizes; ``max_sentence_lengths`` is used as the single bucket.
    Parameters are restored from the checkpoint recorded in ``model_dir``
    when one exists; otherwise all variables are initialized.
    """
    source_size, target_size = dict_lengths[0], dict_lengths[1]
    hyperparams = dict(source_vocab_size=source_size,
                       target_vocab_size=target_size,
                       buckets=[max_sentence_lengths],
                       size=256,
                       num_layers=2,
                       max_gradient_norm=5.0,
                       batch_size=128,
                       learning_rate=1.0,
                       learning_rate_decay_factor=0.99,
                       forward_only=forward_only,
                       dtype=tf.float16)
    seq2seq = Seq2SeqModel(**hyperparams)

    state = tf.train.get_checkpoint_state(model_dir)
    if not (state and tf.train.checkpoint_exists(state.model_checkpoint_path)):
        session.run(tf.global_variables_initializer())
        return seq2seq

    print('[+] Loaded checkpoint {}'.format(state.model_checkpoint_path))
    seq2seq.saver.restore(session, state.model_checkpoint_path)
    return seq2seq
示例#18
0
def create_model(session, feed_future_data, train_model, observation_steps,
                 prediction_steps, batch_size, rnn_size, num_layers,
                 learning_rate, learning_rate_decay_factor, input_size,
                 max_gradient_norm):
    """Build the model and restore it from, or initialize it in, its run dir.

    The run directory is ``FLAGS.train_dir`` joined with the title returned
    by ``get_title_from_params()``; it is created when missing. Parameters
    are restored from the checkpoint recorded there when one exists;
    otherwise all variables are initialized.

    Returns:
        The constructed model.
    """
    model = Seq2SeqModel(feed_future_data, train_model, observation_steps,
                         prediction_steps, batch_size, rnn_size, num_layers,
                         learning_rate, learning_rate_decay_factor, input_size,
                         max_gradient_norm)

    # Resolve the run directory once. os.makedirs also creates the missing
    # parent (FLAGS.train_dir), so a single call covers both levels.
    checkpoint_dir = os.path.join(FLAGS.train_dir, get_title_from_params())
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        model.saver.restore(session, ckpt.model_checkpoint_path)
    else:
        print("Created model with fresh parameters.")
        session.run(tf.initialize_all_variables())
    return model
示例#19
0
    def main(self):
        """Load the data, build the model and session, then test or train.

        After restoring any previous model, runs the interactive test when
        ``self.args.test`` equals ``'interactive'`` and trains otherwise.
        """
        self.text_data = TextData(self.args)

        with tf.Graph().as_default():
            self.seq2seq_model = Seq2SeqModel(self.args, self.text_data)

            # Summary writer and saver both target the model directory.
            model_dir = os.path.join(os.path.curdir, self.args.modeldir)
            self.writer = tf.summary.FileWriter(os.path.abspath(model_dir))
            self.saver = tf.train.Saver()

            placement = tf.ConfigProto(
                allow_soft_placement=self.args.allow_soft_placement,
                log_device_placement=self.args.log_device_placement)
            self.sess = tf.Session(config=placement)

            self.restore_previous_model()

            interactive = self.args.test == 'interactive'
            entry_point = self.test_interactive if interactive else self.train
            entry_point()
示例#20
0
    def __init__(self,
                 encoders,
                 decoders,
                 checkpoint_dir,
                 learning_rate,
                 learning_rate_decay_factor,
                 batch_size,
                 keep_best=1,
                 dev_prefix=None,
                 score_function='corpus_scores',
                 name=None,
                 ref_ext=None,
                 pred_edits=False,
                 dual_output=False,
                 binary=None,
                 truncate_lines=True,
                 ensemble=False,
                 checkpoints=None,
                 beam_size=1,
                 len_normalization=1,
                 early_stopping=True,
                 **kwargs):
        """Set up data bookkeeping, shared variables and the seq2seq model(s).

        Records per-extension settings from the encoders and decoders,
        creates the learning-rate and step variables, resolves file names,
        reads the vocabularies, builds one model (or one per checkpoint when
        ensembling) and creates the beam-search op.

        Args:
            encoders: encoder configurations; each is read for ``ext``,
                ``name``, ``character_level``, ``max_len`` and ``binary``.
            decoders: decoder configurations, read the same way.
            checkpoint_dir: stored on ``self.checkpoint_dir``.
            learning_rate: initial value of the learning-rate variable.
            learning_rate_decay_factor: factor applied by
                ``self.learning_rate_decay_op``.
            batch_size: stored on ``self.batch_size``.
            keep_best: stored on ``self.keep_best``.
            dev_prefix: stored and forwarded to ``utils.get_filenames``.
            score_function: name of an attribute of ``evaluation``; its
                ``reversed`` attribute, when present, sets
                ``self.reversed_scores``.
            name: stored and forwarded to ``utils.get_filenames`` and the
                model.
            ref_ext: stored; when not None an extra ``False`` is appended to
                ``self.binary``.
            pred_edits: stored and forwarded to the model.
            dual_output: stored and forwarded to the model.
            binary: not read in this constructor; ``self.binary`` is rebuilt
                from the encoders and decoders.
            truncate_lines: when true ``self.max_len`` is None; otherwise it
                maps each extension to its maximum length.
            ensemble: together with a non-None ``checkpoints``, selects the
                one-model-per-checkpoint path.
            checkpoints: iterable whose length determines the number of
                ensemble models.
            beam_size: forwarded to ``create_beam_op``.
            len_normalization: forwarded to ``create_beam_op``.
            early_stopping: forwarded to ``create_beam_op``.
            **kwargs: forwarded to ``utils.get_filenames`` and to every
                ``Seq2SeqModel``.
        """

        self.batch_size = batch_size
        self.character_level = {}
        self.binary = []

        # Default each component's extension to its name, and record its
        # character-level and binary settings.
        for encoder_or_decoder in encoders + decoders:
            encoder_or_decoder.ext = encoder_or_decoder.ext or encoder_or_decoder.name
            self.character_level[
                encoder_or_decoder.ext] = encoder_or_decoder.character_level
            self.binary.append(encoder_or_decoder.get('binary', False))

        # Character-level output follows the first decoder.
        self.char_output = decoders[0].character_level

        self.src_ext = [encoder.ext for encoder in encoders]
        self.trg_ext = [decoder.ext for decoder in decoders]

        self.extensions = self.src_ext + self.trg_ext

        self.ref_ext = ref_ext
        # A reference extension adds one extra (always False) binary flag.
        if self.ref_ext is not None:
            self.binary.append(False)

        self.pred_edits = pred_edits
        self.dual_output = dual_output

        self.dev_prefix = dev_prefix
        self.name = name

        self.max_input_len = [encoder.max_len for encoder in encoders]
        self.max_output_len = [decoder.max_len for decoder in decoders]

        if truncate_lines:
            self.max_len = None  # we let seq2seq.get_batch handle long lines (by truncating them)
        else:  # the line reader will drop lines that are too long
            self.max_len = dict(
                zip(self.extensions, self.max_input_len + self.max_output_len))

        # Non-trainable learning-rate variable, plus an op that multiplies it
        # by the decay factor.
        self.learning_rate = tf.Variable(learning_rate,
                                         trainable=False,
                                         name='learning_rate',
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)

        # The step counters are pinned to the CPU.
        with tf.device('/cpu:0'):
            self.global_step = tf.Variable(0,
                                           trainable=False,
                                           name='global_step')
            self.baseline_step = tf.Variable(0,
                                             trainable=False,
                                             name='baseline_step')

        self.filenames = utils.get_filenames(extensions=self.extensions,
                                             dev_prefix=dev_prefix,
                                             name=name,
                                             ref_ext=ref_ext,
                                             binary=self.binary,
                                             **kwargs)
        utils.debug('reading vocabularies')
        self.vocabs = None
        self.src_vocab, self.trg_vocab = None, None
        # NOTE(review): read_vocab() presumably fills self.vocabs, which is
        # iterated right below — confirm.
        self.read_vocab()

        # Set vocab_size on each component that has a vocabulary.
        for encoder_or_decoder, vocab in zip(encoders + decoders, self.vocabs):
            if vocab:
                encoder_or_decoder.vocab_size = len(vocab.reverse)

        utils.debug('creating model')

        self.models = []
        if ensemble and checkpoints is not None:
            # One model per checkpoint, each in its own 'model_<i>' variable
            # scope (numbered from 1); the first one is the primary model.
            for i, _ in enumerate(checkpoints, 1):
                with tf.variable_scope('model_{}'.format(i)):
                    model = Seq2SeqModel(encoders,
                                         decoders,
                                         self.learning_rate,
                                         self.global_step,
                                         name=name,
                                         pred_edits=pred_edits,
                                         dual_output=dual_output,
                                         baseline_step=self.baseline_step,
                                         **kwargs)
                    self.models.append(model)
            self.seq2seq_model = self.models[0]
        else:
            self.seq2seq_model = Seq2SeqModel(encoders,
                                              decoders,
                                              self.learning_rate,
                                              self.global_step,
                                              name=name,
                                              pred_edits=pred_edits,
                                              dual_output=dual_output,
                                              baseline_step=self.baseline_step,
                                              **kwargs)
            self.models.append(self.seq2seq_model)

        # The beam op is created on the primary model and receives every model.
        self.seq2seq_model.create_beam_op(self.models, beam_size,
                                          len_normalization, early_stopping)

        # State that starts out unset in this constructor.
        self.batch_iterator = None
        self.dev_batches = None
        self.train_size = None
        self.saver = None
        self.keep_best = keep_best
        self.checkpoint_dir = checkpoint_dir
        self.epoch = None

        self.training = utils.AttrDict()  # used to keep track of training

        # Score functions without a `reversed` attribute default to
        # higher-is-better.
        try:
            self.reversed_scores = getattr(
                evaluation, score_function).reversed  # the lower the better
        except AttributeError:
            self.reversed_scores = False  # the higher the better
示例#21
0
def train():
    """Parse command-line options, train the estimator and optionally evaluate.

    Builds a ``Seq2SeqModel`` from the dimension/length/size options, wraps
    its model function in a ``tf.estimator.Estimator`` writing to
    ``--model_dir``, and trains on ``--train-data-files``. Evaluation runs
    only when ``--eval-data-files`` is supplied, since that option is not
    required.
    """
    parser = argparse.ArgumentParser(
        description='train model based on data over the period')
    parser.add_argument('--train-data-files',
                        required=True,
                        type=str,
                        help='train data files')
    parser.add_argument('--eval-data-files', type=str, help='eval data files')
    parser.add_argument('--batch-size',
                        type=int,
                        default=512,
                        help='batch size')
    parser.add_argument('--hidden-size',
                        type=int,
                        default=64,
                        help='hidden size in rnn')
    parser.add_argument('--input-dim',
                        type=int,
                        default=1,
                        help='input data dimension')
    parser.add_argument('--output-dim',
                        type=int,
                        default=1,
                        help='output data dimension')
    parser.add_argument('--input-seq-len',
                        type=int,
                        default=10,
                        help='input sequence length')
    parser.add_argument('--output-seq-len',
                        type=int,
                        default=5,
                        help='output sequence length')
    parser.add_argument('--optimizer',
                        type=str,
                        default='Adam',
                        help='optimizer')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=0.01,
                        help='learning rate')
    parser.add_argument('--layer_cnt',
                        type=int,
                        default=1,
                        help='seq2seq layer cnt')
    parser.add_argument('--lambda-l2-reg',
                        type=float,
                        default=0.02,
                        help='lambda l2 reg')
    parser.add_argument('--gradient_clipping',
                        type=float,
                        default=2.5,
                        help='gradient clipping')
    parser.add_argument('--model_dir',
                        type=str,
                        required=True,
                        help='model output directory')
    parser.add_argument('--steps',
                        type=int,
                        default=None,
                        help='training steps')
    parser.add_argument('--epoch',
                        type=int,
                        default=None,
                        help='training epoch')

    args = parser.parse_args()

    # Hyperparameters handed to the estimator's model function.
    hparams = {
        'optimizer': args.optimizer,
        'learning_rate': args.learning_rate,
        'lambda_l2_reg': args.lambda_l2_reg,
        'gradient_clipping': args.gradient_clipping,
        'steps': args.steps,
    }

    seq2seq_model = Seq2SeqModel(args.input_dim, args.output_dim,
                                 args.input_seq_len, args.output_seq_len,
                                 args.hidden_size, args.layer_cnt)

    seq2seq_model_fn = seq2seq_model.create_model_fn()

    estimator = tf.estimator.Estimator(model_fn=seq2seq_model_fn,
                                       model_dir=args.model_dir,
                                       params=hparams)

    # Training input.
    train_dataset = TimeSeriesDataset(args.train_data_files, args.epoch,
                                      args.batch_size, args.input_seq_len,
                                      args.output_seq_len)

    estimator.train(input_fn=train_dataset.input_fn, steps=args.steps)

    # --eval-data-files is optional: skip evaluation instead of building a
    # dataset from None.
    if args.eval_data_files:
        eval_dataset = TimeSeriesDataset(args.eval_data_files, 1,
                                         args.batch_size, args.input_seq_len,
                                         args.output_seq_len)

        estimator.evaluate(input_fn=eval_dataset.input_fn)
示例#22
0
import numpy as np
from util import parse_file, sentence_to_word_id, create_buckets, _buckets, parse_sentence, EOS, ignore_list

# Load the id<->word dictionaries; `with` closes each file once it is parsed.
with open("dictionary_i2w.json", "r") as _i2w_file:
    id2word = json.load(_i2w_file)
with open("dictionary_w2i.json", "r") as _w2i_file:
    word2id = json.load(_w2i_file)
# JSON object keys are strings; convert them back to integer ids.
id2word = {int(key): value for key, value in id2word.items()}
vocab_size = len(word2id)

# (2) Build the model
sess = tf.Session()
model = Seq2SeqModel(vocab_size,
                     vocab_size,
                     _buckets,
                     128,
                     3,
                     5.0,
                     1,
                     0.5,
                     0.99,
                     forward_only=True,
                     use_lstm=True)
saver = tf.train.Saver()
# Parameters are restored through the model's own saver.
model.saver.restore(sess, "./tmp/model.ckpt")


# (1) Decorator marking the function that handles replies
@default_reply
def replay_message(message):
    """
    Slack Botの応答を定義する
示例#23
0
def create_model(forward_only):
    """Return a Seq2SeqModel built from the module-level constants."""
    model_args = (MAX_VOCAB_SIZE, MAX_VOCAB_SIZE, BUCKETS, LAYER_SIZE,
                  LSTM_LAYES, MAX_GRADIENT_NORM, BATCH_SIZE, LEARNING_RATE,
                  LEARNING_RATE_DECAY_FACTOR, forward_only)
    return Seq2SeqModel(*model_args)
    if args.const_folding:
        outputs = [
            tf.identity(tf.identity(logits, name="logits"),
                        name="logits_identity")
        ]
    else:
        outputs = [tf.identity(logits, name="logits")]
elif args.model_name == 'seq2seq':
    print('>> Converting graph seq2seq')
    batch_size = 1
    encoder_step = 1
    encoder_layer = 1
    decoder_step = 1
    decoder_layer = 1
    hidden_size = 128
    model = Seq2SeqModel(batch_size, hidden_size, encoder_layer, encoder_step,
                         decoder_layer, decoder_step)
    eval_inputs = tf.placeholder(tf.float32,
                                 [encoder_step, batch_size, hidden_size],
                                 'eval_input')

    eval_inputs_list = tf.split(value=eval_inputs,
                                axis=0,
                                num_or_size_splits=encoder_step)
    for i in range(len(eval_inputs_list)):
        eval_inputs_list[i] = tf.squeeze(eval_inputs_list[i], axis=[0])
    logits = model(eval_inputs_list)

    inputs = [eval_inputs]
    if args.const_folding:
        outputs = [
            tf.identity(tf.identity(logits, name="logits"),
# Training configuration set on `model_args` (created outside this excerpt).
# NOTE(review): the exact meaning of each option is defined by the library
# that consumes `model_args` — confirm against its documentation.
model_args.evaluate_during_training_steps = 0
model_args.overwrite_output_dir = True
# Early stopping is enabled and keyed to the "matches" metric, which is
# flagged as not-to-be-minimized.
model_args.early_stopping_consider_epochs = True
model_args.use_early_stopping = True
model_args.use_cached_eval_features = True
model_args.train_batch_size = 32
model_args.save_steps = 0
model_args.early_stopping_metric = "matches"
model_args.early_stopping_metric_minimize = False
model_args.output_dir = "outputs_v2/"
model_args.weight_decay = 0.01
model_args.learning_rate = 3e-5


# Encoder and decoder use the same pretrained model name; CUDA is disabled.
model = Seq2SeqModel(encoder_type="bert", encoder_name="cahya/bert-base-indonesian-522M",
                     decoder_name="cahya/bert-base-indonesian-522M",
                     args=model_args, use_cuda=False)

def count_matches(labels, preds):
    # print(labels)
    # print(preds)
    predictions = []
    for pred in preds:
      if (pred.strip()).endswith("/"):
        a = "[SEP] " + pred.split('/')[0].strip() + "/ [SEP]"
        predictions.append(a)
      else:
        if len(pred.split('/')) > 1:
          a = "[SEP] " + pred.split('/')[0].strip() + '/'+ pred.split('/')[1].strip() + " [SEP]"
          predictions.append(a)
        elif len(pred.split('/')) == 1:
示例#26
0
def main(_):
    """Train a question-generation model on SQuAD triples.

    The function:

    1. optionally shrinks the model/batch sizes when ``FLAGS.testing`` is set;
    2. loads the train and dev triples and optionally filters them;
    3. loads or builds the vocabulary and writes it to the checkpoint dir;
    4. builds a ``Seq2SeqModel`` or ``MaluubaModel`` according to the first
       seven characters of ``FLAGS.model_type`` (plus a
       ``DiscriminatorInstance`` when ``FLAGS.policy_gradient`` is set);
    5. runs ``num_steps_train * FLAGS.num_epochs`` training steps, using
       either a plain optimizer step or a forward pass followed by a policy
       gradient step with whitened LM / QA / discriminator rewards;
    6. every ``FLAGS.eval_freq`` steps writes train metrics, evaluates on a
       random dev subset and saves a checkpoint.

    Args:
        _: Ignored. Presumably the argv list handed over by the TensorFlow
            app runner -- confirm against the call site.
    """
    if FLAGS.testing:
        print('TEST MODE - reducing model size')
        FLAGS.context_encoder_units = 100
        FLAGS.answer_encoder_units = 100
        FLAGS.decoder_units = 100
        FLAGS.batch_size = 8
        FLAGS.eval_batch_size = 8
        # FLAGS.embedding_size=50

    # The run id is the current Unix time in seconds; it names both the
    # checkpoint directory and the summary log directory for this run.
    run_id = str(int(time.time()))
    chkpt_path = FLAGS.model_dir + 'qgen/' + FLAGS.model_type + '/' + run_id
    restore_path = FLAGS.model_dir + 'qgen/' + FLAGS.restore_path if FLAGS.restore_path is not None else None  #'MALUUBA-CROP-LATENT'+'/'+'1534123959'
    # restore_path=FLAGS.model_dir+'saved/qgen-maluuba-crop-glove-smart'
    disc_path = FLAGS.model_dir + 'saved/discriminator-trained-latent'

    print("Run ID is ", run_id)
    print("Model type is ", FLAGS.model_type)

    if not os.path.exists(chkpt_path):
        os.makedirs(chkpt_path)

    # load dataset
    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, True)

    # Keep per-field tuples of the data *before* filtering/truncation. The
    # train ones are indexed later with `train_batch[3]` to recover the
    # uncropped context and answer for reward computation. The dev ones are
    # not read again in this function.
    train_contexts_unfilt, _, ans_text_unfilt, ans_pos_unfilt = zip(
        *train_data)
    dev_contexts_unfilt, _, dev_ans_text_unfilt, dev_ans_pos_unfilt = zip(
        *dev_data)

    if FLAGS.testing:
        train_data = train_data[:1000]
        num_dev_samples = 100
    else:
        num_dev_samples = FLAGS.num_dev_samples

    # A negative `filter_window_size_before` disables filtering.
    if FLAGS.filter_window_size_before > -1:
        train_data = preprocessing.filter_squad(
            train_data,
            window_size_before=FLAGS.filter_window_size_before,
            window_size_after=FLAGS.filter_window_size_after,
            max_tokens=FLAGS.filter_max_tokens)
        dev_data = preprocessing.filter_squad(
            dev_data,
            window_size_before=FLAGS.filter_window_size_before,
            window_size_after=FLAGS.filter_window_size_after,
            max_tokens=FLAGS.filter_max_tokens)

    print('Loaded SQuAD with ', len(train_data), ' triples')
    train_contexts, train_qs, train_as, train_a_pos = zip(*train_data)

    # Vocabulary: reuse the one stored next to the restored model, or build
    # one (GloVe-based or from the training contexts + questions) and write it
    # into this run's checkpoint directory.
    if FLAGS.restore:
        if restore_path is None:
            exit('You need to specify a restore path!')
        with open(restore_path + '/vocab.json', encoding="utf-8") as f:
            vocab = json.load(f)
    elif FLAGS.glove_vocab:
        vocab = loader.get_glove_vocab(FLAGS.data_path,
                                       size=FLAGS.vocab_size,
                                       d=FLAGS.embedding_size)
        with open(chkpt_path + '/vocab.json', 'w',
                  encoding="utf-8") as outfile:
            json.dump(vocab, outfile)
    else:
        vocab = loader.get_vocab(train_contexts + train_qs, FLAGS.vocab_size)
        with open(chkpt_path + '/vocab.json', 'w',
                  encoding="utf-8") as outfile:
            json.dump(vocab, outfile)

    # Create model
    # Only the first 7 characters of the model type are compared, so suffixed
    # names such as "MALUUBA_RL" also select the corresponding branch.
    if FLAGS.model_type[:7] == "SEQ2SEQ":
        model = Seq2SeqModel(vocab,
                             training_mode=True,
                             use_embedding_loss=FLAGS.embedding_loss)
    elif FLAGS.model_type[:7] == "MALUUBA":
        # TEMP
        # The QA and LM weights are zeroed before the model is constructed
        # whenever policy gradient is off.
        if not FLAGS.policy_gradient:
            FLAGS.qa_weight = 0
            FLAGS.lm_weight = 0
        model = MaluubaModel(vocab,
                             training_mode=True,
                             use_embedding_loss=FLAGS.embedding_loss)
        # if FLAGS.model_type[:10] == "MALUUBA_RL":
        #     qa_vocab=model.qa.vocab
        #     lm_vocab=model.lm.vocab
        # NOTE(review): `discriminator` is bound only on this path
        # (MALUUBA* model with policy_gradient set), yet it is referenced
        # further down whenever FLAGS.disc_train is set -- confirm the flag
        # combinations cannot reach those uses without it.
        if FLAGS.policy_gradient:
            discriminator = DiscriminatorInstance(trainable=FLAGS.disc_train,
                                                  path=disc_path)
    else:
        exit("Unrecognised model type: " + FLAGS.model_type)

    # create data streamer
    with SquadStreamer(vocab, FLAGS.batch_size, FLAGS.num_epochs,
                       shuffle=True) as train_data_source, SquadStreamer(
                           vocab, FLAGS.eval_batch_size, 1,
                           shuffle=True) as dev_data_source:

        with model.graph.as_default():
            saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)

        # change visible devices if using RL models
        # NOTE(review): `mem_limit` is not defined inside this function;
        # presumably a module-level constant -- verify it exists.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit,
                                    visible_device_list='0',
                                    allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                              allow_soft_placement=False),
                        graph=model.graph) as sess:

            # NOTE(review): the writer is never explicitly closed in this
            # function.
            summary_writer = tf.summary.FileWriter(
                FLAGS.log_dir + 'qgen/' + FLAGS.model_type + '/' + run_id,
                sess.graph)

            train_data_source.initialise(train_data)

            num_steps_train = len(train_data) // FLAGS.batch_size
            num_steps_dev = num_dev_samples // FLAGS.eval_batch_size

            # `start_e` is assigned in both branches but is only referenced
            # by the commented-out epoch loop below.
            if FLAGS.restore:
                saver.restore(sess, tf.train.latest_checkpoint(restore_path))
                start_e = 15  #FLAGS.num_epochs
                print('Loaded model')
            else:
                start_e = 0
                sess.run(tf.global_variables_initializer())
                # sess.run(model.glove_init_ops)

                # Write zero-valued dev F1/BLEU points at step 0 for fresh
                # runs.
                f1summary = tf.Summary(value=[
                    tf.Summary.Value(tag="dev_perf/f1", simple_value=0.0)
                ])
                bleusummary = tf.Summary(value=[
                    tf.Summary.Value(tag="dev_perf/bleu", simple_value=0.0)
                ])

                summary_writer.add_summary(f1summary, global_step=0)
                summary_writer.add_summary(bleusummary, global_step=0)

            # Initialise the dataset
            # sess.run(model.iterator.initializer, feed_dict={model.context_ph: train_contexts,
            #                                   model.qs_ph: train_qs, model.as_ph: train_as, model.a_pos_ph: train_a_pos})

            # Lowest mean dev NLL seen so far; starts at a large sentinel so
            # the first evaluation below 1e6 counts as an improvement.
            best_oos_nll = 1e6

            # Running statistics of each reward signal; their mean and
            # variance are used to whiten the rewards below.
            lm_score_moments = online_moments.OnlineMoment()
            qa_score_moments = online_moments.OnlineMoment()
            disc_score_moments = online_moments.OnlineMoment()

            # for e in range(start_e,start_e+FLAGS.num_epochs):
            # Train for one epoch
            # `i` is a global step counter across all epochs (one flat loop).
            for i in tqdm(range(num_steps_train * FLAGS.num_epochs),
                          desc='Training'):
                # Get a batch
                train_batch, curr_batch_size = train_data_source.get_batch()

                # Are we doing policy gradient? Do a forward pass first, then build the PG batch and do an update step
                if FLAGS.model_type[:
                                    10] == "MALUUBA_RL" and FLAGS.policy_gradient:

                    # do a fwd pass first, get the score, then do another pass and optimize
                    qhat_str, qhat_ids, qhat_lens = sess.run(
                        [
                            model.q_hat_beam_string, model.q_hat_beam_ids,
                            model.q_hat_beam_lens
                        ],
                        feed_dict={
                            model.input_batch: train_batch,
                            model.is_training: FLAGS.pg_dropout,
                            model.hide_answer_in_copy: True
                        })

                    # The output is as long as the max allowed len - remove the pointless extra padding
                    qhat_ids = qhat_ids[:, :np.max(qhat_lens)]
                    qhat_str = qhat_str[:, :np.max(qhat_lens)]

                    pred_str = byte_token_array_to_str(qhat_str, qhat_lens - 1)
                    gold_q_str = byte_token_array_to_str(
                        train_batch[1][0], train_batch[1][3])

                    # Get reward values
                    lm_score = (-1 * model.lm.get_seq_perplexity(pred_str)
                                ).tolist()  # lower perplexity is better

                    # retrieve the uncropped context for QA evaluation
                    # `train_batch[3]` is used as indices into the unfiltered
                    # tuples captured before filtering/truncation.
                    unfilt_ctxt_batch = [
                        train_contexts_unfilt[ix] for ix in train_batch[3]
                    ]
                    ans_text_batch = [
                        ans_text_unfilt[ix] for ix in train_batch[3]
                    ]
                    ans_pos_batch = [
                        ans_pos_unfilt[ix] for ix in train_batch[3]
                    ]

                    qa_pred = model.qa.get_ans(unfilt_ctxt_batch, pred_str)
                    # NOTE(review): `qa_pred_gold` is computed but not read
                    # anywhere else in this function.
                    qa_pred_gold = model.qa.get_ans(unfilt_ctxt_batch,
                                                    gold_q_str)

                    # gold_str=[]
                    # pred_str=[]
                    qa_f1s = []
                    gold_ans_str = byte_token_array_to_str(train_batch[2][0],
                                                           train_batch[2][2],
                                                           is_array=False)

                    # QA reward: F1 between the normalised gold answer and
                    # the answer predicted from the generated question.
                    qa_f1s.extend([
                        metrics.f1(metrics.normalize_answer(gold_ans_str[b]),
                                   metrics.normalize_answer(qa_pred[b]))
                        for b in range(curr_batch_size)
                    ])

                    disc_scores = discriminator.get_pred(
                        unfilt_ctxt_batch, pred_str, ans_text_batch,
                        ans_pos_batch)

                    # Reward statistics start accumulating once half of the
                    # burn-in period has elapsed.
                    if i > FLAGS.pg_burnin // 2:
                        lm_score_moments.push(lm_score)
                        qa_score_moments.push(qa_f1s)
                        disc_score_moments.push(disc_scores)

                    # print(disc_scores)
                    # print((e-start_e)*num_steps_train+i, flags.pg_burnin)

                    # Policy-gradient updates only happen after the full
                    # burn-in; before that this branch performs no optimizer
                    # step at all.
                    if i > FLAGS.pg_burnin:
                        # A variant of popart
                        # Whiten each reward with its running mean/variance;
                        # the 1e-6 keeps the divisor non-zero.
                        qa_score_whitened = (
                            qa_f1s - qa_score_moments.mean
                        ) / np.sqrt(qa_score_moments.variance + 1e-6)
                        lm_score_whitened = (
                            lm_score - lm_score_moments.mean
                        ) / np.sqrt(lm_score_moments.variance + 1e-6)
                        disc_score_whitened = (
                            disc_scores - disc_score_moments.mean
                        ) / np.sqrt(disc_score_moments.variance + 1e-6)

                        # Log the batch mean of each raw and whitened reward.
                        lm_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/lm",
                                             simple_value=np.mean(lm_score))
                        ])
                        summary_writer.add_summary(lm_summary, global_step=(i))
                        qa_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/qa",
                                             simple_value=np.mean(qa_f1s))
                        ])
                        summary_writer.add_summary(qa_summary, global_step=(i))
                        disc_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/disc",
                                             simple_value=np.mean(disc_scores))
                        ])
                        summary_writer.add_summary(disc_summary,
                                                   global_step=(i))

                        lm_white_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/lm_white",
                                             simple_value=np.mean(
                                                 lm_score_whitened))
                        ])
                        summary_writer.add_summary(lm_white_summary,
                                                   global_step=(i))
                        qa_white_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/qa_white",
                                             simple_value=np.mean(
                                                 qa_score_whitened))
                        ])
                        summary_writer.add_summary(qa_white_summary,
                                                   global_step=(i))
                        disc_white_summary = tf.Summary(value=[
                            tf.Summary.Value(tag="rl_rewards/disc_white",
                                             simple_value=np.mean(
                                                 disc_score_whitened))
                        ])
                        summary_writer.add_summary(disc_white_summary,
                                                   global_step=(i))

                        # Build a combined batch - half ground truth for MLE, half generated for PG
                        train_batch_ext = duplicate_batch_and_inject(
                            train_batch, qhat_ids, qhat_str, qhat_lens)

                        # print(qhat_ids)
                        # print(qhat_lens)
                        # print(train_batch_ext[2][2])

                        # Each score vector has 2 * curr_batch_size entries:
                        # the weighted whitened rewards followed by a constant
                        # per-example value (pg_ml_weight for the LM score, 0
                        # for QA and discriminator). Which half of
                        # `train_batch_ext` each part lines up with depends on
                        # duplicate_batch_and_inject -- verify there.
                        rl_dict = {
                            model.lm_score:
                            np.asarray((lm_score_whitened *
                                        FLAGS.lm_weight).tolist() + [
                                            FLAGS.pg_ml_weight
                                            for b in range(curr_batch_size)
                                        ]),
                            model.qa_score:
                            np.asarray((qa_score_whitened *
                                        FLAGS.qa_weight).tolist() +
                                       [0 for b in range(curr_batch_size)]),
                            model.disc_score:
                            np.asarray((disc_score_whitened *
                                        FLAGS.disc_weight).tolist() +
                                       [0 for b in range(curr_batch_size)]),
                            model.rl_lm_enabled:
                            True,
                            model.rl_qa_enabled:
                            True,
                            model.rl_disc_enabled:
                            FLAGS.disc_weight > 0,
                            model.step:
                            i - FLAGS.pg_burnin,
                            model.hide_answer_in_copy:
                            True
                        }

                        # perform a policy gradient step, but combine with a XE step by using appropriate rewards
                        ops = [
                            model.pg_optimizer, model.train_summary,
                            model.q_hat_string
                        ]
                        # On evaluation steps five extra tensors are fetched
                        # *before* the two loss tensors, so the losses move
                        # from res[3], res[4] to res[8], res[9]; `res_offset`
                        # accounts for that shift.
                        if i % FLAGS.eval_freq == 0:
                            ops.extend([
                                model.q_hat_ids, model.question_ids,
                                model.copy_prob, model.question_raw,
                                model.question_length
                            ])
                            res_offset = 5
                        else:
                            res_offset = 0
                        ops.extend([model.lm_loss, model.qa_loss])
                        res = sess.run(ops,
                                       feed_dict={
                                           model.input_batch: train_batch_ext,
                                           model.is_training: False,
                                           **rl_dict
                                       })
                        summary_writer.add_summary(res[1], global_step=(i))

                        # Log only the first half of the PG related losses
                        lm_loss_summary = tf.Summary(value=[
                            tf.Summary.Value(
                                tag="train_loss/lm",
                                simple_value=np.mean(res[3 + res_offset]
                                                     [:curr_batch_size]))
                        ])
                        summary_writer.add_summary(lm_loss_summary,
                                                   global_step=(i))
                        qa_loss_summary = tf.Summary(value=[
                            tf.Summary.Value(
                                tag="train_loss/qa",
                                simple_value=np.mean(res[4 + res_offset]
                                                     [:curr_batch_size]))
                        ])
                        summary_writer.add_summary(qa_loss_summary,
                                                   global_step=(i))

                    # TODO: more principled scheduling here than alternating steps
                    if FLAGS.disc_train:
                        # Draw a 0/1 label per example with probability 0.5.
                        # Label 0 selects the generated question and label 1
                        # the gold question; end-of-sentence and padding
                        # tokens are stripped from whichever is chosen.
                        ixs = np.round(
                            np.random.binomial(1, 0.5, curr_batch_size))
                        qbatch = [
                            pred_str[ix].replace(" </Sent>", "").replace(
                                " <PAD>", "")
                            if ixs[ix] < 0.5 else gold_q_str[ix].replace(
                                " </Sent>", "").replace(" <PAD>", "")
                            for ix in range(curr_batch_size)
                        ]

                        # NOTE(review): `loss` is not used afterwards.
                        loss = discriminator.train_step(unfilt_ctxt_batch,
                                                        qbatch,
                                                        ans_text_batch,
                                                        ans_pos_batch,
                                                        ixs,
                                                        step=(i))

                else:
                    # Normal single pass update step. If model has PG capability, fill in the placeholders with empty values
                    if FLAGS.model_type[:
                                        7] == "MALUUBA" and not FLAGS.policy_gradient:
                        rl_dict = {
                            model.lm_score:
                            [0 for b in range(curr_batch_size)],
                            model.qa_score:
                            [0 for b in range(curr_batch_size)],
                            model.disc_score:
                            [0 for b in range(curr_batch_size)],
                            model.rl_lm_enabled: False,
                            model.rl_qa_enabled: False,
                            model.rl_disc_enabled: False,
                            model.hide_answer_in_copy: False
                        }
                    else:
                        rl_dict = {}

                    # Perform a normal optimizer step
                    ops = [
                        model.optimizer, model.train_summary,
                        model.q_hat_string
                    ]
                    if i % FLAGS.eval_freq == 0:
                        ops.extend([
                            model.q_hat_ids, model.question_ids,
                            model.copy_prob, model.question_raw,
                            model.question_length
                        ])
                    res = sess.run(ops,
                                   feed_dict={
                                       model.input_batch: train_batch,
                                       model.is_training: True,
                                       **rl_dict
                                   })
                    summary_writer.add_summary(res[1], global_step=(i))

                # Dump some output periodically
                # Reads `res` from whichever branch ran above. On evaluation
                # steps both branches fetch the same first eight tensors:
                #   res[2]=q_hat_string, res[3]=q_hat_ids,
                #   res[4]=question_ids, res[5]=copy_prob,
                #   res[6]=question_raw, res[7]=question_length.
                # The `i > FLAGS.pg_burnin` guard matches the condition under
                # which the policy-gradient branch assigns `res`.
                if i > 0 and i % FLAGS.eval_freq == 0 and (
                        i > FLAGS.pg_burnin or not FLAGS.policy_gradient):
                    with open(FLAGS.log_dir + 'out.htm', 'w',
                              encoding='utf-8') as fp:
                        fp.write(
                            output_pretty(res[2].tolist(), res[3], res[4],
                                          res[5], 0, i))
                    gold_batch = res[6]
                    gold_lens = res[7]
                    f1s = []
                    bleus = []
                    # Train-batch metrics: both prediction and gold are cut
                    # at the gold length minus one.
                    for b, pred in enumerate(res[2]):
                        pred_str = tokens_to_string(pred[:gold_lens[b] - 1])
                        gold_str = tokens_to_string(
                            gold_batch[b][:gold_lens[b] - 1])
                        f1s.append(metrics.f1(gold_str, pred_str))
                        bleus.append(metrics.bleu(gold_str, pred_str))

                    f1summary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_perf/f1",
                                         simple_value=sum(f1s) / len(f1s))
                    ])
                    bleusummary = tf.Summary(value=[
                        tf.Summary.Value(tag="train_perf/bleu",
                                         simple_value=sum(bleus) / len(bleus))
                    ])

                    summary_writer.add_summary(f1summary, global_step=(i))
                    summary_writer.add_summary(bleusummary, global_step=(i))

                    # Evaluate against dev set
                    f1s = []
                    bleus = []
                    nlls = []

                    # Evaluate on a fresh random subset each time: shuffle
                    # the dev data in place and take the first
                    # `num_dev_samples` items.
                    # NOTE(review): in-place shuffling needs a mutable
                    # sequence; the type returned by the loader/filter is not
                    # visible here -- verify.
                    np.random.shuffle(dev_data)
                    dev_subset = dev_data[:num_dev_samples]
                    dev_data_source.initialise(dev_subset)
                    for j in tqdm(range(num_steps_dev), desc='Eval ' + str(i)):
                        # Overwrites the training `curr_batch_size`; it is
                        # reassigned at the top of the next training step.
                        dev_batch, curr_batch_size = dev_data_source.get_batch(
                        )
                        pred_batch, pred_ids, pred_lens, gold_batch, gold_lens, ctxt, ctxt_len, ans, ans_len, nll = sess.run(
                            [
                                model.q_hat_beam_string, model.q_hat_beam_ids,
                                model.q_hat_beam_lens, model.question_raw,
                                model.question_length, model.context_raw,
                                model.context_length, model.answer_locs,
                                model.answer_length, model.nll
                            ],
                            feed_dict={
                                model.input_batch: dev_batch,
                                model.is_training: False
                            })

                        nlls.extend(nll.tolist())
                        # out_str="<h1>"+str(e)+' - '+str(datetime.datetime.now())+'</h1>'
                        # Dev metrics: unlike the train metrics above, the
                        # prediction is cut at its own predicted length.
                        for b, pred in enumerate(pred_batch):
                            pred_str = tokens_to_string(
                                pred[:pred_lens[b] - 1]).replace(
                                    ' </Sent>', "").replace(" <PAD>", "")
                            gold_str = tokens_to_string(
                                gold_batch[b][:gold_lens[b] - 1])
                            f1s.append(metrics.f1(gold_str, pred_str))
                            bleus.append(metrics.bleu(gold_str, pred_str))
                            # out_str+=pred_str.replace('>','&gt;').replace('<','&lt;')+"<br/>"+gold_str.replace('>','&gt;').replace('<','&lt;')+"<hr/>"
                        # Only the first dev batch is rendered to the HTML
                        # report, which is overwritten on every evaluation.
                        if j == 0:
                            title = chkpt_path
                            out_str = output_eval(title, pred_batch, pred_ids,
                                                  pred_lens, gold_batch,
                                                  gold_lens, ctxt, ctxt_len,
                                                  ans, ans_len)
                            with open(FLAGS.log_dir + 'out_eval_' +
                                      FLAGS.model_type + '.htm',
                                      'w',
                                      encoding='utf-8') as fp:
                                fp.write(out_str)

                    # NOTE(review): if `num_steps_dev` is 0 (fewer dev samples
                    # than one eval batch) these lists are empty and the
                    # averages below divide by zero.
                    f1summary = tf.Summary(value=[
                        tf.Summary.Value(tag="dev_perf/f1",
                                         simple_value=sum(f1s) / len(f1s))
                    ])
                    bleusummary = tf.Summary(value=[
                        tf.Summary.Value(tag="dev_perf/bleu",
                                         simple_value=sum(bleus) / len(bleus))
                    ])
                    nllsummary = tf.Summary(value=[
                        tf.Summary.Value(tag="dev_perf/nll",
                                         simple_value=sum(nlls) / len(nlls))
                    ])

                    summary_writer.add_summary(f1summary, global_step=i)
                    summary_writer.add_summary(bleusummary, global_step=i)
                    summary_writer.add_summary(nllsummary, global_step=i)

                    # Checkpoint when the mean dev NLL improves; with policy
                    # gradient enabled a checkpoint is written either way.
                    mean_nll = sum(nlls) / len(nlls)
                    if mean_nll < best_oos_nll:
                        print("New best NLL! ", mean_nll, " Saving...")
                        best_oos_nll = mean_nll
                        saver.save(sess,
                                   chkpt_path + '/model.checkpoint',
                                   global_step=i)
                    else:
                        print("NLL not improved ", mean_nll)
                        if FLAGS.policy_gradient:
                            print("Saving anyway")
                            saver.save(sess,
                                       chkpt_path + '/model.checkpoint',
                                       global_step=i)
                        # NOTE(review): the discriminator is saved only in
                        # this "not improved" branch -- confirm that skipping
                        # it when the NLL improves is intended.
                        if FLAGS.disc_train:
                            print("Saving disc")
                            discriminator.save_to_chkpt(FLAGS.model_dir, i)
示例#27
0
def main(_):
    """Evaluate a saved question-generation model and write the results to disk.

    Restores the latest checkpoint found under
    ``<model_dir>qgen/<model_type>/<eval_model_id>``, decodes
    ``num_eval_samples // eval_batch_size`` batches of the loaded dataset and
    scores every predicted question against its gold question (token F1,
    BLEU, NLL and a cosine similarity between summed word embeddings). When
    ``FLAGS.eval_metrics`` is set, the QA, language-model and discriminator
    instances are loaded as well and their per-example scores are recorded.

    Files written under ``FLAGS.log_directory``:
      * ``out_eval_<model_type>.htm`` - output of ``output_eval`` for the
        first batch.
      * ``out_eval_<model_type>[_test][_train].json`` - a dict with the
        aggregate ``metrics`` and one ``results`` record per example.

    Calls ``exit`` when the dataset is smaller than ``num_eval_samples``, the
    checkpoint directory is missing or empty, or the model type is unknown.

    Args:
        _: Unused.
    """
    model_type = FLAGS.model_type
    disc_path = FLAGS.model_dir + 'saved/discriminator-trained-latent'
    chkpt_path = FLAGS.model_dir + 'qgen/' + model_type + '/' + FLAGS.eval_model_id

    # load dataset
    dev_data = loader.load_squad_triples(FLAGS.data_path, dev=FLAGS.eval_on_dev, test=FLAGS.eval_on_test)

    if len(dev_data) < FLAGS.num_eval_samples:
        exit('***ERROR*** Eval dataset is smaller than the num_eval_samples flag!')
    if len(dev_data) > FLAGS.num_eval_samples:
        print('***WARNING*** Eval dataset is larger than the num_eval_samples flag!')

    # Hold on to the contexts and answer positions as they were before
    # filtering; the external scorers and the result records use these.
    dev_contexts_unfilt, _, _, dev_a_pos_unfilt = zip(*dev_data)

    if FLAGS.filter_window_size_before > -1:
        dev_data = preprocessing.filter_squad(
            dev_data,
            window_size_before=FLAGS.filter_window_size_before,
            window_size_after=FLAGS.filter_window_size_after,
            max_tokens=FLAGS.filter_max_tokens)

    print('Loaded SQuAD dev set with ',len(dev_data),' triples')

    # Check the checkpoint directory before reading anything from it, so a
    # missing directory yields this message rather than a failed open().
    if not os.path.exists(chkpt_path):
        exit('Checkpoint path doesnt exist! '+chkpt_path)

    with open(chkpt_path+'/vocab.json') as f:
        vocab = json.load(f)

    with SquadStreamer(vocab, FLAGS.eval_batch_size, 1, shuffle=False) as dev_data_source:

        glove_embeddings = loader.load_glove(FLAGS.data_path)
        known_words = glove_embeddings.keys()
        zero_embedding = np.zeros((FLAGS.embedding_size,))

        def sum_of_word_embeddings(text):
            """Sum the embeddings of the tokens of `text` (zeros for unknown tokens)."""
            vectors = [glove_embeddings[w] if w in known_words else zero_embedding
                       for w in preprocessing.tokenise(text, asbytes=False)]
            return np.sum(np.asarray(vectors), axis=0)

        # Create model
        if model_type[:7] == "SEQ2SEQ":
            model = Seq2SeqModel(vocab, training_mode=False)
        elif model_type[:2] == "RL":
            # TEMP - no need to spin up the LM or QA model at eval time
            FLAGS.qa_weight = 0
            FLAGS.lm_weight = 0
            model = RLModel(vocab, training_mode=False)
        else:
            exit("Unrecognised model type: "+model_type)

        with model.graph.as_default():
            saver = tf.train.Saver()

        if FLAGS.eval_metrics:
            lm = LstmLmInstance()
            qa = QANetInstance()

            lm.load_from_chkpt(FLAGS.model_dir+'saved/lmtest')
            qa.load_from_chkpt(FLAGS.model_dir+'saved/qanet2')

            discriminator = DiscriminatorInstance(trainable=False, path=disc_path)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
        with tf.Session(graph=model.graph, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            latest_chkpt = tf.train.latest_checkpoint(chkpt_path)
            if latest_chkpt is None:
                # latest_checkpoint returns None when the directory holds no
                # checkpoint; stop here with a readable message.
                exit('No checkpoint found in '+chkpt_path)
            saver.restore(sess, latest_chkpt)

            num_steps = FLAGS.num_eval_samples//FLAGS.eval_batch_size

            # Initialise the dataset
            dev_data_source.initialise(dev_data)

            f1s = []
            bleus = []
            qa_scores = []
            qa_scores_gold = []
            lm_scores = []
            nlls = []
            disc_scores = []
            sowe_similarities = []
            copy_probs = []

            qgolds = []
            qpreds = []
            res = []

            # The fetch list never changes, so build it once.
            fetches = [
                model.q_hat_beam_string,
                model.q_hat_full_beam_str,
                model.q_hat_full_beam_lens,
                model.q_hat_beam_ids,
                model.q_hat_beam_lens,
                model.question_raw,
                model.question_length,
                model.question_ids,
                model.context_raw,
                model.context_length,
                model.answer_locs,
                model.answer_length,
                model.nll,
                model.mean_copy_prob,
            ]

            for i in tqdm(range(num_steps), desc='Epoch 0'):
                dev_batch, curr_batch_size = dev_data_source.get_batch()
                (pred_batch, _pred_beam, _pred_beam_lens, pred_ids, pred_lens,
                 gold_batch, gold_lens, _gold_ids, ctxt, ctxt_len, ans, ans_len,
                 nll, copy_prob) = sess.run(
                     fetches,
                     feed_dict={model.input_batch: dev_batch, model.is_training: False})

                # dev_batch[3] is used as indices into the unfiltered data.
                unfilt_ctxt_batch = [dev_contexts_unfilt[ix] for ix in dev_batch[3]]
                unfilt_apos_batch = [dev_a_pos_unfilt[ix] for ix in dev_batch[3]]
                gold_ans = ops.byte_token_array_to_str(dev_batch[2][0], dev_batch[2][2], is_array=False)

                # subtract 1 to remove the "end sent token"
                pred_q_batch = [q.replace(' </Sent>',"").replace(" <PAD>","")
                                for q in ops.byte_token_array_to_str(pred_batch, pred_lens-1)]

                # Convert once per batch rather than once per example.
                nll_batch = nll.tolist()
                pred_ids_batch = pred_ids.tolist()

                nlls.extend(nll_batch)
                copy_probs.extend(copy_prob.tolist())

                if FLAGS.eval_metrics:
                    qa_pred = qa.get_ans(unfilt_ctxt_batch, ops.byte_token_array_to_str(pred_batch, pred_lens))
                    gold_qa_pred = qa.get_ans(unfilt_ctxt_batch, ops.byte_token_array_to_str(dev_batch[1][0], dev_batch[1][3]))

                    qa_score_batch = [metrics.f1(metrics.normalize_answer(gold_ans[b]), metrics.normalize_answer(qa_pred[b])) for b in range(curr_batch_size)]
                    qa_score_gold_batch = [metrics.f1(metrics.normalize_answer(gold_ans[b]), metrics.normalize_answer(gold_qa_pred[b])) for b in range(curr_batch_size)]
                    lm_score_batch = lm.get_seq_perplexity(pred_q_batch).tolist()
                    disc_score_batch = discriminator.get_pred(unfilt_ctxt_batch, pred_q_batch, gold_ans, unfilt_apos_batch).tolist()

                    # Accumulate once per batch. Doing this inside the
                    # per-example loop would add each batch batch_size times.
                    qa_scores.extend(qa_score_batch)
                    qa_scores_gold.extend(qa_score_gold_batch)
                    lm_scores.extend(lm_score_batch)
                    disc_scores.extend(disc_score_batch)

                for b, _pred in enumerate(pred_batch):
                    pred_str = pred_q_batch[b]
                    gold_str = tokens_to_string(gold_batch[b][:gold_lens[b]-1])
                    f1s.append(metrics.f1(gold_str, pred_str))
                    bleus.append(metrics.bleu(gold_str, pred_str))
                    qgolds.append(gold_str)
                    qpreds.append(pred_str)

                    # calc cosine similarity between sums of word embeddings
                    # NOTE(review): if either side has no known tokens the norm
                    # is zero and this yields NaN - confirm whether that is intended.
                    pred_sowe = sum_of_word_embeddings(pred_str)
                    gold_sowe = sum_of_word_embeddings(gold_str)
                    this_similarity = np.inner(pred_sowe, gold_sowe)/np.linalg.norm(pred_sowe, ord=2)/np.linalg.norm(gold_sowe, ord=2)
                    sowe_similarities.append(this_similarity)

                    this_metric_dict = {
                        'f1': f1s[-1],
                        'bleu': bleus[-1],
                        # Index by b: nlls already holds the whole batch, so
                        # nlls[-1] would be the batch's last example every time.
                        # Assumes model.nll has one entry per example - TODO confirm.
                        'nll': nll_batch[b],
                        'sowe': sowe_similarities[-1],
                    }
                    if FLAGS.eval_metrics:
                        this_metric_dict = {
                            **this_metric_dict,
                            'qa': qa_score_batch[b],
                            'lm': lm_score_batch[b],
                            'disc': disc_score_batch[b],
                        }

                    res.append({
                        'c': unfilt_ctxt_batch[b],
                        'q_pred': pred_str,
                        'q_gold': gold_str,
                        'a_pos': unfilt_apos_batch[b],
                        'a_text': gold_ans[b],
                        'metrics': this_metric_dict,

                        'q_pred_ids': pred_ids_batch[b],
                        'q_gold_ids': dev_batch[1][1][b].tolist(),
                    })

                # Quick output
                if i == 0:
                    # The first entries of qpreds/qgolds belong to the first
                    # example of this first batch.
                    print(qpreds[0])
                    print(qgolds[0])

                    title = chkpt_path
                    out_str = output_eval(title, pred_batch, pred_ids, pred_lens, gold_batch, gold_lens, ctxt, ctxt_len, ans, ans_len)
                    with open(FLAGS.log_directory+'out_eval_'+model_type+'.htm', 'w', encoding='utf-8') as fp:
                        fp.write(out_str)

            metric_dict = {
                'f1': np.mean(f1s),
                'bleu': metrics.bleu_corpus(qgolds, qpreds),
                'nll': np.mean(nlls),
                'sowe': np.mean(sowe_similarities),
            }
            if FLAGS.eval_metrics:
                metric_dict = {
                    **metric_dict,
                    'qa': np.mean(qa_scores),
                    'lm': np.mean(lm_scores),
                    'disc': np.mean(disc_scores),
                }

            # The file name records which split was evaluated.
            split_suffix = ("_test" if FLAGS.eval_on_test else "")+("_train" if (not FLAGS.eval_on_dev and not FLAGS.eval_on_test) else "")
            with open(FLAGS.log_directory+'out_eval_'+model_type+split_suffix+'.json', 'w', encoding='utf-8') as fp:
                json.dump({"metrics":metric_dict, "results": res}, fp)

            # Reuse the aggregates computed above instead of recomputing them.
            print("F1: ", metric_dict['f1'])
            print("BLEU: ", metric_dict['bleu'])
            print("NLL: ", metric_dict['nll'])
            print("SOWE: ", metric_dict['sowe'])

            print("Copy prob: ", np.mean(copy_probs))
            if FLAGS.eval_metrics:
                print("QA: ", metric_dict['qa'])
                print("QA (gold): ", np.mean(qa_scores_gold))
                print("LM: ", metric_dict['lm'])
                print("Disc: ", metric_dict['disc'])
示例#28
0
    "save_model_every_epoch": False,
    "evaluate_during_training": True,
    "evaluate_generated_text": True,
    "evaluate_during_training_verbose": True,
    "use_multiprocessing": False,
    "max_length": 25,
    "manual_seed": 4,
    "save_steps": 11898,
    "gradient_accumulation_steps": 1,
    "output_dir": "./exp/template",
}

# Build the "bart" encoder-decoder named "facebook/bart-large", configured by
# the model_args dict above.
model = Seq2SeqModel(
    args=model_args,
    encoder_decoder_name="facebook/bart-large",
    encoder_decoder_type="bart",
    # use_cuda=False,
)

# Train on train_df; eval_df is handed over as the evaluation data.
model.train_model(train_df, eval_data=eval_df)

# Evaluate the trained model on eval_df and keep the returned results.
results = model.eval_model(eval_df)

# Run a prediction on a single example sentence and show the output.
sample_inputs = [
    "Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday."
]
print(model.predict(sample_inputs))
示例#29
0
# Word <-> index lookup tables.
word2idx, idx2word = load_dict()

# Locations of the training and test sample files.
data_path = '../data/train_new_data_idx.pkl'
test_path = '../data/test_new_data_idx26.pkl'

trainingSamples = loadDataset(data_path)
testingSamples = loadDataset(test_path)

# Build the model in training mode, with attention and without beam search.
model = Seq2SeqModel(
    FLAGS.rnn_size,
    FLAGS.num_layers,
    FLAGS.embedding_size,
    FLAGS.learning_rate,
    FLAGS.learning_rate_decay_factor,
    word2idx,
    max_gradient_norm=5.0,
    beam_size=5,
    beam_search=False,
    use_attention=True,
    mode='train',
)

# Open a session with allow_growth enabled in its GPU options.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print('Reloading model parameters..')
    model.saver.restore(sess, ckpt.model_checkpoint_path)
else:
示例#30
0
    def __init__(self,
                 phase,
                 visualize,
                 output_dir,
                 batch_size,
                 initial_learning_rate,
                 steps_per_checkpoint,
                 model_dir,
                 target_embedding_size,
                 attn_num_hidden,
                 attn_num_layers,
                 clip_gradients,
                 max_gradient_norm,
                 session,
                 load_model,
                 gpu_id,
                 use_distance=True,
                 max_image_width=160,
                 max_image_height=60,
                 max_prediction_length=8,
                 channels=1,
                 reg_val=0):
        """Build the CNN + attention-decoder graph and load or initialise its variables.

        Builds the input placeholders, the CNN, the ``Seq2SeqModel`` decoder
        and the prediction/probability outputs. When ``phase == 'train'`` the
        loss, gradient and summary ops are built too. Finally the variables
        are restored from the checkpoint in ``model_dir`` or freshly
        initialised.

        Args:
            phase: ``'train'`` builds the training ops; any other value makes
                the model forward-only. ``'test'`` also forces a batch size
                of 1.
            visualize: Stored on ``self.visualize``.
            output_dir: Stored on ``self.output_dir``.
            batch_size: Stored on ``self.batch_size`` (replaced by 1 when
                ``phase == 'test'``).
            initial_learning_rate: Only logged here; the optimizer reads the
                ``self.learning_rate`` placeholder instead.
            steps_per_checkpoint: Stored on ``self.steps_per_checkpoint``.
            model_dir: Checkpoint directory; created if it does not exist.
            target_embedding_size: Passed to ``Seq2SeqModel``.
            attn_num_hidden: Passed to ``Seq2SeqModel``.
            attn_num_layers: Passed to ``Seq2SeqModel``.
            clip_gradients: When truthy, gradients are clipped by global norm.
            max_gradient_norm: Clipping norm used when ``clip_gradients`` is
                set.
            session: TensorFlow session used to restore or initialise the
                variables.
            load_model: When truthy and a checkpoint exists in ``model_dir``,
                the variables are restored from it.
            gpu_id: Graph is placed on ``/gpu:<gpu_id>`` when ``>= 0``,
                otherwise on ``/cpu:0``.
            use_distance: Stored on ``self.use_distance``.
            max_image_width: Maximum input image width; used with
                ``max_image_height`` to derive ``self.max_width``.
            max_image_height: Maximum input image height.
            max_prediction_length: The decoder size is this value plus 2.
            channels: Stored on ``self.channels``.
            reg_val: Weight of the summed regularization losses that is added
                to the loss when ``> 0``.
        """

        self.use_distance = use_distance

        # We need resized width, not the actual width
        max_resized_width = 1. * max_image_width / max_image_height * DataGen.IMAGE_HEIGHT

        self.max_original_width = max_image_width
        self.max_width = int(math.ceil(max_resized_width))

        # A single (encoder, decoder) bucket; the encoder size is fixed.
        self.encoder_size = 4096
        self.decoder_size = max_prediction_length + 2
        self.buckets = [(self.encoder_size, self.decoder_size)]

        # A negative gpu_id selects the CPU.
        if gpu_id >= 0:
            device_id = '/gpu:' + str(gpu_id)
        else:
            device_id = '/cpu:0'
        self.device_id = device_id

        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

        # The test phase always processes one image at a time.
        if phase == 'test':
            batch_size = 1

        logging.info('phase: %s', phase)
        logging.info('model_dir: %s', model_dir)
        logging.info('load_model: %s', load_model)
        logging.info('output_dir: %s', output_dir)
        logging.info('steps_per_checkpoint: %d', steps_per_checkpoint)
        logging.info('batch_size: %d', batch_size)
        logging.info('learning_rate: %f', initial_learning_rate)
        logging.info('reg_val: %d', reg_val)
        logging.info('max_gradient_norm: %f', max_gradient_norm)
        logging.info('clip_gradients: %s', clip_gradients)
        logging.info('max_image_width %f', max_image_width)
        logging.info('max_prediction_length %f', max_prediction_length)
        logging.info('channels: %d', channels)
        logging.info('target_embedding_size: %f', target_embedding_size)
        logging.info('attn_num_hidden: %d', attn_num_hidden)
        logging.info('attn_num_layers: %d', attn_num_layers)
        logging.info('visualize: %s', visualize)

        self.reg_val = reg_val
        self.sess = session
        self.steps_per_checkpoint = steps_per_checkpoint
        self.model_dir = model_dir
        self.output_dir = output_dir
        self.batch_size = batch_size
        self.global_step = tf.Variable(0, trainable=False)
        self.phase = phase
        self.visualize = visualize
        #self.learning_rate = initial_learning_rate
        self.clip_gradients = clip_gradients
        self.channels = channels

        # Only the 'train' phase builds the optimisation ops further down.
        if phase == 'train':
            self.forward_only = False
        else:
            self.forward_only = True

        with tf.device(device_id):

            self.height = tf.constant(DataGen.IMAGE_HEIGHT, dtype=tf.int32)
            self.height_float = tf.constant(DataGen.IMAGE_HEIGHT,
                                            dtype=tf.float64)
            # The learning rate is fed at run time through this scalar placeholder.
            self.learning_rate = tf.placeholder(tf.float32, shape=[])

            self.img_pl = tf.placeholder(tf.string,
                                         name='input_image_as_bytes')
            # A rank-0 (single string) input is expanded to a batch of one.
            self.img_data = tf.cond(tf.less(tf.rank(self.img_pl), 1),
                                    lambda: tf.expand_dims(self.img_pl, 0),
                                    lambda: self.img_pl)
            self.img_data = tf.map_fn(self._prepare_image,
                                      self.img_data,
                                      dtype=tf.float32)
            num_images = tf.shape(self.img_data)[0]

            # One all-ones [num_images, 1] mask per encoder step (encoder_size + 1 in total).
            self.encoder_masks = []
            for i in xrange(self.encoder_size + 1):
                self.encoder_masks.append(tf.tile([[1.]], [num_images, 1]))

            # Constant decoder inputs (all 1s). The target weight is 1.0 for
            # the first decoder_size steps and 0.0 for the extra final step.
            self.decoder_inputs = []
            self.target_weights = []
            for i in xrange(self.decoder_size + 1):
                self.decoder_inputs.append(tf.tile([1], [num_images]))
                if i < self.decoder_size:
                    self.target_weights.append(tf.tile([1.], [num_images]))
                else:
                    self.target_weights.append(tf.tile([0.], [num_images]))

            cnn_model = CNN(self.img_data)
            self.conv_output = cnn_model.tf_output()
            self.conv_output = tf.expand_dims(self.conv_output, -1)  # TODO:
            # Swap the first two axes before handing the features to the decoder.
            self.perm_conv_output = tf.transpose(self.conv_output,
                                                 perm=[1, 0, 2])
            self.attention_decoder_model = Seq2SeqModel(
                encoder_masks=self.encoder_masks,
                encoder_inputs_tensor=self.perm_conv_output,
                decoder_inputs=self.decoder_inputs,
                target_weights=self.target_weights,
                target_vocab_size=len(DataGen.CHARMAP),
                buckets=self.buckets,
                target_embedding_size=target_embedding_size,
                attn_num_layers=attn_num_layers,
                attn_num_hidden=attn_num_hidden,
                forward_only=self.forward_only)

            # Lookup table mapping int64 indices to the strings in
            # DataGen.CHARMAP; unknown keys map to "".
            table = tf.contrib.lookup.MutableHashTable(
                key_dtype=tf.int64,
                value_dtype=tf.string,
                default_value="",
                checkpoint=True,
            )

            insert = table.insert(
                tf.constant(list(range(len(DataGen.CHARMAP))), dtype=tf.int64),
                tf.constant(DataGen.CHARMAP),
            )

            # Ops created in this scope depend on the table insert having run.
            with tf.control_dependencies([insert]):
                num_feed = []
                prb_feed = []

                # For each decoder output: the arg-max index and its softmax probability.
                for line in xrange(len(self.attention_decoder_model.output)):
                    guess = tf.argmax(
                        self.attention_decoder_model.output[line], axis=1)
                    proba = tf.reduce_max(tf.nn.softmax(
                        self.attention_decoder_model.output[line]),
                                          axis=1)
                    num_feed.append(guess)
                    prb_feed.append(proba)

                # Join the predictions into a single output string.
                # foldr walks each row right-to-left and resets the
                # accumulator at EOS_ID, so only the characters before the
                # first EOS_ID remain.
                trans_output = tf.transpose(num_feed)
                trans_output = tf.map_fn(
                    lambda m: tf.foldr(
                        lambda a, x: tf.cond(
                            tf.equal(x, DataGen.EOS_ID),
                            lambda: '',
                            lambda: table.lookup(x) + a  # pylint: disable=undefined-variable
                        ),
                        m,
                        initializer=''),
                    trans_output,
                    dtype=tf.string)

                # Calculate the total probability of the output string.
                # This is the product of every per-step maximum probability
                # in the row; there is no reset at EOS_ID here.
                trans_outprb = tf.transpose(prb_feed)
                trans_outprb = tf.gather(trans_outprb,
                                         tf.range(tf.size(trans_output)))
                trans_outprb = tf.map_fn(lambda m: tf.foldr(
                    lambda a, x: tf.multiply(tf.cast(x, tf.float64), a),
                    m,
                    initializer=tf.cast(1, tf.float64)),
                                         trans_outprb,
                                         dtype=tf.float64)

                # With exactly one result, return its single element instead
                # of a length-1 tensor.
                self.prediction = tf.cond(
                    tf.equal(tf.shape(trans_output)[0], 1),
                    lambda: trans_output[0],
                    lambda: trans_output,
                )
                self.probability = tf.cond(
                    tf.equal(tf.shape(trans_outprb)[0], 1),
                    lambda: trans_outprb[0],
                    lambda: trans_outprb,
                )

                # Give the outputs fixed op names ('prediction', 'probability').
                self.prediction = tf.identity(self.prediction,
                                              name='prediction')
                self.probability = tf.identity(self.probability,
                                               name='probability')

            if not self.forward_only:  # train
                self.updates = []
                self.summaries_by_bucket = []

                params = tf.trainable_variables()
                #opt = tf.train.AdadeltaOptimizer(learning_rate=initial_learning_rate)
                opt = tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate)
                loss_op = self.attention_decoder_model.loss

                # Optionally add the weighted sum of the collected regularization losses.
                if self.reg_val > 0:
                    reg_losses = tf.get_collection(
                        tf.GraphKeys.REGULARIZATION_LOSSES)
                    logging.info('Adding %s regularization losses',
                                 len(reg_losses))
                    logging.debug('REGULARIZATION_LOSSES: %s', reg_losses)
                    loss_op = self.reg_val * tf.reduce_sum(
                        reg_losses) + loss_op

                # Split the (gradient, variable) pairs so the gradients can be
                # clipped together.
                gradients, params = list(
                    zip(*opt.compute_gradients(loss_op, params)))
                if self.clip_gradients:
                    gradients, _ = tf.clip_by_global_norm(
                        gradients, max_gradient_norm)

                # Summaries for the loss and the total gradient norm.
                summaries = [
                    tf.summary.scalar("loss", loss_op),
                    tf.summary.scalar("total_gradient_norm",
                                      tf.global_norm(gradients))
                ]
                all_summaries = tf.summary.merge(summaries)
                self.summaries_by_bucket.append(all_summaries)

                # update op - apply gradients
                # Ops in the UPDATE_OPS collection run before the gradients are applied.
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.updates.append(
                        opt.apply_gradients(list(zip(gradients, params)),
                                            global_step=self.global_step))

        self.saver_all = tf.train.Saver(tf.all_variables())
        self.checkpoint_path = os.path.join(self.model_dir, "model.ckpt")

        # Restore from the checkpoint in model_dir when one exists and
        # load_model is set; otherwise initialise all variables.
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and load_model:
            # pylint: disable=no-member
            logging.info("Reading model parameters from %s",
                         ckpt.model_checkpoint_path)
            self.saver_all.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            logging.info("Created model with fresh parameters.")
            self.sess.run(tf.initialize_all_variables())