Example #1
def _forward(config):
    assert config.load
    test_data = read_data(config, config.forward_name, True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ForwardEvaluator(config, model)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = math.ceil(test_data.num_examples / config.batch_size)
    if 0 < config.test_num_batches < num_batches:
        num_batches = config.test_num_batches
    e = evaluator.get_evaluation_from_batches(sess, tqdm(test_data.get_batches(config.batch_size, num_batches=num_batches), total=num_batches))
    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e, path=config.answer_path)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e, path=config.eval_path)
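
For reference, the use_glove_for_unk branch above builds new_emb_mat by mapping each unknown-word index back to its GloVe vector. A standalone sketch with toy dictionaries (all contents illustrative):

import numpy as np

# toy stand-ins for test_data.shared['word2vec'] / ['new_word2idx']
word2vec_dict = {'foo': [0.1, 0.2], 'bar': [0.3, 0.4]}
new_word2idx_dict = {'bar': 0, 'foo': 1}

idx2vec_dict = {idx: word2vec_dict[word]
                for word, idx in new_word2idx_dict.items()}
new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
                       dtype='float32')
print(new_emb_mat)  # row 0 is bar's vector, row 1 is foo's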
Example #2
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared[
            'lower_word2vec'] if config.lower_word else test_data.shared[
                'word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {
            idx: word2vec_dict[word]
            for word, idx in new_word2idx_dict.items()
        }
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(
        config,
        models,
        tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(
            config.batch_size,
            config.num_gpus,
            num_steps=num_steps,
            cluster=config.cluster),
                            total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(
                config.eval_dir, "{}-{}".format(ei.data_type,
                                                str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)
    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
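
The running merge `e = ei if e is None else e + ei` works because the evaluation objects overload `+` to concatenate batch results. A minimal sketch of that pattern (this Evaluation class is a stand-in, not the repo's):

class Evaluation:
    """Toy evaluation result that supports merging via +."""

    def __init__(self, num_examples, num_correct):
        self.num_examples = num_examples
        self.num_correct = num_correct

    def __add__(self, other):
        return Evaluation(self.num_examples + other.num_examples,
                          self.num_correct + other.num_correct)

    def __repr__(self):
        return 'acc={:.3f} ({}/{})'.format(
            self.num_correct / self.num_examples,
            self.num_correct, self.num_examples)

e = None
for ei in [Evaluation(4, 3), Evaluation(4, 2)]:  # one result per batch
    e = ei if e is None else e + ei
print(e)  # acc=0.625 (5/8)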
Example #3
    def data_ready(self, data=None, update=False):

        config = self.config
        config.batch_size = 1
        test_data = read_data(self.config,
                              'demo',
                              True,
                              data=data,
                              data_set=self.test_data)
        #        test_data = read_data(self.config, 'demo', True)

        if update:
            update_config(self.config, [test_data])
            if config.use_glove_for_unk:
                word2vec_dict = test_data.shared[
                    'lower_word2vec'] if config.lower_word else test_data.shared[
                        'word2vec']
                new_word2idx_dict = test_data.shared['new_word2idx']
                idx2vec_dict = {
                    idx: word2vec_dict[word]
                    for word, idx in new_word2idx_dict.items()
                }
                new_emb_mat = np.array(
                    [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
                    dtype='float32')
                config.new_emb_mat = new_emb_mat
        self.config = config
        self.test_data = test_data
Example #4
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)

    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat
    print("finished data preprocessing")

    # construct model graph and variables (using default graph)
    print("Getting torch model")
    # model = get_torch_model()
    ##    print("num params: {}".format(get_num_params()))
    #    trainer = TorchTrainer(config, model)
    #    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    #    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving
    #
    #    # Variables
    #    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    #    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
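
The emb_mat above backfills vocabulary indices that lack a GloVe vector with a random draw; the same construction recurs in the _train variants below. A self-contained sketch with toy sizes (all values illustrative):

import numpy as np

word_emb_size = 4
word_vocab_size = 5
idx2vec_dict = {0: np.ones(word_emb_size)}  # only index 0 has a GloVe vector

emb_mat = np.array([
    idx2vec_dict[idx] if idx in idx2vec_dict
    # missing rows are drawn from N(0, I); with an identity covariance this
    # is equivalent to np.random.randn(word_emb_size)
    else np.random.multivariate_normal(np.zeros(word_emb_size),
                                       np.eye(word_emb_size))
    for idx in range(word_vocab_size)
])
print(emb_mat.shape)  # (5, 4)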
Example #5
    def query():
        with tempfile.TemporaryDirectory(dir=orig_data_dir) as inter_dir:
            # Receive data, process it
            data = bottle.request.json
            config.data_dir = inter_dir
            with tempfile.NamedTemporaryFile('w',
                                             suffix='.json',
                                             dir=orig_data_dir) as data_file:
                json.dump(data, data_file)
                data_file.flush()
                prepro_args = prepro.get_args([
                    '--mode', 'single', '--single_path', data_file.name, '-pm',
                    '--target_dir', inter_dir
                ])
                prepro.prepro(prepro_args, glove_dict=glove_dict)
            test_data = read_data(config, config.forward_name, True)
            num_batches = math.ceil(test_data.num_examples / config.batch_size)
            if 0 < config.eval_num_batches < num_batches:
                num_batches = config.eval_num_batches

            # Run model on data
            e = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(test_data.get_batches(config.batch_size,
                                           num_batches=num_batches),
                     total=num_batches))
            eval_path = os.path.join(inter_dir, 'eval.pkl.gz')
            graph_handler.dump_eval(e, path=eval_path)

            # Extract predictions through the ensemble code
            data_path = os.path.join(inter_dir, 'data_single.json')
            with open(data_path) as f:
                data_single_obj = json.load(f)
            shared_path = os.path.join(inter_dir, 'shared_single.json')
            with open(shared_path) as f:
                shared_single_obj = json.load(f)
            with tempfile.NamedTemporaryFile('w',
                                             suffix='.json',
                                             dir=orig_data_dir) as target_file:
                target_path = target_file.name
                ensemble_args = ensemble.get_args([
                    '--data_path', data_path, '--shared_path', shared_path,
                    '-o', target_path, eval_path
                ])
                ensemble.ensemble(ensemble_args)
                target_file.flush()
                with open(target_path, 'r') as f:
                    pred_obj = json.load(f)

        return {
            'data_single': data_single_obj,
            'eval': e.dict,
            'shared_single': shared_single_obj,
            'predictions': pred_obj
        }
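
query() shuttles the request JSON through temporary files so the existing prepro and ensemble command-line tools can consume it. The tempfile round-trip in isolation (payload and names illustrative; relies on POSIX semantics, since Windows restricts reopening an open NamedTemporaryFile):

import json
import tempfile

payload = {'question': 'example'}  # illustrative request body
with tempfile.TemporaryDirectory() as inter_dir:
    with tempfile.NamedTemporaryFile('w', suffix='.json', dir=inter_dir) as f:
        json.dump(payload, f)
        f.flush()  # make the bytes visible before another process reads f.name
        with open(f.name) as g:  # here a CLI like prepro would consume f.name
            print(json.load(g))
# both the named file and the directory are cleaned up on exit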
Example #6
def _test(config):
    test_data = read_data(config, 'dev', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared[
            'lower_word2vec'] if config.lower_word else test_data.shared[
                'word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {
            idx: word2vec_dict[word]
            for word, idx in new_word2idx_dict.items()
        }
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = ScoreEvaluator(
        config, model, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    configgpu = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    configgpu.gpu_options.allow_growth = True
    sess = tf.Session(config=configgpu)
    graph_handler.initialize(sess)

    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    part = 0
    eval_step = 0
    for multi_batch in tqdm(test_data.get_multi_batches(
            config.batch_size,
            config.num_gpus,
            num_steps=num_steps,
            cluster=config.cluster),
                            total=num_steps):
        eval_step += 1
        ei = evaluator.get_evaluation_from_batches(sess, multi_batch)
        e = ei if e is None else e + ei
        if eval_step % 5000 == 0:
            graph_handler.dump_score(e, part=part)
            e = None
            part += 1
    if e is not None:
        graph_handler.dump_score(e, part=part)
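
Example #6 flushes the accumulated scores every 5000 evaluation steps and resets the accumulator, which keeps memory bounded on large test sets. The shape of that pattern, with dump() standing in for graph_handler.dump_score:

def dump(e, part):  # stand-in for graph_handler.dump_score
    print('part {}: {}'.format(part, e))

e, part = None, 0
for eval_step, ei in enumerate([1, 2, 3, 4, 5, 6, 7], start=1):  # per-step results
    e = ei if e is None else e + ei
    if eval_step % 3 == 0:  # 5000 in the example above
        dump(e, part)
        e, part = None, part + 1
if e is not None:  # flush the final partial chunk
    dump(e, part)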
Example #7
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
    word2idx_dict = test_data.shared['word2idx']
    new_word2idx_dict = test_data.shared['new_word2idx']
    print('word2idx len : {}, new_word2idx len : {}'.format(len(word2idx_dict), len(new_word2idx_dict)))

    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}

    idx2word_dict = {idx: word for word, idx in word2idx_dict.items()}
    offset = len(idx2word_dict)
    idx2word_dict.update({offset+idx: word for word, idx in new_word2idx_dict.items()})
    test_data.shared['idx2word'] = idx2word_dict

    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps, cluster=config.cluster), total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir, "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
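
Example #7 merges the base vocabulary and the 'new' (GloVe-only) words into one idx2word table by offsetting the new indices past the base vocabulary. A toy sketch:

word2idx = {'the': 0, 'cat': 1}        # base vocabulary
new_word2idx = {'zyzzyva': 0}          # GloVe-only words, indexed from 0

idx2word = {idx: word for word, idx in word2idx.items()}
offset = len(idx2word)                 # new words live after the base vocab
idx2word.update({offset + idx: word for word, idx in new_word2idx.items()})
print(idx2word)  # {0: 'the', 1: 'cat', 2: 'zyzzyva'}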
Example #8
def _test(config):
    t1 = time.time()
    print("[{}] loading data..".format(t1))
    test_data = read_data(config, config.testfile, "test")
    t2 = time.time()
    print("[{}] updating config..".format(t2))
    update_config(config, [test_data])

    _config_debug(config)

    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)
    t3 = time.time()
    print("[{}] creating session..".format(t3))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    t4 = time.time()
    print("[{}] initializing session..".format(t4))
    graph_handler.initialize(sess)
    num_steps = int(math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus)))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    t5 = time.time()
    print("loading model takes {}s\n begin evaluating..".format(t5 - t3))
    count = 0
    total_time = 0
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps,
                                                        cluster=config.cluster), total=num_steps):
        t_start = time.time()
        evaluator.set_count(count)
        ei = evaluator.get_evaluation(sess, multi_batch)
        t_end = time.time()
        count += 1
        single_time = t_end - t_start
        total_time += single_time
        answer_id = list(ei.id2answer_dict["scores"].keys())[0]
        answer = ei.id2answer_dict[answer_id]
        print("id: {}, answer: {}, correct: {}, time: {:6.4f}s"
              .format(answer_id, answer.encode('ascii', 'ignore').decode('ascii'), int(ei.acc) == 1, single_time))
        sys.stdout.flush()
        e = ei if e is None else e + ei

    t6 = time.time()
    #print("[{}] finish evaluation".format(t6))
    #print("total time:{} for {} evaluations, avg:{}".format(total_time, count, total_time * 1.0 / count))
    
    print(e)
    print("dumping answer ...")
    graph_handler.dump_answer(e)
    """
Example #9
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared['lower_word2vec'] if config.lower_word else test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {idx: word2vec_dict[word] for word, idx in new_word2idx_dict.items()}
        new_emb_mat = np.array([idx2vec_dict[idx] for idx in range(len(idx2vec_dict))], dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples / (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps, cluster=config.cluster), total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(config.eval_dir, "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
def _predict(config):
    predict_data = read_data(config, 'predict', True)
    update_config(config, [predict_data])

    _config_debug(config)

    # if config.use_glove_for_unk:
    word2vec_dict = predict_data.shared['word2vec']
    config.emb_mat = np.array(word2vec_dict, dtype=np.float32)

    #pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    predictor = myMultiGPUF1Predictor(config,
                                      models,
                                      tensor_dict=models[0].tensor_dict)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    num_steps = math.ceil(predict_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    labelout = {}
    for multi_batch in tqdm(predict_data.get_multi_batches(
            config.batch_size,
            config.num_gpus,
            num_steps=num_steps,
            cluster=config.cluster),
                            total=num_steps):
        labelout = predictor.get_labelout(sess, multi_batch, labelout)

    outstring = ""
    for key, value in labelout.items():
        outstring += str(key)
        outstring += ','
        outstring += str(value)
        outstring += '\n'

    with open('../data/predictor.txt', 'w') as f:
        f.write(outstring)
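
The manual string assembly in _predict can be expressed with the csv module, which handles delimiters and line endings; a sketch assuming keys and values stringify cleanly (toy path and data):

import csv

labelout = {'q1': 0, 'q2': 1}  # toy predictions
with open('predictor.txt', 'w', newline='') as f:
    writer = csv.writer(f)
    for key, value in labelout.items():
        writer.writerow([key, value])  # emits "key,value" lines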
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load)
    dev_data = read_data(config, 'dev', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared['word2vec']
    # word2idx_dict = train_data.shared['word2idx']
    # idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    # emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
    #                     else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
    #                     for idx in range(config.word_vocab_size)])
    config.emb_mat = np.array(word2vec_dict, dtype=np.float32)
    print("embmat", config.emb_mat)
    print('begin construct')
    # construct model graph and variables (using default graph)
    #pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = myMultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving
    print('construct graph ready')

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)
    print('initialize session ready')

    # Begin training
    print("begin train")
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
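
All of the _train variants in this listing share one control flow: a global step counter with logging, saving, and evaluation triggered by modulo periods, plus a final save so the tail of training is not lost. Stripped to a runnable skeleton with print() stubs standing in for the real work:

def _train_skeleton(num_steps, log_period=10, save_period=50, eval_period=100):
    """Shared control flow of the _train variants (bodies replaced by prints)."""
    global_step = 0
    for step in range(1, num_steps + 1):
        global_step = step  # stands in for sess.run(model.global_step) + 1
        if global_step % log_period == 0:
            print('summary at', global_step)
        if global_step % save_period == 0:
            print('save at', global_step)
        if global_step % eval_period == 0:
            print('eval at', global_step)
    if global_step % save_period != 0:  # final save so the tail is kept
        print('final save at', global_step)

_train_skeleton(120)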
Example #12
def _train(config):
    if config.dataset == 'qangaroo':
        data_filter = get_qangaroo_data_filter(config)
    else:
        raise NotImplementedError

    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with sess.as_default():
        models = get_multi_gpu_models(config, emb_mat)
        model = models[0]
        print("num params: {}".format(get_num_params()))
        trainer = MultiGPUTrainer(config, models)
        if config.reasoning_layer is not None and config.mac_prediction == 'candidates':
            evaluator = MultiGPUF1CandidateEvaluator(
                config,
                models,
                tensor_dict=model.tensor_dict if config.vis else None)
        else:
            evaluator = MultiGPUF1Evaluator(
                config,
                models,
                tensor_dict=model.tensor_dict if config.vis else None)
        graph_handler = GraphHandler(
            config, model
        )  # controls all tensors and variables in the graph, including loading/saving

        # Variables
        #gpu_options = tf.GPUOptions(allow_growth=True)
        #sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))
        #sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0

    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):

        INSUFFICIENT_DATA = False
        for batch in batches:
            _, ds = batch
            if len(ds.data['x']) < config.batch_size:
                INSUFFICIENT_DATA = True
                break
        if INSUFFICIENT_DATA:
            continue

        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0

        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue

        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Example #13
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, config.dev_name, True, data_filter=None)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat
    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    if config.model_name == 'basic':
        ThisEvaluator = MultiGPUF1Evaluator
    elif config.model_name in ['basic-class', 'basic-generate', 'baseline']:
        ThisEvaluator = MultiGPUClassificationAccuracyEvaluator
    elif config.model_name == 'span-gen':
        ThisEvaluator = UnionEvaluator
    else:
        raise ValueError('unknown model_name: {}'.format(config.model_name))

    evaluator = ThisEvaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    graph_handler.initialize(sess)
    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0

    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            print("Saving variables on step ", global_step)
            graph_handler.save(sess, global_step=global_step)
        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            """ 
            train_batches = tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps)
            e_train = evaluator.get_evaluation_from_batches(
                sess, train_batches
            )
            graph_handler.add_summaries(e_train.summaries, global_step)
            """
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            print("Evaluated on dev at step ", global_step, ": ", e_dev)
            graph_handler.add_summaries(e_dev.summaries, global_step)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    if global_step % config.save_period != 0:
        print("Final save at step ", global_step)
        graph_handler.save(sess, global_step=global_step)
Example #14
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True, cluster=config.cluster), total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps)
            )
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Example #15
def _train(config):
    # get_squad_data_filter returns a filter function
    data_filter = get_squad_data_filter(config)
    # config.load: "load saved data? [True]"
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)  # DataSet
    dev_data = read_data(config, 'dev', config.load,
                         data_filter=data_filter)  # DataSet
    update_config(config, [train_data, dev_data])
    # updates config values such as max sentence size
    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.flag_values_dict(), indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(
            batch_size=config.batch_size,
            num_batches_per_step=config.num_gpus,
            num_steps=num_steps,
            shuffle=True,
            cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Example #16
def _train(config):
    # load_metadata(config, 'train')  # this updates the config file according to metadata file

    data_filter = get_squad_data_filter(config)
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    # test_data = read_data(config, 'test', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_draft(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    # model = Model(config)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # begin training
    print(train_data.num_examples)
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.eval_num_batches < num_steps:
                num_steps = config.eval_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
Example #17
def _train(config):
    # load_metadata(config, 'train')  # this updates the config file according to metadata file
    k = config.k
    sup_unsup_ratio = config.sup_unsup_ratio
    save_dir = 'error_results_newsqa_k=%s' % k
    f1_thres = 0.1

    data_filter = get_squad_data_filter(config)
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)

    # Baseline model
    config.data_dir = config.baseline_dir
    squad_train_data = read_data(config,
                                 'train',
                                 config.load,
                                 data_filter=data_filter)

    # test_data = read_data(config, 'test', True, data_filter=data_filter)
    update_config(config, [squad_train_data, train_data, dev_data])

    _config_draft(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    print("{}/{} unique words have corresponding glove vectors.".format(
        len(idx2vec_dict), len(word2idx_dict)))
    print(len(word2vec_dict))

    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    # model = Model(config)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config
    )  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.65

    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)

    batches_list = []

    # begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    global_scores = []

    # Combine batching together
    train_data_batcher = train_data.get_multi_batches(config.batch_size,
                                                      config.num_gpus,
                                                      num_steps=num_steps,
                                                      shuffle=True,
                                                      cluster=config.cluster)
    squad_data_batcher = squad_train_data.get_multi_batches(
        config.batch_size,
        config.num_gpus,
        num_steps=num_steps,
        shuffle=True,
        cluster=config.cluster)
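    # one-element list so the nested generator below can mutate the counter
    # (a pre-`nonlocal` closure idiom)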
    idx = [-1]
    ratio = sup_unsup_ratio

    def combine_batchers(unsupervised_generator, supervised_generator, ratio):
        while True:
            idx[0] = idx[0] + 1
            if idx[0] % ratio == 0:
                print("Yielding unsupervised")
                unsup_batch = next(unsupervised_generator)
                for _, data_set in unsup_batch:
                    if config.use_special_token:
                        data_set.data['dataset_type'] = ['NEWSQA']

                y = data_set.data['y']
                x = data_set.data['x']
                q = data_set.data['q']
                for xi, yi, qi in zip(x, y, q):
                    start_id = yi[0][0][1]
                    end_id = yi[0][1][1]
                    ans = xi[0][start_id:end_id]
                yield unsup_batch
            else:
                print("Yielding squad")
                sup_batch = next(supervised_generator)
                # read from the supervised batch itself rather than the
                # data_set left over from the last unsupervised batch
                for _, data_set in sup_batch:
                    y = data_set.data['y']
                    x = data_set.data['x']
                    for xi, yi in zip(x, y):
                        start_id = yi[0][0][1]
                        end_id = yi[0][1][1]
                        ans = xi[0][start_id:end_id]
                yield sup_batch

    combined_batcher = combine_batchers(train_data_batcher,
                                        squad_data_batcher,
                                        ratio=ratio)

    for batches in tqdm(combined_batcher, total=num_steps):

        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0

        scores = trainer.get_scores(sess,
                                    batches,
                                    get_summary=get_summary,
                                    k=k)
        loss, summary, train_op = trainer.margin_step(sess,
                                                      batches=batches,
                                                      top_k_batches=scores,
                                                      get_summary=get_summary)
        #loss, summary, train_op = trainer.step(sess, batches=batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.eval_num_batches < num_steps:
                num_steps = config.eval_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
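
combine_batchers above interleaves the unsupervised and supervised batchers at a fixed ratio. The interleaving logic on its own, as a self-contained sketch:

import itertools

def interleave(primary, secondary, ratio):
    """Yield from primary on every ratio-th step, otherwise from secondary."""
    for step in itertools.count():
        yield next(primary) if step % ratio == 0 else next(secondary)

unsup = iter(['u1', 'u2'])
sup = iter(['s1', 's2', 's3', 's4'])
print(list(itertools.islice(interleave(unsup, sup, 3), 6)))
# ['u1', 's1', 's2', 'u2', 's3', 's4']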
Example #18
File: main.py Project: zyang22/iss-rnns
def _train(config):
    data_filter = get_squad_data_filter(config)
    #train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    train_data = read_data(config, 'train', False, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    config_proto.allow_soft_placement = True
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)

    # plot weights
    for train_var in tf.trainable_variables():
        plot_tensor(train_var.eval(session=sess),
                    train_var.op.name,
                    plot_weights=config.plot_weights,
                    hidden_size=config.hidden_size)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
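
This embedding-matrix construction recurs in every training example below: an index with a pretrained GloVe vector takes that vector as its row, and every other index gets a draw from a standard multivariate normal. A minimal, self-contained sketch of the pattern; the tiny dictionaries are toy stand-ins for train_data.shared['word2vec'] and train_data.shared['word2idx'], not real data:

import numpy as np

# Toy stand-ins for the shared word2vec / word2idx dictionaries.
word2vec_dict = {'the': np.array([0.1, 0.2]), 'cat': np.array([0.3, 0.4])}
word2idx_dict = {'the': 0, 'cat': 1, 'zyzzyva': 2}  # 'zyzzyva' has no vector
word_emb_size = 2
word_vocab_size = 3

# Map each known word's index to its pretrained vector.
idx2vec_dict = {word2idx_dict[w]: v for w, v in word2vec_dict.items()
                if w in word2idx_dict}

# Indices without a pretrained vector are sampled from N(0, I),
# exactly as in the examples above.
emb_mat = np.array([
    idx2vec_dict[idx] if idx in idx2vec_dict
    else np.random.multivariate_normal(np.zeros(word_emb_size),
                                       np.eye(word_emb_size))
    for idx in range(word_vocab_size)
])
print(emb_mat.shape)  # (3, 2)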
Example #19
0
def _train(config):
    data_filter = get_squad_data_filter(config)
    # The next few lines read the train and dev data
    train_data = read_data(config,
                           'train',
                           config.load,
                           data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_debug(config)
    # emb_mat is the word-embedding matrix built below: pretrained vectors where available, random draws for the rest
    word2vec_dict = train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat
    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config, model
    )  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)
            # 17. tf.train.Saver().save(sess, 'ckpts/') writes four kinds of files under ckpts/:
            # checkpoint: a plain-text file (viewable in vim) recording which checkpoints have been saved.
            # The following three files together form one checkpoint:
            # model.ckpt.data-00000-of-00001: the data file of a checkpoint
            # model.ckpt.index: the index file of a checkpoint (binary, not directly viewable)
            # model.ckpt.meta: the meta-graph of a checkpoint (binary, not directly viewable)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps,
                                                shuffle=True),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)

    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
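
The checkpoint comments in this example describe standard tf.train.Saver behaviour. A minimal TF1 sketch of saving and restoring; the variable and paths are illustrative, not part of the example above:

import os
import tensorflow as tf

os.makedirs('ckpts', exist_ok=True)
v = tf.Variable(tf.zeros([2, 2]), name='w')
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Writes ckpts/model.ckpt-100.{data-00000-of-00001,index,meta}
    # and updates the plain-text ckpts/checkpoint file.
    saver.save(sess, 'ckpts/model.ckpt', global_step=100)

with tf.Session() as sess:
    # Restore from the newest checkpoint listed in ckpts/checkpoint.
    saver.restore(sess, tf.train.latest_checkpoint('ckpts'))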
Example #20
0
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, 'train', config.load, data_filter=data_filter)
    dev_data = read_data(config, 'dev', True, data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    new_word2idx_dict = train_data.shared['new_word2idx']
    print('word2idx len : {}, new_word2idx len : {}'.format(len(word2idx_dict), len(new_word2idx_dict)))

    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}

    idx2word_dict = {idx: word for word, idx in word2idx_dict.items()}
    offset = len(idx2word_dict)
    idx2word_dict.update({offset+idx: word for word, idx in new_word2idx_dict.items()})
    train_data.shared['idx2word'] = idx2word_dict

    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                           else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
                           for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]

    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)  # controls all tensors and variables in the graph, including loading/saving

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=True, cluster=config.cluster), total=num_steps):
        # Train the QA and QG models through the shared encoder
        global_step = sess.run(model.global_step) + 1
        #get_summary = global_step % config.log_period == 1
        get_summary = True  # log every step; waiting for log_period was too painful

        loss, seq2seq_loss, summary, train_op, gen_q_sample = trainer.step(sess, batches, get_summary=get_summary)
        config.is_gen = False
        print("global step : ", global_step)
        print("Loss : ", loss, "|", seq2seq_loss)
        print("Generated Question Sample : ", ' '.join([idx2word_dict[w] for w in gen_q_sample[0]]))
        """
        config.is_gen = True
        for (_, batch) in batches:
            batch.data['q'] = ['']*len(gen_q_sample)
            batch.data['cq'] = ['']*len(gen_q_sample)
            for b_idx in range(len(gen_q_sample)):
                batch.data['q'][b_idx] = [idx2word_dict[w] if w in idx2word_dict else "-UNK-" for w in gen_q_sample[b_idx]]
                batch.data['cq'][b_idx] = [list(idx2word_dict[w] if w in idx2word_dict else "-UNK-") for w in gen_q_sample[b_idx]]

        qa_gen_loss, _, __, train_op, ___ = trainer.step(sess, batch, get_summary=get_summary, is_gen=config.is_gen)
        print("QA Gen Loss : ", qa_gen_loss)
        """
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            """
            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps)
            )
            graph_handler.add_summaries(e_train.summaries, global_step)
            """
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            print(e_dev)
            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
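
Example #20 extends the base vocabulary with new_word2idx entries by shifting their indices past the original word2idx range, then decodes generated question ids through the merged idx2word map with an -UNK- fallback. A toy sketch of that decode step (all data here is made up):

word2idx = {'what': 0, 'is': 1}
new_word2idx = {'bidaf': 0}

idx2word = {i: w for w, i in word2idx.items()}
offset = len(idx2word)
# New words are appended after the original vocabulary.
idx2word.update({offset + i: w for w, i in new_word2idx.items()})

gen_q_sample = [0, 2, 1, 7]  # 7 is out of range, so it decodes to -UNK-
decoded = [idx2word[w] if w in idx2word else '-UNK-' for w in gen_q_sample]
print(' '.join(decoded))  # what bidaf is -UNK-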
Example #21
0
def _train(config):
    data_filter = get_squad_data_filter(config)
    train_data = read_data(config, config.trainfile, "train", data_filter=data_filter)
    dev_data = read_data(config, config.validfile, "valid", data_filter=data_filter)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUF1Evaluator(config, models, tensor_dict=model.tensor_dict if config.vis else None)
    # controls all tensors and variables in the graph, including loading/saving
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = min(config.num_steps,int(math.ceil(train_data.num_examples /
                                                  (config.batch_size * config.num_gpus))) * config.num_epochs)
    acc = 0
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps,
                                                     shuffle=True, cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # Occasional evaluation and saving
        if global_step % config.save_period == 0:
            num_steps = int(math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus)))
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_train = evaluator.get_evaluation_from_batches(sess, tqdm(train_data.get_multi_batches(
                config.batch_size, config.num_gpus, num_steps=num_steps), total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps),
                           total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)

            if e_dev.acc > acc:
                acc = e_dev.acc
                print("begin saving model...")
                print(e_dev)
                graph_handler.save(sess)
                print("end saving model, dumping eval and answer...")
                if config.dump_eval:
                    graph_handler.dump_eval(e_dev)
                if config.dump_answer:
                    graph_handler.dump_answer(e_dev)
                print("end dumping")

    print("begin freezing model...")

    config.clear_device = False
    config.input_path = graph_handler.save_path
    config.output_path = "model"
    config.input_names = None
    config.output_names = None

    freeze_graph(config)
    print("model frozen at {}".format(config.output_path))
Example #22
0
File: main.py Project: zyang22/iss-rnns
def _test(config):
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_debug(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared[
            'lower_word2vec'] if config.lower_word else test_data.shared[
                'word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {
            idx: word2vec_dict[word]
            for word, idx in new_word2idx_dict.items()
        }
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    evaluator = MultiGPUF1Evaluator(
        config,
        models,
        tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(config, model)

    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    config_proto.allow_soft_placement = True
    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    # plot weights
    for train_var in tf.trainable_variables():
        plot_tensor(train_var.eval(session=sess),
                    train_var.op.name,
                    plot_weights=config.plot_weights,
                    hidden_size=config.hidden_size)
    plt.show()
    if config.group_config:
        get_structure_sparsity(sess, config.group_config)
        print('Structure coordinating...')
        sess.run([model.get_var_structure_coordinate_op()])
        get_structure_sparsity(sess, config.group_config)

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(
            config.batch_size,
            config.num_gpus,
            num_steps=num_steps,
            cluster=config.cluster),
                            total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(
                config.eval_dir, "{}-{}".format(ei.data_type,
                                                str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
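
The accumulation idiom e = ei if e is None else e + ei in this example works because the evaluator's per-batch results overload +. A hypothetical minimal Evaluation class showing how such merging could look; the real class in these repos carries more fields (summaries, idxs, global_step):

class Evaluation:
    def __init__(self, num_examples, num_correct):
        self.num_examples = num_examples
        self.num_correct = num_correct

    def __add__(self, other):
        # Merging two partial evaluations just pools their counts.
        return Evaluation(self.num_examples + other.num_examples,
                          self.num_correct + other.num_correct)

    @property
    def acc(self):
        return self.num_correct / self.num_examples

e = None
for ei in [Evaluation(32, 20), Evaluation(32, 24), Evaluation(16, 10)]:
    e = ei if e is None else e + ei
print(e.acc)  # 0.675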
Example #23
0
def _test(config):
    assert config.load
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])

    _config_draft(config)

    if config.use_glove_for_unk:
        word2vec_dict = test_data.shared[
            'lower_word2vec'] if config.lower_word else test_data.shared[
                'word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {
            idx: word2vec_dict[word]
            for word, idx in new_word2idx_dict.items()
        }
        # print("{}/{} unique words have corresponding glove vectors.".format(len(idx2vec_dict), len(word2idx_dict)))
        new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')
        config.new_emb_mat = new_emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    evaluator = MultiGPUF1Evaluator(
        config,
        models,
        tensor_dict=models[0].tensor_dict if config.vis else None)
    graph_handler = GraphHandler(
        config
    )  # controls all tensors and variables in the graph, including loading/saving

    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.7

    sess = tf.Session(config=config_proto)
    graph_handler.initialize(sess)
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))

    #if 0 < config.eval_num_batches < num_steps:
    #    num_steps = config.eval_num_batches

    e = None
    for multi_batch in tqdm(test_data.get_multi_batches(
            config.batch_size,
            config.num_gpus,
            num_steps=num_steps,
            cluster=config.cluster),
                            total=num_steps):
        idx, batch = multi_batch[0]
        if config.use_special_token:
            batch.data['dataset_type'] = ['NEWSQA']
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(
                config.eval_dir, "{}-{}".format(ei.data_type,
                                                str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)
    print(e)
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
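
All of these examples size their batch loops the same way: ceil(num_examples / (batch_size * num_gpus)) steps, optionally capped by a *_num_batches flag, where the chained comparison 0 < cap < steps reads as "a cap is set and it is smaller than the full count". A standalone sketch of that computation (the function name is mine, not from the examples):

import math

def compute_num_steps(num_examples, batch_size, num_gpus, cap=0):
    # One step consumes batch_size examples on each of num_gpus GPUs.
    steps = math.ceil(num_examples / (batch_size * num_gpus))
    # cap == 0 means "no cap"; a positive cap only ever shrinks the count.
    if 0 < cap < steps:
        steps = cap
    return steps

print(compute_num_steps(10000, 60, 2))          # 84
print(compute_num_steps(10000, 60, 2, cap=10))  # 10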