def _test(config):
    """Evaluate the model on the 'test' split and optionally dump the results.

    Reads the test data, builds the multi-GPU models and evaluator, restores
    state through ``GraphHandler.initialize`` and accumulates the per-batch
    evaluations into a single result whose accuracy and loss are printed.

    Args:
        config: run configuration; this function reads (among others)
            ``use_glove_for_unk``, ``lower_word``, ``vis``, ``batch_size``,
            ``num_gpus``, ``test_num_batches``, ``cluster``, ``eval_dir``,
            ``dump_answer`` and ``dump_eval``, and may set ``new_emb_mat``.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        # Embedding rows for the extra ("new") vocabulary, ordered by index.
        vec_key = 'lower_word2vec' if config.lower_word else 'word2vec'
        word2vec_dict = test_data.shared[vec_key]
        idx2vec_dict = {}
        for word, idx in test_data.shared['new_word2idx'].items():
            idx2vec_dict[idx] = word2vec_dict[word]
        rows = [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))]
        config.new_emb_mat = np.array(rows, dtype='float32')

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    vis_tensors = model.tensor_dict if config.vis else None
    evaluator = MultiGPUEvaluator(config, models, tensor_dict=vis_tensors)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    examples_per_step = config.batch_size * config.num_gpus
    num_steps = math.ceil(test_data.num_examples / examples_per_step)
    # A positive test_num_batches caps the number of evaluated steps.
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    multi_batches = test_data.get_multi_batches(
        config.batch_size,
        config.num_gpus,
        num_steps=num_steps,
        cluster=config.cluster)

    total_eval = None
    for multi_batch in tqdm(multi_batches, total=num_steps):
        batch_eval = evaluator.get_evaluation(sess, multi_batch)
        if total_eval is None:
            total_eval = batch_eval
        else:
            total_eval = total_eval + batch_eval

        if not config.vis:
            continue
        # Visualisation mode: dump every batch evaluation into its own file.
        subdir_name = "{}-{}".format(batch_eval.data_type,
                                     str(batch_eval.global_step).zfill(6))
        eval_subdir = os.path.join(config.eval_dir, subdir_name)
        if not os.path.exists(eval_subdir):
            os.mkdir(eval_subdir)
        dump_path = os.path.join(eval_subdir, str(batch_eval.idxs[0]).zfill(8))
        graph_handler.dump_eval(batch_eval, path=dump_path)

    print("test acc: %f, loss: %f" % (total_eval.acc, total_eval.loss))
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(total_eval)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(total_eval)
def test(config):
    """Run inference on the test batches, log every prediction and report BLEU.

    For each example the source, the expected target and the decoded output
    (truncated at the first ``</S>``) are written to
    ``<config.eval_dir>/<config.model_name>``. Pairs whose reference and
    hypothesis both have more than three tokens contribute to the corpus BLEU
    score, which is appended to the same file.

    Args:
        config: run configuration; ``eval_dir`` and ``model_name`` are read here.
    """
    _config_test(config)

    # Only the index -> English-token table is needed for decoding.
    _de2idx, _idx2de = load_de_vocab()
    _en2idx, idx2en = load_en_vocab()

    model = ConvSeq2Seq(config)
    graph_handler = GraphHandler(config)
    inferencer = Inferencer(config, model)
    sess = tf.Session()
    graph_handler.initialize(sess)

    references = []
    hypotheses = []
    out_path = os.path.join(config.eval_dir, config.model_name)
    with codecs.open(out_path, "w", "utf-8") as fout:
        for _, batch in tqdm(enumerate(get_batch_for_test())):
            preds = inferencer.run(sess, batch)
            triples = zip(batch['source'], batch['target'], preds)
            for source, target, pred in triples:
                decoded = " ".join(idx2en[idx] for idx in pred)
                got = decoded.split("</S>")[0].strip()

                fout.write("- source: " + source + "\n")
                fout.write("- expected: " + target + "\n")
                fout.write("- got: " + got + "\n\n")
                fout.flush()

                ref_tokens = target.split()
                hyp_tokens = got.split()
                # Very short sentences are excluded from the BLEU computation.
                if len(ref_tokens) <= 3 or len(hyp_tokens) <= 3:
                    continue
                references.append([ref_tokens])
                hypotheses.append(hyp_tokens)

        score = corpus_bleu(references, hypotheses)
        fout.write("Bleu Score = " + str(100*score))
def test(config):
    """Evaluate on the test records and dump answers plus raw logits.

    Loads the embedding matrices, the evaluation file and the metadata, builds
    the model on a one-shot iterator over the test records, and runs
    ``total // batch_size + 1`` batches. Two artefacts are written:

    * ``<config.answer_file>_<config.load_step>``: the remapped answers (JSON);
    * ``<config.result_path>/<model_name>_<run_id>.pklz``: a gzip-compressed
      pickle mapping each question id to its start/stop logits, plus the mean
      loss, exact-match and F1 entries.

    Args:
        config: run configuration (file paths, ``batch_size``, ``load_step``,
            ``model_name``, ``run_id``, ``result_path``).
    """
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = get_dataset(config.test_record_file,
                             get_record_parser(config, is_test=True),
                             config).make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        # Runs after the initializer so it is not overwritten; presumably
        # this restores the checkpoint -- confirm in GraphHandler.
        graph_handler.initialize(sess)
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))

        losses = []
        answer_dict = {}
        remapped_dict = {}
        ensember_dict = {}
        for _ in tqdm(range(total // config.batch_size + 1)):
            start_logits, stop_logits, qa_id, loss, yp1, yp2 = sess.run([
                model.start_logits, model.stop_logits, model.qa_id,
                model.loss, model.yp1, model.yp2
            ])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
            # The logits are stored as fetched (one row per question). The
            # original called .tolist() here and discarded the result, which
            # was dead work on every batch; it has been removed.
            for qid, start, stop in zip(qa_id, start_logits, stop_logits):
                ensember_dict[str(qid)] = {'yp1': start, 'yp2': stop}

        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)

        answer_path = config.answer_file + "_" + str(config.load_step)
        with open(answer_path, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))

        ensember_dict['loss'] = loss
        # NOTE(review): the key is misspelled ('exact_math') but is kept as-is
        # because downstream readers of the pickle may depend on it.
        ensember_dict['exact_math'] = metrics['exact_match']
        ensember_dict['f1'] = metrics['f1']

        file_name = config.model_name + '_' + config.run_id + '.pklz'
        save_path = os.path.join(config.result_path, file_name)
        with gzip.open(save_path, 'wb', compresslevel=3) as fh:
            pickle.dump(ensember_dict, fh)
def _check(config):
    """Debug helper: run training batches through the model and print shapes.

    Builds the embedding matrix from the vocabulary and pretrained vectors,
    loads the train/test data for ``config.data_from``, and for every training
    batch fetches ``model.check``, ``model.xx_final`` and ``model.xx_context``
    and prints their shapes. No training op is executed.

    Args:
        config: run configuration; ``emb_mat``, ``vocab_size``, ``train_size``
            and ``dev_size`` are written back onto it.

    Raises:
        ValueError: if ``config.data_from`` is neither ``"reuters"`` nor
            ``"20newsgroup"``.
    """

    def load_json(path):
        # Context manager so the file handle is closed deterministically.
        with open(path, "r") as fh:
            return json.load(fh)

    word2idx = Counter(load_json("data/word2idx_new.json")["word2idx"])
    vocab_size = len(word2idx)
    word2vec = Counter(
        load_json("data/word2vec_{}.json".format(
            config.pretrain_from))["word2vec"])

    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items()
        if word in word2idx and word != "UNK"
    }
    # A single random vector is shared by every index without a pretrained
    # embedding.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array([
        idx2vec[idx] if idx in idx2vec else unk_embedding
        for idx in range(vocab_size)
    ])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    if config.data_from == "reuters":
        train_data = read_reuters(config, data_type="train", word2idx=word2idx)
        dev_data = read_reuters(config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        train_data = read_news(config, data_type="train", word2idx=word2idx)
        # Was `read_newss`, an undefined name that raised NameError.
        dev_data = read_news(config, data_type="test", word2idx=word2idx)
    else:
        raise ValueError(
            "unsupported data_from: {!r}".format(config.data_from))

    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()

    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = config.num_batches or int(
        math.ceil(
            train_data.num_examples / config.batch_size)) * config.num_epochs

    batches = train_data.get_batches(config.batch_size,
                                     num_batches=num_batches,
                                     shuffle=True,
                                     cluster=config.cluster)
    for batch in tqdm(batches, total=num_batches):
        feed_dict = model.get_feed_dict(batch, config)
        check, xx_final, xx_context = sess.run(
            [model.check, model.xx_final, model.xx_context],
            feed_dict=feed_dict)
        print("check:", check.shape, type(check), xx_final.shape,
              xx_context.shape)
def test(config):
    """Evaluate on the test records and print EM / F1 / Rouge-L metrics.

    Runs ``total // batch_size + 1`` batches, collects the converted answers,
    writes the remapped answers to ``config.answer_file`` as JSON and prints
    the metrics returned by ``evaluate``. Batches whose loss exceeds 50 have
    their answers and predicted spans printed for inspection.

    Args:
        config: run configuration (file paths and ``batch_size``).
    """
    # NOTE(review): the visible GPU is hard-coded to device "2".
    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        gpu_options=tf.GPUOptions(visible_device_list="2"))
    sess_config.gpu_options.allow_growth = True

    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        total = json.load(fh)["total"]

    print("Loading model...")
    record_parser = get_record_parser(config, is_test=True)
    test_dataset = get_dataset(config.test_record_file, record_parser, config)
    test_batch = test_dataset.make_one_shot_iterator()

    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    graph_handler = GraphHandler(config, model)

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        graph_handler.initialize(sess)
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))

        batch_losses = []
        answer_dict = {}
        remapped_dict = {}
        fetches = [model.qa_id, model.loss, model.yp1, model.yp2]
        num_batches = total // config.batch_size + 1
        for _ in tqdm(range(num_batches)):
            qa_id, batch_loss, yp1, yp2 = sess.run(fetches)
            batch_answers, batch_remapped, _ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(batch_answers)
            remapped_dict.update(batch_remapped)
            batch_losses.append(batch_loss)
            print("\n", batch_loss)
            if batch_loss > 50:
                # Unusually high loss: show each answer with its predicted span.
                for qid, start, end in zip(qa_id.tolist(), yp1.tolist(),
                                           yp2.tolist()):
                    print(answer_dict[str(qid)], start, end)

        loss = np.mean(batch_losses)
        # Metrics are computed from answer_dict here, whereas evaluate-v1.1.py
        # works from remapped_dict because only that one is saved. The two
        # dicts differ slightly; see evaluate-v1.1.py.
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {} Rouge-l-f: {} Rouge-l-p: {} Rouge-l-r: {}".format(
            metrics['exact_match'], metrics['f1'], metrics['rouge-l-f'],
            metrics['rouge-l-p'], metrics['rouge-l-r']))
def test(model_params):
    """Evaluate a (possibly restored) model on the test split and log the result.

    Overrides ``batch_size``, ``load_model`` and, when ``FLAGS.load_path`` is
    set, ``load_path`` on ``model_params``; reads the test examples and the
    dictionaries from disk; builds either ``Model`` or ``Model_Selector``
    depending on ``FLAGS.dataset_type``; and logs the loss and accuracy
    returned by the evaluator.

    Args:
        model_params: model hyper-parameters object; mutated as described above.
    """
    model_params.batch_size = _DECODE_BATCH_SIZE
    model_params.load_model = FLAGS.is_load
    if FLAGS.load_path:
        model_params.load_path = FLAGS.load_path

    dataset_type = FLAGS.dataset_type
    test_file = os.path.join(
        FLAGS.data_dir, _TEST_TAG + '_' + dataset_type + '.' + _JSON_TAG)
    raw_test_data = utils_file.read_file(test_file, _JSON_TAG)

    dicts_path = os.path.join(
        model_params.dict_dir, 'dicts' + '_' + dataset_type + '.' + _JSON_TAG)
    dicts = utils_file.read_file(dicts_path, _JSON_TAG)
    assert dicts is not None

    is_binary = dataset_type == 'binary'
    test_data = dataset.VTTExample(raw_test_data, 'test', model_params, dicts,
                                   is_binary)

    emb_mat_token = test_data.emb_mat_token
    emb_mat_glove = test_data.emb_mat_glove

    with tf.variable_scope('model') as scope:
        token_vocab_size = len(test_data.dicts['token'])
        char_vocab_size = len(test_data.dicts['char'])
        if dataset_type == 'multiple':
            # Multiple-choice data needs the answer-size-aware selector model.
            model = Model_Selector(emb_mat_token, emb_mat_glove,
                                   token_vocab_size, char_vocab_size,
                                   test_data.max_token_size,
                                   test_data.max_ans_size, model_params,
                                   scope=scope.name)
        else:
            model = Model(emb_mat_token, emb_mat_glove, token_vocab_size,
                          char_vocab_size, test_data.max_token_size,
                          model_params, scope=scope.name)

    graphHandler = GraphHandler(model, model_params)
    evaluator = Evaluator(model, model_params, is_binary)

    graph_config = tf.ConfigProto(gpu_options=tf.GPUOptions(),
                                  allow_soft_placement=True)
    sess = tf.Session(config=graph_config)
    graphHandler.initialize(sess)

    test_loss, test_acc, _ = evaluator.get_evaluation(sess, test_data)
    tf.logging.info('test loss : %.4f accuracy %.4f' % (test_loss, test_acc))
def load_model(data, model_params):
    """Build a ``Model_Selector`` for ``data`` and return it with a live session.

    Args:
        data: dataset object exposing ``emb_mat_token``, ``emb_mat_glove``,
            ``dicts`` (with ``'token'`` and ``'char'`` entries),
            ``max_token_size`` and ``max_ans_size``.
        model_params: model hyper-parameters, forwarded to the model and to
            ``GraphHandler``.

    Returns:
        A ``(model, sess)`` tuple; the session has been passed through
        ``GraphHandler.initialize``.
    """
    with tf.compat.v1.variable_scope('model') as scope:
        token_vocab_size = len(data.dicts['token'])
        char_vocab_size = len(data.dicts['char'])
        model = Model_Selector(data.emb_mat_token, data.emb_mat_glove,
                               token_vocab_size, char_vocab_size,
                               data.max_token_size, data.max_ans_size,
                               model_params, scope.name)

    handler = GraphHandler(model, model_params)

    # Let TensorFlow grow GPU memory on demand instead of reserving it all.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=session_config)

    handler.initialize(sess)
    return model, sess
def _test(config):
    """Evaluate the model on the test/dev data of ``config.data_from``.

    Loads the vocabulary and pretrained vectors, builds ``config.emb_mat``,
    loads a cached data dict when one exists (otherwise reads the raw data),
    then runs the evaluator over ``floor(num_examples / test_batch_size)``
    batches, capped by a positive ``config.val_num_batches``.

    Args:
        config: run configuration; ``test_batch_size`` (for 20newsgroup),
            ``emb_mat``, ``vocab_size`` and ``dev_size`` are written back.

    Raises:
        ValueError: if ``config.data_from`` is not one of ``"reuters"``,
            ``"20newsgroup"`` or ``"ice"``.
    """

    def load_json(path):
        # Context manager so the file handle is closed deterministically.
        with open(path, "r") as fh:
            return json.load(fh)

    if config.data_from == "20newsgroup":
        config.test_batch_size = 281

    # Parse the vocabulary file once; it holds both directions of the mapping.
    vocab = load_json("../data/{}/word2idx_{}.json".format(
        config.data_from, config.data_from))
    word2idx = Counter(vocab["word2idx"])
    idx2word = vocab["idx2word"]
    # Sanity check that the two mappings agree (spot-checks the first 10 ids).
    assert len(word2idx) == len(idx2word)
    for i in range(10):
        assert word2idx[idx2word[i]] == i
    vocab_size = len(word2idx)

    word2vec = Counter(
        load_json("../data/{}/word2vec_{}.json".format(
            config.data_from, config.pretrain_from))["word2vec"])
    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items() if word in word2idx
    }
    # A single random vector is shared by every index without a pretrained
    # embedding.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array([
        idx2vec[idx] if idx in idx2vec else unk_embedding
        for idx in range(vocab_size)
    ])
    config.vocab_size = vocab_size

    # Prefer the preprocessed (cached) data dict when it exists on disk.
    cache_path = "../data/{}/{}_{}{}.json".format(
        config.data_from, config.data_from, config.dev_type, config.clftype)
    test_dict = load_json(cache_path) if os.path.exists(cache_path) else {}

    if config.data_from == "reuters":
        dev_data = DataSet(test_dict, "test") if len(
            test_dict) > 0 else read_reuters(
                config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        dev_data = DataSet(test_dict, "test") if len(
            test_dict) > 0 else read_news(
                config, data_type="test", word2idx=word2idx)
    elif config.data_from == "ice":
        dev_data = DataSet(test_dict, config.dev_type)
    else:
        raise ValueError(
            "unsupported data_from: {!r}".format(config.data_from))
    config.dev_size = dev_data.get_data_size()

    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    dev_evaluate = Evaluator(config, model)
    # floor: a trailing partial batch is not evaluated.
    num_steps = math.floor(dev_data.num_examples / config.test_batch_size)
    if 0 < config.val_num_batches < num_steps:
        num_steps = config.val_num_batches

    dev_evaluate.get_evaluation_from_batches(
        sess,
        tqdm(dev_data.get_batches(config.test_batch_size,
                                  num_batches=num_steps),
             total=num_steps))
def train(config):
    """Train the ConvSeq2Seq model, logging summaries and saving periodically.

    For every batch one trainer step is run and the step, loss and accuracy
    are printed. Summaries are written every ``config.log_period`` steps and
    the model is saved every ``config.save_period`` steps, plus once after the
    loop when the last step was not a save step.

    Args:
        config: run configuration; ``num_epoch``, ``log_period`` and
            ``save_period`` are read here.
    """
    model = ConvSeq2Seq(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config)
    sess = tf.Session()
    graph_handler.initialize(sess)

    # Defined up front so the final check is safe when no batch is produced.
    global_step = 0
    for _, batch in tqdm(enumerate(get_batch(num_epoch=config.num_epoch))):
        # +1 because the step counter is read before the update is applied.
        global_step = sess.run(model.global_step) + 1
        loss, acc, summary = trainer.run_step(sess, batch)
        # Single-argument call form: valid on both Python 2 and Python 3.
        print("global_step: %d, loss: %f, acc: %f" % (global_step, loss, acc))

        if global_step % config.log_period == 0:
            graph_handler.add_summary(summary, global_step)

        if global_step % config.save_period == 0:
            graph_handler.save_model(sess, global_step)

        # NOTE: periodic evaluation (config.eval_period) was an empty
        # placeholder in this function and is not implemented.

    # Final save when training did not end exactly on a save step.
    if global_step % config.save_period != 0:
        graph_handler.save_model(sess)
def run_test_while_training(self, ep):
    """Run a test pass in the middle of training without disturbing its state.

    A temporary ``GraphHandler`` bound to ``self.test_data_gen`` is swapped in
    for the duration of ``self.run_test()``. Saving is switched off on it and
    its result path is set to ``results/<ep>_test.txt``.

    Args:
        ep: value identifying the current epoch; only used to name the
            result file.
    """
    test_graph_handler = GraphHandler(self.config, self.test_data_gen, None)
    test_graph_handler.set_saving_mode(False)
    test_graph_handler.set_result_path(
        os.path.join('results', '{}_test.txt'.format(ep)))

    training_graph_handler = self.graph_handler
    self.graph_handler = test_graph_handler
    try:
        self.run_test()
    finally:
        # Always put the training handler back, even if the test run raises;
        # otherwise training would continue with the test handler.
        self.graph_handler = training_graph_handler
def _test(config):
    """Evaluate the model on the 'test' split and print the overall accuracy.

    Every multi-batch of the test data is evaluated with an
    ``AccuracyEvaluator`` and the per-batch results are summed. The combined
    evaluation is optionally dumped via ``GraphHandler``.

    Args:
        config: run configuration; ``test_num_can``, ``vis``, ``batch_size``,
            ``num_gpus``, ``cluster``, ``dump_eval`` and ``dump_answer`` are
            read here.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    vis_tensors = model.tensor_dict if config.vis else None
    evaluator = AccuracyEvaluator(config.test_num_can, config, model,
                                  tensor_dict=vis_tensors)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    examples_per_step = config.batch_size * config.num_gpus
    num_steps = math.ceil(test_data.num_examples / examples_per_step)

    multi_batches = test_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps,
                                                cluster=config.cluster)
    total_eval = None
    for multi_batch in tqdm(multi_batches, total=num_steps):
        batch_eval = evaluator.get_evaluation(sess, multi_batch)
        if total_eval is None:
            total_eval = batch_eval
        else:
            total_eval = total_eval + batch_eval
        # Per-batch visualisation dumps are disabled in this variant.

    print(total_eval.acc)

    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(total_eval)
    if config.dump_answer:
        print("dumping answers ...")
        graph_handler.dump_answer(total_eval)
def main(unused_argv):
    """Train the auto-encoder model with queue-runner based input.

    Creates the training directory if needed, builds the model in a fresh
    graph, and runs ``model_config.num_steps`` training steps. Merged
    summaries are written every ``model_config.period`` steps and a checkpoint
    every ``model_config.checkpoint`` steps.

    Args:
        unused_argv: ignored; present for the app-runner calling convention.
    """
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"

    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern

    # Create training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = auto_encoder_model.Auto_Encoder_Model(model_config,
                                                      mode="train")
        model.build()

        graph_handler = GraphHandler(model_config, model)
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            queue_runner = tf.train.start_queue_runners(sess=sess, coord=coord)
            graph_handler.initialize(sess)

            # Build the merged-summary op once. Calling merge_all() inside the
            # loop added a new op to the graph on every summary step.
            summary_op = tf.summary.merge_all()

            try:
                for _ in tqdm(range(1, model_config.num_steps + 1)):
                    # +1 because the counter is read before the update runs.
                    global_step = sess.run(model.global_step) + 1
                    sess.run(
                        [model.total_loss, model.train_op, model.outputs_])

                    if global_step % model_config.period == 0:
                        summaries = sess.run(summary_op)
                        graph_handler.writer.add_summary(summaries,
                                                         global_step)
                        graph_handler.writer.flush()

                    if global_step % model_config.checkpoint == 0:
                        filename = os.path.join(
                            model_config.save_dir,
                            "{}_{}.ckpt".format(model_config.model_name,
                                                global_step))
                        graph_handler.save(sess, filename)
            finally:
                # Ask the input threads to stop; join() alone waits for
                # threads that may never finish on their own.
                coord.request_stop()
            coord.join(queue_runner)
with open(args.config) as f: config = yaml.load(f, Loader=yaml.FullLoader) compute_config(config, args) print('[Task] Load Data Loader') data_loader = DataLoader(config) print('[Done] Loaded Data Loader') print('[Task] Load Data Generator') data_gen = DataGenerator(config, data_loader) if args.test_only and args.test_data_path is not None: data_genenrator_from_data(data_gen, args, config) print('[Done] Loaded Data Generator') print('[Task] Load Graph Handler') mem = Mem(config, data_gen) graph_handler = GraphHandler(config, data_gen, mem) print('[Done] Loaded Graph Handler') print('[Task] Build Model') model_on_graph = create_model(config) print('[Done] Built Model') print('[Task] Load Agent') agent = Agent(config, graph_handler, model_on_graph) if not args.test_only and config['train_params']['test_while_training']: test_data_gen = DataGenerator(config, data_loader) data_genenrator_from_data(test_data_gen, args, config) agent.set_test_data_gen(test_data_gen) print('[Done] Loaded Agent') if args.test_only: agent.run_test() else:
def train(config):
    """Train the model with periodic evaluation, LR halving and checkpointing.

    Loads the embedding matrices and evaluation files, builds train/dev
    datasets that share one string-handle iterator, and runs
    ``config.num_steps`` training steps. Every ``config.period`` steps the
    training loss is written as a summary. Every ``config.checkpoint`` steps
    the model is evaluated on train and dev batches, the learning rate is
    halved once the dev loss has failed to improve ``config.patience`` times,
    and a checkpoint is saved.

    Args:
        config: run configuration (file paths, ``init_lr``, ``num_steps``,
            ``period``, ``checkpoint``, ``patience``, ``val_num_batches``,
            ``batch_size``, ``save_dir``, ``model_name``).
    """
    # NOTE(review): the visible GPU is hard-coded to device "2".
    gpu_options = tf.GPUOptions(visible_device_list="2")
    sess_config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
    sess_config.gpu_options.allow_growth = True
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)
    with open(config.dev_meta, "r") as fh:
        meta = json.load(fh)

    dev_total = meta["total"]

    print("Building model...")
    parser = get_record_parser(config)
    train_dataset = get_batch_dataset(config.train_record_file, parser, config)
    dev_dataset = get_dataset(config.dev_record_file, parser, config)
    # One feedable iterator serves both datasets; the string handle fed at
    # run time selects which one is read.
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle, train_dataset.output_types, train_dataset.output_shapes)
    train_iterator = train_dataset.make_one_shot_iterator()
    dev_iterator = dev_dataset.make_one_shot_iterator()

    model = Model(config, iterator, word_mat, char_mat)
    # Presumably owns summary writing and checkpoint save/load -- confirm in
    # GraphHandler.
    graph_handler = GraphHandler(
        config, model
    )

    loss_save = 100.0  # best dev loss seen so far (initial sentinel)
    patience = 0  # consecutive checkpoints without dev improvement
    lr = config.init_lr

    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        graph_handler.initialize(sess)
        train_handle = sess.run(train_iterator.string_handle())
        dev_handle = sess.run(dev_iterator.string_handle())
        sess.run(tf.assign(model.is_train, tf.constant(True, dtype=tf.bool)))
        sess.run(tf.assign(model.lr, tf.constant(lr, dtype=tf.float32)))
        print("Started training")
        for _ in tqdm(range(1, config.num_steps + 1)):
            # +1 because the counter is read before the training op runs.
            global_step = sess.run(model.global_step) + 1
            loss, train_op = sess.run([model.loss, model.train_op],
                                      feed_dict={handle: train_handle})
            if global_step % config.period == 0:
                loss_sum = tf.Summary(value=[
                    tf.Summary.Value(tag="model/loss", simple_value=loss),
                ])
                graph_handler.add_summary(loss_sum, global_step)
            if global_step % config.checkpoint == 0:
                # Switch the model to evaluation mode for the two passes below.
                # NOTE(review): each tf.assign call here adds new ops to the
                # graph on every checkpoint.
                sess.run(
                    tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
                _, summ = evaluate_batch(model, config.val_num_batches,
                                         train_eval_file, sess, "train", handle,
                                         train_handle)
                for s in summ:
                    graph_handler.add_summary(s, global_step)

                metrics, summ = evaluate_batch(
                    model, dev_total // config.batch_size + 1, dev_eval_file,
                    sess, "dev", handle, dev_handle)
                sess.run(
                    tf.assign(model.is_train, tf.constant(True, dtype=tf.bool)))

                dev_loss = metrics["loss"]
                if dev_loss < loss_save:
                    loss_save = dev_loss
                    patience = 0
                else:
                    patience += 1
                # Out of patience: halve the learning rate and restart the
                # count from the current dev loss.
                if patience >= config.patience:
                    lr /= 2.0
                    loss_save = dev_loss
                    patience = 0
                sess.run(tf.assign(model.lr, tf.constant(lr, dtype=tf.float32)))
                graph_handler.add_summaries(summ, global_step)
                graph_handler.writer.flush()
                filename = os.path.join(
                    config.save_dir,
                    "{}_{}.ckpt".format(config.model_name, global_step))
                graph_handler.save(sess, filename)
def _train(config):
    """Train the model on reuters / 20newsgroup with periodic save and eval.

    Builds ``config.emb_mat`` from the vocabulary and pretrained vectors,
    loads cached train/test data dicts when they exist (otherwise reads the
    raw data), and runs the training loop. Summaries are written every
    ``config.log_period`` steps, the model is saved every
    ``config.save_period`` steps, and when ``config.eval`` is set the dev data
    is evaluated every ``config.eval_period`` steps.

    Args:
        config: run configuration; ``emb_mat``, ``vocab_size``,
            ``train_size`` and ``dev_size`` are written back, and
            ``max_docs_length`` is clamped to 2000.

    Raises:
        ValueError: if ``config.data_from`` is neither ``"reuters"`` nor
            ``"20newsgroup"``.
    """

    def load_json(path):
        # Context manager so the file handle is closed deterministically.
        with open(path, "r") as fh:
            return json.load(fh)

    word2idx = Counter(
        load_json("data/{}/word2idx_{}.json".format(
            config.data_from, config.data_from))["word2idx"])
    vocab_size = len(word2idx)
    print("vocab_size", vocab_size)
    word2vec = Counter(
        load_json("data/{}/word2vec_{}.json".format(
            config.data_from, config.pretrain_from))["word2vec"])

    idx2vec = {
        word2idx[word]: vec
        for word, vec in word2vec.items()
        if word in word2idx and word != "UNK"
    }
    # A single random vector is shared by every index without a pretrained
    # embedding.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array([
        idx2vec[idx] if idx in idx2vec else unk_embedding
        for idx in range(vocab_size)
    ])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    # Prefer the preprocessed (cached) data dicts when they exist on disk.
    train_path = "data/{}/{}_{}.json".format(config.data_from,
                                             config.data_from, "train")
    test_path = "data/{}/{}_{}.json".format(config.data_from,
                                            config.data_from, "test")
    train_dict = load_json(train_path) if os.path.exists(train_path) else {}
    test_dict = load_json(test_path) if os.path.exists(test_path) else {}

    if config.data_from == "reuters":
        train_data = DataSet(train_dict, "train") if len(
            train_dict) > 0 else read_reuters(
                config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if len(
            test_dict) > 0 else read_reuters(
                config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        train_data = DataSet(train_dict, "train") if len(
            train_dict) > 0 else read_news(
                config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if len(
            test_dict) > 0 else read_news(
                config, data_type="test", word2idx=word2idx)
    else:
        raise ValueError(
            "unsupported data_from: {!r}".format(config.data_from))

    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()
    print("train/dev:", config.train_size, config.dev_size)

    if config.max_docs_length > 2000:
        config.max_docs_length = 2000

    pprint(config.__flags, indent=2)
    model = get_model(config)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = config.num_batches or int(
        math.ceil(
            train_data.num_examples / config.batch_size)) * config.num_epochs

    dev_evaluate = Evaluator(config, model)

    batches = train_data.get_batches(config.batch_size,
                                     num_batches=num_batches,
                                     shuffle=True,
                                     cluster=config.cluster)
    for batch in tqdm(batches, total=num_batches):
        # +1 because the counter is read before the training op runs.
        global_step = sess.run(model.global_step) + 1
        # Was `global_step % config.log_period` without `== 0`, which logged
        # on every step except multiples of log_period.
        get_summary = global_step % config.log_period == 0

        feed_dict = model.get_feed_dict(batch, config)
        _, _, _, loss, summary, _ = sess.run(
            [
                model.logits, model.y, model.y_seq_length, model.loss,
                model.summary, model.train_op
            ],
            feed_dict=feed_dict)
        print("loss:", loss)

        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # Occasional saving.
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue

        # Occasional evaluation.
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples /
                                  config.test_batch_size)
            if 0 < config.val_num_batches < num_steps:
                num_steps = config.val_num_batches
            e_dev = dev_evaluate.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_batches(config.test_batch_size,
                                          num_batches=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
def _train(config):
    """Train on 'val_train', evaluate on 'val_val', checkpoint the best dev model.

    Builds the embedding matrix, the multi-GPU models, trainer and accuracy
    evaluator, then runs the training loop. When ``config.eval`` is set, every
    ``config.eval_period`` steps the full train and dev sets are evaluated and
    the model is saved whenever dev accuracy improves. After the loop the
    model is saved once more if the last step was not a save step, and the
    best dev result is printed.

    Args:
        config: run configuration; ``emb_mat`` is written back, and
            ``save_period`` / ``eval_period`` are overwritten with 50 once
            the step count exceeds 700.
    """
    train_data = read_data(config, 'val_train', config.load)
    dev_data = read_data(config, 'val_val', True)
    # test = read_data(config, 'test', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec'] if config.lower_word else train_data.shared['word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {word2idx_dict[word]: vec for word, vec in word2vec_dict.items() if word in word2idx_dict}
    # Indices without a pretrained vector each get their own random draw.
    emb_mat = np.array([idx2vec_dict[idx] if idx in idx2vec_dict
                        else np.random.multivariate_normal(np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
                        for idx in range(config.word_vocab_size)])
    config.emb_mat = emb_mat

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = AccuracyEvaluator(config.train_num_can, config, model, tensor_dict=model.tensor_dict if config.vis else None)
    # Presumably owns summary writing and checkpoint save/load -- confirm in
    # GraphHandler.
    graph_handler = GraphHandler(config, model)

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # Begin training
    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))) * config.num_epochs
    global_step = 0
    # Starts as [best dev acc, step]; once a result is recorded it becomes
    # [dev acc, step, train acc].
    best_dev=[0,0]
    for batches in tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus,
                                                     num_steps=num_steps, shuffle=False, cluster=config.cluster), total=num_steps):
        global_step = sess.run(model.global_step) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess, batches, get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        if not config.eval:
            continue

        if global_step % config.eval_period == 0:
            # Evaluate on the complete train and dev sets.
            num_steps_dev = math.ceil(dev_data.num_examples / (config.batch_size * config.num_gpus))
            num_steps_train = math.ceil(train_data.num_examples / (config.batch_size * config.num_gpus))

            e_train = evaluator.get_evaluation_from_batches(
                sess, tqdm(train_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps_train), total=num_steps_train)
            )
            # graph_handler.add_summaries(e_test.summaries, global_step)
            e_dev = evaluator.get_evaluation_from_batches(
                sess, tqdm(dev_data.get_multi_batches(config.batch_size, config.num_gpus, num_steps=num_steps_dev), total=num_steps_dev))
            # graph_handler.dump_eval(e)
            # graph_handler.add_summaries(e_dev.summaries, global_step)
            print('train step:{}  loss:{}  acc:{}'.format(global_step, e_train.loss, e_train.acc))
            print('val step:{}  loss:{}  acc:{}'.format(global_step, e_dev.loss, e_dev.acc))
            # print('w_s:{}'.format(w_s))
            # NOTE(review): hard-coded switch to evaluating/saving every 50
            # steps after step 700; this mutates the shared config.
            if global_step > 700:
                config.save_period = 50
                config.eval_period = 50
            # Checkpoint only when dev accuracy improves.
            if best_dev[0] < e_dev.acc:
                best_dev=[e_dev.acc,global_step,e_train.acc]
                graph_handler.save(sess, global_step=global_step)
            # if config.dump_eval:
            #     graph_handler.dump_eval(e_dev)
    # Final save when training did not end exactly on a save step.
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
    print (best_dev)
    print ("you can test on test data set and set load setp is {}".format(best_dev[1]))
def _train(config):
    """Train the multi-GPU model with periodic eval and Slack notifications.

    Builds the embedding matrix and an index-to-word table, constructs the
    models, trainer and evaluator, then runs the training loop. Summaries are
    written every ``config.log_period`` steps and the model is saved every
    ``config.save_period`` steps. When ``config.eval`` is set, every
    ``config.eval_period`` steps train and dev batches are evaluated, the best
    dev loss is tracked, and training stops early once the patience counter
    reaches 1000.

    Relies on the names ``header`` and ``slack``, which are not defined in
    this function and must be provided by the enclosing module.

    Args:
        config: run configuration; ``emb_mat`` is written back.
    """
    np.set_printoptions(threshold=np.inf)
    train_data = read_data(config, 'train', config.load)
    dev_data = read_data(config, 'dev', True)
    update_config(config, [train_data, dev_data])

    _config_debug(config)

    word2vec_dict = train_data.shared[
        'lower_word2vec'] if config.lower_word else train_data.shared[
            'word2vec']
    word2idx_dict = train_data.shared['word2idx']
    idx2vec_dict = {
        word2idx_dict[word]: vec
        for word, vec in word2vec_dict.items() if word in word2idx_dict
    }
    # Indices without a pretrained vector each get their own random draw.
    emb_mat = np.array([
        idx2vec_dict[idx]
        if idx in idx2vec_dict else np.random.multivariate_normal(
            np.zeros(config.word_emb_size), np.eye(config.word_emb_size))
        for idx in range(config.word_vocab_size)
    ])
    config.emb_mat = emb_mat

    def make_idx2word():
        """Return a dict mapping word index to word.

        Indices from ``new_word2idx`` are offset by the size of ``word2idx``
        when ``config.use_glove_for_unk`` is set.
        """
        idx2word = {}
        d = train_data.shared['word2idx']
        for word, idx in d.items():
            # NOTE(review): prints every vocabulary word; looks like leftover
            # debugging output.
            print(word)
            idx2word[idx] = word
        if config.use_glove_for_unk:
            d2 = train_data.shared['new_word2idx']
            for word, idx in d2.items():
                print(word)
                idx2word[idx + len(d)] = word
        return idx2word

    idx2word = make_idx2word()
    # Report the total number of words in this dictionary: GloVe words plus
    # the special tokens (UNK, POS, etc.).
    print("size of config.id2word len:", len(idx2word))
    print("size of config.total_word_vocab_size:", config.total_word_vocab_size)

    # construct model graph and variables (using default graph)
    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    print("num params: {}".format(get_num_params()))
    trainer = MultiGPUTrainer(config, models)
    evaluator = MultiGPUEvaluator(
        config, models, tensor_dict=model.tensor_dict if config.vis else None)
    # Presumably owns summary writing and checkpoint save/load -- confirm in
    # GraphHandler.
    graph_handler = GraphHandler(
        config, model
    )

    # Variables
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_steps = config.num_steps or int(
        math.ceil(train_data.num_examples /
                  (config.batch_size * config.num_gpus))) * config.num_epochs

    # Best dev result so far plus the early-stopping patience counter.
    min_val = {}
    min_val['loss'] = 100.0
    min_val['acc'] = 0
    min_val['step'] = 0
    min_val['patience'] = 0

    for batches in tqdm(train_data.get_multi_batches(config.batch_size,
                                                     config.num_gpus,
                                                     num_steps=num_steps,
                                                     shuffle=True,
                                                     cluster=config.cluster),
                        total=num_steps):
        global_step = sess.run(
            model.global_step
        ) + 1  # +1 because all calculations are done after step
        get_summary = global_step % config.log_period == 0
        loss, summary, train_op = trainer.step(sess,
                                               batches,
                                               get_summary=get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        # occasional saving
        if global_step % config.save_period == 0:
            graph_handler.save(sess, global_step=global_step)

        if not config.eval:
            continue
        # Occasional evaluation
        if global_step % config.eval_period == 0:
            # NOTE: this rebinds `num_steps` (previously the total number of
            # training steps) to the number of evaluation steps.
            num_steps = math.ceil(dev_data.num_examples /
                                  (config.batch_size * config.num_gpus))
            if 0 < config.val_num_batches < num_steps:
                # A positive val_num_batches smaller than the full dev pass
                # caps the number of evaluated steps.
                num_steps = config.val_num_batches

            # The train loss is computed over the same number of steps as the
            # dev evaluation.
            e_train = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(train_data.get_multi_batches(config.batch_size,
                                                  config.num_gpus,
                                                  num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_train.summaries, global_step)

            # This e_dev may differ from the dev set used at test time because
            # some data is filtered out here.
            e_dev = evaluator.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_multi_batches(config.batch_size,
                                                config.num_gpus,
                                                num_steps=num_steps),
                     total=num_steps))
            graph_handler.add_summaries(e_dev.summaries, global_step)
            print("%s e_train: loss=%.4f" % (header, e_train.loss))
            print("%s e_dev: loss=%.4f" % (header, e_dev.loss))
            print()
            if min_val['loss'] > e_dev.loss:
                min_val['loss'] = e_dev.loss
                min_val['step'] = global_step
                min_val['patience'] = 0
            else:
                min_val['patience'] = min_val['patience'] + 1
                # Early stopping after 1000 evaluations without improvement.
                if min_val['patience'] >= 1000:
                    slack.notify(
                        text="%s patience reached %d. early stopping." %
                        (header, min_val['patience']))
                    break

            slack.notify(text="%s e_dev: loss=%.4f" % (header, e_dev.loss))

            if config.dump_eval:
                graph_handler.dump_eval(e_dev)
            if config.dump_answer:
                graph_handler.dump_answer(e_dev)

    slack.notify(
        text=
        "%s <@U024BE7LH|insikk> Train is finished. e_dev: loss=%.4f at step=%d\nPlease assign another task to get more research result"
        % (header, min_val['loss'], min_val['step']))
    # Final save when training did not end exactly on a save step.
    if global_step % config.save_period != 0:
        graph_handler.save(sess, global_step=global_step)
def train(model_params):
    """Build the VTT model selected by ``FLAGS.dataset_type`` and run its training loop.

    Reads the train/validation/test JSON splits from ``FLAGS.data_dir``, loads
    the dictionaries from ``model_params.dict_dir`` (or builds and writes them
    when ``FLAGS.use_dicts`` is falsy), constructs the model under the
    ``'model'`` variable scope and trains it. Training evaluates on the train
    and validation sets every ``model_params.eval_period`` steps and on the
    last epoch, and stops when train accuracy exceeds validation accuracy by
    more than 0.02 or when ``epoch == FLAGS.epochs``.

    Args:
        model_params: parameter object; this function reads ``dict_dir``,
            ``glove_path``, ``batch_size``, ``log_period`` and ``eval_period``
            from it and writes ``load_model`` (and ``load_path`` when
            ``FLAGS.load_path`` is set).
    """
    data_dir = FLAGS.data_dir
    dataset_type = FLAGS.dataset_type

    def split_path(tag):
        # <data_dir>/<tag>_<dataset_type>.<json tag>
        return os.path.join(data_dir, tag + '_' + dataset_type + '.' + _JSON_TAG)

    filepaths = [split_path(tag) for tag in (_TRAIN_TAG, _VALIDATION_TAG, _TEST_TAG)]
    # The test split is loaded together with the others but is not used below.
    train_data, validation_data, test_data = utils_file.get_spilited_data(filepaths)

    dicts_file = os.path.join(
        model_params.dict_dir, 'dicts' + '_' + dataset_type + '.' + _JSON_TAG)
    if not FLAGS.use_dicts:
        dicts = preprocess_data.build_dicts(
            train_data, model_params.glove_path, dataset_type)
        utils_file.write_file(dicts_file, dicts, _JSON_TAG)
    else:
        dicts = utils_file.read_file(dicts_file, _JSON_TAG)

    is_binary = dataset_type == 'binary'
    train_data = dataset.VTTExample(
        train_data, 'train', model_params, dicts, is_binary)
    valid_data = dataset.VTTExample(
        validation_data, 'validation', model_params, dicts, is_binary)

    emb_mat_token = train_data.emb_mat_token
    emb_mat_glove = train_data.emb_mat_glove

    with tf.variable_scope('model') as scope:
        # Leading positional arguments shared by both model constructors.
        common_args = (
            emb_mat_token,
            emb_mat_glove,
            len(train_data.dicts['token']),
            len(train_data.dicts['char']),
            train_data.max_token_size,
        )
        if dataset_type == 'multiple':
            model = Model_Selector(
                *common_args + (train_data.max_ans_size, model_params, scope.name))
        else:
            model = Model(*common_args, model_params=model_params, scope=scope.name)

    model_params.load_model = FLAGS.is_load
    if FLAGS.load_path:
        model_params.load_path = FLAGS.load_path

    graphHandler = GraphHandler(model, model_params)
    evaluator = Evaluator(model, model_params, is_binary)
    perform_recoder = PerformRecoder(model_params)

    graph_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(), allow_soft_placement=True)
    sess = tf.Session(config=graph_config)
    graphHandler.initialize(sess)

    steps_per_epoch = int(math.ceil(train_data.sample_num / model_params.batch_size))
    num_steps = FLAGS.num_steps or FLAGS.epochs * steps_per_epoch

    global_step = 0
    for sample_batch, _batch_num, epoch, _batch_idx in train_data.generate_batch_sample(num_steps):
        # +1 because the step counter is read before this step is applied.
        global_step = sess.run(model.global_step) + 1
        if_get_summary = global_step % model_params.log_period == 0
        loss, summary, _train_op = model.step(
            sess, sample_batch, get_summary=if_get_summary)

        if global_step % 100 == 0:
            tf.logging.info('global_steps : %d' % global_step)
            tf.logging.info('loss : %.4f' % loss)

        if if_get_summary:
            graphHandler.add_summary(summary, global_step)

        last_epoch = epoch == FLAGS.epochs
        if global_step % model_params.eval_period != 0 and not last_epoch:
            continue

        # Periodic (and final-epoch) evaluation on both splits.
        train_loss, train_acc, _train_dict = evaluator.get_evaluation(
            sess, train_data, global_step)
        tf.logging.info('train loss : %.4f accuracy %.4f' % (train_loss, train_acc))
        dev_loss, dev_acc, _dev_dict = evaluator.get_evaluation(
            sess, valid_data, global_step)
        tf.logging.info('validation loss : %.4f accuracy %.4f' % (dev_loss, dev_acc))

        _is_in_top, _deleted_step = perform_recoder.update_top_list(
            global_step, dev_acc, sess)

        # Stop once the train/validation accuracy gap exceeds 0.02, or when
        # the final epoch has been evaluated.
        if train_acc - dev_acc > 0.02 or last_epoch:
            break
def _train(config):
    """Train the model described by ``config``, saving whenever dev ``fv`` improves.

    Steps, in order:
      1. Load the vocabulary (``word2idx`` / ``idx2word``) and the pretrained
         vectors from ``../data/<data_from>/`` and store the resulting
         embedding matrix and vocabulary size on ``config``. Words without a
         pretrained vector all share one randomly drawn embedding.
      2. Load the cached train/test (``dev`` for ``"ice"``) JSON dictionaries
         when the files exist, then build the train and dev data sets. For
         ``"reuters"`` and ``"20newsgroup"`` a missing cache falls back to
         ``read_reuters`` / ``read_news``.
      3. Build the model, trainer, graph handler and session, then run the
         training loop. When ``config.eval`` is set, the dev set is evaluated
         every ``config.eval_period`` steps and the model is saved each time
         ``e_dev.fv`` beats the best value seen so far (starting at 0.50).

    Args:
        config: flags object. It is read throughout and also mutated here:
            ``emb_mat``, ``vocab_size``, ``train_size`` and ``dev_size`` are
            set, and ``max_docs_length`` is capped at 2000.

    Raises:
        ValueError: if ``config.data_from`` is not ``"reuters"``,
            ``"20newsgroup"`` or ``"ice"``.
    """

    def load_json(path):
        # Use a context manager so the file handle is closed deterministically.
        with open(path, "r") as handle:
            return json.load(handle)

    # ---- vocabulary and pretrained embeddings -----------------------------
    # The vocabulary file is parsed once; both mappings come from it.
    vocab = load_json("../data/{}/word2idx_{}.json".format(
        config.data_from, config.data_from))
    word2idx = Counter(vocab["word2idx"])
    idx2word = vocab["idx2word"]
    assert len(word2idx) == len(idx2word)
    for i in range(10):
        assert word2idx[idx2word[i]] == i
    vocab_size = len(word2idx)
    print("vocab_size", vocab_size, idx2word[:10])

    word2vec = Counter(load_json("../data/{}/word2vec_{}.json".format(
        config.data_from, config.pretrain_from))["word2vec"])
    idx2vec = {word2idx[word]: vec
               for word, vec in word2vec.items() if word in word2idx}
    print("no unk words:", len(idx2vec))
    # One shared random vector stands in for every word lacking a pretrained one.
    unk_embedding = np.random.multivariate_normal(
        np.zeros(config.word_embedding_size),
        np.eye(config.word_embedding_size))
    config.emb_mat = np.array(
        [idx2vec.get(idx, unk_embedding) for idx in range(vocab_size)])
    config.vocab_size = vocab_size
    print("emb_mat:", config.emb_mat.shape)

    # ---- cached data dictionaries -----------------------------------------
    test_type = "dev" if config.data_from == "ice" else "test"
    ice_flat = ""
    if config.data_from == "ice" and config.model_name.endswith("flat"):
        ice_flat = "_flat"

    split_template = "../data/{}/{}_{}{}{}.json"
    train_path = split_template.format(
        config.data_from, config.data_from, "train", ice_flat, config.clftype)
    test_path = split_template.format(
        config.data_from, config.data_from, test_type, ice_flat, config.clftype)
    train_dict = load_json(train_path) if os.path.exists(train_path) else {}
    test_dict = load_json(test_path) if os.path.exists(test_path) else {}

    # Sanity print: peek at the beginning of every field of the train data.
    for key, val in train_dict.items():
        if isinstance(val[0], list) and len(val[0]) > 10:
            print(key, val[0][:50])
        else:
            print(key, val[0:4])
    print("train:", len(train_dict))
    print("test:", len(test_dict))

    # ---- data sets ---------------------------------------------------------
    if config.data_from == "reuters":
        train_data = DataSet(train_dict, "train") if train_dict else read_reuters(
            config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else read_reuters(
            config, data_type="test", word2idx=word2idx)
    elif config.data_from == "20newsgroup":
        train_data = DataSet(train_dict, "train") if train_dict else read_news(
            config, data_type="train", word2idx=word2idx)
        dev_data = DataSet(test_dict, "test") if test_dict else read_news(
            config, data_type="test", word2idx=word2idx)
    elif config.data_from == "ice":
        train_data = DataSet(train_dict, "train")
        dev_data = DataSet(test_dict, "dev")
    else:
        # Previously this fell through and failed later with an unbound
        # ``train_data``; fail early with an explicit message instead.
        raise ValueError(
            "unsupported config.data_from: {!r}".format(config.data_from))

    config.train_size = train_data.get_data_size()
    config.dev_size = dev_data.get_data_size()
    print("train/dev:", config.train_size, config.dev_size)

    # Average document length, for information only. The cached dict may be
    # empty (data came from read_reuters / read_news), so do not index
    # "x_len" unconditionally.
    avg_len = sum(d_l / config.train_size for d_l in train_dict.get("x_len", []))
    print("avg_len at train:", avg_len)
    if config.max_docs_length > 2000:
        config.max_docs_length = 2000

    # ---- model, session ----------------------------------------------------
    pprint(config.__flags, indent=2)
    model = get_model(config)
    trainer = Trainer(config, model)
    graph_handler = GraphHandler(config, model)
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    num_batches = config.num_batches or int(
        math.ceil(train_data.num_examples / config.batch_size)) * config.num_epochs
    global_step = 0
    dev_evaluate = Evaluator(config, model)
    best_f1 = 0.50  # a checkpoint is written only once dev fv exceeds this

    # ---- training loop -----------------------------------------------------
    train_batches = train_data.get_batches(
        config.batch_size, num_batches=num_batches, shuffle=True,
        cluster=config.cluster)
    for batch in tqdm(train_batches, total=num_batches):
        # +1 because the step counter is read before this step is applied.
        global_step = sess.run(model.global_step) + 1
        # Fetch a summary only on every ``log_period``-th step. The original
        # omitted ``== 0``, which inverted the condition.
        get_summary = global_step % config.log_period == 0
        loss, summary, _ = trainer.step(sess, batch, get_summary)
        if get_summary:
            graph_handler.add_summary(summary, global_step)

        if not config.eval:
            continue

        # Occasional evaluation on the dev set.
        if global_step % config.eval_period == 0:
            num_steps = math.ceil(dev_data.num_examples / config.test_batch_size)
            if 0 < config.val_num_batches < num_steps:
                # Cap the evaluation at ``val_num_batches`` batches.
                num_steps = config.val_num_batches
            e_dev = dev_evaluate.get_evaluation_from_batches(
                sess,
                tqdm(dev_data.get_batches(config.test_batch_size,
                                          num_batches=num_steps),
                     total=num_steps))
            if e_dev.fv > best_f1:
                # Checkpoint only when the dev score improves.
                best_f1 = e_dev.fv
                graph_handler.save(sess, global_step=global_step)
            graph_handler.add_summaries(e_dev.summaries, global_step)

    print("f1:", best_f1)
np.random.normal(size=(BATCH_SIZE, 128)).astype('float32')) all_fixed_noise_samples = Generator(BATCH_SIZE, labels, noise=fixed_noise) def generate_image(iteration): samples = session.run(all_fixed_noise_samples) samples = ((samples + 1.) * (255.99 / 2)).astype('int32') lib.save_images.save_images( samples.reshape((BATCH_SIZE, 3, 64, 64)), 'samples_{}.png'.format(iteration)) coord = tf.train.Coordinator() queue_runner = tf.train.start_queue_runners(sess=session, coord=coord) # Train loop graph_handler = GraphHandler(config) graph_handler.initialize(session) session.run(tf.global_variables_initializer()) # 训练 session.run(tf.assign(is_train, tf.constant(True, dtype=tf.bool))) for _ in range(ITERS): start_time = time.time() iteration = session.run(global_step) + 1 # Train generator if iteration > 0: _ = session.run(gen_train_op) # Train critic # model hot start if (MODE == 'dcgan') or (MODE == 'lsgan'): disc_iters = 1