def compute_qgen_accuracy(sess, dataset, batchifier, evaluator, mode, tokenizer,
                          save_path, cpu_pool, batch_size, store_games, dump_suffix):
    """Evaluate the QGen for every requested decoding mode and log the accuracy.

    For each mode (beam_search excluded) the dataset is iterated once through
    the evaluator; if `store_games` is set, the generated dialogues are dumped
    as a new dataset named "<dump_suffix>.<mode>".
    """
    logger = logging.getLogger()

    for m in mode:
        # Beam search is handled elsewhere; skip it here.
        if m == "beam_search":
            continue

        test_iterator = Iterator(dataset,
                                 pool=cpu_pool,
                                 batch_size=batch_size,
                                 batchifier=batchifier,
                                 shuffle=False,
                                 use_padding=True)
        test_score = evaluator.process(sess, test_iterator, mode=m,
                                       store_games=store_games)

        # Retrieve the generated games and dump them as a dataset
        if store_games:
            generated_dialogues = evaluator.get_storage()
            dump_samples_into_dataset(generated_dialogues,
                                      save_path=save_path,
                                      tokenizer=tokenizer,
                                      name=dump_suffix + "." + m)

        logger.info("Accuracy ({} - {}): {}".format(dataset.set, m, test_score))
def test_qgen(sess, testset, tokenizer, qgen, cpu_pool, batch_size, logger):
    """Compute and log the QGen maximum-likelihood loss on the test set."""
    sources = qgen.get_sources(sess)
    evaluator = Evaluator(sources, qgen.scope_name, network=qgen, tokenizer=tokenizer)
    batchifier = QuestionerBatchifier(tokenizer, sources, status=('success',))

    iterator = Iterator(testset,
                        pool=cpu_pool,
                        batch_size=batch_size,
                        batchifier=batchifier)
    [loss] = evaluator.process(sess, iterator, outputs=[qgen.ml_loss])

    logger.info("QGen test loss: {}".format(loss))
def test_guesser(sess, testset, tokenizer, guesser, cpu_pool, batch_size, logger):
    """Compute and log the Guesser loss and error rate on the test set."""
    sources = guesser.get_sources(sess)
    evaluator = Evaluator(sources, guesser.scope_name, network=guesser, tokenizer=tokenizer)
    batchifier = QuestionerBatchifier(tokenizer, sources, status=('success',))

    iterator = Iterator(testset,
                        pool=cpu_pool,
                        batch_size=batch_size,
                        batchifier=batchifier)
    [loss, error] = evaluator.process(sess, iterator, [guesser.loss, guesser.error])

    logger.info("Guesser test loss: {}".format(loss))
    logger.info("Guesser test error: {}".format(error))
def test_oracle(sess, testset, tokenizer, oracle, cpu_pool, batch_size, logger):
    """Compute and log the Oracle loss and error rate on the test set.

    The raw test set is first wrapped into an OracleDataset (one entry per
    question/answer pair).
    """
    oracle_testset = OracleDataset(testset)

    sources = oracle.get_sources(sess)
    evaluator = Evaluator(sources, oracle.scope_name, network=oracle, tokenizer=tokenizer)
    batchifier = OracleBatchifier(tokenizer, sources, status=('success',))

    iterator = Iterator(oracle_testset,
                        pool=cpu_pool,
                        batch_size=batch_size,
                        batchifier=batchifier)
    [loss, error] = evaluator.process(sess, iterator, [oracle.loss, oracle.error])

    logger.info("Oracle test loss: {}".format(loss))
    logger.info("Oracle test error: {}".format(error))
# create training tools evaluator = Evaluator(sources, network.scope_name, network=network, tokenizer=tokenizer) batchifier = QuestionerBatchifier(tokenizer, sources, status=('success', )) for t in range(start_epoch, no_epoch): logger.info('Epoch {}..'.format(t + 1)) train_iterator = Iterator(trainset, batch_size=batch_size, pool=cpu_pool, batchifier=batchifier, shuffle=True) train_loss, train_accuracy = evaluator.process(sess, train_iterator, outputs=outputs + [optimizer]) valid_iterator = Iterator(validset, pool=cpu_pool, batch_size=batch_size * 2, batchifier=batchifier, shuffle=False) valid_loss, valid_accuracy = evaluator.process(sess, valid_iterator, outputs=outputs)
def extract_raw(
        image_shape,
        dataset_cstor,
        dataset_args,
        batchifier_cstor,
        source_name,
        out_dir,
        set_type,
        no_threads,
):
    """Dump raw image data for every requested set into one h5 file per set.

    Each output file contains a 'features' dataset of shape
    [n_images] + image_shape and an 'idx2img' dataset mapping row index
    to image id.
    """
    for one_set in set_type:

        ############################
        #  LOAD DATASET
        ############################
        print("Load dataset...")
        dataset_args["which_set"] = one_set
        dataset = dataset_cstor(**dataset_args)

        # hack dataset to only keep one game by image
        seen_image_ids = {}
        unique_games = []
        for game in dataset.games:
            if game.image.id not in seen_image_ids:
                unique_games.append(game)
                seen_image_ids[game.image.id] = 1
        dataset.games = unique_games
        no_images = len(unique_games)

        # prepare batch builder
        dummy_tokenizer = DummyTokenizer()
        batchifier = batchifier_cstor(tokenizer=dummy_tokenizer, sources=[source_name])
        cpu_pool = Pool(no_threads, maxtasksperchild=1000)
        iterator = Iterator(dataset,
                            batch_size=64,
                            pool=cpu_pool,
                            batchifier=batchifier)

        filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
        with h5py.File(filepath, 'w') as f:
            feat_dataset = f.create_dataset('features',
                                            shape=[no_images] + image_shape,
                                            dtype=np.float32)
            idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32)

            write_pos = 0  # next free row in the h5 datasets
            for batch in tqdm(iterator):
                # Store raw image data for this batch
                n_in_batch = len(batch["raw"])
                feat_dataset[write_pos:write_pos + n_in_batch] = batch[source_name]

                # Store row-index -> image.id mapping
                for i, game in enumerate(batch["raw"]):
                    idx2img[write_pos + i] = game.image.id

                write_pos += n_in_batch

        print("Start dumping file: {}".format(filepath))
        print("Finished dumping file: {}".format(filepath))

    print("Done!")
def extract_features(
        img_input,
        ft_output,
        network_ckpt,
        dataset_cstor,
        dataset_args,
        batchifier_cstor,
        out_dir,
        set_type,
        batch_size,
        no_threads,
        gpu_ratio):
    """Run a pretrained network over every (unique) image of the requested
    sets and dump the resulting features into one h5 file per set.

    Args:
        img_input: input placeholder tensor fed with image batches.
        ft_output: tensor whose evaluation yields the features to store.
        network_ckpt: checkpoint path restored into the session.
        dataset_cstor / dataset_args: dataset constructor and its kwargs
            ("which_set" is overwritten for each set).
        batchifier_cstor: batchifier constructor.
        out_dir: directory receiving "<set>_features.h5" files.
        set_type: iterable of set names (e.g. train/valid/test).
        batch_size: images per sess.run call.
        no_threads: CPU pool size for the iterator.
        gpu_ratio: per-process GPU memory fraction.
    """
    # CPU/GPU option
    cpu_pool = Pool(no_threads, maxtasksperchild=1000)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          allow_soft_placement=True)) as sess:
        saver = tf.train.Saver()
        saver.restore(sess, network_ckpt)

        for one_set in set_type:
            print("Load dataset -> set: {}".format(one_set))
            dataset_args["which_set"] = one_set
            dataset = dataset_cstor(**dataset_args)

            # hack dataset to only keep one game by image
            image_id_set = {}
            games = []
            for game in dataset.games:
                if game.image.id not in image_id_set:
                    games.append(game)
                    image_id_set[game.image.id] = 1
            dataset.games = games
            no_images = len(games)

            # Tensor names look like "scope/name:0" -> strip ":0" to get the source key
            source_name = os.path.basename(img_input.name[:-2])

            dummy_tokenizer = DummyTokenizer()
            batchifier = batchifier_cstor(tokenizer=dummy_tokenizer, sources=[source_name])
            iterator = Iterator(dataset,
                                batch_size=batch_size,
                                pool=cpu_pool,
                                batchifier=batchifier)

            ############################
            #  CREATE FEATURES
            ############################
            print("Start computing image features...")
            filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
            with h5py.File(filepath, 'w') as f:

                # Feature shape without the (variable) batch dimension
                ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]]
                ft_dataset = f.create_dataset('features',
                                              shape=[no_images] + ft_shape,
                                              dtype=np.float32)
                idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32)
                pt_hd5 = 0

                for batch in tqdm(iterator):
                    feat = sess.run(ft_output,
                                    feed_dict={img_input: numpy.array(batch[source_name])})

                    # BUGFIX: use a dedicated local instead of clobbering the
                    # `batch_size` parameter (the final batch is usually smaller).
                    n_in_batch = len(batch["raw"])
                    ft_dataset[pt_hd5: pt_hd5 + n_in_batch] = feat

                    # Store row-index -> image.id mapping
                    for i, game in enumerate(batch["raw"]):
                        idx2img[pt_hd5 + i] = game.image.id

                    # update hd5 pointer
                    pt_hd5 += n_in_batch

                print("Start dumping file: {}".format(filepath))
            # BUGFIX: this message was a string literal broken across a physical
            # line break (syntax error); restored to a single-line message to
            # match the sibling extractors in this file.
            print("Finished dumping file: {}".format(filepath))

    print("Done!")
# CPU/GPU option
cpu_pool = Pool(args.no_thread, maxtasksperchild=1000)
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_ratio)

with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                      allow_soft_placement=True)) as sess:
    saver = tf.train.Saver()
    saver.restore(sess, args.ckpt)

    features = dict()
    for one_set in args.set_type:

        print("Load dataset -> set: {}".format(one_set))
        dataset = OracleDataset.load(args.data_dir, one_set,
                                     image_loader=image_loader,
                                     crop_loader=crop_loader)
        batchifier = OracleBatchifier(tokenizer=None, sources=[source])
        iterator = Iterator(dataset,
                            batch_size=args.batch_size,
                            pool=cpu_pool,
                            batchifier=batchifier)

        for batch in tqdm(iterator):
            feat = sess.run(end_points[feature_name],
                            feed_dict={images: numpy.array(batch[source])})
            for feat_vec, game in zip(feat, batch["raw"]):
                feat_vec = feat_vec.squeeze()

                # Crop features are keyed by object id, full images by picture id
                # (renamed from `id`, which shadowed the builtin).
                if args.mode == "crop":
                    file_id = game.object_id
                else:
                    file_id = game.picture.id

                if args.network == "resnet":
                    # BUGFIX: dump the computed feature array; the original
                    # passed the literal string "features" (x="features"),
                    # so the saved .npz files contained no features at all.
                    np.savez_compressed(os.path.join(out_dir, "{}.npz".format(file_id)),
                                        x=feat_vec)
# NOTE(review): script-level fragment; names such as `loop_sources`,
# `qgen_network`, `oracle_network`, `guesser_network`, `loop_config`,
# `testset`, `trainset`, `sess` and `logger` come from earlier in the file.
evaluator = Evaluator(loop_sources, qgen_network.scope_name, network=qgen_network, tokenizer=tokenizer)
train_batchifier = LooperBatchifier(tokenizer, loop_sources, train=True)
eval_batchifier = LooperBatchifier(tokenizer, loop_sources, train=False)

# Initialize the looper to eval/train the game-simulation
qgen_network.build_sampling_graph(qgen_config["model"], tokenizer=tokenizer, max_length=loop_config['loop']['max_depth'])
looper_evaluator = BasicLooper(loop_config,
                               oracle=oracle_network,
                               guesser=guesser_network,
                               qgen=qgen_network,
                               tokenizer=tokenizer)

# Baseline: success ratio of the untrained loop with sampling decoding
test_iterator = Iterator(testset,
                         pool=cpu_pool,
                         batch_size=batch_size,
                         batchifier=eval_batchifier,
                         shuffle=False,
                         use_padding=True)
test_score = looper_evaluator.process(sess, test_iterator, mode="sampling")
logger.info("Test success ratio (Init-Sampling): {}".format(test_score))

logs = []

# Start training
final_val_score = 0.
for epoch in range(no_epoch):
    logger.info("Epoch {}/{}".format(epoch, no_epoch))

    # NOTE(review): this Iterator(...) call is truncated at the end of the
    # visible chunk — the remaining arguments continue outside this view.
    train_iterator = Iterator(trainset, batch_size=batch_size, pool=cpu_pool, batchifier=train_batchifier,
def extract_features(img_input,
                     ft_output,
                     network_ckpt,
                     dataset_cstor,
                     dataset_args,
                     batchifier_cstor,
                     out_dir,
                     set_type,
                     batch_size,
                     no_threads,
                     gpu_ratio):
    """Run a pretrained network over every (unique) image of the requested
    sets and dump the resulting features into one h5 file per set.

    Same contract as the sibling extractor in this file; this variant passes
    the tokenizer via the `tokenizer_question` keyword of the batchifier.

    Cleanups applied in review: removed the "+++ N"/"--- N" debug prints,
    the dead `nb_total_trouve`/`nb_nonTrouve` counters and the large
    commented-out file-probing experiment; renamed the loop-local that
    clobbered the `batch_size` parameter.
    """
    # CPU/GPU option
    cpu_pool = Pool(no_threads, maxtasksperchild=1000)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_ratio)

    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        saver.restore(sess, network_ckpt)

        for one_set in set_type:

            print("Load dataset -> set: {}".format(one_set))
            dataset_args["which_set"] = one_set
            dataset = dataset_cstor(**dataset_args)

            # hack dataset to only keep one game by image
            image_id_set = {}
            games = []
            for game in dataset.games:
                if game.image.id not in image_id_set:
                    games.append(game)
                    image_id_set[game.image.id] = 1
            dataset.games = games
            no_images = len(games)

            # Tensor names look like "scope/name:0" -> strip ":0" to get the source key
            source_name = os.path.basename(img_input.name[:-2])

            dummy_tokenizer = DummyTokenizer()
            batchifier = batchifier_cstor(tokenizer_question=dummy_tokenizer,
                                          sources=[source_name])
            iterator = Iterator(dataset,
                                batch_size=batch_size,
                                pool=cpu_pool,
                                batchifier=batchifier)

            ############################
            #  CREATE FEATURES
            ############################
            print("Start computing image features...")
            filepath = os.path.join(out_dir, "{}_features.h5".format(one_set))
            with h5py.File(filepath, 'w') as f:

                # Feature shape without the (variable) batch dimension
                ft_shape = [int(dim) for dim in ft_output.get_shape()[1:]]
                ft_dataset = f.create_dataset('features',
                                              shape=[no_images] + ft_shape,
                                              dtype=np.float32)
                idx2img = f.create_dataset('idx2img', shape=[no_images], dtype=np.int32)
                pt_hd5 = 0

                for batch in tqdm(iterator):
                    feat = sess.run(ft_output,
                                    feed_dict={img_input: numpy.array(batch[source_name])})

                    # Use a dedicated local instead of clobbering the
                    # `batch_size` parameter (the final batch is usually smaller).
                    n_in_batch = len(batch["raw"])
                    ft_dataset[pt_hd5:pt_hd5 + n_in_batch] = feat

                    # Store row-index -> image.id mapping
                    for i, game in enumerate(batch["raw"]):
                        idx2img[pt_hd5 + i] = game.image.id

                    # update hd5 pointer
                    pt_hd5 += n_in_batch

                print("Start dumping file: {}".format(filepath))
            print("Finished dumping file: {}".format(filepath))

    print("Done!")
from clevr.data_provider.clevr_dataset import CLEVRDataset
from clevr.data_provider.clevr_batchifier import CLEVRBatchifier

if __name__ == "__main__":
    # Smoke test: iterate once over the CLEVR validation set to check that
    # the h5 feature loader and the batchifier work end to end.
    feat_dir = "/media/datas2/tmp"
    data_dir = "/home/sequel/fstrub/clevr_data"

    image_builder = h5FeatureBuilder(img_dir=feat_dir, bufferize=False)

    print("Load datasets...")
    dataset = CLEVRDataset(folder=data_dir, which_set="val", image_builder=image_builder)

    cpu_pool = ThreadPool(1)
    dummy_tokenizer = DummyTokenizer()
    batchifier = CLEVRBatchifier(tokenizer=dummy_tokenizer, sources=["image"])
    iterator = Iterator(dataset,
                        batch_size=64,
                        pool=cpu_pool,
                        batchifier=batchifier)

    # Consuming every batch is the test itself; the content is discarded.
    for _ in tqdm(iterator):
        pass

    print("Done!")
logs = []

# Start training
final_val_score = 0.
for epoch in range(no_epoch):
    # Optional escape hatch: jump straight to evaluation.
    if args.skip_training:
        logger.info("Skip training...")
        break

    logger.info("Epoch {}/{}".format(epoch, no_epoch))

    # Fresh thread pool each epoch
    cpu_pool = create_cpu_pool(args.no_thread, use_process=False)

    # One training pass (optimizer applied) over the training games
    train_iterator = Iterator(trainset,
                              batch_size=batch_size,
                              pool=cpu_pool,
                              shuffle=True,
                              batchifier=train_batchifier)
    train_accuracy, _ = game_engine.process(sess, train_iterator,
                                            optimizer=optimizer,
                                            mode="sampling")

    # One evaluation pass (no optimizer) over the validation games
    valid_iterator = Iterator(validset,
                              pool=cpu_pool,
                              batch_size=batch_size,
                              batchifier=eval_batchifier,
                              shuffle=False)
    val_accuracy, games = game_engine.process(sess, valid_iterator, mode="sampling")

    logger.info("Accuracy (train - sampling) : {}".format(train_accuracy))
    logger.info("Accuracy (valid - sampling) : {}".format(val_accuracy))