def load_eval_data(max_num=0):
    """Construct the evaluation-split DataReader.

    Args:
        max_num: cap on the number of examples to load; 0 means no cap.

    Returns:
        (data_reader, num_vocab, num_choices) where num_vocab is the size of
        the question vocabulary and num_choices the size of the answer
        vocabulary, both taken from the reader's batch loader.
    """
    reader_kwargs = dict(
        shuffle=False,
        max_num=max_num,
        batch_size=cfg.TEST.BATCH_SIZE,
        vocab_question_file=cfg.VOCAB_QUESTION_FILE,
        T_encoder=cfg.T_ENCODER,
        vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
        load_spatial_feature=True,
        spatial_feature_dir=cfg.SPATIAL_FEATURE_DIR,
        add_pos_enc=cfg.ADD_POS_ENC,
        img_H=cfg.IMG_H,
        img_W=cfg.IMG_W,
        pos_enc_dim=cfg.PE_DIM,
        pos_enc_scale=cfg.PE_SCALE,
    )
    reader = DataReader(cfg.IMDB_FILE % cfg.TEST.SPLIT_REF, **reader_kwargs)
    loader = reader.batch_loader
    return reader, loader.vocab_dict.num_vocab, loader.answer_dict.num_vocab
def load_train_data(max_num=0):
    """Construct the training-split DataReader and time its creation.

    Args:
        max_num: cap on the number of examples to load; 0 means no cap.

    Returns:
        (data_reader, num_vocab, num_choices) where num_vocab is the size of
        the question vocabulary and num_choices the size of the answer
        vocabulary, both taken from the reader's batch loader.

    Side effects:
        Prints the wall-clock time spent building the reader.
    """
    load_train_time = time.time()
    imdb_file = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_REF
    data_reader = DataReader(
        imdb_file, shuffle=True, max_num=max_num,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        vocab_question_file=cfg.VOCAB_QUESTION_FILE,
        T_encoder=cfg.T_ENCODER,
        vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
        load_spatial_feature=True,
        spatial_feature_dir=cfg.SPATIAL_FEATURE_DIR,
        add_pos_enc=cfg.ADD_POS_ENC,
        img_H=cfg.IMG_H, img_W=cfg.IMG_W,
        pos_enc_dim=cfg.PE_DIM, pos_enc_scale=cfg.PE_SCALE)
    num_vocab = data_reader.batch_loader.vocab_dict.num_vocab
    num_choices = data_reader.batch_loader.answer_dict.num_vocab
    print('load_train_time: ', time.time() - load_train_time)
    return data_reader, num_vocab, num_choices
# ---- CLEVR evaluation setup: data reader, output paths, and network inputs ----
# Vocabulary / data files for the CLEVR experiment.
vocab_layout_file = './exp_clevr/data/vocabulary_layout.txt'
vocab_answer_file = './exp_clevr/data/answers_clevr.txt'
imdb_file_tst = './exp_clevr/data/imdb/imdb_%s.npy' % tst_image_set
# Output paths are keyed by experiment, snapshot, and test image set;
# parent directories are created up front so later writes cannot fail on
# a missing directory.
save_file = './exp_clevr/results/%s/%s.%s.txt' % (exp_name, snapshot_name, tst_image_set)
os.makedirs(os.path.dirname(save_file), exist_ok=True)
eval_output_file = './exp_clevr/eval_outputs/%s/%s.%s.txt' % (exp_name, snapshot_name, tst_image_set)
os.makedirs(os.path.dirname(eval_output_file), exist_ok=True)
# Assembler maps layout tokens to neural-module names.
assembler = Assembler(vocab_layout_file)
# Test-split reader: no shuffling and one_pass=True so every example is
# visited exactly once during evaluation.
data_reader_tst = DataReader(imdb_file_tst, shuffle=False, one_pass=True, batch_size=N, T_encoder=T_encoder, T_decoder=T_decoder, assembler=assembler, vocab_question_file=vocab_question_file, vocab_answer_file=vocab_answer_file, prune_filter_module=prune_filter_module)
num_vocab_txt = data_reader_tst.batch_loader.vocab_dict.num_vocab   # question vocab size
num_vocab_nmn = len(assembler.module_names)                          # number of module tokens
num_choices = data_reader_tst.batch_loader.answer_dict.num_vocab     # answer vocab size
# Network inputs
input_seq_batch = tf.placeholder(tf.int32, [None, None])   # [T, batch] or [batch, T] token ids — confirm against feed
seq_length_batch = tf.placeholder(tf.int32, [None])        # per-example question lengths
image_feat_batch = tf.placeholder(tf.float32, [None, H_feat, W_feat, D_feat])  # conv feature maps
expr_validity_batch = tf.placeholder(tf.bool, [None])      # whether the decoded layout expression is valid
# The model for testing
# ---- Test-time setup (LOC split): config, session, data reader, and model ----
merge_cfg_from_file(args.cfg)
# The experiment name must match the config file's basename to prevent
# accidentally mixing snapshots from different experiments.
assert cfg.EXP_NAME == os.path.basename(args.cfg).replace('.yaml', '')
if args.opts:
    merge_cfg_from_list(args.opts)  # command-line overrides take precedence
# Start session
os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
sess = tf.Session(config=tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=cfg.GPU_MEM_GROWTH)))
# Data files
imdb_file = cfg.IMDB_FILE % cfg.TEST.SPLIT_LOC
# one_pass=True, shuffle=False: visit each test example exactly once.
# NOTE(review): batch_size comes from cfg.TRAIN.BATCH_SIZE even though this
# reads the TEST split — confirm this is intentional (cfg.TEST.BATCH_SIZE
# may be what was meant).
data_reader = DataReader(
    imdb_file, shuffle=False, one_pass=True,
    batch_size=cfg.TRAIN.BATCH_SIZE,
    vocab_question_file=cfg.VOCAB_QUESTION_FILE,
    T_encoder=cfg.MODEL.T_ENCODER,
    vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
    load_gt_layout=True,
    vocab_layout_file=cfg.VOCAB_LAYOUT_FILE,
    T_decoder=cfg.MODEL.T_CTRL,
    img_H=cfg.MODEL.H_IMG, img_W=cfg.MODEL.W_IMG)
num_vocab = data_reader.batch_loader.vocab_dict.num_vocab      # question vocab size
num_choices = data_reader.batch_loader.answer_dict.num_vocab   # answer vocab size
module_names = data_reader.batch_loader.layout_dict.word_list  # neural-module names
# Inputs and model
input_seq_batch = tf.placeholder(tf.int32, [None, None])   # question token ids
seq_length_batch = tf.placeholder(tf.int32, [None])        # per-example question lengths
image_feat_batch = tf.placeholder(
    tf.float32, [None, cfg.MODEL.H_FEAT, cfg.MODEL.W_FEAT, cfg.MODEL.FEAT_DIM])
# is_training=False: build the inference graph (no dropout/update ops).
model = Model(
    input_seq_batch, seq_length_batch, image_feat_batch,
    num_vocab=num_vocab, num_choices=num_choices,
    module_names=module_names, is_training=False)
# Load config cfg = build_cfg_from_argparse() # Start session os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID) sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions( allow_growth=cfg.GPU_MEM_GROWTH))) # Data files imdb_file_vqa = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_VQA imdb_file_loc = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_LOC data_reader_vqa = DataReader(imdb_file_vqa, shuffle=True, one_pass=False, batch_size=cfg.TRAIN.BATCH_SIZE, vocab_question_file=cfg.VOCAB_QUESTION_FILE, T_encoder=cfg.MODEL.T_ENCODER, vocab_answer_file=cfg.VOCAB_ANSWER_FILE, load_gt_layout=True, vocab_layout_file=cfg.VOCAB_LAYOUT_FILE, T_decoder=cfg.MODEL.T_CTRL) data_reader_loc = DataReader(imdb_file_loc, shuffle=True, one_pass=False, batch_size=cfg.TRAIN.BATCH_SIZE, vocab_question_file=cfg.VOCAB_QUESTION_FILE, T_encoder=cfg.MODEL.T_ENCODER, vocab_answer_file=cfg.VOCAB_ANSWER_FILE, load_gt_layout=True, vocab_layout_file=cfg.VOCAB_LAYOUT_FILE, T_decoder=cfg.MODEL.T_CTRL, img_H=cfg.MODEL.H_IMG,