Exemplo n.º 1
0
def load_eval_data(max_num=0):
    """Create the evaluation ``DataReader`` and look up its vocabulary sizes.

    The imdb path is ``cfg.IMDB_FILE`` formatted with ``cfg.TEST.SPLIT_REF``.
    The reader is created without shuffling, with ``cfg.TEST.BATCH_SIZE``,
    and with spatial-feature loading switched on.

    Args:
        max_num: Forwarded unchanged to ``DataReader`` as ``max_num``.

    Returns:
        A ``(data_reader, num_vocab, num_choices)`` tuple, where ``num_vocab``
        is ``batch_loader.vocab_dict.num_vocab`` and ``num_choices`` is
        ``batch_loader.answer_dict.num_vocab`` of the new reader.
    """
    eval_imdb_path = cfg.IMDB_FILE % cfg.TEST.SPLIT_REF
    reader = DataReader(
        eval_imdb_path,
        shuffle=False,
        max_num=max_num,
        batch_size=cfg.TEST.BATCH_SIZE,
        vocab_question_file=cfg.VOCAB_QUESTION_FILE,
        T_encoder=cfg.T_ENCODER,
        vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
        load_spatial_feature=True,
        spatial_feature_dir=cfg.SPATIAL_FEATURE_DIR,
        add_pos_enc=cfg.ADD_POS_ENC,
        img_H=cfg.IMG_H,
        img_W=cfg.IMG_W,
        pos_enc_dim=cfg.PE_DIM,
        pos_enc_scale=cfg.PE_SCALE,
    )
    question_vocab = reader.batch_loader.vocab_dict
    answer_vocab = reader.batch_loader.answer_dict
    return reader, question_vocab.num_vocab, answer_vocab.num_vocab
Exemplo n.º 2
0
def load_train_data(max_num=0):
    """Create the training ``DataReader`` and look up its vocabulary sizes.

    The imdb path is ``cfg.IMDB_FILE`` formatted with ``cfg.TRAIN.SPLIT_REF``.
    The reader is created with shuffling enabled, ``cfg.TRAIN.BATCH_SIZE``,
    and spatial-feature loading switched on. The wall-clock time spent in
    this function is printed before returning.

    Args:
        max_num: Forwarded unchanged to ``DataReader`` as ``max_num``.

    Returns:
        A ``(data_reader, num_vocab, num_choices)`` tuple, where ``num_vocab``
        is ``batch_loader.vocab_dict.num_vocab`` and ``num_choices`` is
        ``batch_loader.answer_dict.num_vocab`` of the new reader.
    """
    started_at = time.time()
    train_imdb_path = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_REF

    # Keyword options are gathered first and unpacked into the constructor.
    reader_options = dict(
        shuffle=True,
        max_num=max_num,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        vocab_question_file=cfg.VOCAB_QUESTION_FILE,
        T_encoder=cfg.T_ENCODER,
        vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
        load_spatial_feature=True,
        spatial_feature_dir=cfg.SPATIAL_FEATURE_DIR,
        add_pos_enc=cfg.ADD_POS_ENC,
        img_H=cfg.IMG_H,
        img_W=cfg.IMG_W,
        pos_enc_dim=cfg.PE_DIM,
        pos_enc_scale=cfg.PE_SCALE,
    )
    reader = DataReader(train_imdb_path, **reader_options)

    question_vocab_size = reader.batch_loader.vocab_dict.num_vocab
    answer_vocab_size = reader.batch_loader.answer_dict.num_vocab

    elapsed = time.time() - started_at
    print('load_train_time: ', elapsed)
    return reader, question_vocab_size, answer_vocab_size
Exemplo n.º 3
0
# Evaluation setup: resolve data/result paths, build the test-set reader and
# declare the TensorFlow input placeholders.
# The names tst_image_set, exp_name, snapshot_name, N, T_encoder, T_decoder,
# vocab_question_file, prune_filter_module, H_feat, W_feat and D_feat are not
# defined in this fragment; they must already exist at module level.
vocab_layout_file = './exp_clevr/data/vocabulary_layout.txt'
vocab_answer_file = './exp_clevr/data/answers_clevr.txt'

# imdb file for the image set under test.
imdb_file_tst = './exp_clevr/data/imdb/imdb_%s.npy' % tst_image_set

# Output locations are keyed by experiment, snapshot and image set. Their
# parent directories are created up front; exist_ok=True keeps re-runs from
# failing when the directory is already there.
save_file = './exp_clevr/results/%s/%s.%s.txt' % (exp_name, snapshot_name, tst_image_set)
os.makedirs(os.path.dirname(save_file), exist_ok=True)
eval_output_file = './exp_clevr/eval_outputs/%s/%s.%s.txt' % (exp_name, snapshot_name, tst_image_set)
os.makedirs(os.path.dirname(eval_output_file), exist_ok=True)

# The assembler is built from the layout vocabulary and is also handed to the
# data reader below.
assembler = Assembler(vocab_layout_file)

# Test reader: unshuffled, with one_pass=True (presumably a single sweep over
# the data -- confirm in DataReader).
data_reader_tst = DataReader(imdb_file_tst, shuffle=False, one_pass=True,
                             batch_size=N,
                             T_encoder=T_encoder,
                             T_decoder=T_decoder,
                             assembler=assembler,
                             vocab_question_file=vocab_question_file,
                             vocab_answer_file=vocab_answer_file,
                             prune_filter_module=prune_filter_module)

# Vocabulary sizes: question words, module names known to the assembler, and
# answers.
num_vocab_txt = data_reader_tst.batch_loader.vocab_dict.num_vocab
num_vocab_nmn = len(assembler.module_names)
num_choices = data_reader_tst.batch_loader.answer_dict.num_vocab

# Network inputs
# All placeholders leave their leading dimension(s) dynamic (None).
# NOTE(review): axis meaning is not visible here -- input_seq_batch is
# presumably (time, batch) token ids and the other leading dims the batch
# size; confirm against the model code.
input_seq_batch = tf.placeholder(tf.int32, [None, None])
seq_length_batch = tf.placeholder(tf.int32, [None])
image_feat_batch = tf.placeholder(tf.float32, [None, H_feat, W_feat, D_feat])
expr_validity_batch = tf.placeholder(tf.bool, [None])

# The model for testing
Exemplo n.º 4
0
# Load the experiment config from the file given on the command line, then
# apply any extra command-line overrides on top of it.
merge_cfg_from_file(args.cfg)
# Sanity check: the experiment name must equal the config file's base name
# with '.yaml' removed.
# NOTE(review): str.replace drops every '.yaml' occurrence, not only a
# trailing extension, and this check is skipped when Python runs with -O.
assert cfg.EXP_NAME == os.path.basename(args.cfg).replace('.yaml', '')
if args.opts:
    merge_cfg_from_list(args.opts)


# Start session
# CUDA_VISIBLE_DEVICES is set before the session is created so that the
# session only sees the configured GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
sess = tf.Session(config=tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=cfg.GPU_MEM_GROWTH)))

# Data files
# Reader over the TEST.SPLIT_LOC imdb: unshuffled, one_pass=True, with
# ground-truth layouts loaded.
# NOTE(review): batch_size comes from cfg.TRAIN.BATCH_SIZE even though the
# split is a TEST split and the model below is built with is_training=False
# -- confirm this is intentional.
imdb_file = cfg.IMDB_FILE % cfg.TEST.SPLIT_LOC
data_reader = DataReader(
    imdb_file, shuffle=False, one_pass=True, batch_size=cfg.TRAIN.BATCH_SIZE,
    vocab_question_file=cfg.VOCAB_QUESTION_FILE, T_encoder=cfg.MODEL.T_ENCODER,
    vocab_answer_file=cfg.VOCAB_ANSWER_FILE, load_gt_layout=True,
    vocab_layout_file=cfg.VOCAB_LAYOUT_FILE, T_decoder=cfg.MODEL.T_CTRL,
    img_H=cfg.MODEL.H_IMG, img_W=cfg.MODEL.W_IMG)
# Sizes and names taken from the reader's dictionaries: question vocabulary
# size, number of answers, and the layout word list used as module names.
num_vocab = data_reader.batch_loader.vocab_dict.num_vocab
num_choices = data_reader.batch_loader.answer_dict.num_vocab
module_names = data_reader.batch_loader.layout_dict.word_list

# Inputs and model
# Placeholders leave their leading dimension(s) dynamic (None); the image
# feature placeholder fixes its last three dimensions from cfg.MODEL.
# NOTE(review): axis order of input_seq_batch is not visible here --
# presumably (time, batch); confirm against Model.
input_seq_batch = tf.placeholder(tf.int32, [None, None])
seq_length_batch = tf.placeholder(tf.int32, [None])
image_feat_batch = tf.placeholder(
    tf.float32, [None, cfg.MODEL.H_FEAT, cfg.MODEL.W_FEAT, cfg.MODEL.FEAT_DIM])
# The model is built in inference mode (is_training=False).
model = Model(
    input_seq_batch, seq_length_batch, image_feat_batch, num_vocab=num_vocab,
    num_choices=num_choices, module_names=module_names, is_training=False)
Exemplo n.º 5
0
# Load config
# The whole configuration comes from build_cfg_from_argparse().
cfg = build_cfg_from_argparse()

# Start session
# CUDA_VISIBLE_DEVICES is set before the session is created so that the
# session only sees the configured GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(
    allow_growth=cfg.GPU_MEM_GROWTH)))

# Data files
# One imdb path per training split (VQA and LOC), both formatted from the
# same cfg.IMDB_FILE pattern.
imdb_file_vqa = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_VQA
imdb_file_loc = cfg.IMDB_FILE % cfg.TRAIN.SPLIT_LOC
# Training reader for the VQA split: shuffled, one_pass=False (presumably
# keeps cycling over the data -- confirm in DataReader), with ground-truth
# layouts loaded.
data_reader_vqa = DataReader(imdb_file_vqa,
                             shuffle=True,
                             one_pass=False,
                             batch_size=cfg.TRAIN.BATCH_SIZE,
                             vocab_question_file=cfg.VOCAB_QUESTION_FILE,
                             T_encoder=cfg.MODEL.T_ENCODER,
                             vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
                             load_gt_layout=True,
                             vocab_layout_file=cfg.VOCAB_LAYOUT_FILE,
                             T_decoder=cfg.MODEL.T_CTRL)
data_reader_loc = DataReader(imdb_file_loc,
                             shuffle=True,
                             one_pass=False,
                             batch_size=cfg.TRAIN.BATCH_SIZE,
                             vocab_question_file=cfg.VOCAB_QUESTION_FILE,
                             T_encoder=cfg.MODEL.T_ENCODER,
                             vocab_answer_file=cfg.VOCAB_ANSWER_FILE,
                             load_gt_layout=True,
                             vocab_layout_file=cfg.VOCAB_LAYOUT_FILE,
                             T_decoder=cfg.MODEL.T_CTRL,
                             img_H=cfg.MODEL.H_IMG,