def process():
    """Build and save the QRD "irrelevant" question data.

    Steps, in order:
      1. Create a blacklist with ``make_blacklist`` and save it as JSON.
      2. Load items with ``load_qrpe_data`` and ``load_vtfp_data`` (both are
         given the blacklist) and concatenate the two results.
      3. Encode each item's question with
         ``SentenceEncoder.encode_sentence``.
      4. Pack the token lists with ``put_to_array``; write a JSON meta file
         (image entries and image ids) and an HDF5 data file (image ids as
         int32, packed questions and their lengths).
    """
    # load data
    blacklist = make_blacklist()
    save_json('data/kptest_blacklist.json', blacklist)
    qrpe = load_qrpe_data(blacklist)
    vtfp = load_vtfp_data(blacklist)
    # No debugger breakpoint here: the script has to be able to run
    # unattended (e.g. from a batch job) without stopping for input.
    meta = qrpe + vtfp

    # process data
    images, image_ids, questions = [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        images.append(item['image'])
        image_ids.append(item['image_id'])
        questions.append(encoder.encode_sentence(item['question']))

    # put to array
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)

    save_json('data/QRD_irrelevant_meta.json', {
        'images': images,
        'image_ids': image_ids
    })
    image_ids = np.array(image_ids, dtype=np.int32)
    save_hdf5('data/QRD_irrelevant_data.data', {
        'image_ids': image_ids,
        'quest': arr,
        'quest_len': arr_len
    })
def extract_w2v():
    """Encode the trainval answer vocabulary and save the encodings.

    Reads ``data/vqa_trainval_answer_word_counts.txt`` and keeps, for every
    line, the text before the first space. Each answer is encoded with
    ``SentenceEncoder.encode``; the encodings are concatenated with
    ``np.concatenate`` and stored under the key ``'ans_w2v'``.
    """
    trainset = 'trainval'
    top_ans_file = 'data/vqa_%s_answer_word_counts.txt' % trainset
    with open(top_ans_file, 'r') as fs:
        answer_vocab = [line.split(' ')[0].strip() for line in fs]

    # extract w2v
    encoder = SentenceEncoder()
    print('Extracting answer codings')
    ans_enc = np.concatenate([encoder.encode(ans) for ans in answer_vocab])

    # NOTE(review): the message is kept so existing logs stay comparable, but
    # this function performs no normalisation or distance computation.
    print('Normalise and compute distance')
    from util import save_hdf5
    save_hdf5('data/vqa_trainval_answer_vocab_w2v.data',
              {'ans_w2v': ans_enc})
def _encode_w2v(images, encoder, subset):
    """Encode questions and answer candidates of one subset and save them.

    For every item the question is encoded with ``encoder.encode`` and the
    answer candidates with ``_encode_answer_candidates``. The concatenated
    encodings, the labels and the question ids go to an HDF5 file; the
    per-question candidate lists go to a JSON meta file (both in ``data3/``).

    Args:
        images: sequence of items exposing ``question_id``, ``question`` and
            ``choices``.
        encoder: object with an ``encode`` method; it is also forwarded to
            ``_encode_answer_candidates``.
        subset: name used in the progress log and in both output file names.
    """
    q_codes, cand_codes = [], []
    gt_labels, q_ids, meta = [], [], []

    for idx, item in enumerate(images):
        # progress report every 1000 items
        if idx % 1000 == 0:
            tf.logging.info("%s: processed %d of %d items." %
                            (subset.upper(), idx, len(images)))
        qid = item.question_id
        q_code = encoder.encode(item.question)
        cand_code, gt = _encode_answer_candidates(item, encoder)

        q_codes.append(q_code)
        cand_codes.append(cand_code)
        gt_labels.append(gt)
        q_ids.append(qid)
        meta.append({'quest_id': qid, 'cands': item.choices})

    # ready to pack data
    packed = {
        'quest_w2v': np.concatenate(q_codes, axis=0).astype(np.float32),
        'cands_w2v': np.concatenate(cand_codes, axis=0).astype(np.float32),
        'labels': np.array(gt_labels, dtype=np.int32),
        'quest_ids': np.array(q_ids, dtype=np.int32)
    }
    save_hdf5('data3/vqa_mc_w2v_coding_%s.data' % subset, packed)
    save_json('data3/vqa_mc_cands_%s.meta' % subset, meta)
def _process_dataset(subset, images, encoder):
    """Encode one VQA subset and write its data, answer and meta files.

    Every item is passed to ``encoder.encode``. Items for which the encoder
    returns ``None`` are skipped entirely, so all saved lists and arrays have
    one entry per encoded item and stay index-aligned.

    Args:
        subset: subset name, used in the output file names.
        images: sequence of items exposing ``filename``, ``question_id`` and
            ``counter_example``.
        encoder: object whose ``encode(item)`` returns ``None`` or a
            ``(quest, label, ans)`` triple.
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'v2_vqa_std_mscoco_kp%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'v2_vqa_std_mscoco_kp%s.data' % subset)
    num_images = len(images)

    quests, labels, answers = [], [], []
    couter_examples, image_names, quest_ids = [], [], []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is None:
            # Skip before recording any metadata; otherwise the ids and
            # counter examples would drift out of step with the questions.
            continue
        quest, label, ans = res

        image_names.append(image.filename)
        quest_ids.append(image.question_id)
        counter_example_id = image.counter_example
        # -1 marks "no counter example" in the int32 array
        couter_examples.append(
            -1 if counter_example_id is None else counter_example_id)

        # remove start and end word
        quests.append(quest[1:-1])
        answers.append(ans[1:-1])
        labels.append(label)

        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    # merge questions to a matrix
    quest_arr, quest_len = _list_tokens_to_array(quests)
    ans_arr, ans_len = _list_tokens_to_array(answers)
    labels = np.array(labels, dtype=np.int32)
    couter_examples = np.array(couter_examples, dtype=np.int32)

    # save data file
    save_hdf5(data_filename, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'answer': labels,
        'counter_example': couter_examples,
        'quest_ids': np.array(quest_ids, dtype=np.int32)
    })
    ans_filename = 'data/v2_answer_std_mscoco_kp%s.data' % subset
    save_hdf5(ans_filename, {'ans_arr': ans_arr, 'ans_len': ans_len})

    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names}
    with open(meta_filename, 'w') as fs:
        json.dump(d, fs)
    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quest_ids), meta_filename))
    sys.stdout.flush()
def process_test():
    """Build and save the QRD "irrelevant" test data.

    Loads the items returned by ``load_bsir_dataset``, encodes each question
    with ``SentenceEncoder.encode_sentence``, packs the token lists with
    ``put_to_array`` and writes a JSON meta file (image entries and ids) and
    an HDF5 data file (ids as int32, packed questions, lengths and the
    float32 labels).
    """
    from util import save_hdf5, save_json
    # load data
    meta = load_bsir_dataset()

    # process data
    labels, images, image_ids, questions = [], [], [], []
    encoder = SentenceEncoder()
    for item in meta:
        image_id = item['image_id']
        image = item['image']
        tokens = encoder.encode_sentence(item['question'])
        images.append(image)
        image_ids.append(image_id)
        questions.append(tokens)
        labels.append(item['label'])

    # put to array
    from post_process_variation_questions import put_to_array
    arr, arr_len = put_to_array(questions)

    save_json('data/QRD_irrelevant_meta_test.json', {
        'images': images,
        'image_ids': image_ids
    })
    image_ids = np.array(image_ids, dtype=np.int32)
    labels = np.array(labels, dtype=np.float32)
    # No debugger breakpoint before the final save: the script has to be
    # able to finish unattended.
    save_hdf5('data/QRD_irrelevant_data_test.data', {
        'image_ids': image_ids,
        'quest': arr,
        'quest_len': arr_len,
        'labels': labels
    })
def extract_w2v():
    """Encode the top-answer list, L2-normalise it and save it with its
    similarity matrix.

    Each stripped line of the top-answer file is encoded with
    ``SentenceEncoder.encode``. The concatenated encodings are divided
    row-wise by their L2 norm, and the matrix of pairwise dot products of
    the normalised rows is stored next to them under ``'sim'``.
    """
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    with open(top_ans_file, 'r') as fs:
        answer_vocab = [line.strip() for line in fs]

    # extract w2v
    encoder = SentenceEncoder()
    print('Extracting answer codings')
    ans_enc = np.concatenate([encoder.encode(ans) for ans in answer_vocab])

    # l2 norm
    print('Normalise and compute distance')
    # the small constant keeps an all-zero row from dividing by zero
    _norm = np.sqrt(np.square(ans_enc).sum(axis=1)) + 1e-8
    ans_enc /= _norm[:, np.newaxis]
    sim = np.dot(ans_enc, ans_enc.transpose())

    from util import save_hdf5
    save_hdf5('data/top2000_answer_feat.data', {
        'ans_w2v': ans_enc,
        'sim': sim
    })
def pool_resnet_features(split='train'):
    """Pool the stored ResNet features of one split into a single HDF5 file.

    The meta file of ``split`` supplies the image ids and image names. For
    every unique image id the matching ``.npz`` feature file is passed to
    ``_load_and_pool_feature``; the results are concatenated along axis 0
    and saved as float32 together with the unique image ids.

    Args:
        split: one of ``'test'``, ``'train'``, ``'val'``, ``'vg_aug_train'``.
    """
    SPLITS = ['test', 'train', 'val', 'vg_aug_train']
    assert (split in SPLITS)

    seed_file = os.path.join('/usr/data/fl302/code/inverse_vqa/data2',
                             'v7w_std_mscoco_%s.meta' % split)
    meta = load_json(seed_file)
    # for a repeated id the last listed name wins
    image_id2fpath = dict(zip(meta['image_ids'], meta['images']))

    FEAT_DIR = '/usr/data/fl302/data/visual_genome/ResNet152/resnet_res5c'
    image_ids = np.unique(meta['image_ids'])

    tick = time()
    pooled = []
    for count, image_id in enumerate(image_ids):
        if count % 100 == 0:
            print('processed %d images (%0.2f sec/batch)' %
                  (count, time() - tick))
            tick = time()
        feat_file = os.path.join(FEAT_DIR, image_id2fpath[image_id] + '.npz')
        pooled.append(_load_and_pool_feature(feat_file))

    feats = np.concatenate(pooled, axis=0).astype(np.float32)
    save_hdf5('data2/v7w_res152_%s.h5' % split, {
        'image_ids': image_ids,
        'features': feats
    })
def dump_dict_and_mapping(quest_vocab, mapping):
    """Write the merged vocabulary as text and the index mapping as HDF5.

    Args:
        quest_vocab: dict mapping word -> index. The inverted dict is read
            at every ``i`` in ``range(len(inverted))``, so a gap in the
            indices raises ``KeyError``.
        mapping: value stored under the key ``'mapping'``; it is handed to
            ``save_hdf5`` unchanged.
    """
    # ``items()`` exists on Python 2 and 3; ``iteritems()`` is Python 2 only
    # and raises AttributeError on Python 3.
    reverse_qvoc = {v: k for (k, v) in quest_vocab.items()}
    with open('data/vqa_trainval_merged_word_counts', 'w') as fs:
        for i in range(len(reverse_qvoc)):
            fs.write('%s %d\n' % (reverse_qvoc[i], i))
    from util import save_hdf5
    save_hdf5('data/answer_index_to_merged_index.mapping',
              {'mapping': mapping})
def convert():
    """Embed the top answers with a restored model and save the embeddings.

    Builds the 'VAQ-Var' model in 'beam' mode, restores the hard-coded
    checkpoint, encodes the answers at indices ``range(2000)`` to token
    sequences, checks that each sequence decodes back to the tokenised
    answer, runs the model's answer-embedding tensor on the packed
    sequences and stores the result under ``'answer_embedding'``.

    Raises:
        AssertionError: if a decoded sequence differs from its answer.
    """
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'

    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')

    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)

    # round-trip check: every sequence must decode to its tokenised answer
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    # No debugger breakpoint here: the conversion has to be able to run
    # unattended.
    embedding = sess.run(tf_embedding, feed_dict={
        tf_answer_feed: ans_arr.astype(np.int32),
        tf_answer_len_feed: ans_arr_len.astype(np.int32)
    })

    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
def _process_dataset(subset, images, encoder):
    """Encode one VQA subset and write its data and meta files.

    Every item is passed to ``encoder.encode``. Items for which the encoder
    returns ``None`` are skipped entirely, so the question matrix, the
    labels and the metadata lists all have one entry per encoded item.

    Args:
        subset: subset name, used in the output file names.
        images: sequence of items exposing ``filename`` and ``question_id``.
        encoder: object whose ``encode(item)`` returns ``None`` or a
            ``(quest, label)`` pair.

    Raises:
        ValueError: from ``max`` if no item could be encoded.
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'vqa_std_mscoco_%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'vqa_std_mscoco_%s.data' % subset)
    num_images = len(images)

    quests, labels, image_names, quest_ids = [], [], [], []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is None:
            # Skip before recording metadata so every output stays aligned.
            continue
        quest, label = res
        image_names.append(image.filename)
        quest_ids.append(image.question_id)
        # remove start and end word
        quests.append(quest[1:-1])
        labels.append(label)
        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    # merge questions to a zero-padded matrix; it is sized by the number of
    # encoded questions (sizing by num_images raised IndexError after a skip)
    quest_len = [len(q) for q in quests]
    max_len = max(quest_len)
    quest_arr = np.zeros([len(quests), max_len], dtype=np.int32)
    for row, quest in zip(quest_arr, quests):
        row[:len(quest)] = quest
    quest_len = np.array(quest_len, dtype=np.int32)
    answer_arr = np.array(labels, dtype=np.int32)

    # save data file
    save_hdf5(data_filename, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'answer': answer_arr
    })

    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names}
    with open(meta_filename, 'w') as fs:
        json.dump(d, fs)
    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quests), meta_filename))
    sys.stdout.flush()
def vaq_condition(checkpoint_path=None):
    """Score the 'dev' subset with the model in 'condition' mode and save
    the per-batch scores.

    For each test batch the model losses are fetched, the last column is
    dropped and the remaining columns are averaged per row; the results are
    stored with the matching question ids in an HDF5 file.

    Args:
        checkpoint_path: checkpoint to restore. When ``None`` the latest
            checkpoint recorded under ``FLAGS.checkpoint_dir`` is used.
    """
    subset = 'dev'
    model_config = ModelConfig()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = Reader(batch_size=1, subset=subset, output_attr=True,
                    output_im=False, output_qa=True, output_capt=False)

    if checkpoint_path is None:
        state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = state.model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        # Build the model.
        model = model_fn(model_config, 'condition')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    fetch_op = model.losses
    num_batches = reader.num_batches

    save_file = 'data/%s_vaq_cond_score1000-2000_%s.hdf5' % (
        (FLAGS.model_type).lower(), subset)
    print('Save File: %s' % save_file)
    print('Running conditioning...')

    all_scores, all_ids = [], []
    for step in range(num_batches):
        update_progress(step / float(num_batches))
        batch = reader.get_test_batch()
        # the batch must hold exactly six entries; only the id is used here
        _, _, _, _, quest_id, _ = batch
        # the last two entries (ids) are not fed to the model
        losses = sess.run(fetch_op,
                          feed_dict=model.fill_feed_dict(batch[:-2]))
        # row-wise mean without the last column, with a leading axis added
        row_scores = losses[:, :-1].mean(axis=1)[np.newaxis, :]
        all_scores.append(row_scores)
        all_ids.append(quest_id)

    nlls = np.concatenate(all_scores, axis=0)
    quest_ids = np.concatenate(all_ids, axis=0)
    print('\nSaving result files: %s...' % save_file)
    save_hdf5(save_file, {'nll': nlls, 'quest_ids': quest_ids})
def load_data():
    """Merge the train and val ImageNet feature files into one HDF5 file.

    Each subset file is read with ``load_feature_file_vqabaseline``; the
    image ids (cast to int32) and features (cast to float32) of both
    subsets are concatenated in train, val order and saved.
    """
    id_chunks, feat_chunks = [], []
    for subset in ['train', 'val']:
        print('Loading subset: %s' % subset)
        loaded = load_feature_file_vqabaseline(
            'data/imagenet_%s_features.h5' % subset)
        id_chunks.append(loaded['image_ids'])
        feat_chunks.append(loaded['features'])

    merged = {
        'image_ids': np.concatenate(id_chunks).astype(np.int32),
        'features': np.concatenate(feat_chunks).astype(np.float32)
    }
    print('Saving...')
    save_hdf5('/usr/data/fl302/code/compute_nn/res152_trainval.h5', merged)
    print('Done')
def make_top_answer_data_layer():
    """Convert the top answers to token-id sequences and save them.

    Every answer from ``load_top_answer_list`` is converted to ``str``,
    lower-cased and tokenised with NLTK's ``word_tokenize``; each token is
    mapped to an id through the answer vocabulary. The id lists are packed
    with ``_list_tokens_to_array`` and written to HDF5.
    """
    from build_v2_ivqa_data import _load_vocab, _list_tokens_to_array
    from nltk.tokenize import word_tokenize
    from util import save_hdf5

    vocab = _load_vocab('data/vqa_trainval_answer_word_counts.txt')

    def _to_ids(answer):
        # one answer -> list of vocabulary ids
        words = word_tokenize(str(answer).lower())
        return [vocab.word_to_id(w) for w in words]

    answers = [_to_ids(top_ans) for top_ans in load_top_answer_list()]
    ans_arr, ans_len = _list_tokens_to_array(answers)
    save_hdf5('data/top_answer2000_sequences.h5', {
        'answer_seq': ans_arr,
        'answer_seq_len': ans_len
    })
def _split_attributes(subset, atts, image_ids):
    """Select the attribute rows of one subset's images and save them.

    Args:
        subset: subset name, passed to ``get_subset_image_id`` and used in
            the output file name.
        atts: array indexed as ``atts[rows, :]``.
        image_ids: image id of every row of ``atts``, in row order.

    Images of the subset without an entry in ``image_ids`` are counted as
    missed in the printed summary and receive row 0 of ``atts``.
    """
    row_of_image = {}
    for row, image_id in enumerate(image_ids):
        row_of_image[image_id] = row

    subset_image_ids = get_subset_image_id(subset)
    # -1 marks an image that has no attribute row
    index = -np.ones(len(subset_image_ids), dtype=np.int32)
    for pos, image_id in enumerate(subset_image_ids):
        row = row_of_image.get(image_id)
        if row is not None:
            index[pos] = row

    num_match = (index >= 0).sum()
    print('Find %d matched attributes for subset %s, missed %d\n' %
          (num_match, subset, index.size-num_match))

    # slice; unmatched images fall back to row 0
    index[index < 0] = 0
    scores = atts[index, :]
    save_hdf5('data/attribute_std_mscoco_%s.data' % subset, {
        'att_arr': scores.astype(np.float32),
        'image_ids': np.array(subset_image_ids, dtype=np.int32)
    })
def encode_answers(quest_ids, subset):
    """Save the ground-truth answer sequences of the given questions.

    Args:
        quest_ids: question ids looked up in a 'trainval'
            ``MultiChoiceQuestionManger``.
        subset: only used in the output file name.

    The items of every returned sequence coding are appended to one flat
    list; each entry of that list becomes one zero-padded int32 row.
    """
    ctx = MultiChoiceQuestionManger(subset='trainval', load_ans=True,
                                    answer_coding='sequence')
    answers = []
    for q_id in quest_ids:
        _, seq = ctx.get_gt_answer_and_sequence_coding(q_id)
        answers.extend(seq)

    # merge answers to a zero-padded matrix
    lengths = [len(tokens) for tokens in answers]
    ans_arr = np.zeros([len(answers), max(lengths)], dtype=np.int32)
    for row, tokens in zip(ans_arr, answers):
        row[:len(tokens)] = tokens

    save_hdf5('data/answer_std_mscoco_%s.data' % subset, {
        'ans_arr': ans_arr,
        'ans_len': np.array(lengths, dtype=np.int32)
    })
def convert_val_attributes():
    """Convert the 'Val' attribute detections from .mat to HDF5.

    Loads ``Inception_v3_1000_Val_Dets.mat`` with ``loadmat`` and stores its
    ``'labels'`` entry as float32 under ``'att_arr'`` and its flattened
    ``'image_id'`` entry as int32 under ``'image_ids'``.
    """
    data_root = '/import/vision-ephemeral/fl302/code/slim'
    for subset in ['Val']:
        fname = 'Inception_v3_1000_%s_Dets.mat' % subset
        print('Loading %s...' % fname)
        d = loadmat(os.path.join(data_root, fname))
        # one float32 cast is enough
        t_labels = d['labels'].astype(np.float32)
        t_image_ids = d['image_id'].flatten()

        data_file = 'data/capt1k_std_mscoco_%s.data' % subset.lower()
        save_hdf5(data_file, {
            'att_arr': t_labels,
            'image_ids': np.array(t_image_ids, dtype=np.int32)
        })
def find_boxes_of_questions(regions, v7w_qa2box, subset):
    """Look up one box per question of a subset and save the result.

    Lookup order for every question id:
      1. ``v7w_qa2box[quest_id]`` when the id is a key there;
      2. otherwise ``regions[qa2reg[str(quest_id)]]`` when both lookups
         succeed (``qa2reg`` is the QA-to-region mapping JSON);
      3. otherwise the box ``[0., 0., 1., 1.]`` with ``has_boxes`` False.

    Args:
        regions: mapping from region id to box.
        v7w_qa2box: mapping from question id to box.
        subset: subset name used in the meta and output file names.
    """
    seed_file = os.path.join('/usr/data/fl302/code/inverse_vqa/data2',
                             'v7w_std_mscoco_%s.meta' % subset)
    quest_ids = load_json(seed_file)['quest_id']
    sv_file = ('/usr/data/fl302/code/inverse_vqa/data2/'
               'v7w_qa_boxes_%s.data' % subset)
    qa2reg = load_json(
        '/usr/data/fl302/data/visual_genome/qa_to_region_mapping.json')

    has_boxes, quest_boxes = [], []
    for quest_id in quest_ids:
        found = True
        q_key = str(quest_id)
        if quest_id in v7w_qa2box:
            # v7w annotation takes precedence
            box = v7w_qa2box[quest_id]
        elif q_key in qa2reg and qa2reg[q_key] in regions:
            # visual genome annotation
            box = regions[qa2reg[q_key]]
        else:
            box = [0., 0., 1., 1.]
            found = False
        quest_boxes.append(box)
        has_boxes.append(found)

    quest_boxes = np.array(quest_boxes)
    has_boxes = np.array(has_boxes)
    quest_ids = np.array(quest_ids)
    tf.logging.info('Subset %s, %d/%d QAs have region annotation' %
                    (subset, has_boxes.sum(), has_boxes.size))
    save_hdf5(sv_file, {
        'quest_ids': quest_ids,
        'has_boxes': has_boxes,
        'quest_boxes': quest_boxes
    })
def _load_answer_type(self, quest_ids):
    """Set ``self._answer_type`` from the cache file, creating it if needed.

    When the cache file for this version suffix and subset exists, its
    ``'answer_type'`` entry is loaded. Otherwise the codings are computed
    for ``quest_ids`` with ``MultiChoiceQuestionManger`` and written to the
    cache file together with the question ids.

    NOTE(review): the manager is always created with ``subset='val'`` while
    the cache file name uses ``self._subset`` -- confirm this is intended.
    """
    answer_type_file = 'data/%sanswer_type_std_mscoco_%s.data' % (
        self._version_suffix, self._subset)

    if os.path.exists(answer_type_file):
        self._answer_type = load_hdf5(answer_type_file)['answer_type']
        return

    _mc_ctx = MultiChoiceQuestionManger(subset='val')
    codings = []
    for quest_id in quest_ids:
        codings.append(_mc_ctx.get_answer_type_coding(quest_id))
    answer_type_ids = np.array(codings, dtype=np.int32)
    save_hdf5(answer_type_file, {
        'answer_type': answer_type_ids,
        'quest_ids': np.array(quest_ids, dtype=np.int32)
    })
    self._answer_type = answer_type_ids
def process(subset):
    """Build per-image multi-label targets for one subset and save them.

    For every image the unique labels of its QA entries are set to 1.0 in a
    float32 row of width 2001; afterwards columns 0, 1 and 2000 are cleared
    for all rows. Features, targets and image ids are saved to HDF5.

    Args:
        subset: subset name, passed to the loaders and used in the output
            file name.
    """
    print('Processing subset %s' % subset)
    disable_entries = [0, 1, 2000]
    feats, image_ids = load_image_data(subset)
    image_id2qa_index, labels = load_qa_data(subset)

    bin_labels = np.zeros((feats.shape[0], 2001), dtype=np.float32)
    # each ``target_row`` is a view, so writes land in ``bin_labels``
    for image_id, target_row in zip(image_ids, bin_labels):
        qa_rows = image_id2qa_index[image_id]
        target_row[np.unique(labels[qa_rows])] = 1.0

    # disable meaningless entries
    bin_labels[:, disable_entries] = 0.

    save_hdf5('data/vqa_std_mscoco_multilabel_%s.data' % subset, {
        'features': feats,
        'labels': bin_labels,
        'image_ids': image_ids
    })
def process_dataset(mc, subset):
    """Pair each question's answer encoding with its image feature and save.

    Args:
        mc: object providing ``get_gt_answer_and_word2vec(quest_id)`` and
            ``get_image_id(quest_id)``.
        subset: subset name, passed to ``load_dataset`` and
            ``FeatureEncoder`` and used in the output file name.
    """
    print('Processing %s' % subset)
    quest_ids = load_dataset(subset)
    im_encoder = FeatureEncoder(subset)

    answer_codes, image_codes = [], []
    for quest_id in quest_ids:
        answer_codes.append(mc.get_gt_answer_and_word2vec(quest_id)[1])
        feature = im_encoder.get_feature(mc.get_image_id(quest_id))
        # add a leading axis so the features concatenate into rows
        image_codes.append(feature[np.newaxis, :])

    save_hdf5('data/image_answer_coding_%s.h5' % subset, {
        'quest_ids': np.array(quest_ids, dtype=np.int32),
        'answer_enc': np.concatenate(answer_codes,
                                     axis=0).astype(np.float32),
        'image_enc': np.concatenate(image_codes, axis=0).astype(np.float32)
    })
def split_subset(subset, inputs):
    """Keep the VQA and attribute entries whose image belongs to ``subset``
    and save them.

    Args:
        subset: subset name, passed to ``get_image_id`` and
            ``encode_answers`` and used in the output file names.
        inputs: 7-tuple ``(images, quest_id, quest_arr, quest_len, answer,
            attr_image_ids, attr_arr)``.
    """
    print('Processing split %s' % subset)
    (images, quest_id, quest_arr, quest_len,
     answer, attr_image_ids, attr_arr) = inputs
    vqa_image_ids = [find_image_id_from_fname(fname) for fname in images]

    # ids of this subset, used for membership tests only
    subset_ids = set(get_image_id(subset))

    # split vqa data
    vqa_mask = np.array([im_id in subset_ids for im_id in vqa_image_ids])
    images = [name for name, keep in zip(images, vqa_mask) if keep]
    quest_id = [qid for qid, keep in zip(quest_id, vqa_mask) if keep]
    quest_arr = quest_arr[vqa_mask]
    quest_len = quest_len[vqa_mask]
    answer = answer[vqa_mask]

    # split attribute data
    attr_mask = np.array([im_id in subset_ids for im_id in attr_image_ids])
    attr_image_ids = attr_image_ids[attr_mask]
    attr_arr = attr_arr[attr_mask]

    # process answers
    encode_answers(quest_id, subset)

    # save to files
    save_json('data/vqa_std_mscoco_kp%s.meta' % subset,
              {'images': images, 'quest_id': quest_id})
    save_hdf5('data/vqa_std_mscoco_kp%s.data' % subset, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'answer': answer
    })
    save_hdf5('data/attribute_std_mscoco_kp%s.data' % subset, {
        'image_ids': attr_image_ids,
        'att_arr': attr_arr
    })
def load_res152_feature():
    """Merge the val, test and restval ResNet-152 files into one data file.

    The image ids and features of the three HDF5 files are concatenated in
    that order, the ids are checked with ``vertify_image_ids`` and the
    result is written to ``data/res152_std_mscoco_val.data``.
    """
    fdir = '/import/vision-ephemeral/fl302/code/text-to-image/'
    feat_chunks, id_chunks = [], []
    for subset in ['val', 'test', 'restval']:
        loaded = load_hdf5(
            os.path.join(fdir, 'mscoco_res152_%s.h5' % subset))
        id_chunks.append(loaded['image_ids'].flatten())
        feat_chunks.append(loaded['features'])

    feats = np.concatenate(feat_chunks)
    image_ids = np.concatenate(id_chunks)

    # verify
    vertify_image_ids(image_ids)

    # save
    save_hdf5('data/res152_std_mscoco_%s.data' % 'val', {
        'att_arr': feats.astype(np.float32),
        'image_ids': np.array(image_ids, dtype=np.int32)
    })
def _process_dataset(subset, images, encoder):
    """Encode all captions of a subset and write them to one HDF5 file.

    Every caption of every item is encoded with ``encoder.encode``. The
    sequences are stored as rows of an int32 matrix padded with
    ``encoder.vocab_size + 1``; each row keeps the id of its image.

    Args:
        subset: subset name, used in the output file name.
        images: sequence of items exposing ``image_id`` and ``captions``.
        encoder: object with ``encode`` and ``vocab_size``.
    """
    data_filename = os.path.join(FLAGS.output_dir,
                                 'caption_std_mscoco_%s.data' % subset)
    num_images = len(images)

    capts, image_ids = [], []
    for count, info in enumerate(images):
        for caption in info.captions:
            capts.append(encoder.encode(caption))
            image_ids.append(info.image_id)
        if count % 1000 == 0:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), count, num_images))
            sys.stdout.flush()

    # merge captions to a matrix filled with the padding id
    lengths = [len(tokens) for tokens in capts]
    widest = max(lengths)
    dummy_id = encoder.vocab_size + 1
    capt_arr = np.ones([len(capts), widest], dtype=np.int32) * dummy_id
    for row, tokens in zip(capt_arr, capts):
        row[:len(tokens)] = tokens

    # save data file
    save_hdf5(data_filename, {
        'capt_arr': capt_arr,
        'capt_len': np.array(lengths, dtype=np.int32),
        'image_ids': np.array(image_ids, dtype=np.int32)
    })
    print("%s: Wrote %d caption files to %s" %
          (datetime.now(), num_images, data_filename))
    sys.stdout.flush()
def load_question_candidates(subset):
    """Attach answer data to the generated question candidates of a subset.

    Loads the question-token file of ``subset``, looks up the answer data of
    the first id of every ``ext_quest_ids`` row and writes questions and
    answers to the matching ``question_answers`` file.

    Args:
        subset: subset name. It selects the input file, is passed to
            ``load_answer_data`` and also names the output file, so input
            and output always belong to the same subset.
    """
    _model_suffix = 'var_' if FLAGS.use_var else ''
    d = load_hdf5('data4/%sivqa_%s_question_tokens.data' %
                  (_model_suffix, subset))
    ext_quest_arr = d['ext_quest_arr']
    ext_quest_len = d['ext_quest_len']
    seed_quest_ids = d['ext_quest_ids']
    # the second returned value (question ids) is not needed here
    quest_id2index, _, top_answers, \
        answer_arr, answer_arr_len = load_answer_data(subset)

    ext_top_answer = []
    ext_answer_arr = []
    ext_answer_arr_len = []

    # process answer data
    num = seed_quest_ids.shape[0]
    for i, qids in enumerate(seed_quest_ids):
        if i % 1000 == 0:
            print('Processed %d/%d' % (i, num))
        # the first entry of each row is the key into the answer data
        idx = quest_id2index[qids[0]]
        ext_top_answer.append(top_answers[idx])
        ext_answer_arr.append(answer_arr[idx])
        ext_answer_arr_len.append(answer_arr_len[idx])

    # concat data
    ext_top_answer = np.array(ext_top_answer).astype(np.int32)
    ext_answer_arr = np.concatenate(ext_answer_arr).astype(np.int32)
    ext_answer_arr_len = np.array(ext_answer_arr_len).astype(np.int32)

    # The output name uses the ``subset`` argument; it previously used
    # FLAGS.subset, which wrote to another subset's file whenever the two
    # differed.
    save_hdf5(
        'data4/%sivqa_%s_question_answers.data' % (_model_suffix, subset), {
            'ext_quest_arr': ext_quest_arr,
            'ext_quest_len': ext_quest_len,
            'ext_quest_ids': seed_quest_ids,
            'ext_top_answer': ext_top_answer,
            'ext_answer_arr': ext_answer_arr,
            'ext_answer_arr_len': ext_answer_arr_len
        })
def _process_dataset(subset, images, encoder):
    """Encode one V7W subset and write its data and meta files.

    Every item is passed to ``encoder.encode``. Items for which the encoder
    returns ``None`` are skipped entirely, so the matrices, the labels and
    the metadata lists all have one entry per encoded item.

    Args:
        subset: subset name, used in the output file names.
        images: sequence of items exposing ``image_id``, ``filename`` and
            ``question_id``.
        encoder: object whose ``encode(item)`` returns ``None`` or a
            ``(quest, ans, label)`` triple.

    Raises:
        ValueError: from ``max`` if no item could be encoded.
    """
    meta_filename = os.path.join(FLAGS.output_dir,
                                 'v7w_std_mscoco_%s.meta' % subset)
    data_filename = os.path.join(FLAGS.output_dir,
                                 'v7w_std_mscoco_%s.data' % subset)
    num_images = len(images)

    quests, labels, answers = [], [], []
    image_names, quest_ids, image_ids = [], [], []
    for i, image in enumerate(images):
        res = encoder.encode(image)
        if res is None:
            # Skip before recording metadata so every output stays aligned.
            continue
        quest, ans, label = res
        image_ids.append(image.image_id)
        image_names.append(image.filename)
        quest_ids.append(image.question_id)
        # remove start and end word
        quests.append(quest[1:-1])
        answers.append(ans[1:-1])
        labels.append(label)
        if not i % 1000:
            print("%s: Processed %d of %d items." %
                  (datetime.now(), i, num_images))
            sys.stdout.flush()

    def put_to_array(datums):
        """Pack token lists into a zero-padded int32 matrix plus lengths."""
        _len = [len(q) for q in datums]
        max_len = max(_len)
        # sized by the number of sequences (sizing by num_images raised
        # IndexError as soon as one item had been skipped)
        _arr = np.zeros([len(datums), max_len], dtype=np.int32)
        for row, tokens in zip(_arr, datums):
            row[:len(tokens)] = tokens
        return _arr, np.array(_len, dtype=np.int32)

    # merge questions and answers to matrices
    quest_arr, quest_len = put_to_array(quests)
    ans_arr, ans_len = put_to_array(answers)
    labels = np.array(labels, dtype=np.int32)
    # a label equal to FLAGS.num_top_answers is counted as out of vocabulary
    num_oov = np.sum(labels == FLAGS.num_top_answers)
    tf.logging.info('In vocabulary percentage: %0.2f' %
                    (100. - 100. * num_oov / labels.size))

    # save data file
    save_hdf5(data_filename, {
        'quest_arr': quest_arr,
        'quest_len': quest_len,
        'ans_arr': ans_arr,
        'ans_len': ans_len,
        'answer': labels
    })

    # save meta file; the context manager closes the handle deterministically
    d = {'quest_id': quest_ids, 'images': image_names,
         'image_ids': image_ids}
    with open(meta_filename, 'w') as fs:
        json.dump(d, fs)
    print("%s: Wrote %d VQA files to %s" %
          (datetime.now(), len(quests), meta_filename))
    sys.stdout.flush()
def test(checkpoint_path=None):
    """Run the restored model on ``FLAGS.testset`` and save per-question
    scores.

    For every batch the probability of the ground-truth label is recorded
    first; then the last column of the probabilities is zeroed and the
    arg-max label with its probability is recorded. All four arrays are
    written to one HDF5 file.

    Args:
        checkpoint_path: checkpoint to restore. When ``None`` the latest
            checkpoint recorded under ``FLAGS.checkpoint_dir`` is used.
    """
    batch_size = 100
    config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=FLAGS.testset,
                              feat_type=config.feat_type,
                              version=FLAGS.version,
                              var_suffix=_model_suffix)
    if checkpoint_path is None:
        state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = state.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    pred_labels, pred_scores = [], []
    true_scores, ids = [], []

    print('Running inference on split %s...' % FLAGS.testset)
    for step in range(reader.num_batches):
        if step % 10 == 0:
            update_progress(step / float(reader.num_batches))
        batch = reader.get_test_batch()
        # the last two entries of the batch are not fed to the model
        probs = sess.run(prob, feed_dict=model.fill_feed_dict(batch[:-2]))

        # probability of the ground-truth label, read before the last
        # column is cleared below
        gt = batch[3]
        true_scores.append(probs[np.arange(gt.size), gt])

        # the last class must never be predicted
        probs[:, -1] = 0
        pred_labels.append(np.argmax(probs, axis=1))
        pred_scores.append(np.max(probs, axis=1))
        ids.append(batch[-2])

    quest_ids = np.concatenate(ids)
    ans_ids = np.concatenate(pred_labels)
    ans_scores = np.concatenate(pred_scores)
    gt_scores = np.concatenate(true_scores)

    # save results
    tf.logging.info('Saving results')
    from util import save_hdf5
    save_hdf5(
        'data4/%sv2qa_%s_qa_scores.data' % (_model_suffix, FLAGS.testset), {
            'ext_quest_ids': quest_ids,
            'ext_cand_scores': gt_scores,
            'ext_cand_pred_labels': ans_ids,
            'ext_cand_pred_scores': ans_scores
        })
def test(checkpoint_path=None):
    """Run the restored model on ``TEST_SET`` and save answers and scores.

    Writes an HDF5 file with the question ids and the per-class scores, and
    a JSON result file with one ``{'answer', 'question_id'}`` entry per
    question.

    Args:
        checkpoint_path: checkpoint to restore. When ``None`` the latest
            checkpoint recorded under ``FLAGS.checkpoint_dir`` is used.

    Returns:
        ``(res_file, quest_ids)``: the path of the JSON result file and the
        concatenated question ids.
    """
    batch_size = 100
    config = ModelConfig()

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []
    ans_preds = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        ans_preds.append(generated_ans)
        # NOTE(review): this in-place write also changes the array stored in
        # ``ans_preds`` above (same object), so the saved scores have their
        # last column zeroed. Kept as is -- confirm that it is intended.
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)

        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    ans_preds = np.concatenate(ans_preds)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    data_file = 'data5/%s_%s_scores_flt.data' % (TEST_SET, FLAGS.model_type)
    from util import save_hdf5
    save_hdf5(data_file, {'quest_ids': quest_ids, 'ans_preds': ans_preds})
    # The context manager flushes and closes the result file before the
    # path is returned to the caller.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
aug_quest_id = question_id * 1000 + _pid res_i = { 'image_id': int(image_id), 'question_id': aug_quest_id, 'question': sentence } results.append(res_i) extend_questions += cur_pathes save_json(res_file, results) ext_quest_arr, ext_quest_len = put_to_array(extend_questions) ext_quest_ids = np.array(extended_question_ids, dtype=np.int32) save_hdf5( 'data_rl/%sivqa_%s_question_tokens.data' % (_model_suffix, FLAGS.subset), { 'ext_quest_arr': ext_quest_arr, 'ext_quest_len': ext_quest_len, 'ext_quest_ids': ext_quest_ids }) return res_file def main(_): from watch_model import ModelWatcher subset = FLAGS.subset def test_model(model_path): with tf.Graph().as_default(): res_file = ivqa_decoding_beam_search(checkpoint_path=model_path, subset=subset) cider = evaluate_oracle(res_file)
def backup_statistics(self):
    """Write the current loss and visit-count statistics to the cache file."""
    snapshot = {
        'loss': self._loss,
        'num_visit': self._num_visit
    }
    save_hdf5(self._cache_file, snapshot)
def process(K=1., use_global_thresh=False):
    """Filter generated question candidates by their predicted answer score.

    Loads the QA scores and question tokens of ``FLAGS.testset`` (their
    ``ext_quest_ids`` must be equal), keeps a subset of the candidates and
    saves them to the ``question_answers_fltmax`` file.

    Args:
        K: number of best-scoring candidates kept per seed question.
            Truncated with ``int``; only used when ``use_global_thresh`` is
            False.
        use_global_thresh: when True, keep every candidate whose score is
            above the fixed threshold 0.3 instead of the per-question top K.

    Raises:
        AssertionError: if the two files disagree on ``ext_quest_ids``.
    """
    _model_suffix = 'var_' if FLAGS.use_var else ''

    # load VQA scores
    d = load_hdf5('data4/%svqg_%s_qa_scores.data' %
                  (_model_suffix, FLAGS.testset))
    quest_ids = d['ext_quest_ids']
    ext_top_answer = d['ext_cand_pred_labels']
    cand_scores = d['ext_cand_pred_scores']

    # load QAs
    d = load_hdf5('data4/%svqg_%s_question_tokens.data' %
                  (_model_suffix, FLAGS.testset))
    ext_quest_arr = d['ext_quest_arr']
    ext_quest_len = d['ext_quest_len']
    seed_quest_ids = d['ext_quest_ids']
    assert (np.all(np.equal(quest_ids, seed_quest_ids)))

    num_all = quest_ids.shape[0]
    print(quest_ids.shape[0])

    # build index: first entry of each id pair -> rows of its candidates
    quest_id2index = {}
    for i, quest_id_tuple in enumerate(quest_ids):
        quest_id, _ = quest_id_tuple.tolist()
        quest_id2index.setdefault(quest_id, []).append(i)

    num = len(quest_id2index)
    if use_global_thresh:
        # NOTE(review): the threshold is fixed at 0.3. A rank-based value
        # was computed here but immediately overwritten by this constant;
        # that dead computation (which could raise IndexError) is dropped.
        thresh = 0.3
        print('Global thresh: %0.2f' % thresh)
        keep_tab = cand_scores > thresh
        quest_ids = quest_ids[keep_tab]
        ext_quest_arr = ext_quest_arr[keep_tab]
        ext_quest_len = ext_quest_len[keep_tab]
        ext_top_answer = ext_top_answer[keep_tab]
    else:
        # slice bounds must be integers; the default K is the float 1.
        top_k = int(K)
        slice_index = []
        for i, quest_id in enumerate(quest_id2index):
            if i % 1000 == 0:
                print('Processed %d/%d' % (i, num))
            _index = quest_id2index[quest_id]
            _scores = cand_scores[_index]
            # positions of the top_k highest scores within this group
            _max_score_idx = (-_scores).argsort()[:top_k]
            # map the positions back to global rows; indexing one at a time
            # works for K == 1 and K > 1 alike
            slice_index += [_index[_mci] for _mci in _max_score_idx]

        # get data
        quest_ids = quest_ids[slice_index]
        ext_quest_arr = ext_quest_arr[slice_index]
        ext_quest_len = ext_quest_len[slice_index]
        ext_top_answer = ext_top_answer[slice_index]

    # save
    save_hdf5('data4/%svqg_%s_question_answers_fltmax.data' %
              (_model_suffix, FLAGS.testset),
              {'ext_quest_arr': ext_quest_arr,
               'ext_quest_len': ext_quest_len,
               'ext_quest_ids': quest_ids,
               'ext_top_answer': ext_top_answer})
    print('%d/%d' % (ext_top_answer.size, num_all))