def visualise():
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # writer = ExperimentWriter('latex/examples_replay_buffer_rescore')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # d = load_json('vqa_replay_buffer/vqa_replay_low_rescore.json')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']
    # show 100 random entries
    keys = deepcopy(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]
    for i, quest_key in enumerate(vis_keys):
        pathes = memory[quest_key]
        if len(pathes) == 0:  # skip entries without valid questions
            continue
        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        questions = []
        for p in pathes.keys():
            conf1, conf2 = pathes[p]
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            descr = '%s (%0.2f-%0.2f)' % (sentence, conf1, conf2)
            questions.append(descr)
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()
def test():
    import json
    import numpy as np
    from w2v_answer_encoder import MultiChoiceQuestionManger

    model = StateClassifier(input_dim=512, phase='test')
    model.build()
    prob = model.prob
    # Load vocabulary
    # to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='val',
                                           answer_coding='sequence')
    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    # get data
    result = []
    reader = StateDataPetcher(batch_size=18, subset='dev',
                              shuffle=False, max_epoch=1)
    num = reader.num_samples
    for itr in range(num):
        feat, label, quest_id = reader.pop_batch()
        feed_dict = model.fill_feed_dict([feat])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        assert (np.unique(quest_id).size == 1)
        quest_id = quest_id[0]
        question = mc_manager.get_question(quest_id)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))
    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_state_dev_dev.json'
    json.dump(result, open(res_file, 'w'))
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
    return acc
def process():
    cands = load_results()
    model = N2MNWrapper()
    mc_ctx = MultiChoiceQuestionManger(subset='val')
    results = {}
    t = time()
    for i, res_key in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        res_i = cands[res_key]
        image_id = res_i['image_id']
        question = res_i['target']
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_answers, scores = model.inference(image_id, [question])
        sc = float(scores[0])
        pred_ans = pred_answers[0]
        is_valid = compare_answer(pred_ans, gt_answer)
        # if not is_valid:
        #     continue
        results[res_key] = {'pred_answer': pred_ans,
                            'pred_score': sc,
                            'gt_answer': gt_answer,
                            'is_valid': is_valid}
    save_json('result/n2mn_scores_final_v2.json', results)
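# `compare_answer` is called throughout this file but never defined here. The
# following is a minimal sketch of a plausible implementation, assuming it
# does a normalised exact match between the predicted and ground-truth answer
# strings; this is an assumption, not the original helper.
def compare_answer(pred, gt):
    def _normalise(s):
        # lower-case, strip basic punctuation, collapse whitespace
        return ' '.join(s.lower().replace('.', '').replace(',', '').split())
    return _normalise(pred) == _normalise(gt)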
def process():
    cands = load_results()
    model = N2MNWrapper()
    mc_ctx = MultiChoiceQuestionManger(subset='val')
    results = []
    t = time()
    for i, res_i in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        image_id = res_i['image_id']
        aug_id = res_i['question_id']
        question = res_i['question']
        question_id = int(aug_id / 1000)
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_answers, scores = model.inference(image_id, [question])
        sc = scores[0]
        pred_ans = pred_answers[0]
        is_valid = compare_answer(pred_ans, gt_answer)
        if not is_valid:
            continue
        t_i = {'image_id': int(image_id),
               'question_id': aug_id,
               'question': question,
               'score': float(sc)}
        results.append(t_i)
    save_json('result/vae_ia_van_n2mn_flt_full.json', results)
def process(model_type='mlb'):
    cands = load_results()
    if model_type == 'mlb':
        model = AttentionModel()
    else:
        model = VanillaModel()
    mc_ctx = MultiChoiceQuestionManger(subset='val')
    results = {}
    t = time()
    for i, res_key in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        res_i = cands[res_key]
        image_id = res_i['image_id']
        question = res_i['target']
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_ans, scores = model.get_score(image_id, question)
        sc = float(scores)
        is_valid = compare_answer(pred_ans, gt_answer)
        # if not is_valid:
        #     continue
        results[res_key] = {'pred_answer': pred_ans,
                            'pred_score': sc,
                            'gt_answer': gt_answer,
                            'is_valid': is_valid}
    save_json('result/%s_scores_final_v2.json' % model_type, results)
def test():
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    # top_ans_file = 'data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_enc = mc_ctx.encoder
    # quest_ids = mc_ctx._quest_id2image_id.keys()
    # quest_ids = np.array(quest_ids)
    # qids = np.random.choice(quest_ids, size=(5,), replace=False)
    create_fn = create_reader('VAQ-CA', 'train')
    reader = create_fn(batch_size=4, subset='kprestval')
    reader.start()
    for _ in range(20):
        # inputs = reader.get_test_batch()
        inputs = reader.pop_batch()
        _, _, _, _, labels, ans_seq, ans_len, quest_ids, image_ids = inputs
        b_top_ans = answer_enc.get_top_answers(labels)
        for i, (quest_id, i_a) in enumerate(zip(quest_ids, b_top_ans)):
            print('question id: %d' % quest_id)
            gt = mc_ctx.get_gt_answer(quest_id)
            print('GT: %s' % gt)
            print('Top: %s' % i_a)
            print('SG: top: %s' % to_sentence.index_to_top_answer(labels[i]))
            seq = ans_seq[i][:ans_len[i]].tolist()
            print('SG: seq: %s\n' % to_sentence.index_to_answer(seq))
    reader.stop()
def process():
    cands = load_results()
    model = AttentionModel()
    mc_ctx = MultiChoiceQuestionManger(subset='val')
    results = []
    t = time()
    for i, res_i in enumerate(cands):
        if i % 100 == 0:
            avg_time = (time() - t) / 100.
            print('%d/%d (%0.2f sec/sample)' % (i, len(cands), avg_time))
            t = time()
        image_id = res_i['image_id']
        aug_id = res_i['aug_id']
        question = res_i['target']
        # question_id = int(aug_id / 1000)
        question_id = res_i['question_id']
        gt_answer = mc_ctx.get_gt_answer(question_id)
        pred_ans, sc = model.get_score(image_id, question)
        is_valid = compare_answer(pred_ans, gt_answer)
        if not is_valid:
            continue
        t_i = {'image_id': int(image_id),
               'aug_id': aug_id,
               'question_id': question_id,
               'question': question,
               'score': float(sc)}
        results.append(t_i)
    save_json('result/bs_vis_scores_mlb2-att.json', results)
def test():
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob
    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')
    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))
    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    json.dump(result, open(res_file, 'w'))
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
def process(method, inf_type='rand'):
    if inf_type == 'rand':
        res_file = 'result/tmp_bs_RL2_final_%s.json' % method
    else:
        res_file = 'result/tmp_bs_RL2_final_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exists, skipped' % res_file)
        return
    # cands = load_results()
    model = _TYPE2Model[method]()
    mc_ctx = MultiChoiceQuestionManger(subset='val')
    task_data = load_lm_outputs(method, inf_type)

    belief_sets = {}
    t = time()
    num = len(task_data)
    for i, ans_key in enumerate(task_data.keys()):
        # time it
        avg_time = time() - t
        print('%d/%d (%0.2f sec/sample)' % (i, num, avg_time))
        t = time()
        # extract basic info
        cands = task_data[ans_key]
        quest_id = cands[0]['question_id']
        # gt_answer = mc_ctx.get_gt_answer(quest_id)
        image_id = mc_ctx.get_image_id(quest_id)
        image = mc_ctx.get_image_file(quest_id)
        # process
        gt_question = mc_ctx.get_question(quest_id)
        i_scores, i_questions = [], []
        for item in cands:
            target = item['question']
            pred_ans, vqa_score = model.get_score(image_id, target)
            # sanity check: keep only questions the VQA model answers correctly
            is_valid = compare_answer(pred_ans, ans_key)
            if not is_valid:
                continue
            i_questions.append(target)
            i_scores.append([float(vqa_score), item['score']])
        print('%d/%d' % (len(i_questions), len(cands)))
        bs_i = {'image': image,
                'image_id': image_id,
                'question': gt_question,
                'answer': ans_key,
                'belief_sets': i_questions,
                'belief_strength': i_scores}
        belief_sets[ans_key] = bs_i
    save_json(res_file, belief_sets)
def encode_answers(quest_ids, subset):
    ctx = MultiChoiceQuestionManger(subset='trainval', load_ans=True,
                                    answer_coding='sequence')
    answers = []
    for q_id in quest_ids:
        ans, seq = ctx.get_gt_answer_and_sequence_coding(q_id)
        answers += seq
    # merge answer sequences into a zero-padded matrix
    seq_len = [len(q) for q in answers]
    max_len = max(seq_len)
    num_capts = len(answers)
    ans_arr = np.zeros([num_capts, max_len], dtype=np.int32)
    for i, x in enumerate(ans_arr):
        x[:seq_len[i]] = answers[i]
    seq_len = np.array(seq_len, dtype=np.int32)
    vqa_data_file = 'data/answer_std_mscoco_%s.data' % subset
    save_hdf5(vqa_data_file, {'ans_arr': ans_arr, 'ans_len': seq_len})
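# A hypothetical round-trip check for the file written by encode_answers();
# load_hdf5 (from util) is the counterpart of the save_hdf5 call used in this
# codebase, and the 'val' subset name here is only an example.
def check_encoded_answers():
    from util import load_hdf5
    d = load_hdf5('data/answer_std_mscoco_val.data')
    ans_arr, ans_len = d['ans_arr'], d['ans_len']
    # recover the first answer sequence by trimming the zero padding
    seq_0 = ans_arr[0][:ans_len[0]].tolist()
    print(seq_0)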
def process(delta=0.2):
    # w2v_encoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='val')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VARDSDC_full.json')
    # load candidate scores
    score_list = load_json('result/var_vaq_beam_VAQ-VARDSDC_full_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}
    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}
    # get stats
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))
    # visualise
    vis_keys = dataset.keys()
    np.random.shuffle(vis_keys)
    for quest_id in vis_keys[:20]:
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        gt = ctx.get_question(quest_id).lower()
        print('\ngt: %s' % gt)
        for quest, sc in dataset[quest_id].items():
            print('%s (%0.3f)' % (quest, sc))
def convert():
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restoring from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder
    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)
    # check that encoding and decoding are consistent
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')
    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    embedding = sess.run(tf_embedding,
                         feed_dict={tf_answer_feed: ans_arr.astype(np.int32),
                                    tf_answer_len_feed: ans_arr_len.astype(np.int32)})
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
def _load_answer_type(self, quest_ids):
    answer_type_file = 'data/%sanswer_type_std_mscoco_%s.data' % (
        self._version_suffix, self._subset)
    if not os.path.exists(answer_type_file):
        _mc_ctx = MultiChoiceQuestionManger(subset='val')
        answer_type_ids = [_mc_ctx.get_answer_type_coding(quest_id)
                           for quest_id in quest_ids]
        answer_type_ids = np.array(answer_type_ids, dtype=np.int32)
        save_hdf5(answer_type_file,
                  {'answer_type': answer_type_ids,
                   'quest_ids': np.array(quest_ids, dtype=np.int32)})
    else:
        d = load_hdf5(answer_type_file)
        answer_type_ids = d['answer_type']
    self._answer_type = answer_type_ids
def load_vqa_predictions(subset):
    # data_file = '../iccv_vaq/data/sparse_vqa_scores_%s_0.h5' % subset
    data_file = '../iccv_vaq/data/sparse_vqa_scores_dev_5nogt.h5'
    d = load_hdf5(data_file)
    quest_ids = d['quest_ids']
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True)
    type_labels = []
    for qid in quest_ids:
        coding = mc_ctx.get_answer_type_coding(qid)
        type_labels.append(coding)
    type_labels = np.array(type_labels)
    unique_ids = np.unique(type_labels)
    print(np.bincount(type_labels))
    valid_types = [u'other', u'number', u'yes/no']
    yes_no_id = mc_ctx._answer_type2id['yes/no']
    # yes_no_id = mc_ctx._answer_type2id['other']
    # yes_no_id = mc_ctx._answer_type2id['number']
    is_yes_no = type_labels == yes_no_id
    candidates = d['top_k_index'][:, :3]
    num_yes_no = is_yes_no.sum()
    num_tot = candidates.shape[0]
    print('Yes/No: %d in %d' % (num_yes_no, num_tot))
    yes_top_ans_id = 0
    no_top_ans_id = 1
    yes_no_cands = candidates[is_yes_no]
    has_yes = (yes_no_cands == yes_top_ans_id).sum(axis=1) == 1
    print(has_yes.sum())
    has_no = (yes_no_cands == no_top_ans_id).sum(axis=1) == 1
    print(has_no.sum())
    yn_inter = np.logical_and(has_yes, has_no)
    yn_union = np.logical_or(has_yes, has_no)
    print('Either yes/no: %d' % yn_union.sum())
    print('Both yes/no: %d' % yn_inter.sum())
def __init__(self, subset='val', num_eval=None, need_im_feat=True,
             need_attr=False, use_ans_type=False, feat_type='res152'):
    anno_file = '../iccv_vaq/data/MultipleChoicesQuestionsKarpathy%sV2.0.json' % subset.title()
    self._subset = subset
    d = load_json(anno_file)
    self._id2type = d['candidate_types']
    self._annotations = d['annotation']
    if num_eval == 0:
        num_eval = len(self._annotations)
    self._num_to_eval = num_eval
    self._idx = 0
    self._need_attr = need_attr
    self._need_im_feat = need_im_feat
    self.num_samples = len(self._annotations)
    self._mc_ctx = MultiChoiceQuestionManger(subset='val')
def test(checkpoint_path=None):
    subset = 'kptest'
    config = ModelConfig()
    config.phase = 'other'
    use_answer_type = FLAGS.model_type in ['VAQ-IAS', 'VQG']
    config.model_type = FLAGS.model_type
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    # build data reader
    reader = Reader(batch_size=1, subset=subset, output_attr=True,
                    output_im=False, output_qa=True, output_capt=False,
                    output_ans_seq=True, attr_type='res152')
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path
    res_file = 'result/quest_vaq_%s_%s.json' % (FLAGS.model_type.upper(), subset)
    print(res_file)

    # build and restore model
    model = load_model_inferencer()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model,
                                                   to_sentence.question_vocab)

    results = []
    print('Running inference on split %s...' % subset)
    num_batches = reader.num_batches
    for i in range(num_batches):
        inputs, info, quest_gt_vis = pre_process_inputs(reader.get_test_batch(),
                                                        mc_ctx, use_answer_type)
        quest_id, image_id = info
        captions = generator.beam_search(sess, inputs)
        question = to_sentence.index_to_question(quest_gt_vis)
        # answer = to_sentence.index_to_top_answer(ans_feed)
        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        tmp = []
        for c, g in enumerate(captions[0:3]):
            quest = to_sentence.index_to_question(g.sentence)
            tmp.append(quest)
            print('<question %d>\t: %s' % (c, quest))
        # print('answer\t: %s\n' % answer)
        caption = captions[0]
        sentence = to_sentence.index_to_question(caption.sentence)
        res_i = {'image_id': image_id,
                 'question_id': quest_id,
                 'question': sentence}
        results.append(res_i)
    save_json(res_file, results)
    return res_file
def test():
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files,
                                 config.image_feature_key)
    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding=model_creator.ans_coding)
    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)
    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()
        # g.finalize()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result, rescore_data, state_rescore_data = [], [], []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)
        itr = 0
        while not reader.eof():
            if itr > 50000:  # cache at most 50k questions
                break
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            mc_ans, mc_coding = mc_manager.get_candidate_answer_and_word_coding(quest_id)
            inputs = reader_post_proc_fn(outputs, mc_coding)
            perplexity, state = sess.run([model.likelihood,
                                          model.final_decoder_state],
                                         feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            loss = perplexity[:, :-1].mean(axis=1)
            # generated = [generated[0]]  # sample 3
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_mc_ans = mc_ans[loss.argmin()]
            result.append({u'answer': top1_mc_ans, u'question_id': quest_id})
            # add hidden state saver
            label = mc_manager.get_binary_label(quest_id)
            state_sv = {'quest_id': quest_id, 'states': state, 'label': label}
            state_rescore_data.append(state_sv)
            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = loss.argsort()[:3].tolist()
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = mc_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)' % (i, t_mc_ans, loss[idx]))
            itr += 1
            # save information for training the classifier
            mc_label = np.array([a == answer for a in mc_ans], dtype=np.float32)
            quest_target = inputs[-2]
            datum = {'quest_seq': quest_target,
                     'perplex': perplexity,
                     'label': mc_label,
                     'quest_id': quest_id}
            rescore_data.append(datum)
    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
    json.dump(result, open(res_file, 'w'))
    tf.logging.info('Saving rescore data...')
    from util import pickle
    # pickle('data/rescore_dev.pkl', rescore_data)
    pickle('data/rescore_state_dev.pkl', state_rescore_data)
    tf.logging.info('Done!')
    return res_file, quest_ids
def extract_answer_proposals(checkpoint_path=None, subset='kpval'):
    batch_size = 100
    config = ModelConfig()
    # Get model function (was commented out, but model_fn is used below)
    model_fn = get_model_creation_fn(FLAGS.model_type)
    if FLAGS.append_gt:
        ann_set = 'train' if 'train' in subset else 'val'
        mc_ctx = MultiChoiceQuestionManger(subset=ann_set, load_ans=True,
                                           answer_coding='sequence')
    else:
        mc_ctx = None
    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=subset,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    # build and restore model
    model = model_fn(config, phase='test')
    # model.set_agent_ids([0])
    model.build()
    prob = model.prob
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    w2v_encoder = SentenceEncoder()
    # to_sentence = SentenceGenerator(trainset='trainval')

    cands_meta = []
    cands_scores = []
    cands_coding = []
    quest_ids = []
    is_oov = []
    print('Running inference on split %s...' % subset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        raw_ans = sess.run(prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        generated_ans = raw_ans.copy()
        generated_ans[:, -1] = -1.0  # by default do not predict UNK
        # print('Max: %0.3f, Min: %0.3f' % (raw_ans.max(), raw_ans.min()))
        gt_labels = outputs[-3]
        if FLAGS.append_gt:
            generated_ans[np.arange(gt_labels.size), gt_labels] = 10.0
        ans_cand_ids = np.argsort(-generated_ans, axis=1)
        q_ids = outputs[-2]
        if FLAGS.append_gt:
            assert (np.all(np.equal(ans_cand_ids[:, 0], gt_labels)))
        for quest_id, ids, cand_scs, _gt in zip(q_ids, ans_cand_ids,
                                                raw_ans, gt_labels):
            answers = []
            answer_w2v = []
            # check out-of-vocabulary
            is_oov.append(_gt == 2000)
            cands_scores.append(cand_scs[ids[:_K]][np.newaxis, :])
            for k in range(_K):
                aid = ids[k]
                if aid == 2000:  # gt is out of vocab
                    ans = mc_ctx.get_gt_answer(quest_id)
                else:
                    ans = to_sentence.index_to_top_answer(aid)
                answer_w2v.append(w2v_encoder.encode(ans))
                answers.append(ans)
            answer_w2v = np.concatenate(answer_w2v, axis=1)
            res_i = {'quest_id': int(quest_id), 'cands': answers}
            cands_meta.append(res_i)
            cands_coding.append(answer_w2v)
            quest_ids.append(quest_id)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    is_oov = np.array(is_oov, dtype=np.bool)
    labels = np.zeros_like(quest_ids, dtype=np.int32)
    cands_scores = np.concatenate(cands_scores, axis=0).astype(np.float32)
    cands_coding = np.concatenate(cands_coding, axis=0).astype(np.float32)
    save_hdf5('data3/vqa_ap_w2v_coding_%s.data' % subset,
              {'cands_w2v': cands_coding,
               'cands_scs': cands_scores,
               'quest_ids': quest_ids,
               'is_oov': is_oov,
               'labels': labels})
    save_json('data3/vqa_ap_cands_%s.meta' % subset, cands_meta)
    print('\n\nExtraction Done!')
    print('OOV percentage: %0.2f' % (100. * is_oov.sum() / reader._num))
def main():
    split = 'test'
    data_file = 'data/ivqa_multiple_choices_%s_questions.pkl' % split
    print(data_file)
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True)

    # find unique questions
    # questions = load_questions()
    question_ids = mc_ctx.get_question_ids()

    question_dict = {}
    answer_dict = {}
    question_id2question_key = {}

    # set question and answer keys
    unique_question_idx = 0
    answer_idx = 0
    for i, quest_id in enumerate(question_ids):
        if i % 1000 == 0:
            print('Metric Maker: parsed %d/%d questions' % (i, len(question_ids)))
        question = mc_ctx.get_question(quest_id)
        quest_key = _generate_key(question)
        question_id2question_key[quest_id] = quest_key
        if quest_key in question_dict:
            question_dict[quest_key]['counts'] += 1
        else:
            question_dict[quest_key] = {'counts': 1,
                                        'key_idx': unique_question_idx}
            unique_question_idx += 1
        # parse answers
        mc_answer = mc_ctx.get_gt_answer(quest_id)
        answer_key = _generate_key(mc_answer)
        if answer_key in answer_dict:
            answer_dict[answer_key]['quest_id'].append(quest_id)
        else:
            answer_dict[answer_key] = {'quest_id': [quest_id],
                                       'answer_idx': answer_idx}
            answer_idx += 1

    # sort questions by answer type
    quest_vocab = QuestionVocab(question_dict, question_id2question_key)
    quest_index_by_answer_type = sort_questions_by_answer_type(mc_ctx,
                                                               quest_vocab)
    # build basic data structure for iVQA
    dataset = build_candidate_answers(answer_dict, split=split)
    # add popular questions
    dataset = add_popular_questions(dataset, mc_ctx, quest_vocab,
                                    quest_index_by_answer_type)
    # add contrastive questions
    dataset = add_contrastive_questions(dataset, mc_ctx, quest_vocab, num=100)
    # add plausible questions
    dataset = add_plausible_questions(dataset, quest_vocab, num=100)
    # add random questions
    dataset = add_random_questions(dataset, mc_ctx, quest_vocab, answer_dict,
                                   quest_index_by_answer_type, num=200)
    # save data
    pickle(data_file, {'dataset': dataset, 'quest_vocab': quest_vocab})
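# `_generate_key` is referenced above but not defined in this file. A minimal
# sketch, under the assumption that it canonicalises a sentence (lower-case,
# tokenised, punctuation stripped) so that duplicate questions and answers map
# to the same dictionary key; this is an assumption, not the original helper.
def _generate_key(sentence):
    from nltk.tokenize import word_tokenize
    tokens = word_tokenize(sentence.lower())
    # keep alphanumeric tokens only, join with single spaces
    return ' '.join([t for t in tokens if t.isalnum()])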
class PredictionVisualiser(object):
    def __init__(self, model_name, K=3, do_plot=True):
        self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                                 load_ans=True)
        self._rev_map = SentenceGenerator(trainset='trainval')
        self._top_k = K
        self._do_plot = do_plot
        self._model_name = model_name
        self._cache_dir = 'att_maps/%s' % self._model_name
        mkdir_if_missing(self._cache_dir)

    def plot(self, quest_id, scores, att_map):
        if type(quest_id) != int:
            quest_id = int(quest_id)
        scores = scores.flatten()
        if scores.size == 2001:
            scores[-1] = 0
        # show question and gt answer
        question = self._gt_mgr.get_question(quest_id)
        gt_ans = self._gt_mgr.get_gt_answer(quest_id)
        print('\n====================================')
        print('Q: %s' % question)
        print('A: %s' % gt_ans)
        # show top k predictions
        index = (-scores).argsort()[:self._top_k]
        for idx in index:
            pred_ans = self._rev_map.index_to_top_answer(idx)
            print('P: %-20s\t(%0.2f)' % (pred_ans, scores[idx]))
        print('\n')
        # show image
        im_file = self._gt_mgr.get_image_file(quest_id)
        im = imread(im_file)
        if im.ndim == 2:  # grayscale to RGB (np.rank is deprecated)
            im = np.tile(im[:, :, np.newaxis], [1, 1, 3])
        if self._do_plot:
            imshow(im)
            plt.show()
        else:
            self.save_cache_file(quest_id, im, att_map, question)
            return
        # show attention map
        tokens = _tokenize_sentence(question)
        self._show_attention_map(im, att_map, tokens)

    def save_cache_file(self, quest_id, im, att_map, question):
        from scipy.io import savemat
        sv_path = os.path.join(self._cache_dir, '%d.mat' % quest_id)
        savemat(sv_path, {'im': im, 'att_map': att_map, 'quest': question})

    def _show_attention_map(self, im, att_map, tokens):
        att_map = att_map.reshape([-1, 14, 14])
        num = att_map.shape[0]
        tokens = [' '.join(tokens)]  # merge into a sentence
        # mean_map = att_map.mean(axis=0)[np.newaxis, ::]
        # att_map = np.concatenate([att_map, mean_map], axis=0)
        # tokens.append('average')
        # render and plot
        for i, am in enumerate(att_map):
            am = resize(am, im.shape[:2], preserve_range=True)
            am = am / am.max()
            v = im * am[:, :, np.newaxis]
            v = np.minimum(np.round(v).astype(np.uint8), 255)
            if self._do_plot:
                imshow(v)
                plt.title('%s <%d>' % (tokens[0], i))
                plt.show()
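# `_tokenize_sentence` is used by PredictionVisualiser (and by convert() above)
# but not defined here. A plausible sketch, assuming it is a thin wrapper over
# NLTK's word_tokenize, consistent with the word_tokenize usage elsewhere in
# this codebase; this is an assumption, not the original helper.
def _tokenize_sentence(sentence):
    from nltk.tokenize import word_tokenize
    return word_tokenize(sentence.lower())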
class MCAnnotator(object):
    def __init__(self, result_file, subset='val'):
        self._subset = subset
        self.results = load_json(result_file)
        self.num = len(self.results)
        self._im_root = get_image_feature_root()
        self.prog_str = ''
        self.mc_ctx = MultiChoiceQuestionManger(subset='val')

    def set_progress(self, prog_str):
        self.prog_str = prog_str

    def collect_annotation(self, idx):
        info = self.results[idx]
        ratings = ['Perfect', 'Correct', 'Wrong']
        # get info
        image_id = info['image_id']
        question_id = info['question_id']
        question = info['question']
        answer = self.mc_ctx.get_gt_answer(question_id)
        # load image
        fname = 'COCO_val2014_%012d.jpg' % image_id
        im_path = os.path.join(self._im_root, 'val2014', fname)
        im = imread(im_path)
        plt.imshow(im)
        plt.draw()

        # print header
        def print_head():
            print('=========== %s ===========' % self.prog_str)

        os.system('clear')
        print_head()
        while True:
            print('Question: %s' % question)
            print('Answer: %s' % answer)
            plt.show(block=False)
            instruct = '******************************************************************\n' \
                       'Please choose any of the questions that hold for this image and answer.\n' \
                       'If any holds, type in the number in front. If not, press enter. If\n' \
                       'multiple questions hold, please separate them with commas, no spaces.\n' \
                       '******************************************************************\n\n'
            usr_input = raw_input(instruct)
            if _is_int(usr_input):
                r_idx = int(usr_input)
                if r_idx >= 3:
                    print('Should be in [0, 1, 2]')
                    continue
            else:
                print('illegal input, choose again')
                continue
            # verify
            r_str = ratings[r_idx]
            print('\nYour rating is %d [%s]:' % (r_idx, r_str))
            usr_input = raw_input("Press c to confirm, r to undo...")
            if usr_input == 'c':
                break
            else:
                continue
        anno = {'question_id': question_id, 'rating': r_idx}
        return anno
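# `_is_int` is used by MCAnnotator.collect_annotation but not defined here;
# a minimal sketch assuming it simply tests whether the user input parses as
# an integer.
def _is_int(s):
    try:
        int(s)
        return True
    except ValueError:
        return False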
def main(_):
    mc = MultiChoiceQuestionManger(subset='trainval',
                                   answer_coding='word2vec')
    # process_dataset(mc, 'kptrain')
    # process_dataset(mc, 'kpval')
    process_dataset(mc, 'kptest')
class MultipleChoiceEvaluater(object):
    def __init__(self, subset='val', num_eval=None, need_im_feat=True,
                 need_attr=False, use_ans_type=False, feat_type='res152'):
        anno_file = 'data/MultipleChoicesQuestionsKarpathy%sV2.0.json' % subset.title()
        self._subset = subset
        d = load_json(anno_file)
        self._id2type = d['candidate_types']
        self._annotations = d['annotation']
        if num_eval == 0:
            num_eval = len(self._annotations)
        self._num_to_eval = num_eval
        self._idx = 0
        self._need_attr = need_attr
        self._need_im_feat = need_im_feat
        self._quest_encoder = SentenceEncoder('question')
        self._answer_encoder = SentenceEncoder('answer')
        self._im_encoder = MCDataFetcher(subset='kp%s' % subset,
                                         feat_type=feat_type)
        self.num_samples = len(self._annotations)
        self._mc_ctx = MultiChoiceQuestionManger(subset='val')
        self._group_by_answer_type()
        self._use_ans_type = use_ans_type

    def get_task_data(self):
        info = self._annotations[self._idx]
        questions = info['questions']
        answer = info['answer']
        answer_idx = info['answer_id']
        image_id = info['image_id']
        quest_id = int(info['coco_question_ids'][0])
        # prepare outputs
        outputs = []
        if self._need_im_feat:
            im_feat = self._im_encoder.get_image_feature(image_id)
            outputs.append(im_feat)
        if self._need_attr:
            attr = self._im_encoder.get_attribute_feature(image_id)
            outputs.append(attr)
        quest, quest_len = self._quest_encoder.encode_sentences(questions)
        if self._use_ans_type:
            ans_type = add_answer_type(quest_id, self._mc_ctx)
            outputs += [quest, quest_len, None, ans_type, answer_idx, image_id]
        else:
            ans, ans_len = self._answer_encoder.encode_sentences(answer)
            outputs += [quest, quest_len, None, ans, ans_len, answer_idx,
                        image_id]
        self._idx += 1
        return outputs

    def get_labels(self, answer_ids):
        answer_id2labels = {info['answer_id']: info['labels']
                            for info in self._annotations}
        type_mat = []
        for ans_id in answer_ids:
            labels = np.array(answer_id2labels[ans_id])
            type_mat.append(labels[np.newaxis, :])
        type_mat = np.concatenate(type_mat, axis=0)
        return (type_mat == 0).argmax(axis=1)

    def _group_by_answer_type(self):
        self.answer_ids_per_type = {}
        for info in self._annotations:
            for quest_id in info['coco_question_ids']:
                answer_id = info['answer_id']
                type_str = self._mc_ctx.get_answer_type(quest_id)
                self.answer_ids_per_type.setdefault(type_str, []).append(answer_id)

    @staticmethod
    def _get_intersect_table(pool, target):
        # create hashing table
        hash_tab = {k: 0 for k in target}
        return np.array([c in hash_tab for c in pool])

    def evaluate_results(self, answer_ids, scores, model_type=None):
        types, results = [], []
        # ALL
        cmc = self._evaluate_worker(answer_ids, scores, 'ALL')
        results.append(cmc)
        types.append('all')
        # per answer type
        for type in self.answer_ids_per_type.keys():
            target = np.array(self.answer_ids_per_type[type])
            sel_tab = self._get_intersect_table(answer_ids, target)
            cmc = self._evaluate_worker(answer_ids[sel_tab],
                                        scores[sel_tab, :], type)
            results.append(cmc)
            types.append(type)
        results = np.concatenate(results, axis=0)
        if model_type is not None:
            from scipy.io import savemat
            res_file = 'result/mc_%s_result.mat' % model_type.lower()
            savemat(res_file, {'cmc': results, 'types': types})

    def _evaluate_worker(self, answer_ids, scores, type):
        answer_id2labels = {info['answer_id']: info['labels']
                            for info in self._annotations}
        type_mat = []
        for ans_id in answer_ids:
            labels = np.array(answer_id2labels[ans_id])
            type_mat.append(labels[np.newaxis, :])
        type_mat = np.concatenate(type_mat, axis=0)
        gt_mask = np.equal(type_mat, 0)
        gt_scores = []
        for i, (gt, score) in enumerate(zip(gt_mask, scores)):
            gt_scores.append(score[gt].max())
        # find the rank of the gt scores
        gt_scores = np.array(gt_scores)[:, np.newaxis]
        sorted_scores = -np.sort(-scores, axis=1)
        gt_rank = np.equal(sorted_scores, gt_scores).argmax(axis=1)
        # print('\nMean rank: %0.2f' % gt_rank.mean())
        # compute cmc
        num, num_cands = gt_mask.shape
        cmc = np.zeros(num_cands, dtype=np.float32)
        for i in range(num_cands):
            cmc[i] = np.less_equal(gt_rank, i).sum()
        cmc = cmc / num * 100.
        print('\n======= type %s =======' % type.upper())
        print('---------- cmc -----------')
        print('Top 1: %0.2f' % cmc[0])
        print('Top 3: %0.2f' % cmc[2])
        print('Top 10: %0.2f' % cmc[9])
        # top 1 analysis
        self.top1_analysis(scores, type_mat)
        return cmc[np.newaxis, :]

    def top1_analysis(self, scores, type_mat):
        # print('======= Top 1 analysis =======')
        print('--------- top 1 -----------')
        pred_labels = scores.argmax(axis=1)
        types = np.zeros_like(pred_labels)
        for i, idx in enumerate(pred_labels):
            types[i] = type_mat[i, idx]
        bin_count = np.bincount(types)
        num = pred_labels.size
        for i, c in enumerate(bin_count):
            type_str = self._id2type[str(i)]
            pnt = float(c) * 100. / num
            print('%s: %02.2f' % (type_str, pnt))
        print('\n')

    def prediction_examples(self):
        pass
def process(delta=0.2):
    w2v_encoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='train')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain.json')
    # load candidate scores
    score_list = load_json('result/var_vaq_beam_VAQ-VAR_full_kptrain_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}
    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = score
        else:
            dataset[question_id] = {question: score}
    # get stats
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))
    # build tuples
    num_pairs = 0
    offset = 0
    cst_pairs = []
    image_ids, quest_ids, question_w2v, answer_w2v = [], [], [], []
    num_task = len(dataset)
    t = time()
    for _i, (quest_id, item) in enumerate(dataset.items()):
        if _i % 1000 == 0:
            print('processed: %d/%d (%0.2f sec./batch)' % (_i, num_task,
                                                           time() - t))
            t = time()
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        assert (image_id == question_id2image_id[quest_id])
        gt = ctx.get_question(quest_id).lower()
        gt = ' '.join(word_tokenize(gt))
        include_gt = np.any(np.array(item.values()) == 10.)
        sc, ps = [], []
        if gt not in item and not include_gt:
            item[gt] = 10.
        for q, s in item.items():
            sc.append(s)
            ps.append(q)
        sc = np.array(sc, dtype=np.float32)
        _this_n = len(ps)
        path_ind = np.arange(_this_n) + offset
        # data checking and assertion
        try:
            assert (np.sum(sc == 10.) <= 1)  # at most one gt
        except Exception as e:
            ind = np.where(sc == 10.)[0]
            for _idx in ind:
                print('%s' % ps[_idx])
            raise e
        # find contrastive pairs
        diff = sc[np.newaxis, :] - sc[:, np.newaxis]
        valid_entries = diff >= delta
        neg, pos = np.where(valid_entries)
        assert (np.all(np.greater_equal(sc[pos] - sc[neg], delta)))
        pos_q_ind = path_ind[pos]
        neg_q_ind = path_ind[neg]
        # save
        _this_pairs = [[p, n] for p, n in zip(pos_q_ind, neg_q_ind)]
        cst_pairs += _this_pairs
        # encode answer
        _ans_w2v = w2v_encoder.encode(ans)
        ans_w2v = np.tile(_ans_w2v, [_this_n, 1])
        answer_w2v.append(ans_w2v)
        # encode questions
        for p in ps:
            _q_w2v = w2v_encoder.encode(p)
            question_w2v.append(_q_w2v)
            image_ids.append(image_id)
            quest_ids.append(quest_id)
        # update pointer
        offset += _this_n
        num_pairs += _this_n
    print('Total pairs: %d' % num_pairs)
    # merge
    cst_pairs = np.array(cst_pairs, dtype=np.int32)
    image_ids = np.array(image_ids, dtype=np.int32)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    answer_w2v = np.concatenate(answer_w2v, axis=0).astype(np.float32)
    question_w2v = np.concatenate(question_w2v, axis=0).astype(np.float32)
    from util import save_hdf5
    sv_file = 'result/cst_ranking_kptrain_delta%g.data' % delta
    save_hdf5(sv_file, {'cst_pairs': cst_pairs,
                        'image_ids': image_ids,
                        'quest_ids': quest_ids,
                        'answer_w2v': answer_w2v,
                        'question_w2v': question_w2v})
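# A minimal self-check of the contrastive-pair mining used in process() above:
# a pair (pos, neg) is kept iff sc[pos] - sc[neg] >= delta, i.e. the positive
# question outscores the negative by at least the margin. The function and
# example values here are illustrative, not part of the original pipeline.
def mine_pairs(sc, delta=0.2):
    import numpy as np
    # diff[n, p] = sc[p] - sc[n], so rows index negatives, columns positives
    diff = sc[np.newaxis, :] - sc[:, np.newaxis]
    neg, pos = np.where(diff >= delta)
    return list(zip(pos.tolist(), neg.tolist()))

if __name__ == '__main__':
    import numpy as np
    sc = np.array([10., 0.5, 0.9], dtype=np.float32)
    # expected (pos, neg) pairs: (0, 1), (2, 1), (0, 2)
    print(mine_pairs(sc))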
class MultipleChoiceEvaluater(object):
    def __init__(self, subset='val'):
        anno_file = 'data/MultipleChoicesQuestionsKarpathy%s.json' % subset.title()
        self._subset = subset
        d = load_json(anno_file)
        self._id2type = d['candidate_types']
        self._annotations = d['annotation']
        self._idx = 0
        self.num_samples = len(self._annotations)
        self._mc_ctx = MultiChoiceQuestionManger(subset='val')
        self._group_by_answer_type()

    def update_annotation(self, do_update=True):
        man_file = 'data/distractor_analysis.json'
        anno = load_json(man_file)['annotation']
        hash_tab = {d['answer_idx']: d['confused'] for d in anno}
        if not do_update:
            return np.array([d['answer_idx'] for d in anno if d['confused']])
            # return np.array(hash_tab.keys())
        for datum in self._annotations:
            ans_id = datum['answer_id']
            datum['labels'] = np.array(datum['labels'])
            if ans_id in hash_tab:
                conf_ids = hash_tab[ans_id]
                if conf_ids:
                    tmp_ids = np.array(conf_ids)
                    datum['labels'][tmp_ids] = 0
        return np.array(hash_tab.keys())

    def get_labels(self, answer_ids):
        answer_id2labels = {info['answer_id']: info['labels']
                            for info in self._annotations}
        type_mat = []
        for ans_id in answer_ids:
            labels = np.array(answer_id2labels[ans_id])
            type_mat.append(labels[np.newaxis, :])
        type_mat = np.concatenate(type_mat, axis=0)
        return (type_mat == 0).argmax(axis=1)

    def _group_by_answer_type(self):
        self.answer_ids_per_type = {}
        for info in self._annotations:
            for quest_id in info['coco_question_ids']:
                answer_id = info['answer_id']
                type_str = self._mc_ctx.get_answer_type(quest_id)
                self.answer_ids_per_type.setdefault(type_str, []).append(answer_id)

    @staticmethod
    def _get_intersect_table(pool, target):
        # create hashing table
        hash_tab = {k: 0 for k in target}
        return np.array([c in hash_tab for c in pool])

    def evaluate_results(self, answer_ids, scores, model_type=None):
        types, results = [], []
        # ALL
        cmc = self._evaluate_worker(answer_ids, scores, 'ALL')
        results.append(cmc)
        types.append('all')
        # per answer type
        for type in self.answer_ids_per_type.keys():
            target = np.array(self.answer_ids_per_type[type])
            sel_tab = self._get_intersect_table(answer_ids, target)
            cmc = self._evaluate_worker(answer_ids[sel_tab],
                                        scores[sel_tab, :], type)
            results.append(cmc)
            types.append(type)
        results = np.concatenate(results, axis=0)
        if model_type is not None:
            from scipy.io import savemat
            res_file = 'result/mc_%s_result.mat' % model_type.lower()
            savemat(res_file, {'cmc': results, 'types': types})

    def _evaluate_worker(self, answer_ids, scores, type):
        answer_id2labels = {info['answer_id']: info['labels']
                            for info in self._annotations}
        type_mat = []
        for ans_id in answer_ids:
            labels = np.array(answer_id2labels[ans_id])
            type_mat.append(labels[np.newaxis, :])
        type_mat = np.concatenate(type_mat, axis=0)
        gt_mask = np.equal(type_mat, 0)
        gt_scores = []
        for i, (gt, score) in enumerate(zip(gt_mask, scores)):
            gt_scores.append(score[gt].max())
        # find the rank of the gt scores
        gt_scores = np.array(gt_scores)[:, np.newaxis]
        sorted_scores = -np.sort(-scores, axis=1)
        gt_rank = np.equal(sorted_scores, gt_scores).argmax(axis=1)
        # print('\nMean rank: %0.2f' % gt_rank.mean())
        # compute cmc
        num, num_cands = gt_mask.shape
        cmc = np.zeros(num_cands, dtype=np.float32)
        for i in range(num_cands):
            cmc[i] = np.less_equal(gt_rank, i).sum()
        cmc = cmc / num * 100.
        print('\n======= type %s =======' % type.upper())
        print('---------- cmc -----------')
        print('Top 1: %0.3f' % cmc[0])
        print('Top 5: %0.3f' % cmc[4])
        print('Top 10: %0.3f' % cmc[9])
        # top 1 analysis
        self.top1_analysis(scores, type_mat)
        return cmc[np.newaxis, :]

    def top1_analysis(self, scores, type_mat):
        # print('======= Top 1 analysis =======')
        print('--------- top 1 -----------')
        pred_labels = scores.argmax(axis=1)
        types = np.zeros_like(pred_labels)
        for i, idx in enumerate(pred_labels):
            types[i] = type_mat[i, idx]
        bin_count = np.bincount(types)
        num = pred_labels.size
        for i, c in enumerate(bin_count):
            type_str = self._id2type[str(i)]
            pnt = float(c) * 100. / num
            print('%s: %02.2f' % (type_str, pnt))
        print('\n')
def process():
    w2v_encoder = SentenceEncoder()
    # load gt and answer manager
    ctx = MultiChoiceQuestionManger(subset='val')
    # load candidates
    candidates = load_json('result/var_vaq_beam_VAQ-VAR_full_kptest.json')
    # load candidate scores
    score_list = load_json('result/var_vaq_beam_VAQ-VAR_full_kptest_oracle_dump.json')
    score_d = {item['aug_quest_id']: item['CIDEr'] for item in score_list}
    # loop over questions
    dataset = {}
    unk_image_ids = []
    question_id2image_id = {}
    for item in candidates:
        aug_id = item['question_id']
        question = item['question']
        image_id = item['image_id']
        unk_image_ids.append(image_id)
        question_id = int(aug_id / 1000)
        score = score_d[aug_id]
        question_id2image_id[question_id] = image_id
        if question_id in dataset:
            assert (question not in dataset[question_id])
            dataset[question_id][question] = (score, aug_id)
        else:
            dataset[question_id] = {question: (score, aug_id)}
    # get stats
    unk_image_ids = set(unk_image_ids)
    num_images = len(unk_image_ids)
    print('Found %d unique keys from %d images' % (len(dataset), num_images))
    print('%0.3f questions on average' % (len(dataset) / float(num_images)))
    # build tuples
    num_pairs = 0
    offset = 0
    image_ids, quest_ids, aug_quest_ids = [], [], []
    question_w2v, answer_w2v, scores = [], [], []
    num_task = len(dataset)
    t = time()
    for _i, (quest_id, item) in enumerate(dataset.items()):
        if _i % 1000 == 0:
            print('processed: %d/%d (%0.2f sec./batch)' % (_i, num_task,
                                                           time() - t))
            t = time()
        ans = ctx.get_gt_answer(quest_id)
        image_id = ctx.get_image_id(quest_id)
        assert (image_id == question_id2image_id[quest_id])
        ps = []
        for q, (s, aug_id) in item.items():
            ps.append(q)
            aug_quest_ids.append(aug_id)
            scores.append(s)
        _this_n = len(ps)
        # encode answer
        _ans_w2v = w2v_encoder.encode(ans)
        ans_w2v = np.tile(_ans_w2v, [_this_n, 1])
        answer_w2v.append(ans_w2v)
        # encode questions
        for p in ps:
            _q_w2v = w2v_encoder.encode(p)
            question_w2v.append(_q_w2v)
            image_ids.append(image_id)
            quest_ids.append(quest_id)
        # update pointer
        offset += _this_n
        num_pairs += _this_n
    print('Total pairs: %d' % num_pairs)
    # merge
    image_ids = np.array(image_ids, dtype=np.int32)
    quest_ids = np.array(quest_ids, dtype=np.int32)
    scores = np.array(scores, dtype=np.float32)
    aug_quest_ids = np.array(aug_quest_ids, dtype=np.int64)
    answer_w2v = np.concatenate(answer_w2v, axis=0).astype(np.float32)
    question_w2v = np.concatenate(question_w2v, axis=0).astype(np.float32)
    from util import save_hdf5
    sv_file = 'result/cst_ranking_kptest.data'
    save_hdf5(sv_file, {'image_ids': image_ids,
                        'quest_ids': quest_ids,
                        'aug_quest_ids': aug_quest_ids,
                        'scores': scores,
                        'answer_w2v': answer_w2v,
                        'question_w2v': question_w2v})