import numpy as np
from time import time

# NOTE: helpers such as post_process_variation_questions_noise,
# wrap_samples_for_language_model(_v2), _parse_gt_questions,
# prepare_reinforce_data, correct_language_model_inputs, correct_vqa_labels,
# concat_vqa_batch, and the module-level buffers _replay_buffer and _Q_CTX
# are defined elsewhere in the repository.


def reinforce_trainstep(reader, model, env, sess, task_ops):
    outputs = reader.pop_batch()
    quest_ids, images, quest, quest_len, top_ans_ids, ans, ans_len = outputs

    # sample question variations conditioned on (image, answer)
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)
    wrapped_sampled, sampled_flat = wrap_samples_for_language_model_v2(
        sampled=pathes, pad_token=model.pad_token - 1, max_length=20)

    def _show_examples(arr, arr_len, _rewards, name):
        # debugging helper: decode sampled questions and print their rewards
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % name)
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:  # strip the end-of-sequence token
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute rewards for the sampled questions
    vqa_inputs = [images, ans, ans_len, top_ans_ids]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=model.pad_token)
    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')

    # assemble the REINFORCE batch, dropping samples with OOV answers
    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
               max_noise, max_rewards, rewards_all]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove OOV
    avg_reward = max_rewards.mean()

    # run the train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # update the language model from the replay buffer, then add this batch
    lm_scores = rewards_all[:, 2].flatten()
    env.lm.trainstep(_replay_buffer.get_batch())
    _replay_buffer.insert(sampled_flat, lm_scores)
    return sess_outputs
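# The replay buffer above is only used through its insert()/get_batch()
# interface. Below is a minimal sketch of such a buffer; the internals
# (capacity, eviction policy, uniform sampling, storing (question, score)
# pairs) are assumptions for illustration, not the repository's actual
# implementation.


class QuestionReplayBuffer(object):
    """Hypothetical fixed-capacity buffer of (question, lm_score) pairs."""

    def __init__(self, capacity=10000, batch_size=64):
        self._capacity = capacity
        self._batch_size = batch_size
        self._questions = []   # each entry is a 1-D int array of token ids
        self._scores = []      # matching language-model scores

    def insert(self, questions, scores):
        for q, s in zip(questions, scores):
            self._questions.append(q)
            self._scores.append(s)
        # evict the oldest entries once over capacity
        if len(self._questions) > self._capacity:
            overflow = len(self._questions) - self._capacity
            self._questions = self._questions[overflow:]
            self._scores = self._scores[overflow:]

    def get_batch(self):
        # sample without replacement; callers should ensure the buffer
        # is non-empty before the first training step
        n = min(self._batch_size, len(self._questions))
        idx = np.random.choice(len(self._questions), size=n, replace=False)
        return ([self._questions[i] for i in idx],
                np.array([self._scores[i] for i in idx], dtype=np.float32))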
def reinforce_trainstep(reader, model, env, sess, task_ops):
    # variant that also feeds the raw res5c feature map to the VQA reward model
    outputs = reader.pop_batch()
    quest_ids, res5c, images, quest, quest_len, top_ans, ans, ans_len = outputs

    # sample question variations conditioned on (image, answer)
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)
    wrapped_sampled, sampled_flat = wrap_samples_for_language_model_v2(
        sampled=pathes, pad_token=model.pad_token - 1, max_length=20)

    # compute rewards
    vqa_inputs = [images, res5c, ans, ans_len, top_ans]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=model.pad_token)

    # assemble the REINFORCE batch, dropping samples with OOV answers
    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
               max_noise, max_rewards, rewards_all]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove OOV
    avg_reward = max_rewards.mean()

    # run the train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # update the language model from the replay buffer, then add this batch
    lm_scores = rewards_all[:, 2].flatten()
    env.lm.trainstep(_replay_buffer.get_batch())
    _replay_buffer.insert(sampled_flat, lm_scores)
    return sess_outputs
def reinforce_trainstep(reader_outputs, model, env, sess, task_ops, _VQA_Belief):
    # variant that takes an already-popped batch and tracks a VQA belief buffer
    res5c, images, quest, quest_len, top_ans, ans, ans_len, quest_ids, image_ids = \
        reader_outputs

    # sample question variations conditioned on (image, answer)
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)
    lm_inputs = wrap_samples_for_language_model(
        sampled=pathes, pad_token=model.pad_token - 1,
        gts=[quest, quest_len], max_length=20)

    def _show_examples(arr, arr_len, _rewards, name):
        # debugging helper: decode sampled questions and print their rewards
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % name)
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:  # strip the end-of-sequence token
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute rewards
    vqa_inputs = [images, res5c, ans, ans_len, top_ans]
    wrapped_sampled = lm_inputs[:2]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=model.pad_token)

    # record VQA confidence for samples that pass the language-model threshold
    vqa_scores = rewards_all[:, 0]
    language_scores = rewards_all[:, 2]
    belief_scores = vqa_scores * (language_scores > env.language_thresh)
    new_pathes = _parse_gt_questions(max_path_arr, max_path_len)
    _VQA_Belief.insert(new_pathes, belief_scores)
    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')

    # assemble the REINFORCE batch, dropping samples with OOV answers
    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
               max_noise, max_rewards, rewards_all]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove OOV
    avg_reward = max_rewards.mean()

    # run the train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # language-model update, currently disabled in this variant
    if False:
        wrapped_gt = _Q_CTX.get_gt_batch(*lm_inputs[2:])  # random sample of GT questions
        corrected_inputs = correct_language_model_inputs(
            wrapped_sampled + wrapped_gt, is_gt)
        if min(wrapped_sampled[1].size, wrapped_gt[1].size) > 0:
            env.lm.trainstep(corrected_inputs)
    return sess_outputs
def reinforce_trainstep(reader, model, env, sess, task_ops):
    # variant without answer labels in the VQA reward inputs
    outputs = reader.pop_batch()
    images, quest, quest_len, ans, ans_len = outputs

    # sample question variations conditioned on (image, answer)
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)

    # wrap sampled (fake) and ground-truth (real) questions for the language model
    lm_inputs = wrap_samples_for_language_model(
        sampled=pathes, pad_token=model.pad_token - 1,
        gts=[quest, quest_len], max_length=20)

    def _show_examples(arr, arr_len, _rewards, name):
        # debugging helper: decode sampled questions and print their rewards
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % name)
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:  # strip the end-of-sequence token
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute rewards
    vqa_inputs = [images, ans, ans_len]
    wrapped_sampled = lm_inputs[:2]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len], [vqa_inputs, wrapped_sampled, scores])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=model.pad_token)
    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')

    # assemble the REINFORCE batch, dropping samples with OOV answers
    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
               max_noise, max_rewards, rewards_all]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove OOV
    avg_reward = max_rewards.mean()

    # run the train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # update the language model; 80 is the assumed number of samples per batch,
    # and the `or True` deliberately bypasses the half-generated threshold
    num_fake_in_batch = 80 - is_gt.sum()
    if num_fake_in_batch > 50 or True:  # at least half is generated
        wrapped_gt = _Q_CTX.get_gt_batch(*lm_inputs[2:])  # random sample of GT questions
        corrected_inputs = correct_language_model_inputs(
            wrapped_sampled + wrapped_gt, is_gt)
        if num_fake_in_batch > 0:
            env.lm.trainstep(corrected_inputs)
    return sess_outputs
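# One plausible reading of correct_language_model_inputs, based on how it is
# called above: sampled questions flagged by `is_gt` (i.e., that exactly
# reproduce a ground-truth question) should not be trained as negatives, so
# they are dropped from the fake set. This is a hypothetical sketch; the
# repository's actual implementation may instead promote such samples into
# the real set.


def correct_language_model_inputs_sketch(lm_inputs, is_gt):
    """Return [fake_arr, fake_len, real_arr, real_len] with GT-matching
    samples removed from the fake (sampled) half."""
    fake_arr, fake_len, real_arr, real_len = lm_inputs
    keep = np.logical_not(is_gt.astype(np.bool_))
    return [fake_arr[keep], fake_len[keep], real_arr, real_len]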
def reinforce_trainstep(reader, model, env, sess, task_ops):
    # variant that additionally retrains the VQA model on augmented data
    outputs = reader.pop_batch()
    quest_ids, images, quest, quest_len, top_ans, ans, ans_len = outputs

    # sample question variations conditioned on (image, answer)
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)
    lm_inputs = wrap_samples_for_language_model(
        sampled=pathes, pad_token=model.pad_token - 1,
        gts=[quest, quest_len], max_length=20)

    def _show_examples(arr, arr_len, _rewards, name):
        # debugging helper: decode sampled questions and print their rewards
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % name)
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:  # strip the end-of-sequence token
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute rewards
    vqa_inputs = [images, ans, ans_len, top_ans]
    wrapped_sampled = lm_inputs[:2]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=model.pad_token)
    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')

    # assemble the REINFORCE batch, dropping samples with OOV answers
    aug_images, aug_quest, aug_quest_len, aug_ans, aug_ans_len, aug_top_ans, \
        is_in_vocab = aug_data
    sess_in = [aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
               max_noise, max_rewards, rewards_all]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove OOV
    avg_reward = max_rewards.mean()

    # run the train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # update the VQA model on ground-truth plus augmented (sampled) questions
    aug_legal_mask, aug_vqa_labels, hard_target_mask = correct_vqa_labels(
        aug_top_ans, rewards_all, is_in_vocab)
    gt_legal_mask = top_ans != 2000  # 2000 marks out-of-vocabulary answers
    gt_hard_target_mask = np.ones_like(top_ans, dtype=np.float32)
    gt_inputs = [images, quest, quest_len, top_ans,
                 gt_hard_target_mask, gt_legal_mask]
    aug_inputs = [aug_images, aug_quest, aug_quest_len, aug_vqa_labels,
                  hard_target_mask, aug_legal_mask]
    vqa_inputs = concat_vqa_batch(gt_inputs, aug_inputs)
    vqa_is_valid = vqa_inputs[-1]
    vqa_inputs = [_in[vqa_is_valid] for _in in vqa_inputs[:-1]]  # drop invalid rows
    vqa = env.get_vqa_model()
    vqa.trainstep(vqa_inputs)

    # update the language model; 80 is the assumed number of samples per batch,
    # and the `or True` deliberately bypasses the half-generated threshold
    num_fake_in_batch = 80 - is_gt.sum()
    if num_fake_in_batch > 50 or True:  # at least half is generated
        wrapped_gt = _Q_CTX.get_gt_batch(*lm_inputs[2:])  # random sample of GT questions
        corrected_inputs = correct_language_model_inputs(
            wrapped_sampled + wrapped_gt, is_gt)
        if num_fake_in_batch > 0:
            env.lm.trainstep(corrected_inputs)
    return sess_outputs
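# The call to concat_vqa_batch above appears to merge the ground-truth and
# augmented VQA batches field by field (the legal mask stays the last field,
# since vqa_inputs[-1] is used for filtering). A minimal sketch under that
# assumption; the name and internals are illustrative, not the repository's
# actual code.


def concat_vqa_batch_sketch(gt_inputs, aug_inputs):
    """Concatenate matching fields of the GT and augmented batches along
    the batch axis."""
    return [np.concatenate([g, a], axis=0)
            for g, a in zip(gt_inputs, aug_inputs)]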
# ---------------------------------------------------------------------------
# Fragment of a sampling/inspection loop. The enclosing `for i, ...` loop and
# the guard around the early `continue` are elided in the source.
# ---------------------------------------------------------------------------
        print('Batch %d, time %0.3fs' % (i, time() - t))
        continue

    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size)

    # rewards from two environments: question similarity and VQA
    rewards = env.get_reward(pathes, [quests, quest_len])
    rewards1 = env1.get_reward(pathes, [images, ans, ans_len])
    max_path_arr, max_path_len, max_noise, max_rewards = prepare_reinforce_data(
        pathes, noise, rewards, pad_token=-1)
    print(max_path_arr)

    # print the highest-reward variation next to each ground-truth question
    idx = 0
    gts = _parse_gt_questions(quests, quest_len)
    for _var_s, _var_n, _gt in zip(pathes, noise, gts):
        sentence = to_sentence.index_to_question(_gt)
        print('\nGT: %s' % sentence)
        _n = len(_var_s)
        ind = np.arange(idx, idx + _n, 1)
        _max_reward_idx = rewards[ind].argmax()
        _max_p = _var_s[_max_reward_idx]
        _max_n = _var_n[_max_reward_idx]
        sentence = to_sentence.index_to_question(_max_p)
        _max_r = rewards[ind][_max_reward_idx]
        # (idx is advanced in the elided remainder of the loop body)