def __init__(self, data_path, max_len, reward_type="levenshtein", reward_path=None, max_samples=None, debug=False, mode="train", num_questions=10): super(ClevrEnv, self).__init__() self.mode = mode self.data_path = data_path h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(self.mode)) h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(self.mode)) vocab_path = os.path.join(data_path, 'vocab.json') # self.debug_true_questions = torch.randint(0,debug_len_vocab, (2,)) self.debug = debug self.num_questions = num_questions self.clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path, h5_feats_path=h5_feats_path, vocab_path=vocab_path, max_samples=max_samples) # num_tokens = self.clevr_dataset.len_vocab # feats_shape = self.clevr_dataset.feats_shape SOS_idx = self.clevr_dataset.vocab_questions["<SOS>"] EOS_idx = self.clevr_dataset.vocab_questions["<EOS>"] Special_Tokens = namedtuple('Special_Tokens', ('SOS_idx', 'EOS_idx')) self.special_tokens = Special_Tokens(SOS_idx, EOS_idx) self.State = namedtuple('State', ('text', 'img')) self.Episode = namedtuple('Episode', ('img_idx', 'closest_question', 'dialog', 'rewards', 'valid_actions')) self.max_len = max_len # self.ref_questions = torch.randint(0, self.debug_len_vocab, # (3, self.max_len)) if self.debug_len_vocab is not None else None # self.reset() self.reward_func = rewards[reward_type](reward_path) self.step_idx = 0 self.state, self.dialog = None, None self.ref_questions, self.ref_questions_decoded = None, None self.img_idx, self.img_feats = None, None
def __init__(self, data_path, max_len, reward_type="levenshtein", reward_path=None, max_samples=None, debug=None, mode="train", num_questions=10, diff_reward=False, condition_answer=True, reward_vocab=None, mask_answers=False, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), reduced_answers=False, params=None, filter_numbers=False): super(ClevrEnv, self).__init__(data_path, max_len, reward_type=reward_type, reward_path=reward_path, mode=mode, debug=debug, diff_reward=diff_reward, condition_answer=condition_answer, reward_vocab=reward_vocab, mask_answers=False, device=device, reduced_answers=reduced_answers, params=params, filter_numbers=filter_numbers) modes = {"train": "train", "test_images": "val", "test_text": "train"} h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(modes[self.mode])) h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(modes[self.mode])) vocab_path = os.path.join(data_path, 'vocab.json') self.dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path, h5_feats_path=h5_feats_path, vocab_path=vocab_path, max_samples=max_samples, mask_answers=mask_answers) self.num_questions = num_questions self.set_special_tokens() self.set_reward_function(reward_type=reward_type, reward_path=reward_path, reward_vocab=reward_vocab, diff_reward=diff_reward)
class ClevrEnv(GenericEnv):
    """Clevr Env"""
    metadata = {'render.modes': ['human']}

    def __init__(self, data_path, max_len, reward_type="levenshtein", reward_path=None, max_samples=None,
                 debug=None, mode="train", num_questions=10, diff_reward=False, condition_answer=True,
                 reward_vocab=None, mask_answers=False,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                 reduced_answers=False, params=None, filter_numbers=False):
        super(ClevrEnv, self).__init__(data_path, max_len, reward_type=reward_type, reward_path=reward_path,
                                       mode=mode, debug=debug, diff_reward=diff_reward,
                                       condition_answer=condition_answer, reward_vocab=reward_vocab,
                                       mask_answers=False, device=device, reduced_answers=reduced_answers,
                                       params=params, filter_numbers=filter_numbers)
        modes = {"train": "train", "test_images": "val", "test_text": "train"}
        h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(modes[self.mode]))
        h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(modes[self.mode]))
        vocab_path = os.path.join(data_path, 'vocab.json')
        self.dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                     h5_feats_path=h5_feats_path,
                                     vocab_path=vocab_path,
                                     max_samples=max_samples,
                                     mask_answers=mask_answers)
        self.num_questions = num_questions
        self.set_special_tokens()
        self.set_reward_function(reward_type=reward_type, reward_path=reward_path,
                                 reward_vocab=reward_vocab, diff_reward=diff_reward)

    def get_env_img_idx(self, i_episode, range_images):
        if i_episode is not None and i_episode < range_images[1]:
            img_idx = i_episode
        else:
            img_idx = np.random.randint(range_images[0], range_images[1])
        return img_idx

    def reset(self, seed=None, i_episode=None):
        range_images = [int(self.debug[0]), int(self.debug[1])] if self.mode != "test_images" \
            else [0, self.dataset.all_feats.shape[0]]
        if seed is not None:
            np.random.seed(seed)
        # getting the environment's elements: Img, ref_questions, ref_answers.
        self.img_idx = self.get_env_img_idx(i_episode, range_images)
        self.img_feats, questions, self.ref_answers = self.dataset.get_data_from_img_idx(self.img_idx)
        self.ref_questions = questions[:, :self.max_len]
        # differentiating between the environment modes.
        if self.mode == "train" and not self.mask_answers:
            self.ref_questions = self.ref_questions[0:self.num_questions, :]
            self.ref_answers = self.ref_answers[0:self.num_questions]
        elif self.mode == "test_text" and not self.mask_answers:
            self.ref_questions = self.ref_questions[self.num_questions:, :]
            self.ref_answers = self.ref_answers[self.num_questions:]
        # getting the ref_idx for the (question, answer) pair.
        if i_episode is not None:
            np.random.seed(i_episode)
        self.ref_question_idx = np.random.randint(0, self.ref_questions.size(0))
        self.ref_question = self.ref_questions[self.ref_question_idx]
        self.ref_answer = self.ref_answers[self.ref_question_idx]
        if self.condition_answer != "none":
            self.ref_questions = self.ref_questions[self.ref_question_idx:self.ref_question_idx + 1]
            self.ref_answers = self.ref_answers[self.ref_question_idx:self.ref_question_idx + 1]
        self.ref_questions_decoded = [self.dataset.question_tokenizer.decode(question, ignored=['<SOS>', '<PAD>'])
                                      for question in self.ref_questions.numpy()]
        # initializing the state.
        state_question = [self.special_tokens.SOS_idx]
        self.state = self.State(torch.LongTensor(state_question).view(1, len(state_question)),
                                self.img_feats.unsqueeze(0),
                                self.ref_answer)
        self.step_idx = 0
        self.dialog = None
        # check the correctness of the reward function.
        if self.reward_type == "levenshtein" and not self.diff_reward:
            reward_true_question, _, _ = self.reward_func.get(question=self.ref_questions_decoded[0],
                                                              ep_questions_decoded=self.ref_questions_decoded,
                                                              step_idx=self.step_idx, done=True)
            assert reward_true_question == 0, "ERROR IN REWARD FUNCTION"
        return self.state
def train_episodes_batch(log_probs_batch, returns_batch, optimizer):
    # Negative of the REINFORCE objective, so that gradient descent maximizes the expected return.
    reinforce_loss = -log_probs_batch * returns_batch  # shape (bs, max_len, 1)
    reinforce_loss = reinforce_loss.squeeze(-1).sum(dim=1).mean(dim=0)  # sum over timesteps, mean over batch.
    optimizer.zero_grad()
    reinforce_loss.backward()
    optimizer.step()
    return reinforce_loss.item()


if __name__ == '__main__':
    h5_questions_path = os.path.join("../../data", 'train_questions.h5')
    h5_feats_path = os.path.join("../../data", 'train_features.h5')
    vocab_path = os.path.join("../../data", 'vocab.json')
    clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                  h5_feats_path=h5_feats_path,
                                  vocab_path=vocab_path,
                                  max_samples=21)

    # ---- test of get_dummy_reward -----------------------------------------
    sample_questions = clevr_dataset.get_questions_from_img_idx(0)
    temp_state_text = torch.LongTensor([1, 7, 86, 70, 70, 21, 54, 81, 51, 84, 86, 50, 38, 17, 2]).unsqueeze(0)
    temp_reward = get_dummy_reward(temp_state_text, sample_questions, 2)
    print('reward', temp_reward)

    State = namedtuple('State', ('text', 'img'))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SOS_idx = clevr_dataset.vocab_questions["<SOS>"]
    EOS_idx = clevr_dataset.vocab_questions["<EOS>"]
    PAD_idx = clevr_dataset.vocab_questions["<PAD>"]
    Special_Tokens = namedtuple('Special_Tokens',
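# Illustrative sketch (not part of the original script): calling train_episodes_batch on dummy
# tensors. The toy "policy" parameter and the batch/sequence sizes below are placeholder
# assumptions chosen only to exercise the expected shapes (bs, max_len, 1).
if __name__ == '__main__':
    bs, max_len = 4, 10
    logits = torch.nn.Parameter(torch.randn(bs, max_len, 1))  # stands in for per-token policy outputs
    optimizer = torch.optim.Adam([logits], lr=1e-3)
    log_probs_batch = torch.log_softmax(logits, dim=1)        # shape (bs, max_len, 1)
    returns_batch = torch.ones(bs, max_len, 1)                # dummy returns
    loss = train_episodes_batch(log_probs_batch, returns_batch, optimizer)
    print("reinforce loss:", loss)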
class ClevrEnv(gym.Env):
    """Clevr Env"""
    metadata = {'render.modes': ['human']}

    def __init__(self, data_path, max_len, reward_type="levenshtein", reward_path=None, max_samples=None,
                 debug=False, mode="train", num_questions=10):
        super(ClevrEnv, self).__init__()
        self.mode = mode
        self.data_path = data_path
        h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(self.mode))
        h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(self.mode))
        vocab_path = os.path.join(data_path, 'vocab.json')
        self.debug = debug
        self.num_questions = num_questions
        self.clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                           h5_feats_path=h5_feats_path,
                                           vocab_path=vocab_path,
                                           max_samples=max_samples)
        SOS_idx = self.clevr_dataset.vocab_questions["<SOS>"]
        EOS_idx = self.clevr_dataset.vocab_questions["<EOS>"]
        Special_Tokens = namedtuple('Special_Tokens', ('SOS_idx', 'EOS_idx'))
        self.special_tokens = Special_Tokens(SOS_idx, EOS_idx)
        self.State = namedtuple('State', ('text', 'img'))
        self.Episode = namedtuple('Episode', ('img_idx', 'closest_question', 'dialog', 'rewards', 'valid_actions'))
        self.max_len = max_len
        self.reward_func = rewards[reward_type](reward_path)
        self.step_idx = 0
        self.state, self.dialog = None, None
        self.ref_questions, self.ref_questions_decoded = None, None
        self.img_idx, self.img_feats = None, None

    def step(self, action):
        action = torch.tensor(action).view(1, 1)
        self.state = self.State(torch.cat([self.state.text, action], dim=1), self.state.img)
        question = self.clevr_dataset.idx2word(self.state.text.numpy()[0])
        done = action.item() == self.special_tokens.EOS_idx or self.step_idx == (self.max_len - 1)
        reward, closest_question = (self.reward_func.get(question=question,
                                                         ep_questions_decoded=self.ref_questions_decoded)
                                    if done else (0, None))
        self.step_idx += 1
        if done:
            self.dialog = question
            logging.info(question)
        return self.state, (reward, closest_question), done, {}

    def reset(self):
        self.img_idx = np.random.randint(0, self.clevr_dataset.all_feats.shape[0]) if not self.debug \
            else np.random.randint(0, self.debug)
        self.ref_questions = self.clevr_dataset.get_questions_from_img_idx(self.img_idx)[:, :self.max_len]  # shape (10, max_len)
        self.ref_questions = self.ref_questions[0:self.num_questions]
        self.ref_questions_decoded = [self.clevr_dataset.idx2word(question, clean=True)
                                      for question in self.ref_questions.numpy()]
        logging.info("Questions for image {} : {}".format(self.img_idx, self.ref_questions_decoded))
        self.img_feats = self.clevr_dataset.get_feats_from_img_idx(self.img_idx)  # shape (1024, 14, 14)
        self.state = self.State(torch.LongTensor([self.special_tokens.SOS_idx]).view(1, 1),
                                self.img_feats.unsqueeze(0))
        self.step_idx = 0
        self.dialog = None
        self.current_episode = self.Episode(self.img_idx, None, None, None, None)
        return self.state

    def decode_current_episode(self):
        valid_actions = self.current_episode.valid_actions
        assert valid_actions is not None
        valid_actions_decoded = [self.clevr_dataset.idx2word(actions, delim=',') for actions in valid_actions]
        return valid_actions_decoded

    def clean_ref_questions(self):
        questions_decoded = [tokens.replace('<PAD>', '') for tokens in self.ref_questions_decoded]
        questions_decoded = [q.strip() for q in questions_decoded]
        self.ref_questions_decoded = questions_decoded

    def get_reduced_action_space(self):
        assert self.ref_questions_decoded is not None
        reduced_vocab = [q.split() for q in self.ref_questions_decoded]
        reduced_vocab = [token for question in reduced_vocab for token in question]
        reduced_vocab = list(set(reduced_vocab))
        unique_tokens = self.clevr_dataset.word2idx(seq_tokens=reduced_vocab)
        dict_tokens = dict(zip(range(len(unique_tokens)), unique_tokens))
        return dict_tokens, reduced_vocab

    def render(self, mode='human', close=False):
        pass
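# Illustrative rollout sketch (not part of the module): reset the environment and step with random
# token ids in place of a trained policy. The data_path, max_len and max_samples values are assumptions.
if __name__ == '__main__':
    env = ClevrEnv(data_path="../../data", max_len=20, max_samples=21)
    state = env.reset()
    done = False
    while not done:
        action = np.random.randint(0, env.clevr_dataset.len_vocab)  # random action instead of a policy sample
        state, (reward, closest_question), done, _ = env.step(action)
    print(env.dialog, reward, closest_question)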
args = parser.parse_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_path = os.path.join(args.out_path, 'model.pt')
with open(model_path, 'rb') as f:
    model = torch.load(f, map_location=device).to(device)
model.eval()  # TODO: add a model.flatten_parameters()?

# TODO: check why loading the test dataset does not work.
h5_questions_path = os.path.join(args.data_path, 'train_questions.h5')
h5_feats_path = os.path.join(args.data_path, 'train_features.h5')
vocab_path = os.path.join(args.data_path, 'vocab.json')
test_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                             h5_feats_path=h5_feats_path,
                             vocab_path=vocab_path,
                             max_samples=21)
num_tokens = test_dataset.len_vocab
SOS_idx = test_dataset.vocab_questions["<SOS>"]
test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), drop_last=True,
                         num_workers=args.num_workers)

out_file_top_k_words = os.path.join(args.out_path,
                                    'generate_top_k_words_k_{}_seed_{}.json'.format(args.top_k, args.seed))
out_file_log = os.path.join(args.out_path, 'eval_log.log')
logger = create_logger(out_file_log)
log_interval = int(args.words / 10)

###############################################################################
# generate words
###############################################################################
import os

import h5py
import numpy as np

from statistics.word_cloud import WordCloud
from data_provider.CLEVR_Dataset import CLEVR_Dataset

out_path = "../../output/RL/2000_img_len_20/experiments/train/10-proba_thr0.05/proba_thr_0.05_eval"
dialog_path = os.path.join(out_path, "test_dialog.h5")
dialog_hf = h5py.File(dialog_path, 'r')
test_text_greedy_dialog = np.array(dialog_hf.get('test_text_greedy_with_trunc_dialog'), dtype=np.int32)

# create the CLEVR dataset.
data_path = '../../data'
vocab_path = os.path.join(data_path, "vocab.json")
h5_questions_path = os.path.join(data_path, "train_questions.h5")
# Caution: here train_features.h5 corresponds only to the first 21 images of the train dataset.
h5_feats_path = os.path.join(data_path, "train_features.h5")
clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                              h5_feats_path=h5_feats_path,
                              vocab_path=vocab_path)

wc = WordCloud(path=out_path, questions=test_text_greedy_dialog,
               suffix='wc_test_text_greedy_dialog', dataset=clevr_dataset)
def get_datasets(args, device):
    if args.dataset == "clevr":
        if args.dataset_ext == 0:
            train_questions_path = os.path.join(args.data_path, "train_questions.h5")
            val_questions_path = os.path.join(args.data_path, "val_questions.h5")
            test_questions_path = os.path.join(args.data_path, "test_questions.h5")
            train_feats_path = os.path.join(args.data_path, 'train_features.h5')
            val_feats_path = os.path.join(args.data_path, 'val_features.h5')
            vocab_path = os.path.join(args.data_path, "vocab.json")
            if args.task == "lm":
                train_dataset = QuestionsDataset(h5_questions_path=train_questions_path, vocab_path=vocab_path,
                                                 range_samples=args.range_samples)
                val_dataset = QuestionsDataset(h5_questions_path=val_questions_path, vocab_path=vocab_path)
                test_dataset = QuestionsDataset(h5_questions_path=test_questions_path, vocab_path=vocab_path)
            elif args.task == "policy":
                train_dataset = CLEVR_Dataset(h5_questions_path=train_questions_path, h5_feats_path=train_feats_path,
                                              vocab_path=vocab_path, max_samples=args.max_samples)
                val_dataset = CLEVR_Dataset(h5_questions_path=val_questions_path, h5_feats_path=val_feats_path,
                                            vocab_path=vocab_path, max_samples=args.max_samples)
                test_dataset = val_dataset
        else:
            vocab_path = os.path.join(args.data_path, "vocab.json")
            data_path = os.path.join(args.data_path, "clevr_ext")
            full_dataset = QuestionsDataset(h5_questions_path=data_path, vocab_path=vocab_path,
                                            range_samples=args.range_samples, dataset_ext=1)
            train_size = int(0.9 * len(full_dataset))
            test_size = len(full_dataset) - train_size
            train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
            train_dataset = copy_attributes(train_dataset, train_dataset.dataset)
            test_dataset = copy_attributes(test_dataset, test_dataset.dataset)
            val_dataset = copy.deepcopy(test_dataset)
    elif args.dataset == "vqa":
        lm_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        reward_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
        images_feature_reader = ImageFeaturesH5Reader(args.features_path, False)
        question_tokenizer = VQATokenizer(lm_tokenizer=lm_tokenizer)
        if args.min_data:
            vocab_path = os.path.join(args.data_path, 'cache/vocab_min.json')
            train_split = "mintrain"
            val_split = "mintrain" if device.type == "cpu" else "minval"
        else:
            vocab_path = os.path.join(args.data_path, 'cache/vocab.json')
            train_split = "mintrain" if device.type == "cpu" else "train"
            val_split = "mintrain" if device.type == "cpu" else "val"
        train_dataset = VQADataset(split=train_split, dataroot=args.data_path,
                                   question_tokenizer=question_tokenizer,
                                   image_features_reader=images_feature_reader,
                                   reward_tokenizer=reward_tokenizer, clean_datasets=True,
                                   max_seq_length=23, num_images=None, vocab_path=vocab_path,
                                   filter_entries=True, rl=False)
        val_dataset = VQADataset(split=val_split, dataroot=args.data_path,
                                 question_tokenizer=question_tokenizer,
                                 image_features_reader=images_feature_reader,
                                 reward_tokenizer=reward_tokenizer, clean_datasets=True,
                                 max_seq_length=23, num_images=None, vocab_path=vocab_path,
                                 filter_entries=True, rl=False, filter_numbers=args.filter_numbers)
        test_dataset = val_dataset
    return train_dataset, val_dataset, test_dataset
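# Illustrative sketch (not part of the module): calling get_datasets for the CLEVR language-modeling
# branch. The Namespace fields mirror the attributes read above; their values (paths, range_samples,
# max_samples) are placeholders, not the project's defaults.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(dataset="clevr", dataset_ext=0, task="lm",
                     data_path="../../data", range_samples=None, max_samples=None)
    device = torch.device("cpu")
    train_dataset, val_dataset, test_dataset = get_datasets(args, device)
    print(len(train_dataset), len(val_dataset), len(test_dataset))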