Example #1
File: clevr_env.py Project: gqkc/RL-NLP
    def __init__(self,
                 data_path,
                 max_len,
                 reward_type="levenshtein",
                 reward_path=None,
                 max_samples=None,
                 debug=False,
                 mode="train",
                 num_questions=10):
        super(ClevrEnv, self).__init__()
        self.mode = mode
        self.data_path = data_path
        h5_questions_path = os.path.join(data_path,
                                         '{}_questions.h5'.format(self.mode))
        h5_feats_path = os.path.join(data_path,
                                     '{}_features.h5'.format(self.mode))
        vocab_path = os.path.join(data_path, 'vocab.json')
        # self.debug_true_questions = torch.randint(0,debug_len_vocab, (2,))
        self.debug = debug
        self.num_questions = num_questions
        self.clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                           h5_feats_path=h5_feats_path,
                                           vocab_path=vocab_path,
                                           max_samples=max_samples)

        # num_tokens = self.clevr_dataset.len_vocab
        # feats_shape = self.clevr_dataset.feats_shape
        SOS_idx = self.clevr_dataset.vocab_questions["<SOS>"]
        EOS_idx = self.clevr_dataset.vocab_questions["<EOS>"]

        Special_Tokens = namedtuple('Special_Tokens', ('SOS_idx', 'EOS_idx'))
        self.special_tokens = Special_Tokens(SOS_idx, EOS_idx)
        self.State = namedtuple('State', ('text', 'img'))
        self.Episode = namedtuple('Episode',
                                  ('img_idx', 'closest_question', 'dialog',
                                   'rewards', 'valid_actions'))
        self.max_len = max_len
        # self.ref_questions = torch.randint(0, self.debug_len_vocab,
        #                                  (3, self.max_len)) if self.debug_len_vocab is not None else None
        # self.reset()

        self.reward_func = rewards[reward_type](reward_path)
        self.step_idx = 0
        self.state, self.dialog = None, None
        self.ref_questions, self.ref_questions_decoded = None, None
        self.img_idx, self.img_feats = None, None
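
# A hedged construction sketch for the __init__ above; the "data" directory (expected to
# hold train_questions.h5, train_features.h5 and vocab.json) and the hyperparameter values
# are illustrative assumptions, not part of the original example.
env = ClevrEnv(data_path="data", max_len=20, reward_type="levenshtein",
               max_samples=50, mode="train", num_questions=10)
print(env.special_tokens.SOS_idx, env.special_tokens.EOS_idx)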
Example #2
    def __init__(self, data_path, max_len, reward_type="levenshtein",
                 reward_path=None, max_samples=None, debug=None, mode="train", num_questions=10, diff_reward=False,
                 condition_answer=True, reward_vocab=None, mask_answers=False,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), reduced_answers=False,
                 params=None, filter_numbers=False):
        super(ClevrEnv, self).__init__(data_path, max_len, reward_type=reward_type,
                                       reward_path=reward_path, mode=mode, debug=debug, diff_reward=diff_reward,
                                       condition_answer=condition_answer, reward_vocab=reward_vocab, mask_answers=False,
                                       device=device, reduced_answers=reduced_answers, params=params,
                                       filter_numbers=filter_numbers)

        modes = {"train": "train", "test_images": "val", "test_text": "train"}
        h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(modes[self.mode]))
        h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(modes[self.mode]))
        vocab_path = os.path.join(data_path, 'vocab.json')
        self.dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                     h5_feats_path=h5_feats_path,
                                     vocab_path=vocab_path,
                                     max_samples=max_samples, mask_answers=mask_answers)

        self.num_questions = num_questions
        self.set_special_tokens()
        self.set_reward_function(reward_type=reward_type, reward_path=reward_path, reward_vocab=reward_vocab,
                                 diff_reward=diff_reward)
Example #3
class ClevrEnv(GenericEnv):
    """Clevr Env"""
    metadata = {'render.modes': ['human']}

    def __init__(self, data_path, max_len, reward_type="levenshtein",
                 reward_path=None, max_samples=None, debug=None, mode="train", num_questions=10, diff_reward=False,
                 condition_answer=True, reward_vocab=None, mask_answers=False,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), reduced_answers=False,
                 params=None, filter_numbers=False):
        super(ClevrEnv, self).__init__(data_path, max_len, reward_type=reward_type,
                                       reward_path=reward_path, mode=mode, debug=debug, diff_reward=diff_reward,
                                       condition_answer=condition_answer, reward_vocab=reward_vocab, mask_answers=False,
                                       device=device, reduced_answers=reduced_answers, params=params,
                                       filter_numbers=filter_numbers)

        modes = {"train": "train", "test_images": "val", "test_text": "train"}
        h5_questions_path = os.path.join(data_path, '{}_questions.h5'.format(modes[self.mode]))
        h5_feats_path = os.path.join(data_path, '{}_features.h5'.format(modes[self.mode]))
        vocab_path = os.path.join(data_path, 'vocab.json')
        self.dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                     h5_feats_path=h5_feats_path,
                                     vocab_path=vocab_path,
                                     max_samples=max_samples, mask_answers=mask_answers)

        self.num_questions = num_questions
        self.set_special_tokens()
        self.set_reward_function(reward_type=reward_type, reward_path=reward_path, reward_vocab=reward_vocab,
                                 diff_reward=diff_reward)

    def get_env_img_idx(self, i_episode, range_images):
        if i_episode is not None and i_episode < range_images[1]:
            img_idx = i_episode
        else:
            img_idx = np.random.randint(range_images[0], range_images[1])
        return img_idx

    def reset(self, seed=None, i_episode=None):
        range_images = ([int(self.debug[0]), int(self.debug[1])] if self.mode != "test_images"
                        else [0, self.dataset.all_feats.shape[0]])
        if seed is not None:
            np.random.seed(seed)
        # getting the environment's elements: Img, ref_questions, ref_answers.
        self.img_idx = self.get_env_img_idx(i_episode, range_images)
        self.img_feats, questions, self.ref_answers = self.dataset.get_data_from_img_idx(self.img_idx)
        self.ref_questions = questions[:, :self.max_len]

        # differentiating between the environment modes.
        if self.mode == "train" and not self.mask_answers:
            self.ref_questions = self.ref_questions[0:self.num_questions, :]
            self.ref_answers = self.ref_answers[0:self.num_questions]
        elif self.mode == "test_text" and not self.mask_answers:
            self.ref_questions = self.ref_questions[self.num_questions:, :]
            self.ref_answers = self.ref_answers[self.num_questions:]

        # getting the ref_idx for the couple (question, answer).
        if i_episode is not None:
            np.random.seed(i_episode)
        self.ref_question_idx = np.random.randint(0, self.ref_questions.size(0))
        self.ref_question = self.ref_questions[self.ref_question_idx]
        self.ref_answer = self.ref_answers[self.ref_question_idx]

        if self.condition_answer != "none":
            self.ref_questions = self.ref_questions[
                                 self.ref_question_idx:self.ref_question_idx + 1]
            self.ref_answers = self.ref_answers[self.ref_question_idx:self.ref_question_idx + 1]

        self.ref_questions_decoded = [self.dataset.question_tokenizer.decode(question, ignored=['<SOS>', '<PAD>'])
                                      for question in self.ref_questions.numpy()]

        # initializing the state.
        state_question = [self.special_tokens.SOS_idx]
        self.state = self.State(torch.LongTensor(state_question).view(1, len(state_question)),
                                self.img_feats.unsqueeze(0), self.ref_answer)
        self.step_idx = 0
        self.dialog = None

        # check the correctness of the reward function.
        if self.reward_type == "levenshtein" and not self.diff_reward:
            reward_true_question, _, _ = self.reward_func.get(question=self.ref_questions_decoded[0],
                                                              ep_questions_decoded=self.ref_questions_decoded,
                                                              step_idx=self.step_idx, done=True)
            assert reward_true_question == 0, "ERROR IN REWARD FUNCTION"

        return self.state
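
# A hedged usage sketch of the reset logic above; `env` is assumed to be an already-built
# ClevrEnv whose `debug` argument holds the [min, max) image-index range, e.g. debug=["0", "20"].
state = env.reset(seed=0)        # sample a random image index within the debug range
state = env.reset(i_episode=3)   # deterministic image pick when i_episode < range_images[1]
print(env.ref_question_idx, env.ref_answer, env.ref_questions_decoded)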
Example #4
def train_episodes_batch(log_probs_batch, returns_batch, optimizer):
    # Negative of the REINFORCE objective, so that gradient descent maximizes the expected return.
    reinforce_loss = -log_probs_batch * returns_batch  # shape (bs, max_len, 1)
    reinforce_loss = reinforce_loss.squeeze(-1).sum(dim=1).mean(dim=0)  # sum over timesteps, mean over the batch.
    optimizer.zero_grad()
    reinforce_loss.backward()
    optimizer.step()
    return reinforce_loss.item()
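
# A minimal sketch exercising train_episodes_batch with dummy tensors; the batch size,
# sequence length and the stand-in policy parameter below are illustrative assumptions.
policy_param = torch.nn.Parameter(torch.zeros(1))          # stand-in for real policy weights
log_probs_batch = torch.randn(4, 20, 1) + policy_param     # shape (bs, max_len, 1), carries grad
returns_batch = torch.randn(4, 20, 1)                      # per-timestep returns
sgd = torch.optim.SGD([policy_param], lr=1e-2)
print("REINFORCE loss:", train_episodes_batch(log_probs_batch, returns_batch, sgd))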


if __name__ == '__main__':
    h5_questions_path = os.path.join("../../data", 'train_questions.h5')
    h5_feats_path = os.path.join("../../data", 'train_features.h5')
    vocab_path = os.path.join("../../data", 'vocab.json')
    clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                  h5_feats_path=h5_feats_path,
                                  vocab_path=vocab_path,
                                  max_samples=21)

    # ---- test of get dummy reward -----------------------------------------
    sample_questions = clevr_dataset.get_questions_from_img_idx(0)
    temp_state_text = torch.LongTensor(
        [1, 7, 86, 70, 70, 21, 54, 81, 51, 84, 86, 50, 38, 17, 2]).unsqueeze(0)
    temp_reward = get_dummy_reward(temp_state_text, sample_questions, 2)
    print('reward', temp_reward)

    State = namedtuple('State', ('text', 'img'))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SOS_idx = clevr_dataset.vocab_questions["<SOS>"]
    EOS_idx = clevr_dataset.vocab_questions["<EOS>"]
    PAD_idx = clevr_dataset.vocab_questions["<PAD>"]
    Special_Tokens = namedtuple('Special_Tokens', ('SOS_idx', 'EOS_idx', 'PAD_idx'))
Example #5
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = os.path.join(args.out_path, 'model.pt')
    with open(model_path, 'rb') as f:
        model = torch.load(f, map_location=device).to(device)
    model.eval()

    # TODO: add a model.flatten_parameters() ?
    h5_questions_path = os.path.join(args.data_path, 'train_questions.h5')  # TODO: check why loading the test dataset does not work.
    h5_feats_path = os.path.join(args.data_path, 'train_features.h5')
    vocab_path = os.path.join(args.data_path, 'vocab.json')
    test_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                 h5_feats_path=h5_feats_path,
                                 vocab_path=vocab_path,
                                 max_samples=21)
    num_tokens = test_dataset.len_vocab
    SOS_idx = test_dataset.vocab_questions["<SOS>"]

    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=len(test_dataset),
                             drop_last=True,
                             num_workers=args.num_workers)
    out_file_top_k_words = os.path.join(
        args.out_path,
        'generate_top_k_words_k_{}_seed_{}.json'.format(args.top_k, args.seed))
    out_file_log = os.path.join(args.out_path, 'eval_log.log')
    logger = create_logger(out_file_log)
    log_interval = int(args.words / 10)
Example #6
File: clevr_env.py Project: gqkc/RL-NLP
class ClevrEnv(gym.Env):
    """Clevr Env"""
    metadata = {'render.modes': ['human']}

    def __init__(self,
                 data_path,
                 max_len,
                 reward_type="levenshtein",
                 reward_path=None,
                 max_samples=None,
                 debug=False,
                 mode="train",
                 num_questions=10):
        super(ClevrEnv, self).__init__()
        self.mode = mode
        self.data_path = data_path
        h5_questions_path = os.path.join(data_path,
                                         '{}_questions.h5'.format(self.mode))
        h5_feats_path = os.path.join(data_path,
                                     '{}_features.h5'.format(self.mode))
        vocab_path = os.path.join(data_path, 'vocab.json')
        # self.debug_true_questions = torch.randint(0,debug_len_vocab, (2,))
        self.debug = debug
        self.num_questions = num_questions
        self.clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                           h5_feats_path=h5_feats_path,
                                           vocab_path=vocab_path,
                                           max_samples=max_samples)

        # num_tokens = self.clevr_dataset.len_vocab
        # feats_shape = self.clevr_dataset.feats_shape
        SOS_idx = self.clevr_dataset.vocab_questions["<SOS>"]
        EOS_idx = self.clevr_dataset.vocab_questions["<EOS>"]

        Special_Tokens = namedtuple('Special_Tokens', ('SOS_idx', 'EOS_idx'))
        self.special_tokens = Special_Tokens(SOS_idx, EOS_idx)
        self.State = namedtuple('State', ('text', 'img'))
        self.Episode = namedtuple('Episode',
                                  ('img_idx', 'closest_question', 'dialog',
                                   'rewards', 'valid_actions'))
        self.max_len = max_len
        # self.ref_questions = torch.randint(0, self.debug_len_vocab,
        #                                  (3, self.max_len)) if self.debug_len_vocab is not None else None
        # self.reset()

        self.reward_func = rewards[reward_type](reward_path)
        self.step_idx = 0
        self.state, self.dialog = None, None
        self.ref_questions, self.ref_questions_decoded = None, None
        self.img_idx, self.img_feats = None, None

    def step(self, action):
        action = torch.tensor(action).view(1, 1)
        self.state = self.State(torch.cat([self.state.text, action], dim=1),
                                self.state.img)
        question = self.clevr_dataset.idx2word(self.state.text.numpy()[0])
        done = action.item() == self.special_tokens.EOS_idx or self.step_idx == (self.max_len - 1)
        # question = preprocess_final_state(state_text=self.state.text, dataset=self.clevr_dataset,
        #                               EOS_idx=self.special_tokens.EOS_idx)
        reward, closest_question = (self.reward_func.get(question=question,
                                                         ep_questions_decoded=self.ref_questions_decoded)
                                    if done else (0, None))
        self.step_idx += 1
        if done:
            self.dialog = question
            logging.info(question)
        return self.state, (reward, closest_question), done, {}

    def reset(self):
        self.img_idx = (np.random.randint(0, self.clevr_dataset.all_feats.shape[0])
                        if not self.debug else np.random.randint(0, self.debug))
        # self.img_idx = 0
        self.ref_questions = self.clevr_dataset.get_questions_from_img_idx(
            self.img_idx)[:, :self.max_len]  # shape (10, 45)
        #if self.debug > 0:
        self.ref_questions = self.ref_questions[0:self.num_questions]
        # if self.debug:
        # self.ref_questions = torch.tensor([[7, 8, 10, 12, 14]])
        self.ref_questions_decoded = [
            self.clevr_dataset.idx2word(question, clean=True)
            for question in self.ref_questions.numpy()
        ]
        logging.info("Questions for image {} : {}".format(
            self.img_idx, self.ref_questions_decoded))
        # self.ref_questions_decoded = [self.ref_questions_decoded[0]]  # FOR DEBUGGING.
        self.img_feats = self.clevr_dataset.get_feats_from_img_idx(
            self.img_idx)  # shape (1024, 14, 14)
        self.state = self.State(
            torch.LongTensor([self.special_tokens.SOS_idx]).view(1, 1),
            self.img_feats.unsqueeze(0))
        self.step_idx = 0
        self.dialog = None
        self.current_episode = self.Episode(self.img_idx, None, None, None,
                                            None)

        return self.state

    def decode_current_episode(self):
        valid_actions = self.current_episode.valid_actions
        assert valid_actions is not None
        valid_actions_decoded = [
            self.clevr_dataset.idx2word(actions, delim=',')
            for actions in valid_actions
        ]
        # dialog_split = [self.current_episode.dialog.split()[:i] for i in range(valid_actions)]
        # return dict(zip(dialog_split, valid_actions_decoded))
        return valid_actions_decoded

    def clean_ref_questions(self):
        questions_decoded = [
            tokens.replace('<PAD>', '')
            for tokens in self.ref_questions_decoded
        ]
        questions_decoded = [q.strip() for q in questions_decoded]
        self.ref_questions_decoded = questions_decoded

    def get_reduced_action_space(self):
        assert self.ref_questions_decoded is not None
        reduced_vocab = [q.split() for q in self.ref_questions_decoded]
        reduced_vocab = [i for l in reduced_vocab for i in l]
        reduced_vocab = list(set(reduced_vocab))
        unique_tokens = self.clevr_dataset.word2idx(seq_tokens=reduced_vocab)
        dict_tokens = dict(enumerate(unique_tokens))
        return dict_tokens, reduced_vocab

    def render(self, mode='human', close=False):
        pass
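
# A hedged rollout sketch for the ClevrEnv above; the constructor arguments and the random
# stand-in policy are illustrative assumptions, not part of the original example.
env = ClevrEnv(data_path="data", max_len=20)
state = env.reset()
done = False
while not done:
    action = np.random.randint(env.clevr_dataset.len_vocab)   # random policy as a stand-in
    state, (reward, closest_question), done, _ = env.step(action)
print(env.dialog, reward, closest_question)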
Example #7
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path = os.path.join(args.out_path, 'model.pt')
    with open(model_path, 'rb') as f:
        model = torch.load(f, map_location=device).to(device)
    model.eval()

    # TODO: add a model.flatten_parameters() ?
    h5_questions_path = os.path.join(args.data_path, 'train_questions.h5')  # TODO: check why loading the test dataset does not work.
    h5_feats_path = os.path.join(args.data_path, 'train_features.h5')
    vocab_path = os.path.join(args.data_path, 'vocab.json')
    test_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                                 h5_feats_path=h5_feats_path,
                                 vocab_path=vocab_path,
                                 max_samples=21)
    num_tokens = test_dataset.len_vocab
    SOS_idx = test_dataset.vocab_questions["<SOS>"]

    test_loader = DataLoader(dataset=test_dataset, batch_size=len(test_dataset), drop_last=True,
                             num_workers=args.num_workers)
    out_file_top_k_words = os.path.join(args.out_path,
                                        'generate_top_k_words_k_{}_seed_{}.json'.format(args.top_k, args.seed))
    out_file_log = os.path.join(args.out_path, 'eval_log.log')
    logger = create_logger(out_file_log)
    log_interval = int(args.words / 10)

    ###############################################################################
    # generate words
    ###############################################################################
Example #8
from statistics.word_cloud import WordCloud
import h5py
import numpy as np
import os
from data_provider.CLEVR_Dataset import CLEVR_Dataset

out_path = "../../output/RL/2000_img_len_20/experiments/train/10-proba_thr0.05/proba_thr_0.05_eval"
dialog_path = os.path.join(out_path, "test_dialog.h5")

dialog_hf = h5py.File(dialog_path, 'r')
test_text_greedy_dialog = np.array(
    dialog_hf.get('test_text_greedy_with_trunc_dialog'), dtype=np.int32)

# create CLEVR dataset.
data_path = '../../data'
vocab_path = os.path.join(data_path, "vocab.json")
h5_questions_path = os.path.join(data_path, "train_questions.h5")
h5_feats_path = os.path.join(data_path, "train_features.h5")  # Caution: here train_features.h5 covers only the first 21 images of the train dataset.
clevr_dataset = CLEVR_Dataset(h5_questions_path=h5_questions_path,
                              h5_feats_path=h5_feats_path,
                              vocab_path=vocab_path)

wc = WordCloud(path=out_path,
               questions=test_text_greedy_dialog,
               suffix='wc_test_text_greedy_dialog',
               dataset=clevr_dataset)
Example #9
    def get_datasets(args, device):
        if args.dataset == "clevr":
            if args.dataset_ext == 0:
                train_questions_path = os.path.join(args.data_path,
                                                    "train_questions.h5")
                val_questions_path = os.path.join(args.data_path,
                                                  "val_questions.h5")
                test_questions_path = os.path.join(args.data_path,
                                                   "test_questions.h5")
                train_feats_path = os.path.join(args.data_path,
                                                'train_features.h5')
                val_feats_path = os.path.join(args.data_path,
                                              'val_features.h5')
                vocab_path = os.path.join(args.data_path, "vocab.json")

                if args.task == "lm":
                    train_dataset = QuestionsDataset(
                        h5_questions_path=train_questions_path,
                        vocab_path=vocab_path,
                        range_samples=args.range_samples)
                    val_dataset = QuestionsDataset(
                        h5_questions_path=val_questions_path,
                        vocab_path=vocab_path)
                    test_dataset = QuestionsDataset(
                        h5_questions_path=test_questions_path,
                        vocab_path=vocab_path)
                elif args.task == "policy":
                    train_dataset = CLEVR_Dataset(
                        h5_questions_path=train_questions_path,
                        h5_feats_path=train_feats_path,
                        vocab_path=vocab_path,
                        max_samples=args.max_samples)
                    val_dataset = CLEVR_Dataset(
                        h5_questions_path=val_questions_path,
                        h5_feats_path=val_feats_path,
                        vocab_path=vocab_path,
                        max_samples=args.max_samples)
                    test_dataset = val_dataset

            else:
                vocab_path = os.path.join(args.data_path, "vocab.json")
                data_path = os.path.join(args.data_path, "clevr_ext")
                full_dataset = QuestionsDataset(
                    h5_questions_path=data_path,
                    vocab_path=vocab_path,
                    range_samples=args.range_samples,
                    dataset_ext=1)
                train_size = int(0.9 * len(full_dataset))
                test_size = len(full_dataset) - train_size
                train_dataset, test_dataset = torch.utils.data.random_split(
                    full_dataset, [train_size, test_size])
                train_dataset = copy_attributes(train_dataset,
                                                train_dataset.dataset)
                test_dataset = copy_attributes(test_dataset,
                                               test_dataset.dataset)
                val_dataset = copy.deepcopy(test_dataset)

        elif args.dataset == "vqa":
            lm_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
            reward_tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
            images_feature_reader = ImageFeaturesH5Reader(
                args.features_path, False)
            question_tokenizer = VQATokenizer(lm_tokenizer=lm_tokenizer)

            if args.min_data:
                vocab_path = os.path.join(args.data_path,
                                          'cache/vocab_min.json')
                train_split = "mintrain"
                val_split = "mintrain" if device.type == "cpu" else "minval"
            else:
                vocab_path = os.path.join(args.data_path, 'cache/vocab.json')
                train_split = "mintrain" if device.type == "cpu" else "train"
                val_split = "mintrain" if device.type == "cpu" else "val"

            train_dataset = VQADataset(
                split=train_split,
                dataroot=args.data_path,
                question_tokenizer=question_tokenizer,
                image_features_reader=images_feature_reader,
                reward_tokenizer=reward_tokenizer,
                clean_datasets=True,
                max_seq_length=23,
                num_images=None,
                vocab_path=vocab_path,
                filter_entries=True,
                rl=False)
            val_dataset = VQADataset(
                split=val_split,
                dataroot=args.data_path,
                question_tokenizer=question_tokenizer,
                image_features_reader=images_feature_reader,
                reward_tokenizer=reward_tokenizer,
                clean_datasets=True,
                max_seq_length=23,
                num_images=None,
                vocab_path=vocab_path,
                filter_entries=True,
                rl=False,
                filter_numbers=args.filter_numbers)
            test_dataset = val_dataset

        return train_dataset, val_dataset, test_dataset
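
# A hedged invocation sketch for get_datasets above; the Namespace fields mirror the
# attributes the function reads, every value (paths, task, splits) is an illustrative
# assumption, and get_datasets is assumed to be usable as a plain/static helper.
from argparse import Namespace

args = Namespace(dataset="clevr", dataset_ext=0, data_path="data", task="lm",
                 range_samples=None, max_samples=None)
device = torch.device("cpu")
train_dataset, val_dataset, test_dataset = get_datasets(args, device)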