Example #1
    def __init__(self, env, vocab_size, results_path, batch_size, episode_len=20):
        super(ActorCriticAgent, self).__init__(env, results_path)

        #For evaluation
        self.ev = Evaluation(['train'])

        #For navigation
        self.episode_len = episode_len
        self.losses = []

        ''' Define instruction encoder '''
        word_embedding_size = 256
        hidden_size = 512
        bidirectional = False
        dropout_ratio = 0.5

        enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
        self.encoder = EncoderLSTM(vocab_size, word_embedding_size, enc_hidden_size, padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()

        context_size = 1024
        self.hist_encoder = EncoderHistory(len(self.model_actions), 32, 2048, context_size).cuda()
        self.a2c_agent = A2CAgent(enc_hidden_size, context_size, len(self.model_actions) - 2).cuda()
        self.saved_actions = []

        params = list(self.encoder.parameters()) + list(self.hist_encoder.parameters()) + list(self.a2c_agent.parameters())
        self.optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-5)
Example #2
def test_submission(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind):
    ''' Train on combined training and validation sets, and generate test submission. '''
  
    setup()

    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)
    
    # Build models and train
    enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx, 
                  dropout_ratio, bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(action_embedding_size, hidden_size, dropout_ratio).cuda()

    train(train_env, encoder, decoder, n_iters, path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix)

    # Generate test submission
    test_env = R2RBatch(features, batch_size=batch_size, splits=['test'], tokenizer=tok,
                        path_type=path_type, history=history, blind=blind)
    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, model_prefix, 'test', 5000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
Example #3
File: train.py  Project: YzyLmc/ACGG_Sim
def train_val():
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok), Evaluation([split]))
        for split in ['val_seen', 'val_unseen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env, encoder, decoder, n_iters, val_envs=val_envs)
Example #4
File: train.py  Project: YzyLmc/AC-GG_0.2
def make_env_and_models(args, train_vocab_path, train_splits, test_splits,
                        batch_size=BATCH_SIZE):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list, batch_size=batch_size,
                         splits=train_splits, tokenizer=tok)

    enc_hidden_size = hidden_size//2 if args.bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE
    encoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=args.bidirectional, glove=glove))
    decoder = try_cuda(AttnDecoderLSTM(
        action_embedding_size, hidden_size, dropout_ratio,
        feature_size=feature_size))
    test_envs = {
        split: (R2RBatch(image_features_list, batch_size=batch_size,
                         splits=[split], tokenizer=tok),
                eval.Evaluation([split]))
        for split in test_splits}

    return train_env, test_envs, encoder, decoder
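
A minimal call-site sketch for make_env_and_models above; the argparse namespace `args` and the split names are assumptions borrowed from the neighbouring examples, not taken from this file.

# Hypothetical call site; args, TRAIN_VOCAB, and the split names are assumptions.
train_env, test_envs, encoder, decoder = make_env_and_models(
    args, TRAIN_VOCAB, ['train'], ['val_seen', 'val_unseen'])
for split, (env, evaluator) in test_envs.items():
    print(split, type(env).__name__, type(evaluator).__name__)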
Example #5
def train_all(eval_type, seed, max_episode_len, max_input_length, feedback,
              n_iters, prefix, blind, debug, train_vocab, trainval_vocab,
              batch_size, action_embedding_size, target_embedding_size,
              bidirectional, dropout_ratio, weight_decay, feature_size,
              hidden_size, word_embedding_size, lr, result_dir, snapshot_dir,
              plot_dir, train_splits, test_splits):
    ''' Train on the training set, and validate on the test split. '''

    setup(seed, train_vocab, trainval_vocab)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(train_vocab if eval_type == 'val' else trainval_vocab)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok,
                         seed=seed,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         blind=blind), Evaluation([split], seed=seed))
        for split in test_splits
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()

    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          feedback,
          max_episode_len,
          max_input_length,
          prefix,
          blind,
          lr,
          weight_decay,
          result_dir,
          snapshot_dir,
          plot_dir,
          val_envs=val_envs,
          debug=debug)
Example #6
def train_test(path_type, max_episode_len, history, MAX_INPUT_LENGTH,
               feedback_method, n_iters, model_prefix, blind):
    ''' Train on the training set, and validate on the test split. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAINVAL_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    train_env = R2RBatch(features,
                         batch_size=batch_size,
                         splits=['train', 'val_seen', 'val_unseen'],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(features,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         path_type=path_type,
                         history=history,
                         blind=blind), Evaluation([split],
                                                  path_type=path_type))
        for split in ['test']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    train(train_env,
          encoder,
          decoder,
          n_iters,
          path_type,
          history,
          feedback_method,
          max_episode_len,
          MAX_INPUT_LENGTH,
          model_prefix,
          val_envs=val_envs)
Example #7
def train_val(eval_type, seed, max_episode_len, history, max_input_length,
              feedback_method, n_iters, model_prefix, blind, debug):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    setup(seed)
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=max_input_length)
    train_env = R2RBatch(batch_size=batch_size,
                         splits=['train'],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind)

    # Create validation environments
    val_envs = {
        split: (R2RBatch(batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         seed=seed,
                         history=history,
                         blind=blind), Evaluation([split], seed=seed))
        for split in ['val_seen']
    }

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio, feature_size).cuda()
    train(eval_type,
          train_env,
          encoder,
          decoder,
          n_iters,
          seed,
          history,
          feedback_method,
          max_episode_len,
          max_input_length,
          model_prefix,
          val_envs=val_envs,
          debug=debug)
Example #8
def test_submission():
    ''' Train on combined training and validation sets, and generate test submission. '''

    setup()
    # Create a batch training environment that will also preprocess text
    vocab = read_vocab(TRAIN_VOCAB)
    tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    # train_env = R2RBatch(features, batch_size=batch_size, splits=['train', 'val_seen', 'val_unseen'], tokenizer=tok)

    # Build models and train
    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    encoder = EncoderLSTM(len(vocab),
                          word_embedding_size,
                          enc_hidden_size,
                          padding_idx,
                          dropout_ratio,
                          bidirectional=bidirectional).cuda()
    decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(),
                              Seq2SeqAgent.n_outputs(), action_embedding_size,
                              hidden_size, dropout_ratio).cuda()
    # train(train_env, encoder, decoder, n_iters)

    encoder.load_state_dict(torch.load('%s/seq2seq_enc.pt' % (SNAPSHOT_DIR)))
    decoder.load_state_dict(torch.load('%s/seq2seq_dec.pt' % (SNAPSHOT_DIR)))

    # Generate test submission
    test_env = R2RBatch(features,
                        batch_size=batch_size,
                        splits=['test1'],
                        tokenizer=tok)

    agent = Seq2SeqAgent(test_env, "", encoder, decoder, max_episode_len)
    agent.results_path = '%s%s_%s_iter_%d.json' % (RESULT_DIR, 'seq2seq',
                                                   'test1', 20000)
    agent.test(use_dropout=False, feedback='argmax')
    agent.write_results()
Example #9
class ActorCriticAgent(BaseAgent):

    model_actions = ['left', 'right', 'up', 'down', 'forward', '<end>', '<start>', '<ignore>']
    env_actions = [
        (0,-1, 0), # left
        (0, 1, 0), # right
        (0, 0, 1), # up
        (0, 0,-1), # down
        (1, 0, 0), # forward
        (0, 0, 0), # <end>
        (0, 0, 0), # <start>
        (0, 0, 0)  # <ignore>
    ]

    SavedAction = namedtuple('SavedAction', ['log_prob', 'value', 'step'])
    eps = np.finfo(np.float32).eps.item()

    def __init__(self, env, vocab_size, results_path, batch_size, episode_len=20):
        super(ActorCriticAgent, self).__init__(env, results_path)

        #For evaluation
        self.ev = Evaluation(['train'])

        #For navigation
        self.episode_len = episode_len
        self.losses = []

        ''' Define instruction encoder '''
        word_embedding_size = 256
        hidden_size = 512
        bidirectional = False
        dropout_ratio = 0.5

        enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
        self.encoder = EncoderLSTM(vocab_size, word_embedding_size, enc_hidden_size, padding_idx, dropout_ratio, bidirectional=bidirectional).cuda()

        context_size = 1024
        self.hist_encoder = EncoderHistory(len(self.model_actions), 32, 2048, context_size).cuda()
        self.a2c_agent = A2CAgent(enc_hidden_size, context_size, len(self.model_actions) - 2).cuda()
        self.saved_actions = []

        params = list(self.encoder.parameters()) + list(self.hist_encoder.parameters()) + list(self.a2c_agent.parameters())
        self.optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=1e-5)


    def _sort_batch(self, obs):
        seq_tensor = np.array([ob['instr_encoding'] for ob in obs])
        seq_lengths = np.argmax(seq_tensor == padding_idx, axis=1)
        seq_lengths[seq_lengths == 0] = seq_tensor.shape[1] # Full length

        seq_tensor = torch.from_numpy(seq_tensor)
        seq_lengths = torch.from_numpy(seq_lengths)

        # Sort sequences by lengths
        seq_lengths, perm_idx = seq_lengths.sort(0, True)
        sorted_tensor = seq_tensor[perm_idx]
        mask = (sorted_tensor == padding_idx)[:,:seq_lengths[0]]

        return Variable(sorted_tensor, requires_grad=False).long().cuda(), \
               mask.byte().cuda(), \
               list(seq_lengths), list(perm_idx)


    def _feature_variable(self, obs):
        feature_size = obs[0]['feature'].shape[0]
        features = np.empty((len(obs),feature_size), dtype=np.float32)
        for i,ob in enumerate(obs):
            features[i,:] = ob['feature']
        return Variable(torch.from_numpy(features), requires_grad=False).cuda()


    def _teacher_action(self, obs, ended):
        a = torch.LongTensor(len(obs))
        for i,ob in enumerate(obs):
            # Supervised teacher only moves one axis at a time
            ix,heading_chg,elevation_chg = ob['teacher']
            if heading_chg > 0:
                a[i] = self.model_actions.index('right')
            elif heading_chg < 0:
                a[i] = self.model_actions.index('left')
            elif elevation_chg > 0:
                a[i] = self.model_actions.index('up')
            elif elevation_chg < 0:
                a[i] = self.model_actions.index('down')
            elif ix > 0:
                a[i] = self.model_actions.index('forward')
            elif ended[i]:
                a[i] = self.model_actions.index('<ignore>')
            else:
                a[i] = self.model_actions.index('<end>')
        return Variable(a, requires_grad=False).cuda()


    def rollout(self, guide_prob):
        #For navigation
        obs = np.array(self.env.reset())
        batch_size = len(obs)

        seq, seq_mask, seq_lengths, perm_idx = self._sort_batch(obs)
        perm_obs = obs[perm_idx]

        traj = [{
            'instr_id': ob['instr_id'],
            'path': [(ob['viewpoint'], ob['heading'], ob['elevation'])]
        } for ob in perm_obs]

        ctx,h_t,c_t = self.encoder(seq, seq_lengths)

        a_t = Variable(torch.ones(batch_size).long() * self.model_actions.index('<start>'), requires_grad=False).cuda()

        ended = np.array([False] * len(obs))
        env_action = [None] * batch_size

        h_n, c_n = self.hist_encoder.init_hidden(batch_size)

        for t in range(self.episode_len):
            f_t = self._feature_variable(perm_obs)

            enc_data, h_n, c_n = self.hist_encoder(a_t, f_t, h_n, c_n)
            action_prob, critic_value = self.a2c_agent(ctx, seq_lengths, enc_data)

            guided = np.random.choice(2, batch_size, p=[1.0 - guide_prob, guide_prob])

            demo = self._teacher_action(perm_obs, ended)

            if guided[0] == 1:
                a_t = demo
            else:

                if len(perm_obs[0]['navigableLocations']) <= 1:
                    action_prob[0, self.model_actions.index('forward')] = -float('inf')

                action_prob = F.softmax(action_prob, dim=1)

                m = Categorical(action_prob)
                a_t = m.sample()
                if not ended[0]:
                    self.saved_actions.append(self.SavedAction(m.log_prob(a_t), critic_value, t))

            for i, (idx, ob) in enumerate(zip(perm_idx, perm_obs)):
                action_idx = a_t[i]
                if action_idx == self.model_actions.index('<end>'):
                    ended[i] = True
                env_action[idx] = self.env_actions[action_idx]

            obs = np.array(self.env.step(env_action))
            perm_obs = obs[perm_idx]

            for i,ob in enumerate(perm_obs):
                if not ended[i]:
                    traj[i]['path'].append((ob['viewpoint'], ob['heading'], ob['elevation']))

            if ended.all():
                break

        return traj


    def clear_saved_actions(self):
        del self.saved_actions[:]


    def test(self, guide_prob):
        self.encoder.eval()
        self.hist_encoder.eval()
        self.a2c_agent.eval()

        self.env.reset_epoch()
        self.losses = []
        self.results = {}
        # We rely on env showing the entire batch before repeating anything
        #print 'Testing %s' % self.__class__.__name__
        looped = False
        while True:
            for traj in self.rollout(guide_prob):
                if traj['instr_id'] in self.results:
                    looped = True
                else:
                    self.results[traj['instr_id']] = traj['path']
            if looped:
                break

        self.clear_saved_actions()


    def train(self, n_iters, guide_prob):
        self.encoder.train()
        self.hist_encoder.train()
        self.a2c_agent.train()

        policy_losses = []
        value_losses = []
        self.losses = []

        total_num = 0
        success_num = 0
        for iter in range(1, n_iters + 1):
            traj = self.rollout(guide_prob)
            for i, t in enumerate(traj):
                nav_error, oracle_error, trajectory_step, trajectory_length = self.ev._score_item(t['instr_id'], t['path'])
                reward = 1.0 if nav_error < 3.0 else 0.0

                total_num += 1.0
                success_num += reward

                for log_prob, value, step in self.saved_actions:
                    discounted_reward = pow(0.99, trajectory_step - step) * reward
                    advantage = discounted_reward - value.item()
                    policy_losses.append(-log_prob * advantage)
                    value_losses.append(F.smooth_l1_loss(value, Variable(torch.tensor([[discounted_reward]]).cuda(), requires_grad=False)))

            data_len = len(policy_losses)
            if data_len > 64:
                self.optimizer.zero_grad()
                value_loss = torch.stack(value_losses).sum()
                policy_loss = torch.stack(policy_losses).sum() 
                loss = value_loss + policy_loss
                self.losses.append(value_loss.item() / data_len)
                #print('sub iter [%d/%d], Average Value Loss: %.4f' %(iter, n_iters, value_loss.item() / data_len))
                loss.backward()
                self.optimizer.step()
                self.clear_saved_actions()
                policy_losses = []
                value_losses = []

        data_len = len(policy_losses)
        if data_len > 0:
            self.optimizer.zero_grad()
            loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
            self.losses.append(loss.item() / data_len)
            loss.backward()
            self.optimizer.step()
            self.clear_saved_actions()

        print('guide prob: %.2f, train value loss: %.4f, success: %.2f' % (guide_prob, np.average(np.array(self.losses)), (success_num / total_num)))
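
A minimal driver sketch for ActorCriticAgent, assuming an R2RBatch environment and the constants (features, batch_size, RESULT_DIR, MAX_INPUT_LENGTH) defined in the surrounding examples; the guide_prob schedule is illustrative only.

# Hypothetical training/evaluation driver; env construction and the annealing schedule are assumptions.
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok)
agent = ActorCriticAgent(train_env, len(vocab), RESULT_DIR, batch_size)
for epoch in range(10):
    guide_prob = max(0.0, 0.5 - 0.05 * epoch)  # anneal teacher guidance toward pure sampling
    agent.train(n_iters=100, guide_prob=guide_prob)
agent.test(guide_prob=0.0)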
Example #10
n_iters = 5000
log_every = 100
save_every = 100

vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)
glove = np.load(glove_path)

enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
feature_size = FEATURE_SIZE

visEncoder = try_cuda(SpeakerEncoderLSTM(
        action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
        bidirectional=bidirectional))    
lanEncoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=False, glove=glove))
dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))

agent = compatModel(None, "", visEncoder, lanEncoder, dotSim)
#agent.load('tasks/R2R/snapshots/release/speaker_final_release', map_location = 'cpu')
agent.load('tasks/R2R/compat/trained_1/compat_sample_imagenet_mean_pooled_train_iter_1000', map_location = 'cpu')
if __name__ == "__main__":
    traj = {'scan':'5q7pvUzZiYa', 'path':["7dc12a67ddfc4a4a849ce620db5b777b", "0e84cf4dec784bc28b78a80bee35c550", "a77784b955454209857d745976a1676d", "67971a17c26f4e2ca117b4fca73507fe", "8db06d3a0dd44508b3c078d60126ce19", "43ac37dfa1db4a13a8a9df4e454eb016", "4bd82c990a6548a994daa97c8f52db06", "6d11ca4d41e04bb1a725c2223c36b2aa", "29fb3c58b29348558d36a9f9440a1379", "c23f26401359426982d11ca494ee739b", "397403366d784caf804d741f32fd68b9", "3c6a35e15ada4b649990d6568cce8bd9", "55e4436f528c4bf09e4550079c572f7b", "69fad7dd177847dbabf69e8fb7c00ddf", "c629c7f1cf6f47a78c45a8ae9ff82247", "21fca0d6192940e580587fe317440f56", "4b85d61dd3a94e8a812affe78f3a322d", "3c025b8e3d2040969cd00dd0e9f29b09"][:2], 'heading':0.0,'elevation_init':0.0}
    encoded_instructions, _ = tok.encode_sentence('')
    encoded_instructions = torch.tensor([encoded_instructions], device = 'cpu')
    rdv_test = rdv(traj)
    
    path_obs, path_actions = rdv_test.obs_and_acts()
    # predicted
    score = agent.predict(path_obs, path_actions, encoded_instructions)
Example #11
from model import AlignModel, Seq2Seq, EncoderLSTM, Decoder
from data_gen import dev_iter, vocab
from torch.utils.tensorboard import SummaryWriter
import torch
import torch.nn as nn
attn = AlignModel()
enc = EncoderLSTM()
dec = Decoder()
special_ids = [
    vocab.stoi['<sos>'], vocab.stoi['<eos>'], vocab.stoi['<unk>'],
    vocab.stoi['<pad>']
]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(enc, dec, device).to(device)
model.load_state_dict(torch.load('./checkpoint/Seq2Seq_2020-05-28 16:21%.pth'))
criterion = nn.CrossEntropyLoss(ignore_index=vocab.stoi['<pad>'])


# Convert output ids back to words and print them
def id2doc(data, my_vocab, special_ids=special_ids):
    #data = data.numpy()
    res = []
    for text in data:
        tmp = []
        for num in text:
            if num not in special_ids:
                tmp.append(my_vocab.itos[num])
        res.append(tmp)
    for sen in res:
        print(sen)
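
A minimal usage sketch for id2doc above; the numeric token ids are placeholders standing in for the model's greedy predictions.

# Hypothetical usage: decode two predicted id sequences back to words (the ids 5, 6, 7 are placeholders).
sample_ids = [
    [vocab.stoi['<sos>'], 5, 6, vocab.stoi['<eos>']],
    [vocab.stoi['<sos>'], 7, vocab.stoi['<pad>'], vocab.stoi['<eos>']],
]
id2doc(sample_ids, vocab)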
Example #12
def make_env_and_models(args,
                        train_vocab_path,
                        train_splits,
                        test_splits,
                        test_instruction_limit=None):
    setup()
    image_features_list = ImageFeatures.from_args(args)
    vocab = read_vocab(train_vocab_path)
    tok = Tokenizer(vocab=vocab)
    train_env = R2RBatch(image_features_list,
                         batch_size=batch_size,
                         splits=train_splits,
                         tokenizer=tok)

    train_env.data.extend(hardNeg_train)  # extend train data and shuffle
    random.shuffle(train_env.data)

    enc_hidden_size = hidden_size // 2 if bidirectional else hidden_size
    glove = np.load(glove_path)
    feature_size = FEATURE_SIZE

    # =============================================================================
    #     visEncoder = try_cuda(CompatVisEncoderLSTM(
    #         action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
    #         bidirectional=bidirectional))
    # =============================================================================
    visEncoder = try_cuda(
        SpeakerEncoderLSTM(action_embedding_size,
                           feature_size,
                           enc_hidden_size,
                           dropout_ratio,
                           bidirectional=bidirectional))
    # =============================================================================
    #     lanEncoder = try_cuda(CompatLanEncoderLSTM(
    #         len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
    #         dropout_ratio, bidirectional=True, glove=glove))
    # =============================================================================
    lanEncoder = try_cuda(
        EncoderLSTM(len(vocab),
                    word_embedding_size,
                    enc_hidden_size,
                    vocab_pad_idx,
                    dropout_ratio,
                    bidirectional=False,
                    glove=glove))
    dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))
    #visEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/speaker_final_release_enc'))
    #lanEncoder.load_state_dict(torch.load('tasks/R2R/snapshots/release/follower_final_release_enc'))

    test_envs = {
        split: (R2RBatch(image_features_list,
                         batch_size=batch_size,
                         splits=[split],
                         tokenizer=tok,
                         instruction_limit=test_instruction_limit),
                eval_speaker.SpeakerEvaluation(
                    [split], instructions_per_path=test_instruction_limit))
        for split in test_splits
    }

    #test_envs['val_seen'][0].data.extend(hardNeg_val_seen)
    test_envs['val_unseen'][0].data.extend(hardNeg_val_unseen)
    test_envs['val_unseen'][0].data = test_envs['val_unseen'][0].data[
        3000:4000]
    return train_env, test_envs, visEncoder, lanEncoder, dotSim
Example #13
def train_val(path_type, max_episode_len, history, MAX_INPUT_LENGTH, feedback_method, n_iters, model_prefix, blind, args):
    ''' Train on the training set, and validate on seen and unseen splits. '''

    nav_graphs = setup(args.action_space, args.navigable_locs_path)
    # Create a batch training environment that will also preprocess text
    use_bert = (args.encoder_type in ['bert','vlbert'])  # for tokenizer and dataloader
    if use_bert:
        tok = BTokenizer(MAX_INPUT_LENGTH)
    else:
        vocab = read_vocab(TRAIN_VOCAB)
        tok = Tokenizer(vocab=vocab, encoding_length=MAX_INPUT_LENGTH)
    #train_env = R2RBatch(features, batch_size=batch_size, splits=['train'], tokenizer=tok,
    #                     path_type=path_type, history=history, blind=blind)

    feature_store = Feature(features, args.panoramic)
    train_env = R2RBatch(feature_store, nav_graphs, args.panoramic,args.action_space,batch_size=args.batch_size, splits=['train'], tokenizer=tok,
                         path_type=path_type, history=history, blind=blind)

    # Create validation environments
    #val_envs = {split: (R2RBatch(features, batch_size=batch_size, splits=[split],
    #            tokenizer=tok, path_type=path_type, history=history, blind=blind),
    #            Evaluation([split], path_type=path_type)) for split in ['val_seen', 'val_unseen']}

    val_envs = {split: (R2RBatch(feature_store,nav_graphs, args.panoramic, args.action_space,batch_size=args.batch_size, splits=[split],
                tokenizer=tok, path_type=path_type, history=history, blind=blind),
                Evaluation([split], path_type=path_type)) for split in ['val_seen','val_unseen']}

    # Build models and train
    #enc_hidden_size = hidden_size//2 if bidirectional else hidden_size

    if args.encoder_type == 'vlbert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = DicEncoder(FEATURE_ALL_SIZE,args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm,args.vl_layers,args.la_layers,args.bert_type)
            premodel = DicAddActionPreTrain.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()

        else:
            encoder = DicEncoder(FEATURE_ALL_SIZE,args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm,args.vl_layers,args.la_layers,args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size

    elif args.encoder_type == 'bert':
        if args.pretrain_model_name is not None:
            print("Using the pretrained lm model from %s" %(args.pretrain_model_name))
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm, args.bert_type)
            premodel = BertForMaskedLM.from_pretrained(args.pretrain_model_name)
            encoder.bert = premodel.bert
            encoder.drop = nn.Dropout(p=args.dropout_ratio)
            encoder.bert._resize_token_embeddings(len(tok)) # remember to resize tok embedding size
            #encoder.bert.update_lang_bert, encoder.bert.config.update_lang_bert = args.transformer_update, args.transformer_update
            #encoder.bert.update_add_layer, encoder.bert.config.update_add_layer = args.update_add_layer, args.update_add_layer
            encoder = encoder.cuda()
        else:
            encoder = BertEncoder(args.enc_hidden_size, args.hidden_size, args.dropout_ratio, args.bidirectional, args.transformer_update, args.bert_n_layers, args.reverse_input, args.top_lstm, args.bert_type).cuda()
            encoder.bert._resize_token_embeddings(len(tok))
    else:
        enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
        encoder = EncoderLSTM(len(vocab), word_embedding_size, enc_hidden_size, padding_idx,
                            dropout_ratio, bidirectional=bidirectional).cuda()


    #decoder = AttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
    #              action_embedding_size, args.hidden_size, args.dropout_ratio).cuda()
    ctx_hidden_size = args.enc_hidden_size * (2 if args.bidirectional else 1)
    if use_bert and not args.top_lstm:
        ctx_hidden_size = 768

    decoder = R2RAttnDecoderLSTM(Seq2SeqAgent.n_inputs(), Seq2SeqAgent.n_outputs(),
                  action_embedding_size, ctx_hidden_size, args.hidden_size, args.dropout_ratio, FEATURE_SIZE, args.panoramic, args.action_space, args.dec_h_type).cuda()


    train(train_env, encoder, decoder, n_iters,
          path_type, history, feedback_method, max_episode_len, MAX_INPUT_LENGTH, model_prefix, val_envs=val_envs, args=args)
Example #14
from vocab import SUBTRAIN_VOCAB, TRAIN_VOCAB, TRAINVAL_VOCAB

MAX_INPUT_LENGTH = 80
feature_size = 2048+128
max_episode_len = 10
word_embedding_size = 300
glove_path = 'tasks/R2R/data/train_glove.npy'
action_embedding_size = 2048+128
hidden_size = 512
dropout_ratio = 0.5
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)
glove = np.load(glove_path)

encoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, hidden_size, vocab_pad_idx,
        dropout_ratio, glove=glove))
decoder = try_cuda(AttnDecoderLSTM(
    action_embedding_size, hidden_size, dropout_ratio,
    feature_size=feature_size))

agent = Seq2SeqAgent(
        None, "", encoder, decoder, max_episode_len,
        max_instruction_length=MAX_INPUT_LENGTH)
            
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
agent.load('tasks/R2R/snapshots/release/follower_final_release', map_location = device)
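
A minimal follow-up sketch showing how the loaded follower might be evaluated, modelled on the test_submission examples above; the validation environment and result path are assumptions (features and batch_size are not defined in this snippet, and the test() signature is assumed to match the other examples).

# Hypothetical evaluation of the loaded follower; features, batch_size, and the results path are assumptions.
val_env = R2RBatch(features, batch_size=batch_size, splits=['val_unseen'], tokenizer=tok)
agent.env = val_env
agent.results_path = 'tasks/R2R/results/follower_val_unseen.json'
agent.test(use_dropout=False, feedback='argmax')
agent.write_results()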