Example #1
    def forward_sum_symbolic(self, landmark_r_theta_dict_list):
        x_list = []
        for landmark_r_theta_dict in landmark_r_theta_dict_list:
            x = cuda_var(torch.zeros(1, self.image_emb_size))
            for landmark, (r, theta) in landmark_r_theta_dict.items():
                if theta == -1:
                    # not visible
                    continue
                # get landmark embedding
                landmark_id = self.landmark_names.index(landmark)
                landmark_var = cuda_var(
                    torch.from_numpy(np.array([landmark_id])))
                landmark_embedding = self.landmark_embedding(landmark_var)

                # get r embedding
                r_var = cuda_var(torch.from_numpy(np.array([r])))
                r_embedding = self.r_embedding(r_var)

                # get theta embedding
                theta_var = cuda_var(torch.from_numpy(np.array([theta])))
                theta_embedding = self.theta_embedding(theta_var)

                embedding = torch.cat(
                    [landmark_embedding, r_embedding, theta_embedding], dim=1)
                #embedding = F.relu(self.dense(embedding))
                x = x + embedding
            x_list.append(x)

        return torch.cat(x_list)
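
Note: every example in this listing calls helper functions cuda_var and cuda_tensor that are not shown here. A minimal sketch of what they are assumed to do, written against the legacy pre-0.4 PyTorch Variable API that this code targets (the project's actual helpers may differ):

import torch
from torch.autograd import Variable


def cuda_tensor(t):
    # Assumed helper: move a tensor to the GPU when one is available.
    return t.cuda() if torch.cuda.is_available() else t


def cuda_var(t, volatile=False):
    # Assumed helper: wrap a tensor in a legacy autograd Variable on the right device.
    return Variable(cuda_tensor(t), volatile=volatile)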
Example #2
    def calc_loss_entropy(self, batch_replay_items):

        agent_observation_state_ls = []
        immediate_rewards = []
        action_batch = []
        for replay_item in batch_replay_items:
            agent_observation_state_ls.append(
                replay_item.get_agent_observed_state())
            action_batch.append(replay_item.get_action())
            immediate_rewards.append(replay_item.get_reward())

        action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
        immediate_rewards = cuda_var(
            torch.from_numpy(np.array(immediate_rewards)).float())

        num_states = int(action_batch.size()[0])
        model_prob_batch = self.model.get_probs_batch(
            agent_observation_state_ls)
        chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
        reward_log_probs = immediate_rewards * chosen_log_probs.view(-1)

        entropy = -torch.mean(
            torch.sum(model_prob_batch * torch.exp(model_prob_batch), 1))
        objective = torch.sum(reward_log_probs) / num_states
        loss = -(objective + self.entropy_coef * entropy)
        self.entropy = entropy

        return loss
Example #3
    def calc_loss(self, batch_replay_items):

        log_probabilities = []
        rewards = []
        action_batch = []
        for replay_item in batch_replay_items:
            log_probabilities.append(replay_item.get_log_prob())
            action_batch.append(replay_item.get_action())
            rewards.append(replay_item.get_reward())

        action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
        rewards = cuda_var(torch.from_numpy(np.array(rewards))).float()

        num_states = int(action_batch.size()[0])
        model_prob_batch = torch.cat(log_probabilities, dim=0)

        chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
        reward_log_probs = rewards * chosen_log_probs.view(-1)

        entropy = -torch.mean(
            torch.sum(model_prob_batch * torch.exp(model_prob_batch), 1))
        objective = torch.sum(reward_log_probs) / num_states
        loss = -(objective + self.entropy_coef * entropy)
        self.entropy = entropy

        return loss
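
Examples #2 and #3 compute the same REINFORCE-style objective with an entropy bonus; in both, the gathered values are treated as log-probabilities (the entropy term multiplies them by their exponential). A toy, self-contained illustration of the shapes involved, using current PyTorch tensor calls with made-up values:

import torch

num_states, num_actions = 3, 4
log_probs = torch.log_softmax(torch.randn(num_states, num_actions), dim=1)
actions = torch.tensor([0, 2, 1])
rewards = torch.tensor([1.0, 0.0, -1.0])
entropy_coef = 0.1  # stands in for self.entropy_coef

chosen_log_probs = log_probs.gather(1, actions.view(-1, 1)).view(-1)  # (num_states,)
objective = torch.sum(rewards * chosen_log_probs) / num_states
entropy = -torch.mean(torch.sum(log_probs * torch.exp(log_probs), dim=1))
loss = -(objective + entropy_coef * entropy)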
Example #4
    def get_probs(self,
                  agent_observed_state,
                  model_state,
                  mode=None,
                  volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)

        # Image list is already padded with zero-images if <5 images are available
        images = agent_observed_state.get_image()[-5:]
        image_batch = cuda_var(
            torch.from_numpy(np.array(images)).float(), volatile)

        # Flatten them? TODO: maybe don't hardcode this later on? batch size is 1 ;)
        image_batch = image_batch.view(1, 15, self.config["image_height"],
                                       self.config["image_width"])

        # List of instructions. False is there because it expects a second argument. TODO: figure out what this is
        instructions_batch = ([agent_observed_state.get_instruction()], False)

        # Previous action
        prev_actions_raw = [agent_observed_state.get_previous_action()]

        # If previous action is non-existent then encode that as a stop?
        prev_actions = [
            self.none_action if a is None else a for a in prev_actions_raw
        ]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

        # Get probabilities
        probs_batch, new_model_state = self.final_module(
            image_batch, instructions_batch, prev_actions_batch, model_state)

        # last two we don't really need...
        return probs_batch, new_model_state, None, None
Example #5
    def get_probs(self,
                  agent_observed_state,
                  model_state,
                  mode=None,
                  volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        # Extract the last 4 images or add dummy paddings
        image_seqs = [[aos.get_image()] for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [
            aos.get_instruction() for aos in agent_observed_state_list
        ]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        time = agent_observed_state.time_step
        time = cuda_var(torch.from_numpy(np.array([time])).long())

        probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
            image_batch, instructions_batch, time, mode, model_state)
        return probs_batch, new_model_state, image_emb_seq, state_feature
Example #6
    def calc_loss(self, batch):

        curr_obs = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point[0])).view(1, -1)
                for point in batch
            ],
                      dim=0)).float()
        actions = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point[1])).view(1, -1)
                for point in batch
            ],
                      dim=0)).float()
        next_obs = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point[2])).view(1, -1)
                for point in batch
            ],
                      dim=0)).float()
        gold_labels = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point[3])).view(1, -1)
                for point in batch
            ],
                      dim=0)).long()

        log_probs = self.forward(curr_obs, actions, next_obs)
        classification_loss = -torch.mean(
            log_probs.gather(1, gold_labels.view(-1, 1)))

        return classification_loss
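
Example #6 expects each element of batch to be a tuple indexed as (current observation, action, next observation, gold label); that layout is taken from the indexing above, while the sizes below are made up. A hypothetical batch showing the stacking pattern used to build the tensors:

import numpy as np
import torch

# Hypothetical batch: (curr_obs, action, next_obs, gold_label).
batch = [
    (np.random.rand(4), np.array([1.0, 0.0]), np.random.rand(4), 1),
    (np.random.rand(4), np.array([0.0, 1.0]), np.random.rand(4), 0),
]
curr_obs = torch.cat(
    [torch.from_numpy(np.array(point[0])).view(1, -1) for point in batch],
    dim=0).float()
gold_labels = torch.cat(
    [torch.from_numpy(np.array(point[3])).view(1, -1) for point in batch],
    dim=0).long()
print(curr_obs.shape, gold_labels.shape)  # torch.Size([2, 4]) torch.Size([2, 1])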
Example #7
    def calc_loss_old(self, batch_replay_items):

        angle_batch = []
        distance_batch = []
        batch_next_state_feature = []

        for replay_item in batch_replay_items:
            angle, distance = replay_item.get_goal()
            angle_batch.append(angle)
            distance_batch.append(distance)
            batch_next_state_feature.append(replay_item.get_state_feature())

        angle_batch = cuda_var(torch.from_numpy(np.array(angle_batch)))
        distance_batch = cuda_var(torch.from_numpy(np.array(distance_batch)))
        batch_next_state_feature = torch.cat(batch_next_state_feature)

        # Compute the negative log probability loss
        goal_angle_log_probability, goal_distance_log_probability = self.model.predict_goal_result(
            batch_next_state_feature)

        chosen_angle_log_probs = goal_angle_log_probability.gather(
            1, angle_batch.view(-1, 1))
        chosen_distance_log_probs = goal_distance_log_probability.gather(
            1, distance_batch.view(-1, 1))

        goal_probability_loss = -torch.sum(chosen_angle_log_probs) - torch.sum(
            chosen_distance_log_probs)
        num_states = float(len(batch_replay_items))
        goal_probability_loss = goal_probability_loss / num_states

        return goal_probability_loss
Example #8
    def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seq_lens = [1]
        image_seq_lens_batch = cuda_tensor(
            torch.from_numpy(np.array(image_seq_lens)))
        # max_len = max(image_seq_lens)
        # image_seqs = [aos.get_image()[:max_len]
        #               for aos in agent_observed_state_list]
        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [aos.get_instruction()
                        for aos in agent_observed_state_list]
        read_pointers = [aos.get_read_pointers()
                         for aos in agent_observed_state_list]
        instructions_batch = (instructions, read_pointers)

        prev_actions_raw = [aos.get_previous_action()
                            for aos in agent_observed_state_list]
        prev_actions = [self.none_action if a is None else a
                        for a in prev_actions_raw]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)), volatile)

        probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
            image_batch, image_seq_lens_batch, instructions_batch, prev_actions_batch, mode, model_state)
        return probs_batch, new_model_state, image_emb_seq, state_feature
Example #9
    def calc_loss(self, batch_replay_items):
        """ Given a set of replay items this function calculates the loss variable """

        agent_observation_state_ls = []
        immediate_rewards = []
        action_batch = []
        log_probabilities = []
        for replay_item in batch_replay_items:
            agent_observation_state_ls.append(
                replay_item.get_agent_observed_state())
            action_batch.append(replay_item.get_action())
            immediate_rewards.append(replay_item.get_reward())
            log_probabilities.append(replay_item.get_log_prob())

        log_probabilities = torch.cat(log_probabilities)
        action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
        immediate_rewards = cuda_var(
            torch.from_numpy(np.array(immediate_rewards)).float())

        model_log_prob_batch = log_probabilities
        chosen_log_probs = model_log_prob_batch.gather(
            1, action_batch.view(-1, 1))
        reward_log_probs = immediate_rewards * chosen_log_probs.view(-1)
        model_prob_batch = torch.exp(model_log_prob_batch)

        self.entropy = -torch.sum(
            torch.sum(model_log_prob_batch * model_prob_batch, 1))
        objective = torch.sum(reward_log_probs)
        loss = -objective - self.entropy_coef * self.entropy

        return loss
Example #10
    def calc_loss(self, batch_replay_items):

        agent_observation_state_ls = []
        immediate_rewards = []
        action_batch = []
        for replay_item in batch_replay_items:
            agent_observation_state_ls.append(
                replay_item.get_agent_observed_state())
            action_batch.append(replay_item.get_action())
            immediate_rewards.append(replay_item.get_reward())

        action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
        immediate_rewards = cuda_var(
            torch.from_numpy(np.array(immediate_rewards)).float())

        num_states = int(action_batch.size()[0])
        model_prob_batch = self.model.get_probs_batch(
            agent_observation_state_ls)
        chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
        reward_log_probs = immediate_rewards * chosen_log_probs.view(-1)

        gold_distribution = cuda_var(
            torch.FloatTensor([0.6719, 0.1457, 0.1435, 0.0387]))
        cross_entropy = -torch.mean(
            torch.sum(gold_distribution * model_prob_batch, 1))
        objective = torch.sum(reward_log_probs) / num_states
        loss = -(objective - self.entropy_coef * cross_entropy)
        self.cross_entropy = cross_entropy

        return loss
Example #11
    def get_probs(self,
                  agent_observed_state,
                  model_state,
                  mode=None,
                  volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [
            aos.get_instruction() for aos in agent_observed_state_list
        ]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        time = agent_observed_state.time_step
        time = cuda_var(torch.from_numpy(np.array([time])).long())

        previous_action = agent_observed_state.previous_action
        if previous_action is None:
            previous_action = 4  # num_actions + 1
        previous_action = cuda_var(
            torch.from_numpy(np.array([previous_action])).long())

        probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
            image_batch, instructions_batch, time, previous_action, mode,
            model_state)
        return probs_batch, new_model_state, image_emb_seq, state_feature
Example #12
    def get_probs_symbolic_text(self, agent_observed_state, symbolic_text, model_state, mode=None, volatile=False):
        """ Same as get_probs instead forces the model to use the given symbolic text """

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seq_lens = [1]
        image_seq_lens_batch = cuda_tensor(
            torch.from_numpy(np.array(image_seq_lens)))
        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions_batch = [symbolic_text]

        prev_actions_raw = [aos.get_previous_action()
                            for aos in agent_observed_state_list]
        prev_actions = [self.none_action if a is None else a
                        for a in prev_actions_raw]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)), volatile)

        probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(image_batch, image_seq_lens_batch,
                                                                        instructions_batch, prev_actions_batch,
                                                                        mode, model_state)
        return probs_batch, new_model_state, image_emb_seq, state_feature
Example #13
    def get_probs_batch(self, agent_observed_state_list, mode=None):
        for aos in agent_observed_state_list:
            assert isinstance(aos, AgentObservedState)
        # print "batch size:", len(agent_observed_state_list)

        # sort list by instruction length
        agent_observed_state_list = sorted(
            agent_observed_state_list,
            key=lambda aos_: len(aos_.get_instruction()),
            reverse=True
        )

        images = [aos.get_image() for aos in agent_observed_state_list]
        image_batch = cuda_var(torch.from_numpy(np.array(images)).float())

        instructions = [aos.get_instruction()
                        for aos in agent_observed_state_list]
        read_pointers = [aos.get_read_pointers()
                         for aos in agent_observed_state_list]
        instructions_batch = (instructions, read_pointers)

        prev_actions_raw = [aos.get_previous_action()
                            for aos in agent_observed_state_list]
        prev_actions = [self.none_action if a is None else a
                        for a in prev_actions_raw]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

        probs_batch = self.final_module(image_batch, instructions_batch,
                                        prev_actions_batch, mode)
        return probs_batch
Example #14
    def get_attention_prob(self,
                           agent_observed_state,
                           model_state,
                           mode=None,
                           volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(image_seqs)).float(), volatile)

        instructions = [
            aos.get_instruction() for aos in agent_observed_state_list
        ]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        time = agent_observed_state.time_step
        time = cuda_var(torch.from_numpy(np.array([time])).long())

        instruction_string = instruction_to_string(
            agent_observed_state.instruction, self.config)

        state_feature = self.final_module.get_attention_prob(
            image_batch, instructions_batch, instruction_string,
            agent_observed_state.goal)
        return state_feature
Example #15
    def get_probs(self, agent_observed_state, model_state, mode=None):

        assert isinstance(agent_observed_state, AgentObservedState)
        agent_observed_state_list = [agent_observed_state]

        image_seq_lens = [1]
        image_seq_lens_batch = cuda_tensor(
            torch.from_numpy(np.array(image_seq_lens)))
        image_seqs = [[aos.get_last_image()]
                      for aos in agent_observed_state_list]
        image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float())

        goal_image_seqs = [[aos.get_goal_image()] for aos in agent_observed_state_list]
        goal_image_batch = cuda_var(torch.from_numpy(np.array(goal_image_seqs)).float())

        prev_actions_raw = [aos.get_previous_action()
                            for aos in agent_observed_state_list]
        prev_actions = [self.none_action if a is None else a
                        for a in prev_actions_raw]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

        probs_batch, new_model_state, image_emb_seq = self.final_module(image_batch, image_seq_lens_batch,
                                                                        goal_image_batch, prev_actions_batch,
                                                                        mode, model_state)
        return probs_batch, new_model_state, image_emb_seq
Example #16
    def calc_loss(self, batch_replay_items):

        agent_observation_state_ls = []
        landmark = []
        theta_1 = []
        theta_2 = []
        r = []
        for replay_item in batch_replay_items:
            agent_observation_state_ls.append(
                replay_item.get_agent_observed_state())
            landmark_, theta_1_, theta_2_, r_ = replay_item.get_symbolic_text()
            landmark.append(landmark_)
            theta_1.append(theta_1_)
            theta_2.append(theta_2_)
            r.append(r_)

        num_states = len(agent_observation_state_ls)

        landmark_batch = cuda_var(torch.from_numpy(np.array(landmark)))
        theta_1_batch = cuda_var(torch.from_numpy(np.array(theta_1)))
        theta_2_batch = cuda_var(torch.from_numpy(np.array(theta_2)))
        r_batch = cuda_var(torch.from_numpy(np.array(r)))

        model_prob_landmark, model_prob_theta_1, model_prob_theta_2, model_prob_r \
            = self.model.get_symbolic_text_batch(agent_observation_state_ls)

        # compute expected theta
        model_prob_theta_1_ = torch.exp(model_prob_theta_1)
        model_prob_theta_2_ = torch.exp(model_prob_theta_2)
        expected_theta_1 = torch.matmul(model_prob_theta_1_,
                                        self.theta_values)  # batch
        expected_theta_2 = torch.matmul(model_prob_theta_2_,
                                        self.theta_values)  # batch

        gold_theta_1 = self.theta_values.gather(0, theta_1_batch.view(-1, 1))
        gold_theta_2 = self.theta_values.gather(0, theta_2_batch.view(-1, 1))

        theta_1_diff_1 = torch.remainder(gold_theta_1 - expected_theta_1, 360)
        theta_1_diff_2 = torch.remainder(expected_theta_1 - gold_theta_1, 360)
        theta_1_diff = torch.min(theta_1_diff_1, theta_1_diff_2)
        theta_1_loss = torch.mean(theta_1_diff**2)

        theta_2_diff_1 = torch.remainder(gold_theta_2 - expected_theta_2, 360)
        theta_2_diff_2 = torch.remainder(expected_theta_2 - gold_theta_2, 360)
        theta_2_diff = torch.min(theta_2_diff_1, theta_2_diff_2)
        theta_2_loss = torch.mean(theta_2_diff**2)

        chosen_log_probs_landmark = model_prob_landmark.gather(
            1, landmark_batch.view(-1, 1))
        # chosen_log_probs_theta_1 = model_prob_theta_1.gather(1, theta_1_batch.view(-1, 1))
        # chosen_log_probs_theta_2 = model_prob_theta_2.gather(1, theta_2_batch.view(-1, 1))
        chosen_log_probs_r = model_prob_r.gather(1, r_batch.view(-1, 1))

        cross_entropy_loss_objective = torch.sum(chosen_log_probs_landmark) / num_states \
                    + torch.sum(chosen_log_probs_r) / num_states
        loss = -cross_entropy_loss_objective + 0.0002 * theta_1_loss + 0.0002 * theta_2_loss

        return loss
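
The theta loss above measures angular error with wrap-around: of the two remainders modulo 360, the smaller one is the true angular distance between the gold and expected angles. A toy check with made-up values:

import torch

gold_theta = torch.tensor([350.0])
expected_theta = torch.tensor([10.0])
diff_1 = torch.remainder(gold_theta - expected_theta, 360)  # 340
diff_2 = torch.remainder(expected_theta - gold_theta, 360)  # 20
theta_diff = torch.min(diff_1, diff_2)                      # 20 degrees, not 340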
Example #17
    def get_loss_and_prob(volatile_features, goal, final_height, final_width):
        attention_probs = volatile_features["attention_probs"]
        attention_logits = volatile_features["attention_logits"]
        attention_log_prob = F.log_softmax(attention_logits, dim=0)
        row, col, row_real, col_real = goal
        gold_prob = GoalPrediction.generate_gold_prob(goal, final_height,
                                                      final_width)

        if row is None:
            cross_entropy_loss = -torch.sum(
                gold_prob * attention_log_prob)  # cross entropy loss
            meta = {"cross_entropy": cross_entropy_loss, "dist_loss": None}
            return cross_entropy_loss, attention_log_prob[final_height *
                                                          final_width], meta

        row_, col_ = row + 0.5, col + 0.5

        position_height = cuda_var(
            torch.from_numpy(np.array(list(range(
                0, final_height))))).float().view(-1, 1) + 0.5
        position_width = cuda_var(
            torch.from_numpy(np.array(list(range(
                0, final_width))))).float().view(-1, 1) + 0.5
        attention_prob = attention_probs[:-1].view(final_height, final_width)

        expected_row = torch.sum(position_height * attention_prob)
        expected_col = torch.sum(position_width.view(1, -1) * attention_prob)

        dist_loss = torch.sqrt((expected_row - row_) * (expected_row - row_) +
                               (expected_col - col_) * (expected_col - col_))
        cross_entropy_loss = -torch.sum(
            gold_prob * attention_log_prob)  # cross entropy loss

        if row is None or col is None:
            ix = final_height * final_width
        else:
            ix = row * final_width + col

        if GoalPrediction.loss_type == GoalPrediction.LOGLOSS:
            loss = -attention_log_prob[ix]
        elif GoalPrediction.loss_type == GoalPrediction.LOGLOSS_DIST:
            loss = -attention_log_prob[ix] + dist_loss
        elif GoalPrediction.loss_type == GoalPrediction.CROSS_ENTROPY:
            loss = cross_entropy_loss
        elif GoalPrediction.loss_type == GoalPrediction.DIST_LOSS:
            loss = dist_loss
        elif GoalPrediction.loss_type == GoalPrediction.CROSS_ENTROPY_AND_DIST_LOSS:
            loss = cross_entropy_loss + dist_loss
        else:
            raise AssertionError("Unhandled loss type ",
                                 GoalPrediction.loss_type)

        prob = attention_log_prob[ix]

        meta = {"cross_entropy": cross_entropy_loss, "dist_loss": dist_loss}
        return loss, prob, meta
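
The dist_loss in Example #17 penalizes the distance between the attention map's expected position and the gold cell centre. A toy 2x2 attention map illustrating the expected-position computation (all numbers are made up):

import torch

final_height, final_width = 2, 2
attention_prob = torch.tensor([[0.1, 0.2],
                               [0.3, 0.4]])  # made-up attention over the grid
position_height = torch.arange(final_height).float().view(-1, 1) + 0.5  # row centres
position_width = torch.arange(final_width).float().view(-1, 1) + 0.5    # column centres

expected_row = torch.sum(position_height * attention_prob)               # 0.15 + 1.05 = 1.2
expected_col = torch.sum(position_width.view(1, -1) * attention_prob)    # 0.2 + 0.9 = 1.1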
Example #18
    def forward(self, instructions_batch):
        token_lists, text_pointers = instructions_batch
        batch_size = len(token_lists)
        text_lengths = np.array([len(tokens) for tokens in token_lists])
        dims = (self.num_layers, batch_size, self.hidden_dim)
        hidden_f = (Variable(cuda_tensor(torch.zeros(*dims)),
                             requires_grad=False),
                    Variable(cuda_tensor(torch.zeros(*dims)),
                             requires_grad=False))
        hidden_b = (Variable(cuda_tensor(torch.zeros(*dims)),
                             requires_grad=False),
                    Variable(cuda_tensor(torch.zeros(*dims)),
                             requires_grad=False))

        # pad text tokens with 0's
        tokens_batch_f = [[] for _ in range(batch_size)]
        tokens_batch_b = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            num_zeros = text_lengths[0] - text_lengths[i]
            tokens_batch_f[i] = token_lists[i] + [0] * num_zeros
            tokens_batch_b[i] = token_lists[i][::-1] + [0] * num_zeros
        tokens_batch_f = cuda_var(torch.from_numpy(np.array(tokens_batch_f)))
        tokens_batch_b = cuda_var(torch.from_numpy(np.array(tokens_batch_b)))

        # swap so batch dimension is second, sequence dimension is first
        tokens_batch_f = tokens_batch_f.transpose(0, 1)
        tokens_batch_b = tokens_batch_b.transpose(0, 1)
        emb_sentence_f = self.embedding(tokens_batch_f)
        emb_sentence_b = self.embedding(tokens_batch_b)
        packed_input_f = pack_padded_sequence(emb_sentence_f, text_lengths)
        packed_input_b = pack_padded_sequence(emb_sentence_b, text_lengths)
        lstm_out_packed_f, _ = self.lstm_f(packed_input_f, hidden_f)
        lstm_out_packed_b, _ = self.lstm_b(packed_input_b, hidden_b)

        # return average output embedding
        lstm_out_f, _ = pad_packed_sequence(lstm_out_packed_f)
        lstm_out_b, _ = pad_packed_sequence(lstm_out_packed_b)
        lstm_out_f = lstm_out_f.transpose(0, 1)
        lstm_out_b = lstm_out_b.transpose(0, 1)
        embeddings_list = []
        for i, (start_i, end_i) in enumerate(text_pointers):
            embeddings = []
            if start_i > 0:
                embeddings.append(lstm_out_f[i][start_i - 1])
            else:
                embeddings.append(cuda_var(torch.zeros(self.hidden_dim)))
            embeddings.append(lstm_out_f[i][end_i - 1])
            embeddings.append(lstm_out_b[i][start_i])
            if end_i < text_lengths[i]:
                embeddings.append(lstm_out_b[i][end_i])
            else:
                embeddings.append(cuda_var(torch.zeros(self.hidden_dim)))
            embeddings_list.append(torch.cat(embeddings).view(1, -1))

        embeddings_batch = torch.cat(embeddings_list)
        return embeddings_batch
Example #19
def save_correlation_figure_(num_homing_policies, model, test_batches,
                             exp_name):

    correlation_stats = {}

    for batch in test_batches:

        prev_observations = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1)
                for point in batch
            ],
                      dim=0)).float()
        actions = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_action())).view(1, -1)
                for point in batch
            ],
                      dim=0)).long()
        observations = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_next_obs())).view(1, -1)
                for point in batch
            ],
                      dim=0)).float()

        # Compute loss
        _, info_dict = model.gen_prob(prev_observations, actions,
                                      observations)  # batch x 2
        assigned_states = info_dict["assigned_states"]

        for i, point in enumerate(batch):
            assigned_state = int(assigned_states[i])
            if point.get_next_state() in correlation_stats:
                correlation_stats[
                    point.get_next_state()][assigned_state] += 1.0
            else:
                vec = np.zeros(num_homing_policies, dtype=np.float32)
                vec[assigned_state] = 1.0
                correlation_stats[point.get_next_state()] = vec

    num_states = 0
    image = []
    for key in sorted(correlation_stats):
        vec = correlation_stats[key]
        vec = vec / max(1.0, vec.sum())
        image.append(vec)
        num_states += 1
    image = np.vstack(image)
    image = scipy.misc.imresize(image,
                                (num_states * 100, num_homing_policies * 100))

    filelist = os.listdir('./%s' % exp_name)
    num_images = len(filelist)
    scipy.misc.imsave("./%s/image_%d.png" % (exp_name, num_images + 1), image)
Example #20
    def get_log_prob(self, replay_item):

        image_batch, instruction = replay_item
        image_seqs = [image_batch]
        image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float())

        instructions = [instruction]
        instructions_batch = cuda_var(
            torch.from_numpy(np.array(instructions)).long())

        return self.final_module(image_batch, instructions_batch)
Example #21
    def get_probs(self,
                  agent_observed_state,
                  model_state,
                  mode=None,
                  volatile=False):

        assert isinstance(agent_observed_state, AgentObservedState)

        # Supposedly this is already padded with zeros, but I need to double check that code
        images = agent_observed_state.get_image()[-5:]

        # image_seqs = [[aos.get_last_image()]
        #               for aos in agent_observed_state_list]
        image_batch = cuda_var(
            torch.from_numpy(np.array(images)).float(), volatile)

        #flatten them? TODO: maybe don't hardcode this later on? batch size is 1 ;)
        image_batch = image_batch.view(1, 15, 128, 128)

        # list of list :)
        instructions_batch = ([agent_observed_state.get_instruction()], False)
        #instructions_batch = (cuda_var(torch.from_numpy(np.array(instructions)).long()), False)

        #print("instructions", instructions)
        #print("instructins_batch", instructions_batch)

        prev_actions_raw = agent_observed_state.get_previous_action()
        prev_actions_raw = self.none_action if prev_actions_raw is None else prev_actions_raw

        if prev_actions_raw == 81:
            previous_direction_id = [4]
        else:
            previous_direction_id = [prev_actions_raw % 4]
        # this input is over the space of 81 things :)
        previous_block_id = [int(prev_actions_raw / 4)]

        prev_block_id_batch = cuda_var(
            torch.from_numpy(np.array(previous_block_id)))
        prev_direction_id_batch = cuda_var(
            torch.from_numpy(np.array(previous_direction_id)))

        # prev_actions = [self.none_action if a is None else a
        #                 for a in prev_actions_raw]
        #prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

        probs_batch, new_model_state = self.final_module(
            image_batch, instructions_batch, prev_block_id_batch,
            prev_direction_id_batch, model_state)

        # last two we don't really need...
        return probs_batch, new_model_state, None, None
Example #22
    def calc_loss(self, batch_replay_items):

        if len(batch_replay_items) <= 1:
            return None

        action_batch = []
        batch_image_feature = []
        batch_next_image_feature = []

        for replay_item in batch_replay_items:
            next_image_emb = replay_item.get_next_image_emb()
            if next_image_emb is None:  # sometimes it can be None for the last item in a rollout
                continue
            action_batch.append(replay_item.get_action())
            batch_image_feature.append(replay_item.get_image_emb())
            batch_next_image_feature.append(next_image_emb)

        action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
        batch_image_feature = torch.cat(batch_image_feature)
        batch_next_image_feature = torch.cat(batch_next_image_feature)

        # Predict the feature of next image
        batch_predicted_next_image_feature = self.model.predict_action_result(
            batch_image_feature, action_batch)

        # Compute the squared mean loss
        diff = (batch_predicted_next_image_feature - batch_next_image_feature)
        temporal_autoencoding_loss = torch.mean(diff**2)

        return temporal_autoencoding_loss
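
Example #22's temporal autoencoding loss is simply a mean squared error between the predicted and observed next-image features. A toy equivalent with made-up shapes:

import torch

predicted = torch.randn(5, 32)   # made-up predicted next-image features
observed = torch.randn(5, 32)    # made-up observed next-image features
diff = predicted - observed
temporal_autoencoding_loss = torch.mean(diff ** 2)  # same value as F.mse_loss(predicted, observed)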
Example #23
    def calc_loss(self, batch_replay_items):

        images = []
        visible_objects = []
        for replay_item in batch_replay_items:
            image, visible_objects_ = replay_item
            visible_objects.append(visible_objects_)
            images.append([image])

        theta_logits = self.model.get_probs(images)  # batch x 67 x 12
        num_states = int(theta_logits.size()[0])
        one_hot_vector = torch.zeros(theta_logits.size())

        for i in range(0, num_states):
            visible_objects_example = visible_objects[i]
            for landmark in range(0, self.num_landmark):

                # See if the landmark is present and visible in the agent's field of view
                if landmark in visible_objects_example and visible_objects_example[
                        landmark][1] != -1:
                    r, theta = visible_objects_example[landmark]
                    one_hot_vector[i, landmark, theta] = 1.0

        loss = F.binary_cross_entropy_with_logits(
            theta_logits,
            cuda_var(one_hot_vector).float())

        return loss
Example #24
    def _gather_fqi_samples(replay_dataset, step, horizon, reward_func, learned_policy):

        dataset = []
        for replay_item in replay_dataset[step]:

            assert type(replay_item) == TransitionDatapoint and \
                   replay_item.get_timestep() == step and \
                   replay_item.is_valid() == 1

            current_obs = replay_item.get_curr_obs()
            next_obs = replay_item.get_next_obs()

            if reward_func is None:
                total_reward = replay_item.get_reward()
            else:
                total_reward = reward_func(current_obs, step)

            if step < horizon:
                obs_var = cuda_var(torch.from_numpy(next_obs)).float().view(1, -1)
                q_val = learned_policy[step + 1].gen_q_val(obs_var).view(-1)        # num_actions
                total_reward += float(q_val.max(0)[0].data.cpu())                   # Predict reward and take max

            datapoint = (current_obs,
                         replay_item.get_action_prob(),
                         replay_item.get_action(),
                         total_reward,
                         replay_item.get_curr_state(),
                         replay_item.get_next_state(),
                         replay_item.get_policy_index())

            dataset.append(datapoint)

        return dataset
Example #25
    def get_probs_batch(self, agent_observed_state_list, mode=None):
        for aos in agent_observed_state_list:
            assert isinstance(aos, AgentObservedState)
        # print "batch size:", len(agent_observed_state_list)

        # sort list by instruction length
        agent_observed_state_list = sorted(
            agent_observed_state_list,
            key=lambda aos_: len(aos_.get_instruction()),
            reverse=True
        )

        symbolic_image_list = []
        for aos in agent_observed_state_list:
            x_pos, z_pos, y_angle = aos.get_position_orientation()
            landmark_pos_dict = aos.get_landmark_pos_dict()
            symbolic_image = get_visible_landmark_r_theta(
                x_pos, z_pos, y_angle, landmark_pos_dict)
            symbolic_image_list.append(symbolic_image)
        image_batch = symbolic_image_list

        instructions_batch = [aos.get_symbolic_instruction()
                              for aos in agent_observed_state_list]

        prev_actions_raw = [aos.get_previous_action()
                            for aos in agent_observed_state_list]
        prev_actions = [self.none_action if a is None else a
                        for a in prev_actions_raw]
        prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

        probs_batch = self.final_module(image_batch, instructions_batch,
                                        prev_actions_batch, mode)
        return probs_batch
Example #26
    def get_probs_and_visible_objects(self, agent_observed_state_list):
        for aos in agent_observed_state_list:
            assert isinstance(aos, AgentObservedState)
        # print "batch size:", len(agent_observed_state_list)

        # sort list by instruction length
        agent_observed_state_list = sorted(
            agent_observed_state_list,
            key=lambda aos_: len(aos_.get_instruction()),
            reverse=True)

        images = [[aos.get_last_image()] for aos in agent_observed_state_list]
        image_batch = cuda_var(torch.from_numpy(np.array(images)).float())

        landmarks_visible = []
        for aos in agent_observed_state_list:
            x_pos, z_pos, y_angle = aos.get_position_orientation()
            landmark_pos_dict = aos.get_landmark_pos_dict()
            visible_landmarks = get_visible_landmark_r_theta(
                x_pos, z_pos, y_angle, landmark_pos_dict, self.landmark_names)
            landmarks_visible.append(visible_landmarks)

        # shape is BATCH_SIZE x 63 x 2
        probs_batch = self.final_module(image_batch)

        # landmarks_visible is list of length BATCH_SIZE, each item is a set containing landmark indices
        return probs_batch, landmarks_visible
Example #27
    def log_homing_policy_reward(self, env, homing_policies, step, logger):

        num_samples = self.constants["eval_homing_policy_sample_size"]
        all_total_reward = 0.0

        for ix, policy in enumerate(homing_policies[step]):

            total_reward = 0.0
            for _ in range(0, num_samples):

                # Rollin for steps
                obs, meta = env.reset()

                for step_ in range(1, step + 1):
                    obs_var = cuda_var(torch.from_numpy(obs)).float().view(1, -1)
                    action = policy[step_].sample_action(obs_var)
                    obs, reward, done, meta = env.step(action)
                    total_reward = total_reward + reward

            total_reward = total_reward / float(max(1, num_samples))
            all_total_reward = all_total_reward + total_reward
            logger.log("After horizon %r. Policy Number %r receives mean reward %r" % (step, ix + 1, total_reward))

        all_total_reward = all_total_reward / float(max(1, len(homing_policies[step])))
        logger.log("After horizon %r. Random Policy receives reward %r" % (step, all_total_reward))
Example #28
    def generate_gold_prob(goal, final_height, final_width, sigma2=0.5):
        row, col, row_real, col_real = goal
        gold_prob = cuda_var(torch.zeros(final_height * final_width +
                                         1)).float()

        if row is None or col is None:
            gold_prob[final_height *
                      final_width] = 1.0  # last value indicates not present
            return gold_prob

        row_ = float(round(row_real)) + 0.5
        col_ = float(round(col_real)) + 0.5

        for i in range(0, final_height):
            for j in range(0, final_width):
                ix = i * final_width + j
                center = (i + 0.5, j + 0.5)
                # dist2 = (center[0] - row_real) * (center[0] - row_real) + \
                #         (center[1] - col_real) * (center[1] - col_real)
                dist2 = (center[0] - row_) * (center[0] - row_) + \
                        (center[1] - col_) * (center[1] - col_)
                gold_prob[ix] = -dist2 / (2.0 * sigma2)

        gold_prob = torch.exp(gold_prob).float()
        gold_prob[final_height * final_width] = 0.0
        gold_prob = gold_prob / (gold_prob.sum() + 0.00001)

        return gold_prob
Example #29
    def _gather_last_observation(env, actions, step, homing_policies,
                                 selection_weights):

        start_obs, meta = env.reset()

        if step > 1:

            if selection_weights is None:
                # Select a homing policy for the previous time step randomly uniformly
                policy = random.choice(homing_policies[step - 1])
            else:
                # Select a homing policy for the previous time step using the given weights
                # policy = random.choices(homing_policies[step - 1], weights=selection_weights, k=1)[0]
                ix = gp.sample_action_from_prob(selection_weights)
                policy = homing_policies[step - 1][ix]

            obs = start_obs

            for step_ in range(1, step):
                obs_var = cuda_var(torch.from_numpy(obs)).float().view(1, -1)
                action = policy[step_].sample_action(obs_var)
                obs, reward, done, meta = env.step(action)

        action = random.choice(actions)
        new_obs, reward, done, meta = env.step(action)

        return new_obs, meta
Example #30
    def forward(self, instructions_batch):
        token_lists, _ = instructions_batch
        batch_size = len(token_lists)
        dims = (self.num_layers, batch_size, self.hidden_dim)
        hidden = (Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False),
                  Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False))

        # pad text tokens with 0's
        text_lengths = np.array([len(tokens) for tokens in token_lists])
        tokens_batch = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            num_zeros = text_lengths[0] - text_lengths[i]
            tokens_batch[i] = token_lists[i] + [0] * num_zeros
        tokens_batch = cuda_var(torch.from_numpy(np.array(tokens_batch)))

        # swap so batch dimension is second, sequence dimension is first
        tokens_batch = tokens_batch.transpose(0, 1)
        emb_sentence = self.embedding(tokens_batch)
        packed_input = pack_padded_sequence(emb_sentence, text_lengths)
        lstm_out_packed, _ = self.lstm(packed_input, hidden)
        # return average output embedding
        lstm_out, seq_lengths = pad_packed_sequence(lstm_out_packed)
        lstm_out = lstm_out.transpose(0, 1)
        sum_emb_list = []
        for i, seq_out in enumerate(lstm_out):
            seq_len = seq_lengths[i]
            sum_emb = torch.sum(seq_out[:seq_len], 0) / seq_len
            sum_emb_list.append(sum_emb.view(1, -1))
        return torch.cat(sum_emb_list)
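
The padding loop in Example #30 (and in Example #18) uses text_lengths[0] as the maximum length, so it assumes the batch is already sorted by decreasing token-list length, as the get_probs_batch examples do before calling the text encoder. A small, hypothetical illustration of that padding step with made-up token ids:

import numpy as np

# Hypothetical batch of token-id lists, longest first.
token_lists = [[5, 9, 2, 7], [3, 1, 4], [8, 6]]
text_lengths = np.array([len(tokens) for tokens in token_lists])
tokens_batch = [tokens + [0] * int(text_lengths[0] - len(tokens)) for tokens in token_lists]
# tokens_batch == [[5, 9, 2, 7], [3, 1, 4, 0], [8, 6, 0, 0]]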