# Shared imports for the excerpts below; some excerpts alias torch as T.
# augment_ma is assumed to be imported from the project's own helpers
# (its module path is not shown in this section).
import numpy as np
import torch
import torch as T


def select_action(self, observation):
    stpt_actions = []
    therm_actions = []
    blind_actions = []
    idxs = []
    # Sample one random setpoint, thermostat, and blind action per actuator,
    # recording each action's index within its discrete action space.
    for i in range(self.num_sat_actions):
        rand = np.random.choice(self.stpt_action_space)
        stpt_actions.append(rand)
        idxs.append(np.where(self.stpt_action_space == rand)[0].item())
    for i in range(self.num_therm_actions):
        rand = np.random.choice(self.therm_action_space)
        therm_actions.append(rand)
        idxs.append(np.where(self.therm_action_space == rand)[0].item())
    for i in range(self.num_blind_actions):
        rand = np.random.choice(self.blind_action_space)
        blind_actions.append(rand)
        idxs.append(np.where(self.blind_action_space == rand)[0].item())
    # Convert each raw setpoint action into a (reward action, SAT setpoint) pair.
    sat_actions_tups = []
    for a in stpt_actions:
        action_stpt, sat_sp = augment_ma(observation, a)
        sat_actions_tups.append((action_stpt, sat_sp))
    # Record the sampled indices on the observation for later use in training.
    for i, idx in enumerate(idxs):
        observation[1][f"Action idx {i}"] = idx
    return sat_actions_tups, therm_actions, blind_actions, idxs
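# A self-contained illustration (values are assumptions, not the repo's) of
# the np.where index-recovery trick used above: sample a value from a discrete
# action space, then recover its position in that space.
_demo_space = np.array([12.0, 14.0, 16.0, 18.0])
_demo_rand = np.random.choice(_demo_space)
_demo_idx = np.where(_demo_space == _demo_rand)[0].item()
assert _demo_space[_demo_idx] == _demo_rand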
def agent_start(self, state):
    action, blind_action = self.policy_old.act(state[0], self.memory)
    self.last_action = action
    self.last_state = state
    # In the discrete case, `act` returns indices, so map them to the
    # corresponding values in the action tables.
    if self.discrete:
        action = self.action_space[action]
        blind_action = self.blind_action_space[blind_action]
    action_stpt, sat_sp = augment_ma(state, action)
    return action_stpt, sat_sp, blind_action
def choose_action(self, observation):
    # Epsilon-greedy: exploit the Q-network with probability 1 - epsilon,
    # otherwise sample a random action from the action space.
    if np.random.random() > self.epsilon:
        state, _, _ = observation
        actions = self.q_eval.forward(state)
        action_idx = T.argmax(actions).item()
        action = self.action_space[action_idx]
    else:
        action = np.random.choice(self.action_space)
    action, sat_sp = augment_ma(observation, action)
    return action, sat_sp
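# The epsilon-greedy branch above assumes self.epsilon is annealed elsewhere.
# A minimal sketch of a common linear-decay schedule; `eps_dec` and `eps_min`
# are hypothetical hyperparameters, not names confirmed by this code.
def decrement_epsilon(self):
    # Step exploration down linearly after each learning step until it
    # reaches the floor value.
    self.epsilon = max(self.epsilon - self.eps_dec, self.eps_min)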
def select_action(self, observation, evaluate=False):
    """
    :param observation: tuple whose first element is the state tensor
    :param evaluate: if True, use the policy's deterministic action
        instead of sampling
    :return: `action` is for the reward function; `sat_sp` is what's used
        by env_step and by the model for training
    """
    actions = []
    if self.start_steps > self.total_numsteps:
        # Warm-up phase: sample each action uniformly from its bounds.
        for i in range(self.action_space.shape[0]):
            a = np.random.uniform(self.action_space[i].min(),
                                  self.action_space[i].max())
            actions.append(a)
    else:
        state, _, _ = observation
        state = torch.FloatTensor(state.float()).to(self.device).unsqueeze(0)
        if not evaluate:
            action, _, _ = self.policy.sample(state)
        else:
            _, _, action = self.policy.sample(state)
        for i in range(self.action_space.shape[0]):
            a = action.detach().cpu().numpy()[0][i]
            actions.append(a)
    # Split the flat action vector into setpoint, thermostat, and blind segments.
    sat_actions = actions[0:self.num_sat_actions]
    therm_actions = actions[self.num_sat_actions:self.num_therm_actions + self.num_sat_actions]
    blind_actions = actions[self.num_therm_actions + self.num_sat_actions:]
    sat_actions_tups = []
    for a in sat_actions:
        action_stpt, sat_sp = augment_ma(observation, a)
        sat_actions_tups.append((action_stpt, sat_sp))
    if len(sat_actions) == 0:
        # This is hacky but keeps the parsing in the main file clean:
        # pad with an empty pair so sat_actions_tups[0] always exists.
        sat_actions_tups.append(([], []))
    return sat_actions_tups, therm_actions, blind_actions, actions
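# A self-contained sketch of how the slicing above partitions the flat action
# vector; the counts and values here are assumptions chosen for illustration.
_num_sat, _num_therm = 1, 2
_actions = [14.5, 21.0, 22.5, 0.8]                 # [sat, therm, therm, blind]
_sat = _actions[0:_num_sat]                        # [14.5]
_therm = _actions[_num_sat:_num_therm + _num_sat]  # [21.0, 22.5]
_blind = _actions[_num_therm + _num_sat:]          # [0.8]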
def select_action(self, observation):
    # Epsilon-greedy over two independent heads: one for setpoints, one for blinds.
    if np.random.random() > self.epsilon:
        state, _, _ = observation
        actions_stpt, actions_blinds = self.q_eval.forward(state)
        # Setpoint action
        actions_stpt_idx = T.argmax(actions_stpt).item()
        action_stpt = self.action_space[0][actions_stpt_idx]
        # Blind action
        actions_blinds_idx = T.argmax(actions_blinds).item()
        action_blinds = self.action_space[1][actions_blinds_idx]
    else:
        action_stpt = np.random.choice(self.action_space[0])
        action_blinds = np.random.choice(self.action_space[1])
    action_stpt, sat_sp = augment_ma(observation, action_stpt)
    return action_stpt, sat_sp, action_blinds
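# select_action above expects q_eval.forward to return one Q-vector per head.
# A minimal sketch of such a dual-head module; the class name, hidden width,
# and layer layout are assumptions, not the repo's actual network.
import torch.nn as nn

class TwoHeadQNetwork(nn.Module):
    def __init__(self, input_dims, n_stpt_actions, n_blind_actions):
        super().__init__()
        self.trunk = nn.Sequential(nn.Linear(input_dims, 64), nn.ReLU())
        self.stpt_head = nn.Linear(64, n_stpt_actions)    # Q-values for setpoints
        self.blind_head = nn.Linear(64, n_blind_actions)  # Q-values for blinds

    def forward(self, state):
        h = self.trunk(state)
        return self.stpt_head(h), self.blind_head(h)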
def agent_start(self, state):
    action_idx = self.policy_old.act(state[0], self.memory)
    self.last_action = action_idx[:]
    self.last_state = state
    # Map each sampled index to its value in the per-actuator action tables.
    actions = []
    for i, action in enumerate(action_idx):
        actions.append(self.action_space[i][action])
    sat_actions = actions[:self.num_sat_actions]
    therm_actions = actions[self.num_sat_actions:self.num_therm_actions + self.num_sat_actions]
    blind_actions = actions[self.num_therm_actions + self.num_sat_actions:]
    sat_actions_tups = []
    for action in sat_actions:
        action_stpt, sat_sp = augment_ma(state, action)
        sat_actions_tups.append((action_stpt, sat_sp))
    if len(sat_actions) == 0:
        # Same padding trick as in select_action above: keep sat_actions_tups
        # non-empty so the main file can index it uniformly.
        sat_actions_tups.append(([], []))
    return sat_actions_tups, therm_actions, blind_actions
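# Why the ([], []) padding: a sketch (the main-loop unpacking is an assumption)
# of the uniform indexing it enables even with zero setpoint actions.
_tups = [([], [])]               # what agent_start returns with no SAT actions
_action_stpt, _sat_sp = _tups[0] # indexing [0] never raises, no special-casing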