Python ResetNeeded примеры использования

Язык программирования: Python

Пространство имен/Пакет: gym.error

Метод/Функция: ResetNeeded

Примеров на hotexamples.com: 6

Python ResetNeeded - 6 примеров найдено. Это лучшие примеры Python кода для gym.error.ResetNeeded, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: stats_recorder.py Проект: srivignessh/cartpole-ai

    def before_step(self, action):
        """Check that the monitored environment is in a steppable state.

        Nothing is recorded here; the method only validates state and returns
        ``None`` when stepping is allowed. ``action`` is accepted but not used.

        Raises:
            AssertionError: if ``self.closed`` is truthy.
            error.ResetNeeded: if ``self.done`` is truthy (the episode has
                ended) or ``self.steps`` is ``None`` (no reset has happened).
        """
        assert not self.closed

        # Episode already finished: stepping further is not allowed.
        if self.done:
            finished_msg = "Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode."
            raise error.ResetNeeded(finished_msg.format(self.env_id))

        # A step counter of None means reset() has not been called yet.
        if self.steps is None:
            unstarted_msg = "Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step."
            raise error.ResetNeeded(unstarted_msg.format(self.env_id))

Пример #2

Показать файл

Файл: trace_recorder.py Проект: passiweinberger/gym

    def before_step(self, action):
        """Append ``action`` to ``self.actions`` ahead of an environment step.

        Raises:
            AssertionError: if ``self.closed`` is truthy.
            error.ResetNeeded: if ``self.done`` is truthy, i.e. the caller is
                trying to step past the end of an episode.
        """
        assert not self.closed

        # Normal case: the episode is still running, so just log the action.
        if not self.done:
            self.actions.append(action)
            return

        raise error.ResetNeeded(
            "Trying to step environment which is currently done. While the monitor is active, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode."
        )

Пример #3

Показать файл

    def step(self,
             action: Tuple[int, int],
             player: Optional[int] = None) -> Tuple[Any, float, bool, Dict]:
        """Mark a board location for the current player and advance the game.

        Args:
            action: location to mark; it is converted to a tuple and used to
                index ``self.board``.
            player: optional identity of the caller. When given (and truthy)
                it must equal ``self.curr_turn``; this guards against playing
                the same side twice by mistake in more complex setups.

        Returns:
            observation, reward, done, info. The reward is ``float`` of the
            ``check_win`` result when that result is truthy, otherwise ``0.0``;
            info is always an empty dict.

        Raises:
            error.InvalidAction: if the chosen location is not ``0`` on the
                board, or if ``player`` does not match the current turn.
            error.ResetNeeded: if the game is already over.
        """
        # Validation order matters: occupied cell, then finished game, then
        # turn order.
        action = tuple(action)
        if self.board[action] != 0:
            raise error.InvalidAction(f"action {action} is not a vaid choice")
        if self.done:
            raise error.ResetNeeded("Call reset as game is over")
        if player and player != self.curr_turn:
            raise error.InvalidAction(
                f"Player {self.curr_turn}'s turn. Move request from {player}")

        logger.debug("Selected action: %s on turn %d", action,
                     self.turns_played + 1)

        # curr_turn doubles as the marker written to the board.
        self.board[action] = self.curr_turn

        # A truthy result from check_win means somebody won (1 or -1).
        winner = check_win(self.board)
        has_winner = bool(winner)

        if has_winner:
            self.done = True
        elif self.turns_played == 9:
            # No winner and no turns left: the game is a draw.
            self.done = True
        else:
            # Game continues: hand the move to the next player.
            self.curr_turn = next(self.turn_iterator)

        return (self._get_obs(),
                float(winner) if has_winner else 0.0,
                self.done,
                {})

Пример #4

Показать файл

Файл: unrealizedPnL.py Проект: Tornadoofsoul/gym-cryptotrading

    def step(self, action):
        """Apply ``action`` for one timestep of the current episode.

        Args:
            action: key into ``BaseEnv.action_space.lookup``; it is also
                passed to ``self._take_action``.

        Returns:
            A 4-tuple ``(state, reward, done, remaining)``. ``state`` is taken
            from ``self._get_new_state()`` before the action is applied,
            ``reward`` from ``self._get_reward()`` after it. ``remaining`` is
            ``float(self.horizon - self.timesteps)`` while the episode runs
            and ``0.0`` on the final step.

        Raises:
            error.ResetNeeded: if ``self.episode_number`` is falsy or the
                horizon has already been reached.
        """
        # Compare by value, not identity: ``is`` on integers only happens to
        # work for small ints that CPython caches, so with a larger horizon
        # the end of the episode would never be detected.
        if not self.episode_number or self.timesteps == self.horizon:
            raise error.ResetNeeded()

        state = self._get_new_state()
        self._take_action(action)
        reward = self._get_reward()

        message = "Timestep {}:==: Action: {} ; Reward: {}".format(
            self.timesteps, BaseEnv.action_space.lookup[action], reward)
        self.logger.debug(message)

        self.timesteps = self.timesteps + 1
        if self.timesteps != self.horizon:
            # Episode continues: move the cursor forward and report how many
            # steps are left.
            self.current = self.current + 1
            return state, reward, False, float(self.horizon - self.timesteps)

        # Horizon reached: this was the last step of the episode.
        return state, reward, True, 0.0

Пример #5

Показать файл

    def step(self, action):
        """Place a stone on the board and report the resulting game status.

        Args:
            action: a ``(row, col, stone)`` triple; ``stone`` indexes
                ``self.STONES``.

        Returns:
            A 4-tuple ``(board_copy, reward, done, info)``: a deep copy of
            ``self.board``, the reward and done flag produced by
            ``self._check_status()``, and an empty dict.

        Raises:
            error.ResetNeeded: if ``self.done`` is truthy.
            error.InvalidAction: if the target cell is not ``self.EMPTY``, if
                ``stone`` is not below ``self.STONE_TYPE_COUNT``, or if
                ``stone`` equals ``self.last_stone``.
        """
        if self.done:
            raise error.ResetNeeded("")

        row, col, stone = action

        occupant = self.board[row][col]
        if occupant != self.EMPTY:
            raise error.InvalidAction(
                "Stone '{}' already exists in row: {}, col: {}".format(
                    occupant, row, col))

        if stone >= self.STONE_TYPE_COUNT:
            raise error.InvalidAction("Unknown stone type '{}'".format(stone))

        # NOTE(review): last_stone is stored below as STONES[stone] but is
        # compared here against the raw index -- confirm the two coincide.
        if stone == self.last_stone:
            raise error.InvalidAction("Need to change stone.")

        placed = self.STONES[stone]
        self.board[row][col] = placed
        self.last_stone = placed
        self.remaining_place -= 1

        status = self._check_status()
        reward, self.done = status

        # Return a copy so callers cannot mutate the live board.
        board_snapshot = copy.deepcopy(self.board)
        return board_snapshot, reward, self.done, {}

Пример #6

Показать файл

    def step(self, action: list):
        """Advance the supply chain by one turn using the agents' orders.

        Args:
            action: one order quantity per agent. Its length (as measured by
                ``get_init_len``) must equal ``self.n_agents`` and no entry
                may be negative.

        Returns:
            A 4-tuple ``(state, rewards, done, info)``: ``state`` comes from
            ``self._get_observations()``, ``rewards`` from
            ``self._get_rewards()``, ``done`` is ``self.done`` and ``info`` is
            an empty dict.

        Raises:
            error.ResetNeeded: if the environment is already finished.
            error.InvalidAction: if ``action`` has the wrong length or
                contains a negative order.
        """
        # sanity checks
        if self.done:
            raise error.ResetNeeded(
                "Environment is finished, please run env.reset() before taking actions"
            )
        if get_init_len(action) != self.n_agents:
            raise error.InvalidAction(
                f"Length of action array must be same as n_agents({self.n_agents})"
            )
        if any(np.array(action) < 0):
            raise error.InvalidAction(
                f"You can't order negative amount. You agents actions are: {action}"
            )

        # concatenate previous states, self.prev_states in an queue of previous states
        self.prev_states.popleft()
        self.prev_states.append(self._get_observations())
        # make incoming step
        demand = self._get_demand()
        orders_inc = [order.popleft() for order in self.orders]
        # what's the demand for each agent
        self.next_incoming_orders = [demand] + orders_inc[:-1]
        ship_inc = [shipment.popleft() for shipment in self.inbound_shipments]
        # calculate inbound shipments respecting orders and stock levels
        # (the last agent, the manufacturer, is assumed to have no constraints)
        for i in range(self.n_agents - 1):
            # stock + incoming shipment
            max_possible_shipment = (max(0, self.stocks[i + 1]) +
                                     ship_inc[i + 1])
            # incoming order + stockout (backorder)
            order = orders_inc[i] + max(0, -self.stocks[i + 1])
            shipment = min(order, max_possible_shipment)
            self.inbound_shipments[i].append(shipment)
        self.inbound_shipments[-1].append(orders_inc[-1])
        # update stocks
        self.stocks = [(stock + inc)
                       for stock, inc in zip(self.stocks, ship_inc)]
        for i in range(1, self.n_agents):
            self.stocks[i] -= orders_inc[i - 1]
        self.stocks[0] -= demand  # for the retailer
        # update orders
        for i in range(self.n_agents):
            self.orders[i].append(action[i])
        self.next_incoming_orders = [self._get_demand()
                                     ] + [x[0] for x in self.orders[:-1]]

        # calculate costs
        # The builtin ``float`` is used as dtype: the ``np.float`` alias was
        # removed from NumPy and raises AttributeError on current releases.
        self.holding_cost = np.zeros(self.n_agents, dtype=float)
        self.stockout_cost = np.zeros(self.n_agents, dtype=float)
        for i in range(self.n_agents):
            if self.stocks[i] >= 0:
                # only applicable when stocks > 0
                self.holding_cost[i] = (max(0, self.stocks[i]) *
                                        self.score_weight[0][i])
            else:
                # only applicable when stocks < 0
                self.stockout_cost[i] = (-min(0, self.stocks[i]) *
                                         self.score_weight[1][i])
        self.cum_holding_cost += self.holding_cost
        self.cum_stockout_cost += self.stockout_cost
        # calculate reward
        rewards = self._get_rewards()

        # check if done
        if self.turn == self.n_turns - 1:
            print(
                f"\nTotal cost is: EUR {sum(self.cum_holding_cost + self.cum_stockout_cost)}"
            )
            self.done = True
        else:
            self.turn += 1
        state = self._get_observations()
        # todo flatten observation dict
        return state, rewards, self.done, {}