def reset(self,
          env: IEnvironment[State, Action, Reward],
          root_option: Node[Option[OptionData]],
          random_seed: Union[int, RandomState] = None) -> None:
    """
    Reset the memory for a new episode rooted at root_option.

    Parameters
    ----------
    env: IEnvironment[State, Action, Reward]
    root_option: Node[Option[OptionData]]
    random_seed: Union[int, RandomState] = None
    """
    if random_seed is not None:
        self.random = optional_random(random_seed)
    assert root_option not in self._num_options  # check that we haven't seen this root before
    if self.max_length is not None and len(self.list_roots) >= self.max_length:
        # evict the most recently added root to stay within max_length
        to_remove: Node[Option[OptionData]] = self.list_roots.pop()
        del self._num_options[to_remove]
        del self._num_transitions[to_remove]
        del self._trajectory_for[to_remove]
    self.list_roots.append(root_option)
    self._num_options[root_option] = 0
    self._num_transitions[root_option] = 0
    self._trajectory_for[root_option] = []
    self.current_option = root_option
def _random_tile_of_type(self,
                         required_type: str = None,
                         rand_seed: Union[int, RandomState] = None) -> Point:
    random: RandomState = optional_random(rand_seed)
    possibilities: List[Point] = self._all_tiles_of_type(required_type)
    return array_random_choice(possibilities, random=random)
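# optional_random is used throughout this section but not shown. Below is a
# minimal sketch of its presumed contract, inferred from the call sites here;
# this implementation is an assumption, not the repo's actual helper:
#   None        -> the shared global RandomState
#   int         -> a fresh RandomState seeded with that int
#   RandomState -> returned unchanged
from typing import Union
from numpy.random import RandomState

_GLOBAL_RANDOM = RandomState()

def optional_random(seed: Union[int, RandomState, None] = None) -> RandomState:
    if seed is None:
        return _GLOBAL_RANDOM
    if isinstance(seed, RandomState):
        return seed
    return RandomState(seed)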
def reset(self,
          env: IEnvironment[State, Action, Reward],
          root_option: Option[OptionData],
          random_seed: Union[int, RandomState] = None) -> None:
    """
    Reset the agent to function in a new environment/episode.

    Parameters
    ----------
    env: IEnvironment[State, Action, Reward]
        the environment the agent is about to act in
    root_option: Option[OptionData]
        the base option that the agent begins executing
    random_seed: Union[int, RandomState] = None
        the new random seed to use. If None, the existing seed is kept
    """
    self.root_option_node = Node(root_option)
    self.current_option_node = self.root_option_node
    self.prev_option_node = None
    if random_seed is not None:
        self.random = optional_random(random_seed)
    self.evaluator.reset(env, random_seed)
    self.generator.reset(env, random_seed)
    self.planning_terminator.reset(env, random_seed)
    self.policy_terminator.reset(env, random_seed)
    self.low_level.reset(env, root_option, random_seed)
    self.memory.reset(env, self.root_option_node, random_seed)
def __init__(self):
    self.target_path: Optional[List[DirectedPoint]] = None
    self.waypoints: PriorityQueue[List[DirectedPoint]] = PriorityQueue()
    self.history: List[DirectedPoint] = []
    self.visited: NumPyDict[Point, bool] = NumPyDict(dtype=np.int8)
    self.current_goal: Optional[Point] = None
    self.random: RandomState = optional_random()
    self.backstep: int = 0  # used if we are in the process of backing up
def reset(self,
          env: MazeWorld = None,
          random_seed: Union[int, RandomState] = None):
    if self.env is None:
        assert env is not None
        self.env = env
    self.eps = 0.03
    self.eps_max = 0.7
    self.history = []
    self.n_repeats = 0
    self.random = optional_random(random_seed)
def _create_random_problem(self,
                           rand_seed: Union[int, RandomState] = None) -> Tuple[Point, Point]:
    random = optional_random(rand_seed)
    start = self._random_tile_of_type("Empty", random)
    end = self._random_tile_of_type("Empty", random)
    while np.array_equal(start, end):
        # resample until the goal differs from the start
        end = self._random_tile_of_type("Empty", random)
    return start, end
def __init__(self,
             v_model: IVModel[State, Reward, OptionData],
             q_model: IQModel[State, Reward, OptionData],
             settings: Dict[str, Any],
             get_beta: Callable[[int], float],
             gamma: float):
    self.v_model: IVModel[State, Reward, OptionData] = v_model
    self.q_model: IQModel[State, Reward, OptionData] = q_model
    self.get_beta: Callable[[int], float] = get_beta
    self.step: int = 0
    self.gamma: float = gamma
    self.random: RandomState = optional_random(settings['random'])
def reset(self,
          env: Optional[IEnvironment[OneHotImg, Action, Reward]],
          root_option: Option[Point],
          random_seed: Union[int, RandomState] = None) -> None:
    self.target_path = None
    self.waypoints = PriorityQueue()
    self.history = []
    self.visited = NumPyDict(dtype=np.int8)
    self.current_goal = root_option.value
    self.backstep = 0
    if random_seed is not None:
        self.random = optional_random(random_seed)
def __init__(self,
             source_network: keras.Model,
             target_network: keras.Model,
             action_space: gym.spaces.Discrete,
             batch_size: int,
             replay_size: int,
             device: tf.device = None,
             gamma: float = 0.99,
             learning_rate: float = 3e-4,
             random: Union[int, RandomState] = None,
             target_update_freq: int = 1000,
             **kwargs):
    """
    :param source_network: keras.Model : State* => [batch, n_stations]
        network used for scoring.
        State should be the expected state format with the first dimension
        reserved for batches
    :param target_network: should be an architectural copy of source_network
    :param action_space: gym.spaces.Discrete
    :param batch_size: int
        batch size for training
    :param replay_size: int
        number of transitions to save in memory
    :param (optional) device: tf.device
        defaults to the first GPU, if available
    :param (optional) gamma: float in (0, 1)
        defaults to 0.99
    :param (optional) learning_rate: float
        defaults to 3e-4
    :param (optional) random: Union[int, RandomState]
        initial seed; defaults to using the global random state
    :param (optional) target_update_freq: int
        steps between target-network updates; defaults to 1000
    :param kwargs: used for ReplayBuffer
    """
    kwargs = kwargify(locals())
    self.device: tf.device = optional_device(device)
    self.n_actions: int = action_space.n
    self.random: RandomState = optional_random(random)
    self.gamma: float = gamma
    self.batch_size: int = batch_size
    self.target_update_freq: int = target_update_freq
    assert self.batch_size > 0
    self.q_network: keras.Model = source_network
    self.target_q_network: keras.Model = target_network
    self.replay_buffer = ReplayBuffer(capacity=replay_size, **kwargs)
    self.optimizer = optimizers.Adam(learning_rate=learning_rate)
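# A hypothetical construction sketch for the agent above (the class name
# DQNAgent and make_net are assumed; everything else follows the signature).
# The target network is an architectural copy of the source network, per the
# docstring.
import gym
from tensorflow import keras
from tensorflow.keras import layers

def make_net(n_actions: int) -> keras.Model:
    # small fully-connected scorer; shapes are inferred on first call
    return keras.Sequential([
        layers.Dense(64, activation='relu'),
        layers.Dense(n_actions),
    ])

action_space = gym.spaces.Discrete(4)
agent = DQNAgent(source_network=make_net(action_space.n),
                 target_network=make_net(action_space.n),
                 action_space=action_space,
                 batch_size=32,
                 replay_size=10_000,
                 gamma=0.99,
                 learning_rate=3e-4,
                 random=42)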
def __init__(self,
             capacity: int,
             random: Union[int, RandomState] = None,
             **kwargs):
    """
    :param capacity: int
        number of replay transitions to hold
    :param (optional) random: Union[int, RandomState]
        random seed to use; defaults to the global numpy random state
    :param kwargs: for compatibility
    """
    self.capacity: int = capacity
    self.buffer: deque = deque(maxlen=self.capacity)
    self.random: RandomState = optional_random(random)
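# Design note on the buffer above: deque(maxlen=capacity) silently evicts the
# oldest entry once the buffer is full, which is what bounds the replay
# memory. A quick, self-contained illustration:
from collections import deque

d = deque(maxlen=3)
for i in range(5):
    d.append(i)
assert list(d) == [2, 3, 4]  # the two oldest transitions were dropped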
def reset(self,
          env: IEnvironment[State, Action, Reward],
          random_seed: Union[int, RandomState] = None) -> None:
    """
    Prepares the agent to begin functioning in the new environment.
    Should be called each time there is a new episode or a new env.
    A random_seed may be passed in as well to reset the random seed.

    Parameters
    ----------
    env: IEnvironment[State, Action, Reward]
        the new environment (may be the same as the old)
    random_seed: Optional[Union[int, RandomState]] = None
        the new random seed to be used. If None, no changes are made
    """
    if random_seed is not None:
        self.random = optional_random(random_seed)
    self.q_model.reset(env, random_seed)
    self.v_model.reset(env, random_seed)
def __init__(self,
             max_length: int = None,
             random_seed: Union[int, RandomState] = None):
    """
    Parameters
    ----------
    max_length: int > 0 = None
        maximum number of root options to retain; None means unbounded
    random_seed: Union[int, RandomState] = None
    """
    assert max_length is None or max_length > 0
    self.max_length = max_length
    self.random: RandomState = optional_random(random_seed)
    self._num_options: Dict[Node[Option[OptionData]], int] = {}
    self._num_transitions: Dict[Node[Option[OptionData]], int] = {}
    self._trajectory_for: Dict[Node[Option[OptionData]],
                               Trajectory[State, Action, Reward]] = {}
    self.list_roots: List[Node[Option[OptionData]]] = []
    self.current_option: Optional[Node[Option[OptionData]]] = None
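# A hypothetical usage sketch of this memory together with its reset above
# (the class name OptionMemory, env, and the Option values are assumed; Node
# comes from the surrounding codebase). With max_length=2, the third reset
# evicts a stored root before adding the new one, so only two roots remain.
memory = OptionMemory(max_length=2, random_seed=0)
for opt in (option_a, option_b, option_c):  # hypothetical Option instances
    memory.reset(env, Node(opt))
assert len(memory.list_roots) == 2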
def __init__(self,
             arrivals: Arrivals,
             departures: List[List[int]],
             initial_station_state: np.ndarray,
             station_info: np.ndarray,
             sample_distance: float,
             sample_amount: Union[float, int],
             max_cars: int,
             car_speed: float,
             **kwargs):
    """
    :param arrivals: List[List[ArrivalEvents]] : [max_t, ]
        list of arrivals at each timestep.
        Note: some of these will become queries
    :param departures: List[List[int]]
        list of indices of each station with departing cars at each timestep
        Note: multiple cars may depart from the same station at the same timestep
        Note: this only lists departures that are not associated with an arrival
    :param initial_station_state: np.ndarray[int8] : [n_stations, 1]
        initial number of occupied slots at each station
    :param station_info: np.ndarray[float] : [n_stations, 3]
        idx => (x, y, max_occupancy)
        information about each station, indexed by rows
    :param sample_distance: float
        samples are generated around each arrival event with range
        Normal(0, sample_distance)
    :param sample_amount: Union[float, int]
        int => generates this many queries
        float => turns this percentage of arrivals into queries
    :param max_cars: int
        the number of slots to allocate for holding car data
    :param car_speed: float
        how far each car moves towards its destination at each timestep
    :param kwargs: for compatibility
    """
    kwargs = kwargify(locals())
    self.arrivals: Arrivals = arrivals
    self.departures: List[List[int]] = departures
    self.initial_station_state: np.ndarray = initial_station_state
    self.station_info: np.ndarray = station_info
    self.sample_distance: float = sample_distance
    self.sample_amount: Union[int, float] = sample_amount
    self.car_speed: float = car_speed
    self.max_cars: int = max_cars
    self.random: RandomState = optional_random()
    self.kwargs = kwargs
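# A small illustration (hypothetical values) of the sample_amount convention
# documented above: an int is an absolute number of queries, while a float is
# interpreted as a fraction of the arrival events.
n_arrivals = 200
for sample_amount in (50, 0.25):
    if isinstance(sample_amount, int):
        n_queries = sample_amount                    # exactly this many queries
    else:
        n_queries = int(sample_amount * n_arrivals)  # 25% of arrivals -> 50
    assert n_queries == 50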
def __init__(self,
             evaluator: IEvaluator[State, Action, Reward, OptionData],
             generator: IGenerator[State, Action, Reward, OptionData],
             planning_terminator: IPlanningTerminator[State, Action, Reward, OptionData],
             policy_terminator: IPolicyTerminator[State, Action, Reward, OptionData],
             low_level: IOptionBasedAgent[State, Action, Reward, OptionData],
             memory: IMemory[State, Action, Reward, OptionData],
             settings: Dict[str, Any]):
    self.evaluator: IEvaluator[State, Action, Reward, OptionData] = evaluator
    self.generator: IGenerator[State, Action, Reward, OptionData] = generator
    self.planning_terminator: IPlanningTerminator[State, Action, Reward, OptionData] = \
        planning_terminator
    self.policy_terminator: IPolicyTerminator[State, Action, Reward, OptionData] = \
        policy_terminator
    self.low_level: IOptionBasedAgent[State, Action, Reward, OptionData] = low_level
    self.memory: IMemory[State, Action, Reward, OptionData] = memory
    self.random: RandomState = optional_random(settings['random'])
    self.current_option_node: Optional[Node[Option[OptionData]]] = None
    self.prev_option_node: Optional[Node[Option[OptionData]]] = None
    self.actionable_option_node: Optional[Node[Option[OptionData]]] = None
    self.root_option_node: Optional[Node[Option[OptionData]]] = None
def seed(self, random: Union[int, RandomState] = None) -> None:
    self.random = optional_random(random)
def reset(self, rand_seed: Union[int, RandomState] = None) -> Tuple[State, Goal]:
    random = optional_random(rand_seed)
    self._location, self._goal = self._create_random_problem(random)
    return self.state(), self._goal
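# Seeding sketch: under the optional_random contract assumed earlier, passing
# the same int seed to reset should reproduce the same start/goal pair
# (env here is a hypothetical instance of the surrounding environment class).
state_a, goal_a = env.reset(rand_seed=7)
state_b, goal_b = env.reset(rand_seed=7)
# identical seeds draw the identical problem, so the two goals match
assert np.array_equal(goal_a, goal_b)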