def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_, **__):
    """
    Create a distributed prioritized replay buffer instance.

    To avoid issues caused by tensor device difference, all transition
    objects are stored in device "cpu".

    The distributed prioritized replay buffer consists of many local
    buffers held per process. Since it is very inefficient to maintain a
    weight tree across processes, each process holds a local buffer (the
    same as ``DistributedBuffer``) and a weight tree of the records in
    that local buffer.

    The sampling process(es) will first use rpc to acquire the
    ``wr_lock``, signalling "stop" to appending performed by actor
    processes, then compute the sum of all local weight trees, and
    finally perform sampling. After sampling and updating the importance
    weights, the lock is released.

    During sampling, the tensors in the "state", "action" and
    "next_state" dictionaries, along with "reward", will be concatenated
    in dimension 0. Any other custom keys specified in ``**kwargs`` will
    not be concatenated.

    .. seealso:: :class:`PrioritizedBuffer`

    Note:
        :class:`DistributedPrioritizedBuffer` is not split into an
        accessor and an implementation, because we would like to operate
        on the buffer directly when calling ``size()`` or ``append()``,
        to increase efficiency (since the rpc layer is bypassed).

    Args:
        buffer_name: A unique name of your buffer.
        group: Process group which holds this buffer.
        buffer_size: Maximum local buffer size.
    """
    super().__init__(buffer_size, "cpu")
    self.buffer_name = buffer_name
    self.buffer_version_table = np.zeros([buffer_size], dtype=np.uint64)
    self.group = group
    assert group.is_member()

    # register services, so that we may access other buffers
    _name = "/" + group.get_cur_name()
    self.group.register(
        buffer_name + _name + "/_size_service", self._size_service
    )
    self.group.register(
        buffer_name + _name + "/_clear_service", self._clear_service
    )
    self.group.register(
        buffer_name + _name + "/_weight_sum_service", self._weight_sum_service
    )
    self.group.register(
        buffer_name + _name + "/_update_priority_service",
        self._update_priority_service,
    )
    self.group.register(
        buffer_name + _name + "/_sample_service", self._sample_service
    )
    self.wr_lock = RLock()
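# A minimal, single-process sketch of the sampling split described above:
# the global sum over all local weight trees decides how many samples each
# local buffer contributes. `local_weights` and `allocate_samples` are
# hypothetical illustrations, not part of this class or its rpc services.
import numpy as np

# Stand-ins for the per-process weight trees, as if each peer's
# _weight_sum_service had reported its local priorities.
local_weights = {
    "worker:0": np.array([0.5, 1.5]),
    "worker:1": np.array([4.0, 2.0]),
}

def allocate_samples(weights, batch_size):
    """Split a batch across local buffers proportionally to their local
    weight sums (the "sum of all local weight trees" step)."""
    sums = {name: w.sum() for name, w in weights.items()}
    total = sum(sums.values())
    return {name: int(round(batch_size * s / total)) for name, s in sums.items()}

print(allocate_samples(local_weights, batch_size=8))
# {'worker:0': 2, 'worker:1': 6}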
def __init__(
    self,
    server_name: str,
    group: RpcGroup,
    model_name: str = "model",
    o_server: OrderedServerBase = None,
):
    """
    This init function must only be invoked on the runner process, and
    the runner process must be a member process of ``group``.

    Args:
        server_name: Name of this server, used to register the server as
            a paired class of ``group``.
        group: RpcGroup of the default server :class:`.OrderedServerSimple`,
            mutually exclusive with ``o_server``.
        model_name: Name of the managed model in the ordered server, only
            needed if the server needs such an identifier. The default
            ordered server does not require this.
        o_server: Custom ordered server accessor.
    """
    self.server_name = server_name
    self.group = group
    self.model_name = model_name
    # actual running server started by OrderedServerSimpleStarter
    self._o_server_impl = None
    if o_server is None:
        self._o_server_impl = OrderedServerSimpleImpl(
            server_name + "_o_server", group
        )
        self.o_server = group.get_paired(server_name + "_o_server").to_here()
    else:  # pragma: no cover
        self.o_server = o_server
    # pair an accessor to group
    self.group.pair(
        server_name, PushPullModelServer(self.model_name, self.o_server)
    )
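# A small, runnable sketch of the naming convention used by this
# constructor: the ordered server implementation is paired under
# ``server_name + "_o_server"`` while the push/pull accessor is paired
# under ``server_name`` itself. The dictionary below is a stand-in for
# what ``group.pair(...)`` / ``group.get_paired(...)`` would expose; it is
# illustrative only and does not model the real rpc behaviour.
pairing_table = {}

def pair(name, value):
    pairing_table[name] = value

def get_paired(name):
    return pairing_table[name]

server_name = "model_server"
pair(server_name + "_o_server", "<ordered server accessor>")
pair(server_name, "<PushPullModelServer accessor>")

# Clients on other group members look the accessor up by server_name alone.
print(get_paired("model_server"))            # the push/pull accessor
print(get_paired("model_server_o_server"))   # the ordered server accessor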
def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_, **__):
    """
    Create a distributed replay buffer instance.

    To avoid issues caused by tensor device difference, all transition
    objects are stored in device "cpu".

    The distributed replay buffer consists of many local buffers held per
    process; transmissions between processes only happen during sampling.

    During sampling, the tensors in the "state", "action" and
    "next_state" dictionaries, along with "reward", will be concatenated
    in dimension 0. Any other custom keys specified in ``**kwargs`` will
    not be concatenated.

    .. seealso:: :class:`.Buffer`

    Note:
        Since ``append()`` operates on the local buffer, in order to
        append to the distributed buffer correctly, please make sure that
        your actor is also the local buffer holder, i.e. a member of the
        ``group``.

    Args:
        buffer_name: A unique name of your buffer.
        group: Process group which holds this buffer.
        buffer_size: Maximum local buffer size.
    """
    super().__init__(buffer_size, "cpu")
    self.buffer_name = buffer_name
    self.group = group
    assert group.is_member()

    # register services, so that we may access other buffers
    _name = "/" + group.get_cur_name()
    self.group.register(
        buffer_name + _name + "/_size_service", self._size_service
    )
    self.group.register(
        buffer_name + _name + "/_clear_service", self._clear_service
    )
    self.group.register(
        buffer_name + _name + "/_sample_service", self._sample_service
    )
    self.wr_lock = RLock()
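# The concatenation rule from the docstring can be shown with plain torch
# tensors: sub-tensors of "state", "action", "next_state" and "reward" are
# joined in dimension 0, other custom keys are left untouched. The sample
# transitions and `merge_sampled` below are hypothetical and only
# illustrate that rule; they are not the buffer's actual sampling code.
import torch as t

sampled = [
    {"state": {"state_1": t.ones(1, 3)}, "reward": t.tensor([[1.0]]), "episode": 7},
    {"state": {"state_1": t.zeros(1, 3)}, "reward": t.tensor([[0.5]]), "episode": 8},
]

def merge_sampled(transitions):
    """Concatenate tensor sub-keys in dim 0; keep custom keys as a list."""
    state = t.cat([tr["state"]["state_1"] for tr in transitions], dim=0)
    reward = t.cat([tr["reward"] for tr in transitions], dim=0)
    episode = [tr["episode"] for tr in transitions]  # custom key: not concatenated
    return state, reward, episode

state, reward, episode = merge_sampled(sampled)
print(state.shape, reward.shape, episode)
# torch.Size([2, 3]) torch.Size([2, 1]) [7, 8]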
def __init__(
    self,
    server_name: str,
    group: RpcGroup,
    model_name: str = "model",
    primary_reducer: str = None,
    secondary_reducers: List[str] = None,
    o_server: OrderedServerBase = None,
    reduce_method: str = "sum",
    reduce_device: Union[t.device, str] = "cpu",
    reduce_batch_size: int = 4,
    max_queue_size: int = 64,
):
    """
    Note:
        You should initialize ``PushPullGradServer`` on all members of
        ``secondary_reducers`` and on ``primary_reducer``. Both of them
        should be members of the ``group``.

    Note:
        Internally, the primary reducer will push updated versions to the
        ordered server.

    Hint:
        Reduction is performed in a tree fashion:

        1. In the first step, clients push new gradients to a random
           secondary reducer, which performs the first reduction pass;
           secondary reducers then push their results to the primary
           reducer.
        2. In the second step, the primary reducer reduces the results
           from the secondary reducers to obtain the final reduced
           gradient dictionary (which has the same structure as
           ``state_dict``), assigns the gradients to its **managed
           model**, and performs the optimization step.
        3. In the final step, the primary reducer pushes the final model
           to the model server group, so that clients can pull the newest
           model.

    Args:
        server_name: Name of this server, used to register the server as
            a paired class of ``group``.
        group: Server group.
        model_name: Name of the managed model in the ordered server, only
            needed if the server needs such an identifier. The default
            ordered server does not require this.
        primary_reducer: Name of the process serving as the primary
            reducer, which collects reduced gradients from secondary
            reducers and performs the final reduction.
        secondary_reducers: Names of the processes serving as secondary
            reducers.
        o_server: Custom ordered server accessor. By default, the ordered
            server is a :class:`.OrderedServerSimple` hosted on the
            primary reducer.
        reduce_method: "mean" or "sum".
        reduce_device: Device on which to perform reduction, "cpu" by
            default.
        reduce_batch_size: Size of a single reduction batch; the server
            will wait until the number of requests in the reduction queue
            has reached this size.
        max_queue_size: Maximum reduction request queue size.
    """
    self.server_name = server_name
    self.group = group
    self.model_name = model_name

    if primary_reducer is None:
        primary_reducer = group.get_group_members()[0]
    assert group.is_member(primary_reducer)
    assert group.is_member()

    # actual running server started by OrderedServerSimpleStarter
    self._o_server_impl = None
    self.o_server = None
    if o_server is None:
        if group.get_cur_name() == primary_reducer:
            self._o_server_impl = OrderedServerSimpleImpl(
                server_name + "_o_server", group
            )
        self.o_server = OrderedServerSimple(server_name + "_o_server", group)
    else:  # pragma: no cover
        self.o_server = o_server

    if secondary_reducers is None:
        secondary_reducers = group.get_group_members()

    self.primary_reducer = primary_reducer
    self.primary_service = server_name + "/" + primary_reducer + "/_push_service"
    self.secondary_reducers = secondary_reducers
    self.secondary_services = [
        server_name + "/" + m + "/_push_service" for m in secondary_reducers
    ]
    # register secondary reducer service
    self.group.register(
        server_name + "/" + group.get_cur_name() + "/_push_service",
        self._push_service,
    )
    # pair an accessor to group
    if self.group.get_cur_name() == self.primary_reducer:
        self.group.pair(
            self.server_name,
            PushPullGradServer(
                self.server_name,
                self.group,
                self.model_name,
                self.secondary_reducers,
                self.o_server,
            ),
        )
    # prepare to start the reduction sub-thread
    assert reduce_method in ("mean", "sum")
    assert max_queue_size > 1
    assert reduce_batch_size > 1
    assert max_queue_size > reduce_batch_size
    self.started = False
    self.reduce_method = reduce_method
    self.reduce_batch_size = reduce_batch_size
    self.reduce_device = reduce_device
    self.max_queue_size = max_queue_size
    self.model = None  # type: Union[nn.Module, None]
    self.optimizer = None
    # do not set max_queue_size here, it will raise queue.Full
    self.master_queue = Queue()
    self.secondary_queue = Queue()
    self.work_event = Event()
    self.stop_event = Event()
    self.reduce_task = Thread(target=self._task_reduce_grad)
    self.reduce_task.daemon = True
def __init__(
    self,
    actor: Union[NeuralNetworkModule, nn.Module],
    optimizer: Callable,
    ars_group: RpcGroup,
    model_server: Tuple[PushPullModelServer],
    *_,
    lr_scheduler: Callable = None,
    lr_scheduler_args: Tuple[Tuple] = None,
    lr_scheduler_kwargs: Tuple[Dict] = None,
    learning_rate: float = 0.01,
    gradient_max: float = np.inf,
    noise_std_dev: float = 0.02,
    noise_size: int = 250000000,
    rollout_num: int = 32,
    used_rollout_num: int = 32,
    normalize_state: bool = True,
    noise_seed: int = 12345,
    sample_seed: int = 123,
    **__,
):
    """
    Note:
        The first process in ``ars_group`` will be the manager process.

    Args:
        actor: Actor network module.
        optimizer: Optimizer used to optimize ``actor``.
        ars_group: Group of all processes using the ARS framework.
        model_server: Custom model sync server accessor for ``actor``.
        lr_scheduler: Learning rate scheduler of ``optimizer``.
        lr_scheduler_args: Arguments of the learning rate scheduler.
        lr_scheduler_kwargs: Keyword arguments of the learning rate
            scheduler.
        learning_rate: Learning rate of the optimizer, not compatible
            with ``lr_scheduler``.
        gradient_max: Maximum gradient.
        noise_std_dev: Standard deviation of the shared noise array.
        noise_size: Size of the shared noise array.
        rollout_num: Number of rollouts executed by workers in the group.
        used_rollout_num: Number of used rollouts.
        normalize_state: Whether to normalize the state seen by the actor.
        noise_seed: Random seed used to generate noise.
        sample_seed: Base random seed used to sample noise.
    """
    assert rollout_num >= used_rollout_num
    self.grad_max = gradient_max
    self.rollout_num = rollout_num
    self.used_rollout_num = used_rollout_num
    self.normalize_state = normalize_state
    self.ars_group = ars_group

    # determine the number of rollouts (pairs of actors with neg/pos delta)
    # assigned to the current worker process
    w_num = len(ars_group.get_group_members())
    w_index = ars_group.get_group_members().index(ars_group.get_cur_name())
    segment_length = int(np.ceil(rollout_num / w_num))
    self.local_rollout_min = w_index * segment_length
    self.local_rollout_num = min(
        segment_length, rollout_num - self.local_rollout_min
    )

    self.actor = actor
    # `actor_with_delta` uses (rollout index, delta sign) as key, where
    # rollout index is the absolute global index of the rollout and delta
    # sign is True for positive, False for negative.
    self.actor_with_delta = {}  # type: Dict[Tuple[int, bool], t.nn.Module]
    self.actor_optim = optimizer(self.actor.parameters(), lr=learning_rate)
    self.actor_model_server = model_server[0]

    # `filter` uses state name as key, e.g. "state_1"
    self.filter = {}  # type: Dict[str, MeanStdFilter]

    # `delta_idx` uses rollout index as key. The inner dict uses model
    # parameter name as key, and the starting noise index in the noise
    # array as value.
    self.delta_idx = {}  # type: Dict[int, Dict[str, int]]

    # `reward` uses rollout index as key; the first list stores rewards
    # of the model with negative noise delta, the second list stores
    # rewards of the model with positive noise delta.
    self.reward = {}  # type: Dict[int, Tuple[List, List]]

    if lr_scheduler is not None:
        if lr_scheduler_args is None:
            lr_scheduler_args = ((),)
        if lr_scheduler_kwargs is None:
            lr_scheduler_kwargs = ({},)
        self.actor_lr_sch = lr_scheduler(
            self.actor_optim,
            *lr_scheduler_args[0],
            **lr_scheduler_kwargs[0],
        )

    # generate shared noise
    # estimate the maximum model parameter size first
    param_max_num = 0
    for param in actor.parameters():
        param_max_num = max(np.prod(np.array(param.shape)), param_max_num)
    if param_max_num >= noise_size:
        raise ValueError(
            "Noise size {} is too small compared to maximum parameter "
            "size {}!".format(noise_size, param_max_num)
        )
    elif param_max_num * 10 > noise_size:
        default_logger.warning(
            "Maximum parameter size of your model is {}, which is more "
            "than 1/10 of your noise size {}, consider increasing "
            "noise_size.".format(param_max_num, noise_size)
        )

    # create shared noise array
    self.noise_array = t.tensor(
        np.random.RandomState(noise_seed).randn(noise_size).astype(np.float64)
        * noise_std_dev
    )

    # create a sampler for each parameter in each rollout model,
    # keyed by model parameter name
    self.noise_sampler = {}  # type: Dict[int, Dict[str, SharedNoiseSampler]]
    param_num = len(list(actor.parameters()))
    for lrn in range(self.local_rollout_num):
        r_idx = lrn + self.local_rollout_min
        sampler = {}
        for p_idx, (name, param) in enumerate(actor.named_parameters()):
            # each model and its inner parameters use a different
            # sampling stream of the same noise array.
            sampler[name] = SharedNoiseSampler(
                self.noise_array, sample_seed + r_idx * param_num + p_idx
            )
        self.noise_sampler[r_idx] = sampler

    # synchronize base actor parameters
    self._sync_actor()
    self._generate_parameter()
    self._reset_reward_dict()
    super().__init__()
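# The worker-assignment arithmetic used above (ceil-divided contiguous
# segments of rollout indices) and the per-parameter noise seeds are easy
# to check in isolation. `assign_rollouts` is a hypothetical helper that
# just mirrors the formulas from this constructor.
import numpy as np

def assign_rollouts(rollout_num, worker_num, worker_index):
    """Return (local_rollout_min, local_rollout_num) for one worker."""
    segment_length = int(np.ceil(rollout_num / worker_num))
    local_rollout_min = worker_index * segment_length
    local_rollout_num = min(segment_length, rollout_num - local_rollout_min)
    return local_rollout_min, local_rollout_num

# 32 rollouts over 5 workers: segments of 7, the last worker gets 4.
for w in range(5):
    print(w, assign_rollouts(32, 5, w))

# Each (rollout, parameter) pair samples from its own stream of the shared
# noise array: seed = sample_seed + rollout_index * param_num + param_index.
sample_seed, param_num = 123, 4
seeds = {(r, p): sample_seed + r * param_num + p
         for r in range(2) for p in range(param_num)}
print(seeds)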