def track_absolute(self, value):
    """
    Overwrite the tracked value with an absolute `value`.

    Args:
        value: int, stored verbatim as `self.value`.

    Returns:
        The result of `self._update_endpoint()`: True if we enter the
        next period.
    """
    U.assert_type(value, int)
    self.value = value
    entered_next_period = self._update_endpoint()
    return entered_next_period
def __init__(self, env, agent_id, session_config, separate_plots=True):
    """
    Display "reward" and "step_per_s" curves on Tensorboard.

    Args:
        env: forwarded to the parent constructor.
        agent_id: int, used to name the Tensorplex client "agent/<agent_id>".
        session_config: passed to `get_tensorplex_client`; the value at
            ['tensorplex']['update_schedule']['training_env'] is used both
            as the period of the periodic tracker and as `self._avg`
            (presumably: log every N episodes and average over the last
            N episodes -- confirm against the code that reads `_avg`).
        separate_plots: True to put reward plot in a separate section on
            Tensorboard, False to put all plots together.
    """
    super().__init__(env)
    U.assert_type(agent_id, int)

    client_name = '{}/{}'.format('agent', agent_id)
    self.tensorplex = get_tensorplex_client(client_name, session_config)

    update_schedule = session_config['tensorplex']['update_schedule']
    interval = update_schedule['training_env']
    # The same interval drives both the logging period and `_avg`.
    self._periodic = PeriodicTracker(interval)
    self._avg = interval
    self._separate_plots = separate_plots
def __init__(self, obs_spec, action_spec, gamma):
    """
    Store the observation/action specs and `gamma` on the instance.

    Args:
        obs_spec: dict, kept as `self.obs_spec`.
        action_spec: dict; its 'type' entry is looked up by name in
            `ActionType` and stored as `self.action_type`.
        gamma: stored as-is (no validation is done here).
    """
    U.assert_type(obs_spec, dict)
    U.assert_type(action_spec, dict)

    # Resolve the enum member first so a bad 'type' fails before any
    # attribute is set.
    type_name = action_spec['type']
    self.action_type = ActionType[type_name]

    self.action_spec = action_spec
    self.obs_spec = obs_spec
    self.gamma = gamma
def track_increment(self, incr=1):
    """
    Advance the tracked value by `incr`.

    Args:
        incr: int, amount added to `self.value` (default 1).

    Returns:
        The result of `self._update_endpoint()`: True if we enter the
        next period.
    """
    U.assert_type(incr, int)
    self.value += incr
    entered_next_period = self._update_endpoint()
    return entered_next_period
def init_dueling(self, *, action_dim, prelinear_size, fc_hidden_sizes, dueling):
    """
    Build the fully connected head(s) on top of the feature vector.

    Args:
        action_dim: output size of the last layer of the action head.
        prelinear_size: size of feature vector before the linear layers,
            like flattened conv or LSTM features.
        fc_hidden_sizes: list of fully connected layer sizes before the
            `action_dim` output.
        dueling: if truthy, a second head ending in a single unit is
            built as `self.fc_state_layers`.
    """
    def _linear_stack(layer_sizes):
        # One nn.Linear per consecutive size pair, each initialised with
        # U.conv_fc_init, collected in a ModuleList.
        stack = nn.ModuleList()
        for in_features, out_features in zip(layer_sizes, layer_sizes[1:]):
            layer = nn.Linear(in_features, out_features)
            U.conv_fc_init(layer)
            stack.append(layer)
        return stack

    self.dueling = dueling
    self.prelinear_size = prelinear_size
    U.assert_type(fc_hidden_sizes, list)

    shared_sizes = [prelinear_size] + fc_hidden_sizes
    self.fc_action_layers = _linear_stack(shared_sizes + [action_dim])
    if not dueling:
        return
    # output a single state value
    self.fc_state_layers = _linear_stack(shared_sizes + [1])
def __init__(self, tensorplex, min_update_interval):
    """
    Args:
        tensorplex: TensorplexClient instance, kept as `self.tensorplex`.
        min_update_interval: stored on the instance and handed to
            `U.TimedTracker` (units are not visible here -- presumably
            seconds; confirm against TimedTracker).
    """
    U.assert_type(tensorplex, TensorplexClient)
    self.tensorplex = tensorplex
    self.min_update_interval = min_update_interval

    # Accumulated values plus the lock stored alongside them.
    self.history = U.AverageDictionary()
    self.lock = Lock()

    self.tracker = U.TimedTracker(self.min_update_interval)
    # Timestamp taken at the end of construction.
    self.init_time = time.time()
def __init__(self, period, init_value=0, init_endpoint=0):
    """
    Args:
        period: int, must be strictly positive.
        init_value: int, initial `self.value`.
        init_endpoint: initial `self._endpoint` (not type-checked here).
    """
    U.assert_type(period, int)
    assert period > 0
    U.assert_type(init_value, int)

    self._endpoint = init_endpoint
    self.value = init_value
    self.period = period
def __init__(self, *, host, port, flush_iteration):
    """
    Set up the sender, the experience buffer and the flush counter.

    Args:
        host: forwarded to `ZmqSender`.
        port: forwarded to `ZmqSender`.
        flush_iteration: int, how many send() calls before we flush the
            buffer; used as the period of the flush tracker.
    """
    U.assert_type(flush_iteration, int)

    sender = ZmqSender(host=host, port=port)
    self._client = sender

    buffer = ExpBuffer()
    self._exp_buffer = buffer

    self._flush_tracker = PeriodicTracker(flush_iteration)
def add(self, hash_dict, nonhash_dict):
    """
    Append one experience record to `self.exp_list`.

    Args:
        hash_dict: {obs_hash: [ .. can be nested .. ]}; each value goes
            through `self._hash_nested` and is stored under the key with
            '_hash' appended. Keys must not already end in '_hash'.
        nonhash_dict: {reward: -1.2, done: True, ...}; merged into the
            record unchanged, after the hashed entries.
    """
    U.assert_type(hash_dict, dict)
    U.assert_type(nonhash_dict, dict)

    record = {}
    for name in hash_dict:
        assert not name.endswith('_hash'), 'do not manually append `_hash`'
        hashed_name = name + '_hash'
        record[hashed_name] = self._hash_nested(hash_dict[name])

    # Non-hashed entries are applied last, so they win on a key clash.
    record.update(nonhash_dict)
    self.exp_list.append(record)
def _sample_request_handler(self, req):
    """
    Handle requests to the learner.

    `req` deserializes to an int batch size. The call polls
    `self.start_sample_condition()` until it is true, updates the
    cumulative counters, then returns the serialized sample.

    https://stackoverflow.com/questions/29082268/python-time-sleep-vs-event-wait
    Since we don't have external notify, we'd better just use sleep
    """
    batch_size = U.deserialize(req)
    U.assert_type(batch_size, int)

    # Poll every 10 ms until sampling is allowed.
    while True:
        if self.start_sample_condition():
            break
        time.sleep(0.01)

    self.cumulative_sampled_count += batch_size
    self.cumulative_request_count += 1

    with self.sample_time.time():
        batch = self.sample(batch_size)
    with self.serialize_time.time():
        payload = U.serialize(batch)
    return payload
def extend_config(config, default_config):
    """
    Fill `config` with values from `default_config` and wrap it in `Config`.

    default_config must specify all the expected keys. Use the following
    special values for required placeholders:

    * _req_: require a single value (not a list or dict)
    * _req_DICT_: require a dict
    * _req_LIST_: require a list

    Args:
        config: dict with the user-specified values.
        default_config: dict with defaults and required placeholders.

    Returns:
        `Config` built from `config` filled by default values if certain
        keys are unspecified

    Raises:
        ConfigError if required placeholders are not satisfied
    """
    U.assert_type(config, dict)
    U.assert_type(default_config, dict)
    filled = _fill_default_config(config, default_config, [])
    return Config(filled)
def get_exploration_schedule(self):
    """
    Build the exploration schedule from `learner_config.algo.exploration`.

    Returns:
        `U.LinearSchedule` from 1.0 down to `final_eps` over `steps` when
        the configured schedule is 'linear' (case-insensitive). Any other
        schedule name yields a `U.PiecewiseSchedule` starting at
        (0, 1.0); in that case `steps` and `final_eps` must be lists of
        equal length.
    """
    exploration = self.learner_config.algo.exploration

    if exploration.schedule.lower() == 'linear':
        return U.LinearSchedule(
            initial_p=1.0,
            final_p=exploration.final_eps,
            schedule_timesteps=int(exploration.steps),
        )

    # Every non-linear schedule name is treated as piecewise.
    steps = exploration.steps
    final_epses = exploration.final_eps
    U.assert_type(steps, list)
    U.assert_type(final_epses, list)
    assert len(steps) == len(final_epses)

    endpoints = [(0, 1.0)] + list(zip(steps, final_epses))
    return U.PiecewiseSchedule(
        endpoints=endpoints,
        outside_value=final_epses[-1],
    )
def __init__(self, module_dict):
    """
    Args:
        module_dict: dict whose keys are all str and whose values are all
            `nnx.Module`; kept by reference as `self._module_dict`.
    """
    U.assert_type(module_dict, dict)
    for name, module in module_dict.items():
        key_message = 'Key "{}" must be string.'.format(name)
        U.assert_type(name, str, key_message)
        module_message = '"{}" must be torchx.nn.Module.'.format(module)
        U.assert_type(module, nnx.Module, module_message)
    self._module_dict = module_dict
def __init__(self, tensorplex, period, is_average=True, keep_full_history=False):
    """
    Args:
        tensorplex: TensorplexClient object, or None to turn off
            tensorplex.
        period: positive int; when you call `update()`, it will only
            send to Tensorplex at the specified period.
        is_average: if True, send the averaged value over the last
            `period`.
        keep_full_history: if False, only keep the last `period` of
            history.
    """
    # None to turn off tensorplex
    if tensorplex is not None:
        U.assert_type(tensorplex, TensorplexClient)
    U.assert_type(period, int)
    assert period > 0

    self._tplex = tensorplex
    self._period = period
    self._is_average = is_average
    self._keep_full_history = keep_full_history
    self._tracker = PeriodicTracker(period)
    self._history = {}

    # Unbounded when the full history is kept, else capped at `period`.
    if keep_full_history:
        self._max_deque_size = None
    else:
        self._max_deque_size = period
def extend(self, default_config):
    """
    Fill this config with values from `default_config`.

    Args:
        default_config: dict of defaults.

    Returns:
        Whatever `_fill_default_config(self, default_config, [])` returns.
    """
    U.assert_type(default_config, dict)
    filled = _fill_default_config(self, default_config, [])
    return filled