def __init__(self, config):
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        if 'distribution' in action:
            # explicit distribution given in the action config
            kwargs = dict(action)
            self.distribution[name] = Distribution.from_config(
                config=action.distribution,
                kwargs=kwargs
            )
        elif action.continuous:
            if action.min_value is None:
                # unbounded continuous action
                assert action.max_value is None
                self.distribution[name] = Gaussian(shape=action.shape)
            else:
                # bounded continuous action
                assert action.max_value is not None
                self.distribution[name] = Beta(
                    min_value=action.min_value,
                    max_value=action.max_value,
                    shape=action.shape
                )
        else:
            # discrete action
            self.distribution[name] = Categorical(shape=action.shape, num_actions=action.num_actions)

    # baseline
    if config.baseline is None:
        self.baseline = None
    else:
        self.baseline = Baseline.from_config(config=config.baseline)

    # advantage estimation
    self.gae_rewards = config.gae_rewards
    self.gae_lambda = config.gae_lambda
    self.normalize_rewards = config.normalize_rewards

    super(PolicyGradientModel, self).__init__(config)
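
# A minimal, self-contained sketch of the generalized advantage estimation that
# the `gae_rewards` / `gae_lambda` flags above toggle. The helper name and
# signature are assumptions for illustration, not part of PolicyGradientModel.
import numpy as np

def gae_advantages(rewards, values, terminals, discount=0.99, gae_lambda=0.97):
    # `values` carries one extra bootstrap value beyond the last reward.
    advantages = np.zeros(len(rewards), dtype=np.float32)
    gae = 0.0
    for t in reversed(range(len(rewards))):
        non_terminal = 0.0 if terminals[t] else 1.0
        # TD residual: r_t + gamma * V(s_{t+1}) - V(s_t), cut at episode ends
        delta = rewards[t] + discount * values[t + 1] * non_terminal - values[t]
        # Discounted, lambda-weighted sum of residuals, reset at episode ends
        gae = delta + discount * gae_lambda * non_terminal * gae
        advantages[t] = gae
    return advantages

# Example: gae_advantages([1.0, 0.0, 1.0], [0.5, 0.4, 0.6, 0.0], [False, False, True])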
def __init__(self, config):
    config.default(PolicyGradientModel.default_config)

    # distribution
    self.distribution = dict()
    for name, action in config.actions:
        if 'distribution' in action:
            # explicit distribution given in the action config; pass along
            # whichever constructor arguments the action type requires
            if not action.continuous:
                kwargs = dict(num_actions=action.num_actions)
            elif 'min_value' in action:
                kwargs = dict(min_value=action.min_value, max_value=action.max_value)
            else:
                kwargs = dict()
            self.distribution[name] = Distribution.from_config(
                config=action.distribution,
                kwargs=kwargs
            )
        # elif 'min_value' in action:
        #     ...
        elif action.continuous:
            self.distribution[name] = Gaussian()
        else:
            self.distribution[name] = Categorical(num_actions=action.num_actions)

    # baseline
    if config.baseline is None:
        self.baseline = None
    else:
        self.baseline = Baseline.from_config(config=config.baseline)

    super(PolicyGradientModel, self).__init__(config)

    # advantage estimation
    self.generalized_advantage_estimation = config.generalized_advantage_estimation
    if self.generalized_advantage_estimation:
        self.gae_lambda = config.gae_lambda
    self.normalize_advantage = config.normalize_advantage
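
# A minimal sketch of the normalization that `normalize_rewards` /
# `normalize_advantage` above toggle: rescale the advantage estimates to zero
# mean and unit variance before the policy gradient update. The helper name is
# an assumption for illustration, not part of PolicyGradientModel.
import numpy as np

def normalize_advantages(advantages, epsilon=1e-8):
    advantages = np.asarray(advantages, dtype=np.float32)
    # epsilon guards against division by zero when all advantages are equal
    return (advantages - advantages.mean()) / (advantages.std() + epsilon)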