def __init__(self, environment, policy, max_it=1000, num_workers=1,
             rate=0.1, discount=0.1, log_file=False):
    """Initialize A3C.

    Parameters
    ----------
    environment :
        Environment we want to optimize the policy on.
    policy :
        The policy we want to optimize. The policy needs to be defined by
        a tensorflow neural network and define certain attributes.
    max_it : int
        Maximum number of iterations.
    num_workers : int
        Number of workers.
    rate : float
        Update rate passed to the optimizer.
    discount : float
        Discount for the computation of the discounted reward.
    log_file : string
        Relative path to the directory where a summary event file should
        be generated. If `None` no tensorflow logs will be stored.
        NOTE(review): the declared default is ``False``, not ``None`` —
        confirm which value callers actually use to disable logging.

    Raises
    ------
    ValueError
        If the given policy has already been set up.
    """
    add_dependency(tf, 'TensorFlow')

    # A3C builds its own global/worker networks below; a policy whose
    # graph is already set up would conflict with that.
    if policy.is_set_up:
        raise ValueError('Policy should not be set up.')

    super(A3C, self).__init__(environment, policy, max_it)

    self.num_workers = num_workers
    self.rate = rate
    self.discount = discount
    self.done = False
    self.log_file = log_file
    self.policy = policy

    # Initialize the shared ("global") policy and value networks on the
    # CPU; worker threads synchronize against these.
    with tf.device("/cpu:0"):
        with tf.variable_scope('global'):
            self.p_net = _PolicyNet(self.policy, rate)
            self.v_net = _ValueNet(self.policy, rate)

    # Worker bookkeeping; session/coordinator are created lazily later.
    self.workers = []
    self.threads = []
    self.global_counter = 0
    self.sess = None
    self.coord = None
def __init__(self, env, horizon=100, render=False):
    """Initialize attributes.

    Parameters
    ----------
    env : gym environment
        Instance of the gym environment that should be optimized on.
    horizon : integer
        Horizon for rollout.
    render : boolean
        Default: False; if True the simulation will be rendered during
        rollouts on this instance.
    """
    add_dependency(gym, 'Gym')

    EnvironmentBase.__init__(self, env.observation_space,
                             env.action_space, horizon)

    # Operate on the raw environment, bypassing any gym wrappers.
    self.environment = env.unwrapped
    self.done = False
    self.render = render

    # Start every instance from a freshly reset state.
    self.environment.reset()
def __init__(self, layers, weights=None, init_weights=None, activation=None,
             dtype='float', scope='global', do_setup=False):
    """Initialize Neural Network wrapper.

    Parameters
    ----------
    layers : list of int
        Layer sizes; at least an input and an output layer are required.
    weights :
        Optional weights, stored in ``self.kwargs`` for copy operations.
    init_weights : callable or None
        Weight initializer; defaults to ``default_init_weights``.
    activation : callable or list of callables or None
        Hidden-layer activation(s). A list must contain exactly one entry
        per hidden layer (``len(layers) - 2``); a single callable is
        replicated for every hidden layer; ``None`` selects
        ``tf.sigmoid``.
    dtype : string
        TensorFlow dtype used for the input/action placeholders.
    scope : string
        Variable scope used when the graph is set up.
    do_setup : bool
        If True, build the network graph immediately.

    Raises
    ------
    ValueError
        If fewer than two layers are given, or an activation list has the
        wrong size.
    """
    add_dependency(tf, 'TensorFlow')

    if len(layers) < 2:
        raise ValueError('At least two layers needed.')

    # determine state and action space
    state_space = RdSpace((layers[0],))
    action_space = RdSpace((layers[-1],))

    # store arguments convenient for copy operation
    self.args = [layers]
    self.kwargs = {
        'weights': weights,
        'init_weights': init_weights,
        'activation': activation,
        'dtype': dtype
    }

    self.state_space = state_space
    self.action_space = action_space

    self.dtype = dtype
    self.layers = layers
    self.scope = scope
    self.is_set_up = False

    if init_weights is None:
        self.init_weights = default_init_weights
    else:
        self.init_weights = init_weights

    # Activation functions: exactly one per hidden layer.
    num_hidden = len(layers) - 2
    if activation is None:
        activation = num_hidden * [tf.sigmoid]
    elif isinstance(activation, list):
        # BUG FIX: previously a list of the *correct* size fell through
        # to the replication branch, producing a nested list-of-lists.
        # A correctly sized list is now kept as-is.
        if len(activation) != num_hidden:
            raise ValueError('Activation list has wrong size.')
    else:
        activation = num_hidden * [activation]
    self.activation = activation

    # Symbols
    self.X = tf.placeholder(dtype, shape=[None, layers[0]], name='X')
    self.a = tf.placeholder(dtype, shape=[None, layers[-1]], name='a')

    if do_setup:
        with tf.variable_scope(self.scope):
            self.setup()
    else:
        # Make sure all fields exist
        self.W_action = None
        self.W_var = None
        self.a_pred = None
        self.var = None
        self.h = None
        self.sess = None
def __init__(self, environment, policy, max_it, avg_reward, window, kernel,
             likelihood, fmin, bounds, beta=3.0, threshold=0,
             scaling='auto', swarm_size=20, info=None):
    """Initialize Attributes.

    Parameters
    ----------
    environment :
        Environment to be optimized.
    policy :
        Policy to be optimized.
    max_it :
        Maximal number of iterations before we abort.
    avg_reward : integer
        Average reward at which the optimization will be finished.
    window : integer
        Window for the average reward.
    kernel : GPy kernel
        Kernel used to initialize the gaussian process. If this is a
        list, multiple kernels will be initialized. The size of this
        argument has to agree with the size of the likelihood.
    likelihood : GPy likelihood
        Likelihood used to initialize kernels. If this is a list,
        multiple kernels will be initialized. The size of this argument
        has to agree with the size of the kernel.
    fmin : list of floats
        Safety threshold for the function value. If multiple safety
        constraints are used this can also be a list of floats (the
        first one is always the one for the values, can be set to None
        if not wanted).
    bounds : pair of floats or list of pairs of floats
        If a list is given, then each pair represents the lower/upper
        bound in each dimension. Otherwise, we assume the same bounds
        for all dimensions. This is mostly important for plotting or to
        restrict particles to a certain domain.
    beta : float or callable
        A constant or a function of the time step that scales the
        confidence interval of the acquisition function.
    threshold : float or list of floats
        The algorithm will not try to expand any points that are below
        this threshold. This makes the algorithm stop expanding points
        eventually. If a list, this represents the stopping criterion
        for all the gps. This ignores the scaling factor.
    scaling : list of floats or "auto"
        A list used to scale the GP uncertainties to compensate for
        different input sizes. This should be set to the maximal
        variance of each kernel. You should probably set this to "auto"
        unless your kernel is non-stationary.
    swarm_size : int
        The number of particles in each of the optimization swarms.
    info :
        Dummy argument that can hold anything usable to identify the
        configuration.

    Raises
    ------
    ValueError
        If `kernel` and `likelihood` do not have the same length.
    """
    add_dependency(safeopt, 'SafeOpt')
    add_dependency(GPy, 'GPy')

    # store the `SafeOpt` arguments.
    gp_opt_par = {
        'fmin': fmin,
        'bounds': bounds,
        'beta': beta,
        'threshold': threshold,
        'scaling': scaling,
        'swarm_size': swarm_size
    }

    # store the kernel arguments; normalize scalars to one-element lists.
    if not isinstance(kernel, list):
        kernel = [kernel]
    if not isinstance(likelihood, list):
        likelihood = [likelihood]

    # Validate with an explicit exception instead of `assert`, which is
    # stripped when Python runs with -O.
    if len(likelihood) != len(kernel):
        raise ValueError(
            'kernel and likelihood need to have same length (%d /= %d)'
            % (len(likelihood), len(kernel)))

    gp_par = (kernel, likelihood)

    super(SafeOptSwarm, self).__init__(safeopt.SafeOptSwarm, gp_opt_par,
                                       gp_par, environment, policy, max_it,
                                       avg_reward, window)
def __init__(self, environment, policy, max_it, avg_reward, window, kernel,
             likelihood, parameter_set, fmin, lipschitz=None, beta=3.0,
             num_contexts=0, threshold=0, scaling='auto', info=None):
    """Initialize Attributes.

    Parameters
    ----------
    environment :
        Environment to be optimized.
    policy :
        Policy to be optimized.
    max_it :
        Maximal number of iterations before we abort.
    avg_reward : integer
        Average reward at which the optimization will be finished.
    window : integer
        Window for the average reward.
    kernel : GPy kernel
        Kernel used to initialize the gaussian process. If this is a
        list, multiple kernels will be initialized. The size of this
        argument has to agree with the size of the likelihood.
    likelihood : GPy likelihood
        Likelihood used to initialize kernels. If this is a list,
        multiple kernels will be initialized. The size of this argument
        has to agree with the size of the kernel.
    parameter_set : 2d-array
        List of parameters.
    fmin : list of floats
        Safety threshold for the function value. If multiple safety
        constraints are used this can also be a list of floats (the
        first one is always the one for the values, can be set to None
        if not wanted).
    lipschitz : list of floats
        The Lipschitz constant of the system, if None the GP confidence
        intervals are used directly.
    beta : float or callable
        A constant or a function of the time step that scales the
        confidence interval of the acquisition function.
    num_contexts : int
        Number of contexts, forwarded to `safeopt.SafeOpt`.
    threshold : float or list of floats
        The algorithm will not try to expand any points that are below
        this threshold. This makes the algorithm stop expanding points
        eventually. If a list, this represents the stopping criterion
        for all the gps. This ignores the scaling factor.
    scaling : list of floats or "auto"
        A list used to scale the GP uncertainties to compensate for
        different input sizes. This should be set to the maximal
        variance of each kernel. You should probably leave this to
        "auto" unless your kernel is non-stationary.
    info :
        Dummy argument that can hold anything usable to identify the
        configuration.

    Raises
    ------
    ValueError
        If `kernel` and `likelihood` do not have the same length.
    """
    add_dependency(safeopt, 'SafeOpt')
    add_dependency(GPy, 'GPy')

    # store the `SafeOpt` arguments.
    gp_opt_par = {
        'parameter_set': parameter_set,
        'fmin': fmin,
        'lipschitz': lipschitz,
        'beta': beta,
        'num_contexts': num_contexts,
        'threshold': threshold,
        'scaling': scaling}

    # store the kernel arguments; normalize scalars to one-element lists.
    if not isinstance(kernel, list):
        kernel = [kernel]
    if not isinstance(likelihood, list):
        likelihood = [likelihood]

    # Validate with an explicit exception instead of `assert`, which is
    # stripped when Python runs with -O.
    if len(likelihood) != len(kernel):
        raise ValueError(
            'kernel and likelihood need to have same length (%d /= %d)'
            % (len(likelihood), len(kernel)))

    gp_par = (kernel, likelihood)

    super(SafeOpt, self).__init__(safeopt.SafeOpt, gp_opt_par, gp_par,
                                  environment, policy, max_it, avg_reward,
                                  window)