def __init__(self, hyperparams, dX, dU):
    PolicyOpt.__init__(self, hyperparams, dX, dU)
    self.dX = dX
    self.dU = dU

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.epochs = self._hyperparams['epochs']
    self.batch_size = self._hyperparams['batch_size']
    self.weight_decay = self._hyperparams['weight_decay']
    self.N_hidden = self._hyperparams['N_hidden']

    self.graph = tf.Graph()  # Encapsulate model in own graph
    with self.graph.as_default():
        self.init_network()
        self.init_loss_function()
        self.init_solver()

        # Create session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # Prevent GPS from hogging all memory
        self.sess = tf.Session(config=config)
        self.sess.run(tf.global_variables_initializer())

    self.policy = self  # Act method is contained in this class
    self.scaler = None
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_TF)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    self.tf_iter = 0
    self.checkpoint_file = self._hyperparams['checkpoint_prefix']
    self.batch_size = self._hyperparams['batch_size']
    self.device_string = "/cpu:0"
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    self.act_op = None  # mu_hat
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.init_network()
    self.init_solver()
    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, np.zeros(dU),
                           self.sess, self.device_string)
    init_op = tf.initialize_all_variables()
    self.sess.run(init_op)
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_CAFFE)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    self.batch_size = self._hyperparams['batch_size']

    if self._hyperparams['use_gpu']:
        caffe.set_device(self._hyperparams['gpu_id'])
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    self.init_solver()
    # Load parameters from caffemodel file
    if 'init_net' in self._hyperparams:
        self.solver.net.copy_from(self._hyperparams['init_net'])
    self.caffe_iter = 0
    self.var = self._hyperparams['init_var'] * np.ones(dU)

    self.policy = CaffePolicy(self.solver.test_nets[0],
                              self.solver.test_nets[1],
                              self.var)
    self.policy.bias = None
    self.policy.scale = None
    if 'init_normalization' in self._hyperparams:
        with open(self._hyperparams['init_normalization']) as fin:
            normalization_data = pickle.load(fin)
            self.policy.bias = normalization_data['bias']
            self.policy.scale = normalization_data['scale']
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_TF)
    if hyperparams is None:
        return
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.batch_size = self._hyperparams['batch_size']
    self.device_string = "/cpu:0"
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None
    self.init_network()
    self.init_solver()
    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()
    self.policy = TfPolicy(
        dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
        self.sess, self.device_string,
        copy_param_scope=self._hyperparams['copy_param_scope'])

    # List of indices for state (vector) data and image (tensor) data in observation.
    self.x_idx, self.img_idx, i = [], [], 0
    if 'obs_image_data' not in self._hyperparams['network_params']:
        self._hyperparams['network_params'].update({'obs_image_data': []})
    for sensor in self._hyperparams['network_params']['obs_include']:
        dim = self._hyperparams['network_params']['sensor_dims'][sensor]
        if sensor in self._hyperparams['network_params']['obs_image_data']:
            self.img_idx = self.img_idx + list(range(i, i + dim))
        else:
            self.x_idx = self.x_idx + list(range(i, i + dim))
        i += dim

    init_op = tf.initialize_all_variables()
    self.sess.run(init_op)

    self.normalize = self._hyperparams['normalize']
    self.policy.normalize = self.normalize
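# Worked example of the index-splitting loop above, which partitions the flat
# observation vector into state indices (x_idx) and image indices (img_idx).
# This is a standalone sketch: the sensor names and dimensions below are
# hypothetical illustrations, not values taken from the source.
network_params = {
    'obs_include': ['JOINT_ANGLES', 'JOINT_VELOCITIES', 'RGB_IMAGE'],
    'obs_image_data': ['RGB_IMAGE'],
    'sensor_dims': {'JOINT_ANGLES': 7, 'JOINT_VELOCITIES': 7, 'RGB_IMAGE': 12},
}
x_idx, img_idx, i = [], [], 0
for sensor in network_params['obs_include']:
    dim = network_params['sensor_dims'][sensor]
    if sensor in network_params['obs_image_data']:
        img_idx += list(range(i, i + dim))
    else:
        x_idx += list(range(i, i + dim))
    i += dim
# x_idx   -> [0, ..., 13]  (the 14 vector-state entries)
# img_idx -> [14, ..., 25] (the 12 image entries)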
def __init__(self, hyperparams, dO, dU): config = copy.deepcopy(POLICY_OPT_TF) config.update(hyperparams) PolicyOpt.__init__(self, config, dO, dU) tf.set_random_seed(self._hyperparams['random_seed']) self.tf_iter = 0 self.batch_size = self._hyperparams['batch_size'] self.device_string = "/cpu:0" if self._hyperparams['use_gpu'] == 1: self.gpu_device = self._hyperparams['gpu_id'] self.device_string = "/gpu:" + str(self.gpu_device) self.act_op = None # mu_hat self.feat_op = None # features self.obs_tensor = None self.cost_tensor = None self.action_tensor = None # mu true self.solver = None self.feat_vals = None self.init_network() self.init_solver() self.var = self._hyperparams['init_var'] * np.ones(dU) self.center_adv = self._hyperparams.get("center_adv", True) tfconfig = tf.ConfigProto() tfconfig.gpu_options.allow_growth = True self.sess = tf.Session(config=tfconfig) self.policy = TfPolicy( dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU), self.sess, self.device_string, copy_param_scope=self._hyperparams['copy_param_scope'], policy_type=self.policy_type, log_std=self.log_std) # List of indices for state (vector) data and image (tensor) data in observation. self.x_idx, self.img_idx, i = [], [], 0 if 'obs_image_data' not in self._hyperparams['network_params']: self._hyperparams['network_params'].update({'obs_image_data': []}) for sensor in self._hyperparams['network_params']['obs_include']: dim = self._hyperparams['network_params']['sensor_dims'][sensor] if sensor in self._hyperparams['network_params']['obs_image_data']: self.img_idx = self.img_idx + list(range(i, i + dim)) else: self.x_idx = self.x_idx + list(range(i, i + dim)) i += dim init_op = tf.global_variables_initializer() self.sess.run(init_op)
def __init__(self, hyperparams, dX, dU): """Initializes the policy. Args: hyperparams: Dictionary of hyperparameters. dX: Dimension of state space. dU: Dimension of action space. Hyperparameters: random_seed: Random seed used for tensorflow. init_var: Initial policy variance epochs: Number of training epochs each iteration. batch_size: Batch size used during training. Must be a divisor of M * N * (T - 1). weight_decay: L2 regularization of the network. N_hidden: Size of hidden layers. dZ: Dimension of the latent space. beta_kl: Weight of the KL-divergence term in loss function. N: Number of samples regulriztion. """ PolicyOpt.__init__(self, hyperparams, dX, dU) self.dX = dX self.dU = dU tf.set_random_seed(self._hyperparams['random_seed']) self.var = self._hyperparams['init_var'] * np.ones(dU) self.epochs = self._hyperparams['epochs'] self.batch_size = self._hyperparams['batch_size'] self.weight_decay = self._hyperparams['weight_decay'] self.N_hidden = self._hyperparams['N_hidden'] self.dZ = self._hyperparams['dZ'] self.beta_kl = self._hyperparams['beta_kl'] self.N = self._hyperparams['N'] self.dropout_rate = self._hyperparams['dropout_rate'] self.graph = tf.Graph() # Encapsulate model in own graph with self.graph.as_default(): self._init_network() self._init_loss_function() self._init_solver() # Create session config = tf.ConfigProto() config.gpu_options.allow_growth = True # Prevent GPS from hogging all memory self.sess = tf.Session(config=config) self.sess.run(tf.global_variables_initializer()) self.saver = tf.train.Saver(max_to_keep=None) self.graph.finalize() self.policy = self # Act method is contained in this class
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_CAFFE)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    self.batch_size = self._hyperparams['batch_size']

    if self._hyperparams['use_gpu']:
        caffe.set_device(self._hyperparams['gpu_id'])
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    self.init_solver()
    self.caffe_iter = 0
    self.var = self._hyperparams['init_var'] * np.ones(dU)

    self.policy = CaffePolicy(self.solver.test_nets[0],
                              self.solver.test_nets[1],
                              np.zeros(dU))
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_TORCH)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    self.batch_size = self._hyperparams['batch_size']
    self.lr = self._hyperparams['lr']

    if self._hyperparams['use_gpu']:
        self.dev = torch.device("cuda") \
            if torch.cuda.is_available() \
            else torch.device("cpu")
    else:
        self.dev = torch.device("cpu")

    self._init_network()

    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.policy = TorchPolicy(self.model, self.var, self.dev)
def __init__(self, hyperparams, dO, dU): config = copy.deepcopy(POLICY_OPT_TF) config.update(hyperparams) PolicyOpt.__init__(self, config, dO, dU) tf.set_random_seed(self._hyperparams['random_seed']) self.tf_iter = 0 self.batch_size = self._hyperparams['batch_size'] self.device_string = "/cpu:0" if self._hyperparams['use_gpu'] == 1: self.gpu_device = self._hyperparams['gpu_id'] self.device_string = "/gpu:" + str(self.gpu_device) self.act_op = None # mu_hat self.feat_op = None # features self.loss_scalar = None self.obs_tensor = None self.precision_tensor = None self.action_tensor = None # mu true self.solver = None self.feat_vals = None self.init_network() self.init_solver() self.var = self._hyperparams['init_var'] * np.ones(dU) self.sess = tf.Session() self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU), self.sess, self.device_string, copy_param_scope=self._hyperparams['copy_param_scope']) # List of indices for state (vector) data and image (tensor) data in observation. self.x_idx, self.img_idx, i = [], [], 0 if 'obs_image_data' not in self._hyperparams['network_params']: self._hyperparams['network_params'].update({'obs_image_data': []}) for sensor in self._hyperparams['network_params']['obs_include']: dim = self._hyperparams['network_params']['sensor_dims'][sensor] if sensor in self._hyperparams['network_params']['obs_image_data']: self.img_idx = self.img_idx + list(range(i, i+dim)) else: self.x_idx = self.x_idx + list(range(i, i+dim)) i += dim init_op = tf.initialize_all_variables() self.sess.run(init_op)
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_CAFFE)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    self.batch_size = self._hyperparams['batch_size']

    if self._hyperparams['use_gpu']:
        caffe.set_device(self._hyperparams['gpu_id'])
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()

    self.init_solver()
    self.caffe_iter = 0
    self.var = self._hyperparams['init_var'] * np.ones(dU)

    self.policy = CaffePolicy(self.solver.test_nets[0],
                              self.solver.test_nets[1],
                              self.var)
def __init__(self, hyperparams, dX, dU): """Initializes the policy. Args: hyperparams: Dictionary of hyperparameters. dX: Dimension of state space. dU: Dimension of action space. """ PolicyOpt.__init__(self, hyperparams, dX, dU) self.dX = dX self.dU = dU self.epochs = hyperparams['epochs'] self.param_noise_adaption_interval = hyperparams[ 'param_noise_adaption_interval'] set_global_seeds(hyperparams['seed']) # Initialize DDPG policy self.pol = DDPG(Actor(dU, network=hyperparams['network'], **hyperparams['network_kwargs']), Critic(network=hyperparams['network'], **hyperparams['network_kwargs']), Memory(limit=hyperparams['memory_limit'], action_shape=(dU, ), observation_shape=(dX, )), observation_shape=(dX, ), action_shape=(dU, ), param_noise=AdaptiveParamNoiseSpec( initial_stddev=0.2, desired_action_stddev=0.2), **hyperparams['ddpg_kwargs']) sess = get_session() self.pol.initialize(sess) sess.graph.finalize() self.policy = self # Act method is contained in this class
def __init__(self, hyperparams, dO, dU):
    config = copy.deepcopy(POLICY_OPT_TF)
    config.update(hyperparams)

    PolicyOpt.__init__(self, config, dO, dU)

    # self.debug = True
    tf.set_random_seed(self._hyperparams['random_seed'])

    self.tf_iter = 0
    self.checkpoint_file = self._hyperparams['checkpoint_prefix']
    self.batch_size = self._hyperparams['batch_size']
    self.device_string = "/cpu:0"
    if self._hyperparams['use_gpu'] == 1:
        self.gpu_device = self._hyperparams['gpu_id']
        self.device_string = "/gpu:" + str(self.gpu_device)
    self.act_op = None  # mu_hat
    self.feat_op = None  # features
    self.loss_scalar = None
    self.obs_tensor = None
    self.precision_tensor = None
    self.action_tensor = None  # mu true
    self.solver = None
    self.feat_vals = None
    ##
    self.conv_layer_0 = None
    self.conv_layer_1 = None
    self.conv_layer_2 = None
    # self.main_itr = None  # Set this value to None when training
    # self.main_itr = 10    # Set this value to i-th iteration when testing policy at i-th iteration
    #                       # or when resuming training at i-th iteration.
    self.main_itr = 6
    ##
    self.init_network()
    self.init_solver()
    self.var = self._hyperparams['init_var'] * np.ones(dU)
    self.sess = tf.Session()

    # self.policy = TfPolicy(dU, self.obs_tensor, self.act_op, self.feat_op,
    #                        np.zeros(dU), self.sess, self.device_string,
    #                        copy_param_scope=self._hyperparams['copy_param_scope'])
    ##
    self.policy = TfPolicy(
        dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
        self.sess, self.device_string,
        copy_param_scope=self._hyperparams['copy_param_scope'],
        conv_layer_0=self.conv_layer_0,
        conv_layer_1=self.conv_layer_1,
        conv_layer_2=self.conv_layer_2)
    ##

    # List of indices for state (vector) data and image (tensor) data in observation.
    self.x_idx, self.img_idx, i = [], [], 0
    if 'obs_image_data' not in self._hyperparams['network_params']:
        self._hyperparams['network_params'].update({'obs_image_data': []})
    for sensor in self._hyperparams['network_params']['obs_include']:
        dim = self._hyperparams['network_params']['sensor_dims'][sensor]
        if sensor in self._hyperparams['network_params']['obs_image_data']:
            self.img_idx = self.img_idx + list(range(i, i + dim))
        else:
            self.x_idx = self.x_idx + list(range(i, i + dim))
        i += dim

    # init_op = tf.initialize_all_variables()
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)