Пример #1
0
    def __init__(self, hyperparams, dX, dU):
        """Set up the policy: build the model in a private graph and start a session.

        Args:
            hyperparams: Hyperparameters, forwarded to PolicyOpt.__init__.
            dX: Stored as self.dX (presumably the state dimension -- confirm).
            dU: Stored as self.dU; also the length of the initial variance
                vector self.var.

        Values read from self._hyperparams: 'random_seed', 'init_var',
        'epochs', 'batch_size', 'weight_decay' and 'N_hidden'.
        """
        PolicyOpt.__init__(self, hyperparams, dX, dU)
        self.dX = dX
        self.dU = dU

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.epochs = self._hyperparams['epochs']
        self.batch_size = self._hyperparams['batch_size']
        self.weight_decay = self._hyperparams['weight_decay']
        self.N_hidden = self._hyperparams['N_hidden']

        self.graph = tf.Graph()  # Encapsulate model in own graph
        with self.graph.as_default():
            # tf.set_random_seed() seeds whichever graph is the default at
            # call time, so it has to run inside this context to affect the
            # ops created below. Calling it before self.graph is the default
            # would seed the global default graph instead.
            tf.set_random_seed(self._hyperparams['random_seed'])

            self.init_network()
            self.init_loss_function()
            self.init_solver()

            # Create session
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True  # Prevent GPS from hogging all memory
            self.sess = tf.Session(config=config)
            self.sess.run(tf.global_variables_initializer())

        self.policy = self  # Act method is contained in this class
        self.scaler = None
Пример #2
0
    def __init__(self, hyperparams, dO, dU):
        """Create the TensorFlow policy optimizer together with its TfPolicy.

        A deep copy of POLICY_OPT_TF provides the default settings; entries of
        `hyperparams` override them before the result is handed to
        PolicyOpt.__init__.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TF defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var and the
                zero vector passed to TfPolicy.
        """
        settings = copy.deepcopy(POLICY_OPT_TF)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        self.tf_iter = 0
        self.checkpoint_file = self._hyperparams['checkpoint_prefix']
        self.batch_size = self._hyperparams['batch_size']

        # Pick the device string: a specific GPU when use_gpu == 1, else CPU.
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        else:
            self.device_string = "/cpu:0"

        # Declare the graph handles up front; presumably init_network() and
        # init_solver() fill them in -- confirm against those methods.
        self.obs_tensor = None
        self.action_tensor = None  # mu true
        self.precision_tensor = None
        self.act_op = None  # mu_hat
        self.loss_scalar = None
        self.solver = None

        self.init_network()
        self.init_solver()

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
        )
        self.sess.run(tf.initialize_all_variables())
    def __init__(self, hyperparams, dO, dU):
        """Create the Caffe policy optimizer and its CaffePolicy.

        A deep copy of POLICY_OPT_CAFFE is updated with `hyperparams` and
        passed to PolicyOpt.__init__.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_CAFFE
                defaults. Optional keys handled here:
                'init_net': path given to self.solver.net.copy_from().
                'init_normalization': path of a pickle file holding a mapping
                    with 'bias' and 'scale' entries, copied onto the policy.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var.
        """
        config = copy.deepcopy(POLICY_OPT_CAFFE)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        self.batch_size = self._hyperparams['batch_size']

        if self._hyperparams['use_gpu']:
            caffe.set_device(self._hyperparams['gpu_id'])
            caffe.set_mode_gpu()
        else:
            caffe.set_mode_cpu()

        self.init_solver()
        # Load parameters from caffemodel file
        if 'init_net' in self._hyperparams:
            self.solver.net.copy_from(self._hyperparams['init_net'])

        self.caffe_iter = 0
        self.var = self._hyperparams['init_var'] * np.ones(dU)

        self.policy = CaffePolicy(self.solver.test_nets[0],
                                  self.solver.test_nets[1],
                                  self.var)

        # No normalization unless a file with bias/scale is supplied.
        self.policy.bias = None
        self.policy.scale = None
        if 'init_normalization' in self._hyperparams:
            # Pickle streams are binary: text mode breaks pickle.load on
            # Python 3 and may corrupt the data on platforms that translate
            # line endings.
            # NOTE(review): pickle.load can execute arbitrary code; only point
            # 'init_normalization' at trusted files.
            with open(self._hyperparams['init_normalization'], 'rb') as fin:
                normalization_data = pickle.load(fin)
            self.policy.bias = normalization_data['bias']
            self.policy.scale = normalization_data['scale']
Пример #4
0
    def __init__(self, hyperparams, dO, dU):
        """Create the TensorFlow policy optimizer together with its TfPolicy.

        When `hyperparams` is None the method returns right away: PolicyOpt's
        initializer is not called and none of the attributes below are set.
        Otherwise a deep copy of POLICY_OPT_TF, updated with `hyperparams`, is
        handed to PolicyOpt.__init__.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TF defaults,
                or None to skip initialization entirely.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var and the
                zero vector passed to TfPolicy.
        """
        settings = copy.deepcopy(POLICY_OPT_TF)
        if hyperparams is None:
            return
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']

        # Pick the device string: a specific GPU when use_gpu == 1, else CPU.
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        else:
            self.device_string = "/cpu:0"

        # Declare the graph handles up front; presumably init_network() and
        # init_solver() fill them in -- confirm against those methods.
        self.obs_tensor = None
        self.action_tensor = None  # mu true
        self.precision_tensor = None
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.feat_vals = None
        self.loss_scalar = None
        self.solver = None

        self.init_network()
        self.init_solver()

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
            self.sess, self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'])

        # Split the observation indices into state (vector) entries and image
        # (tensor) entries, walking the sensors in 'obs_include' order.
        net_params = self._hyperparams['network_params']
        if 'obs_image_data' not in net_params:
            net_params['obs_image_data'] = []
        self.x_idx, self.img_idx = [], []
        offset = 0
        for sensor in net_params['obs_include']:
            width = net_params['sensor_dims'][sensor]
            is_image = sensor in net_params['obs_image_data']
            bucket = self.img_idx if is_image else self.x_idx
            bucket.extend(range(offset, offset + width))
            offset += width

        self.sess.run(tf.initialize_all_variables())

        # Keep the policy's normalize flag in sync with this object's.
        self.normalize = self.policy.normalize = self._hyperparams['normalize']
Пример #5
0
    def __init__(self, hyperparams, dO, dU):
        """Create the TensorFlow policy optimizer together with its TfPolicy.

        A deep copy of POLICY_OPT_TF, updated with `hyperparams`, is handed to
        PolicyOpt.__init__. The session is created with GPU memory growth
        enabled.

        Note: self.policy_type and self.log_std are read when building the
        TfPolicy but are not assigned in this method; presumably
        init_network() or init_solver() sets them -- confirm.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TF defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var and the
                zero vector passed to TfPolicy.
        """
        settings = copy.deepcopy(POLICY_OPT_TF)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']

        # Pick the device string: a specific GPU when use_gpu == 1, else CPU.
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        else:
            self.device_string = "/cpu:0"

        # Declare the graph handles up front; presumably init_network() and
        # init_solver() fill them in -- confirm against those methods.
        self.obs_tensor = None
        self.action_tensor = None  # mu true
        self.cost_tensor = None
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.feat_vals = None
        self.solver = None

        self.init_network()
        self.init_solver()

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.center_adv = self._hyperparams.get("center_adv", True)

        session_cfg = tf.ConfigProto()
        session_cfg.gpu_options.allow_growth = True
        self.sess = tf.Session(config=session_cfg)

        self.policy = TfPolicy(
            dU, self.obs_tensor, self.act_op, self.feat_op, np.zeros(dU),
            self.sess, self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'],
            policy_type=self.policy_type,
            log_std=self.log_std)

        # Split the observation indices into state (vector) entries and image
        # (tensor) entries, walking the sensors in 'obs_include' order.
        net_params = self._hyperparams['network_params']
        if 'obs_image_data' not in net_params:
            net_params['obs_image_data'] = []
        self.x_idx, self.img_idx = [], []
        offset = 0
        for sensor in net_params['obs_include']:
            width = net_params['sensor_dims'][sensor]
            is_image = sensor in net_params['obs_image_data']
            bucket = self.img_idx if is_image else self.x_idx
            bucket.extend(range(offset, offset + width))
            offset += width

        self.sess.run(tf.global_variables_initializer())
Пример #6
0
    def __init__(self, hyperparams, dX, dU):
        """Initializes the policy.

        The model, loss and solver are built inside a private graph, which is
        finalized once the session and saver have been created.

        Args:
            hyperparams: Dictionary of hyperparameters.
            dX: Dimension of state space.
            dU: Dimension of action space.

        Hyperparameters:
            random_seed: Random seed used for tensorflow.
            init_var: Initial policy variance
            epochs: Number of training epochs each iteration.
            batch_size: Batch size used during training. Must be a divisor of  M * N * (T - 1).
            weight_decay: L2 regularization of the network.
            N_hidden: Size of hidden layers.
            dZ: Dimension of the latent space.
            beta_kl: Weight of the KL-divergence term in loss function.
            N: Number of samples used for regularization (TODO confirm exact meaning).
            dropout_rate: Stored as self.dropout_rate; presumably the dropout
                rate applied by the network (TODO confirm).

        """
        PolicyOpt.__init__(self, hyperparams, dX, dU)
        self.dX = dX
        self.dU = dU

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.epochs = self._hyperparams['epochs']
        self.batch_size = self._hyperparams['batch_size']
        self.weight_decay = self._hyperparams['weight_decay']
        self.N_hidden = self._hyperparams['N_hidden']
        self.dZ = self._hyperparams['dZ']
        self.beta_kl = self._hyperparams['beta_kl']
        self.N = self._hyperparams['N']
        self.dropout_rate = self._hyperparams['dropout_rate']

        self.graph = tf.Graph()  # Encapsulate model in own graph
        with self.graph.as_default():
            # tf.set_random_seed() seeds whichever graph is the default at
            # call time, so it has to run inside this context to affect the
            # ops created below. Calling it before self.graph is the default
            # would seed the global default graph instead.
            tf.set_random_seed(self._hyperparams['random_seed'])

            self._init_network()
            self._init_loss_function()
            self._init_solver()

            # Create session
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True  # Prevent GPS from hogging all memory
            self.sess = tf.Session(config=config)
            self.sess.run(tf.global_variables_initializer())

            # max_to_keep=None: the saver does not delete older checkpoints.
            self.saver = tf.train.Saver(max_to_keep=None)
            # Freeze the graph so no further ops can be added to it.
            self.graph.finalize()

        self.policy = self  # Act method is contained in this class
Пример #7
0
    def __init__(self, hyperparams, dO, dU):
        """Create the Caffe policy optimizer and its CaffePolicy.

        A deep copy of POLICY_OPT_CAFFE, updated with `hyperparams`, is handed
        to PolicyOpt.__init__. The CaffePolicy is built from the solver's
        first two test nets and a zero vector of length dU.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_CAFFE defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var.
        """
        settings = copy.deepcopy(POLICY_OPT_CAFFE)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        self.batch_size = self._hyperparams['batch_size']

        # Select the Caffe compute mode before the solver is created.
        if not self._hyperparams['use_gpu']:
            caffe.set_mode_cpu()
        else:
            caffe.set_device(self._hyperparams['gpu_id'])
            caffe.set_mode_gpu()

        self.init_solver()
        self.caffe_iter = 0
        self.var = self._hyperparams['init_var'] * np.ones(dU)

        test_nets = self.solver.test_nets
        self.policy = CaffePolicy(test_nets[0], test_nets[1], np.zeros(dU))
Пример #8
0
    def __init__(self, hyperparams, dO, dU):
        """Create the PyTorch policy optimizer and its TorchPolicy.

        A deep copy of POLICY_OPT_TORCH, updated with `hyperparams`, is handed
        to PolicyOpt.__init__. CUDA is used only when 'use_gpu' is truthy and
        torch reports that CUDA is available; otherwise the CPU is used.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TORCH defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var.
        """
        settings = copy.deepcopy(POLICY_OPT_TORCH)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        self.batch_size = self._hyperparams['batch_size']
        self.lr = self._hyperparams['lr']

        # Short-circuit keeps the CUDA probe from running when GPU use is off.
        cuda_ok = self._hyperparams['use_gpu'] and torch.cuda.is_available()
        self.dev = torch.device("cuda" if cuda_ok else "cpu")

        self._init_network()
        self.var = self._hyperparams['init_var'] * np.ones(dU)

        # self.model is not assigned here; presumably _init_network() creates
        # it -- confirm.
        self.policy = TorchPolicy(self.model, self.var, self.dev)
Пример #9
0
    def __init__(self, hyperparams, dO, dU):
        """Create the TensorFlow policy optimizer together with its TfPolicy.

        A deep copy of POLICY_OPT_TF, updated with `hyperparams`, is handed to
        PolicyOpt.__init__.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TF defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var and the
                zero vector passed to TfPolicy.
        """
        settings = copy.deepcopy(POLICY_OPT_TF)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.batch_size = self._hyperparams['batch_size']

        # Pick the device string: a specific GPU when use_gpu == 1, else CPU.
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)
        else:
            self.device_string = "/cpu:0"

        # Declare the graph handles up front; presumably init_network() and
        # init_solver() fill them in -- confirm against those methods.
        self.obs_tensor = None
        self.action_tensor = None  # mu true
        self.precision_tensor = None
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.feat_vals = None
        self.loss_scalar = None
        self.solver = None

        self.init_network()
        self.init_solver()

        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'],
        )

        # Split the observation indices into state (vector) entries and image
        # (tensor) entries, walking the sensors in 'obs_include' order.
        net_params = self._hyperparams['network_params']
        if 'obs_image_data' not in net_params:
            net_params['obs_image_data'] = []
        self.x_idx, self.img_idx = [], []
        offset = 0
        for sensor in net_params['obs_include']:
            width = net_params['sensor_dims'][sensor]
            is_image = sensor in net_params['obs_image_data']
            bucket = self.img_idx if is_image else self.x_idx
            bucket.extend(range(offset, offset + width))
            offset += width

        self.sess.run(tf.initialize_all_variables())
Пример #10
0
    def __init__(self, hyperparams, dO, dU):
        """Create the Caffe policy optimizer and its CaffePolicy.

        A deep copy of POLICY_OPT_CAFFE, updated with `hyperparams`, is handed
        to PolicyOpt.__init__. The CaffePolicy is built from the solver's
        first two test nets and self.var.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_CAFFE defaults.
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var.
        """
        settings = copy.deepcopy(POLICY_OPT_CAFFE)
        settings.update(hyperparams)
        PolicyOpt.__init__(self, settings, dO, dU)

        self.batch_size = self._hyperparams['batch_size']

        # Select the Caffe compute mode before the solver is created.
        if not self._hyperparams['use_gpu']:
            caffe.set_mode_cpu()
        else:
            caffe.set_device(self._hyperparams['gpu_id'])
            caffe.set_mode_gpu()

        self.init_solver()
        self.caffe_iter = 0
        self.var = self._hyperparams['init_var'] * np.ones(dU)

        test_nets = self.solver.test_nets
        self.policy = CaffePolicy(test_nets[0], test_nets[1], self.var)
Пример #11
0
Файл: ddpg.py Проект: DiddiZ/gps
    def __init__(self, hyperparams, dX, dU):
        """Initializes the policy.

        Builds a DDPG agent from an Actor, a Critic and a replay Memory,
        initializes it with the session returned by get_session(), and then
        finalizes that session's graph.

        Args:
            hyperparams: Dictionary of hyperparameters. Keys read here:
                'epochs', 'param_noise_adaption_interval', 'seed', 'network',
                'network_kwargs', 'memory_limit' and 'ddpg_kwargs'.
            dX: Dimension of state space.
            dU: Dimension of action space.

        """
        PolicyOpt.__init__(self, hyperparams, dX, dU)
        self.dX = dX
        self.dU = dU

        self.epochs = hyperparams['epochs']
        interval = hyperparams['param_noise_adaption_interval']
        self.param_noise_adaption_interval = interval
        set_global_seeds(hyperparams['seed'])

        # Initialize DDPG policy
        obs_shape = (dX, )
        act_shape = (dU, )
        actor = Actor(dU,
                      network=hyperparams['network'],
                      **hyperparams['network_kwargs'])
        critic = Critic(network=hyperparams['network'],
                        **hyperparams['network_kwargs'])
        memory = Memory(limit=hyperparams['memory_limit'],
                        action_shape=act_shape,
                        observation_shape=obs_shape)
        noise_spec = AdaptiveParamNoiseSpec(initial_stddev=0.2,
                                            desired_action_stddev=0.2)
        self.pol = DDPG(actor,
                        critic,
                        memory,
                        observation_shape=obs_shape,
                        action_shape=act_shape,
                        param_noise=noise_spec,
                        **hyperparams['ddpg_kwargs'])

        session = get_session()
        self.pol.initialize(session)
        session.graph.finalize()

        self.policy = self  # Act method is contained in this class
Пример #12
0
    def __init__(self, hyperparams, dO, dU):
        """Create the TensorFlow policy optimizer together with its TfPolicy.

        A deep copy of POLICY_OPT_TF, updated with `hyperparams`, is handed to
        PolicyOpt.__init__.

        Args:
            hyperparams: Mapping of overrides for the POLICY_OPT_TF defaults.
                The optional key 'main_itr' sets self.main_itr (default 6).
            dO: Forwarded to PolicyOpt.__init__ unchanged.
            dU: Forwarded to PolicyOpt.__init__; also sizes self.var and the
                zero vector passed to TfPolicy.
        """
        config = copy.deepcopy(POLICY_OPT_TF)
        config.update(hyperparams)

        PolicyOpt.__init__(self, config, dO, dU)

        tf.set_random_seed(self._hyperparams['random_seed'])

        self.tf_iter = 0
        self.checkpoint_file = self._hyperparams['checkpoint_prefix']
        self.batch_size = self._hyperparams['batch_size']
        self.device_string = "/cpu:0"
        if self._hyperparams['use_gpu'] == 1:
            self.gpu_device = self._hyperparams['gpu_id']
            self.device_string = "/gpu:" + str(self.gpu_device)

        # Graph handles, declared up front; presumably init_network() and
        # init_solver() fill them in -- confirm against those methods.
        self.act_op = None  # mu_hat
        self.feat_op = None  # features
        self.loss_scalar = None
        self.obs_tensor = None
        self.precision_tensor = None
        self.action_tensor = None  # mu true
        self.solver = None
        self.feat_vals = None
        self.conv_layer_0 = None
        self.conv_layer_1 = None
        self.conv_layer_2 = None

        # Iteration marker: None when training; i when testing the policy of
        # the i-th iteration or when resuming training at the i-th iteration.
        # Taken from the optional 'main_itr' hyperparameter so switching modes
        # does not require editing this file; defaults to 6 when absent.
        self.main_itr = self._hyperparams.get('main_itr', 6)

        self.init_network()
        self.init_solver()
        self.var = self._hyperparams['init_var'] * np.ones(dU)
        self.sess = tf.Session()
        self.policy = TfPolicy(
            dU,
            self.obs_tensor,
            self.act_op,
            self.feat_op,
            np.zeros(dU),
            self.sess,
            self.device_string,
            copy_param_scope=self._hyperparams['copy_param_scope'],
            conv_layer_0=self.conv_layer_0,
            conv_layer_1=self.conv_layer_1,
            conv_layer_2=self.conv_layer_2)

        # List of indices for state (vector) data and image (tensor) data in observation.
        self.x_idx, self.img_idx, i = [], [], 0
        if 'obs_image_data' not in self._hyperparams['network_params']:
            self._hyperparams['network_params'].update({'obs_image_data': []})
        for sensor in self._hyperparams['network_params']['obs_include']:
            dim = self._hyperparams['network_params']['sensor_dims'][sensor]
            if sensor in self._hyperparams['network_params']['obs_image_data']:
                self.img_idx = self.img_idx + list(range(i, i + dim))
            else:
                self.x_idx = self.x_idx + list(range(i, i + dim))
            i += dim

        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)