示例#1
0
    def __init__(self, dU, obs_tensor, act_op, feat_op, var, sess,
                 device_string, copy_param_scope=None):
        """Record the TensorFlow handles and noise factor for this policy.

        Args:
            dU: Stored unchanged as ``self.dU``.
            obs_tensor: Stored unchanged as ``self.obs_tensor``.
            act_op: Stored unchanged as ``self.act_op``.
            feat_op: Stored unchanged as ``self.feat_op``.
            var: Passed through ``np.sqrt`` and then ``np.diag`` to build
                ``self.chol_pol_covar``.
            sess: Stored unchanged as ``self.sess``.
            device_string: Stored unchanged as ``self.device_string``.
            copy_param_scope: Optional variable scope. When truthy, the
                global variables collected under that scope get one
                float32 placeholder and one assign op each.
        """
        Policy.__init__(self)

        # Handles supplied by the caller, kept as-is.
        self.dU = dU
        self.obs_tensor = obs_tensor
        self.act_op = act_op
        self.feat_op = feat_op
        self.sess = sess
        self.device_string = device_string

        std = np.sqrt(var)
        self.chol_pol_covar = np.diag(std)

        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None

        # Without a scope, none of the copy_params* attributes are created.
        if not copy_param_scope:
            return

        self.copy_params = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope=copy_param_scope)

        # One placeholder per collected variable, with the variable's shape.
        self.copy_params_assign_placeholders = [
            tf.placeholder(tf.float32, shape=variable.get_shape())
            for variable in self.copy_params
        ]

        # Pair every variable with its placeholder to build the assign ops.
        self.copy_params_assign_ops = [
            tf.assign(variable, placeholder)
            for variable, placeholder in zip(
                self.copy_params, self.copy_params_assign_placeholders)
        ]
 def __init__(self, test_net, deploy_net, var):
     """Keep references to the two nets and build the noise factor.

     Args:
         test_net: Stored as ``self.net``.
         deploy_net: Stored as ``self.deploy_net``.
         var: Passed through ``np.sqrt`` and then ``np.diag`` to build
             ``self.chol_pol_covar``.
     """
     Policy.__init__(self)

     self.net, self.deploy_net = test_net, deploy_net

     std = np.sqrt(var)
     self.chol_pol_covar = np.diag(std)

     self.scale = None  # must be set from elsewhere based on observations
     self.bias = None
 def __init__(self, agent, learning_rate, cond, noise_var=None):
     """Store the agent, learning rate and condition for this policy.

     Args:
         agent: Stored unchanged as ``self.agent``.
         learning_rate: Stored unchanged as ``self.learning_rate``.
         cond: Stored unchanged as ``self.cond``.
         noise_var: Optional. When not ``None``, its element-wise square
             root is stored as ``self.sqrt_noise_var``; otherwise that
             attribute is left undefined.
     """
     Policy.__init__(self)

     self.agent = agent
     self.learning_rate = learning_rate
     self.cond = cond    # cond, not m

     # NOTE(review): sqrt_noise_var only exists when noise_var was given;
     # readers of that attribute need to account for its absence.
     if noise_var is None:
         return
     self.sqrt_noise_var = np.sqrt(noise_var)
示例#4
0
    def __init__(self, algorithm, history_length):
        """Bind the policy to an algorithm object and allocate X/U buffers.

        Args:
            algorithm: Object exposing ``T``, ``dX`` and ``dU``; kept as
                ``self.algorithm``.
            history_length: Stored unchanged as ``self.history_length``.
        """
        Policy.__init__(self)

        self.algorithm = algorithm
        self.history_length = history_length
        self.dU = algorithm.dU  # This should be in super class

        # np.empty leaves the contents uninitialised; both buffers have
        # algorithm.T rows.
        self.X = np.empty((algorithm.T, algorithm.dX))
        self.U = np.empty((algorithm.T, algorithm.dU))
示例#5
0
 def __init__(self, dU, dX, var, gcm):
     """Store the dimensions, the ``gcm`` object and the noise factor.

     Args:
         dU: Stored unchanged as ``self.dU``.
         dX: Stored unchanged as ``self.dX``.
         var: Passed through ``np.sqrt`` and then ``np.diag`` to build
             ``self.chol_pol_covar``.
         gcm: Stored unchanged as ``self.gcm``.
     """
     Policy.__init__(self)

     self.dU, self.dX = dU, dX
     self.gcm = gcm

     std = np.sqrt(var)
     self.chol_pol_covar = np.diag(std)

     self.scale = None  # must be set from elsewhere based on observations
     self.bias = None
     self.x_idx = None
示例#6
0
 def __init__(self, dU, obs_tensor, act_op, var, sess, device_string):
     """Record the TensorFlow handles and noise factor for this policy.

     Args:
         dU: Stored unchanged as ``self.dU``.
         obs_tensor: Stored unchanged as ``self.obs_tensor``.
         act_op: Stored unchanged as ``self.act_op``.
         var: Passed through ``np.sqrt`` and then ``np.diag`` to build
             ``self.chol_pol_covar``.
         sess: Stored unchanged as ``self.sess``.
         device_string: Stored unchanged as ``self.device_string``.
     """
     Policy.__init__(self)

     # Handles supplied by the caller, kept as-is.
     self.dU = dU
     self.obs_tensor = obs_tensor
     self.act_op = act_op
     self.sess = sess
     self.device_string = device_string

     std = np.sqrt(var)
     self.chol_pol_covar = np.diag(std)

     self.scale = None  # must be set from elsewhere based on observations
     self.bias = None
示例#7
0
 def __init__(self, dU, obs_tensor, act_op, var, sess, device_string):
     """Initialise the policy from a session, its ops and a variance.

     Args:
         dU: Stored unchanged as ``self.dU``.
         obs_tensor: Stored unchanged as ``self.obs_tensor``.
         act_op: Stored unchanged as ``self.act_op``.
         var: Square-rooted element-wise and handed to ``np.diag``; the
             result becomes ``self.chol_pol_covar``.
         sess: Stored unchanged as ``self.sess``.
         device_string: Stored unchanged as ``self.device_string``.
     """
     Policy.__init__(self)

     self.dU = dU
     self.sess = sess
     self.device_string = device_string
     self.obs_tensor, self.act_op = obs_tensor, act_op

     noise_std = np.sqrt(var)
     self.chol_pol_covar = np.diag(noise_std)

     self.scale = None  # must be set from elsewhere based on observations
     self.bias = None
示例#8
0
    def __init__(self, K, k, pol_covar, chol_pol_covar, inv_pol_covar):
        """Store K, k and the covariance arrays after checking their shapes.

        ``T``, ``dU`` and ``dX`` are read from the first three entries of
        ``K.shape``. ``k`` is checked against ``(T, dU)`` and each of the
        three covariance arguments against ``(T, dU, dU)`` via
        ``check_shape`` before anything is stored.

        Args:
            K: Array whose shape defines ``T``, ``dU`` and ``dX``.
            k: Expected shape ``(T, dU)``.
            pol_covar: Expected shape ``(T, dU, dU)``.
            chol_pol_covar: Expected shape ``(T, dU, dU)``.
            inv_pol_covar: Expected shape ``(T, dU, dU)``.
        """
        Policy.__init__(self)

        # K is assumed to be correctly shaped; the rest must agree with it.
        k_shape = K.shape
        self.T, self.dU, self.dX = k_shape[0], k_shape[1], k_shape[2]

        check_shape(k, (self.T, self.dU))
        covar_shape = (self.T, self.dU, self.dU)
        for covar in (pol_covar, chol_pol_covar, inv_pol_covar):
            check_shape(covar, covar_shape)

        self.K, self.k = K, k
        self.pol_covar = pol_covar
        self.chol_pol_covar = chol_pol_covar
        self.inv_pol_covar = inv_pol_covar
示例#9
0
文件: tf_policy.py 项目: cbfinn/gps
    def __init__(self, dU, obs_tensor, act_op, feat_op, var, sess, device_string, copy_param_scope=None):
        """Record the TensorFlow handles and noise factor for this policy.

        Args:
            dU: Stored unchanged as ``self.dU``.
            obs_tensor: Stored unchanged as ``self.obs_tensor``.
            act_op: Stored unchanged as ``self.act_op``.
            feat_op: Stored unchanged as ``self.feat_op``.
            var: Passed through ``np.sqrt`` and then ``np.diag`` to build
                ``self.chol_pol_covar``.
            sess: Stored unchanged as ``self.sess``.
            device_string: Stored unchanged as ``self.device_string``.
            copy_param_scope: Optional variable scope. When truthy, the
                variables collected under that scope get one float32
                placeholder and one assign op each; otherwise none of the
                ``copy_params*`` attributes are created.
        """
        Policy.__init__(self)
        self.dU = dU
        self.obs_tensor = obs_tensor
        self.act_op = act_op
        self.feat_op = feat_op
        self.sess = sess
        self.device_string = device_string
        self.chol_pol_covar = np.diag(np.sqrt(var))
        self.scale = None  # must be set from elsewhere based on observations
        self.bias = None
        self.x_idx = None

        if copy_param_scope:
            # tf.GraphKeys.VARIABLES is deprecated in favour of
            # GLOBAL_VARIABLES; use the new name when the installed
            # TensorFlow has it and fall back to the legacy one otherwise.
            graph_keys = tf.GraphKeys
            if hasattr(graph_keys, 'GLOBAL_VARIABLES'):
                collection_key = graph_keys.GLOBAL_VARIABLES
            else:
                collection_key = graph_keys.VARIABLES

            self.copy_params = tf.get_collection(collection_key, scope=copy_param_scope)

            # One placeholder per collected variable, with the variable's shape.
            self.copy_params_assign_placeholders = [
                tf.placeholder(tf.float32, shape=param.get_shape())
                for param in self.copy_params
            ]

            # Pair each variable with its placeholder instead of indexing.
            self.copy_params_assign_ops = [
                tf.assign(param, placeholder)
                for param, placeholder in zip(self.copy_params,
                                              self.copy_params_assign_placeholders)
            ]
示例#10
0
 def __init__(self, dU):
     """Initialise the base ``Policy`` and remember ``dU``.

     Args:
         dU: Stored unchanged as ``self.dU``.
     """
     Policy.__init__(self)

     self.dU = dU
示例#11
0
 def __init__(self, const=0.5):
     """Store ``const`` and create a fresh ``CMAAdaptSigmaCSA`` instance.

     Args:
         const: Stored unchanged as ``self.const``; defaults to 0.5.
     """
     Policy.__init__(self)

     self.const = const

     # A new adapter object is built for every policy instance.
     sigma_adapter = CMAAdaptSigmaCSA()
     self.adapt_sigma = sigma_adapter
示例#12
0
    def __init__(self, T=50):
        """Store ``T``, pin ``teacher`` to 0 and create a sigma adapter.

        Args:
            T: Stored unchanged as ``self.T``; defaults to 50.
        """
        Policy.__init__(self)

        # teacher is fixed at 0; the original carried a commented-out
        # alternative, np.random.choice([0,1]).
        self.teacher = 0
        self.T = T

        # A new adapter object is built for every policy instance.
        sigma_adapter = CMAAdaptSigmaCSA()
        self.adapt_sigma = sigma_adapter
示例#13
0
 def __init__(self, test_net, deploy_net, var):
     """Keep references to the two nets and build the noise factor.

     Args:
         test_net: Stored as ``self.net``.
         deploy_net: Stored as ``self.deploy_net``.
         var: Square-rooted element-wise and handed to ``np.diag``; the
             result becomes ``self.chol_pol_covar``.
     """
     Policy.__init__(self)

     self.net, self.deploy_net = test_net, deploy_net

     noise_std = np.sqrt(var)
     self.chol_pol_covar = np.diag(noise_std)