def __init__(self, policy_params):
    """Build the MLP weights, the LSTM row-embedding layer and the observation filter.

    Args:
        policy_params: mapping that must provide the keys 'numvars',
            'hsize', 'numlayers', 'rowembed', 'embed' and 'ob_filter'.
    """
    Policy.__init__(self, policy_params)
    self.numvars = policy_params['numvars']
    hsize = policy_params['hsize']
    numlayers = policy_params['numlayers']
    rowembeddim = policy_params['rowembed']  # row embedding
    embeddeddim = policy_params['embed']  # attention embedding

    # The MLP is sized from the row-embedding width; an earlier variant passed
    # numvars + 1 as the first argument instead.
    # presumably the arguments are (input dim, output dim, hidden size,
    # number of layers) -- confirm against make_mlp_weights
    self.make_mlp_weights(rowembeddim, embeddeddim, hsize, numlayers)

    # embed
    self.embedlayer = lstmlayer(nin=1, nh=rowembeddim)
    self.rowembeddim = rowembeddim
    # number of elements in the embedding layer's weight array
    self.embedoffset = self.embedlayer.get_weights().size

    # build up filter
    self.observation_filter = get_filter(
        policy_params['ob_filter'], shape=(self.numvars + 1,)
    )

    # for visualization: the buffers baseobsdict, normalized_attentionmap and
    # cutsdict are currently disabled
    self.t = 0
def __init__(self, policy_params):
    """Create a small random weight vector and the observation filter.

    Args:
        policy_params: mapping that must provide the keys 'numvars' and
            'ob_filter'.
    """
    Policy.__init__(self, policy_params)

    numvars = policy_params['numvars']
    feature_dim = numvars + 1
    init_scale = 0.001

    self.numvars = numvars
    # NOTE(review): another initializer in this file stores `self.weights`
    # (plural); confirm the singular attribute name here is intended.
    self.weight = np.random.randn(feature_dim) * init_scale

    # build up filter
    filter_kind = policy_params['ob_filter']
    self.observation_filter = get_filter(filter_kind, shape=(feature_dim,))
def __init__(self, policy_params):
    """Initialise the shared policy state: empty weights and an observation filter.

    Args:
        policy_params: mapping that must provide the keys 'numvars' and
            'ob_filter'.
    """
    self.numvars = policy_params['numvars']
    self.weights = np.empty(0)

    # a filter for updating statistics of the observations and normalizing
    # inputs to the policies
    # NOTE(review): the shape is fixed at (2,) here, while the other
    # initializers in this file build their filter with (numvars + 1,) --
    # confirm the constant is intended.
    self.observation_filter = get_filter(policy_params['ob_filter'], shape=(2,))
    self.update_filter = True
def __init__(self, policy_params):
    """Build the MLP weights and the observation filter.

    Args:
        policy_params: mapping that must provide the keys 'numvars',
            'hsize', 'numlayers' and 'ob_filter'.
    """
    Policy.__init__(self, policy_params)
    numvars = policy_params['numvars']
    self.numvars = numvars
    hsize = policy_params['hsize']
    numlayers = policy_params['numlayers']

    # presumably the arguments are (input dim, output dim, hidden size,
    # number of layers) -- confirm against make_mlp_weights
    self.make_mlp_weights(numvars + 1, 1, hsize, numlayers)

    # build up filter
    self.observation_filter = get_filter(
        policy_params['ob_filter'], shape=(self.numvars + 1,)
    )
def __init__(self, policy_params):
    """Build the embedding MLP weights and the observation filter.

    Args:
        policy_params: mapping that must provide the keys 'numvars',
            'hsize', 'numlayers', 'embed' and 'ob_filter'.
    """
    Policy.__init__(self, policy_params)
    numvars = policy_params['numvars']
    self.numvars = numvars
    hsize = policy_params['hsize']
    numlayers = policy_params['numlayers']
    embeddeddim = policy_params['embed']

    # presumably the arguments are (input dim, output dim, hidden size,
    # number of layers) -- confirm against make_mlp_weights
    self.make_mlp_weights(numvars + 1, embeddeddim, hsize, numlayers)

    # build up filter
    self.observation_filter = get_filter(
        policy_params['ob_filter'], shape=(self.numvars + 1,)
    )

    # for visualization: the buffers baseobsdict, normalized_attentionmap and
    # cutsdict are currently disabled
    self.t = 0