def __init__(self, gamma=0.95, threshold=0):
    ValueBasedLearner.__init__(self)
    self.num_features = 5
    self.num_actions = 1
    self.sigma = 1.0
    self.kern_c = 10
    self.kern_sigma = 0.5
    self.thresh = threshold
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
    self.lastreward = None
    self.state_dict = None
    self.cum_reward = np.array([])
    self.u_tilde = np.array([])
    self.C_tilde = np.array([[]])
    self.d = 0.0
    self.v_inv = 0.0
    self.c_tild = np.array([])
    self.dataset = None
    # self.g = np.array([])
    self.K_inv = np.array([[]])
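# kern_c and kern_sigma above suggest a scaled Gaussian (RBF) kernel over
# state features. A minimal sketch under that assumption (treating kern_c as
# an output scale and kern_sigma as a bandwidth is inferred from the names,
# not taken from the original implementation):
import numpy as np

def _gaussian_kernel(self, x1, x2):
    # k(x1, x2) = c * exp(-||x1 - x2||^2 / (2 * sigma^2))
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return self.kern_c * np.exp(-diff.dot(diff) / (2.0 * self.kern_sigma ** 2))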
def __init__(self, alpha=0.5, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
def __init__(self, alpha=0.5, gamma=0.99, neg_reward=False):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.neg_reward = neg_reward
    self.laststate = None
    self.lastactions = None
def __init__(self, maxEpochs=20, indexOfAgent=None):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
    self.maxEpochs = maxEpochs
    # self.ownerAgentProperties["requireOtherAgentsState"] = False
    self.ownerAgentProperties["requireJointAction"] = False
    self.ownerAgentProperties["requireJointReward"] = False
    self.isFirstLerning = True
def __init__(self, alpha=1.0, w=1.0, gamma=0.99, iters=10):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha  # step scale
    self.w = w  # learning rate
    self.gamma = gamma  # temporal discount
    self.iters = iters  # number of times to propagate value changes
    self.step = 0
def __init__(self, num_actions, num_features, **kwargs):
    ValueBasedLearner.__init__(self)
    setAllArgs(self, kwargs)
    self.explorer = None
    self.num_actions = num_actions
    self.num_features = num_features
    if self.randomInit:
        self._theta = randn(self.num_actions, self.num_features) / 10.
    else:
        self._theta = zeros((self.num_actions, self.num_features))
    self._additionalInit()
    self._behaviorPolicy = self._boltzmannPolicy
    self.reset()
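# Usage sketch for the learner above. The class name below is hypothetical;
# randomInit, _additionalInit, and _boltzmannPolicy are referenced by the
# constructor but defined elsewhere, so a concrete subclass must supply them:
#
#   learner = SomeLinearFALearner(num_actions=4, num_features=8, randomInit=True)
#   learner._theta.shape  # -> (4, 8): one weight row per action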
def __init__(self, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.gamma = gamma
    self.laststate = None
    self.lastaction = None
    self.num_features = 2
    self.num_actions = 1
    self.kern_c = 10
    self.covariance_mat = np.array([[]])
    self.inv = np.array([])
    self.state_dict = None
    self.cum_reward = np.array([])
    self.H = []
    self.kern_sigma = 0.2
    self.dataset = None
    self.sigma = 1
def reset(self):
    ValueBasedLearner.reset(self)
    self._callcount = 0
    self.newEpisode()
def __init__(self, indexOfAgent=None, **kwargs):
    ValueBasedLearner.__init__(self)
    self.indexOfAgent = indexOfAgent
def reset(self):
    ValueBasedLearner.reset(self)
    self.laststate = None
    self.lastaction = None
def __init__(self, maxEpochs=20):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
    self.maxEpochs = maxEpochs
def __init__(self, alpha=0.5):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = config.getf('gammaDiscountReward')
    self.netManager = CaffeMultiLayerPerceptronManagement(config.get('networkDir'))
def newEpisode(self):
    ValueBasedLearner.newEpisode(self)
    self._callcount += 1
    self.learningRate *= ((self.learningRateDecay + self._callcount)
                          / (self.learningRateDecay + self._callcount + 1.))
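# The multiplicative update above telescopes to a closed form: after t calls,
# learningRate_t = learningRate_0 * (learningRateDecay + 1) / (learningRateDecay + t + 1),
# i.e. a harmonic-style decay. A standalone check of that identity (the
# function name and the values below are illustrative only):
def decayed_rate(lr0, decay, t):
    lr = lr0
    for call in range(1, t + 1):
        lr *= (decay + call) / (decay + call + 1.0)
    return lr

assert abs(decayed_rate(0.5, 100.0, 50) - 0.5 * 101.0 / 151.0) < 1e-12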
def __init__(self):
    ValueBasedLearner.__init__(self)
    self.gamma = 0.9
def __init__(self, alpha=0.5, gamma=0.99):
    ValueBasedLearner.__init__(self)
    self.alpha = alpha
    self.gamma = gamma
    self.netManager = cnm.CaffeConvNetManagement(config.get('networkDir'))
# `batch` (episodes per rollout) is assumed to be defined earlier in the script.
epis = 10000 // batch  # number of rollouts
numbExp = 1  # number of experiments
env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    # ServerIP(default: localhost), Port(default: 21560)
    if env is not None:
        env.closeSocket()
    env = Tetra2Environment()
    # create task
    task = WalkForwardTask(env)
    # create automatic programmer module for the robot
    # (it will be received already created from the robot)
    channels_setup = [(1 if i != 2 else 0, 1, (0, 255), 0) for i in range(5)]
    programmer = Programmer(steps=1000, channels_setup=channels_setup,
                            types_subset=[1])  # , 2, 3])
    learner = ValueBasedLearner()
    agent = TetrapodAgent(programmer, learner)
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    for _ in range(epis):
        experiment.doEpisodes(batch)
        agent.learn(total_reward=task.getTotalReward())