def __init__(self, numTilings=1, parameters=2, rlAlpha=0.5, rlLambda=0.9, rlGamma=0.9, cTableSize=0):
    """Set up the learner's hyper-parameters, bookkeeping slots and weights.

    Every argument has a default, so the object can be constructed with no
    arguments to get a small example configuration.

    Args:
        numTilings: number of tilings; sizes ``F`` and (with ``parameters``)
            the trace holder and weight vector.
        parameters: used as the exponent when sizing the trace holder and
            the weight vector.
        rlAlpha: stored as ``self.rlAlpha``.
        rlLambda: stored as ``self.rlLambda`` and passed to ``TraceHolder``.
        rlGamma: stored as ``self.rlGamma`` and passed to ``Verifier``.
        cTableSize: size argument passed to ``CollisionTable``.
    """
    # Hyper-parameters, stored verbatim.
    self.numTilings = numTilings
    self.tileWidths = []
    self.parameters = parameters
    self.rlAlpha = rlAlpha
    self.rlLambda = rlLambda
    self.rlGamma = rlGamma

    # Per-step bookkeeping; nothing has been observed yet.
    self.prediction = None
    self.lastS = None
    self.lastQ = None
    self.lastPrediction = None
    self.lastReward = None

    # NOTE(review): the trace holder is sized with exponent `parameters`
    # while theta uses `parameters + 1` -- confirm this is intentional.
    trace_size = self.numTilings ** self.parameters + 1
    self.traceH = TraceHolder(trace_size, self.rlLambda, 1000)

    # The indices of the returned tiles will go in here.
    self.F = [0 for _ in range(self.numTilings)]

    # Weight vector.
    theta_size = self.numTilings ** (self.parameters + 1) + 1
    self.theta = [0 for _ in range(theta_size)]

    self.cTable = CollisionTable(cTableSize, 'safe')  # look into this...
    self.verifier = Verifier(self.rlGamma)
def __init__(self, actions, numTilings=1, parameters=2, rlAlpha=0.5, rlLambda=0.9, rlGamma=0.9, rlEpsilon=0.1, cTableSize=0, action_selection='softmax'):
    """Set up a control learner over a fixed collection of actions.

    Apart from ``actions``, every argument has a default, so an example
    learner can be built by supplying only the action list.

    Args:
        actions: sequence of actions the learner can select from. Its
            length sizes ``F``, ``q_vals`` and ``q``.
        numTilings: number of tilings; sizes each row of ``F`` and (with
            ``parameters``) the trace holder and weight vectors.
        parameters: used as the exponent when sizing the trace holder and
            the weight vectors.
        rlAlpha: stored as ``self.rlAlpha``.
        rlLambda: stored as ``self.rlLambda`` and passed to ``TraceHolder``.
        rlGamma: stored as ``self.rlGamma`` and passed to ``Verifier``.
        rlEpsilon: stored as ``self.rlEpsilon``.
        cTableSize: size argument passed to ``CollisionTable``.
        action_selection: name of the action-selection strategy, stored
            as ``self.action_selection``.
    """
    # Hyper-parameters, stored verbatim.
    self.numTilings = numTilings
    self.tileWidths = list()
    self.parameters = parameters
    self.rlAlpha = rlAlpha
    self.rlLambda = rlLambda
    self.rlGamma = rlGamma
    self.rlEpsilon = rlEpsilon
    self.action_selection = action_selection

    # Per-step bookkeeping; nothing has been observed yet.
    self.lastS = None
    self.lastQ = None
    self.lastPrediction = None
    self.lastReward = None
    self.lastAction = None
    self.currentAction = None

    self.actions = actions  # an array of actions which we can select from
    # `actions` is a sequence, so size per-action structures by its length
    # (calling range() on the sequence itself raises TypeError).
    num_actions = len(actions)

    # NOTE(review): the trace holder is sized with exponent `parameters`
    # while the weight vectors use `parameters + 1` -- confirm intentional.
    self.traceH = TraceHolder((self.numTilings ** self.parameters) + 1, self.rlLambda, 1000)

    # The indices of the returned tiles will go in here, one row per action.
    self.F = [[0 for _ in range(self.numTilings)] for _ in range(num_actions)]
    self.q_vals = [0 for _ in range(num_actions)]

    # One [action, weight vector] pair per action. The container must be
    # created before it is filled, and each entry is a single object
    # because list.append accepts exactly one argument.
    # NOTE(review): pair layout inferred from the original two-argument
    # append call -- confirm against the code that reads self.q.
    weight_count = (self.numTilings ** (self.parameters + 1)) + 1
    self.q = [[action, [0 for _ in range(weight_count)]] for action in actions]

    self.cTable = CollisionTable(cTableSize, 'safe')  # look into this...
    self.verifier = Verifier(self.rlGamma)