Exemplo n.º 1
0
    def __init__(self,
                 numTilings=1,
                 parameters=2,
                 rlAlpha=0.5,
                 rlLambda=0.9,
                 rlGamma=0.9,
                 cTableSize=0):
        """Store the learner's settings and allocate its working buffers.

        Every argument has a default, so the object can be constructed with
        no arguments at all.

        Args:
            numTilings: Stored as ``self.numTilings``; sets the length of
                ``self.F`` and is the base of the size computations below.
            parameters: Stored as ``self.parameters``; used as the exponent
                in the size computations (presumably the number of input
                dimensions -- confirm against the tile-coding caller).
            rlAlpha: Stored as ``self.rlAlpha``.
            rlLambda: Stored as ``self.rlLambda`` and forwarded to
                ``TraceHolder``.
            rlGamma: Stored as ``self.rlGamma`` and forwarded to
                ``Verifier``.
            cTableSize: Size argument forwarded to ``CollisionTable``.
        """
        # Hyper-parameters, kept verbatim.
        self.numTilings = numTilings
        self.tileWidths = []
        self.parameters = parameters
        self.rlAlpha = rlAlpha
        self.rlLambda = rlLambda
        self.rlGamma = rlGamma

        # Nothing has been observed or predicted yet.
        self.prediction = None
        self.lastS = None
        self.lastQ = None
        self.lastPrediction = None
        self.lastReward = None

        # NOTE(review): the trace holder is sized with exponent `parameters`
        # while theta uses `parameters + 1`; kept exactly as originally
        # written -- confirm that the difference is intended.
        trace_size = self.numTilings**(self.parameters) + 1
        self.traceH = TraceHolder(trace_size, self.rlLambda, 1000)

        # Slots for the indices of the returned tiles, one per tiling.
        self.F = [0] * self.numTilings

        # Weight vector, initialised to zero.
        theta_size = (self.numTilings**(self.parameters + 1)) + 1
        self.theta = [0] * theta_size

        self.cTable = CollisionTable(cTableSize, 'safe')  # look into this...
        self.verifier = Verifier(self.rlGamma)
Exemplo n.º 2
0
    def __init__(self,
                 actions,
                 numTilings=1,
                 parameters=2,
                 rlAlpha=0.5,
                 rlLambda=0.9,
                 rlGamma=0.9,
                 rlEpsilon=0.1,
                 cTableSize=0,
                 action_selection='softmax'):
        """Store the learner's settings and allocate per-action buffers.

        Only ``actions`` is required; every other argument has a default.

        Args:
            actions: Sized, iterable collection of the actions which can be
                selected from. Stored as ``self.actions``.
            numTilings: Stored as ``self.numTilings``; sets the length of
                each row of ``self.F`` and is the base of the size
                computations below.
            parameters: Stored as ``self.parameters``; used as the exponent
                in the size computations (presumably the number of input
                dimensions -- confirm against the tile-coding caller).
            rlAlpha: Stored as ``self.rlAlpha``.
            rlLambda: Stored as ``self.rlLambda`` and forwarded to
                ``TraceHolder``.
            rlGamma: Stored as ``self.rlGamma`` and forwarded to
                ``Verifier``.
            rlEpsilon: Stored as ``self.rlEpsilon``.
            cTableSize: Size argument forwarded to ``CollisionTable``.
            action_selection: Stored as ``self.action_selection``.
        """
        # `self` must be the first parameter: Python binds the new instance
        # to the first positional slot, so the previous (actions, self)
        # order assigned every attribute to the caller's action list.
        self.numTilings = numTilings
        self.tileWidths = list()
        self.parameters = parameters
        self.rlAlpha = rlAlpha
        self.rlLambda = rlLambda
        self.rlGamma = rlGamma
        self.rlEpsilon = rlEpsilon
        self.action_selection = action_selection

        self.lastS = None
        self.lastQ = None
        self.lastPrediction = None
        self.lastReward = None
        self.lastAction = None
        self.currentAction = None

        self.actions = actions  # an array of actions which we can select from
        # `actions` is a collection, so size the per-action structures by
        # its length; range(actions) would raise TypeError.
        num_actions = len(actions)

        self.traceH = TraceHolder((self.numTilings**(self.parameters) + 1),
                                  self.rlLambda, 1000)

        # One row per action; the indices of the returned tiles go in here.
        self.F = [[0] * self.numTilings for _ in range(num_actions)]
        self.q_vals = [0] * num_actions

        # One independent zeroed weight vector per action, kept as
        # (action, weight_vector) pairs. The original called
        # self.q.append(action, [...]) on a list that was never created.
        # NOTE(review): the pair layout is inferred from that call --
        # confirm against the code that reads self.q.
        weight_count = (self.numTilings**(self.parameters + 1)) + 1
        self.q = [(action, [0] * weight_count) for action in actions]

        self.cTable = CollisionTable(cTableSize, 'safe')  # look into this...
        self.verifier = Verifier(self.rlGamma)