def __init__(self, **args):
    """Set up the Q-learner's tables; actual learning happens later."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Q-values live in a Counter keyed by (state, action): unseen pairs
    # read as 0.0 automatically. A second Counter holds state values.
    # (The batch value-iteration pseudo code that used to live here was
    # only a planning sketch -- nothing more is needed in init.)
    self.qvalues = util.Counter()
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize Q-value and visit-count storage."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Plain dicts: Qvalues maps (state, action) pairs to learned values,
    # visit tracks how often each pair has been seen.
    self.Qvalues = {}
    self.visit = {}
def __init__(self, **args):
    """Create the agent and its (state, action) -> Q-value table."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # util.Counter defaults every unseen key to 0.0.
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Build the Q-value map used by the learner."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # The Counter behaves like a dict indexed by (state, action) tuples
    # and hands back 0 for anything not yet learned.
    self.qValueMap = util.Counter()
def __init__(self, **args):
    """Initialize the Q table."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Counter (a map) keyed by state + action; value is the Q estimate.
    self.Q = util.Counter()
def __init__(self, numOfTurn, numofgauss, var, lamda, gaussDim, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)
    self.qVal = util.Counter()
    # Hyper-parameters of the Gaussian radial-basis-function model.
    self.numofgauss = numofgauss   # number of Gaussian basis functions
    self.var = var                 # shared per-dimension variance of each basis
    self.lamda = lamda             # regularization weight (presumably ridge -- TODO confirm where used)
    self.numOfTurn = numOfTurn     # number of turn indices to prepare parameters for
    self.gaussDim = gaussDim       # dimensionality of each Gaussian's mean
    # init basis: basis i has every coordinate of its mean equal to
    # i / numofgauss, and an isotropic diagonal covariance var * I.
    self.basis = []
    for i in range(self.numofgauss):
        base = {}
        base['mean'] = np.matrix( [ float(i)/float(self.numofgauss) for j in range(0,self.gaussDim)] )
        base['var'] = np.matrix( np.diag([self.var for j in range(0,self.gaussDim)]) )
        base['detOfVar'] = np.linalg.det(base['var']) # pre-calculate deteminant of covariance
        base['invOfVar'] = np.linalg.inv(base['var']) # pre-calculate inverse of covariance
        self.basis.append(base)
    # init parameters: per (turn, action) pair keep a weight column vector
    # (theta), one feature list per basis (phi), and a label list.
    self.thetas = {}
    self.phis = {}
    self.labels = {}
    self.state_action_num = 0
    for t in range(0,self.numOfTurn):
        for a in util.turnIndex2action('b','cycle_tree',t):
            self.thetas[(t,a)] = np.matrix([[0.0] \
                for i in range(self.numofgauss)])
            self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
            self.labels[(t,a)] = []
            self.state_action_num += 1 # count self.state_action_num
def __init__(self, **args):
    """Set up an empty table of Q estimates."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Entries look like ((state, action), Q(s, a)); missing keys read as 0.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the learner's Q-value store."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Maps (state, action) pairs to Q values; default is 0.
    self.qvalues = Counter()
def __init__(self, **args):
    """Construct the agent; learned values start out empty."""
    ReinforcementAgent.__init__(self, **args)
    # Everything the agent has learnt so far about the board lives in
    # this Counter, with a default of 0 for unseen keys.
    self.qlearntVals = util.Counter()
def __init__(self, **args):
    """Prepare the nested Q-value dictionary."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Layout: outer key is the state, inner mapping is action -> value.
    self.qValues = {}
def __init__(self, **args):
    """Create Q-value and state-usage counters."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # qValues: (state, action) -> estimate; state_uses: usage bookkeeping.
    self.qValues = util.Counter()
    self.state_uses = util.Counter()
def __init__(self, **args):
    """Build the per-state Q-value store."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Each state lazily gets its own util.Counter of action values.
    self.q_values = defaultdict(util.Counter)
def __init__(self, **args):
    """Initialize the agent's value table."""
    ReinforcementAgent.__init__(self, **args)
    # util.Counter (from util.py) defaults missing keys to 0.
    self.states = util.Counter()
    "*** YOUR CODE HERE ***"
def __init__(self, **args):
    """Record all Q-values in one Counter-backed dictionary."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    ## Single Counter holding every Q estimate; absent keys act as zero.
    self.Q_Values = util.Counter()
def __init__(self, numOfTurn, numofgauss=5, var=0.25, lamda=0, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)
    self.qVal = util.Counter()
    # Radial-basis-function hyper-parameters.
    self.numofgauss = numofgauss   # number of Gaussian bases
    self.var = var                 # shared variance on each of the 82 dims
    self.lamda = lamda             # regularization weight (TODO confirm where used)
    self.numOfTurn = numOfTurn
    # Minimum sample count threshold -- larger basis sets require more data.
    if self.numofgauss<=5:
        self.minimumNum = 10
    else:
        self.minimumNum = 20
    # init basis: basis i has every coordinate of its mean at i/numofgauss;
    # the feature dimension is hard-coded to 82 here.
    self.basis = []
    for i in range(self.numofgauss):
        base = {}
        base['mean'] = [ float(i)/float(self.numofgauss) for j in range(0,82)]
        base['var'] = np.diag([var for j in range(0,82)])
        self.basis.append(base)
    # init parameters: per (turn, action) pair keep a weight column vector
    # (theta), one feature list per basis (phi), and a label list.
    self.thetas = {}
    self.phis = {}
    self.labels = {}
    self.state_action_num = 0
    for t in range(0,self.numOfTurn):
        for a in util.turnIndex2action('b','cycle_tree',t):
            self.thetas[(t,a)] = np.matrix([[0.0] \
                for i in range(self.numofgauss)])
            self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
            self.labels[(t,a)] = []
            self.state_action_num += 1 # count self.state_action_num
def __init__(self, **args):
    """Start the learner with an all-zero Q table."""
    ReinforcementAgent.__init__(self, **args)
    # Counter returns 0 for every (state, action) not yet updated.
    self.qVals = util.Counter()
    "*** YOUR CODE HERE ***"
def __init__(self, **args):
    """Construct the agent; alpha/discount/epsilon arrive via **args."""
    ReinforcementAgent.__init__(self, **args)
    # Q estimates keyed by (state, action), zero until learned.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize storage for learned Q-values."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Keeps track of the q values; unseen keys default to 0.
    self.values = util.Counter()
def __init__(self, **args):
    """Create the (initially empty) Q table."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Plain dictionary; callers are expected to handle missing keys.
    self.qTable = dict()
def __init__(self, **args):
    "You can initialize Q-values here..."
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # State values (default 0) plus the relational machinery: a predicate
    # set over the blocksworld and a Q-tree used for value estimates.
    # NOTE(review): assumes self.mdp was set before this runs (presumably
    # by the parent __init__) -- confirm against ReinforcementAgent.
    self.values = util.Counter()
    self.predicate = BlocksworldPred(self.mdp.count, self.mdp.stackNum)
    self.root = QTreeNode(.01, self.mdp.count) # FIXME epsilon should passed via command line
def __init__(self, **args):
    """Set up the agent's value store."""
    ReinforcementAgent.__init__(self, **args)
    # util.Counter is an extension of dict that keeps track of counts
    # for a set of keys, yielding 0 for any key never written.
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize values, mirroring valueIterationAgents.py."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # A Counter is just a dict whose missing entries read as 0.
    self.values = util.Counter()
def __init__(self, **args):
    """Create the (state, action) Q-value Counter."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Q-values are organised as one util.Counter of (state, action) pairs.
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the learner with an empty Q table.

    Fix: the original created a local ``states = []`` list that was never
    read or stored anywhere; the dead variable has been removed.
    """
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # keep a Counter/Dictionary of Q-values; unseen keys default to 0
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Set up Q-value storage and an (unset) environment handle."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # QValues: (state, action) -> estimate, defaulting to 0.
    self.QValues = util.Counter()
    # The environment is attached later by whoever drives the agent.
    self.environment = None
def __init__(self, **args):
    """Prepare per-state Q-value and visit-count dictionaries."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Both maps are filled lazily; qValues is a dict of util.Counter().
    self.qValues = {}
    self.visitCount = {}
def __init__(self, **args):
    """Create the agent and an empty map for per-state information."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # In init we only create an empty dictionary in which to store
    # the information about the states.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the Q table and the random-exploration schedule."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = util.Counter()
    # Modify greedy_constraint to set how many actions are chosen
    # randomly before the agent starts selecting greedily; to_greedy
    # is the running count toward that threshold.
    self.greedy_constraint = 100
    self.to_greedy = 0
def __init__(self, **args):
    """Build the Counter that backs all Q estimates."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # A Counter is the natural home for Q values: every entry starts
    # at 0 without any explicit per-key setup.
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize Q-values plus transition/reward bookkeeping."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # qValues/rewards default to 0 via Counter; stateProbs is a plain
    # dict filled in as transitions are observed.
    self.qValues = util.Counter()
    self.stateProbs = {}
    self.rewards = util.Counter()
def __init__(self, **args):
    """Create the Q-value Counter."""
    ReinforcementAgent.__init__(self, **args)
    # Plays the same role `values` does in valueIterationAgents.py.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Set up a two-level Q-value map: state -> action -> float."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Nested defaultdicts: both levels materialize on first access and
    # unknown (state, action) pairs read as 0.0.
    self.Qvalues = defaultdict(lambda: defaultdict(float))
def __init__(self, **args):
    """Construct the agent with an empty Q table."""
    ReinforcementAgent.__init__(self, **args)
    # QVals holds the (state, action) estimates, zero by default.
    self.QVals = util.Counter()
def startEpisode(self):
    # Delegate the standard per-episode bookkeeping to the parent, then
    # reset the working epsilon to its configured starting value
    # (presumably it is decayed during the episode elsewhere -- confirm).
    ReinforcementAgent.startEpisode(self)
    self.episode_epsilon = self.episode_init_epsilon
def __init__(self, **args):
    """Initialize the Q-value table."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # One Counter keyed by (state, action); zero until learned.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Create the state/action bookkeeping dictionary."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Filled in lazily as state/action pairs are encountered.
    self.statesActions = dict()
def __init__(self, **args):
    """Set up the hash (dict) of Q-values."""
    ReinforcementAgent.__init__(self, **args)
    "*** YOUR CODE HERE ***"
    # Plain dict; callers manage defaults for unseen keys.
    self.qValueHash = {}