Exemplo n.º 1
0
  def __init__(self, **args):
    """Set up empty Q-value storage for the agent.

    Both tables are util.Counter instances, so any key that has never
    been written reads back as 0.0.
    """
    ReinforcementAgent.__init__(self, **args)
    # NOTE: the original carried a long block of value-iteration
    # pseudo-code here; it was planning notes only (no-op bare strings)
    # and had no effect on behavior.
    self.qvalues = util.Counter()
    self.values = util.Counter()
Exemplo n.º 2
0
    def __init__(self, **args):
        """Create empty Q-value and visit-count tables."""
        ReinforcementAgent.__init__(self, **args)
        # Plain dicts; entries are created as pairs are first seen.
        self.Qvalues = {}
        self.visit = {}
    def __init__(self, **args):
        """Initialize the agent; Q-values live in a util.Counter."""
        ReinforcementAgent.__init__(self, **args)
        # util.Counter acts like a dict whose missing keys default to 0.
        self.qvalues = util.Counter()
Exemplo n.º 4
0
    def __init__(self, **args):
        """Set up the Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Counter indexed by tuples; everything starts at 0.
        self.qValueMap = util.Counter()
    def __init__(self, **args):
        """Initialize Q-value storage."""
        ReinforcementAgent.__init__(self, **args)
        # Counter (map) keyed by state + action, holding the Q estimate.
        self.Q = util.Counter()
Exemplo n.º 6
0
	def __init__(self, numOfTurn, numofgauss, var, lamda, gaussDim, **args):
		"""Q-learning agent with a Gaussian basis-function approximator.

		numOfTurn  -- number of turn indices to create parameters for
		numofgauss -- number of Gaussian basis functions
		var        -- shared diagonal variance for every basis function
		lamda      -- presumably a regularization weight; only stored here
		              (TODO confirm where it is used)
		gaussDim   -- dimensionality of each Gaussian's mean and covariance
		"""
		ReinforcementAgent.__init__(self, **args)
		self.qVal = util.Counter()
		self.numofgauss = numofgauss
		self.var = var
		self.lamda = lamda
		self.numOfTurn = numOfTurn
		self.gaussDim = gaussDim
	
		# init basis: one Gaussian per i; every component of basis i's mean
		# is i/numofgauss, so the centers are spread along the diagonal.
		self.basis = []
		for i in range(self.numofgauss):
			base = {}
			base['mean'] = np.matrix( [ float(i)/float(self.numofgauss) for j in range(0,self.gaussDim)] )
			base['var'] = np.matrix( np.diag([self.var for j in range(0,self.gaussDim)]) )
			base['detOfVar'] = np.linalg.det(base['var']) # pre-calculate determinant of covariance
			base['invOfVar'] = np.linalg.inv(base['var']) # pre-calculate inverse of covariance
			self.basis.append(base)

		# init parameters: one theta column vector, phi list, and label list
		# per (turn, action) pair from util.turnIndex2action (opaque helper;
		# presumably enumerates the actions legal at turn t — confirm).
		self.thetas = {}
		self.phis = {}
		self.labels = {}
		self.state_action_num = 0
		for t in range(0,self.numOfTurn):
			for a in util.turnIndex2action('b','cycle_tree',t):
				self.thetas[(t,a)] = np.matrix([[0.0] \
				for i in range(self.numofgauss)])	
				self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
				self.labels[(t,a)] = []
				self.state_action_num += 1 # count self.state_action_num
    def __init__(self, **args):
        """Initialize the agent's Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Maps (state, action) -> Q(s, a); unseen keys default to 0.
        self.qValues = util.Counter()
Exemplo n.º 8
0
    def __init__(self, **args):
        """Initialize the Q-learning agent's value table."""
        ReinforcementAgent.__init__(self, **args)
        # Counter mapping (state, action) pairs to Q estimates.
        self.qvalues = Counter()
Exemplo n.º 9
0
    def __init__(self, **args):
        """Initialize storage for everything learned so far."""
        ReinforcementAgent.__init__(self, **args)
        # Holds the agent's learned values; Counter defaults to 0.
        self.qlearntVals = util.Counter()
    def __init__(self, **args):
        """Initialize the Q-value store.

        Intended layout: {state: {action: value}} — one inner dict
        per state, created lazily.
        """
        ReinforcementAgent.__init__(self, **args)
        self.qValues = {}
Exemplo n.º 11
0
    def __init__(self, **args):
        """Initialize Q-values and state-usage counters."""
        ReinforcementAgent.__init__(self, **args)
        # Independent tables: one for Q estimates, one tallying how
        # often each state has been used.
        self.state_uses = util.Counter()
        self.qValues = util.Counter()
    def __init__(self, **args):
        """Initialize the nested Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Outer defaultdict keyed by state; each missing state gets a
        # fresh util.Counter, so q_values[s][a] defaults to 0.
        self.q_values = defaultdict(util.Counter)
    def __init__(self, **args):
        """Initialize per-state storage."""
        ReinforcementAgent.__init__(self, **args)
        # util.Counter (from util.py): dict subclass defaulting to 0.
        self.states = util.Counter()
    def __init__(self, **args):
        """Create the dictionary that records all Q-values."""
        ReinforcementAgent.__init__(self, **args)
        # Counter version of a dict: missing keys read as 0.
        self.Q_Values = util.Counter()
	def __init__(self, numOfTurn, numofgauss=5, var=0.25, lamda=0, **args):
		"""Q-learning agent using a Gaussian basis-function approximator.

		numOfTurn  -- number of turn indices to create parameters for
		numofgauss -- number of Gaussian basis functions (default 5)
		var        -- shared diagonal variance per basis (default 0.25)
		lamda      -- presumably a regularization weight; only stored here
		              (TODO confirm where it is used)
		"""
		ReinforcementAgent.__init__(self, **args)
		self.qVal = util.Counter()
		self.numofgauss = numofgauss
		self.var = var
		self.lamda = lamda
		self.numOfTurn = numOfTurn

		# Larger basis sets require more samples; exact meaning of the
		# threshold is set by the training code — TODO confirm.
		if self.numofgauss<=5:
			self.minimumNum = 10
		else:
			self.minimumNum = 20

		# init basis: one Gaussian per i; every component of basis i's mean
		# is i/numofgauss, in a fixed 82-dimensional feature space.
		self.basis = []
		for i in range(self.numofgauss):
			base = {}
			base['mean'] = [ float(i)/float(self.numofgauss) for j in range(0,82)]
			base['var'] = np.diag([var for j in range(0,82)])
			self.basis.append(base)

		# init parameters: one theta column vector, phi list, and label list
		# per (turn, action) pair from util.turnIndex2action (opaque helper;
		# presumably enumerates the actions legal at turn t — confirm).
		self.thetas = {}
		self.phis = {}
		self.labels = {}
		self.state_action_num = 0
		for t in range(0,self.numOfTurn):
			for a in util.turnIndex2action('b','cycle_tree',t):
				self.thetas[(t,a)] = np.matrix([[0.0] \
				for i in range(self.numofgauss)])	
				self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
				self.labels[(t,a)] = []
				self.state_action_num += 1 # count self.state_action_num
    def __init__(self, **args):
        """Initialize the agent and its Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Q estimates per key; unseen entries default to 0.
        self.qVals = util.Counter()
Exemplo n.º 17
0
 def __init__(self, **args):
     """Initialize Q-value storage.

     Learning parameters (alpha, discount, epsilon) are set up by
     ReinforcementAgent.__init__ and available as attributes.
     """
     ReinforcementAgent.__init__(self, **args)
     self.qValues = util.Counter()
Exemplo n.º 18
0
    def __init__(self, **args):
        """Initialize the table that keeps track of Q-values."""
        ReinforcementAgent.__init__(self, **args)
        # Counter so unvisited entries read back as zero.
        self.values = util.Counter()
Exemplo n.º 19
0
    def __init__(self, **args):
        """Create an empty Q-table."""
        ReinforcementAgent.__init__(self, **args)
        # Plain dict; entries appear as state/action pairs are seen.
        self.qTable = {}
Exemplo n.º 20
0
  def __init__(self, **args):
    """Initialize values, blocksworld predicates, and the Q-tree root."""
    ReinforcementAgent.__init__(self, **args)
    self.values = util.Counter()
    # Both helpers are sized from the underlying MDP.
    self.predicate = BlocksworldPred(self.mdp.count, self.mdp.stackNum)
    # FIXME: the hard-coded epsilon (.01) should be passed via command line.
    self.root = QTreeNode(.01, self.mdp.count)
    def __init__(self, **args):
        """Initialize the agent's value store."""
        ReinforcementAgent.__init__(self, **args)
        # util.Counter: a dict extension that keeps per-key counts,
        # defaulting to zero for anything not yet set.
        self.values = util.Counter()
Exemplo n.º 22
0
    def __init__(self, **args):
        """Initialize value storage (mirrors valueIterationAgents.py)."""
        ReinforcementAgent.__init__(self, **args)
        # A Counter is a dict whose missing keys default to 0.
        self.values = util.Counter()
Exemplo n.º 23
0
    def __init__(self, **args):
        """Initialize the Q-value counter."""
        ReinforcementAgent.__init__(self, **args)
        # Organised as a util.Counter keyed by (state, action) pairs.
        self.qvalues = util.Counter()
Exemplo n.º 24
0
 def __init__(self, **args):
     """Initialize the Q-learning agent.

     Q-values are kept in a util.Counter, so any pair that has never
     been updated reads back as zero.
     """
     ReinforcementAgent.__init__(self, **args)
     # NOTE(review): removed an unused local (`states = []`) that was
     # assigned but never read — leftover from an earlier draft.
     self.qvalues = util.Counter()
Exemplo n.º 25
0
  def __init__(self, **args):
    """Initialize Q-values; the environment is attached later."""
    ReinforcementAgent.__init__(self, **args)
    # No environment bound yet; set externally after construction.
    self.environment = None
    self.QValues = util.Counter()
    def __init__(self, **args):
        """Initialize Q-value and visit-count dictionaries."""
        ReinforcementAgent.__init__(self, **args)
        # Both start as empty plain dicts (note: not Counters here);
        # entries are created on first use.
        self.qValues = {}
        self.visitCount = {}
    def __init__(self, **args):
        """Initialize the Q-value table."""
        ReinforcementAgent.__init__(self, **args)
        # Start from an empty Counter; per-state information is
        # stored here as learning proceeds.
        self.qValues = util.Counter()
Exemplo n.º 28
0
 def __init__(self, **args):
   """Initialize Q-values and the random-then-greedy schedule."""
   ReinforcementAgent.__init__(self, **args)
   self.Q = util.Counter()
   # How many actions to choose randomly before selecting greedily
   # (tune by editing this value); to_greedy presumably tracks
   # progress toward that threshold — confirm against action selection.
   self.greedy_constraint = 100
   self.to_greedy = 0
Exemplo n.º 29
0
  def __init__(self, **args):
    """Initialize the Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    # A util.Counter holds the Q-values; every entry starts at 0
    # because Counter supplies a zero default.
    self.qvalues = util.Counter()
Exemplo n.º 30
0
    def __init__(self, **args):
        """Initialize Q-value, state-probability, and reward tables."""
        ReinforcementAgent.__init__(self, **args)
        self.qValues = util.Counter()
        self.rewards = util.Counter()
        # Plain dict, filled lazily; presumably per-state probability
        # estimates (the name is the only evidence — confirm).
        self.stateProbs = {}
Exemplo n.º 31
0
 def __init__(self, **args):
     """Initialize Q-values (analogous to values in valueIterationAgents.py)."""
     ReinforcementAgent.__init__(self, **args)
     self.qValues = util.Counter()
    def __init__(self, **args):
        """Initialize the nested Q-value mapping."""
        ReinforcementAgent.__init__(self, **args)
        # Two-level defaultdict: Qvalues[state][action] yields 0.0
        # for anything never written.
        self.Qvalues = defaultdict(lambda: defaultdict(float))
Exemplo n.º 33
0
    def __init__(self, **args):
        """Initialize the Q-value counter."""
        ReinforcementAgent.__init__(self, **args)
        # Missing keys read as 0 via util.Counter.
        self.QVals = util.Counter()
 def startEpisode(self):
     # Delegate episode bookkeeping to the base class, then reset the
     # per-episode epsilon to its configured starting value so every
     # episode begins from the same exploration rate (presumably the
     # epsilon used for epsilon-greedy selection — confirm).
     ReinforcementAgent.startEpisode(self)
     self.episode_epsilon = self.episode_init_epsilon
    def __init__(self, **args):
        """Set up Q-value storage for the learner."""
        ReinforcementAgent.__init__(self, **args)
        # util.Counter: missing keys default to 0.
        self.qValues = util.Counter()
Exemplo n.º 36
0
 def __init__(self, **args):
     """Initialize the state/action value dictionary."""
     ReinforcementAgent.__init__(self, **args)
     # Empty plain dict; populated during learning.
     self.statesActions = {}
Exemplo n.º 37
0
    def __init__(self, **args):
        """Create the hash map that stores Q-values."""
        ReinforcementAgent.__init__(self, **args)
        # Empty dict; keys are added as values are learned.
        self.qValueHash = {}