def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Q-value table: the key is a (state, action) pair, the value Q(s, a).
        self.qValues = util.Counter()
Exemplo n.º 2
0
    def __init__(self, **args):
        """Set up the base agent and two empty util.Counter tables."""
        ReinforcementAgent.__init__(self, **args)

        # NOTE(review): presumably qValues holds the learned Q-values and
        # state_uses tracks how often something was used -- confirm with the
        # methods that read them.
        self.qValues = util.Counter()
        self.state_uses = util.Counter()
Exemplo n.º 3
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Q-value table keyed by (state, action) pairs.
        self.qvalues = Counter()
Exemplo n.º 4
0
	def __init__(self, numOfTurn, numofgauss, var, lamda, gaussDim, **args):
		"""Build the Gaussian basis and the per-(turn, action) learning state.

		numOfTurn  -- number of turn indices for which parameters are created
		numofgauss -- number of Gaussian basis functions
		var        -- value put on the diagonal of every basis covariance
		lamda      -- stored on the agent; not used inside this method
		gaussDim   -- dimension of each basis mean and covariance
		args       -- forwarded unchanged to ReinforcementAgent.__init__
		"""
		ReinforcementAgent.__init__(self, **args)
		self.qVal = util.Counter()
		self.numOfTurn = numOfTurn
		self.numofgauss = numofgauss
		self.gaussDim = gaussDim
		self.var = var
		self.lamda = lamda

		# Basis functions: basis k is centred at k / numofgauss on every axis
		# and all of them share the same diagonal covariance.
		self.basis = []
		for index in range(self.numofgauss):
			center = float(index) / float(self.numofgauss)
			covariance = np.matrix(np.diag([self.var] * self.gaussDim))
			self.basis.append({
				'mean': np.matrix([center] * self.gaussDim),
				'var': covariance,
				# Determinant and inverse are computed once here and cached.
				'detOfVar': np.linalg.det(covariance),
				'invOfVar': np.linalg.inv(covariance),
			})

		# Learning state, one entry per (turn, action) key.
		self.thetas = {}
		self.phis = {}
		self.labels = {}
		self.state_action_num = 0
		for turn in range(self.numOfTurn):
			for action in util.turnIndex2action('b', 'cycle_tree', turn):
				key = (turn, action)
				# Column of zeros with one row per basis function.
				self.thetas[key] = np.matrix(
					[[0.0] for _ in range(self.numofgauss)])
				# One independent (unshared) list per basis function.
				self.phis[key] = [[] for _ in range(self.numofgauss)]
				self.labels[key] = []
				# Running count of (turn, action) entries created.
				self.state_action_num += 1
Exemplo n.º 5
0
    def __init__(self, **args):
        """Set up the base agent and an empty table of learned values."""
        ReinforcementAgent.__init__(self, **args)

        # Holds whatever the agent has learnt so far about the board.
        self.qlearntVals = util.Counter()
Exemplo n.º 6
0
  def __init__(self, **args):
    """Set up the base agent and two empty util.Counter tables.

    Little work is needed at construction time.
    """
    ReinforcementAgent.__init__(self, **args)

    # Design notes kept from the original author (pseudo code):
    #   allowed function: self.getLegalActions(state)
    #   create a counter for (state, action) -> value;
    #     the key of the counter is the state/action pair.
    #   if nothing has been learned yet, the value is 0.0
    #   every time a new state gets a reward: ???
    #   for k = 1 to ...:
    #     for each state s:
    #       for action in allActions:
    #         for eachOutcome in transition:
    #           immediateReward = ...
    #           discountedFuture = ...
    #           nextState value = immediateReward + discountedFuture
    #           result = probs * nextState value
    #       find the best action according to chooseAction and return it.
    #   use the batch version: each v_k is computed from a fixed v_(k-1)
    #   that is not updated during the sweep.
    #   collect the policy from the values/actions later.
    self.qvalues = util.Counter()
    self.values = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty nested Q-value mapping."""
        ReinforcementAgent.__init__(self, **args)

        # Intended layout: {state: {action: value}}.
        self.qValues = dict()
Exemplo n.º 8
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value map."""
        ReinforcementAgent.__init__(self, **args)

        # Tuple-indexed map; every entry starts out at 0.
        self.qValueMap = util.Counter()
Exemplo n.º 9
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)

     # Storage for the learned Q-values.
     self.qValues = util.Counter()
Exemplo n.º 10
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q table."""
        ReinforcementAgent.__init__(self, **args)

        # Plain dictionary: unlike a Counter, missing keys raise KeyError.
        self.qTable = dict()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Storage for the learned Q-values.
        self.qVals = util.Counter()
	def __init__(self, numOfTurn, numofgauss=5, var=0.25, lamda=0, gaussDim=82, **args):
		"""Build the Gaussian basis and the per-(turn, action) learning state.

		numOfTurn  -- number of turn indices for which parameters are created
		numofgauss -- number of Gaussian basis functions (default 5)
		var        -- value put on the diagonal of every basis covariance
		lamda      -- stored on the agent; not used inside this method
		gaussDim   -- dimension of each basis mean and covariance; defaults
		              to 82, the value that used to be hard-coded here
		args       -- forwarded unchanged to ReinforcementAgent.__init__
		"""
		ReinforcementAgent.__init__(self, **args)
		self.qVal = util.Counter()
		self.numofgauss = numofgauss
		self.var = var
		self.lamda = lamda
		self.numOfTurn = numOfTurn
		self.gaussDim = gaussDim

		# Threshold depends on the basis size: 10 for up to five Gaussians,
		# 20 otherwise.
		# NOTE(review): presumably a minimum sample count -- confirm in the
		# code that reads minimumNum.
		self.minimumNum = 10 if self.numofgauss <= 5 else 20

		# Basis functions: basis k is centred at k / numofgauss on every axis
		# and all of them share the same diagonal covariance.
		self.basis = []
		for index in range(self.numofgauss):
			center = float(index) / float(self.numofgauss)
			self.basis.append({
				'mean': [center] * self.gaussDim,
				'var': np.diag([self.var] * self.gaussDim),
			})

		# Learning state, one entry per (turn, action) key.
		self.thetas = {}
		self.phis = {}
		self.labels = {}
		self.state_action_num = 0
		for turn in range(self.numOfTurn):
			for action in util.turnIndex2action('b', 'cycle_tree', turn):
				key = (turn, action)
				# Column of zeros with one row per basis function.
				self.thetas[key] = np.matrix(
					[[0.0] for _ in range(self.numofgauss)])
				# One independent (unshared) list per basis function.
				self.phis[key] = [[] for _ in range(self.numofgauss)]
				self.labels[key] = []
				# Running count of (turn, action) entries created.
				self.state_action_num += 1
    def __init__(self, **args):
        """Set up the base agent and an empty util.Counter table."""
        ReinforcementAgent.__init__(self, **args)

        # Counter class provided by util.py.
        self.states = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # All Q-values are recorded in a dictionary-like util.Counter.
        self.Q_Values = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value dictionary."""
        ReinforcementAgent.__init__(self, **args)

        # Two-level mapping: a missing outer key gets a fresh util.Counter.
        self.q_values = defaultdict(util.Counter)
Exemplo n.º 16
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Table that keeps track of the Q-values.
        self.values = util.Counter()
Exemplo n.º 17
0
    def __init__(self, **args):
        """Set up the base agent and two empty dictionaries."""
        ReinforcementAgent.__init__(self, **args)

        # Plain dictionaries: missing keys raise KeyError.
        # NOTE(review): visit presumably counts visits -- confirm in the
        # methods that update it.
        self.Qvalues = {}
        self.visit = {}
Exemplo n.º 18
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Storage for the learned Q-values.
        self.qvalues = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Counter used as a map: key = state + action, value = Q-value.
        self.Q = util.Counter()
    def __init__(self, **args):
        """Set up the base agent, an empty Q-value dict and a visit-count dict."""
        ReinforcementAgent.__init__(self, **args)

        # qValues is meant to become a dict of util.Counter() objects.
        self.qValues = dict()
        self.visitCount = dict()
Exemplo n.º 21
0
  def __init__(self, **args):
    """Set up the base agent, an empty Q-value table and an unset environment."""
    ReinforcementAgent.__init__(self, **args)

    self.QValues = util.Counter()
    # No environment is attached at construction time.
    self.environment = None
    def __init__(self, **args):
        """Set up the base agent and an empty value table."""
        ReinforcementAgent.__init__(self, **args)

        # util.Counter extends the dictionary and keeps track of counts
        # for a set of keys.
        self.values = util.Counter()
Exemplo n.º 23
0
    def __init__(self, **args):
        """Set up the base agent and an empty value table."""
        ReinforcementAgent.__init__(self, **args)

        # Same storage as in valueIterationAgents.py: a Counter is a dict
        # whose default value is 0.
        self.values = util.Counter()
Exemplo n.º 24
0
  def __init__(self, **args):
    """Set up the base agent, a value table, the predicate and the Q-tree root.

    Reads self.mdp.count and self.mdp.stackNum.
    NOTE(review): self.mdp is not assigned in this method; presumably the
    base class or the class body provides it -- confirm.
    """
    ReinforcementAgent.__init__(self, **args)

    self.values = util.Counter()
    self.predicate = BlocksworldPred(self.mdp.count, self.mdp.stackNum)
    # FIXME: the 0.01 epsilon should be passed in via the command line.
    self.root = QTreeNode(0.01, self.mdp.count)
Exemplo n.º 25
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Q-values live in a util.Counter() keyed by (state, action) pairs.
        self.qvalues = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Start with an empty dictionary-like container in which the
        # information about the states will be stored.
        self.qValues = util.Counter()
Exemplo n.º 27
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table.

     args -- forwarded unchanged to ReinforcementAgent.__init__
     """
     ReinforcementAgent.__init__(self, **args)

     # Counter/dictionary holding the Q-values.
     self.qvalues = util.Counter()
Exemplo n.º 28
0
    def __init__(self, **args):
        """Set up the base agent and its three empty bookkeeping tables."""
        ReinforcementAgent.__init__(self, **args)

        # NOTE(review): the names suggest Q-values, per-state probabilities
        # and observed rewards -- confirm against the update code.
        self.qValues = util.Counter()
        self.stateProbs = {}
        self.rewards = util.Counter()
Exemplo n.º 29
0
 def __init__(self, **args):
   """Set up the base agent, the Q table and the greedy-switch counters."""
   ReinforcementAgent.__init__(self, **args)
   self.Q = util.Counter()
   # Number of actions to pick at random before selecting greedily;
   # change this value to tune that behaviour.
   self.greedy_constraint = 100
   # Starts at 0.
   # NOTE(review): presumably counts actions taken toward
   # greedy_constraint -- confirm in the action-selection code.
   self.to_greedy = 0
Exemplo n.º 30
0
  def __init__(self, **args):
    """Set up the base agent and an empty Q-value table."""
    ReinforcementAgent.__init__(self, **args)

    # A counter to put the Q-values in; because it is a counter, every
    # entry starts out as 0.
    self.qvalues = util.Counter()
Exemplo n.º 31
0
 def __init__(self, **args):
     """Set up the base agent and an empty, private Q-value dictionary."""
     ReinforcementAgent.__init__(self, **args)
     # Plain dictionary: missing keys raise KeyError.
     self._q_values = dict()
Exemplo n.º 32
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Counter in which all Q-values are saved.
        self.Q = util.Counter()
Exemplo n.º 33
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Counter that tracks all Q-values.
     self.qVals = util.Counter()
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Keys are nested tuples of the form ((state.x, state.y), action).
     self.QVals = util.Counter()
Exemplo n.º 35
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Initialize the Q-values; they all start at 0.
     self.qValues = util.Counter()
Exemplo n.º 36
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Q-value dictionary keyed by (state, action).
     self.qval = util.Counter()
Exemplo n.º 37
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value dictionary."""
        ReinforcementAgent.__init__(self, **args)
        # Despite the name this is a plain dictionary, not a list.
        self.qValuelist = dict()
    def __init__(self, **args):
        """Set up the base agent and an empty value table."""
        ReinforcementAgent.__init__(self, **args)

        # A Counter is a dict whose default value is 0.
        self.values = util.Counter()
Exemplo n.º 39
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Entries are stored per (state, action) pair rather than per state.
     self.q_values = util.Counter()
Exemplo n.º 40
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Storage for the learned Q-values.
        self.q_values = util.Counter()
Exemplo n.º 41
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Storage for the learned Q-values.
        self.Qvalues = util.Counter()
Exemplo n.º 42
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q dictionary."""
        ReinforcementAgent.__init__(self, **args)

        # Holds all the states seen so far.
        self.Q = dict()
    def __init__(self, **args):
        """Set up the base agent and an empty Q dictionary."""
        ReinforcementAgent.__init__(self, **args)

        # Represents Q(state, action).
        self.Q = dict()
Exemplo n.º 44
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Dict-like Q-value store whose key is a (state, action) pair.
     self.q_val = util.Counter()
 def __init__(self, **args):
     """Set up the base agent and an empty Q table."""
     ReinforcementAgent.__init__(self, **args)
     # Q-values are represented as a dict of dicts.
     self.q_table = {}
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Stores the Q-value of every (state, action) tuple.
        self.qvalues = util.Counter()
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # Storage for the learned Q-values.
     self.qvalues = util.Counter()
Exemplo n.º 48
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Indexed by state and action.
        self.QValues = util.Counter()
Exemplo n.º 49
0
 def __init__(self, **args):
     """Set up the base agent and an empty Q-value table."""
     ReinforcementAgent.__init__(self, **args)
     # The Q-values all start at 0, using the dictionary class from util.py.
     self.values = util.Counter()
Exemplo n.º 50
0
  def __init__(self, **args):
    """Set up the base agent and an empty Q-value table."""
    ReinforcementAgent.__init__(self, **args)

    # Storage for the learned Q-values.
    self.qvalues = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # The key of each entry is a (state, action) tuple.
        self.Qvalues = util.Counter()
Exemplo n.º 52
0
    def __init__(self, **args):
        """Initialize schedules, bookkeeping fields and the policy objects.

        All keyword arguments are forwarded to ``ReinforcementAgent.__init__``.
        This method additionally reads:

        * ``args["task"]`` and ``args["layout"]`` (always required);
        * ``args["transfer"]`` (optional): indexable; element 0 selects the
          pre-trained model loaded into ``policy_first`` and is stored in
          ``self.n1``, element 1 is stored in ``self.n2``;
        * ``args["sim_function"]`` (required with ``transfer``): file name
          under ``datos/`` of the similarity model loaded with Keras.

        Raises:
            KeyError: if a required key is missing from ``args``.
            ValueError: if ``args["transfer"][0]`` has no pre-trained model.
        """
        ReinforcementAgent.__init__(self, **args)
        self.actions = [NORTH, WEST, SOUTH, EAST, STOP]
        self.num_episodes = 1
        self.num_datos = 0
        self.task = args["task"]
        self.memory = []
        self.memory_length = 4
        self.prueba = False
        self.history = []

        layout = args["layout"]
        width = layout.width
        height = layout.height

        def build_policy():
            # Both policies are constructed with exactly the same settings.
            return Policy(width, height, 5, use_image=True, use_prior=False)

        self.BREAK = False
        self.phi = 1
        self.phi_end = 0.1
        # Geometric factor chosen so that phi * phi_decay ** numTraining
        # equals phi_end.
        self.phi_decay = np.exp((np.log(self.phi_end) - np.log(self.phi)) /
                                (int(self.numTraining)))

        self.num_trans = 0
        self.lastReward = 0
        self.n2 = 0
        self.n1 = 0
        self.eps_start = EPS_START
        self.epsilon = EPS_START
        # Same construction as phi_decay, but epsilon reaches EPS_END after
        # int(numTraining / 2) applications.
        self.eps_decay = np.exp((np.log(EPS_END) - np.log(EPS_START)) /
                                (int(self.numTraining / 2)))
        self.eps_end = EPS_END
        self.similarity_function = None

        if "transfer" in args:
            self.policy_first = build_policy()
            self.policy_second = build_policy()
            num_first = args["transfer"][0]
            num_second = args["transfer"][1]

            self.n1 = num_first
            self.n2 = num_second

            # Pre-trained source models by index. A model for index 2
            # ("modelo_imagen_25000_04_01_dif2_1577007228_attemp_2_gamma0.9")
            # was disabled in the original code and stays disabled here.
            pretrained_models = {
                0: "modelo_imagen_20000_04_01_dif0_1575607728_gamma_0.9_attemp_8",
                1: "modelo_imagen_25000_04_01_dif1_1576737275_attemp_3_gamma0.9",
            }
            if num_first not in pretrained_models:
                # Used to surface as an UnboundLocalError on `name`.
                raise ValueError(
                    "no pre-trained model for transfer index %r" % (num_first,))
            name = pretrained_models[num_first]
            self.policy_first.load_Model("models/" + name + ".h5")

            # Keras reads the model straight from the path. The extra file
            # handle that used to be opened here in "r+b" mode was never
            # read, needed write permission, and leaked if loading failed.
            self.similarity_function = keras.models.load_model(
                "datos/" + args["sim_function"])
        else:
            self.policy_second = build_policy()
Exemplo n.º 53
0
 def __init__(self, **args):
     """Set up the base agent, the Q-value table and a 'minus infinity' value."""
     ReinforcementAgent.__init__(self, **args)
     # Dictionary-like store for the Q-values.
     self.qvalues = util.Counter()
     # Large negative integer standing in for minus infinity.
     self.minus_infinity = -10000000000000
Exemplo n.º 54
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value mapping."""
        ReinforcementAgent.__init__(self, **args)

        # Missing keys are created on first access with the value 0.
        self.Qvalue = defaultdict(lambda: 0)
Exemplo n.º 55
0
 def __init__(self, **args):
     """Set up the base agent; Q-values can be initialized here."""
     ReinforcementAgent.__init__(self, **args)
     # Exercise placeholder: no Q-value storage is created by this method.
Exemplo n.º 56
0
 def __init__(self, **args):
     """Set up the base agent and an empty two-level Q-value mapping."""
     ReinforcementAgent.__init__(self, **args)
     # A missing outer key creates an inner defaultdict, whose own
     # missing keys are created with the value 0.
     self._qValues = defaultdict(lambda: defaultdict(lambda: 0))
Exemplo n.º 57
0
    def __init__(self, **args):
        """Set up the base agent and an empty dictionary of seen Q-values."""
        ReinforcementAgent.__init__(self, **args)

        # Plain dictionary: missing keys raise KeyError.
        self.seenQValues = dict()
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Layout: {(state, action): value}.
        self.stateActionPair = util.Counter()
Exemplo n.º 59
0
    def __init__(self, **args):
        """Set up the base agent and an empty Q-value table."""
        ReinforcementAgent.__init__(self, **args)

        # Table used to keep track of the Q-values.
        self.qValuesTable = util.Counter()
    def __init__(self, **args):
        """Set up the base agent and an empty two-level Q-value mapping."""
        ReinforcementAgent.__init__(self, **args)

        # A missing outer key creates an inner defaultdict(float), whose
        # own missing keys are created with the value 0.0.
        self.Qvalues = defaultdict(lambda: defaultdict(float))