def __init__(self, **args):
    """Initialize the agent and its Q-value store."""
    ReinforcementAgent.__init__(self, **args)
    # Q-values keyed by (state, action); a Counter returns 0 for unseen keys.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the agent with Q-value and usage counters."""
    ReinforcementAgent.__init__(self, **args)
    # Both counters default missing keys to 0.
    self.qValues = util.Counter()
    # Presumably per-state visit counts — confirm against the methods that read it.
    self.state_uses = util.Counter()
def __init__(self, **args):
    """Set up the learner; Q-values start empty."""
    ReinforcementAgent.__init__(self, **args)
    # Maps (state, action) pairs to Q-values.
    self.qvalues = Counter()
def __init__(self, numOfTurn, numofgauss, var, lamda, gaussDim, **args):
    """Initialize the agent plus a set of Gaussian radial-basis functions.

    Args:
        numOfTurn: number of turns; one parameter set is built per (turn, action).
        numofgauss: number of Gaussian basis functions.
        var: shared variance placed on the diagonal of each basis covariance.
        lamda: regularization weight (spelling as in original; presumably "lambda").
        gaussDim: dimensionality of each Gaussian basis function.
        **args: forwarded unchanged to ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)
    self.qVal = util.Counter()
    self.numofgauss = numofgauss
    self.var = var
    self.lamda = lamda
    self.numOfTurn = numOfTurn
    self.gaussDim = gaussDim
    # init basis: the i-th basis has a constant mean vector i/numofgauss
    # (every component equal) and isotropic covariance var * I.
    self.basis = []
    for i in range(self.numofgauss):
        base = {}
        base['mean'] = np.matrix( [ float(i)/float(self.numofgauss) for j in range(0,self.gaussDim)] )
        base['var'] = np.matrix( np.diag([self.var for j in range(0,self.gaussDim)]) )
        base['detOfVar'] = np.linalg.det(base['var']) # pre-calculate determinant of covariance
        base['invOfVar'] = np.linalg.inv(base['var']) # pre-calculate inverse of covariance
        self.basis.append(base)
    # init parameters: one weight vector (theta), feature list (phi) and
    # label list per (turn, action) pair.
    self.thetas = {}
    self.phis = {}
    self.labels = {}
    self.state_action_num = 0
    for t in range(0,self.numOfTurn):
        for a in util.turnIndex2action('b','cycle_tree',t):
            self.thetas[(t,a)] = np.matrix([[0.0] \
                for i in range(self.numofgauss)])
            self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
            self.labels[(t,a)] = []
            self.state_action_num += 1 # count self.state_action_num
def __init__(self, **args):
    """Initialize the agent with an empty table of learned Q-values."""
    ReinforcementAgent.__init__(self, **args)
    # Everything the agent has learned so far; unseen entries read as 0.
    self.qlearntVals = util.Counter()
def __init__(self, **args):
    """Initialize the agent with empty Q-value and value tables."""
    ReinforcementAgent.__init__(self, **args)
    # Planning notes carried over from the original author: Q-values live in
    # a counter keyed by (state, action), defaulting to 0.0 for anything not
    # yet learned. The learning loop itself follows the batch shape — each
    # v_k computed from a fixed v_{k-1}, best action picked via chooseAction —
    # but none of that work belongs in __init__.
    self.qvalues = util.Counter()
    self.values = util.Counter()
def __init__(self, **args):
    """Create the agent; Q-values start as an empty nested dict."""
    ReinforcementAgent.__init__(self, **args)
    # Layout: {state: {action: value}}.
    self.qValues = {}
def __init__(self, **args):
    """Initialize the agent and its Q-value map."""
    ReinforcementAgent.__init__(self, **args)
    # Counter indexed by tuples; every entry starts at 0.
    self.qValueMap = util.Counter()
def __init__(self, **args):
    """Initialize the agent; alpha/discount/epsilon come from the superclass."""
    ReinforcementAgent.__init__(self, **args)
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the agent with an empty Q-table."""
    ReinforcementAgent.__init__(self, **args)
    self.qTable = {}
def __init__(self, **args):
    """Initialize the agent and its Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    self.qVals = util.Counter()
def __init__(self, numOfTurn, numofgauss=5, var=0.25, lamda=0, **args):
    """Initialize the agent plus a Gaussian basis model over 82-dim features.

    Args:
        numOfTurn: number of turns; one parameter set is built per (turn, action).
        numofgauss: number of Gaussian basis functions (default 5).
        var: shared variance on the diagonal of each basis covariance.
        lamda: regularization weight (spelling as in original).
        **args: forwarded unchanged to ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)
    self.qVal = util.Counter()
    self.numofgauss = numofgauss
    self.var = var
    self.lamda = lamda
    self.numOfTurn = numOfTurn
    # Minimum sample count scaled with the basis size — confirm exact
    # semantics against the methods that read minimumNum.
    if self.numofgauss<=5:
        self.minimumNum = 10
    else:
        self.minimumNum = 20
    # init basis: the i-th basis has a constant mean i/numofgauss and a
    # diagonal covariance var * I. The feature dimensionality (82) is
    # hard-coded here — presumably fixed by the feature extractor; verify.
    self.basis = []
    for i in range(self.numofgauss):
        base = {}
        base['mean'] = [ float(i)/float(self.numofgauss) for j in range(0,82)]
        base['var'] = np.diag([var for j in range(0,82)])
        self.basis.append(base)
    # init parameters: one weight vector (theta), feature list (phi) and
    # label list per (turn, action) pair.
    self.thetas = {}
    self.phis = {}
    self.labels = {}
    self.state_action_num = 0
    for t in range(0,self.numOfTurn):
        for a in util.turnIndex2action('b','cycle_tree',t):
            self.thetas[(t,a)] = np.matrix([[0.0] \
                for i in range(self.numofgauss)])
            self.phis[(t,a)] = [[] for i in range(self.numofgauss)]
            self.labels[(t,a)] = []
            self.state_action_num += 1 # count self.state_action_num
def __init__(self, **args):
    """Initialize the agent; per-state values tracked in a util.Counter."""
    ReinforcementAgent.__init__(self, **args)
    # util.Counter: dict subclass that defaults missing keys to 0.
    self.states = util.Counter()
def __init__(self, **args):
    """Initialize the agent with an empty Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    # Records every Q-value; missing entries read as 0.
    self.Q_Values = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values stored per state."""
    ReinforcementAgent.__init__(self, **args)
    # Outer dict keyed by state; each value is a fresh util.Counter of actions.
    self.q_values = defaultdict(util.Counter)
def __init__(self, **args):
    """Initialize the agent and the counter that tracks Q-values."""
    ReinforcementAgent.__init__(self, **args)
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize the agent with empty Q-value and visit tables."""
    ReinforcementAgent.__init__(self, **args)
    self.Qvalues = dict()
    self.visit = dict()
def __init__(self, **args):
    """Initialize the agent and its Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q is a counter keyed by (state, action)."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = util.Counter()
def __init__(self, **args):
    """Initialize the agent with empty Q-value and visit-count maps."""
    ReinforcementAgent.__init__(self, **args)
    # Both maps are populated lazily, per state.
    self.qValues = {}
    self.visitCount = {}
def __init__(self, **args):
    """Initialize the agent; the environment is attached later."""
    ReinforcementAgent.__init__(self, **args)
    self.QValues = util.Counter()
    self.environment = None
def __init__(self, **args):
    """Initialize the agent's value store."""
    ReinforcementAgent.__init__(self, **args)
    # util.Counter extends dict, returning 0 for keys never written.
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize the agent, mirroring the valueIterationAgents setup."""
    ReinforcementAgent.__init__(self, **args)
    # A Counter is a dict with default 0.
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize the agent with a value counter, blocksworld predicates and a Q-tree."""
    ReinforcementAgent.__init__(self, **args)
    self.values = util.Counter()
    self.predicate = BlocksworldPred(self.mdp.count, self.mdp.stackNum)
    # FIXME: epsilon (0.01) should be passed via the command line.
    self.root = QTreeNode(.01, self.mdp.count)
def __init__(self, **args):
    """Initialize the agent; Q-values organised by (state, action) pair."""
    ReinforcementAgent.__init__(self, **args)
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent."""
    ReinforcementAgent.__init__(self, **args)
    # Create an empty dictionary to hold per-state information.
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the agent and its Q-value table.

    Args:
        **args: forwarded unchanged to ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)
    # NOTE(review): dropped an unused local `states = []` from the original;
    # nothing in this method read or stored it.
    # Q-values kept in a Counter (dict defaulting to 0), keyed by (state, action).
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize Q-value, transition-probability and reward stores."""
    ReinforcementAgent.__init__(self, **args)
    self.qValues = util.Counter()
    self.stateProbs = dict()
    self.rewards = util.Counter()
def __init__(self, **args):
    """Initialize the agent; early actions are chosen randomly."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = util.Counter()
    # How many actions to choose randomly before selecting greedily.
    self.greedy_constraint = 100
    self.to_greedy = 0
def __init__(self, **args):
    """Initialize the agent's Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    # Counter entries default to 0, so every Q-value starts at zero.
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent with an empty (private) Q-value dict."""
    ReinforcementAgent.__init__(self, **args)
    self._q_values = {}
def __init__(self, **args):
    """Initialize the agent; all Q-values live in one counter."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = util.Counter()
def __init__(self, **args):
    """Initialize the agent and the counter tracking all Q-values."""
    ReinforcementAgent.__init__(self, **args)
    self.qVals = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values keyed by ((x, y), action) tuples."""
    ReinforcementAgent.__init__(self, **args)
    self.QVals = util.Counter()
def __init__(self, **args):
    """Initialize the agent."""
    ReinforcementAgent.__init__(self, **args)
    # Q-values start at 0 (Counter default).
    self.qValues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; per-(state, action) Q-values in a counter."""
    ReinforcementAgent.__init__(self, **args)
    self.qval = util.Counter()
def __init__(self, **args):
    """Initialize the agent with an empty Q-value dict."""
    ReinforcementAgent.__init__(self, **args)
    self.qValuelist = {}
def __init__(self, **args):
    """Initialize the agent; values default to 0 via util.Counter."""
    ReinforcementAgent.__init__(self, **args)
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize the agent; entries keyed by (state, action), not by state."""
    ReinforcementAgent.__init__(self, **args)
    self.q_values = util.Counter()
def __init__(self, **args):
    """Initialize the agent and its Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    self.q_values = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values stored in a util.Counter."""
    ReinforcementAgent.__init__(self, **args)
    self.Qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q holds entries for states seen so far."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = {}
def __init__(self, **args):
    """Initialize the agent; Q maps (state, action) to a value."""
    ReinforcementAgent.__init__(self, **args)
    self.Q = {}
def __init__(self, **args):
    """Initialize the agent."""
    ReinforcementAgent.__init__(self, **args)
    # Dict-like Q-value store whose keys are (state, action) pairs.
    self.q_val = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values represented as a dict of dicts."""
    ReinforcementAgent.__init__(self, **args)
    self.q_table = dict()
def __init__(self, **args):
    """Initialize the agent; one Q-value per (state, action) tuple."""
    ReinforcementAgent.__init__(self, **args)
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent and an empty Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values indexed by state and action."""
    ReinforcementAgent.__init__(self, **args)
    self.QValues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; every Q-value starts at 0."""
    ReinforcementAgent.__init__(self, **args)
    # util.Counter returns 0 for any key not yet set.
    self.values = util.Counter()
def __init__(self, **args):
    """Initialize the agent and its Q-value counter."""
    ReinforcementAgent.__init__(self, **args)
    self.qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent; keys are (state, action) tuples."""
    ReinforcementAgent.__init__(self, **args)
    self.Qvalues = util.Counter()
def __init__(self, **args):
    """Initialize the agent, its exploration schedules and transfer policies.

    Expects args to contain "task" and "layout"; optionally "transfer" (a
    pair of task indices) and "sim_function" (a model filename under datos/).
    """
    ReinforcementAgent.__init__(self, **args)
    # Fixed movement action set.
    self.actions = [NORTH, WEST, SOUTH, EAST, STOP]
    # pdb.set_trace()
    self.num_episodes = 1
    self.num_datos = 0
    self.task = args["task"]
    "*** YOUR CODE HERE ***"
    # Short memory of recent transitions (length-bounded elsewhere).
    self.memory = []
    self.memory_length = 4
    self.prueba = False  # "test" flag (Spanish: prueba)
    self.history = []
    layout = args["layout"]
    width = layout.width
    height = layout.height
    self.BREAK = False
    # phi decays exponentially from 1 down to phi_end over numTraining steps
    # (per-step multiplier derived from the log ratio).
    self.phi = 1
    self.phi_end = 0.1
    self.phi_decay = np.exp((np.log(self.phi_end) - np.log(self.phi)) / (int(self.numTraining)))
    self.num_trans = 0
    self.lastReward = 0
    self.n2 = 0
    self.n1 = 0
    # Epsilon-greedy schedule: exponential decay from EPS_START to EPS_END
    # over the first half of training.
    self.eps_start = EPS_START
    self.epsilon = EPS_START
    self.eps_decay = np.exp((np.log(EPS_END) - np.log(EPS_START)) / (int(self.numTraining / 2)))
    self.eps_end = EPS_END
    self.similarity_function = None
    if "transfer" in args.keys():
        # Transfer setting: build both policies, load a pre-trained source
        # model plus a similarity model. "transfer" presumably holds
        # (source_task, target_task) indices — confirm against the caller.
        self.policy_first = Policy(width, height, 5, use_image=True, use_prior=False)
        self.policy_second = Policy(width, height, 5, use_image=True, use_prior=False)
        num_first = args["transfer"][0]
        num_second = args["transfer"][1]
        self.n1 = num_first
        self.n2 = num_second
        # Checkpoint name chosen by the source task index.
        # NOTE(review): if num_first is neither 0 nor 1, `name` is never
        # bound and load_Model below raises NameError.
        if num_first == 0:
            name = "modelo_imagen_20000_04_01_dif0_1575607728_gamma_0.9_attemp_8"
        if num_first == 1:
            name = "modelo_imagen_25000_04_01_dif1_1576737275_attemp_3_gamma0.9"
        # if difficulty == 2:
        # name = "modelo_imagen_25000_04_01_dif2_1577007228_attemp_2_gamma0.9"
        self.policy_first.load_Model("models/" + name + ".h5")
        # NOTE(review): this open/close pair is now unused — the pickle.load
        # below is commented out and the model is loaded via keras instead.
        f = open(f"datos/" + args["sim_function"], "r+b")
        # self.similarity_function = pickle.load(f)
        self.similarity_function = keras.models.load_model(
            "datos/" + args["sim_function"])
        f.close()
    else:
        self.policy_second = Policy(width, height, 5, use_image=True, use_prior=False)
def __init__(self, **args):
    """Initialize the agent's Q-value table and a minus-infinity sentinel."""
    ReinforcementAgent.__init__(self, **args)
    # Dictionary for Q-values; unseen keys default to 0.
    self.qvalues = util.Counter()
    # Large negative sentinel standing in for minus infinity.
    self.minus_infinity = -10000000000000
def __init__(self, **args):
    """Initialize the agent.

    Q-values default to 0 for any key never written.

    Args:
        **args: forwarded unchanged to ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)
    # Idiom fix: defaultdict(int) replaces defaultdict(lambda: 0); both
    # yield 0 for missing keys, but int is the conventional factory.
    self.Qvalue = defaultdict(int)
def __init__(self, **args):
    """Initialize the agent; Q-values may be set up here."""
    ReinforcementAgent.__init__(self, **args)
    # No extra state is initialized in this variant.
def __init__(self, **args):
    """Initialize the agent and its nested Q-value table.

    _qValues[state][action] defaults to 0 at both levels.

    Args:
        **args: forwarded unchanged to ReinforcementAgent.__init__.
    """
    ReinforcementAgent.__init__(self, **args)
    # Idiom fix: the inner factory was defaultdict(lambda: 0); int() also
    # yields 0 and is the conventional spelling. The outer level still needs
    # a lambda so each new state gets its own fresh inner defaultdict.
    self._qValues = defaultdict(lambda: defaultdict(int))
def __init__(self, **args):
    """Initialize the agent; stores Q-values only for pairs seen so far."""
    ReinforcementAgent.__init__(self, **args)
    self.seenQValues = {}
def __init__(self, **args):
    """Initialize the agent; entries map (state, action) to a value."""
    ReinforcementAgent.__init__(self, **args)
    self.stateActionPair = util.Counter()
def __init__(self, **args):
    """Initialize the agent and the table that tracks Q-values."""
    ReinforcementAgent.__init__(self, **args)
    self.qValuesTable = util.Counter()
def __init__(self, **args):
    """Initialize the agent; Q-values nested as state -> action -> value."""
    ReinforcementAgent.__init__(self, **args)
    # Each new state lazily gets its own action->value map defaulting to 0.0.
    self.Qvalues = defaultdict(lambda: defaultdict(float))