# Imports assumed at module level for this section (numpy for the encodings,
# tensorflow / keras.backend for the training step in expReplay):
import numpy as np
import tensorflow as tf
from keras import backend as K


def __init__(self, bidding, player, p, learning_p, active_P_count):
    self.player = player
    self.learning_p = learning_p
    # With only two active players (seats 0 and 2), every other seat is
    # restricted to passing.
    if active_P_count == 2:
        if player != 0 and player != 2:
            self.legal_bids = [possible_bids.index("P")]
        else:
            self.legal_bids = self.getLegalBid(bidding)
    else:
        self.legal_bids = self.getLegalBid(bidding)
    # Per-bid search statistics, one slot per legal bid.
    self.width = len(self.legal_bids)
    self.W = [0] * self.width         # total backed-up value
    self.N = [0] * self.width         # visit count
    self.Q = [0] * self.width         # mean value W / N
    self.P = [0] * self.width         # prior probability
    self.children = [None] * self.width
    # Perturb the prior only when there is a real choice and this node
    # does not belong to the learning player.
    if len(self.legal_bids) > 1 and self.player != learning_p:
        self.P = self.addNoise(p[0])
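# addNoise is defined elsewhere in the class. A minimal standalone sketch of
# one plausible implementation, assuming AlphaZero-style Dirichlet exploration
# noise mixed into the network prior over the legal bids (the function name
# and the constants eps = 0.25, alpha = 0.3 are illustrative assumptions, not
# taken from this repo):
def add_noise_sketch(p, legal_bids, eps=0.25, alpha=0.3):
    # p: prior over all possible bids; legal_bids: global bid indices.
    noise = np.random.dirichlet([alpha] * len(legal_bids))
    return [(1 - eps) * p[b] + eps * n for b, n in zip(legal_bids, noise)]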
def getLegalBid(self, bidding):
    # Map each legal competitive continuation to its index in possible_bids.
    ret = []
    for bid in generateBiddings(bidding, "Competitive"):
        ret.append(possible_bids.index(bid))
    return ret
def update(self, value, bid):
    # Back up one simulation result for `bid`: bump its visit count,
    # accumulate the value, and refresh the running mean Q = W / N.
    index = self.legal_bids.index(possible_bids.index(bid))
    self.N[index] += 1
    self.W[index] += value
    self.Q[index] = self.W[index] / self.N[index]
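# For example, two backups of values 1 and 3 for the same bid leave that
# bid's slot at N == 2, W == 4, and Q == 2.0.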
def one_hot_encode(self, bids):
    # Encode the last three bids as three stacked one-hot blocks of width
    # POSSIBLE_BID_COUNT; the most recent bid always occupies the last block.
    indices = []
    res = np.zeros(POSSIBLE_BID_COUNT * 3)
    for bid in bids[-3:]:
        indices.append(possible_bids.index(bid))
    # With fewer than three bids, shift them toward the later blocks.
    base = 3 - len(indices)
    for i in range(len(indices)):
        res[indices[i] + POSSIBLE_BID_COUNT * (base + i)] = 1
    return list(res)
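# A self-contained sketch of the encoding layout above, using hypothetical
# stand-ins (toy_possible_bids, toy_count) for the module-level
# possible_bids and POSSIBLE_BID_COUNT:
def one_hot_layout_demo():
    toy_possible_bids = ["P", "1C", "1D"]
    toy_count = len(toy_possible_bids)
    history = ["1C", "P"]                 # only two bids so far
    res = np.zeros(toy_count * 3)
    indices = [toy_possible_bids.index(b) for b in history[-3:]]
    base = 3 - len(indices)               # base == 1: shift into blocks 1 and 2
    for i, idx in enumerate(indices):
        res[idx + toy_count * (base + i)] = 1
    # "1C" (older) lands in block 1, "P" (most recent) in block 2:
    # res == [0, 0, 0,  0, 1, 0,  1, 0, 0]
    return list(res)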
def expReplay(network_1, par, hands, resTable, mode=2):
    agents = {}
    counter = 0
    # Hand statistics for the two active seats (0 and 2); the slots for
    # seats 1 and 3 stay at -1.
    stats = [-1] * (STAT_SIZE * 4)
    temp_Agent = Agent("", {})
    temp_Agent.setHand(hands[0])
    for i in range(STAT_SIZE):
        stats[i] = temp_Agent.stat[i]
    temp_Agent.setHand(hands[2])
    for i in range(STAT_SIZE):
        stats[i + STAT_SIZE * 2] = temp_Agent.stat[i]

    while counter < 10000:
        ended = False
        bids = []
        player = 0
        related_agents = []
        last_agent = None
        while not ended:
            # One node per (player, bidding history); reuse across episodes.
            key = str(player) + "|" + toString(bids)
            if key not in agents:
                agents[key] = Agent_Open_Hand(network_1, {}, EXPLORE_COEFFICIENT)
                agents[key].setState((stats, bids.copy(), player), hands[player])
            agent = agents[key]
            related_agents.append(agent)
            bid = agent.quickMove()
            bids.append(bid)
            bids.append("P")  # the inactive seat in between always passes
            player = 2 if player == 0 else 0
            # Three consecutive passes end the auction.
            if bids[-3:] == ["P", "P", "P"]:
                ended = True
            # Wire the previous node to this one in the search tree.
            if last_agent is not None and \
                    last_agent.children[possible_bids.index(bid)] is None:
                last_agent.children[possible_bids.index(bid)] = agent
            last_agent = agent
        # Back up the deal's score (relative to par) through every node
        # visited in this episode.
        score = getScore(bids, resTable)
        for agent in reversed(related_agents):
            agent.updateValues(score - par)
        counter += 1

    # Collect training data: network inputs X and MCTS visit-count targets.
    x = []
    y = []
    for key in agents:
        x.append(agents[key].X)
        y.append(agents[key].visit_count)

    if K.backend() == "tensorflow":
        X = np.asarray(x)
        Y_true = np.asarray(y)
        if _DEBUG > 2:
            session = tf.Session()
            print(session.run(custom_loss(network_1.predict(X), Y_true)))
            print(network_1.predict(X))
            print("------")
        network_1.fit(X, Y_true, epochs=1, verbose=1)
        if _DEBUG > 2:
            print(session.run(custom_loss(network_1.predict(X), Y_true)))
            print(network_1.predict(X))
        print("------End of Exp Replay-----")
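# custom_loss is defined elsewhere in the repo. A minimal sketch of one
# plausible definition, assuming an AlphaZero-style policy loss: cross-entropy
# between the network's predicted distribution and the normalized MCTS visit
# counts (this exact form is an assumption, not taken from the source; the
# argument order matches the call site above):
def custom_loss_sketch(y_pred, y_true):
    pi = y_true / K.sum(y_true, axis=-1, keepdims=True)   # normalize visits
    log_p = K.log(K.clip(y_pred, K.epsilon(), 1.0))
    return -K.sum(pi * log_p, axis=-1)                    # per-sample CE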