def __init__(self, env):
    """Create the shared ('global') training state around *env*.

    Builds one Adam optimizer that is handed to both ``CardAgent``
    instances, per-agent statistics buffers and summary writers, and a
    non-trainable global episode counter together with its increment op.

    Args:
        env: Environment object; stored on the instance and not otherwise
            used here.
    """
    # Two agents are managed; every per-agent container below is sized
    # from this single range.
    agent_ids = range(2)

    # Identity and externally supplied handles.
    self.name = 'global'
    self.env = env
    self.sess = None  # no session is attached at construction time

    # Scalar settings.
    # NOTE(review): temp/start_temp/end_temp look like a temperature
    # schedule from 1 down to 0.2 -- confirm against where they are read.
    self.temp = 1
    self.start_temp = 1
    self.end_temp = 0.2
    self.a_dim = 8310
    self.gamma = 0.99
    self.train_intervals = 30

    self.action_space = card.get_action_space()

    # A single optimizer instance is passed to every agent.
    self.trainer = tf.train.AdamOptimizer(learning_rate=0.001)

    # Per-agent bookkeeping: one independent list per agent.
    self.episode_rewards = [[] for _ in agent_ids]
    self.episode_length = [[] for _ in agent_ids]
    self.episode_mean_values = [[] for _ in agent_ids]

    self.summary_writers = [
        tf.summary.FileWriter("train_agent%d" % idx) for idx in agent_ids
    ]
    self.agents = [
        CardAgent('agent%d' % idx, self.trainer) for idx in agent_ids
    ]

    # Global episode counter and the op that bumps it by one.
    self.global_episodes = tf.Variable(
        0, dtype=tf.int32, name='global_episodes', trainable=False
    )
    self.increment = self.global_episodes.assign_add(1)
def __init__(self):
    """Initialise every game-state attribute, then call ``reset()``.

    All fields start as empty placeholders; ``reset()`` (defined
    elsewhere in the class) is invoked last, so it sees every attribute
    already present on the instance.
    """
    # Lookup table of actions obtained from the ``card`` module.
    self.action_space = card.get_action_space()

    # Card containers.
    self.deck = None
    self.players = []
    self.extra_cards = []

    # Record of play so far.
    self.history = []
    self.last_player = None
    self.last_cards = None

    # Turn pointer, starting at 0.
    self.next_turn = 0

    self.reset()
def _build_sample(demo_game, action_space):
    """Build one (state, action) pair from a random turn of *demo_game*.

    A turn index ``th`` is drawn uniformly from the recorded actions. The
    game is replayed from the lord's first move up to (not including)
    that turn so that each seat's remaining hand and played cards are
    known, and the position is then encoded from the point of view of
    the seat that moves at turn ``th``.

    Args:
        demo_game: Recorded game exposing ``lordID``, ``actions``,
            ``handcards`` and ``extracards``.
        action_space: Sequence mapping an action index to its cards.

    Returns:
        tuple: ``(state, action)``. ``state`` is the concatenation, in
        order, of: own hand, cards not yet accounted for, the three
        seats' played cards, and the extra cards (each as produced by
        ``to_one_hot``). ``action`` has one entry per *feasible* action,
        ``1`` for the action actually taken at turn ``th`` and ``0``
        otherwise.

    Raises:
        ValueError: If ``demo_game.actions`` is empty (``randint`` gets
            an empty range).
    """
    lord_id = demo_game.lordID
    actions = demo_game.actions
    th = random.randint(0, len(actions) - 1)

    # NOTE(review): assumes to_color_handcards returns fresh lists; the
    # replay below removes cards from them in place -- confirm.
    handcards = to_color_handcards(demo_game.handcards)
    # Computed before the replay so it sees the lord's hand before any
    # played cards are removed from it.
    extracards = to_color_extracards(handcards[lord_id], demo_game.extracards)

    # Replay turns [0, th): the lord moves first, seats rotate modulo 3.
    out_cards = [[] for _ in range(3)]
    seat = lord_id
    last_cards = []
    last_id = lord_id
    for i in range(th):
        put_list = to_color_putlist(action_space[actions[i]], handcards[seat])
        if put_list:
            # Only a non-empty play replaces the cards to respond to.
            last_cards = copy.deepcopy(put_list)
            last_id = seat
        out_cards[seat] += put_list
        for c in put_list:
            handcards[seat].remove(c)
        seat = (seat + 1) % 3

    # Seat that moves at turn `th`.
    current = (th + lord_id) % 3
    if last_id == current:
        # The last non-empty play was this seat's own, so there is
        # nothing to respond to.
        last_cards = []

    self_cards = to_one_hot(handcards[current])
    # Start from an all-ones vector of length 54 and subtract everything
    # that is visible: own hand, then each seat's played cards.
    remains = subtract([1] * 54, self_cards)
    history = [to_one_hot(cards) for cards in out_cards]
    for played in history:
        remains = subtract(remains, played)
    extra_cards = to_one_hot(extracards)

    state = []
    for part in (self_cards, remains,
                 history[0], history[1], history[2],
                 extra_cards):
        state += part

    mask = get_mask(
        [to_card(color) for color in handcards[current]],
        action_space,
        [to_card(color) for color in last_cards],
    )
    taken = actions[th]
    # One label per feasible action; infeasible actions are skipped, so
    # the vector length varies from sample to sample.
    action = [
        1 if a == taken else 0
        for a in range(len(action_space))
        if mask[a]
    ]
    return state, action


def collect_data():
    """Sample a demonstration game and emit one (state, action) pair.

    A game is picked at random from ``demoGames`` (index drawn from
    ``0 .. N - 1``), a training pair is built from a random turn of it,
    and both vectors are passed to ``printf``.

    Returns:
        None.
    """
    action_space = card.get_action_space()
    cnt = 0
    while cnt < N:
        demo_game = demoGames[random.randint(0, N - 1)]
        state, action = _build_sample(demo_game, action_space)
        printf(state)
        printf(action)
        # NOTE(review): the original code returns here, so at most one
        # sample is ever emitted and the counter increment below never
        # runs. This looks like a debugging leftover; it is preserved to
        # keep the observable output unchanged. Remove the `return` to
        # emit N samples.
        return
        cnt += 1  # unreachable while the `return` above is in place