Exemplo n.º 1
0
    def __init__(self, env):
        """Initialise the shared ('global') training state.

        Stores the environment, the hyper-parameters, one Adam optimizer
        that is handed to every ``CardAgent``, per-agent statistics buffers
        and summary writers, and a non-trainable episode counter together
        with the op that increments it.

        Args:
            env: Environment object; it is only stored on ``self.env`` here.
        """
        agent_count = 2

        # Identity and external handles.
        self.name = 'global'
        self.env = env
        self.sess = None  # no session is attached at construction time

        # Scalar settings.
        self.start_temp = 1
        self.end_temp = 0.2
        self.temp = 1
        self.gamma = 0.99
        self.a_dim = 8310
        self.train_intervals = 30

        self.action_space = card.get_action_space()

        # A single optimizer instance is passed to every agent below.
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.001)

        # One independent list per agent for each tracked statistic.
        self.episode_rewards = [[] for _ in range(agent_count)]
        self.episode_length = [[] for _ in range(agent_count)]
        self.episode_mean_values = [[] for _ in range(agent_count)]

        # All writers are created before any agent, as in the original order.
        self.summary_writers = []
        for idx in range(agent_count):
            self.summary_writers.append(
                tf.summary.FileWriter("train_agent%d" % idx))

        self.agents = []
        for idx in range(agent_count):
            self.agents.append(CardAgent('agent%d' % idx, self.trainer))

        # Non-trainable int32 counter plus the op that adds one to it.
        self.global_episodes = tf.Variable(
            0, dtype=tf.int32, name='global_episodes', trainable=False)
        self.increment = self.global_episodes.assign_add(1)
Exemplo n.º 2
0
 def __init__(self):
     """Create the attributes with empty defaults, then call ``reset()``."""
     # Obtained once here and stored on the instance.
     self.action_space = card.get_action_space()

     # Not set yet at construction time.
     self.deck = None
     self.last_player = None
     self.last_cards = None

     # Containers that start out empty.
     self.players = []
     self.history = []
     self.extra_cards = []

     self.next_turn = 0

     # reset() runs last so it sees every attribute defined above.
     self.reset()
Exemplo n.º 3
0
def collect_data():
    """Sample one decision point from a random demo game and print it.

    Picks a random game from ``demoGames`` and a random step ``th`` inside
    it, replays the first ``th`` recorded actions to rebuild every player's
    remaining hand and played cards, and then emits two lists via
    ``printf``:

    * ``state`` -- six ``to_one_hot`` encodings concatenated in this order:
      the acting player's hand, ``remains`` (a 54-slot all-ones vector with
      that hand and every player's played cards removed via ``subtract``),
      the played cards of players 0, 1 and 2, and the extra cards.
    * ``action`` -- one entry per action enabled in the ``get_mask`` result:
      ``1`` for the action the demo took at step ``th``, ``0`` otherwise.

    Does nothing beyond fetching the action space when ``N <= 0``.

    Returns:
        None. The sample is only printed.

    Raises:
        ValueError: from ``random.randint`` if the chosen game has no
            recorded actions.
    """
    action_space = card.get_action_space()

    # NOTE(review): this used to be ``while cnt < N:`` with an unconditional
    # ``return`` at the end of the body, which made ``cnt += 1`` unreachable
    # and capped the output at one sample.  That behaviour is kept, but as an
    # explicit guard.  If N samples are actually wanted, turn this back into
    # a loop and store each (state, action) pair instead of returning.
    if N <= 0:
        return

    game = demoGames[random.randint(0, N - 1)]
    lord_id = game.lordID
    th = random.randint(0, len(game.actions) - 1)

    handcards = to_color_handcards(game.handcards)
    extracards = to_color_extracards(handcards[lord_id], game.extracards)

    # Replay the first ``th`` actions, starting with the lord and rotating
    # through the three players.
    played = [[] for _ in range(3)]
    ind = lord_id
    last_cards = []
    last_id = lord_id
    for step in range(th):
        put_list = to_color_putlist(action_space[game.actions[step]],
                                    handcards[ind])
        if put_list != []:
            # Remember the most recent non-empty play and who made it.
            last_cards = copy.deepcopy(put_list)
            last_id = ind
        played[ind] += put_list
        for c in put_list:
            handcards[ind].remove(c)
        ind = int(ind + 1) % 3

    # Index of the player who acts at step ``th``.
    player = (th + lord_id) % 3
    if last_id == player:
        # The last non-empty play was this player's own, so no cards are
        # passed to get_mask as "last cards" (presumably the others passed).
        last_cards = []

    self_cards = to_one_hot(handcards[player])
    remains = subtract([1] * 54, self_cards)
    history = [to_one_hot(cards) for cards in played]
    for played_one_hot in history:
        remains = subtract(remains, played_one_hot)
    extra_cards = to_one_hot(extracards)

    # extend() rather than ``+`` so each part is appended element by element
    # regardless of its sequence type.
    state = []
    for part in (self_cards, remains, history[0], history[1], history[2],
                 extra_cards):
        state.extend(part)

    mask = get_mask([to_card(color) for color in handcards[player]],
                    action_space,
                    [to_card(color) for color in last_cards])
    demo_action = game.actions[th]
    # Only actions enabled in the mask get an entry.
    action = [1 if a == demo_action else 0
              for a in range(len(action_space)) if mask[a]]

    printf(state)
    printf(action)