示例#1
0
	def decide_chow(self, player, new_tile, choices, neighbors, game):
		"""Ask the fitted policy-gradient model whether to Chow a discarded tile.

		Args:
			player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
			new_tile: the tile that was just discarded.
			choices: Chow options; each one is formatted into a ``"chow_%d"``
				decision name to look up its action index.
			neighbors: forwarded to the board printer and the state encoder.
			game: current game; ``lang_code`` and ``add_notification`` are used.

		Returns:
			``(False, None)`` when the model picks ``no_action``; otherwise
			``(True, choice)`` where ``choice`` is the integer parsed from the
			selected decision name.
		"""
		self.begin_decision()

		fixed_hand = player.fixed_hand
		hand = player.hand
		if self.display_step:
			self.print_game_board(fixed_hand, hand, neighbors, game)

		self.print_msg("Someone just discarded a %s."%new_tile.symbol)

		pg_model = get_MJPGFitted(self.pg_model_path)
		state = utils.extended_dnn_encode_state(player, neighbors, cpk_tile = new_tile)

		# Decision actions are stored at offset 34 in the action vector.
		pass_action = 34 + decisions_.index("no_action")
		valid_actions = [pass_action] + [34 + decisions_.index("chow_%d"%(option)) for option in choices]
		action_filter = np.zeros(n_decisions)
		action_filter[valid_actions] = 1

		if not self.skip_history and self.history_waiting:
			self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

		def ask_model():
			return pg_model.choose_action(state, action_filter = action_filter, return_value = True, strict_filter = True)

		action, value = ask_model()
		while action not in valid_actions:
			if not self.is_train:
				# Outside training, fall back to a random legal action.
				action = random.choice(valid_actions)
				break
			# During training, record the invalid pick with its penalty and retry.
			if action is not None and not self.skip_history:
				self.update_history(state, action, action_filter)
				self.update_transition(state, REWARD_INVALID_DECISION, action_filter)
			action, value = ask_model()

		if not self.skip_history:
			self.update_history(state, action, action_filter)

		self.end_decision()

		if action == pass_action:
			self.print_msg("%s chooses not to Chow %s [%.2f]."%(self.player_name, new_tile.symbol, value))
			return False, None

		choice = int(decisions_[action - 34].split("_")[1])
		# The three tiles of the meld are the neighbors at choice-1 .. choice+1.
		meld = [new_tile.generate_neighbor_tile(offset) for offset in range(choice - 1, choice + 2)]
		chow_tiles_str = "".join(tile.symbol for tile in meld)
		chow_tiles_tgstrs = [tile.get_display_name(game.lang_code, is_short = False) for tile in meld]

		self.print_msg("%s chooses to Chow %s [%.2f]."%(self.player_name, chow_tiles_str, value))

		if game.lang_code is not None:
			game.add_notification(get_text(game.lang_code, "NOTI_CHOOSE_CHOW")%(self.player_name, ",".join(chow_tiles_tgstrs)))

		return True, choice
示例#2
0
    def decide_drop_tile(self, player, new_tile, neighbors, game):
        """Ask the fitted policy-gradient model which tile to discard.

        Args:
            player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
            new_tile: an extra tile that may also be discarded, or ``None``.
            neighbors: forwarded to the board printer and the state encoder.
            game: current game; ``lang_code`` and ``add_notification`` are used.

        Returns:
            The value of ``Tile.convert_tile_index(action)`` for the selected
            action, i.e. the tile to drop.
        """
        self.begin_decision()

        fixed_hand = player.fixed_hand
        hand = player.hand
        state = utils.extended_dnn_encode_state(player, neighbors, new_tile=new_tile)

        if self.display_step:
            self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

        pg_model = get_MJPGFitted(self.pg_model_path)

        # Every tile in hand, plus new_tile when given, is a legal discard.
        candidates = player.hand if new_tile is None else player.hand + [new_tile]
        valid_actions = [Tile.convert_tile_index(tile) for tile in candidates]

        action_filter = np.zeros(n_decisions)
        action_filter[valid_actions] = 1

        if not self.skip_history and self.history_waiting:
            self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

        def ask_model():
            return pg_model.choose_action(state,
                                          action_filter=action_filter,
                                          return_value=True,
                                          strict_filter=True)

        action, value = ask_model()
        while action not in valid_actions:
            if not self.is_train:
                # Outside training, fall back to a random legal discard.
                action = random.choice(valid_actions)
                break
            # During training, record the invalid pick with its penalty and retry.
            if action is not None and not self.skip_history:
                self.update_history(state, action, action_filter)
                self.update_transition(state, REWARD_INVALID_DECISION,
                                       action_filter)
            action, value = ask_model()

        if not self.skip_history:
            self.update_history(state, action, action_filter)

        drop_tile = Tile.convert_tile_index(action)
        # NOTE(review): display_name is not defined in this method; presumably
        # a module-level name -- confirm.
        summary = "%s [%s] chooses to drop %s. [%.2f]" % (
            self.player_name, display_name, drop_tile.symbol, value)
        self.print_msg(summary)
        self.end_decision(True)

        if game.lang_code is not None:
            tile_label = drop_tile.get_display_name(game.lang_code,
                                                    is_short=False)
            game.add_notification(
                get_text(game.lang_code, "NOTI_CHOOSE_DISCARD") %
                (self.player_name, tile_label))

        return drop_tile
示例#3
0
	def decide_kong(self, player, new_tile, kong_tile, location, src, neighbors, game):
		"""Ask the fitted policy-gradient model whether to form a Kong.

		Args:
			player: the deciding player; its ``fixed_hand`` and ``hand`` are read.
			new_tile: forwarded to the board printer.
			kong_tile: the tile the Kong would be made of.
			location: accepted for interface compatibility; not used by this
				implementation.
			src: ``"steal"``, ``"draw"`` or ``"existing"``; selects the message
				printed before the decision.
			neighbors: forwarded to the board printer and the state encoder.
			game: current game; ``lang_code`` and ``add_notification`` are used.

		Returns:
			``True`` when the model chooses to form the Kong, ``False`` when it
			chooses ``no_action``.
		"""
		self.begin_decision()
		fixed_hand, hand = player.fixed_hand, player.hand

		if self.display_step:
			self.print_game_board(fixed_hand, hand, neighbors, game, new_tile)

		if src == "steal":
			self.print_msg("Someone just discarded a %s."%kong_tile.symbol)
		elif src == "draw":
			self.print_msg("You just drew a %s"%kong_tile.symbol)
		elif src == "existing":
			self.print_msg("You have 4 %s in hand"%kong_tile.symbol)

		pg_model = get_MJPGFitted(self.pg_model_path)
		state = utils.extended_dnn_encode_state(player, neighbors, cpk_tile = kong_tile)

		# Decision actions are stored at offset 34 in the action vector.
		# The affirmative action for this decision is "kong" (it was wrongly
		# registered as "pong"); assumes decisions_ defines a "kong" entry.
		no_action = 34 + decisions_.index("no_action")
		valid_actions = [34 + decisions_.index("kong"), no_action]
		action_filter = np.zeros(n_decisions)
		action_filter[valid_actions] = 1
		action = None

		if not self.skip_history and self.history_waiting:
			self.update_transition(state, REWARD_NON_TERMINAL, action_filter)

		while True:
			# A previous iteration produced an invalid action: record it with
			# its penalty before asking the model again.
			if action is not None and not self.skip_history:
				self.update_history(state, action, action_filter)
				self.update_transition(state, REWARD_INVALID_DECISION, action_filter)

			action, value = pg_model.choose_action(state, action_filter = action_filter, return_value = True, strict_filter = True)

			if action in valid_actions:
				break
			elif not self.is_train:
				# Outside training, fall back to a random legal action.
				action = random.choice(valid_actions)
				break

		if not self.skip_history:
			self.update_history(state, action, action_filter)

		self.end_decision()

		# "no_action" means the Kong is declined; previously the two outcomes
		# were swapped, so a pass was reported (and returned) as a Kong.
		if action == no_action:
			self.print_msg("%s [%s] chooses not to form a Kong %s%s%s%s [%.2f]."%(self.player_name, display_name, kong_tile.symbol, kong_tile.symbol, kong_tile.symbol, kong_tile.symbol, value))
			return False

		self.print_msg("%s [%s] chooses to form a Kong %s%s%s%s [%.2f]."%(self.player_name, display_name, kong_tile.symbol, kong_tile.symbol, kong_tile.symbol, kong_tile.symbol, value))
		if game.lang_code is not None:
			game.add_notification(get_text(game.lang_code, "NOTI_CHOOSE_KONG")%(self.player_name, kong_tile.get_display_name(game.lang_code, is_short = False)))

		return True
示例#4
0
	def update_transition(self, state_, reward = 0, action_filter_ = None):
		"""Store the pending history entry in the model together with ``reward``.

		Does nothing unless ``self.is_train`` is set. ``state_`` and
		``action_filter_`` are accepted but not used here; the stored transition
		is built from ``self.pg_model_history``.

		Raises:
			Exception: when training but no history entry is waiting.
		"""
		if self.is_train:
			if not self.history_waiting:
				raise Exception("the network is NOT waiting for a transition")

			# Mark the pending entry as consumed before handing it to the model.
			self.history_waiting = False
			model = get_MJPGFitted(self.pg_model_path)
			pending = self.pg_model_history
			model.store_transition(pending["state"], pending["action"], reward, pending["action_filter"], pending["heuristics_action"])
示例#5
0
def test(args):
    """Train, evaluate, or interactively play with the fitted PG model.

    ``args`` is handed to ``parse_args``; the parsed ``action`` selects the
    mode ("train", "test" or "play"). Opponents are built from
    ``trainer_conf`` and the policy-gradient player is appended last. Games
    are then run for ``n_episodes`` episodes (or until ``EXIT_FLAG`` is set),
    win/lose outcomes are written into the module-level ``game_record``, and
    rates are printed every ``game_record_size`` episodes. In training mode
    the model is saved once at the end when ``save_name`` was given.

    Raises:
        Exception: when ``action`` is "test"/"play" and ``model_dir`` is
            missing.
    """
    global game_record_count
    args = parse_args(args)

    if args.action == "train":
        if args.save_name is None:
            answer = input(
                "You have not entered save_name, are you sure? [y/n] ")
            if answer.lower() != "y":
                exit(-1)

        if args.model_dir is not None:
            # Point both trainer opponents at the supplied model directory.
            for tag in ("pgf", "pgfr"):
                trainer_models[tag]["parameters"][
                    "pg_model_path"] = args.model_dir
        else:
            args.model_dir = pg_model_path

        # Only relevant to the periodic checkpointing that is disabled below.
        freq_model_save = args.n_episodes // 10

    elif args.action in ("test", "play"):
        if args.model_dir is None:
            raise Exception("model_dir must be given to test/play")

    model = get_MJPGFitted(args.model_dir, **pg_model_paras)

    is_play = args.action == "play"
    is_training = args.action == "train"

    # Names are assigned by seat order: players[k] is called names[k].
    players = []
    for model_tag in trainer_conf:
        if is_play:
            opponent = Player.Player(MoveGenerator.Human,
                                     player_name=names[len(players)])
        else:
            opponent = Player.Player(trainer_models[model_tag]["class"],
                                     player_name=names[len(players)],
                                     **trainer_models[model_tag]["parameters"])
        players.append(opponent)

    if model_flag == "pgf":
        generator_cls = MoveGenerator.PGFGenerator
    else:
        generator_cls = MoveGenerator.PGFRGenerator
    pg_player = Player.Player(generator_cls,
                              player_name=names[len(players)],
                              pg_model_path=args.model_dir,
                              skip_history=False,
                              is_train=is_training,
                              display_step=is_play)
    players.append(pg_player)

    if not is_play:
        signal.signal(signal.SIGINT, signal_handler)

    game = None
    shuffled_players = None
    last_saved = -1
    for episode in range(args.n_episodes):
        if EXIT_FLAG:
            break

        # Re-seat the players and start a fresh Game at the shuffle interval.
        if episode % freq_shuffle_players == 0:
            shuffled_players = random.sample(players, k=4)
            game = Game.Game(shuffled_players)

        winner, losers, penalty = game.start_game()
        report_due = (episode + 1) % game_record_size == 0
        if is_training:
            model.learn(display_cost=report_due)

        # game_record is used as a ring buffer of the latest outcomes.
        slot = game_record_count % game_record_size
        game_record[slot, :, :] = np.zeros((4, 2))
        game_record_count += 1

        if winner is not None:
            # Column 0 marks the winner, column 1 marks each loser; rows are
            # indexed by the player's position in the unshuffled list.
            game_record[slot, players.index(winner), 0] = 1
            for loser in losers:
                game_record[slot, players.index(loser), 1] = 1

        if report_due:
            rates = [
                game_record[:, seat, column].mean() * 100
                for seat in range(4) for column in range(2)
            ]
            print(
                "#%5d: %.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%\t%.2f%%/%.2f%%"
                % ((episode + 1, ) + tuple(rates)))

        # NOTE: periodic checkpointing (every freq_model_save episodes, which
        # would also update last_saved) is disabled; only the final save
        # below runs.

    if (is_training and args.save_name is not None
            and last_saved < args.n_episodes - 1):
        path = args.save_name.rstrip("/") + "_%d" % args.n_episodes
        utils.makesure_dir_exists(path)
        model.save(path)