Example #1
    def init_conn(self):
        """Init connection with torchcraft server"""
        # Import torchcraft in this function so that torchcraft is not an explicit
        # dependency for projects importing this repo
        import torchcraft as tc
        import torchcraft.Constants as tcc
        self.client1 = tc.Client()
        self.client1.connect(self.server_ip, self.server_port1)
        self.state1 = self.client1.init()

        self.client2 = tc.Client()
        self.client2.connect(self.server_ip, self.server_port2)
        self.state2 = self.client2.init()

        setup = [
            [tcc.set_combine_frames, 1],
            [tcc.set_speed, self.speed],
            [tcc.set_gui, self.set_gui],
            # NOTE: we use a custom frameskip method now,
            # so the built-in frameskip below is left at 1
            [tcc.set_frameskip, 1],
            [tcc.set_cmd_optim, 1]
        ]

        self.client1.send(setup)
        self.state1 = self.client1.recv()
        self.client2.send(setup)
        self.state2 = self.client2.recv()
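A minimal sketch of the receive/act loop that would typically follow this two-client setup. It is not part of the original example; run_episode and choose_actions are hypothetical names, the latter standing in for whatever builds a list of torchcraft commands from a state.

    def run_episode(self):
        # Hedged sketch (not from the original repo): drive both clients frame
        # by frame; choose_actions is a hypothetical helper that returns a
        # list of torchcraft commands for a given state.
        while not (self.state1.game_ended or self.state2.game_ended):
            self.client1.send(choose_actions(self.state1))
            self.client2.send(choose_actions(self.state2))
            self.state1 = self.client1.recv()
            self.state2 = self.client2.recv()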
Example #2
    def __init__(self, server_ip, server_port, speed, frame_skip, self_play,
                 max_episode_steps):
        self.ip = server_ip
        self.port = server_port
        self.client = tc.Client()
        self.client.connect(server_ip, server_port)
        self.state = self.client.init(micro_battles=True)
        self.speed = speed
        self.frame_skip = frame_skip
        self.self_play = self_play
        self.max_episode_steps = max_episode_steps
        self.step_limit = 300
        self.step_rate = 10

        self.episodes = 0
        self.episode_wins = 0
        self.episode_steps = 0

        self.action_space = self._action_space()
        self.observation_space = self._observation_space()

        self.state = None
        self.obs = None
        self.obs_pre = None

        self.advanced_termination = True
Example #3

def dump_replay(path, dest, bwenv):
    print('>> Dumping {} -> {}'.format(path, dest))

    port = find_free_port()
    cmdline = OPENBW_REPLAY_TEMPLATE.format(port=port, bwenv=bwenv, map=path)

    openbw = subprocess.Popen(cmdline, shell=True, preexec_fn=os.setsid)

    cl = tc.Client()
    cl.connect('localhost', port)
    state = cl.init()
    skip_frames = 3
    cl.send([
        [tcc.set_speed, 0],
        [tcc.set_gui, 0],
        [tcc.set_combine_frames, skip_frames, skip_frames],
        [tcc.set_max_frame_time_ms, 0],
        [tcc.set_blocking, 0],
        [tcc.set_frameskip, 1000],
        [tcc.set_log, 0],
        [tcc.set_cmd_optim, 1],
    ])
    state = cl.recv()

    rep = tc.replayer.Replayer()
    rep.setMapFromState(state)
    while not state.game_ended:
        rep.push(state.frame)
        state = cl.recv()

    rep.setKeyFrame(-1)
    rep.save(dest, True)

    # Bye, bye
    os.killpg(os.getpgid(openbw.pid), signal.SIGTERM)
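A hedged usage sketch for dump_replay: convert every .rep file in a source directory. The function name dump_all and its directory arguments are hypothetical; find_free_port and OPENBW_REPLAY_TEMPLATE are assumed to be defined elsewhere in the same script.

import glob
import os


def dump_all(src_dir, dst_dir, bwenv):
    # Hypothetical driver (not from the original script): dump every replay
    # found in src_dir into dst_dir, reusing dump_replay defined above.
    for path in sorted(glob.glob(os.path.join(src_dir, '*.rep'))):
        dest = os.path.join(dst_dir, os.path.basename(path))
        dump_replay(path, dest, bwenv)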
Example #4
    def __connect(self):
        '''
        Connect with the server.
        :return: the initial game state
        '''
        self.cl = tc.Client()
        dprint("connection start.....")
        connect_rt = self.cl.connect(self.default_config['hostname'],
                                     self.default_config['port'])
        dprint('connection rt: ', connect_rt)  # prints True
        state = self.cl.init(micro_battles=True)
        for pid, player in state.player_info.items():
            dprint("player {} named {} is {}".format(
                player.id, player.name, tc.Constants.races._dict[player.race]),
                   level=2)
        dprint(state.map_size, level=1)
        # Initial game setup
        _ = self.cl.send([
            [tcc.set_combine_frames, self.default_config['skip_frames']],
            [tcc.set_speed, self.default_config['set_speed']],
            [tcc.set_gui, 1],
            [tcc.set_cmd_optim, 1],
        ])
        # dprint('init set up: ', _)  # prints True
        dprint("connection ended.....")
        return state
Example #5
    def __init__(self, server_ip, server_port):
        assert (server_ip != ''), "Server ip cannot be empty"
        assert (server_port != ''), "Server port cannot be empty"

        self.server_ip = server_ip
        self.server_port = server_port
        self.socket = tc.Client()
        self.message_just_sent = False

        self.state = ServerState()
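The snippet above only prepares the client. A minimal sketch of the connect step it implies, assuming the same tc.Client API used in the other examples; the method name connect_to_server is hypothetical and not taken from the original class.

    def connect_to_server(self):
        # Hedged sketch: open the TorchCraft connection prepared in __init__
        # and return the initial state from the server.
        self.socket.connect(self.server_ip, self.server_port)
        return self.socket.init()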
Example #6
    def _launch_client(self):
        self.controller = tc.Client()
        self.controller.connect(self.hostname, self.port)
        self._obs = self.controller.init(micro_battles=self.micro_battles)

        self.controller.send([
            [tcc.set_combine_frames, self._step_mul],
            [tcc.set_speed, 0],
            [tcc.set_gui, 1],
            [tcc.set_cmd_optim, 1],
        ])
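A hedged sketch of the per-step exchange that usually follows this setup. The attack-command layout mirrors the public torchcraft examples rather than this repository, and _step is a hypothetical method name.

    def _step(self):
        # Hedged sketch: attack-move every own unit onto the first enemy unit,
        # then advance the game by one combined frame.
        commands = []
        my_units = self._obs.units[0]
        enemy_units = self._obs.units[1]
        for unit in my_units:
            if enemy_units:
                commands.append([
                    tcc.command_unit_protected,
                    unit.id,
                    tcc.unitcommandtypes.Attack_Unit,
                    enemy_units[0].id,
                ])
        self.controller.send(commands)
        self._obs = self.controller.recv()
        return self._obs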
Example #7
        command = [
            tcc.command_openbw,
            tcc.openbwcommandtypes.KillUnit,
            u.id,
        ]
        commands.append(command)
    return commands


skip_frames = 1
nrestarts = 0
total_battles = 0
max_add_quantity = 20
tries = 5

cl = tc.Client()
cl.connect(args.hostname, args.port)
state = cl.init(micro_battles=args.micro_battles)
returned = cl.send([
    [tcc.set_combine_frames, skip_frames],
    [tcc.set_speed, 0],
    [tcc.set_gui, 1],
    [tcc.set_cmd_optim, 1],
    [tcc.map_hack],
])
state = cl.recv()

for i in range(tries):
    print("# try: {}".format(i))

    while state.game_ended or state.waiting_for_restart:
Example #8
def run_thread(agent, game_num, Synchronizer, difficulty, sc_port):
    global UPDATE_EVENT, ROLLING_EVENT, Counter, Waiting_Counter, Update_Counter, Result_List

    num = 0
    all_num = 0
    proc_name = mp.current_process().name

    while all_num != game_num * TRAIN_ITERS:

        env = tc.Client()
        #print('begin connect')
        env.connect(FLAGS.hostname, sc_port)
        #print('end connect')
        state = env.init(micro_battles=False)
        #print('end init')
        for pid, player in state.player_info.items():
            pass
            #print("player {} named {} is {}".format(player.id, player.name, tc.Constants.races._dict[player.race]))

        # Initial setup
        env.send([
            [tcc.set_speed, 0],
            [tcc.set_gui, 0],
            [tcc.set_cmd_optim, 1],
            [tcc.set_frameskip, FLAGS.frame_skip],
        ])

        agent.set_env(env)
        agent.set_obs(state)
        #print('begin play')
        agent.play()
        #print('end play')

        if FLAGS.training:
            # check if the num of episodes is enough to update
            num += 1
            all_num += 1
            reward = agent.result['reward']
            Counter += 1
            Result_List.append(reward)
            logging("(diff: %d) %d epoch: %s get %d/%d episodes! return: %d!" %
                    (int(difficulty), Update_Counter, proc_name,
                     len(Result_List), game_num * THREAD_NUM, reward))

            # time for update
            if num == game_num:
                num = 0
                ROLLING_EVENT.clear()
                # worker stops rolling, wait for update
                if agent.index != 0 and THREAD_NUM > 1:
                    Waiting_Counter += 1
                    if Waiting_Counter == THREAD_NUM - 1:  # wait until all the workers have stopped
                        UPDATE_EVENT.set()
                    ROLLING_EVENT.wait()

                # update!
                else:
                    if THREAD_NUM > 1:
                        UPDATE_EVENT.wait()

                    Synchronizer.wait()  # wait for other processes to update

                    agent.update_network(Result_List)
                    Result_List.clear()
                    agent.global_buffer.reset()

                    Synchronizer.wait()

                    Update_Counter += 1

                    # finish update
                    UPDATE_EVENT.clear()
                    Waiting_Counter = 0
                    ROLLING_EVENT.set()

        if FLAGS.save_replay:
            env.save_replay(FLAGS.replay_dir)

        #print('begin close')
        env.close()
        #print('end close')
        agent.reset()
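A hedged sketch of how run_thread might be launched, since the example relies on process-level globals and a Synchronizer with a wait() method. The launcher below is an assumption, not the original project's code; make_agent, base_port, and n_procs are hypothetical.

# Hypothetical launcher sketch (not from the original project).
import multiprocessing as mp


def launch(game_num, difficulty, base_port, n_procs):
    synchronizer = mp.Barrier(n_procs)  # shared barrier; run_thread calls Synchronizer.wait()
    procs = []
    for i in range(n_procs):
        agent = make_agent(index=i)  # hypothetical agent factory
        p = mp.Process(target=run_thread,
                       args=(agent, game_num, synchronizer, difficulty, base_port + i))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()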
Example #9
    def train(self):
        while self.episodes <= self.max_episodes:
            dprint("", 0)
            dprint("CTRL-C to stop", 0)
            dprint("", 0)

            nloop = 0  # step count within this episode

            cl = tc.Client()
            cl.connect(args.hostname, args.port)
            state = cl.init(micro_battles=True)
            # dprint("image_size" + str(state.image_size), 0)
            # dprint("map_size" + str(state.map_size), 0)
            # dprint("visibility_size" + str(state.visibility_size), 0)
            # 80, 128
            # dprint("start_locations" + str(state.start_locations[0].y), 0)
            env_utils._SCREEN_SIZE = np.array(state.map_size, np.int32) - 1


            for pid, player in state.player_info.items():
                dprint("player {} named {} is {}".format(player.id, player.name,
                                                        tc.Constants.races._dict[player.race]), 0)

            # Initial game setup
            cl.send([
                [tcc.set_combine_frames, self.skip_frames],
                [tcc.set_speed, self.set_speed],
                [tcc.set_gui, 1],
                [tcc.set_cmd_optim, 1],
            ])


            while True:
                nloop += 1
                state = cl.recv()
                reward = None
                if state.game_ended:
                    dprint("GAME ENDED", 0)
                    break

                # Timed out: end this episode
                if state.battle_frame_count > FLAGS.max_step * self.skip_frames or self.part_end: #2 * 60 * 24:
                    # count total episodes:
                    self.model_total_episodes += 1
                    self.reset()
                    cl.send([[tcc.restart]])
                    dprint("Battle frame count: {} too large!!!!!!!!!!!!!!!!!!!!!!!".format(state.battle_frame_count), 0)
                    continue

                self.steps += 1
                # 1: First step
                if self.all_friends_tag is None and not state.waiting_for_restart:
                    all_friend_units = state.units[0]
                    all_enemy_units = state.units[1]
                    raw_friends, raw_enemies = env_utils.get_units_info(all_friend_units, all_enemy_units)
                    self.all_friends_tag = raw_friends[:, 0]
                    self.all_enemies_tag = raw_enemies[:, 0]
                    self.friends_tag_2_id = {tag: id for id, tag in enumerate(self.all_friends_tag)}
                    self.friends_id_2_tag = dict(enumerate(self.all_friends_tag))
                    self.enemies_tag_2_id = {tag: id for id, tag in enumerate(self.all_enemies_tag)}
                    self.enemies_id_2_tag = dict(enumerate(self.all_enemies_tag))

                    self.friends_pre_health = {tag: health for tag, health in raw_friends[:, [0, 7]]}
                    self.enemies_pre_health = {tag: health for tag, health in raw_enemies[:, [0, 7]]}
                    pass
                    dprint("init!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", 0)
                # 2: Not the first step: compute the reward of the previous step's actions
                elif self.all_friends_tag is not None:
                    reward = self._calculated_reward(state)
                    self.reward += reward
                    # accumulate this episode's return
                    self.return_of_each_episode += reward
                    pass

                # 3: Battle ended (win or loss)
                if state.battle_just_ended:
                    if state.battle_won:
                        # count total episodes:
                        self.model_total_episodes += 1
                        # count the model's wins:
                        self.model_win_episodes += 1
                        self.recent_100_episodes_win_cumulative += 1
                        self.cumulative_win_times += 1
                        print(
                            "win: episode {}, total step {}, return: {}, cumulative win times: {}, recent {}/100".format(
                                self.episodes, self.steps, self.return_of_each_episode, self.cumulative_win_times,
                                self.recent_100_episodes_win_cumulative))

                        if self.is_training:
                            # store (s, a, r, s_, is_finished) (the terminal state does not matter)
                            self.save_transition(self.pre_all_alive_agents, reward, None)

                        self.record_recent_win_rate()
                    else:
                        # count total episodes:
                        self.model_total_episodes += 1
                        print(
                            "loss: episode {}, total step {}, return: {}, cumulative win times: {}, recent {}/100".format(
                                self.episodes, self.steps, self.return_of_each_episode, self.cumulative_win_times,
                                self.recent_100_episodes_win_cumulative))

                        if self.is_training:
                            # store (s, a, r, s_, is_finished) (the terminal state does not matter)
                            self.save_transition(self.pre_all_alive_agents, reward, None)

                        self.record_recent_win_rate()

                    self.reset()
                    actions = [[tcc.restart]]
                    cl.send(actions)
                    continue


                elif state.waiting_for_restart:
                    dprint("WAITING FOR RESTART", 0)
                    continue
                # 4: Episode start or in progress
                else:
                    actions = []
                    alive_friends = state.units[0]
                    alive_enemies = state.units[1]
                    alive_friends, alive_enemies = env_utils.get_units_info(alive_friends, alive_enemies)
                    # dprint("friends: {} \n".format(state.battle_frame_count) + str(alive_friends), -3)
                    # dprint("enemies: {} \n".format(state.battle_frame_count) + str(alive_enemies), -3)
                    # dprint("alives: " + str(state.aliveUnits), 0)
                    # env_utils.get_units_info(my_units, enemy_units)
                    dprint("frame count: {}".format(state.battle_frame_count), 0)
                    dprint("step: {}".format(self.steps), 0)
                    all_alive_agents = {}
                    select_actions_prob = []
                    for friend in alive_friends:
                        agent_tuple = {}
                        local_observation, sequence_len, alive_friends_order = env_utils.cal_local_observation_for_unit(friend,
                                                                                                              alive_friends,
                                                                                                              alive_enemies,
                                                                                                              self.friends_tag_2_id,
                                                                                                              self.enemies_tag_2_id)
                        dprint(local_observation, 0)
                        if self.is_training:
                            selected_action_id, _ = self.actor.operation_choose_action(1,
                                                                                       [local_observation[0]],
                                                                                       [local_observation[1]],
                                                                                       [sequence_len[0]],
                                                                                       [sequence_len[1]],
                                                                                       is_training=False)
                            select_actions_prob.append(_)
                        else:
                            selected_action_id = self.actor.operation_greedy_action(1,
                                                                                    [local_observation[0]],
                                                                                    [local_observation[1]],
                                                                                    [sequence_len[0]],
                                                                                    [sequence_len[1]],
                                                                                    is_training=False)

                        # what is stored here is effectively r, s', finished, and the actions taken in s'
                        agent_tuple['state_friend'] = local_observation[0]
                        agent_tuple['state_enemy'] = local_observation[1]
                        agent_tuple['sequence_friend'] = sequence_len[0]
                        agent_tuple['sequence_enemy'] = sequence_len[1]
                        agent_tuple['terminated'] = False
                        agent_tuple['action'] = env_utils.one_hot_action(selected_action_id, FLAGS.action_dim)
                        agent_tuple['action_other_order'] = alive_friends_order  # order of the other alive units' ids (not tags) from this unit's perspective (excluding itself)

                        all_alive_agents[self.friends_tag_2_id[friend[0]]] = agent_tuple

                        action_sc1 = env_utils.convert_discrete_action_2_sc1_action(friend, selected_action_id, alive_enemies, self.enemies_id_2_tag)
                        # action_sc1 = env_utils.convert_discrete_action_2_sc1_action(friend, 5, alive_enemies, self.enemies_id_2_tag)
                        actions.extend(action_sc1)
                    cl.send(actions)

                    if self.steps % FLAGS.print_softmax_every_steps == 0:
                        print("select_actions", select_actions_prob)
                        self.append_log_to_file("{}/actions/select_actions.txt".format(FLAGS.map),
                                                "episodes {}, steps {}, select actions: {}".format(self.episodes,
                                                                                                   self.steps,
                                                                                                   select_actions_prob))

                    if self.is_training and reward:
                        self.save_transition(self.pre_all_alive_agents, reward, all_alive_agents)

                    # update prev properties
                    self.pre_all_alive_agents = all_alive_agents

                    # TODO: batch training
                    if self.is_training and self.replay_buffer.length >= FLAGS.batch_size * 2 and self.steps % FLAGS.training_every_steps == 0:
                        (fr_states, em_state, fr_seq_len, em_seq_len, ac_others,
                         ac, reward,
                         nxt_fr_states, nxt_em_states, nxt_fr_sequence_len, nxt_em_sequence_len,
                         nxt_oth_fr_states, nxt_oth_em_states, nxt_oth_fr_seq_len, nxt_oth_em_seq_len,
                         terminated_batch) = self.replay_buffer.sample_batch(FLAGS.batch_size)
                        # training critic:
                        # 1: prepare the batch data
                        action_others_batch_s_ = []
                        # TODO: there may be only one entry (the current unit may already be dead)
                        for nxt_fr_s, nxt_fr_seq, nxt_em_s, nxt_em_seq in zip(nxt_oth_fr_states, nxt_oth_fr_seq_len,
                                                                              nxt_oth_em_states,
                                                                              nxt_oth_em_seq_len):  # for each unit: the observations of all the other units

                            if nxt_fr_s is None:
                                action_others_s_ = None
                            else:
                                # not one-hot encoded yet
                                action_others_s_ = self.actor.operation_greedy_action(len(nxt_fr_s), nxt_fr_s, nxt_em_s,
                                                                                      nxt_fr_seq,
                                                                                      nxt_em_seq, is_training=False)
                            action_per = []
                            if action_others_s_ is not None:
                                for action_id in action_others_s_:
                                    one_hot_a = env_utils.one_hot_action(action_id, FLAGS.action_dim)
                                    action_per.append(one_hot_a)
                            action_others_batch_s_.append(self._flatten_others_actions(action_per))

                        # 2: compute the TD target
                        batch_td_target = self.critic.operation_get_TDtarget(
                            len(nxt_fr_states),
                            nxt_fr_states,
                            nxt_em_states,
                            nxt_fr_sequence_len,
                            nxt_em_sequence_len,
                            action_others_batch_s_,  # already aligned, 11 * 8
                            reward,
                            terminated_batch,
                            is_training=True
                        )
                        # 3: training critic
                        self.critic.operation_critic_learn(len(fr_states),
                                                           fr_states,
                                                           em_state,
                                                           fr_seq_len,
                                                           em_seq_len,
                                                           ac_others,
                                                           ac, batch_td_target,
                                                           is_training=True)

                        # training actor
                        # 3: calculate advantage
                        actor_output_probability = self.actor.operation_cal_softmax_probablility(len(fr_states),
                                                                                                 fr_states,
                                                                                                 em_state,
                                                                                                 fr_seq_len,
                                                                                                 em_seq_len,
                                                                                                 is_training=True)
                        batch_advantages = self.critic.operation_cal_advantage(len(fr_states),
                                                                               fr_states,
                                                                               em_state,
                                                                               fr_seq_len,
                                                                               em_seq_len,
                                                                               ac_others,
                                                                               ac,
                                                                               actor_output_probability,
                                                                               is_training=True)
                        # update actor
                        cost = self.actor.operation_actor_learn(len(fr_states),
                                                                fr_states,
                                                                em_state,
                                                                fr_seq_len,
                                                                em_seq_len,
                                                                ac,
                                                                batch_advantages, is_training=True)
                        # self.new_state = final_state

                        # ===================================  Visualization  ====================================
                        # add summary
                        if self.steps % FLAGS.log_every_steps == 0:
                            feed_dict = {
                                self.actor.state_inputs_friends: fr_states,
                                self.actor.state_inputs_enemies: em_state,
                                self.actor.sequence_length_friends: fr_seq_len,
                                self.actor.sequence_length_enemies: em_seq_len,
                                self.actor.execute_action: ac,
                                self.actor.advantage: batch_advantages,
                                self.actor.is_training: True,
                                self.actor.keep_prob: 1.,
                                self.actor.batch_size: len(fr_states),

                                self.critic.state_input_friends: fr_states,
                                self.critic.state_input_enemies: em_state,
                                self.critic.sequence_length_friends: fr_seq_len,
                                self.critic.sequence_length_enemies: em_seq_len,
                                self.critic.other_units_action_input: ac_others,
                                self.critic.self_action_input: ac,
                                self.critic.Q_value_label_input: batch_td_target,
                                self.critic.is_training: True,
                                self.critic.keep_prob: 1.,
                                self.critic.batch_size: len(fr_states),

                                self.cumulative_reward_tensor: self.reward,
                                self.cumulative_win_times_tensor: self.cumulative_win_times,
                                self.return_of_each_episode_tensor: self.pre_return_of_each_episode
                            }
                            rs = self.sess.run(self.merged, feed_dict=feed_dict)
                            self.writer.add_summary(rs, self.steps)
                        # ================================== Visualization END ====================================

                        # soft update the parameters of the two model
                        # print("soft update parameters: episode {}, step {}, reward: {}".format(self.episodes, self.steps, reward))
                        self.actor.operation_soft_update_TDnet()
                        self.critic.operation_soft_update_TDnet()

                if not self.is_training and self.model_total_episodes == FLAGS.cal_win_rate_every_episodes:
                    if not self.is_testing:
                        self.is_training = True
                    print("model {} test end: =========================================================================".format(self.model_id))
                    content = "|| model {} test win rate : | episodes {} | steps {} | win rate {}/{} ||".format(self.model_id,
                                                                                       self.episodes,
                                                                                       self.steps,
                                                                                       self.model_win_episodes,
                                                                                       self.model_total_episodes)
                    self.append_log_to_file("{}/model/model.txt".format(FLAGS.map), content)
                    self.model_id += 1
                    self.model_win_episodes = 0
                    self.model_total_episodes = 0

                # TODO: save the model every half hour
                # if self.is_training and (time.time() - self.pre_save_time) > 1800:
                if self.is_training and self.model_total_episodes == FLAGS.verify_every_episodes and (
                        time.time() - self.pre_save_time) > 10:  # (guard against duplicate saves)
                    content = "model {}: episodes {}, steps {}, win rate {}/{}".format(self.model_id,
                                                                                       self.episodes,
                                                                                       self.steps,
                                                                                       self.model_win_episodes,
                                                                                       self.model_total_episodes)
                    self.append_log_to_file("{}/model/model.txt".format(FLAGS.map), content)

                    self.model_win_episodes = 0
                    self.model_total_episodes = 0

                    self.saver.save(self.sess, "{}/checkpoint_{}/model.ckpt".format(FLAGS.map, self.model_id))
                    self.pre_save_time = time.time()

                    print("model {} test begin: =========================================================================".format(self.model_id))
                    self.is_training = False


                # base update
                self.update_of_each_step(state)

            self.part_end = False
            cl.close()

        print(self.episodes)
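The training loop above depends on a _calculated_reward helper that is not shown. Below is a hedged sketch of one common shaping for this kind of micro battle (damage dealt minus damage taken, tracked through the tag-to-health maps kept in train()); it is an assumption, not the author's implementation, and is named _calculated_reward_sketch to make that explicit.

    def _calculated_reward_sketch(self, state):
        # Hypothetical reward (not the original): damage dealt to enemies minus
        # damage taken by friends since the previous step, using the
        # tag -> health dictionaries maintained in train().
        friends, enemies = env_utils.get_units_info(state.units[0], state.units[1])
        cur_friend_health = {tag: health for tag, health in friends[:, [0, 7]]}
        cur_enemy_health = {tag: health for tag, health in enemies[:, [0, 7]]}
        damage_dealt = sum(h - cur_enemy_health.get(tag, 0)
                           for tag, h in self.enemies_pre_health.items())
        damage_taken = sum(h - cur_friend_health.get(tag, 0)
                           for tag, h in self.friends_pre_health.items())
        self.friends_pre_health.update(cur_friend_health)
        self.enemies_pre_health.update(cur_enemy_health)
        return damage_dealt - damage_taken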