Example #1
    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_params.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        desc = []
        name2idx = {}
        # For the actor model (group 0):
        # No reward is needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        name2idx["actor"] = len(desc)
        desc.append((dict(id="",
                          s=str(2),
                          r0="",
                          r1="",
                          last_r="",
                          last_terminal="",
                          _batchsize=str(args.batchsize),
                          _T="1"),
                     dict(rv="",
                          pi=str(num_action),
                          V="1",
                          a="1",
                          _batchsize=str(args.batchsize),
                          _T="1")))

        if not args.actor_only:
            # For training (group 1):
            # We want the input, the action and value (both filled in by the actor
            # models), and the reward.
            name2idx["train"] = len(desc)
            desc.append((dict(rv="",
                              id="",
                              pi=str(num_action),
                              s=str(2),
                              r0="",
                              r1="",
                              a="1",
                              r="1",
                              V="1",
                              seq="",
                              terminal="",
                              _batchsize=str(args.batchsize),
                              _T=str(args.T)), None))

        inputs, replies = utils_elf.init_collectors(GC,
                                                    co,
                                                    desc,
                                                    use_numpy=False)

        params = dict(
            num_action=num_action,
            num_unit_type=num_unittype,
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc[name2idx["actor"]][0]["_batchsize"]),
            train_batchsize=int(desc[name2idx["train"]][0]["_batchsize"])
            if not args.actor_only else None,
            T=args.T)

        return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
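
In a typical ELF training script, the wrapper returned by initialize() is driven by registering one callback per group and then running the game loop. The sketch below is a minimal illustration, assuming GCWrapper exposes reg_callback, Start, Run, and Stop as in the public ELF code base; game, actor_fn, train_fn, args, and num_iterations are placeholder names, not part of the original code.

    # Minimal driver sketch (assumes GCWrapper provides reg_callback/Start/Run/Stop).
    GC = game.initialize()                  # the method shown above
    GC.reg_callback("actor", actor_fn)      # fills pi, V, a, rv for group "actor"
    if not args.actor_only:
        GC.reg_callback("train", train_fn)  # consumes T-step batches for group "train"

    GC.Start()
    for _ in range(num_iterations):
        GC.Run()   # process batches, dispatching each to its registered callback
    GC.Stop()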
Example #2
    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = (dict(s=str(num_unittype + 7),
                              r0="",
                              r1="",
                              last_r="",
                              last_terminal="",
                              _batchsize=str(args.batchsize),
                              _T="1"),
                         dict(rv="",
                              pi=str(num_action),
                              V="1",
                              a="1",
                              _batchsize=str(args.batchsize),
                              _T="1"))

        if not args.actor_only:
            # For training, we want the input, the action and value (both filled in by the actor models), and the reward.
            desc["train"] = (dict(rv="",
                                  pi=str(num_action),
                                  s=str(num_unittype + 7),
                                  r0="",
                                  r1="",
                                  a="1",
                                  r="1",
                                  V="1",
                                  terminal="",
                                  _batchsize=str(args.batchsize),
                                  _T=str(args.T)), None)

        if args.additional_labels is not None:
            extra = {label: "" for label in args.additional_labels.split(",")}
            for _, v in desc.items():
                v[0].update(extra)

        params = dict(num_action=num_action,
                      num_unit_type=num_unittype,
                      num_group=1 if args.actor_only else 2,
                      action_batchsize=int(desc["actor"][0]["_batchsize"]),
                      train_batchsize=int(desc["train"][0]["_batchsize"])
                      if not args.actor_only else None,
                      T=args.T)

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #3
    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        params = GC.GetParams()
        print("Version: ", GC.Version())

        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = dict(
            batchsize=args.batchsize,
            input=dict(T=1,
                       keys=set(["s", "res", "last_r", "r0", "r1",
                                 "terminal"])),
            reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training (group 1):
            # We want the input, the action and value (both filled in by the actor
            # models), and the reward.
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set([
                                                "rv", "pi", "s", "res", "a",
                                                "last_r", "r0", "r1", "V",
                                                "terminal"
                                            ])),
                                 reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        params.update(
            dict(num_group=1 if args.actor_only else 2,
                 action_batchsize=int(desc["actor"]["batchsize"]),
                 train_batchsize=int(desc["train"]["batchsize"])
                 if not args.actor_only else None,
                 T=args.T))

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
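
With the key-based descriptors in this example, each callback receives a batch containing the declared input keys and is expected to return a dict with the declared reply keys ("rv", "pi", "V", "a" for the actor group). Below is a minimal sketch of such an actor callback, assuming batches arrive as torch tensors (use_numpy=False); net and model_version are placeholder names, not part of the original code.

    import torch

    def actor_fn(batch):
        # batch["s"] is the state tensor for one batch of games at T=1.
        with torch.no_grad():
            pi, V = net(batch["s"])              # action distribution and value estimate
            a = torch.multinomial(pi, 1)[:, 0]   # sample one action per game
        # Return exactly the reply keys declared for the "actor" group.
        return dict(pi=pi, V=V, a=a, rv=model_version)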