def initialize(self):
    args = self.args

    co = minirts.ContextOptions()
    self.context_args.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    num_unittype = GC.get_num_unittype()
    print("Num unittype: ", num_unittype)

    desc = {}
    # For the actor model no reward is needed; we only want the input and the
    # returned action distribution. The sampled action and value are filled in
    # from the reply.
    desc["actor"] = (
        dict(s=str(num_unittype + 7), r0="", r1="", last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1"))

    if not args.actor_only:
        # For training we want the input, the action and value (both filled by
        # the actor model) and the reward.
        desc["train"] = (
            dict(rv="", pi=str(num_action), s=str(num_unittype + 7), r0="", r1="",
                 a="1", r="1", V="1", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None)

    if args.additional_labels is not None:
        extra = {label: "" for label in args.additional_labels.split(",")}
        for _, v in desc.items():
            v[0].update(extra)

    params = dict(
        num_action=num_action,
        num_unit_type=num_unittype,
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc["actor"][0]["_batchsize"]),
        train_batchsize=int(desc["train"][0]["_batchsize"]) if not args.actor_only else None,
        T=args.T)

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
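# --- Illustration (not part of the original file) ---------------------------
# The descriptor above uses a string-encoded layout: plain keys appear to map
# to a tensor width given as a string (an empty string presumably meaning a
# default/scalar shape), while "_batchsize" and "_T" are meta entries. A
# minimal sketch of how such a spec could be unpacked on the Python side;
# "unpack_spec" is a hypothetical helper, not part of minirts or utils_elf.
def unpack_spec(spec):
    # Meta fields are prefixed with "_" and always carry integer values.
    meta = {k.lstrip("_"): int(v) for k, v in spec.items() if k.startswith("_")}
    # Remaining keys name the collected tensors; empty widths stay unspecified.
    widths = {k: (int(v) if v else None)
              for k, v in spec.items() if not k.startswith("_")}
    return meta, widths

# Toy actor input spec mirroring the code above (here num_unittype + 7 == 23).
meta, widths = unpack_spec(dict(s="23", last_r="", _batchsize="128", _T="1"))
# meta   == {"batchsize": 128, "T": 1}
# widths == {"s": 23, "last_r": None}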
def initialize(self):
    args = self.args

    co = minirts.ContextOptions()
    self.context_args.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)
    params = GC.GetParams()
    print("Version: ", GC.Version())
    print("Num Actions: ", params["num_action"])
    print("Num unittype: ", params["num_unit_type"])

    desc = {}
    # For the actor model no reward is needed; we only want the input and the
    # returned action distribution. The sampled action and value are filled in
    # from the reply.
    desc["actor"] = dict(
        batchsize=args.batchsize,
        input=dict(T=1, keys=set(["s", "res", "last_r", "r0", "r1", "terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, the action and value (both filled by the actor
        # model) and the reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set([
                "rv", "pi", "s", "res", "a", "last_r", "r0", "r1", "V", "terminal"
            ])),
            reply=None)

    if args.additional_labels is not None:
        extra = args.additional_labels.split(",")
        for _, v in desc.items():
            v["input"]["keys"].update(extra)

    params.update(dict(
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc["actor"]["batchsize"]),
        train_batchsize=int(desc["train"]["batchsize"]) if not args.actor_only else None,
        T=args.T))

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
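# --- Illustration (not part of the original file) ---------------------------
# Self-contained sketch of the "additional_labels" handling above: the
# comma-separated option value is split and merged into every group's input
# key set, so extra per-sample fields (e.g. "id" or "seq") are collected as
# well. The concrete label names below are only examples.
_desc = {
    "actor": dict(batchsize=128,
                  input=dict(T=1, keys=set(["s", "last_r", "terminal"])),
                  reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
}
_extra = "id,seq".split(",")            # e.g. additional_labels="id,seq"
for _, _v in _desc.items():
    _v["input"]["keys"].update(_extra)
assert set(["id", "seq"]) <= _desc["actor"]["input"]["keys"]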
def initialize(self):
    args = self.args

    co = minirts.ContextOptions()
    self.context_params.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    num_unittype = GC.get_num_unittype()
    print("Num unittype: ", num_unittype)

    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward is needed; we only want the input and the returned action
    # distribution. The sampled action and value are filled in from the reply.
    name2idx["actor"] = len(desc)
    desc.append((
        dict(id="", s=str(2), r0="", r1="", last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, the action and value (both filled by the actor
        # model) and the reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(2), r0="", r1="",
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict(
        num_action=num_action,
        num_unit_type=num_unittype,
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc[name2idx["actor"]][0]["_batchsize"]),
        train_batchsize=int(desc[name2idx["train"]][0]["_batchsize"]) if not args.actor_only else None,
        T=args.T)

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
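# --- Illustration (not part of the original file) ---------------------------
# Standalone sketch of the list-plus-name2idx bookkeeping used above: the
# descriptors live in a list (one entry per collector group) and name2idx maps
# a readable group name to its position, so later code can look a group up by
# name. The values are shortened toy data, not real settings.
_desc = []
_name2idx = {}

_name2idx["actor"] = len(_desc)         # group 0
_desc.append((dict(s="2", _batchsize="64", _T="1"), dict(a="1")))

_name2idx["train"] = len(_desc)         # group 1
_desc.append((dict(s="2", _batchsize="64", _T="6"), None))

_actor_input, _actor_reply = _desc[_name2idx["actor"]]
_train_batchsize = int(_desc[_name2idx["train"]][0]["_batchsize"])   # -> 64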
def reduced_project(batch):
    global cnt_project
    cnt_project += 1
    # print("in reduced_project, cnt_project = %d" % cnt_project)


# ======================= load environment =======================
co = minirts.ContextOptions()
# need to check the candidate keys
co.num_games = 20
co.T = 1
co.wait_per_group = 1
co.verbose_comm = 1

opt = minirts.PythonOptions()

GC = minirts.GameContext(co, opt)

batch_descriptions = {
    "actor": dict(
        batchsize=128,
        input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_predict": dict(
        batchsize=128,
        input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_forward": dict(
        batchsize=128,
        input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_project":