def __init__(self, path="data/pursuit_model", total_step=500):
    """Build the pursuit world, restore both group models and lay out the map.

    Args:
        path: directory handed to every model's ``load`` call.
        total_step: stored unchanged on the instance as ``total_step``.
    """
    # fixed settings for this server
    map_size = 1000
    eps = 0.00

    # create the world and fetch its group handles
    env = magent.GridWorld(load_config(map_size))
    handles = env.get_handles()

    # build every model first, then restore them, one per group
    group_names = ('predator', 'prey')
    models = [
        DeepQNetwork(env, handles[idx], name, use_conv=True)
        for idx, name in enumerate(group_names)
    ]
    for model, name in zip(models, group_names):
        model.load(path, 423, name)

    # reset the world before generating the map
    env.reset()
    generate_map(env, map_size, handles)

    # expose the state on the instance
    self.env = env
    self.handles = handles
    self.eps = eps
    self.models = models
    self.map_size = map_size
    self.total_step = total_step
    self.done = False
    # agent count of the first two groups, taken right after map generation
    self.total_handles = [
        self.env.get_num(self.handles[idx]) for idx in (0, 1)
    ]

    # prints whatever get_view2attack returns for the first group
    print(env.get_view2attack(handles[0]))
    # NOTE(review): plt is presumably matplotlib.pyplot -- confirm; nothing is
    # plotted inside this method before show() is called.
    plt.show()
def __init__(self, path="data/battle_model_3_players", total_step=1000,
             add_counter=10, add_interval=50):
    """Build the battle world, restore the models and lay out the map.

    Only two models are created: the second left-side model
    ('trusty-battle-game-l2') is disabled in the code below.

    Args:
        path: directory handed to every model's ``load`` call.
        total_step: stored unchanged on the instance as ``total_step``.
        add_counter: stored unchanged on the instance as ``add_counter``.
        add_interval: stored unchanged on the instance as ``add_interval``.
    """
    # some parameter
    map_size = 125
    eps = 0.00

    # init the game
    env = magent.GridWorld(utils.load_config(map_size))

    handles = env.get_handles()
    models = []
    models.append(
        DeepQNetwork(env, handles[0], 'trusty-battle-game-l1', use_conv=True))
    # models.append(DeepQNetwork(env, handles[1], 'trusty-battle-game-l2', use_conv=True))
    models.append(
        DeepQNetwork(env, handles[1], 'trusty-battle-game-r', use_conv=True))

    # load model
    # tf.reset_default_graph()
    models[0].load(path, 1, 'trusty-battle-game-l1')
    # models[1].load(path, 1, 'trusty-battle-game-l2')
    # tf.reset_default_graph()
    # The right-side model is always the last one appended. With the -l2
    # model disabled the list has only two entries, so the former
    # models[2] lookup raised IndexError.
    models[-1].load(path, 1, 'trusty-battle-game-r')

    # init environment
    env.reset()
    utils.generate_map(env, map_size, handles)

    # save to member variable
    self.env = env
    self.handles = handles
    self.eps = eps
    self.models = models
    self.map_size = map_size
    self.total_step = total_step
    self.add_interval = add_interval
    self.add_counter = add_counter
    self.done = False
    # agent count of the first two groups, taken right after map generation
    self.total_handles = [
        self.env.get_num(self.handles[0]),
        self.env.get_num(self.handles[1]),
    ]
def __init__(self, path="data/arrange_model", messages=None, mode=1):
    """Build the arrange world, restore its single model and lay out the map.

    Args:
        path: directory handed to the model's ``load`` call.
        messages: forwarded unchanged to ``generate_map``.
        mode: forwarded unchanged to ``generate_map`` (as its first argument).
    """
    # fixed settings for this server
    map_size = 250
    eps = 0.15

    # create the world and the font provider used by the map generator
    env = magent.GridWorld(load_config(map_size))
    font = FontProvider('data/font_8x8/basic.txt')

    # the first handle is kept apart as the food handle; the rest stay in `handles`
    all_handles = env.get_handles()
    food_handle = all_handles[0]
    handles = all_handles[1:]

    # a single model, bound to the first remaining handle and restored from `path`
    arrange_model = DeepQNetwork(env, handles[0], 'arrange', use_conv=True)
    models = [arrange_model]
    arrange_model.load(path, 10)

    # reset the world before generating the map
    env.reset()
    generate_map(mode, env, map_size, food_handle, handles, messages, font)

    # expose the state on the instance
    self.env = env
    self.food_handle = food_handle
    self.handles = handles
    self.eps = eps
    self.models = models
    self.done = False
    self.map_size = map_size

    # bookkeeping attributes, all starting from their empty/zero values
    self.new_rule_ct = 0
    self.pos_reward_ct = set()
    self.num = None
    self.ct = 0
def __init__(self, path="data/battle_model", total_step=1000,
             add_counter=10, add_interval=50):
    """Build the battle world, restore both side models and lay out the map.

    Args:
        path: directory handed to every model's ``load`` call.
        total_step: stored unchanged on the instance as ``total_step``.
        add_counter: stored unchanged on the instance as ``add_counter``.
        add_interval: stored unchanged on the instance as ``add_interval``.
    """
    # fixed settings for this server
    map_size = 125
    eps = 0.05

    # create the world and fetch its group handles
    env = magent.GridWorld(load_config(map_size))
    handles = env.get_handles()

    # build every model first, then restore them, one per side
    side_names = ('trusty-battle-game-l', 'trusty-battle-game-r')
    models = [
        DeepQNetwork(env, handles[idx], name, use_conv=True)
        for idx, name in enumerate(side_names)
    ]
    for model, name in zip(models, side_names):
        model.load(path, 0, name)

    # reset the world before generating the map
    env.reset()
    generate_map(env, map_size, handles)

    # expose the state on the instance
    self.env = env
    self.handles = handles
    self.eps = eps
    self.models = models
    self.map_size = map_size
    self.total_step = total_step
    self.add_interval = add_interval
    self.add_counter = add_counter
    self.done = False

    # prints whatever get_view2attack returns for the first group
    print(env.get_view2attack(handles[0]))
    # NOTE(review): plt is presumably matplotlib.pyplot -- confirm; nothing is
    # plotted inside this method before show() is called.
    plt.show()
def __init__(self, path="data/against_v2", total_step=500):
    """Build the "battle" world, create two models and lay out the map.

    Args:
        path: only referenced by load calls that are currently disabled;
            the active code does not read it.
        total_step: stored unchanged on the instance as ``total_step``.
    """
    # fixed settings for this server
    map_size = 125
    eps = 0.00

    # create the world from the built-in "battle" config and fetch its handles
    env = magent.GridWorld("battle", map_size=map_size)
    handles = env.get_handles()

    # one convolutional DQN per group
    first_model = DeepQNetwork(
        env, handles[0], 'trusty-battle-game-l', use_conv=True)
    second_model = DeepQNetwork(env, handles[1], 'battle', use_conv=True)
    models = [first_model, second_model]

    # Disabled load calls kept from the original for reference:
    #   models[0].load(path, 999, 'against-a')
    #   models[0].load('data/battle_model_1000_vs_500', 1500, 'trusty-battle-game-l')
    #   models[1].load(path, 999, 'battle')
    #   models[0].load("data/battle_model_1000_vs_500", 1500, 'trusty-battle-game-l')
    # NOTE(review): only the second model is restored here; the first keeps
    # whatever state its constructor gives it -- confirm this is intended.
    second_model.load(
        "data/battle_model_1000_vs_500", 1500, 'trusty-battle-game-r')

    # reset the world before generating the map
    env.reset()
    # the four returned values are unpacked but not used afterwards
    x0, y0, x1, y1 = utils.generate_map(env, map_size, handles)

    # expose the state on the instance
    self.env = env
    self.handles = handles
    self.eps = eps
    self.models = models
    self.map_size = map_size
    self.total_step = total_step
    self.done = False
    # agent count of the first two groups, taken right after map generation
    self.total_handles = [
        self.env.get_num(self.handles[idx]) for idx in (0, 1)
    ]
generate_map(env, args.map_size, handles) eval_obs = buffer.sample_observation(env, handles, 2048, 500)[0] # init models batch_size = 256 unroll_step = 8 target_update = 1000 train_freq = 5 models = [] if args.alg == 'dqn': models.append( DeepQNetwork(env, handles[0], "selfplay", batch_size=batch_size, memory_size=2**20, target_update=target_update, train_freq=train_freq, eval_obs=eval_obs)) elif args.alg == 'drqn': models.append( DeepRecurrentQNetwork(env, handles[0], "selfplay", batch_size=batch_size / unroll_step, unroll_step=unroll_step, memory_size=2 * 8 * 625, target_update=target_update, train_freq=train_freq, eval_obs=eval_obs)) else:
# Unpack the two group handles returned by the environment.
deer_handle, tiger_handle = env.get_handles()

# Populate the map at random positions: agent_number / 10 walls and
# agent_number / 2 agents for each of the two groups.
# NOTE(review): under Python 3 `/` is true division, so these `n` values are
# floats -- confirm add_walls/add_agents accept (or convert) them.
env.add_walls(method="random", n=agent_number / 10)
env.add_agents(deer_handle, method="random", n=agent_number / 2)
env.add_agents(tiger_handle, method="random", n=agent_number / 2)

# init two models
# With args.num_gpu == 0 both groups are driven by a RandomActor; otherwise a
# DeepQNetwork is imported from the backend selected by args.frame ('tf' picks
# models.tf_model, any other value picks models.mx_model).
if args.num_gpu == 0:
    model1 = RandomActor(env, deer_handle, "deer")
    model2 = RandomActor(env, tiger_handle, "tiger")
else:
    if args.frame == 'tf':
        from models.tf_model import DeepQNetwork
    else:
        from models.mx_model import DeepQNetwork
    model1 = DeepQNetwork(env, deer_handle, "deer", num_gpu=args.num_gpu, infer_batch_size=100000)
    model2 = DeepQNetwork(env, tiger_handle, "tiger", num_gpu=args.num_gpu, infer_batch_size=100000)

total_reward = 0

# Show the view space reported by the environment for each group.
print(env.get_view_space(deer_handle))
print(env.get_view_space(tiger_handle))

total_time = 0

for i in range(n_step):
    print("===== step %d =====" % i)
    start_time = time.time()

    # NOTE(review): measure_time presumably times the wrapped call under the
    # given label and returns the call's result -- confirm against its definition.
    obs_1 = measure_time("get obs 1", env.get_observation, deer_handle)
    acts_1 = measure_time("infer act 1", model1.infer_action, obs_1, None)
"""rename tensorflow models""" import sys import magent from models.tf_model import DeepQNetwork env = magent.GridWorld("battle", map_size=125) handles = env.get_handles() rounds = eval(sys.argv[1]) for i in [rounds]: model = DeepQNetwork(env, handles[0], "battle") print("load %d" % i) model.load("data/", i, "selfplay") print("save %d" % i) model.save("data/battle_model", i)
# init the game "pursuit" (config file are stored in python/magent/builtin/config/) env = magent.GridWorld("pursuit", map_size=map_size) env.set_render_dir("build/render") # get group handles predator, prey = env.get_handles() # init env and agents env.reset() env.add_walls(method="random", n=map_size * map_size * 0.01) env.add_agents(predator, method="random", n=map_size * map_size * 0.02) env.add_agents(prey, method="random", n=map_size * map_size * 0.02) # init two models model1 = DeepQNetwork(env, predator, "predator") model2 = DeepQNetwork(env, prey, "prey") # load trained model model1.load("data/pursuit_model") model2.load("data/pursuit_model") done = False step_ct = 0 print("nums: %d vs %d" % (env.get_num(predator), env.get_num(prey))) while not done: # take actions for deers obs_1 = env.get_observation(predator) ids_1 = env.get_agent_id(predator) acts_1 = model1.infer_action(obs_1, ids_1) env.set_action(predator, acts_1)
deer_handle, tiger_handle = env.get_handles() # init two models models = [ RandomActor(env, deer_handle, tiger_handle), ] batch_size = 512 unroll = 8 if args.alg == 'dqn': from models.tf_model import DeepQNetwork models.append( DeepQNetwork(env, tiger_handle, "tiger", batch_size=batch_size, memory_size=2**20, learning_rate=4e-4)) step_batch_size = None elif args.alg == 'drqn': from models.tf_model import DeepRecurrentQNetwork models.append( DeepRecurrentQNetwork(env, tiger_handle, "tiger", batch_size=batch_size / unroll, unroll_step=unroll, memory_size=20000, learning_rate=4e-4)) step_batch_size = None elif args.alg == 'a2c':
# init models batch_size = 512 unroll_step = 8 target_update = 1200 train_freq = 5 models = [] if args.alg == 'dqn': from models.tf_model import DeepQNetwork models.append( DeepQNetwork(env, handles[0], args.name, batch_size=batch_size, learning_rate=3e-4, memory_size=2**21, target_update=target_update, train_freq=train_freq, eval_obs=eval_obs)) elif args.alg == 'drqn': from models.tf_model import DeepRecurrentQNetwork models.append( DeepRecurrentQNetwork(env, handles[0], args.name, learning_rate=3e-4, batch_size=batch_size / unroll_step, unroll_step=unroll_step, memory_size=2 * 8 * 625, target_update=target_update,