def play_a_round(env, map_size, handles, models, print_every, train=True, render=False, eps=None):
    env.reset()
    env.add_walls(method="random", n=map_size * map_size * 0.03)
    env.add_agents(handles[0], method="random", n=map_size * map_size * 0.0125)
    env.add_agents(handles[1], method="random", n=map_size * map_size * 0.025)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i], ids[i], 'e_greedy', eps, block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            if train:
                alives = env.get_alive(handles[i])
                # store samples in replay buffer (non-blocking)
                models[i].sample_step(rewards, alives, block=False)
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # clear dead agents
        env.clear_dead()

        # check 'done' returned by 'sample' command
        if train:
            for model in models:
                model.check_done()

        if step_ct % print_every == 0:
            print("step %3d, reward: %s, total_reward: %s" %
                  (step_ct, np.around(step_reward, 2), np.around(total_reward, 2)))
        step_ct += 1
        if step_ct > 250:
            break

    sample_time = time.time() - start_time
    print("steps: %d, total time: %.2f, step average %.2f" % (step_ct, sample_time, sample_time / step_ct))

    # train
    total_loss, value = [0 for _ in range(n)], [0 for _ in range(n)]
    if train:
        print("===== train =====")
        start_time = time.time()

        # train models in parallel
        for i in range(n):
            models[i].train(print_every=2000, block=False)
        for i in range(n):
            total_loss[i], value[i] = models[i].fetch_train()

        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    print(total_loss)
    print(total_reward)
    print(value)
    return magent.round(total_loss), magent.round(total_reward), magent.round(value)
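# Usage sketch (illustrative, not part of the original script). play_a_round
# expects model objects that expose the non-blocking interface used above
# (infer_action / fetch_action / sample_step / fetch_train / check_done),
# e.g. models wrapped by MAgent's ProcessingModel; their construction is
# abstracted behind a hypothetical build_models() helper here. The "pursuit"
# config name, the round count, and the epsilon schedule are assumptions.
import magent

map_size = 200
env = magent.GridWorld("pursuit", map_size=map_size)
handles = env.get_handles()
models = build_models(env, handles)  # hypothetical helper returning wrapped models

for k in range(250):
    eps = max(0.05, 1.0 - 0.01 * k)  # simple linear exploration decay (assumed schedule)
    loss, reward, value = play_a_round(env, map_size, handles, models,
                                       print_every=50, train=True,
                                       render=False, eps=eps)
    print("round %d  loss: %s  reward: %s  value: %s" % (k, loss, reward, value))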
max_gpu = args.max_gpu
framework = args.frame

number = [1000, 10000, 100000, 1000000]
gpus = range(max_gpu + 1)

ret = []
for n in number:
    row = []
    for g in gpus:
        # keep the total number of agent-steps roughly constant across agent counts
        n_step = 30000000 / n
        cmd = ("python scripts/test/test_1m.py --n_step %d --agent_number %d --num_gpu %d --frame %s > /dev/shm/aha "
               "&& cat /dev/shm/aha | grep FPS > %s" % (n_step, n, g, framework, tmp_name))
        if n < 1000000:
            cmd = 'OMP_NUM_THREADS=8 ' + cmd
        else:
            cmd = 'OMP_NUM_THREADS=16 ' + cmd
        print(cmd)
        os.system(cmd)

        # read back the FPS line written by test_1m.py
        with open(tmp_name) as fin:
            line = fin.readline()
            x = eval(line)[1]
            row.append(x)
            print(x)
    ret.append(row)

for row in ret:
    print(magent.round(row))
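# Optional reporting sketch (not in the original script): print the collected
# FPS measurements as a table, one row per agent count and one column per GPU
# count. It only uses `number`, `gpus`, and `ret` defined above; the header
# wording is an assumption.
header = "  agents " + " ".join("%12s" % ("%d GPU(s)" % g) for g in gpus)
print(header)
for n, row in zip(number, ret):
    print("%8d " % n + " ".join("%12s" % str(x) for x in row))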
def play_a_round(env, map_size, handles, models, print_every, eps, step_batch_size=None,
                 train=True, train_id=1, render=False):
    """play a round of the game"""
    env.reset()
    generate_map(env, map_size, handles)

    step_ct = 0
    done = False

    n = len(handles)
    obs = [[] for _ in range(n)]
    ids = [[] for _ in range(n)]
    acts = [[] for _ in range(n)]
    nums = [env.get_num(handle) for handle in handles]
    total_reward = [0 for _ in range(n)]
    n_transition = 0
    pos_reward_num = 0
    total_loss, value = 0, 0

    print("===== sample =====")
    print("eps %s number %s" % (eps, nums))
    start_time = time.time()
    while not done:
        # take actions for every model
        for i in range(n):
            obs[i] = env.get_observation(handles[i])
            ids[i] = env.get_agent_id(handles[i])
            # let models infer action in parallel (non-blocking)
            models[i].infer_action(obs[i], ids[i], 'e_greedy', eps[i], block=False)

        for i in range(n):
            acts[i] = models[i].fetch_action()  # fetch actions (blocking)
            env.set_action(handles[i], acts[i])

        # simulate one step
        done = env.step()

        # sample
        step_reward = []
        for i in range(n):
            rewards = env.get_reward(handles[i])
            if train and i == train_id:
                alives = env.get_alive(handles[train_id])
                # store samples in replay buffer (non-blocking)
                models[train_id].sample_step(rewards, alives, block=False)
                pos_reward_num += len(rewards[rewards > 0])
            s = sum(rewards)
            step_reward.append(s)
            total_reward[i] += s

        # render
        if render:
            env.render()

        # stat info
        nums = [env.get_num(handle) for handle in handles]
        n_transition += nums[train_id]

        # clear dead agents
        env.clear_dead()

        # check the return message of the previously issued non-blocking sample_step() call
        if train:
            models[train_id].check_done()

        if step_ct % print_every == 0:
            print("step %3d, nums: %s reward: %s, total_reward: %s, pos_rewards %d" %
                  (step_ct, nums, np.around(step_reward, 2), np.around(total_reward, 2), pos_reward_num))
        step_ct += 1

        if step_ct > args.n_step:
            break

        # train within the episode once enough transitions have been collected
        if step_batch_size and n_transition > step_batch_size and train:
            total_loss, value = models[train_id].train(500)
            n_transition = 0

    sample_time = time.time() - start_time
    print("steps: %d, total time: %.2f, step average %.2f" % (step_ct, sample_time, sample_time / step_ct))

    # train
    if train:
        print("===== train =====")
        start_time = time.time()
        total_loss, value = models[train_id].train(500)
        train_time = time.time() - start_time
        print("train_time %.2f" % train_time)

    return magent.round(total_loss), nums, magent.round(total_reward), magent.round(value)
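# Usage sketch for this variant (illustrative, not part of the original
# script): `eps` is now a per-group list and only models[train_id] collects
# samples and trains, so the other side can be held fixed (e.g. a pretrained
# opponent acting greedily). `env`, `handles`, `models`, `generate_map`, and
# `args` are assumed to be set up as in the surrounding script; the epsilon
# schedule and step_batch_size value below are assumptions.
train_id = 1
for k in range(args.n_round):  # args.n_round assumed to exist alongside args.n_step
    eps = [0.0, max(0.05, 1.0 - 0.01 * k)]  # opponent greedy, learner explores (assumed schedule)
    loss, nums, reward, value = play_a_round(env, args.map_size, handles, models,
                                             print_every=50, eps=eps,
                                             step_batch_size=1000,  # assumed mid-episode training threshold
                                             train=True, train_id=train_id)
    print("round %d  loss: %s  nums: %s  reward: %s  value: %s" % (k, loss, nums, reward, value))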