def main(args):
    """Train two ``args.algo`` agents against each other on the 'battle' map.

    Builds a ``magent.GridWorld('battle')`` environment, spawns one model per
    side (handles[0] and handles[1]) inside a shared TensorFlow session, and
    runs ``args.n_round`` rounds through ``tools.Runner`` with ``train=True``.

    Args:
        args: parsed command-line options. The attributes read here are
            ``map_size``, ``algo``, ``max_steps``, ``save_every``, ``render``
            and ``n_round``.
    """
    # Initialize the environment
    env = magent.GridWorld('battle', map_size=args.map_size)
    env.set_render_dir(
        os.path.join(BASE_DIR, 'examples/battle_model', 'build/render'))
    handles = env.get_handles()

    tf_config = tf.ConfigProto(allow_soft_placement=True,
                               log_device_placement=False)
    tf_config.gpu_options.allow_growth = True

    # 'data/tmp' has no '{}' placeholder, so no .format() call is needed; the
    # algorithm name reaches the runner separately through log_name.
    log_dir = os.path.join(BASE_DIR, 'data/tmp')
    model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo))

    start_from = 0

    sess = tf.Session(config=tf_config)
    try:
        # Both sides use the same algorithm; only the name suffix differs.
        models = [
            spawn_ai(args.algo, sess, env, handles[0],
                     args.algo + '-me', args.max_steps),
            spawn_ai(args.algo, sess, env, handles[1],
                     args.algo + '-opponent', args.max_steps),
        ]

        sess.run(tf.global_variables_initializer())

        runner = tools.Runner(
            sess, env, handles, args.map_size, args.max_steps, models, play,
            # Rendering reuses the save interval; 0 is passed when disabled.
            render_every=args.save_every if args.render else 0,
            save_every=args.save_every,
            tau=0.01,
            log_name=args.algo,
            log_dir=log_dir,
            model_dir=model_dir,
            train=True)

        for k in range(start_from, start_from + args.n_round):
            # Exploration schedule: anchor values 1 -> 0.2 -> 0.1 at rounds
            # 0, 80% of n_round and n_round (presumably interpolated
            # linearly by linear_decay -- confirm against its definition).
            eps = linear_decay(
                k, [0, int(args.n_round * 0.8), args.n_round], [1, 0.2, 0.1])
            runner.run(eps, k)
    finally:
        # Release the session's resources even if training raises.
        sess.close()
original_main = False

# True when the opponent algorithm is one of the listed baseline names.
original_oppo = args.oppo in ['ac', 'il', 'mfq', 'mfac']

# A neighbor count of -1 switches the bias flag off.
bias_flag = args.neighbor_nums != -1

sess = tf.Session(config=tf_config)

# The opponent model is named '<oppo>-opponent' when both sides run the same
# algorithm and '<oppo>-me' otherwise; everything else is identical.
oppo_suffix = '-opponent' if args.algo == args.oppo else '-me'
models = [
    spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me',
             args.max_steps, args.neighbor_nums, bias_flag),
    spawn_ai(args.oppo, sess, env, handles[1], args.oppo + oppo_suffix,
             args.max_steps, args.neighbor_nums, bias_flag),
]

# Initialize every variable created by the models above.
sess.run(tf.global_variables_initializer())
env.set_render_dir( os.path.join(BASE_DIR, 'examples/battle_model', 'build/render')) handles = env.get_handles() tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) tf_config.gpu_options.allow_growth = True main_model_dir = os.path.join(BASE_DIR, 'data/models/{}-0'.format(args.algo)) oppo_model_dir = os.path.join(BASE_DIR, 'data/models/{}-1'.format(args.oppo)) sess = tf.Session(config=tf_config) models = [ spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me', args.max_steps), spawn_ai(args.oppo, sess, env, handles[1], args.oppo + '-opponent', args.max_steps) ] sess.run(tf.global_variables_initializer()) models[0].load(main_model_dir, step=args.idx[0]) models[1].load(oppo_model_dir, step=args.idx[1]) runner = tools.Runner(sess, env, handles, args.map_size, args.max_steps, models, battle,
# Initialize the environment env = magent.GridWorld('battle', map_size=args.map_size) env.set_render_dir(os.path.join(BASE_DIR, 'examples/battle_model', 'build/render')) handles = env.get_handles() mtmfq_position = args.mtmfqp tf_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) tf_config.gpu_options.allow_growth = True main_model_dir = os.path.join(BASE_DIR, 'data/models/{}-0'.format(args.algo)) oppo_model_dir1 = os.path.join(BASE_DIR, 'data/models/{}-1'.format(args.oppo1)) oppo_model_dir2 = os.path.join(BASE_DIR, 'data/models/{}-2'.format(args.oppo2)) oppo_model_dir3 = os.path.join(BASE_DIR, 'data/models/{}-3'.format(args.oppo3)) sess = tf.Session(config=tf_config) models = [spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me', args.max_steps), spawn_ai(args.oppo1, sess, env, handles[1], args.oppo1 + '-opponent1', args.max_steps), spawn_ai(args.oppo2, sess, env, handles[2], args.oppo2 + '-opponent2', args.max_steps), spawn_ai(args.oppo3, sess, env, handles[3], args.oppo3 + '-opponent3', args.max_steps)] sess.run(tf.global_variables_initializer()) models[0].load(main_model_dir, step=args.idx[0]) models[1].load(oppo_model_dir1, step=args.idx[1]) models[2].load(oppo_model_dir2, step=args.idx[2]) models[3].load(oppo_model_dir3, step=args.idx[3]) runner = tools.Runner(sess, env, handles, args.map_size, args.max_steps, models, battle, mtmfq_position, render_every=0) win_cnt = {'main': 0, 'opponent1': 0, 'opponent2': 0, 'opponent3': 0} total_rewards = [] with open('storepoints_multibattle.csv', 'w+') as myfile: myfile.write('{0},{1},{2},{3},{4}\n'.format("Game", "Reward 1", "Reward 2","Reward 3", "Reward 4")) for k in range(0, args.n_round): total_rewards = runner.run(0.0, k, win_cnt=win_cnt) with open('storepoints_multibattle.csv', 'a') as myfile:
model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo)) with open('predator.csv', 'w+') as myfile: myfile.write('{0},{1}\n'.format("Episode", "Reward")) if args.algo in ['mfq', 'mfac', 'mtmfq']: use_mf = True else: use_mf = False start_from = 0 total_reward = [] meanerrortotal = [] sess = tf.Session(config=tf_config) models = [ spawn_ai(args.algo, sess, env, handles[0], args.algo + '-me', args.max_steps), spawn_ai(args.algo, sess, env, handles[1], args.algo + '-opponent1', args.max_steps), spawn_ai(args.algo, sess, env, handles[2], args.algo + '-opponent2', args.max_steps), spawn_ai(args.algo, sess, env, handles[3], args.algo + '-opponent3', args.max_steps) ] sess.run(tf.global_variables_initializer()) if args.algo == 'mtmfq': runner = tools.Runner( sess, env, handles, args.map_size, args.max_steps,
start_from = 0

sess = tf.Session(config=tf_config)

# Checkpoint directories: '-0'/'-1' for the two policy models and
# '-msg0'/'-msg1' for the message models, all keyed by the algorithm name.
main_model_dir = os.path.join(BASE_DIR, 'data/models/{}-0'.format(args.algo))
oppo_model_dir = os.path.join(BASE_DIR, 'data/models/{}-1'.format(args.algo))
main_msg_dir = os.path.join(BASE_DIR, 'data/models/{}-msg0'.format(args.algo))
oppo_msg_dir = os.path.join(BASE_DIR, 'data/models/{}-msg1'.format(args.algo))

# One policy model per side, both using args.algo.
models = [
    spawn_ai(args.algo, sess, env, handles[side], args.algo + tag,
             args.max_steps)
    for side, tag in enumerate(('-me', '-oppo'))
]

# Message models are optional: the literal string 'None' disables them.
if args.usemsg == 'None':
    print('do not use msg models')
    MsgModels = [None, None]
else:
    MsgModels = [
        spawn_ai('msgdqn', sess, env, handles[side], 'msgdqn' + tag,
                 args.max_steps)
        for side, tag in enumerate(('-me', '-opponent'))
    ]

# Initialize every variable created by the models above.
sess.run(tf.global_variables_initializer())
# Output locations for logs and checkpoints.
# NOTE(review): 'data/tmp' contains no '{}' placeholder, so .format(args.algo)
# leaves it unchanged -- confirm whether a per-algorithm subdirectory was meant.
log_dir = os.path.join(BASE_DIR, 'data/tmp'.format(args.algo))
model_dir = os.path.join(BASE_DIR, 'data/models/{}'.format(args.algo))

# Truncate gather.csv and write only the header row.
with open('gather.csv', 'w+') as myfile:
    myfile.write('{0},{1}\n'.format("Episode", "Reward"))

# Flag set for the algorithm names listed here.
use_mf = args.algo in ['mfq', 'mfac', 'mtmfq']

start_from = 0
total_reward = []

sess = tf.Session(config=tf_config)

# Four models of the same algorithm, one per entry of player_handles.
role_tags = ('-me', '-opponent1', '-opponent2', '-opponent3')
models = [
    spawn_ai(args.algo, sess, env, player_handles[slot], args.algo + tag,
             args.max_steps)
    for slot, tag in enumerate(role_tags)
]

sess.run(tf.global_variables_initializer())

# 'mtmfq' is driven by play2; every other algorithm uses play. All remaining
# Runner arguments are the same for both.
step_fn = play2 if args.algo == 'mtmfq' else play
runner = tools.Runner(
    sess, env, handles, args.map_size, args.max_steps, models, step_fn,
    render_every=args.save_every if args.render else 0,
    save_every=args.save_every,
    tau=0.01,
    log_name=args.algo,
    log_dir=log_dir,
    model_dir=model_dir,
    train=True)

for k in range(start_from, start_from + args.n_round):
    # Anchor values 1 -> 0.2 -> 0.1 at rounds 0, 80% of n_round and n_round.
    eps = linear_decay(
        k, [0, int(args.n_round * 0.8), args.n_round], [1, 0.2, 0.1])
    total_reward = runner.run(eps, k)