# NOTE(review): collapsed fragment (original newlines lost) of a TF1 A3C-style setup
# for the "simple_spread6" particle env: builds one shared 'global' AC_Network, then —
# in code now swallowed by the inline '#' comment — a worker-construction loop over
# FLAGS.num_slaves. Line is truncated mid-expression at "FLAGS." — presumably
# FLAGS.comm_delivery_failure_chance as in the intact variants below; restore the
# original multi-line formatting before editing. Do not treat the text after the
# first '#' as a comment: it is live code in the un-collapsed original.
master_network = AC_Network( state_size, s_size_central, number_of_agents, action_size, (number_of_agents - 1) * comm_size, (number_of_agents - 1) * comm_size if spread_messages else comm_size, 'global', None, critic_action=critic_action, critic_comm=critic_comm) # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append( Worker(make_env("simple_spread6", benchmark=True), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.
# NOTE(review): collapsed fragment beginning MID-ARGUMENT-LIST (the enclosing
# AC_Network(...) call for the prey network starts before this chunk) for the
# "simple_tag" env: three predator agents share one network, the prey gets its own
# ('_agentPrey' scope). The worker loop after the first '#' is swallowed code, and
# the line is truncated at "FLAGS." — presumably FLAGS.comm_delivery_failure_chance.
# Recover the un-collapsed original before making any behavioral change.
action_size[3], 0, 0, 'global', None, '_agentPrey', critic_action=critic_action, critic_comm=critic_comm) master_networks = [predator, predator, predator, prey] # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append( Worker(make_env("simple_tag"), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.
# NOTE(review): collapsed fragment beginning MID-ARGUMENT-LIST (the AC_Network(...)
# call for the '_agentPrey' network starts before this chunk) for the
# "simple_adversary" env: agent 0 is the prey/adversary, agents 1-2 share the
# predator network. Comm sizes are hard-coded 0 here, unlike the other variants —
# verify that is intentional. Truncated at "FLAGS." (presumably
# FLAGS.comm_delivery_failure_chance); worker loop after the first '#' is swallowed
# code, not a comment, in the un-collapsed original.
number_of_agents, action_size[0], 0, 0, 'global', None, '_agentPrey', critic_action=critic_action, critic_comm=critic_comm) master_networks = [prey, predator, predator] # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append( Worker(make_env("simple_adversary"), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.
# NOTE(review): collapsed fragment of the "simple_speaker_listener" training setup:
# creates model_path, then under /cpu:0 builds a global_episodes counter, an Adam
# trainer, one per-agent global AC_Network ('_agent'+str(i) scopes, indexed
# state/action sizes), the Worker pool, a Saver, and opens the Session/Coordinator.
# Starts mid-block (the `if not os.path.exists(model_path):` guard that presumably
# precedes os.makedirs is cut off) and ends mid-`if load_model:` branch. The code
# after each '#' on this line is live code in the un-collapsed original, not comments.
os.makedirs(model_path) with tf.device("/cpu:0"): global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) trainer = tf.train.AdamOptimizer(learning_rate=learning_rate) master_networks = [AC_Network(state_size[i], s_size_central[i], number_of_agents, action_size[i], (number_of_agents - 1) * comm_size, (number_of_agents - 1) * comm_size if spread_messages else comm_size, 'global', None, '_agent'+str(i), critic_action=critic_action, critic_comm=critic_comm) for i in range(number_of_agents)] # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append(Worker(make_env("simple_speaker_listener"), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.comm_delivery_failure_chance, comm_gaussian_noise=FLAGS.comm_gaussian_noise, comm_jumble_chance=FLAGS.comm_jumble_chance)) saver = tf.train.Saver() with tf.Session() as sess: coord = tf.train.Coordinator() if load_model: print('Loading Model...')
# NOTE(review): collapsed fragment of another "simple_adversary" setup variant:
# builds separate '_agentPredator' and '_agentPrey' global networks (indices 2 and 1
# of the per-agent size lists) and maps agents via master_networks = [None, prey,
# predator] — note slot 0 is None here, unlike the [prey, predator, predator]
# mapping on the L3 fragment; confirm which is correct, they look inconsistent.
# Starts mid-block (missing the os.path.exists guard) and ends mid-`if load_model:`.
# Text after each '#' is live code in the un-collapsed original.
os.makedirs(model_path) with tf.device("/cpu:0"): global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False) trainer = tf.train.AdamOptimizer(learning_rate=learning_rate) predator = AC_Network(state_size[2], s_size_central[2], number_of_agents, action_size[2], comm_size, comm_size, 'global', None, '_agentPredator', critic_action=critic_action, critic_comm=critic_comm) prey = AC_Network(state_size[1], s_size_central[1], number_of_agents, action_size[1], comm_size, comm_size, 'global', None, '_agentPrey', critic_action=critic_action, critic_comm=critic_comm) master_networks = [None, prey, predator] # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append(Worker(make_env("simple_adversary"), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.comm_delivery_failure_chance, comm_gaussian_noise=FLAGS.comm_gaussian_noise, comm_jumble_chance=FLAGS.comm_jumble_chance)) saver = tf.train.Saver() with tf.Session() as sess: coord = tf.train.Coordinator() if load_model: print('Loading Model...')
# NOTE(review): collapsed fragment for the "simple_reference" env — structurally
# identical to the simple_spread6 fragment above (one shared 'global' AC_Network,
# spread_messages controlling the outgoing-comm width, worker loop swallowed by the
# inline '#'). Truncated at "FLAGS." — presumably FLAGS.comm_delivery_failure_chance.
# Restore the original multi-line formatting before editing; the text after the
# first '#' is live code in the un-collapsed original.
master_network = AC_Network( state_size, s_size_central, number_of_agents, action_size, (number_of_agents - 1) * comm_size, (number_of_agents - 1) * comm_size if spread_messages else comm_size, 'global', None, critic_action=critic_action, critic_comm=critic_comm) # Generate global network workers = [] # Create worker classes for i in range(FLAGS.num_slaves): workers.append( Worker(make_env("simple_reference"), i, state_size, s_size_central, action_size, number_of_agents, trainer, model_path, global_episodes, display=display and i == 0, comm=(comm_size != 0), comm_size_per_agent=comm_size, spread_messages=spread_messages, critic_action=critic_action, critic_comm=critic_comm, comm_delivery_failure_chance=FLAGS.