Example #1
	def __init__(self, id, num_inputs, action_dim, hidden_size, gamma, critic_lr, actor_lr, tau, alpha, target_update_interval, savetag, foldername, actualize, use_gpu):

		self.num_inputs = num_inputs
		self.action_space = action_dim
		self.gamma = gamma
		self.tau = tau
		self.alpha = alpha
		self.policy_type = "Gaussian"
		self.target_update_interval = target_update_interval
		self.tracker = utils.Tracker(foldername, ['q_'+savetag, 'qloss_'+savetag, 'value_'+savetag, 'value_loss_'+savetag, 'policy_loss_'+savetag, 'mean_loss_'+savetag, 'std_loss_'+savetag], '.csv',save_iteration=1000, conv_size=1000)
		self.total_update = 0
		self.agent_id = id
		self.actualize = actualize

		self.critic = QNetwork(self.num_inputs, self.action_space, hidden_size)
		self.critic_optim = Adam(self.critic.parameters(), lr=critic_lr)
		self.soft_q_criterion = nn.MSELoss()

		if self.policy_type == "Gaussian":
			self.policy = Actor(self.num_inputs, self.action_space, hidden_size, policy_type='GaussianPolicy')
			self.policy_optim = Adam(self.policy.parameters(), lr=actor_lr)

			self.value = ValueNetwork(self.num_inputs, hidden_size)
			self.value_target = ValueNetwork(self.num_inputs, hidden_size)
			self.value_optim = Adam(self.value.parameters(), lr=critic_lr)
			utils.hard_update(self.value_target, self.value)
			self.value_criterion = nn.MSELoss()
		else:
			self.policy = Actor(self.num_inputs, self.action_space, hidden_size, policy_type='DeterministicPolicy')
			self.policy_optim = Adam(self.policy.parameters(), lr=actor_lr)

			self.critic_target = QNetwork(self.num_inputs, self.action_space, hidden_size)
			utils.hard_update(self.critic_target, self.critic)

		if use_gpu:
			self.policy.cuda()
			self.critic.cuda()
			if self.policy_type == "Gaussian":
				self.value.cuda()
				self.value_target.cuda()
			else:
				self.critic_target.cuda()

		#Statistics Tracker
		self.q = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.val = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.value_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.policy_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.mean_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.std_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
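Both branches above clone weights into a target network with utils.hard_update, and the stored tau is what a Polyak-averaged soft update would later consume during training. A minimal sketch of the two helpers, assuming the usual PyTorch parameter-copy pattern (the real utils module may differ):

def hard_update(target, source):
    # Clone every parameter of source into target (mirrors the constructor calls above)
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)

def soft_update(target, source, tau):
    # Polyak averaging: target <- (1 - tau) * target + tau * source, using the stored self.tau
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau)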
Example #2
	def __init__(self, id, algo_name, state_dim, action_dim, hidden_size, actor_lr, critic_lr, gamma, tau, savetag, foldername, actualize, use_gpu, num_agents, init_w = True):

		self.algo_name = algo_name
		self.gamma = gamma
		self.tau = tau
		self.total_update = 0
		self.agent_id = id
		self.use_gpu = use_gpu
		self.tracker = utils.Tracker(foldername, ['q_'+savetag, 'qloss_'+savetag, 'policy_loss_'+savetag], '.csv', save_iteration=1000, conv_size=1000)
		self.num_agents = num_agents

		#Initialize actors
		self.policy = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		if init_w: self.policy.apply(utils.init_weights)
		self.policy_target = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		utils.hard_update(self.policy_target, self.policy)
		self.policy_optim = Adam(self.policy.parameters(), actor_lr)


		self.critics = [QNetwork(state_dim*num_agents, action_dim*num_agents, hidden_size*3) for _ in range(num_agents)]

		self.critics_target = [QNetwork(state_dim*num_agents, action_dim*num_agents, hidden_size*3) for _ in range(num_agents)]
		if init_w:
			for critic, critic_target in zip(self.critics, self.critics_target):
				critic.apply(utils.init_weights)
				utils.hard_update(critic_target, critic)
		self.critic_optims = [Adam(critic.parameters(), critic_lr) for critic in self.critics]


		self.loss = nn.MSELoss()

		if use_gpu:
			self.policy_target.cuda(); self.policy.cuda()
			for critic, critic_target in zip(self.critics, self.critics_target):
				critic.cuda()
				critic_target.cuda()


		self.num_critic_updates = 0

		#Statistics Tracker
		#self.action_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.policy_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q = {'min':None, 'max': None, 'mean':None, 'std':None}
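self.policy.apply(utils.init_weights) applies an initializer recursively via nn.Module.apply. A hedged sketch of one common choice, Kaiming weights with zero biases for Linear layers (the actual utils.init_weights may use a different scheme):

import torch.nn as nn

def init_weights(m):
    # Called once per submodule by nn.Module.apply; only Linear layers are touched here (assumption)
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)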
Example #3
	def __init__(self, id, algo_name, state_dim, action_dim, hidden_size, actor_lr, critic_lr, gamma, tau, savetag, foldername, actualize, use_gpu, num_agents, init_w = True):

		self.algo_name = algo_name
		self.gamma = gamma
		self.tau = tau
		self.total_update = 0
		self.agent_id = id
		self.actualize = actualize
		self.use_gpu = use_gpu
		self.tracker = utils.Tracker(foldername, ['q_'+savetag, 'qloss_'+savetag, 'policy_loss_'+savetag, 'alz_score'+savetag,'alz_policy'+savetag], '.csv', save_iteration=1000, conv_size=1000)

		#Initialize actors
		self.policy = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		if init_w: self.policy.apply(utils.init_weights)
		self.policy_target = MultiHeadActor(state_dim, action_dim, hidden_size, num_agents)
		utils.hard_update(self.policy_target, self.policy)
		self.policy_optim = Adam(self.policy.parameters(), actor_lr)


		self.critic = QNetwork(state_dim, action_dim,hidden_size)
		if init_w: self.critic.apply(utils.init_weights)
		self.critic_target = QNetwork(state_dim, action_dim, hidden_size)
		utils.hard_update(self.critic_target, self.critic)
		self.critic_optim = Adam(self.critic.parameters(), critic_lr)

		if actualize:
			self.ANetwork = ActualizationNetwork(state_dim, action_dim, hidden_size)
			if init_w: self.ANetwork.apply(utils.init_weights)
			self.actualize_optim = Adam(self.ANetwork.parameters(), critic_lr)
			self.actualize_lr = 0.2
			if use_gpu: self.ANetwork.cuda()

		self.loss = nn.MSELoss()

		if use_gpu:
			self.policy_target.cuda(); self.critic_target.cuda(); self.policy.cuda(); self.critic.cuda()
		self.num_critic_updates = 0

		#Statistics Tracker
		#self.action_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.policy_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q_loss = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.q = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.alz_score = {'min':None, 'max': None, 'mean':None, 'std':None}
		self.alz_policy = {'min':None, 'max': None, 'mean':None, 'std':None}
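The MultiHeadActor used in Examples 2 and 3 is built from (state_dim, action_dim, hidden_size, num_agents), which suggests a shared trunk with one action head per agent. A minimal sketch under that assumption; layer sizes, activations, and the head-selection interface are guesses, not the original class:

import torch
import torch.nn as nn

class MultiHeadActor(nn.Module):
    def __init__(self, state_dim, action_dim, hidden_size, num_agents):
        super().__init__()
        # Shared feature trunk followed by one tanh-bounded action head per agent (assumed layout)
        self.trunk = nn.Sequential(
            nn.Linear(state_dim, hidden_size), nn.ReLU(),
            nn.Linear(hidden_size, hidden_size), nn.ReLU())
        self.heads = nn.ModuleList(
            [nn.Linear(hidden_size, action_dim) for _ in range(num_agents)])

    def forward(self, state, agent_id):
        # Route the shared features through the head belonging to agent_id
        return torch.tanh(self.heads[agent_id](self.trunk(state)))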
Example #4
            champ_wwid = int(self.rollout_bucket[0].wwid.item())
            all_fitness = [fitness]
            max_fit = fitness
            all_eplens = [num_frames]

        return max_fit, champ_len, all_fitness, all_eplens, test_mean, test_std, champ_wwid


if __name__ == "__main__":
    args = Parameters()  # Create the Parameters class
    SAVETAG = SAVETAG + '_p' + str(PORTFOLIO_ID)
    SAVETAG = SAVETAG + '_s' + str(SEED)
    if ISOLATE_PG: SAVETAG = SAVETAG + '_pg'

    frame_tracker = utils.Tracker(args.savefolder,
                                  ['score_' + ENV_NAME + SAVETAG],
                                  '.csv')  #Tracker class to log progress
    max_tracker = utils.Tracker(
        args.aux_folder, ['pop_max_score_' + ENV_NAME + SAVETAG],
        '.csv')  #Tracker class to log progress FOR MAX (NOT REPORTED)

    #Set seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    #INITIALIZE THE MAIN AGENT CLASS
    agent = CERL_Agent(args)  #Initialize the agent
    print('Running CERL for', ENV_NAME, 'State_dim:', args.state_dim,
          ' Action_dim:', args.action_dim)
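Every example instantiates utils.Tracker(folder, field_names, suffix, ...) to log training progress to CSV. A stripped-down sketch of what such a tracker could look like; the constructor signature is inferred from the calls above, the update method is an assumption, and the conv_size rolling average is omitted:

import os
import csv

class Tracker:
    def __init__(self, save_folder, field_names, suffix, save_iteration=1, conv_size=1):
        # One CSV file per tracked field (assumed layout)
        os.makedirs(save_folder, exist_ok=True)
        self.paths = [os.path.join(save_folder, name + suffix) for name in field_names]
        self.save_iteration = save_iteration
        self.counter = 0

    def update(self, values, generation):
        # Append one (generation, value) row per field every save_iteration calls (assumed format)
        self.counter += 1
        if self.counter % self.save_iteration != 0:
            return
        for path, value in zip(self.paths, values):
            with open(path, 'a', newline='') as f:
                csv.writer(f).writerow([generation, value])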
Example #5
		# Evolution Step
		for agent in self.agents:
			agent.evolve()

		#Save models periodically
		if gen % 20 == 0:
			for id, test_actor in enumerate(self.test_agent.rollout_actor):
				torch.save(test_actor.state_dict(), self.args.model_save + str(id) + '_' + self.args.actor_fname)
			print("Models Saved")

		return all_fits, pg_fits, test_fits


if __name__ == "__main__":
	args = Parameters()  # Create the Parameters class
	test_tracker = utils.Tracker(args.metric_save, [args.log_fname], '.csv')  # Initiate tracker
	torch.manual_seed(args.seed)
	np.random.seed(args.seed)
	random.seed(args.seed)  # Seeds
	if args.config.env_choice == 'hyper': from envs.hyper.PowerPlant_env import Fast_Simulator  # Main Module needs access to this class for some reason

	# INITIALIZE THE MAIN AGENT CLASS
	ai = MERL(args)
	print('Running ', args.config.env_choice, 'with config ', args.config.config, ' State_dim:', args.state_dim,
	      'Action_dim', args.action_dim)
	time_start = time.time()

	###### TRAINING LOOP ########
	for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

		# ONE EPOCH OF TRAINING
Example #6
            #Synch RL Agent to NE
            if self.num_games % self.args.synch_period == 0:
                self.rl_to_evo(self.rl_agent.actor, self.pop[worst_index])
                self.evolver.rl_policy = worst_index
                print('Synch from RL --> Nevo')

        # print("ddpg time:", (time.time()-time_evolution)/3600)

        return best_train_fitness, test_score, elite_index


if __name__ == "__main__":
    num_processes = 4
    parameters = Parameters()  # Create the Parameters class
    tracker = utils.Tracker(parameters, ['erl'],
                            '_score.csv')  # Initiate tracker
    frame_tracker = utils.Tracker(parameters, ['frame_erl'],
                                  '_score.csv')  # Initiate tracker
    time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv')

    #Create Env
    env = utils.NormalizedActions(gym.make(env_tag))
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]

    #Seed
    env.seed(parameters.seed)
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)
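Examples 6 and 8 call self.rl_to_evo(self.rl_agent.actor, self.pop[worst_index]) to overwrite the weakest evolutionary individual with the gradient-trained actor before the next generation. A plausible minimal form of that method on the agent class (assumed; the original may also copy buffers or other state):

def rl_to_evo(self, rl_net, evo_net):
    # Parameter-wise copy from the RL actor into the chosen evolutionary individual
    for target_param, param in zip(evo_net.parameters(), rl_net.parameters()):
        target_param.data.copy_(param.data)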
Example #7
        #Off-PG for Subs
        if len(self.replay_buffer) > self.args.batch_size * 5:
            transitions = self.replay_buffer.sample(self.args.batch_size)
            batch = replay_memory.Transition(*zip(*transitions))
            self.agent.learn_sub(torch.cat(batch.state),
                                 torch.cat(batch.action),
                                 torch.cat(batch.next_state),
                                 self.args.sub_gamma,
                                 self.args.num_gradient_steps)

        return score


if __name__ == "__main__":
    parameters = Parameters()  # Create the Parameters class
    tracker = utils.Tracker(parameters, ['sokoban'],
                            '_score.csv')  # Initiate tracker

    #Create Env
    env = Grid_Soccer.GridBallWorld()

    #Seed
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)

    #Create Agent
    learner = Learner(parameters, env)

    for gen in range(1000000):
        score = learner.train()
        print('#Gen:', gen, ' Score:', '%.2f' % score, 'Buffer_Size',
Example #8
                    self.rl_agent.update_parameters(batch)

                #Synch RL Agent to NE
                if self.num_games % 10 == 0 and self.args.use_evo:
                    self.rl_to_evo(self.rl_agent.actor, self.pop[worst_index])
                    print('Synch from RL --> Nevo')

        else:
            rl_score = None

        return best_train_fitness, test_score, rl_score, elite_index


if __name__ == "__main__":
    parameters = Parameters()  # Create the Parameters class
    tracker = utils.Tracker(parameters, ['score', 'steps'],
                            '_score.csv')  # Initiate tracker
    #frame_tracker = utils.Tracker(parameters, ['frame_evo', 'frame_rl'], '_score.csv')  # Initiate tracker
    #time_tracker = utils.Tracker(parameters, ['time_evo', 'time_rl'], '_score.csv')

    if False:  #Deepmind Suite
        env = suite.load(domain_name=env_name, task_name=task_name)
        parameters.action_dim = env.action_spec().shape[0]
        state = env.observation_spec()
        shape = 0
        for key, value in state.items():
            if len(value.shape) != 0: shape += value.shape[0]
            else: shape += 1
        parameters.state_dim = shape

    else:  #OpenAI
        env = gym.make(env_tag)
Example #9
    parameters = Parameters()  # Create the Parameters class

    #################### PROCESS FILENAMES TO SAVE PROGRESS  ################################
    parameters.critic_fname = shape_filename(parameters.critic_fname,
                                             parameters) + SAVE_TAG
    parameters.actor_fname = shape_filename(parameters.actor_fname,
                                            parameters) + SAVE_TAG
    parameters.log_fname = shape_filename(parameters.log_fname,
                                          parameters) + SAVE_TAG
    parameters.best_fname = shape_filename(parameters.best_fname,
                                           parameters) + SAVE_TAG
    ####################################################

    #
    frame_tracker = utils.Tracker(
        parameters.metric_save,
        [parameters.log_fname + '_1', parameters.log_fname + '_2'],
        '.csv')  # Initiate tracker
    ml_tracker = utils.Tracker(parameters.aux_save, [
        parameters.log_fname + 'critic_loss',
        parameters.log_fname + 'policy_loss'
    ], '.csv')  # Initiate tracker
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)  #Seeds

    # INITIALIZE THE MAIN AGENT CLASS
    agent = PG_ALGO(parameters)
    print('Running', parameters.algo, ' State_dim:', parameters.state_dim,
          ' Action_dim:', parameters.action_dim, 'for',
          'Round 1' if DIFFICULTY == 0 else 'Round 2')
    time_start = time.time()
Example #10
parser.add_argument('-save_periodic',
                    help='Save actor, critic and memory periodically',
                    action='store_true')
parser.add_argument('-next_save',
                    help='Generation save frequency for save_periodic',
                    type=int,
                    default=200)
parser.add_argument('-test_operators',
                    help='Runs the operator runner to test the operators',
                    action='store_true')
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

if __name__ == "__main__":
    parameters = Parameters(
        parser)  # Inject the cla arguments in the parameters object
    tracker = utils.Tracker(parameters, ['erl'],
                            '_score.csv')  # Initiate tracker
    frame_tracker = utils.Tracker(parameters, ['frame_erl'],
                                  '_score.csv')  # Initiate tracker
    time_tracker = utils.Tracker(parameters, ['time_erl'], '_score.csv')
    ddpg_tracker = utils.Tracker(parameters, ['ddpg'], '_score.csv')
    selection_tracker = utils.Tracker(parameters,
                                      ['elite', 'selected', 'discarded'],
                                      '_selection.csv')

    # Create Env
    env = utils.NormalizedActions(gym.make(parameters.env_name))
    parameters.action_dim = env.action_space.shape[0]
    parameters.state_dim = env.observation_space.shape[0]

    # Write the parameters to the info file and print them
    parameters.write_params(stdout=True)
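Examples 6 and 10 wrap the environment in utils.NormalizedActions(gym.make(...)), which by name suggests a gym action wrapper that maps the policy's [-1, 1] outputs onto the environment's action bounds. A hedged sketch of that wrapper; the exact scaling used in the original utils module is an assumption:

import gym

class NormalizedActions(gym.ActionWrapper):
    def action(self, action):
        # Rescale an action from [-1, 1] to [low, high] of the wrapped env (assumed behavior)
        low, high = self.action_space.low, self.action_space.high
        return low + (action + 1.0) * 0.5 * (high - low)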
Example #11
		# Evolution Step
		self.agents.evolve()

		#Save models periodically
		if gen % 20 == 0:
			torch.save(self.test_agent.predator[0].state_dict(), self.args.model_save + 'predator_' + self.args.savetag)
			torch.save(self.test_agent.prey[0].state_dict(), self.args.model_save + 'prey_' + self.args.savetag)
			print("Models Saved")

		return all_fits, pg_fits, test_fits, prey_score


if __name__ == "__main__":
	args = Parameters()  # Create the Parameters class
	test_tracker = utils.Tracker(args.metric_save, [args.log_fname], '.csv')  # Initiate tracker
	prey_tracker = utils.Tracker(args.metric_save, ['prey_'+args.log_fname], '.csv')  # Initiate tracker
	selects_tracker = utils.Tracker(args.metric_save, ['selects_' + args.log_fname], '.csv')
	torch.manual_seed(args.seed)
	np.random.seed(args.seed)
	random.seed(args.seed)  # Seeds
	if args.config.env_choice == 'hyper': from envs.hyper.PowerPlant_env import Fast_Simulator  # Main Module needs access to this class for some reason

	# INITIALIZE THE MAIN AGENT CLASS
	ai = MERL(args)
	print('Running ', args.config.env_choice, 'with config ', args.config.config, ' Predator State_dim:', args.pred_state_dim, 'Prey_state_dim', args.prey_state_dim,
	      'Action_dim', args.action_dim)
	time_start = time.time()

	###### TRAINING LOOP ########
	for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER
Example #12
        #NeuroEvolution's probabilistic selection and recombination step
        self.evolver.epoch(self.pop, all_net_ids, all_fitness,
                           all_shaped_fitness)

        # Synch RL Agent to NE periodically
        if gen % 5 == 0:
            self.evolver.sync_rl(self.args.rl_models, self.pop)

        return max(all_fitness), all_eplens[all_fitness.index(
            max(all_fitness))], all_fitness, all_eplens, all_shaped_fitness


if __name__ == "__main__":
    parameters = Parameters()  # Create the Parameters class
    frame_tracker = utils.Tracker(parameters.metric_save, ['erl', 'eugenics'],
                                  '.csv')  #Tracker class to log progress

    #Set seeds
    torch.manual_seed(parameters.seed)
    np.random.seed(parameters.seed)
    random.seed(parameters.seed)

    #INITIALIZE THE MAIN AGENT CLASS
    agent = ERL_Agent(parameters)  #Initialize the agent
    print('Running osim-rl', ' State_dim:', parameters.state_dim,
          ' Action_dim:', parameters.action_dim, 'using ERL for ',
          'Round 1' if DIFFICULTY == 0 else 'Round 2')

    time_start = time.time()
    for gen in range(1, 1000000000):  #Infinite generations
        gen_time = time.time()