Example #1
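# Third-party and standard-library imports used by this snippet; the project-specific
# helpers (make_env, PoolManager_v2, get_goals, AutoEncoderFeatureExtractor, ObservationBuffer,
# TrainerFeatExtAE, Evolution, Agent, Log, sample_goal, evaluate_agents, save_agent)
# are assumed to be importable from the surrounding repository.
import datetime
import gc
import json
import numpy as np
import torch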
def main(args):
	assert args.population >= 10, 'Population size should not be less than 10. Exiting.'
	assert args.generations >= 1, 'Number of generations should not be less than 1. Exiting.'
	assert args.trials >= 1, 'Number of trials should not be less than 1. Exiting.'
	assert args.episodes >= 1, 'Number of episodes should not be less than 1. Exiting.'
	assert args.save_interval >= 0, 'Save interval should not be less than 0. Exiting.'
	assert args.num_workers >= 1, 'Number of workers should not be less than 1. Exiting.'
	# set seed
	np.random.seed(args.seed)
	torch.manual_seed(args.seed)
	# params
	visualise = not args.no_visualise
	hebb_plastic = not args.no_hebbian_plasticity
	dynamic_goal = not args.no_dynamic_goal
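	# m_ is the half-width (in episodes) of the window around the trial midpoint within which the goal swap may occur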
	m_ = 2 if args.episodes < 20 else 15
	if dynamic_goal: swap_range = int((args.episodes/2.) - m_), int((args.episodes/2.) + m_+1)
	else: swap_range = None
	# create environment 
	# and optionally pool of worker envs if args.num_workers > 1 (for multiprocessing)
	env_fn = make_env(args.env_config_path)
	env, env_conf = env_fn()
	if args.num_workers > 1: pool = PoolManager_v2(env_fn, args.num_workers)
	else: pool = None
	# get all goals (reward locations) in the environment.
	goals = get_goals(env_conf['graph_shape']['depth'])
	if not env_conf['image_dataset']['1D']: # observations are 2D images
		# set up the feature extractor model that serves all (evolved) agent controller networks.
		# it extracts features used as input to the evolved controllers.
		obs_dim = int(np.prod(env.observation_space.shape)) # 144. each observation is a 12 x 12 image
		layers = [obs_dim, 64, 16]
		fe_model = AutoEncoderFeatureExtractor(layers)
		latent_dim = controller_input_dim = fe_model.get_latent_features_dim()
		#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
		device = torch.device('cpu')
		fe_model.to(device)
		buffer_size = args.population * args.episodes * 10
		obs_buffer = ObservationBuffer(buffer_size, (obs_dim, ))
		args.feature_extractor = {'layers': layers, 'type': 'fc_autoencoder'}
		fe_trainer = TrainerFeatExtAE(fe_model, obs_buffer, 0.001, 20, 1.0, device)
	else:
		raise ValueError('CTgraph should be configured to produce image observations')
	# set up evolution (evolution of network controller/agent)
	args.agent['n_input_neurons'] = controller_input_dim
	args.agent['plasticity'] = hebb_plastic
	evo = Evolution(Agent, args.agent, args.population)
	# instantiate log
	unique_name = 'ctgraph'
	exp_name = 'train-s{0}-depth{1}'.format(args.seed, env_conf['graph_shape']['depth'])
	exp_name = '{0}-p{1}-g{2}{3}'.format(exp_name, args.population, args.generations, unique_name)
	log = Log('./log/'+exp_name)
	# logs
	log.info('General program Log')
	log.info('goal swap range: {0}'.format(swap_range))
	modeldir_path = log.get_modeldir_path()
	visdir_path = log.get_visdir_path()
	# save experiment config
	exp_config = {}
	exp_config['environment'] = env_conf
	exp_config['others'] = vars(args)
	f = open('{0}/config.json'.format(log.get_logdir_path()), 'w')
	json.dump(exp_config, f, indent=4)
	f.close()

	trials_goals = []
	# train model (evolve controllers)
	# optionally sgd optimise feature extractor if env observations are 2d
	for generation in np.arange(args.generations):
		start_time = datetime.datetime.now()
		log.info('generation {0}'.format(generation))
		# determine swap point(s) and goal(s) for current generation
		if dynamic_goal:
			swap_points = np.random.randint(low=swap_range[0], high=swap_range[1], size=args.trials)
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				next_goal = sample_goal(goals, prev_goal=goal)
				trials_goals.append((goal, next_goal))
				log.info('trial {0} goals: {1}'.format(i+1, (goal, next_goal)))
			log.info('swap points: {0}'.format(swap_points))
		else:
			swap_points = None
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				log.info('trial {0} goal: {1}.'.format(i, goal))
				trials_goals.append((goal,))
		# evaluate fitness - each agent fitness is its average reward across trials
		agents = evo.get_all_individuals()
		if args.num_workers > 1:
			# create a clone of the feature extractor and pass it to the method below.
			# this works around pytorch raising an error about serialising a non-leaf tensor
			# that requires_grad when the feature extractor is sent to worker processes;
			# the error only occurs after the first generation.
			if fe_model is not None:
				fe_model_ = type(fe_model)(fe_model.layers_dim)
				fe_model_.load_state_dict(fe_model.state_dict())
			else:
				fe_model_ = None
			pool.evaluate_agents(agents, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model_, device, obs_buffer)
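			# drop the clone so its memory can be reclaimed before the next generation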
			fe_model_ = None
			gc.collect()
		else:
			evaluate_agents(agents, env, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model, device, obs_buffer)
		# log summary, model and generate network visualisation
		if args.save_interval > 0 and generation % args.save_interval == 0:
			top_agents = evo.get_n_fittest_individuals(n = args.top_n)
			top_agents_reward = [agent.get_reward() for agent in top_agents]
			best_fitness = top_agents_reward[0]
			log.info('top {0} agents: {1}'.format(args.top_n, np.array(top_agents_reward)))
			# write generation summary to logs (and screen)
			log.summary(generation, best_fitness, evo.get_worst_fitness(), evo.get_fitness_mean(),\
				evo.get_fitness_std())
			# save the model of the best agent and a visualisation of its phenotype/network.
			save_agent(top_agents[0], modeldir_path + 'gen-{0}-best.npy'.format(generation))
			if fe_model is not None:
				# save feature extractor
				state_dict_ = fe_model.state_dict()
				torch.save(state_dict_, modeldir_path + 'gen-{0}-femodel.pt'.format(generation))
			if visualise:
				# save controller/agent visualisation
				top_agents[0].draw_network(visdir_path +'gen-{0}-best'.format(generation))
		if generation == args.generations - 1:
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))
			break
		else:
			evo.selection()
			evo.produce_next_generation()
			if fe_model is not None:
				fe_trainer.train(epochs=20)
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))

	if pool is not None:
		pool.close()
	log.info('---Training over.---')
	log.close()
Example #2
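# Assumed imports for this snippet: numpy as np and torch, plus the project helpers
# (make_env, AutoEncoderFeatureExtractor, ConvAutoEncoderFeatureExtractor, load_agent,
# Agent, Log, sample_goal, run_episode) from the surrounding repository.
import numpy as np
import torch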
def main(args):
    assert args.episodes >= 1, 'Number of episodes should not be less than 1. Exiting.'
    # set seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # params
    dynamic_goal = not args.no_dynamic_goal
    m_ = 2 if args.episodes < 20 else 15
    if dynamic_goal:
        swap_range = int((args.episodes / 2.) - m_), int((args.episodes / 2.) +
                                                         m_ + 1)
    else:
        swap_range = None
    # create environment
    if args.exp_config is None: env, env_conf = make_env()
    else: env, env_conf = make_env(args.exp_config['environment'])
    goals = env.get_goals()
    # set up model - feature extractor and neuromodulated controller
    if args.feat_ext_path is None:
        msg = 'when environment observations are 2D images, a feature extractor needs to be '\
            'specified. Use the --help flag (command below) to see descriptions.\n\n'\
            'python {0} --help\n'.format(__file__)
        raise ValueError(msg)
    # load feature extractor model
    if args.exp_config['others']['feature_extractor'] is not None:
        if args.exp_config['others']['feature_extractor'][
                'type'] == 'fc_autoencoder':
            layers = args.exp_config['others']['feature_extractor']['layers']
            fe_model = AutoEncoderFeatureExtractor(layers)
            latent_dim = fe_model.get_latent_features_dim()
        else:
            fe_model = ConvAutoEncoderFeatureExtractor()
            latent_dim = fe_model.get_latent_features_dim()
    else:
        raise NotImplementedError
    fe_model.load_state_dict(torch.load(args.feat_ext_path))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    fe_model.to(device)
    fe_model.eval()
    # load evolved controller
    agent = load_agent(args.agent_path, Agent)
    # setup log directory
    unique_name = 'mcaltenv'
    dir_path = 'log/test-s{0}-{1}'.format(args.seed, unique_name)
    log = Log(dir_path)
    # sample env goal(s)
    if dynamic_goal:
        swap_points = np.random.randint(low=swap_range[0],
                                        high=swap_range[1],
                                        size=args.trials)
        trials_goals = []
        for i in np.arange(args.trials):
            goal = sample_goal(goals)
            next_goal = sample_goal(goals, prev_goal=goal)
            log.info('trial {0} goal: {1}'.format(i, goal))
            log.info('goal will change to {0} at episode {1}\n'.format(
                next_goal, swap_points[i]))
            trials_goals.append((goal, next_goal))
    else:
        swap_points = None
        trials_goals = []
        for i in np.arange(args.trials):
            goal = sample_goal(goals)
            log.info('trial {0} goal: {1}.'.format(i, goal))
            trials_goals.append((goal, ))
    # evaluate agent
    agent.reset()
    agent.enable_neurons_output_logging()

    trials, episodes = args.trials, args.episodes
    trials_reward = [None] * trials
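    # run each trial: reset the agent, set the initial goal, and (if dynamic) swap the goal at the sampled swap point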
    for trial in np.arange(trials):
        trial_total_reward = 0.0
        agent.reset()  # reset agent for each trial
        env.set_goal(trials_goals[trial][0])
        env.reset()
        for episode in np.arange(episodes):
            if swap_points is not None and episode == swap_points[trial]:
                # change goal location
                env.set_goal(trials_goals[trial][1])
            ret = run_episode(fe_model, agent, env, True, False, device)
            trial_total_reward += ret[0]
        trials_reward[trial] = trial_total_reward
    rewards = trials_reward

    for i, trial_reward in enumerate(rewards):
        log.info('\ntrial {0} reward: {1}'.format(i + 1, trial_reward))
    log.info('\naverage reward: {0:0.4f}'.format(sum(rewards) / args.trials))
    filepath_ = log.get_logdir_path() + 'agent-neurons-output-log.csv'
    np.savetxt(filepath_, agent.get_neurons_output_log(), delimiter=',')
    log.close()
    return
Example #3
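# Imports assumed as in Example #1 (datetime, gc, json, numpy as np, torch), plus the
# project helpers used below (make_env, PoolManager_v2, ConvAutoEncoderFeatureExtractor,
# ObservationBuffer, TrainerFeatExtAE, Evolution, Agent, Log, sample_goal,
# evaluate_agents, save_agent).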
def main(args):
	assert args.population >= 10, 'Population size should not be less than 10. Exiting.'
	assert args.generations >= 1, 'Number of generations should not be less than 1. Exiting.'
	assert args.trials >= 1, 'Number of trials should not be less than 1. Exiting.'
	assert args.episodes >= 20, 'Number of episodes should not be less than 20. Exiting.'
	assert args.save_interval >= 0, 'Save interval should not be less than 0. Exiting.'
	assert args.num_workers >= 1, 'Number of workers should not be less than 1. Exiting.'
	# set seed
	np.random.seed(args.seed)
	torch.manual_seed(args.seed)
	# params
	visualise = not args.no_visualise
	hebb_plastic = not args.no_hebbian_plasticity
	dynamic_goal = not args.no_dynamic_goal
	num_trial_swaps = 2 # NOTE for multi goal swaps per trial
	args.num_trial_swaps = num_trial_swaps
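	# with num_trial_swaps swaps per trial, the trial splits into (num_trial_swaps + 1) segments;
	# each swap point is drawn from a window of +/- m_ episodes around a segment boundary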
	if dynamic_goal:
		segment_duration = int(args.episodes / (num_trial_swaps + 1))
		if segment_duration < 10: m_ = 2
		elif 10 <= segment_duration < 20: m_ = 5
		elif 20 <= segment_duration < 30: m_ = 10
		else: m_ = 15
		swap_range = []
		curr_seg = 0
		for _ in range(num_trial_swaps):
			curr_seg += segment_duration
			swap_range.append((curr_seg-m_, curr_seg+m_+1))
	else: swap_range = None
	# create environment 
	# and optionally pool of worker envs if args.num_workers > 1 (for multiprocessing)
	env, env_conf = make_env()
	# multiprocessing
	if args.num_workers > 1: pool = PoolManager_v2(make_env, args.num_workers)
	else: pool = None
	# environment goals
	goals = env.get_goals()
	# set up the feature extractor model that serves all (evolved) agent controller networks.
	# it extracts features used as input to the evolved controllers.
	obs_dim = env.observation_space.shape # for conv feature extractor
	fe_model = ConvAutoEncoderFeatureExtractor()
	latent_dim = controller_input_dim = fe_model.get_latent_features_dim()
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	#device = torch.device('cpu')
	fe_model.to(device)
	buffer_size = args.population * args.episodes
	obs_buffer = ObservationBuffer(buffer_size, obs_dim) # conv nn feature extractor
	args.feature_extractor = {'layers': 'N/A', 'type': 'conv_autoencoder'}
	fe_trainer = TrainerFeatExtAE(fe_model, obs_buffer, 0.0005, 20, 1.0, device)
	# set up evolution (evolution of network controller/agent)
	args.agent['n_input_neurons'] = controller_input_dim
	args.agent['plasticity'] = hebb_plastic
	evo = Evolution(Agent, args.agent, args.population)
	# instantiate log
	unique_name = 'mcaltenv'
	exp_name = 'train-s{0}'.format(args.seed)
	exp_name = '{0}-p{1}-g{2}{3}'.format(exp_name, args.population, args.generations, unique_name)
	log = Log('./log/'+exp_name)
	# logs
	log.info('General program Log')
	log.info('goal swap range: {0}'.format(swap_range))
	modeldir_path = log.get_modeldir_path()
	visdir_path = log.get_visdir_path()
	# save experiment config
	exp_config = {}
	exp_config['environment'] = env_conf
	exp_config['others'] = vars(args)
	f = open('{0}/config.json'.format(log.get_logdir_path()), 'w')
	json.dump(exp_config, f, indent=4)
	f.close()

	trials_goals = []
	# train model (evolve controllers)
	# rmsprop optimise feature extractor
	for generation in np.arange(args.generations):
		start_time = datetime.datetime.now()
		log.info('generation {0}'.format(generation))
		# determine swap point(s) and goal(s) for current generation
		if dynamic_goal:
			swap_points = []
			for r in swap_range:
				swap_points.append(np.random.randint(low=r[0], high=r[1], size=args.trials))
			swap_points = np.array(swap_points)
			swap_points = swap_points.T # transpose from (num_swaps x trials) to (trials x num_swaps)

			trials_goals = []
			for i in np.arange(args.trials):
				goal = None
				trial_goals = []
				for j in np.arange(num_trial_swaps+1):
					goal = sample_goal(goals, prev_goal=goal)
					trial_goals.append(goal)
				trials_goals.append(tuple(trial_goals))
				log.info('trial {0} goals: {1}'.format(i+1, tuple(trial_goals)))
			log.info('swap points: {0}'.format(swap_points))
		else:
			swap_points = None
			trials_goals = []
			for i in np.arange(args.trials):
				goal = sample_goal(goals)
				log.info('trial {0} goal: {1}.'.format(i, goal))
				trials_goals.append((goal,))
		# evaluate fitness - each agent fitness is its average reward across trials
		agents = evo.get_all_individuals()
		if args.num_workers > 1:
			# create a clone of the feature extractor and pass it to the method below.
			# this works around pytorch raising an error about serialising a non-leaf tensor
			# that requires_grad when the feature extractor is sent to worker processes;
			# the error only occurs after the first generation.
			if fe_model is not None:
				fe_model_ = type(fe_model)()
				fe_model_.to(device)
				fe_model_.load_state_dict(fe_model.state_dict())
			else:
				fe_model_ = None
			pool.evaluate_agents(agents, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model_, device, obs_buffer)
			# free up memory
			fe_model_ = None
			gc.collect()
		else:
			evaluate_agents(agents, env, args.trials, args.episodes, trials_goals, swap_points,\
				fe_model, device, obs_buffer, xml_goalelem=None)
		# log summary, model and generate network visualisation
		if args.save_interval > 0 and generation % args.save_interval == 0:
			top_agents = evo.get_n_fittest_individuals(n = args.top_n)
			top_agents_reward = [agent.get_reward() for agent in top_agents]
			best_fitness = top_agents_reward[0]
			log.info('top {0} agents: {1}'.format(args.top_n, np.array(top_agents_reward)))
			# write generation summary to logs (and screen)
			log.summary(generation, best_fitness, evo.get_worst_fitness(), evo.get_fitness_mean(),\
				evo.get_fitness_std())
			# save the model of the best agent and a visualisation of its phenotype/network.
			save_agent(top_agents[0], modeldir_path + 'gen-{0}-best.npy'.format(generation))
			if fe_model is not None:
				# save feature extractor
				state_dict_ = fe_model.state_dict()
				torch.save(state_dict_, modeldir_path + 'gen-{0}-femodel.pt'.format(generation))
			if visualise:
				# save agent visualisation
				top_agents[0].draw_network(visdir_path +'gen-{0}-best'.format(generation), prune=True)
		if generation == args.generations - 1:
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))
			break
		else:
			evo.selection()
			evo.produce_next_generation()
			if fe_model is not None:
				fe_trainer.train(epochs=20)
			end_time = datetime.datetime.now()
			log.info('time taken: {0}\n\n'.format(str(end_time - start_time)))

	if pool is not None:
		pool.close()
	log.info('---Training over.---')
	log.close()

	return
Example #4
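# Imports assumed as in Example #2 (numpy as np, torch), plus the project helpers used
# below (make_env, get_goals, AutoEncoderFeatureExtractor, load_agent, Agent, Log,
# sample_goal, evaluate_agent).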
def main(args):
    assert args.episodes >= 1, 'Number of episodes should not be less than 1. Exiting.'
    # set seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # params
    dynamic_goal = not args.no_dynamic_goal
    m_ = 2 if args.episodes < 20 else 15
    if dynamic_goal:
        swap_range = int((args.episodes / 2.) - m_), int((args.episodes / 2.) +
                                                         m_ + 1)
    else:
        swap_range = None
    # create environment
    if args.exp_config is None: env, env_conf = make_env()
    else: env, env_conf = make_env(args.exp_config['environment'])
    goals = get_goals(env_conf['graph_shape']['depth'])
    if not env_conf['image_dataset']['1D']:
        # load feature extractor model
        layers = args.exp_config['others']['feature_extractor']['layers']
        if args.exp_config['others']['feature_extractor'][
                'type'] == 'fc_autoencoder':
            fe_model = AutoEncoderFeatureExtractor(layers)
        else:
            raise NotImplementedError
        fe_model.load_state_dict(torch.load(args.feat_ext_path))
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        fe_model.to(device)
        fe_model.eval()
    else:
        raise ValueError(
            'CTgraph should be configured to produce image observations')
    # load agent
    agent = load_agent(args.agent_path, Agent)
    # setup log directory
    unique_name = 'ctgraph'
    dir_path = 'log/test-s{0}-d{1}-{2}'.format(args.seed,
                                               env_conf['graph_shape']['depth'],
                                               unique_name)
    log = Log(dir_path)
    # sample env goal(s)
    if dynamic_goal:
        swap_points = np.random.randint(low=swap_range[0],
                                        high=swap_range[1],
                                        size=args.trials)
        trials_goals = []
        for i in np.arange(args.trials):
            goal = sample_goal(goals)
            next_goal = sample_goal(goals, prev_goal=goal)
            log.info('trial {0} goal: {1}'.format(i, goal))
            log.info('goal will change to {0} at episode {1}'.format(
                next_goal, swap_points[i]))
            trials_goals.append((goal, next_goal))
    else:
        swap_points = None
        trials_goals = []
        for i in np.arange(args.trials):
            goal = sample_goal(goals)
            log.info('trial {0} goal: {1}.'.format(i, goal))
            trials_goals.append((goal, ))
    # evaluate agent
    agent.reset()
    agent.enable_neurons_output_logging()
    rewards = evaluate_agent(agent, env, args.trials, args.episodes, trials_goals, swap_points,\
     fe_model, device, obs_buffer=None)
    for i, trial_reward in enumerate(rewards):
        log.info('\ntrial {0} reward: {1}'.format(i + 1, trial_reward))
    log.info('\naverage reward: {0:0.4f}'.format(sum(rewards) / args.trials))
    filepath_ = log.get_logdir_path() + 'agent-neurons-output-log.csv'
    np.savetxt(filepath_, agent.get_neurons_output_log(), delimiter=',')
    log.close()
    return