Example #1
	# INITIALIZE THE MAIN AGENT CLASS
	ai = MERL(args)
	print('Running ', args.config.env_choice, 'with config ', args.config.config, ' State_dim:', args.state_dim,
	      'Action_dim', args.action_dim)
	time_start = time.time()

	###### TRAINING LOOP ########
	for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

		# ONE EPOCH OF TRAINING
		popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

		# PRINT PROGRESS
		print('Ep:/Frames', gen, '/', ai.total_frames, 'Popn stat:', mod.list_stat(popn_fits), 'PG_stat:',
		      mod.list_stat(pg_fits),
		      'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]], 'FPS:',
		      pprint(ai.total_frames / (time.time() - time_start)), 'Evo', args.scheme, 'PS:', args.ps
		      )

		if gen % 5 == 0:
			print()
			print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG:  ', args.savetag)
			print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))
			print('Buffer Lens:', [ag.buffer[0].__len__() for ag in ai.agents] if args.ps == 'trunk' else [ag.buffer.__len__() for ag in ai.agents])
			print()

		if gen % 10 == 0 and args.rollout_size > 0:
			print()
			print('Q', pprint(ai.agents[0].algo.q))
			print('Q_loss', pprint(ai.agents[0].algo.q_loss))
			print('Policy', pprint(ai.agents[0].algo.policy_loss))
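The snippets above lean on two helpers that are never shown: pprint (imported at module level) and mod.list_stat. A minimal sketch of what they plausibly do, assuming pprint rounds values for compact logging and list_stat summarizes a fitness list as min/max/mean; only the call sites come from the snippet, the bodies are guesses:

def pprint(x):
    """Round a float, or each element of a list/tuple, to two decimals for compact logs."""
    if x is None:
        return None
    if isinstance(x, (list, tuple)):
        return [pprint(v) for v in x]
    return float('%.2f' % x)

def list_stat(scores):
    """Summarize a list of fitness scores as (min, max, mean)."""
    if not scores:
        return None
    return pprint(min(scores)), pprint(max(scores)), pprint(sum(scores) / len(scores))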
Example #2
        print(
            'Env',
            ENV_NAME,
            'Gen',
            gen,
            'Frames',
            agent.total_frames,
            ' Pop_max/max_ever:',
            '%.2f' % best_score,
            '/',
            '%.2f' % agent.best_score,
            ' Avg:',
            '%.2f' % frame_tracker.all_tracker[0][1],
            ' Frames/sec:',
            '%.2f' % (agent.total_frames / (time.time() - time_start)),
            ' Champ_len',
            '%.2f' % test_len,
            ' Test_score u/std',
            utils.pprint(test_mean),
            utils.pprint(test_std),
            'savetag',
            SAVETAG,
        )

        # PRINT MORE DETAILED STATS PERIODICALLY
        if gen % 5 == 0:
            print('Learner Fitness',
                  [utils.pprint(learner.value) for learner in agent.portfolio],
                  'Sum_stats_resource_allocation',
                  [learner.visit_count for learner in agent.portfolio])
            print('Pop/rollout size', args.pop_size, '/', args.rollout_size,
                  'gradperstep', args.gradperstep, 'Seed', SEED,
                  'Portfolio_id', PORTFOLIO_ID)
            try:
                print('Best Policy ever genealogy:',
                      agent.genealogy.tree[int(agent.best_policy.wwid.item())].history)
                print('Champ genealogy:',
                      agent.genealogy.tree[champ_wwid].history)
            except Exception:
                pass
Example #3
	# INITIALIZE THE MAIN AGENT CLASS
	ai = MERL(args)
	print('Running ', args.config.env_choice, 'with config ', args.config.config, ' State_dim:', args.state_dim,
	      'Action_dim', args.action_dim)
	time_start = time.time()

	###### TRAINING LOOP ########
	for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

		# ONE EPOCH OF TRAINING
		popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

		# PRINT PROGRESS
		print('Ep:/Frames', gen, '/', ai.total_frames, 'Popn stat:', mod.list_stat(popn_fits), 'PG_stat:',
		      mod.list_stat(pg_fits),
		      'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]], 'FPS:',
		      pprint(ai.total_frames / (time.time() - time_start)))

		if gen % 5 == 0:
			print()
			print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG:  ', args.savetag)
			print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))
			print('Buffer Lens:', [ag.buffer[0].__len__() for ag in ai.agents])
			print()

		if gen % 10 == 0 and args.rollout_size > 0:
			print()
			print('Q', pprint(ai.agents[0].algo.q))
			print('Q_loss', pprint(ai.agents[0].algo.q_loss))
			print('Policy', pprint(ai.agents[0].algo.policy_loss))
			print('########################################################################')
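Several examples hand the training loop a tracker object (test_tracker here, gen_tracker and frame_tracker below) and call update([score], total_frames) on it; Example #2 also reads the running average back out of frame_tracker.all_tracker[0][1]. A minimal sketch of a compatible Tracker, assuming it keeps a running mean per metric and appends (frames, mean) rows to a CSV; the constructor signature is taken from Example #5, the body is an assumption:

import os
import numpy as np

class Tracker:
    """Minimal metric tracker: running mean per variable, persisted as CSV."""

    def __init__(self, save_folder, vars_string, project_string):
        os.makedirs(save_folder, exist_ok=True)
        self.file_save = [os.path.join(save_folder, v + project_string) for v in vars_string]
        self.all_tracker = [[[], 0.0, []] for _ in vars_string]  # [scores, mean, history]

    def update(self, updates, frames):
        for i, score in enumerate(updates):
            scores, _, history = self.all_tracker[i]
            scores.append(score)
            self.all_tracker[i][1] = sum(scores) / len(scores)  # running mean
            history.append([frames, self.all_tracker[i][1]])
            np.savetxt(self.file_save[i], np.array(history), delimiter=',')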
Example #4
    print('Running', parameters.algo, ' State_dim:', parameters.state_dim,
          ' Action_dim:', parameters.action_dim, 'for',
          'Round 1' if DIFFICULTY == 0 else 'Round 2')
    time_start = time.time()
    num_frames = 0.0

    ###### TRAINING LOOP ########
    for epoch in range(1, 1000000000):  #RUN VIRTUALLY FOREVER
        gen_time = time.time()

        #ONE EPOCH OF TRAINING
        agent.train(epoch)

        #PRINT PROGRESS
        print('Ep:', epoch, 'Score cur/best:',
              [pprint(score) for score in agent.test_score],
              pprint(agent.best_score),
              'Time:', pprint(time.time() - gen_time), 'Len',
              pprint(agent.test_len), 'Best_action_noise_score',
              pprint(agent.best_action_noise_score), 'Best_Agent_scores',
              [pprint(score) for score in agent.best_agent_scores])

        #PRINT MORE DETAILED STATS PERIODICALLY
        if epoch % 5 == 0:  #Special Stats
            print()
            print('#Data_Created', agent.buffer_added, 'Q_Val Stats',
                  pprint(list_mean(agent.rl_agent.q['min'])),
                  pprint(list_mean(agent.rl_agent.q['max'])),
                  pprint(list_mean(agent.rl_agent.q['mean'])), 'Val Stats',
                  pprint(list_mean(agent.rl_agent.val['min'])),
                  pprint(list_mean(agent.rl_agent.val['max'])),
                  pprint(list_mean(agent.rl_agent.val['mean'])))
Example #5
                self.buffers[rover_id].push(exp[0], exp[1], exp[2], exp[3],
                                            exp[4])
                self.update_budget += 1


if __name__ == "__main__":
    args = Parameters()  # Create the Parameters class

    gen_tracker = utils.Tracker(args.metric_save, [args.log_fname],
                                '.csv')  # Initiate tracker
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)  #Seeds

    # INITIALIZE THE MAIN AGENT CLASS
    ai = IDPP(args)
    print(' State_dim:', args.state_dim)
    time_start = time.time()

    ###### TRAINING LOOP ########
    for gen in range(1, 1000000000):  #RUN VIRTUALLY FOREVER
        gen_time = time.time()

        #ONE EPOCH OF TRAINING
        ai.train(gen, gen_tracker)

        #PRINT PROGRESS
        print('Ep:', gen, 'Score: cur/best:', pprint(ai.test_score),
              pprint(ai.best_score), 'Time:', pprint(time.time() - gen_time),
              'Best_rollout_score', pprint(ai.best_rollout_score), 'DPP', DPP)
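The fragment at the top of Example #5 pushes five-element experience tuples (exp[0] through exp[4], presumably state, action, next state, reward, done) into per-rover buffers, and Example #1 reports buffer sizes via __len__. A minimal sketch of a compatible replay buffer, assuming a bounded FIFO with uniform sampling; the field ordering is a guess:

import random
from collections import deque

class ReplayBuffer:
    """Bounded FIFO of transitions with uniform random sampling."""

    def __init__(self, capacity=1000000):
        self.storage = deque(maxlen=capacity)

    def push(self, state, action, next_state, reward, done):
        self.storage.append((state, action, next_state, reward, done))

    def sample(self, batch_size):
        batch = random.sample(list(self.storage), batch_size)
        return map(list, zip(*batch))  # columns: states, actions, next_states, rewards, dones

    def __len__(self):
        return len(self.storage)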
Example #6
	# INITIALIZE THE MAIN AGENT CLASS
	ai = MERL(args)
	print('Running ', args.config.env_choice, 'with config ', args.config.config, ' Predator State_dim:', args.pred_state_dim, 'Prey_state_dim', args.prey_state_dim,
	      'Action_dim', args.action_dim)
	time_start = time.time()

	###### TRAINING LOOP ########
	for gen in range(1, 10000000000):  # RUN VIRTUALLY FOREVER

		# ONE EPOCH OF TRAINING
		popn_fits, pg_fits, test_fits, prey_score = ai.train(gen, test_tracker, prey_tracker)

		# PRINT PROGRESS
		print('Ep:/Frames', gen, '/', ai.total_frames, 'Popn stat:', mod.list_stat(popn_fits), 'PG_stat:',
		      mod.list_stat(pg_fits),
		      'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]], 'FPS:',
		      pprint(ai.total_frames / (time.time() - time_start)), 'Evo', args.scheme, 'Prey Score:', prey_score)

		#Update elites tracker
		if gen > 2 and args.popn_size > 0:
			#elites_tracker.update([ai.agents[0].evolver.rl_res['elites']], gen)
			selects_tracker.update([ai.agents[0].evolver.rl_res['selects']], gen)

		if ai.total_frames > args.frames_bound:
			break

	### Kill all processes
	try: ai.pg_task_pipes[0].send('TERMINATE')
	except: pass
	try: ai.test_task_pipes[0].send('TERMINATE')
	except: pass
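Example #6 shuts down by sending the string 'TERMINATE' through its task pipes, wrapped in try/except in case a worker has already exited. A minimal sketch of the worker side of that protocol, assuming each rollout worker blocks on a multiprocessing Pipe and treats the string as a sentinel; only the sentinel value comes from the snippet, the worker body is an assumption:

from multiprocessing import Pipe, Process

def rollout_worker(task_conn):
    """Block on the task pipe; exit cleanly when the TERMINATE sentinel arrives."""
    while True:
        task = task_conn.recv()
        if task == 'TERMINATE':
            break  # clean exit, so the parent's send() never reaches a dead reader
        # ... run one rollout for `task` and send results back here ...

if __name__ == '__main__':
    parent_conn, child_conn = Pipe()
    worker = Process(target=rollout_worker, args=(child_conn,))
    worker.start()
    parent_conn.send('TERMINATE')  # same sentinel the snippet sends
    worker.join()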
Example #7
	torch.manual_seed(args.seed); np.random.seed(args.seed); random.seed(args.seed)

	#INITIALIZE THE MAIN AGENT CLASS
	agent = CERL_Agent(args) #Initialize the agent
	print('Running CERL for', ENV_NAME, 'State_dim:', args.state_dim, ' Action_dim:', args.action_dim)

	time_start = time.time()
	for gen in range(1, 1000000000): #Infinite generations

		#Train one iteration
		best_score, test_len, all_fitness, all_eplen, test_mean, test_std, champ_wwid = agent.train(gen, frame_tracker)

		#PRINT PROGRESS
		print('Env', ENV_NAME, 'Gen', gen, 'Frames', agent.total_frames, ' Pop_max/max_ever:','%.2f'%best_score, '/','%.2f'%agent.best_score, ' Avg:','%.2f'%frame_tracker.all_tracker[0][1],
		      ' Frames/sec:','%.2f'%(agent.total_frames/(time.time()-time_start)),
			  ' Champ_len', '%.2f'%test_len, ' Test_score u/std', utils.pprint(test_mean), utils.pprint(test_std), 'savetag', SAVETAG, )

		# PRINT MORE DETAILED STATS PERIODICALLY
		if gen % 5 == 0:
			print('Learner Fitness', [utils.pprint(learner.value) for learner in agent.portfolio], 'Sum_stats_resource_allocation', [learner.visit_count for learner in agent.portfolio])
			print('Pop/rollout size', args.pop_size,'/',args.rollout_size, 'gradperstep', args.gradperstep, 'Seed', SEED, 'Portfolio_id', PORTFOLIO_ID)
			try:
				print('Best Policy ever genealogy:', agent.genealogy.tree[int(agent.best_policy.wwid.item())].history)
				print('Champ genealogy:', agent.genealogy.tree[champ_wwid].history)
			except: pass
			print()

		max_tracker.update([best_score], agent.total_frames)
		if agent.total_frames > TOTAL_STEPS:
			break
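The five-generation report in Example #7 prints each learner's value and visit_count, which summarize how rollout workers were distributed across the agent's portfolio of learners. A minimal sketch of a UCB1-style allocation rule over such a portfolio; the Learner fields match what the snippet prints, but the scoring formula here is an assumption, not necessarily the one the repo uses:

import math
from dataclasses import dataclass

@dataclass
class Learner:
    value: float = 0.0    # running fitness estimate (printed as 'Learner Fitness')
    visit_count: int = 0  # rollouts allocated so far (printed as resource allocation)

def allocate_rollout(portfolio, c=1.0):
    """Pick the learner maximizing value plus a UCB1 exploration bonus."""
    total = sum(l.visit_count for l in portfolio) or 1
    def score(l):
        if l.visit_count == 0:
            return float('inf')  # try every learner at least once
        return l.value + c * math.sqrt(math.log(total) / l.visit_count)
    chosen = max(portfolio, key=score)
    chosen.visit_count += 1
    return chosen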