# Construct the top-level MERL learner from the parsed run arguments.
ai = MERL(args)
print(
    'Running ', args.config.env_choice,
    'with config ', args.config.config,
    ' State_dim:', args.state_dim,
    'Action_dim', args.action_dim,
)
time_start = time.time()  # wall-clock reference for the FPS figure below

# ---------------- Training loop ----------------
# The upper bound is large enough that the loop effectively never ends on its own.
for gen in range(1, 10000000000):
    # Run one training epoch; ai.train returns three fitness collections.
    popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

    # Gather the progress figures in the same order they are reported.
    frames_so_far = ai.total_frames
    popn_summary = mod.list_stat(popn_fits)
    pg_summary = mod.list_stat(pg_fits)
    recent_tests = [pprint(score) for score in ai.test_trace[-5:]]  # last five entries
    frames_per_sec = pprint(ai.total_frames / (time.time() - time_start))
    print(
        'Ep:/Frames', gen, '/', frames_so_far,
        'Popn stat:', popn_summary,
        'PG_stat:', pg_summary,
        'Test_trace:', recent_tests,
        'FPS:', frames_per_sec,
        'Evo', args.scheme,
        'PS:', args.ps,
    )

    # Every fifth generation, emit an extended diagnostics section.
    if gen % 5 == 0:
        print()
        print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG: ', args.savetag)
        print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))
        # With args.ps == 'trunk' the length is read from the first element of
        # each agent's buffer; otherwise from the buffer object itself.
        if args.ps == 'trunk':
            buffer_lens = [agent.buffer[0].__len__() for agent in ai.agents]
        else:
            buffer_lens = [agent.buffer.__len__() for agent in ai.agents]
        print('Buffer Lens:', buffer_lens)
        print()
if args.config.env_choice == 'hyper':
    # Original author's note: the main module needs access to this class
    # (the reason was not recorded).
    from envs.hyper.PowerPlant_env import Fast_Simulator

# Construct the top-level MERL learner from the parsed run arguments.
ai = MERL(args)
print(
    'Running ', args.config.env_choice,
    'with config ', args.config.config,
    ' State_dim:', args.state_dim,
    'Action_dim', args.action_dim,
)
time_start = time.time()  # wall-clock reference for the FPS figure below

# ---------------- Training loop ----------------
# The upper bound is large enough that the loop effectively never ends on its own.
for gen in range(1, 10000000000):
    # Run one training epoch; ai.train returns three fitness collections.
    popn_fits, pg_fits, test_fits = ai.train(gen, test_tracker)

    # Gather the progress figures in the same order they are reported.
    frames_so_far = ai.total_frames
    popn_summary = mod.list_stat(popn_fits)
    pg_summary = mod.list_stat(pg_fits)
    recent_tests = [pprint(score) for score in ai.test_trace[-5:]]  # last five entries
    frames_per_sec = pprint(ai.total_frames / (time.time() - time_start))
    print(
        'Ep:/Frames', gen, '/', frames_so_far,
        'Popn stat:', popn_summary,
        'PG_stat:', pg_summary,
        'Test_trace:', recent_tests,
        'FPS:', frames_per_sec,
        'Evo', args.scheme,
        'PS:', args.ps,
    )

    # Every fifth generation, emit an extended diagnostics section.
    if gen % 5 == 0:
        print()
        print('Test_stat:', mod.list_stat(test_fits), 'SAVETAG: ', args.savetag)
        print('Weight Stats: min/max/average', pprint(ai.test_bucket[0].get_norm_stats()))
        # With args.ps == 'trunk' the length is read from the first element of
        # each agent's buffer; otherwise from the buffer object itself.
        if args.ps == 'trunk':
            buffer_lens = [agent.buffer[0].__len__() for agent in ai.agents]
        else:
            buffer_lens = [agent.buffer.__len__() for agent in ai.agents]
        print('Buffer Lens:', buffer_lens)
        print()

    # Every tenth generation (only when rollouts are enabled), report the
    # first agent's `algo.q` value.
    if gen % 10 == 0 and args.rollout_size > 0:
        print()
        print('Q', pprint(ai.agents[0].algo.q))
if args.config.env_choice == 'hyper':
    # Original author's note: the main module needs access to this class
    # (the reason was not recorded).
    from envs.hyper.PowerPlant_env import Fast_Simulator

# Construct the top-level MERL learner from the parsed run arguments.
ai = MERL(args)
print(
    'Running ', args.config.env_choice,
    'with config ', args.config.config,
    ' Predator State_dim:', args.pred_state_dim,
    'Prey_state_dim', args.prey_state_dim,
    'Action_dim', args.action_dim,
)
time_start = time.time()  # wall-clock reference for the FPS figure below

# ---------------- Training loop ----------------
# The range bound is effectively infinite; the loop really ends via the
# frames_bound check at the bottom of the body.
for gen in range(1, 10000000000):
    # Run one training epoch; ai.train returns three fitness collections
    # plus a prey score.
    popn_fits, pg_fits, test_fits, prey_score = ai.train(gen, test_tracker, prey_tracker)

    # Per-generation progress line.
    print(
        'Ep:/Frames', gen, '/', ai.total_frames,
        'Popn stat:', mod.list_stat(popn_fits),
        'PG_stat:', mod.list_stat(pg_fits),
        'Test_trace:', [pprint(i) for i in ai.test_trace[-5:]],
        'FPS:', pprint(ai.total_frames / (time.time() - time_start)),
        'Evo', args.scheme,
        'Prey Score:', prey_score,
    )

    # Update the selects tracker once past the first two generations, and
    # only when an evolutionary population is configured.
    if gen > 2 and args.popn_size > 0:
        # Disabled by the original author:
        # elites_tracker.update([ai.agents[0].evolver.rl_res['elites']], gen)
        # NOTE(review): the disabled line indexes ai.agents[0] while the live
        # line uses ai.agents directly -- confirm which shape ai.agents has in
        # this variant; left unchanged here.
        selects_tracker.update([ai.agents.evolver.rl_res['selects']], gen)

    # Stop once the configured frame budget has been exceeded.
    if ai.total_frames > args.frames_bound:
        break

# Kill worker processes: best-effort request over the first policy-gradient
# task pipe. Ordinary failures are ignored so shutdown never raises, but
# KeyboardInterrupt / SystemExit are no longer swallowed as they were by the
# previous bare `except:`.
try:
    ai.pg_task_pipes[0].send('TERMINATE')
except Exception:
    pass