def make_optimizer(ob_size, action_size, args):
    '''
        @brief:
            Launch the optimization agent and fetch its first result.
            The dm_control optimizer requires different treatment, so this
            helper takes the observation / action sizes explicitly.

        @input:
            @ob_size: observation size forwarded to the optimization agent
            @action_size: action size forwarded to the optimization agent
            @args: configuration object forwarded to the optimization agent

        @output:
            A 4-tuple (task queue, result queue, started agent, first item
            read from the result queue). The last element is presumably the
            agent's initial weights -- confirm against optimization_agent.
    '''
    task_queue = multiprocessing.JoinableQueue()
    result_queue = multiprocessing.Queue()

    agent = optimization_agent.optimization_agent(
        args, ob_size, action_size, task_queue, result_queue
    )
    agent.start()

    # send the start signal, then block until every item put on the task
    # queue has been marked as done
    task_queue.put(parallel_util.START_SIGNAL)
    task_queue.join()

    # the first thing placed on the result queue is handed back to the caller
    initial_weights = result_queue.get()

    return task_queue, result_queue, agent, initial_weights
# NOTE(review): the original indentation of this fragment was lost. The extent
# of the `if not args.dm:` branch below is an assumption (everything that
# depends on the gym `learner_env` is placed inside it) -- confirm against the
# original file before relying on this layout.

# names of the quantities to visualize; each one gets an entry in `viz_win`
# that is initialised to None
viz_item = ['avg_reward', 'entropy', 'kl', 'surr_loss', 'vf_loss', 'weight_l2_loss', 'learning_rate']
viz_win = {}
for item in viz_item:
    viz_win[item] = None

if not args.dm:
    # gym task: take the maximum path length from the registered task spec
    args.max_pathlength = gym.spec(args.task).timestep_limit
    # this environment is only used here to read the observation / action
    # space shapes
    learner_env = gym.make(args.task)

    # the optimization agent, with a joinable queue for incoming tasks and a
    # plain queue for its results
    optimizer_tasks = multiprocessing.JoinableQueue()
    optimizer_results = multiprocessing.Queue()
    optimizer_agent = optimization_agent.optimization_agent(
        args,
        learner_env.observation_space.shape[0],
        learner_env.action_space.shape[0],
        optimizer_tasks,
        optimizer_results
    )
    optimizer_agent.start()

    # the rollout agents, built with the same observation / action sizes
    rollout_agent = rollout_master_agent.parallel_rollout_master_agent(
        args,
        learner_env.observation_space.shape[0],
        learner_env.action_space.shape[0]
    )

    # start the training and rollout process: send the start signal to the
    # optimizer and block until the task queue has been fully processed
    optimizer_tasks.put(parallel_util.START_SIGNAL)
    optimizer_tasks.join()