示例#1
0
def make_optimizer(ob_size, action_size, args):
    """Start the optimization agent and collect its first result.

    The original note on this helper says the dm_control optimizer requires
    different treatment, which is why it is set up separately here.

    Args:
        ob_size: observation size forwarded to the optimization agent.
        action_size: action size forwarded to the optimization agent.
        args: configuration object forwarded to the optimization agent.

    Returns:
        A 4-tuple ``(task_queue, result_queue, agent, starting_weights)``,
        where ``starting_weights`` is the first item read from the result
        queue after the start signal has been processed.
    """
    task_queue = multiprocessing.JoinableQueue()
    result_queue = multiprocessing.Queue()

    agent = optimization_agent.optimization_agent(
        args, ob_size, action_size, task_queue, result_queue
    )
    agent.start()

    # Send the start signal, then block until every queued task has been
    # marked done (presumably by the agent -- confirm in optimization_agent).
    task_queue.put(parallel_util.START_SIGNAL)
    task_queue.join()

    initial_weights = result_queue.get()
    return task_queue, result_queue, agent, initial_weights
示例#2
0
        viz_item = ['avg_reward', 'entropy', 'kl', 'surr_loss',
                    'vf_loss', 'weight_l2_loss', 'learning_rate']
        viz_win = {}
        for item in viz_item:
            viz_win[item] = None

    if not args.dm:
        args.max_pathlength = gym.spec(args.task).timestep_limit
        learner_env = gym.make(args.task)

        optimizer_tasks = multiprocessing.JoinableQueue()
        optimizer_results = multiprocessing.Queue()
        optimizer_agent = optimization_agent.optimization_agent(
            args,
            learner_env.observation_space.shape[0],
            learner_env.action_space.shape[0],
            optimizer_tasks,
            optimizer_results
        )
        optimizer_agent.start()

        # the rollouts agents
        rollout_agent = rollout_master_agent.parallel_rollout_master_agent(
            args,
            learner_env.observation_space.shape[0],
            learner_env.action_space.shape[0]
        )

        # start the training and rollouting process
        optimizer_tasks.put(parallel_util.START_SIGNAL)
        optimizer_tasks.join()