    args.num_traj_buffers,
    args.num_steps,
)
# Rollout storage, allocated once in shared memory so that rollout workers,
# policy workers, and the learner all read and write the same buffers.
next_obs = share_memory(np.zeros(dimensions[:-1] + env.observation_space.shape, dtype=np.float32))
next_done = share_memory(np.zeros(dimensions[:-1], dtype=np.float32))
obs = share_memory(np.zeros(dimensions + env.observation_space.shape, dtype=np.float32))
actions = share_memory(np.zeros(dimensions + env.action_space.shape, dtype=env.action_space.dtype))
logprobs = share_memory(np.zeros(dimensions, dtype=np.float32))
rewards = share_memory(np.zeros(dimensions, dtype=np.float32))
dones = share_memory(np.zeros(dimensions, dtype=np.float32))
values = share_memory(np.zeros(dimensions, dtype=np.float32))
traj_availables = share_memory(np.ones(dimensions, dtype=np.float32))

# Queues wiring rollout workers, policy workers, and the learner together.
actor_processes = []
policy_workers = []
stats_queue = MpQueue()
rollout_task_queues = [MpQueue() for i in range(args.num_rollout_workers)]
policy_request_queues = [MpQueue() for i in range(args.num_policy_workers)]
learner_request_queue = MpQueue()
data_process_back_queues = []

# One rollout worker per index; each gets the shared buffers plus the queues
# it needs to request actions and hand finished trajectories to the learner.
for i in range(args.num_rollout_workers):
    actor = mp.Process(
        target=act,
        args=[[
            args, experiment_name, i, lock, stats_queue, 0,
            next_obs, next_done, obs, actions, logprobs, rewards,
            dones, values, traj_availables,
            rollout_task_queues[i], policy_request_queues, learner_request_queue,
        ]],
    )
    actor.start()
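# ---------------------------------------------------------------------------
# Neither `share_memory` nor `MpQueue` is defined in this excerpt. The sketch
# below is one plausible stand-in, assuming `share_memory` copies a numpy
# array into a `multiprocessing.RawArray` so child processes read and write
# the same underlying buffer. Names and signatures here are assumptions, not
# the codebase's actual implementation.
# ---------------------------------------------------------------------------
import multiprocessing as mp
from multiprocessing import Queue as MpQueue  # assumed stand-in for a fast IPC queue

import numpy as np


def share_memory(arr: np.ndarray) -> np.ndarray:
    """Return a copy of `arr` backed by shared memory, same shape and dtype."""
    ctype = np.ctypeslib.as_ctypes_type(arr.dtype)
    raw = mp.RawArray(ctype, arr.size)  # raw, unsynchronized shared buffer
    shared = np.frombuffer(raw, dtype=arr.dtype).reshape(arr.shape)
    np.copyto(shared, arr)  # seed the shared buffer with the initial contents
    return shared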
num_steps = 32
lock = mp.Lock()
# Shared buffer dimensions: one slice per CPU, each holding `num_envs`
# environments rolled out for `num_steps` steps.
dimensions = (
    mp.cpu_count(),
    num_envs,
    1,
    1,
    num_steps,
)
# obs = share_memory_numpy(np.zeros(dimensions + (84, 84, 3)))
obs = share_memory_torch_numpy_mixed(np.zeros(dimensions + (84, 84, 3)), 5)

# Spawn one rollout worker per CPU; each writes into its own slice of `obs`.
actor_processes = []
ctx = mp.get_context("forkserver")
stats_queue = MpQueue()
for i in range(num_cpus):
    actor = mp.Process(
        target=act,
        args=[obs, num_envs, num_steps],
    )
    actor.start()
    actor_processes.append(actor)

import timeit

timer = timeit.default_timer
existing_video_files = []
global_step = 0
global_step_increment = 0
start_time = time.time()
update_step = 0
try: