Example #1
    def __init__(self, opt, world):
        super().__init__(opt)
        self.inner_world = world
        self.numthreads = opt['numthreads']

        self.sync = {  # synchronization primitives
            # semaphores for counting queued examples
            'queued_sem': Semaphore(0),  # counts num exs to be processed
            'threads_sem': Semaphore(0),  # counts threads
            'reset_sem': Semaphore(0),  # allows threads to reset

            # flags for communicating with threads
            'reset_flag': Value('b', False),  # threads should reset
            'term_flag': Value('b', False),  # threads should terminate

            # counters
            'epoch_done_ctr': Value('i', 0),  # number of done threads
            'total_parleys': Value('l', 0),  # number of parleys in threads
        }

        self.threads = []
        for i in range(self.numthreads):
            self.threads.append(
                HogwildProcess(i, opt, world.share(), self.sync))
            time.sleep(0.05)  # delay can help prevent deadlock in thread launches
        for t in self.threads:
            t.start()

        for _ in self.threads:
            # wait for threads to launch
            # this makes sure that no threads get examples before all are set up
            # otherwise they might reset one another after processing some exs
            self.sync['threads_sem'].acquire()
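Note: the threads_sem handshake above is a general startup-barrier idiom: each child releases the semaphore once its setup is complete, and the parent acquires it once per child before dispatching any work. A minimal self-contained sketch (the _child function is hypothetical, not ParlAI code):

from multiprocessing import Process, Semaphore

def _child(ready_sem):
    # ... per-process setup (load model, open files, ...) would go here ...
    ready_sem.release()  # check in: "I am fully initialized"
    # only now is it safe for this child to start taking work

if __name__ == '__main__':
    ready_sem = Semaphore(0)
    children = [Process(target=_child, args=(ready_sem,)) for _ in range(4)]
    for c in children:
        c.start()
    for _ in children:
        ready_sem.acquire()  # blocks until every child has checked in
    for c in children:
        c.join()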
Example #2
    def start(self) -> None:
        shards = glob.glob(self.file_path)
        # Ensure a consistent order before shuffling for testing.
        shards.sort()
        num_shards = len(shards)

        # If we want multiple epochs per read, put shards in the queue multiple times.
        self.input_queue = Queue(num_shards * self.epochs_per_read +
                                 self.num_workers)
        for _ in range(self.epochs_per_read):
            np.random.shuffle(shards)
            for shard in shards:
                self.input_queue.put(shard)

        # Then put a None per worker to signify no more files.
        for _ in range(self.num_workers):
            self.input_queue.put(None)

        assert not self.processes, "Process list non-empty! You must call QIterable.join() before restarting."
        self.num_active_workers = Value('i', self.num_workers)
        self.num_inflight_items = Value('i', 0)
        for worker_id in range(self.num_workers):
            process = Process(target=_worker,
                              args=(self.reader, self.input_queue,
                                    self.output_queue, self.num_active_workers,
                                    self.num_inflight_items, worker_id))
            logger.info(f"starting worker {worker_id}")
            process.start()
            self.processes.append(process)
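Note: the input queue above is a textbook "poison pill" setup: real work items go in first, then exactly one None per worker, so every worker eventually dequeues a sentinel and exits. A stripped-down sketch (hypothetical _consume worker and shard names):

from multiprocessing import Process, Queue

def _consume(q):
    while True:
        item = q.get()
        if item is None:  # sentinel: no more work for this worker
            break
        # ... process item ...

if __name__ == '__main__':
    num_workers = 4
    q = Queue()
    for shard in ['shard-a', 'shard-b', 'shard-c']:
        q.put(shard)
    for _ in range(num_workers):
        q.put(None)  # one sentinel per worker
    workers = [Process(target=_consume, args=(q,)) for _ in range(num_workers)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()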
Example #3
def load_saved_model(model: Module,
                     path: str,
                     T: Value,
                     global_reward: Value,
                     model_critic: Module = None) -> None:
    """
    Load a saved model checkpoint from file.
    :param model: model to load params for
    :param path: path to load parameters from
    :param T: global steps counter, to continue training
    :param global_reward: global reward value, to continue training
    :param model_critic: optional separate critic model to load if a non-shared network is used
    :return: None
    """
    if os.path.isfile(path):
        print(f"=> loading model checkpoint '{path}'")
        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['model'])
        T.value = checkpoint['epoch']
        global_reward.value = checkpoint['global_reward']
        if model_critic:
            model_critic.load_state_dict(checkpoint['model_critic'])
        print(f"=> loaded model checkpoint '{path}' (T: {checkpoint['epoch']} "
              f"-- global reward: {checkpoint['global_reward']})")
    else:
        print(f"=> no model checkpoint found at '{path}'")
Example #4
File: worlds.py Project: yucoian/ParlAI
    def __init__(self, opt, world):
        super().__init__(opt)
        self.inner_world = world
        self.numthreads = opt['numthreads']

        self.sync = {  # synchronization primitives
            # semaphores for counting queued examples
            'queued_sem': Semaphore(0),  # counts num exs to be processed
            'threads_sem': Semaphore(0),  # counts threads
            'reset_sem': Semaphore(0),  # allows threads to reset

            # flags for communicating with threads
            'reset_flag': Value('b', False),  # threads should reset
            'term_flag': Value('b', False),  # threads should terminate

            # counters
            'epoch_done_ctr': Value('i', 0),  # number of done threads
            'total_parleys': Value('l', 0),  # number of parleys in threads
        }

        # don't let threads create more threads!
        self.threads = []
        for i in range(self.numthreads):
            self.threads.append(HogwildProcess(i, opt, world, self.sync))
        for t in self.threads:
            t.start()

        for _ in self.threads:
            self.sync['threads_sem'].acquire()
Example #5
    def _compare_parallel(self, network, opponent_network, device, num_workers):
        q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
        num_active_workers = Value('i', num_workers)
        evaluator_mgr = BulkEvaluatorManager(
            [network, opponent_network], device, num_workers)
        score = Value('i', 0)

        workers = []
        s = 0
        for worker_id in range(num_workers):
            num_games = q + 1 if worker_id < r else q
            evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
            opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
            color = BLACK if s % 2 == 0 else WHITE
            s += num_games
            worker = Process(
                target=self._worker_job,
                args=(num_games, num_active_workers,
                      evaluator, opponent_evaluator, color, score),
            )
            workers.append(worker)
            worker.start()

        # start evaluator server
        server = evaluator_mgr.get_server(num_active_workers)
        server.start()

        for worker in workers:
            worker.join()
        server.join()

        return score.value / self.conf.GAMES_PER_COMPARISON
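Note: _worker_job is not shown here, but the shared score must be updated with a locked read-modify-write, because += on a Value is not atomic. A minimal sketch under that assumption (game logic stubbed out):

def _worker_job_sketch(num_games, score):
    for game in range(num_games):
        won = (game % 2 == 0)  # placeholder for an actual game result
        if won:
            with score.get_lock():  # serializes the read-modify-write
                score.value += 1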
Example #6
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        super(Agent_sync, self).__init__(config, environment, policy,
                                         filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # sampler sub-process list
        self._sampler_proc = []

        # used for synchronize commands
        self._cmd_pipe = None
        self._param_pipe = None
        self._cmd_lock = Lock()

        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._cmd_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_thread in range(threads):
            child_name = f"sampler_{i_thread}"
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_thread,
                "gpu": gpu
            })
            child = Process(target=Agent_sync._sampler_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, param_pipe_child,
                                  self._cmd_lock, self._sync_signal,
                                  deepcopy(policy), deepcopy(environment),
                                  deepcopy(filter_op)))
            self._sampler_proc.append(child)
            child.start()
Example #7
    def __init__(self, name, env_kwargs, model_kwargs, **kwargs):
        super().__init__(env_kwargs=env_kwargs, model_kwargs=model_kwargs)
        self.name = name
        self.num_processes = 16

        self._report_queue = Queue(maxsize=16)
        self._shared_global_t = Value('i', 0)
        self._shared_is_stopped = Value('i', False)
Example #8
    def _generate_parallel(self, iteration, network, device, num_workers):
        q, r = divmod(self.remaining_games, num_workers)
        num_active_workers = Value('i', num_workers)
        resign_threshold = Value('d', self.resign_mgr.threshold())
        evaluator_mgr = BulkEvaluatorManager([network], device, num_workers)
        output_queue = SimpleQueue()

        # start the workers
        workers = []
        for worker_id in range(num_workers):
            num_games = q + 1 if worker_id < r else q
            evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
            worker = Process(
                target=self._worker_job,
                args=(worker_id, num_games, num_active_workers,
                      resign_threshold, evaluator, output_queue),
            )
            workers.append(worker)
            worker.start()

        # start evaluator server
        server = evaluator_mgr.get_server(num_active_workers)
        server.start()

        # collect the examples generated by workers
        while num_active_workers.value > 0 or not output_queue.empty():
            examples, resign_value_history, result = output_queue.get()
            self.example_pool += examples
            self.game_length.append(len(examples))

            # add the history into resignation manager to update the threshold
            if resign_value_history is not None:
                self.resign_mgr.add(resign_value_history, result)
                resign_threshold.value = self.resign_mgr.threshold()

            self.remaining_games -= 1

            # periodically save the progress
            if (self.conf.GAMES_PER_ITERATION - self.remaining_games) \
                    % self.conf.EXAMPLE_POOL_SAVE_FREQUENCY == 0:
                self.save(iteration)
                log.info(
                    f'[iter={iteration}] ExamplePool: checkpoint saved, '
                    f'{self.remaining_games} games remaining'
                )

        for worker in workers:
            worker.join()
        server.join()
Example #9
    def __init__(self, world_class, opt, agents):
        super().__init__(opt)
        self.inner_world = world_class(opt, agents)

        self.queued_items = Semaphore(0)  # counts num exs to be processed
        self.epochDone = Condition()  # notifies when exs are finished
        self.terminate = Value('b', False)  # tells threads when to shut down
        self.cnt = Value('i', 0)  # number of exs that remain to be processed

        self.threads = []
        for i in range(opt['numthreads']):
            self.threads.append(
                HogwildProcess(i, world_class, opt, agents, self.queued_items,
                               self.epochDone, self.terminate, self.cnt))
        for t in self.threads:
            t.start()
Example #10
 def create(cls):
     if not hasattr(cls, 'length_to_eps'):
         # Maps episode length to list of episodes
         cls.length_to_eps = {}
     if not hasattr(cls, 'ep_indices'):
         # Set of episode indices already in the cache
         cls.ep_indices = set()
     if not hasattr(cls, 'batches'):
         # List of batches if popping batches
         cls.batches = []
     if not hasattr(cls, 'load_complete'):
         # If all episodes have been loaded into memory
         cls.load_complete = Value(ctypes.c_bool, False)
     if not hasattr(cls, 'batches_lock'):
         # Lock to access batches
         cls.batches_lock = Lock()
     if not hasattr(cls, 'cache_lock'):
         # Lock to access length_to_eps
         cls.cache_lock = Lock()
     if not hasattr(cls, 'fill_cache_lock'):
         # Lock for condition variables
         cls.fill_cache_lock = RLock()
     if not hasattr(cls, 'add_to_cache_cv'):
         # Condition notifying Loader to add to cache
         cls.add_to_cache_cv = Condition(lock=cls.fill_cache_lock)
     if not hasattr(cls, 'cache_filled_cv'):
         # Condition notifying teacher that cache has episodes
         cls.cache_filled_cv = Condition(lock=cls.fill_cache_lock)
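Note: a hedged sketch of how the two condition variables above cooperate over the single fill_cache_lock: the loader sleeps on add_to_cache_cv until woken, fills the cache, then notifies consumers via cache_filled_cv. The function name and loop body are illustrative, not from the original file:

def _loader_loop(cls):
    while not cls.load_complete.value:
        with cls.add_to_cache_cv:
            cls.add_to_cache_cv.wait()  # releases fill_cache_lock while asleep
        # ... load a batch of episodes into cls.length_to_eps ...
        with cls.cache_filled_cv:
            cls.cache_filled_cv.notify_all()  # wake teachers waiting for episodes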
Example #11
    def __init__(self,
                 chk_dir,
                 chk,
                 keep_epoch_chk=True,
                 overwrite=True,
                 mode=CFMode.AUTO,
                 chk_prefix='model_v_'):

        self.logger = logging.getLogger(__name__)
        self.chk_dir = chk_dir
        self.chk = chk
        self.keep_epoch_chk = keep_epoch_chk
        self.overwrite = overwrite
        self.chk_prefix = chk_prefix
        self.mode = mode
        self.chk_epoch_subdir = 'epoch'
        self.mp_manager = Manager()
        self.snapshot_copy = None

        self.cpu_side = False
        # Active snapshot, if true, don't snapshot again
        self.active_snapshot = Value('i', 0)
        self.lock = Lock()
        self.in_progress_snapshot = Value('i', 0)

        # Handle to the process performing the checkpoint.
        # There can be only one at any instant; a new checkpoint
        # cannot start until the previous one completes.
        self.chk_process = None

        # overwrite=False takes precedence: it forces keep_epoch_chk to True
        if self.overwrite is False and self.keep_epoch_chk is False:
            self.keep_epoch_chk = True

        # Global ID of checkpoints being written
        # Used to format the checkpoint path
        # Instantiate from chk when restoring
        self.chk_global_id = -1

        # Sorted List of available checkpoints (fnames)
        self.available_chk_iters = self.mp_manager.list()
        self.available_chk_epochs = self.mp_manager.list()
        self.initalize_chk_dir()

        self.logger.info("Available checkpoints : ")
        for item in self.available_chk_iters:
            self.logger.info(item)
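Note: a hypothetical sketch of how these flags typically gate snapshotting: test-and-set active_snapshot under self.lock so that at most one snapshot is in flight. The method name and body are illustrative; only the attributes come from __init__ above:

    def try_start_snapshot(self):
        with self.lock:
            if self.active_snapshot.value == 1:
                return False  # a snapshot is already in flight
            self.active_snapshot.value = 1
            self.in_progress_snapshot.value = 1
        # ... launch self.chk_process here to serialize and persist state ...
        return True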
Example #12
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        threads_gpu = config["gpu threads"] if "gpu threads" in config else 2
        super(Agent_async, self).__init__(config, environment, policy,
                                          filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # environment sub-process list
        self._environment_proc = []
        # policy sub-process list
        self._policy_proc = []

        # used for synchronize policy parameters
        self._param_pipe = None
        self._policy_lock = Lock()
        # used for synchronize roll-out commands
        self._control_pipe = None
        self._environment_lock = Lock()

        step_pipe = []
        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._control_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_envs in range(threads):
            child_name = f"environment_{i_envs}"
            step_pipe_pi, step_pipe_env = Pipe(duplex=True)
            step_lock = Lock()
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_envs,
                "gpu": gpu
            })
            child = Process(target=Agent_async._environment_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, step_pipe_env,
                                  self._environment_lock, step_lock,
                                  self._sync_signal, deepcopy(environment),
                                  deepcopy(filter_op)))
            self._environment_proc.append(child)
            step_pipe.append((step_pipe_pi, step_lock))
            child.start()

        for i_policies in range(threads_gpu):
            child_name = f"policy_{i_policies}"
            worker_cfg = ParamDict({
                "seed": self.seed + 2048 + i_policies,
                "gpu": gpu
            })
            child = Process(target=Agent_async._policy_worker,
                            name=child_name,
                            args=(worker_cfg, param_pipe_child, step_pipe,
                                  self._policy_lock, self._sync_signal,
                                  deepcopy(policy)))
            self._policy_proc.append(child)
            child.start()
        sleep(5)
Example #13
 def __init__(self, inputs, mode):
     self.mode = mode
     self.inputs = inputs
     self.queue = Queue()
     self.control = Value('i', 1)
     if self.mode == 0:
         self.process = Process(target=self.worker,
                                args=(self.inputs, self.queue,
                                      self.control))
         self.process.start()
Example #14
    def __init__(self, *args):
        """
        Statistics process saves the statistics obtained from workers.
        In particular, the shared models are saved every Config.MODEL_SAVE_FREQUENCY episodes.
        Moreover, some statistics are logged every Config.LOG_STATS_FREQUENCY episodes.
        """
        super(StatProcess, self).__init__()
        self.episode_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.ae_loss_log_q = Queue(maxsize=Config.MAX_STATS_QUEUE_SIZE)
        self.episode_count = Value('i', 0)
        self.model_save = Value('i', 0)
        self.exit_flag = Value('i', 0)

        #:obj:`dict`: Dictionary of DPS models for RL.
        self.agents = {}
        for model, env_id in zip(args, Config.ENV_IDS):
            self.agents[env_id] = model
        #float: Time at start for logging.
        self._start_time = time.time()
Example #15
    def __init__(self, args) -> None:
        """
        Constructor
        :param args: Cmd-line arguments
        """
        self.args = args

        # global counter
        self.T = Value('i', 0)
        self.global_reward = Value('d', -np.inf)

        # worker handling
        self.worker_pool = []

        # validity check for input parameter
        if args.optimizer not in ['rmsprop', 'adam']:
            raise Exception(
                f'Your given optimizer "{args.optimizer}" is currently not '
                'supported. Choose either "rmsprop" or "adam".')
Example #16
class Counter(object):
    '''
    A counter for multiprocessing; a simple wrapper around multiprocessing.Value.
    '''
    def __init__(self):
        from torch.multiprocessing import Value
        self.val = Value('i', 0)

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n

    def reset(self):
        with self.val.get_lock():
            self.val.value = 0

    @property
    def value(self):
        return self.val.value
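Note: a quick usage sketch for Counter (assuming the 'fork' start method so the wrapped Value is shared with the children):

from torch.multiprocessing import Process

def _bump(counter, n):
    for _ in range(n):
        counter.increment()

if __name__ == '__main__':
    counter = Counter()
    procs = [Process(target=_bump, args=(counter, 1000)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(counter.value)  # 4000; get_lock() serializes the increments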
Example #17
def _worker(
    reader: DatasetReader,
    input_queue: Queue,
    output_queue: Queue,
    num_active_workers: Value,
    num_inflight_items: Value,
    worker_id: int,
) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue.  When
    there are no filenames left on the input queue, it decrements
    num_active_workers to signal completion.
    """
    logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}")
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # It's important that we close and join the queue here before
            # decrementing num_active_workers. Otherwise our parent may join us
            # before the queue's feeder thread has passed all buffered items to
            # the underlying pipe resulting in a deadlock.
            #
            # See:
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines
            output_queue.close()
            output_queue.join_thread()
            # Decrementing is not atomic.
            # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value.
            with num_active_workers.get_lock():
                num_active_workers.value -= 1
            logger.info(f"Reader worker {worker_id} finished")
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            with num_inflight_items.get_lock():
                num_inflight_items.value += 1
            output_queue.put(instance)
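Note: the consuming side of this protocol (see the similar loop in Example #8) keeps reading until every worker has checked out and nothing is in flight, using a timeout so it can re-check the exit condition instead of blocking forever. A hedged sketch:

from queue import Empty

def iter_instances(output_queue, num_active_workers, num_inflight_items):
    # exit only when every worker has finished AND nothing is in flight
    while num_active_workers.value > 0 or num_inflight_items.value > 0:
        try:
            instance = output_queue.get(timeout=1.0)
        except Empty:
            continue  # nothing arrived; re-check the exit condition
        with num_inflight_items.get_lock():
            num_inflight_items.value -= 1
        yield instance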
Example #18
    def init_data(self):
        self.is_working = False
        self.semaphore = True
        self.is_change_bar = Value(c_bool, False)  # whether the user has dragged the slider; default: False

        self.frame_index = Value('i', 0)
        self.share_lock = Lock()  # shared lock for frame_index
        self.share_lock2 = Lock()  # shared lock for frame_index

        self.mutex = threading.Lock()

        self.timer = QTimer(self)  # used to update the progress bar
        self.temp_timer = QTimer(self)  # used to detect whether frame_total has been set
        self.frame_total = Value('i', -1)
        self.playable = Value(c_bool, True)
        self.is_working = Value(c_bool, False)
        manager = Manager()
        self.play_src = manager.Value(c_char_p, '0')  # records the path of the video source to play
        self.mode = None  # 'online' or 'offline'
Example #19
    def __init__(self,
                 scheduler: scheduler_type,
                 mode: str = 'nearest',
                 align_corners: bool = None,
                 preserve_range: bool = False,
                 keys: Sequence = ('data', ),
                 grad: bool = False,
                 **kwargs):
        """
        Args:
            scheduler: scheduler which determined the current size.
                The scheduler is called with the current iteration of the
                transform
            mode: one of ``nearest``, ``linear``, ``bilinear``, ``bicubic``,
                ``trilinear``, ``area`` (for more information see
                :func:`torch.nn.functional.interpolate`)
            align_corners: input and output tensors are aligned by the center
                points of their corners pixels, preserving the values at the
                corner pixels.
            preserve_range: output tensor has same range as input tensor
            keys: keys which should be augmented
            grad: enable gradient computation inside transformation
            **kwargs: keyword arguments passed to augment_fn

        Warnings:
            When this transformation is used in combination with
            multiprocessing, the step counter is not perfectly synchronized
            between processes. As a result, the step count may jump between
            values within a range given by the number of processes used.
        """
        super().__init__(size=0,
                         mode=mode,
                         align_corners=align_corners,
                         preserve_range=preserve_range,
                         keys=keys,
                         grad=grad,
                         **kwargs)
        self.scheduler = scheduler
        self._step = Value('i', 0)
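Note: a hedged sketch of how such a shared step counter is usually advanced. The lock makes each increment atomic, but, as the warning says, the order in which worker processes observe a given count remains nondeterministic:

def increment_step(step):
    # step is the shared Value('i', 0) from __init__ above
    with step.get_lock():
        step.value += 1
        return step.value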
Example #20
    def __init__(self,
                 config,
                 share_batches=True,
                 manager=None,
                 new_process=True):
        if new_process and manager is None:
            manager = Manager()
        self.knows = Semaphore(0)  # > 0 if we know if any are coming
        # == 0 if DatasetReader is processing a command
        self.working = Semaphore(1 if new_process else 100)
        self.finished_reading = Lock()  # locked if we're still reading from file
        # number of molecules that have been sent to the pipe:
        self.in_pipe = Value('i', 0)

        # Tracking what's already been sent through the pipe:
        self._example_number = Value('i', 0)

        # The final kill switch:
        self._close = Value('i', 0)

        self.command_queue = manager.Queue(10)
        self.molecule_pipeline = None
        self.batch_queue = Queue(config.data.batch_queue_cap)  # manager.Queue(config.data.batch_queue_cap)
        self.share_batches = share_batches

        self.dataset_reader = DatasetReader("dataset_reader",
                                            self,
                                            config,
                                            new_process=new_process)
        if new_process:
            self.dataset_reader.start()
Example #21
class Signal(object):
    '''
    A signal for multiprocessing; a simple wrapper around multiprocessing.Value.
    '''
    def __init__(self):
        from torch.multiprocessing import Value
        self.val = Value('i', False)

    def set_signal(self, boolean):
        with self.val.get_lock():
            self.val.value = boolean

    @property
    def value(self):
        return bool(self.val.value)
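Note: a usage sketch showing Signal as a cooperative stop flag for a worker loop (assuming the 'fork' start method so the Signal instance is shared):

from time import sleep
from torch.multiprocessing import Process

def _loop(stop):
    while not stop.value:
        sleep(0.01)  # ... one unit of work per iteration ...

if __name__ == '__main__':
    stop = Signal()
    p = Process(target=_loop, args=(stop,))
    p.start()
    sleep(0.1)  # let the worker run briefly
    stop.set_signal(True)  # request a cooperative shutdown
    p.join()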
Example #22
 def add_agents(self, nb):
     old_length = len(self.agents)
     for index in range(old_length, old_length + nb):
         self.agents.append(
             Agent(id_=index,
                   prediction_queue=self.prediction_queue,
                   training_queue=self.training_queue,
                   states=self.train_set,
                   exit_flag=Value(c_bool, False),
                   statistics_queue=self.statistics_queue,
                   episode_counter=self.nb_episodes,
                   observation_shape=(self.channels, self.height,
                                      self.width),
                   action_space=self.n_outputs,
                   device=self.agent_device,
                   step_max=self.sequence_length))
Example #23
    def __init__(self, n_workers, actor, args):
        self._now_episode = Value('i', 0)

        self.queue = Queue()
        self.collect_event = Event()

        self.worker = []
        for i in range(n_workers):
            self.worker.append(
                Worker(self.queue, self.collect_event, actor, args, i))
        self.process = [
            Process(target=self.worker[i].run, args=(self._now_episode, ))
            for i in range(n_workers)
        ]

        for p in self.process:
            p.start()
        print(f'Start {n_workers} workers.')
Example #24
    def __init__(self, experience_q, prediction_q, observation_q, env_id,
                 episode_log_q, agent_id):
        """
        Workers are the agents interacting with the environment.
        Workers run a copy of the environment with their own specifications.
        They rely on Predictor processes to make decisions.
        Gathered experiences are submitted to a Queue on which the shared models are trained.

        Args:
            experience_q (mp.Queue): Shared memory queue containing experiences across workers of the same type.
            prediction_q (mp.Queue): Shared memory queue containing predictions of this worker.
            observation_q (mp.Queue): Shared memory queue containing observation across workers of the same type.
            env_id (str): The id of the environment instance this worker is interacting with.
            episode_log_q (mp.Queue): Shared memory queue containing the experience of past episodes.
            agent_id (int): The id of the worker process.
        """
        super(WorkerProcess, self).__init__()
        self.experience_q = experience_q
        self.prediction_q = prediction_q
        self.observation_q = observation_q
        self.env_id = env_id
        self.episode_log_q = episode_log_q
        self.id = agent_id

        #:class:`memory.ShortTermMemory`: Short term memory where the history is saved and experiences are memorized.
        self.memory = ShortTermMemory(Config.GLOW)
        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
        #torch.Tensor of float: Array of actions in one-hot encoding.
        self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS))
        #:class:`gym.Env`: The environment the agent interacts with.
        self.env = gym.make(Config.ENV_NAME, **Config.ENV_PARAMS[self.env_id])
        #bool: Boolean value that signals that an episode is finished.
        self.done = False
        #int: Current size of batches.
        self.batch_size = 0
        #torch.Tensor: Tensor of observation batch.
        self.o_batch = torch.Tensor([0.])
        #torch.Tensor: Tensor of action batch.
        self.a_batch = torch.Tensor([0.])
        #torch.Tensor: Tensor of target batch.
        self.t_batch = torch.Tensor([0.])
Example #25
    def __init__(self, autoencoder, optimizer_ae, agent, optimizer_ps, env_id,
                 select_data, experience_q, training_count, ae_loss_log_q,
                 trainer_id):
        """
        Trainers gather experiences and train the respective models.

        Args:
            autoencoder (:class:`base_networks.DenseAutoencoder`): The Server.autoencoder model.
            optimizer_ae (:class:`optim.Adam`): The Server.optimizer_ae for encoder.
            agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
            optimizer_ps (:class:`optim.Adam`): The Server.optimizer_ps for deep PS.
            env_id (str): The id of the environment/agent instance this trainer is using.
            select_data (tuple): The data used for training in 'selection' mode.
            experience_q (:class:`mp.Queue`): Shared memory queue containing experiences for training.
            training_count (:class:`mp.Value`): Shared memory value which counts the number of trainings.
            ae_loss_log_q (:class:`mp.Queue`): Shared memory queue containing loss of decoder.
            trainer_id (int): The id of the trainer process.
        """
        super(TrainerProcess, self).__init__()
        self.autoencoder = autoencoder
        self.optimizer_ae = optimizer_ae
        self.agent = agent
        self.optimizer_ps = optimizer_ps
        self.env_id = env_id
        self.experience_q = experience_q
        self.training_count = training_count
        self.ae_loss_log_q = ae_loss_log_q
        self.id = trainer_id

        if Config.TRAIN_MODE == 'selection':
            o_batch, a_batch, t_batch = select_data
            #torch.Tensor: The observation training data set.
            self.o_batch = o_batch.to(Config.DEVICE)
            #torch.Tensor: The action training data set.
            self.a_batch = a_batch.to(Config.DEVICE)
            #torch.Tensor: The target training data set.
            self.t_batch = t_batch.to(Config.DEVICE)

        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
Example #26
    def __init__(self, agent, observation_q, prediction_qs, env_id,
                 predictor_id):
        """
        Predictors gather observations from agents and make predictions.

        Args:
            agent (:class:`base_networks.DeepPS`): The deep PS model for RL.
            observation_q (:class:`mp.Queue`): Shared memory queue with observations of agents of the same type.
            prediction_qs (:obj:`list` of :class:`mp.Queue`): Shared memory queues containing predictions.
            env_id (str): The identifier for the environment type.
            predictor_id (int): The id of the predictor process.
        """
        super(PredictorProcess, self).__init__()
        self.agent = agent
        self.observation_q = observation_q
        self.prediction_qs = prediction_qs
        self.env_id = env_id
        self.id = predictor_id

        #int: Signal for process exit.
        self.exit_flag = Value('i', 0)
        #torch.Tensor of float: Array of actions in one-hot encoding.
        self.actions = torch.Tensor(np.eye(Config.NUM_ACTIONS)).to(
            Config.DEVICE)
Example #27
class Agent_sync(Agent):
    """
    An agent class that maintains multiple policy nets and environments; each
    worker has one environment and one policy. Useful for most single-agent RL/IL settings.
    """
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        threads, gpu = config.require("threads", "gpu")
        super(Agent_sync, self).__init__(config, environment, policy,
                                         filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # sampler sub-process list
        self._sampler_proc = []

        # used for synchronize commands
        self._cmd_pipe = None
        self._param_pipe = None
        self._cmd_lock = Lock()

        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._cmd_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_thread in range(threads):
            child_name = f"sampler_{i_thread}"
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_thread,
                "gpu": gpu
            })
            child = Process(target=Agent_sync._sampler_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, param_pipe_child,
                                  self._cmd_lock, self._sync_signal,
                                  deepcopy(policy), deepcopy(environment),
                                  deepcopy(filter_op)))
            self._sampler_proc.append(child)
            child.start()

    def __del__(self):
        """
        Terminate all child processes here.
        """
        self._sync_signal.value = -1
        sleep(1)
        for _proc in self._sampler_proc:
            _proc.join(2)
            if _proc.is_alive():
                _proc.terminate()

        self._cmd_pipe.close()
        self._param_pipe.close()

    def broadcast(self, config: ParamDict):
        policy_state, filter_state, max_step, self._batch_size, fixed_env, fixed_policy, fixed_filter = \
            config.require("policy state dict", "filter state dict", "trajectory max step", "batch size",
                           "fixed environment", "fixed policy", "fixed filter")

        self._replay_buffer = []
        policy_state["fixed policy"] = fixed_policy
        filter_state["fixed filter"] = fixed_filter
        cmd = ParamDict({
            "trajectory max step": max_step,
            "fixed environment": fixed_env,
            "filter state dict": filter_state
        })

        assert self._sync_signal.value < 1, "Last sync event did not finish due to an error; a sub-process may have died. Aborting."
        # tell sub-process to reset
        with self._sync_signal.get_lock():
            self._sync_signal.value = len(self._sampler_proc)

        # sync net parameters
        with self._cmd_lock:
            for _ in range(len(self._sampler_proc)):
                self._param_pipe.send(policy_state)

        # wait for all agents' ready feedback
        while self._sync_signal.value > 0:
            sleep(0.01)

        # sync commands
        for _ in range(self._batch_size):
            self._cmd_pipe.send(cmd)

    def collect(self):
        if self._cmd_pipe.poll(0.1):
            self._replay_buffer.append(self._cmd_pipe.recv())
        if len(self._replay_buffer) < self._batch_size:
            return None
        else:
            batch = self._filter.operate_trajectoryList(self._replay_buffer)
            return batch

    @staticmethod
    def _sampler_worker(setups: ParamDict, pipe_cmd, pipe_param, read_lock,
                        sync_signal, policy, environment, filter_op):
        gpu, seed = setups.require("gpu", "seed")

        device = decide_device(gpu)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        environment.init(display=False)
        filter_op.init()
        filter_op.to_device(device)
        policy.init()
        policy.to_device(device)

        # -1: syncing, 0: waiting for new command, 1: sampling
        local_state = 0
        current_step = None
        step_buffer = []
        cmd = None

        def _get_piped_data(pipe):
            with read_lock:
                if pipe.poll(0.001):
                    return pipe.recv()
                else:
                    return None

        while sync_signal.value >= 0:
            # check the sync counter for a sync event and wait for new parameters
            if sync_signal.value > 0 and local_state >= 0:
                # received a sync signal: reset the workspace, decrease the sync counter,
                # and set the state machine to -1 so we don't initialize again
                while _get_piped_data(pipe_cmd) is not None:
                    pass
                step_buffer.clear()
                _policy_state = _get_piped_data(pipe_param)
                if _policy_state is not None:
                    # set new parameters
                    policy.reset(_policy_state)
                    with sync_signal.get_lock():
                        sync_signal.value -= 1
                    local_state = -1

            # if sync ends, tell state machine to recover from syncing state, and reset environment
            elif sync_signal.value == 0 and local_state == -1:
                local_state = 0

            # waiting for states (states are list of dicts)
            elif sync_signal.value == 0 and local_state == 0:
                cmd = _get_piped_data(pipe_cmd)
                if cmd is not None:
                    step_buffer.clear()
                    cmd.require("filter state dict", "fixed environment",
                                "trajectory max step")
                    current_step = environment.reset(
                        random=not cmd["fixed environment"])
                    filter_op.reset(cmd["filter state dict"])
                    local_state = 1

            # sampling
            elif sync_signal.value == 0 and local_state == 1:
                with torch.no_grad():
                    policy_step = filter_op.operate_currentStep(current_step)
                    last_step = policy.step([policy_step])[0]
                last_step, current_step, done = environment.step(last_step)
                record_step = filter_op.operate_recordStep(last_step)
                step_buffer.append(record_step)

                if len(step_buffer) >= cmd["trajectory max step"] or done:
                    traj = filter_op.operate_stepList(step_buffer, done=done)
                    with read_lock:
                        pipe_cmd.send(traj)
                    local_state = 0

        # finalization
        filter_op.finalize()
        policy.finalize()
        environment.finalize()
        pipe_cmd.close()
        pipe_param.close()
        print("Sampler sub-process exited")
Example #28
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# torch.multiprocessing.set_start_method('spawn', force=True)
# torch.multiprocessing.set_sharing_strategy('file_system')

PPO_Transition = namedtuple('PPO_Transition', ('obs', 'action', 'reward', 'done', 'value', 'logproba'))

CPPO_Transition = namedtuple('CPPO_Transition',
                             ('obs', 'action', 'reward', 'cost', 'sum_cost', 'done', 'value', 'c_value', 'logproba'))

TD_Transition = namedtuple('TD_Transition', ('obs', 'action', 'obs_next', 'reward', 'done'))

Safe_TD_Transition = namedtuple('Safe_TD_Transition',
                                ('obs', 'action', 'obs_next', 'reward', 'cost', 'cost_next', 'done'))

# 0 = normal, 1 = blocked; after the main process finishes one sample, the sub-processes are blocked
Sub_Proc_Blocking = Value('i', 0)


def make_env(config_env, seed, env_index):
    env = gym.make(config_env['id'])
    env.start(str(env_index), path=config_env['path'], gui=config_env['gui'], max_step=config_env['max_step'],
              reward_model=config_env['reward_model'], is_human_model=config_env['human_model'])
    env.seed(seed)
    return env


def is_on_policy(args_algo):
    on_policys = ['ppo2', 'trpo', 'cppo', 'cppo2']
    if args_algo in on_policys:
        return True
    else:
        return False
Example #29
    def __init__(self,
                 cache_dir,
                 dataset_dir,
                 dataset_list,
                 cuda,
                 batch_size=500,
                 num_workers=3,
                 renew_frequency=5,
                 rejection_radius_position=0,
                 numpatches=900,
                 numneg=3,
                 pos_thr=50.0,
                 reject=True,
                 mode='train',
                 rejection_radius=3000,
                 dist_type='3D',
                 patch_radius=None,
                 use_depth=False,
                 use_normals=False,
                 use_silhouettes=False,
                 color_jitter=False,
                 greyscale=False,
                 maxres=4096,
                 scale_jitter=False,
                 photo_jitter=False,
                 uniform_negatives=False,
                 needles=0,
                 render_only=False,
                 maxitems=200,
                 cache_once=False):
        super(MultimodalPatchesCache, self).__init__()
        self.cache_dir = cache_dir
        self.dataset_dir = dataset_dir
        #self.images_path = images_path
        self.dataset_list = dataset_list
        self.cuda = cuda
        self.batch_size = batch_size

        self.num_workers = num_workers
        self.renew_frequency = renew_frequency
        self.rejection_radius_position = rejection_radius_position
        self.numpatches = numpatches
        self.numneg = numneg
        self.pos_thr = pos_thr
        self.reject = reject
        self.mode = mode
        self.rejection_radius = rejection_radius
        self.dist_type = dist_type
        self.patch_radius = patch_radius
        self.use_depth = use_depth
        self.use_normals = use_normals
        self.use_silhouettes = use_silhouettes
        self.color_jitter = color_jitter
        self.greyscale = greyscale
        self.maxres = maxres
        self.scale_jitter = scale_jitter
        self.photo_jitter = photo_jitter
        self.uniform_negatives = uniform_negatives
        self.needles = needles
        self.render_only = render_only

        self.cache_done_lock = Lock()
        self.all_done = Value('B', 0)  # 0 is False
        self.cache_done = Value('B', 0)  # 0 is False

        self.wait_for_cache_builder = Event()
        # prepare for wait until initial cache is built
        self.wait_for_cache_builder.clear()
        self.cache_builder_resume = Event()

        self.maxitems = maxitems
        self.cache_once = cache_once

        if self.mode == 'eval':
            self.maxitems = -1
        self.cache_builder = Process(target=self.buildCache,
                                     args=[self.maxitems])
        self.current_cache_build = Value('B', 0)  # 0th cache
        self.current_cache_use = Value('B', 1)  # 1th cache

        self.cache_names = ["cache1", "cache2"]  # constant

        rebuild_cache = True
        if self.mode == 'eval':
            validation_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(validation_dir):
                # we don't need to rebuild validation cache
                # TODO: check if cache is VALID
                rebuild_cache = False
        elif cache_once:
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                # we don't need to rebuild training cache if we are training
                # on limited subset of the training set
                rebuild_cache = False

        if rebuild_cache:
            # clear the caches if they already exist
            build_dataset_dir = os.path.join(
                self.cache_dir,
                self.cache_names[self.current_cache_build.value])
            if os.path.isdir(build_dataset_dir):
                shutil.rmtree(build_dataset_dir)
            use_dataset_dir = os.path.join(
                self.cache_dir, self.cache_names[self.current_cache_use.value])
            if os.path.isdir(use_dataset_dir):
                shutil.rmtree(use_dataset_dir)

            os.makedirs(build_dataset_dir)

            self.cache_builder_resume.set()
            self.cache_builder.start()

            # wait until initial cache is built
            # print("before wait to build")
            # print("wait for cache builder state",
            #       self.wait_for_cache_builder.is_set())
            self.wait_for_cache_builder.wait()
            # print("after wait to build")

        # we have been resumed
        if self.mode != 'eval' and (not self.cache_once):
            # for training, we can set up the cache builder to build
            # the second cache
            self.restart()
        else:
            # else for validation we don't need second cache
            # we just need to switch the built cache to the use cache in order
            # to use it
            tmp = self.current_cache_build.value
            self.current_cache_build.value = self.current_cache_use.value
            self.current_cache_use.value = tmp
Example #30
            config = json.load(server_file)

        if ('addr' not in config) or ('port' not in config):
            print("IP address (addr) and port number required in config")

        address = config['addr']
        port = int(config['port'])

        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.bind((address, port))
        server.listen()

        param_queue = Queue()
        param_queue.put(net.state_dict())

        shutdown_val = Value('b', 0)

        receiver_proc = Process(target=HandleWorkers,
                                args=(server, replay_memory, mem_lock,
                                      param_queue, shutdown_val))
        receiver_proc.start()

    while True:
        try:
            Train(net, replay_memory, mem_lock, args.output_file)
            if param_queue is not None:
                param_queue.put(net.state_dict())
            torch.save(net.state_dict(), args.output_file)
        except KeyboardInterrupt:
            if server is not None:
                assert (shutdown_val is not None and receiver_proc is not None)