Exemplo n.º 1
0
    def __init__(self, result_container=None, verbose=False, n_processes=1,
                 source_paths=None, python_executable=None,
                 cache_callable=True):
        """Initialize the scheduler and start the slave processes.

        result_container -- ResultContainer used to store the results.
        verbose -- Set to True to get progress reports from the scheduler
            (default value is False).
        n_processes -- Number of processes used in parallel (default is 1).
            If None (or any other false value such as 0) then the number
            of detected CPU cores is used instead.
        source_paths -- List of paths that are added to sys.path in
            the processes to make the task unpickling work. A single path
            instead of a list is also accepted.
            If None (default value) then source_paths is set to sys.path.
            To prevent this you can specify an empty list.
        python_executable -- Python executable that is used for the processes.
            The default value is None, in which case sys.executable will be
            used.
        cache_callable -- Cache the task objects in the processes (default
            is True). Disabling caching can reduce the memory usage, but will
            generally be less efficient since the task_callable has to be
            pickled each time.
        """
        super(ProcessScheduler, self).__init__(
                                        result_container=result_container,
                                        verbose=verbose)
        # any false value (None, 0) falls back to the detected core count
        if n_processes:
            self._n_processes = n_processes
        else:
            self._n_processes = cpu_count()
        self._cache_callable = cache_callable
        if python_executable is None:
            python_executable = sys.executable
        # get the location of this module to start the processes
        module_path = os.path.dirname(mdp.__file__)
        module_file = os.path.join(module_path, "parallel",
                                   "process_schedule.py")
        # Note: -u argument is important on Windows to set stdout to binary
        #    mode. Otherwise you might get a strange error message for
        #    copy_reg.
        process_args = [python_executable, "-u", module_file]
        # the slave script receives the cache flag ("True"/"False") as its
        # first command-line argument, followed by the sys.path entries
        process_args.append(str(self._cache_callable))
        if isinstance(source_paths, str):
            source_paths = [source_paths]
        if source_paths is None:
            source_paths = sys.path
        process_args += source_paths
        # list of processes not in use, start the processes now
        self._free_processes = [subprocess.Popen(args=process_args,
                                                 stdout=subprocess.PIPE,
                                                 stdin=subprocess.PIPE)
                                for _ in range(self._n_processes)]
        # tag each process with its cached callable task_index,
        # this is compared with _last_callable_index to check if the cached
        # task_callable is still up to date
        for process in self._free_processes:
            process._callable_index = -1
        if self.verbose:
            print ("scheduler initialized with %d processes" %
                   self._n_processes)
Exemplo n.º 2
0
def test_cpu_count():
    """Check that parallel.cpu_count reports the CPU count as an int."""
    assert isinstance(parallel.cpu_count(), int)
Exemplo n.º 3
0
    def __init__(self,
                 result_container=None,
                 verbose=False,
                 n_processes=1,
                 source_paths=None,
                 python_executable=None,
                 cache_callable=True):
        """Start the worker processes and set up the scheduler.

        result_container -- ResultContainer that collects the results.
        verbose -- When True, progress messages are printed (default False).
        n_processes -- How many worker processes to run in parallel
            (default 1); a false value (e.g. None or 0) means one per
            detected CPU core.
        source_paths -- Path or list of paths prepended to sys.path inside
            the workers so that task unpickling succeeds.  None (the
            default) means use this interpreter's sys.path; pass an empty
            list to add nothing.
        python_executable -- Interpreter used for the worker processes;
            None (the default) selects sys.executable.
        cache_callable -- Whether workers cache task objects (default
            True).  Turning it off saves memory but requires the
            task_callable to be pickled for every task.
        """
        super(ProcessScheduler,
              self).__init__(result_container=result_container,
                             verbose=verbose)
        self._n_processes = n_processes if n_processes else cpu_count()
        self._cache_callable = cache_callable
        if python_executable is None:
            python_executable = sys.executable
        # locate the slave script that each worker process will execute
        slave_script = os.path.join(os.path.dirname(mdp.__file__),
                                    "parallel", "process_schedule.py")
        # Note: -u argument is important on Windows to set stdout to binary
        #    mode. Otherwise you might get a strange error message for
        #    copy_reg.
        cmd = [python_executable, "-u", slave_script,
               str(self._cache_callable)]
        if source_paths is None:
            source_paths = sys.path
        elif isinstance(source_paths, str):
            source_paths = [source_paths]
        cmd.extend(source_paths)
        # spawn every worker now; all of them start out idle ("free")
        self._free_processes = [subprocess.Popen(args=cmd,
                                                 stdout=subprocess.PIPE,
                                                 stdin=subprocess.PIPE)
                                for _ in range(self._n_processes)]
        # each worker is tagged with the index of its cached callable;
        # -1 means "nothing cached yet" (compared against
        # _last_callable_index to detect a stale task_callable)
        for worker in self._free_processes:
            worker._callable_index = -1
        if self.verbose:
            print("scheduler initialized with %d processes" %
                  self._n_processes)
Exemplo n.º 4
0
def test_cpu_count():
    """Test the cpu_count helper function."""
    # the helper must report the number of CPUs as a plain int
    n_cpus = parallel.cpu_count()
    assert isinstance(n_cpus, int)