Exemplo n.º 1
0
    def _ApplyThreads(self,
                      func,
                      work_queue,
                      shard,
                      num_threads,
                      thr_exc_handler=None,
                      shared_vars=None,
                      arg_checker=_UriArgChecker):
        """Drain work_queue, running func on every accepted argument.

        Calls are queued on a ThreadPool when num_threads is greater than
        one; otherwise each call to func is made in the current thread.

        Args:
          func: Function to call for each request.
          work_queue: Shared queue of arguments. It is read with get() until
                      an item equal to _EOF_ARGUMENT is received.
          shard: Shard number assigned to this call; forwarded to
                 arg_checker.
          num_threads: Number of Python threads to use for this shard.
          thr_exc_handler: Exception handler handed to ThreadPool.
          shared_vars: Dict mapping attribute names on self to shared
                       variables. After processing, each variable's value is
                       incremented by the attribute of the same name. Only
                       relevant (and non-None) when this function runs in a
                       separate OS process.
          arg_checker: Called as arg_checker(self, args, shard); a false
                       result means the argument is skipped. Also handles
                       any logging specific to a particular argument type.
        """
        # Connections must not be shared between OS processes: interleaved
        # writes on one socket would garble the underlying SSL session.
        # Closing everything in the provider pool makes this process set up
        # its own connections.
        pooled_providers = StorageUri.provider_pool
        if pooled_providers:
            for key in pooled_providers:
                pooled_providers[key].connection.close()

        use_pool = num_threads > 1
        if use_pool:
            workers = ThreadPool(num_threads, thr_exc_handler)
        try:
            while True:
                item = work_queue.get()
                if item == _EOF_ARGUMENT:
                    # EOF marker: nothing more to read.
                    break
                if arg_checker(self, item, shard):
                    if use_pool:
                        workers.AddTask(func, item)
                    else:
                        func(item)
            if use_pool:
                # Wait here for the queued tasks to finish.
                workers.WaitCompletion()
        finally:
            if use_pool:
                workers.Shutdown()

        # A non-empty shared_vars means we are running in a separate OS
        # process; fold this process's totals into the shared variables.
        # NOTE(review): `+=` on a shared value is a read-modify-write;
        # confirm the shared variables are safe to update this way.
        if shared_vars:
            for attr_name, shared in shared_vars.items():
                shared.value += getattr(self, attr_name)
Exemplo n.º 2
0
    def ApplyThreads(self,
                     func,
                     assigned_uris,
                     shard,
                     num_threads,
                     count=None,
                     thr_exc_handler=None):
        """Run func over every assigned URI pair, optionally on a ThreadPool.

        Calls are queued on a ThreadPool when num_threads is greater than
        one; otherwise each call to func is made in the current thread.
        When ignore_symlinks is set, file URIs that are symbolic links are
        logged and skipped.

        Args:
          func: Function to call for each request, as
                func(src_uri, exp_src_uri).
          assigned_uris: Iterable of (src_uri, exp_src_uri) pairs to process.
          shard: Shard number assigned to this call (used in debug logging).
          num_threads: Number of Python threads to use for this shard.
          count: Shared integer tracking total bytes transferred; its value
                 is incremented by self.total_bytes_transferred. Only
                 relevant (and non-None) when this function runs in a
                 separate OS process.
          thr_exc_handler: Exception handler handed to ThreadPool.
        """
        # Connections must not be shared between OS processes: interleaved
        # writes on one socket would mess up the underlying SSL session.
        # Closing everything in the provider pool makes this process set up
        # its own connections.
        pooled_providers = StorageUri.provider_pool
        if pooled_providers:
            for key in pooled_providers:
                pooled_providers[key].connection.close()

        use_pool = num_threads > 1
        if use_pool:
            workers = ThreadPool(num_threads, thr_exc_handler)
        try:
            for (original_uri, expanded_uri) in assigned_uris:
                if self.debug:
                    self.THREADED_LOGGER.info(
                        'process %d shard %d is handling uri %s',
                        os.getpid(), shard, expanded_uri)
                skip_as_symlink = (
                    self.ignore_symlinks
                    and expanded_uri.is_file_uri()
                    and os.path.islink(expanded_uri.object_name))
                if skip_as_symlink:
                    self.THREADED_LOGGER.info('Skipping symbolic link %s...',
                                              expanded_uri)
                    continue
                if use_pool:
                    workers.AddTask(func, original_uri, expanded_uri)
                else:
                    func(original_uri, expanded_uri)
            if use_pool:
                # Wait here for the queued tasks to finish.
                workers.WaitCompletion()
        finally:
            if use_pool:
                workers.Shutdown()

        # When spawned in a separate OS process, add this process's byte
        # total to the shared counter.
        if count:
            count.value += self.total_bytes_transferred
Exemplo n.º 3
0
  def _ApplyThreads(self, func, work_queue, shard, num_threads,
                    thr_exc_handler=None, shared_vars=None):
    """Drain work_queue, running func on each name expansion result.

    Calls are queued on a ThreadPool when num_threads is greater than one;
    otherwise each call to func is made in the current thread. When
    exclude_symlinks is set, file URIs that are symbolic links are logged
    and skipped.

    Args:
      func: Function to call for each request; receives the queued
            name expansion result.
      work_queue: Shared queue of NameExpansionResult to process. It is read
                  with get() until an item equal to
                  _EOF_NAME_EXPANSION_RESULT is received.
      shard: Shard number assigned to this call (used in debug logging).
      num_threads: Number of Python threads to use for this shard.
      thr_exc_handler: Exception handler handed to ThreadPool.
      shared_vars: Dict mapping attribute names on self to shared variables.
                   After processing, each variable's value is incremented by
                   the attribute of the same name. Only relevant (and
                   non-None) when this function runs in a separate OS
                   process.
    """
    # Connections must not be shared between OS processes: interleaved
    # writes on one socket would garble the underlying SSL session. Closing
    # everything in the provider pool makes this process set up its own
    # connections.
    pooled_providers = StorageUri.provider_pool
    if pooled_providers:
      for key in pooled_providers:
        pooled_providers[key].connection.close()

    use_pool = num_threads > 1
    if use_pool:
      workers = ThreadPool(num_threads, thr_exc_handler)
    try:
      while True:
        queued = work_queue.get()
        if queued == _EOF_NAME_EXPANSION_RESULT:
          # EOF marker: nothing more to read.
          break
        expanded_uri = self.suri_builder.StorageUri(
            queued.GetExpandedUriStr())
        if self.debug:
          self.THREADED_LOGGER.info('process %d shard %d is handling uri %s',
                                    os.getpid(), shard, expanded_uri)
        skip_as_symlink = (
            self.exclude_symlinks
            and expanded_uri.is_file_uri()
            and os.path.islink(expanded_uri.object_name))
        if skip_as_symlink:
          self.THREADED_LOGGER.info('Skipping symbolic link %s...',
                                    expanded_uri)
          continue
        if use_pool:
          workers.AddTask(func, queued)
        else:
          func(queued)
      if use_pool:
        # Wait here for the queued tasks to finish.
        workers.WaitCompletion()
    finally:
      if use_pool:
        workers.Shutdown()

    # A non-empty shared_vars means we are running in a separate OS process;
    # fold this process's totals into the shared variables.
    # NOTE(review): `+=` on a shared value is a read-modify-write; confirm
    # the shared variables are safe to update this way.
    if shared_vars:
      for attr_name, shared in shared_vars.items():
        shared.value += getattr(self, attr_name)
Exemplo n.º 4
0
    def _ApplyThreads(self,
                      func,
                      assigned_uris,
                      shard,
                      num_threads,
                      thr_exc_handler=None,
                      shared_vars=None):
        """Run func over every assigned URI tuple, optionally on a ThreadPool.

        Calls are queued on a ThreadPool when num_threads is greater than
        one; otherwise each call to func is made in the current thread.
        When exclude_symlinks is set, file URIs that are symbolic links are
        logged and skipped.

        Args:
          func: Function to call for each request; receives the six tuple
                fields as positional arguments.
          assigned_uris: Iterable of tuples to process, of the form:
              (src_uri, exp_src_uri, src_uri_names_container,
               src_uri_expands_to_multi, have_multiple_srcs,
               have_existing_dest_subdir).
          shard: Shard number assigned to this call (used in debug logging).
          num_threads: Number of Python threads to use for this shard.
          thr_exc_handler: Exception handler handed to ThreadPool.
          shared_vars: Dict mapping attribute names on self to shared
                       variables. After processing, each variable's value is
                       incremented by the attribute of the same name. Only
                       relevant (and non-None) when this function runs in a
                       separate OS process.
        """
        # Connections must not be shared between OS processes: interleaved
        # writes on one socket would mess up the underlying SSL session.
        # Closing everything in the provider pool makes this process set up
        # its own connections.
        pooled_providers = StorageUri.provider_pool
        if pooled_providers:
            for key in pooled_providers:
                pooled_providers[key].connection.close()

        use_pool = num_threads > 1
        if use_pool:
            workers = ThreadPool(num_threads, thr_exc_handler)
        try:
            for work_item in assigned_uris:
                # Unpack explicitly so a malformed tuple fails here, before
                # anything is logged or dispatched.
                (original_uri, expanded_uri, names_container,
                 expands_to_multi, multiple_srcs,
                 existing_dest_subdir) = work_item
                if self.debug:
                    self.THREADED_LOGGER.info(
                        'process %d shard %d is handling uri %s',
                        os.getpid(), shard, expanded_uri)
                skip_as_symlink = (
                    self.exclude_symlinks
                    and expanded_uri.is_file_uri()
                    and os.path.islink(expanded_uri.object_name))
                if skip_as_symlink:
                    self.THREADED_LOGGER.info('Skipping symbolic link %s...',
                                              expanded_uri)
                    continue
                call_args = (original_uri, expanded_uri, names_container,
                             expands_to_multi, multiple_srcs,
                             existing_dest_subdir)
                if use_pool:
                    workers.AddTask(func, *call_args)
                else:
                    func(*call_args)
            if use_pool:
                # Wait here for the queued tasks to finish.
                workers.WaitCompletion()
        finally:
            if use_pool:
                workers.Shutdown()

        # A non-empty shared_vars means we are running in a separate OS
        # process; fold this process's totals into the shared variables.
        # NOTE(review): `+=` on a shared value is a read-modify-write;
        # confirm the shared variables are safe to update this way.
        if shared_vars:
            for attr_name, shared in shared_vars.items():
                shared.value += getattr(self, attr_name)
Exemplo n.º 5
0
  def _ApplyThreads(self, func, work_queue, shard, num_threads,
                    thr_exc_handler=None, shared_vars=None,
                    arg_checker=_UriArgChecker, result_list=None,
                    should_return_results=False, use_thr_exc_handler=False):
    """Process queued arguments on one or more Python threads.

    Reads arguments from work_queue until the EOF marker is seen and calls
    func on each one that arg_checker accepts. With num_threads > 1 the calls
    are queued on a ThreadPool; otherwise they run in the current thread.

    Args:
      func: Function to call for each argument.
      work_queue: Shared queue of arguments to process. It is read with
                  get() until an item equal to _EOF_ARGUMENT is received.
      shard: Assigned subset (shard number) for this function; forwarded to
             arg_checker.
      num_threads: Number of Python threads to spawn to process this shard.
      thr_exc_handler: Exception handler for ThreadPool class. Also called
                       directly with the exception for failures in the
                       single-threaded path when use_thr_exc_handler is true.
      shared_vars: Dict of shared memory variables to be managed, keyed by
                   the name of the attribute on self that is added to each
                   variable's value (only relevant, and non-None, if this
                   function is run in a separate OS process).
      arg_checker: Called as arg_checker(self, args, shard) to determine
                   whether we should process the current argument or simply
                   skip it. Also handles any logging that is specific to a
                   particular type of argument.
      result_list: A thread- and process-safe shared list in which to store
                   the return values from all calls to func. Values are only
                   stored when should_return_results is also true. If
                   result_list is None (the default), then no return values
                   will be stored there.
      should_return_results: If False (the default), then no return values
                             are collected, returned, or stored in
                             result_list.
      use_thr_exc_handler: If true and thr_exc_handler is not None, exceptions
                           raised in the single-threaded path (while calling
                           func or recording its result) are passed to
                           thr_exc_handler. Otherwise they are propagated
                           normally.

    Returns:
      return_values: In the single-threaded case, a list of the return values
                     from all calls to func, or an empty list if
                     should_return_results is False (the default). In the
                     multi-threaded case, whatever
                     thread_pool.Shutdown(should_return_results) returns.
    """
    # Each OS process needs to establish its own set of connections to
    # the server to avoid writes from different OS processes interleaving
    # onto the same socket (and garbling the underlying SSL session).
    # We ensure each process gets its own set of connections here by
    # closing all connections in the storage provider connection pool.
    connection_pool = StorageUri.provider_pool
    if connection_pool:
      for i in connection_pool:
        connection_pool[i].connection.close()

    return_values = []

    if num_threads > 1:
      thread_pool = ThreadPool(num_threads, thr_exc_handler)
    try:
      while True:  # Loop until we hit EOF marker.
        args = work_queue.get()
        if args == _EOF_ARGUMENT:
          break
        if not arg_checker(self, args, shard):
          continue
        if num_threads > 1:
          thread_pool.AddTask(func, args)
        else:
          try:
            return_value = func(args)
            if should_return_results:
              return_values.append(return_value)
              if result_list is not None:
                result_list.append(return_value)
          except Exception as e:
            # Only delegate when a handler was actually supplied; calling a
            # None handler would raise TypeError and hide the real failure.
            if use_thr_exc_handler and thr_exc_handler is not None:
              thr_exc_handler(e)
            else:
              raise
    finally:
      if num_threads > 1:
        # Shutdown() is the only call made on the pool after tasks are
        # queued.
        # NOTE(review): presumably it waits for the worker threads to finish
        # before returning their results -- confirm against ThreadPool.
        #
        # We provide return values both in the normal way and in the
        # result_list so that we can use the result_list (which is quite slow)
        # for IPC, where it's necessary, and just return the values normally
        # when we're calling this function from a single process.
        return_values = thread_pool.Shutdown(should_return_results)
        if (result_list is not None) and should_return_results:
          for value in return_values:
            result_list.append(value)
    # If any shared variables (which means we are running in a separate OS
    # process), increment value for each shared variable.
    # NOTE(review): `+=` on a shared value is a read-modify-write; confirm
    # the shared variables are safe to update this way.
    if shared_vars:
      for (name, var) in shared_vars.items():
        var.value += getattr(self, name)
    return return_values