def SetAclCommandHelper(self):
  """Common logic for setting ACLs. Sets the standard ACL or the default
  object ACL depending on self.command_name."""
  # First arg is the ACL (canned-ACL name or path to an XML ACL file);
  # the remaining args are the target URIs.
  acl_arg = self.args[0]
  uri_args = self.args[1:]
  # Disallow multi-provider setacl requests, because there are differences in
  # the ACL models.
  storage_uri = self.UrisAreForSingleProvider(uri_args)
  if not storage_uri:
    raise CommandException('"%s" command spanning providers not allowed.' %
                           self.command_name)

  # Get ACL object from connection for one URI, for interpreting the ACL.
  # This won't fail because the main startup code insists on at least 1 arg
  # for this command.
  acl_class = storage_uri.acl_class()
  canned_acls = storage_uri.canned_acls()

  # Determine whether acl_arg names a file containing XML ACL text vs. the
  # string name of a canned ACL.
  if os.path.isfile(acl_arg):
    acl_file = open(acl_arg, 'r')
    acl_txt = acl_file.read()
    acl_file.close()
    acl_obj = acl_class()
    # Parse the user's XML into an ACL object, reporting syntax errors with
    # their line/column location in the input file.
    h = handler.XmlHandler(acl_obj, storage_uri.get_bucket())
    try:
      xml.sax.parseString(acl_txt, h)
    except xml.sax._exceptions.SAXParseException, e:
      raise CommandException('Requested ACL is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))
    acl_arg = acl_obj
    # NOTE(review): this chunk appears truncated here -- the canned-ACL else
    # branch and the per-URI set loop fall outside this chunk.
def GetAclCommandHelper(self): """Common logic for getting ACLs. Gets the standard ACL or the default object ACL depending on self.command_name.""" # Wildcarding is allowed but must resolve to just one object. uris = list(self.CmdWildcardIterator(self.args[0])) if len(uris) != 1: raise CommandException( 'Wildcards must resolve to exactly one object for ' '"%s" command.' % self.command_name) uri = uris[0] if not uri.bucket_name: raise CommandException('"%s" command must specify a bucket or ' 'object.' % self.command_name) if self.command_name == 'getdefacl': acl = uri.get_def_acl(False, self.headers) else: acl = uri.get_acl(False, self.headers) # Pretty-print the XML to make it more easily human editable. parsed_xml = xml.dom.minidom.parseString(acl.to_xml().encode('utf-8')) print parsed_xml.toprettyxml(indent=' ')
def GetAclCommandHelper(self): """Common logic for getting ACLs. Gets the standard ACL or the default object ACL depending on self.command_name.""" # Wildcarding is allowed but must resolve to just one object. uris = list(self.exp_handler.WildcardIterator(self.args[0]).IterUris()) if len(uris) == 0: raise CommandException('No URIs matched') if len(uris) != 1: raise CommandException( '%s matched more than one URI, which is not ' 'allowed by the %s command' % (self.args[0], self.command_name)) uri = uris[0] if not uri.names_bucket() and not uri.names_object(): raise CommandException('"%s" command must specify a bucket or ' 'object.' % self.command_name) if self.command_name == 'getdefacl': acl = uri.get_def_acl(False, self.headers) else: acl = uri.get_acl(False, self.headers) # Pretty-print the XML to make it more easily human editable. parsed_xml = xml.dom.minidom.parseString(acl.to_xml().encode('utf-8')) print parsed_xml.toprettyxml(indent=' ')
def _ConfigureNoOpAuthIfNeeded(self):
  """Sets up no-op auth handler if no boto credentials are configured."""
  config = boto.config
  if not util.HasConfiguredCredentials():
    if self.config_file_list:
      # A boto config file exists but holds no usable credentials. If it
      # names an OAuth2 refresh token on a Python without OAuth2 support,
      # explain that specifically; otherwise give generic setup guidance.
      if (config.has_option('Credentials', 'gs_oauth2_refresh_token')
          and not HAVE_OAUTH2):
        raise CommandException(
            "Your gsutil is configured with OAuth2 authentication "
            "credentials.\nHowever, OAuth2 is only supported when running "
            "under Python 2.6 or later\n(unless additional dependencies are "
            "installed, see README for details); you are running Python %s." %
            sys.version)
      raise CommandException('You have no storage service credentials in any '
                             'of the following boto config\nfiles. Please '
                             'add your credentials as described in the '
                             'gsutil README file, or else\nre-run '
                             '"gsutil config" to re-create a config '
                             'file:\n%s' % self.config_file_list)
    else:
      # With no boto config file the user can still access publicly readable
      # buckets and objects.
      from gslib import no_op_auth_plugin
      # NOTE(review): chunk appears truncated here; presumably the original
      # goes on to register no_op_auth_plugin with boto -- confirm upstream.
def _RunSingleThreadedSetAcl(self, acl_arg, uri_args): some_matched = False for uri_str in uri_args: for blr in self.exp_handler.WildcardIterator(uri_str): if blr.HasPrefix(): continue some_matched = True uri = blr.GetUri() if self.command_name == 'setdefacl': print 'Setting default object ACL on %s...' % uri uri.set_def_acl(acl_arg, uri.object_name, False, self.headers) else: print 'Setting ACL on %s...' % uri uri.set_acl(acl_arg, uri.object_name, False, self.headers) if not some_matched: raise CommandException('No URIs matched')
def LoadVersionString(self):
  """Loads version string for currently installed gsutil command.

  Returns:
    Version string.

  Raises:
    CommandException: if errors encountered.
  """
  # Build the path portably instead of concatenating with os.sep, which
  # yields a doubled separator when gsutil_bin_dir already ends with one.
  ver_file_path = os.path.join(self.gsutil_bin_dir, 'VERSION')
  if not os.path.isfile(ver_file_path):
    raise CommandException(
        '%s not found. Did you install the\ncomplete gsutil software after '
        'the gsutil "update" command was implemented?' % ver_file_path)
  ver_file = open(ver_file_path, 'r')
  try:
    # Strip the trailing newline so callers get a bare version string.
    installed_version_string = ver_file.read().rstrip('\n')
  finally:
    # Original leaked the handle if read() raised; always close it.
    ver_file.close()
  return installed_version_string
def run(self, elements, test = False, directory = None, logger = None, phase = None, root = False): ''' The run method, which runs a list of commands, and returns the results.''' # Create a list for the result of the commands result = list() # Get the user for the commands user = self.user if not root else 'root' # Iterate through each of the commands in the preamble. for element in elements: # If we are in test mode, add something to the command if test: command = self.commands.call(element + '; echo $?', self.environment, user = user) # If we have a directory set, run there elif directory is not None: command = self.commands.call(element, self.environment, directory = directory, user = user) # Otherwise, get the response of the command else: command = self.commands.call(element, self.environment, user = user) # Check if there is an attached logger if logger is not None and phase != 'unpacking': # And write the output to the logger logger.log(phase, element, command) # Check if there were any errors if command[1] is not None: raise CommandException(command[1]) # And append the output result.append(str(command[0])[2:-3]) # And return the output return result
def GetXmlSubresource(self, subresource, uri_arg): """Print an xml subresource, e.g. logging, for a bucket/object. Args: subresource: the subresource name uri_arg: uri for the bucket/object. Wildcards will be expanded. Raises: CommandException: if errors encountered. """ # Wildcarding is allowed but must resolve to just one bucket. uris = list(self.CmdWildcardIterator(uri_arg)) if len(uris) != 1: raise CommandException( 'Wildcards must resolve to exactly one item for ' 'get %s' % subresource) uri = uris[0] xml_str = uri.get_subresource(subresource, False, self.headers) # Pretty-print the XML to make it more easily human editable. parsed_xml = xml.dom.minidom.parseString(xml_str.encode('utf-8')) print parsed_xml.toprettyxml(indent=' ')
def InsistUriNamesContainer(self, uri, command_name,
                            msg='Destination URI must name a bucket or '
                                'directory for the\nmultiple source form of '
                                'the %s command.'):
  """Checks that URI names a directory or bucket.

  Args:
    uri: StorageUri to check
    command_name: name of command making call. May not be the same as
        self.command_name in the case of commands implemented atop other
        commands (like mv command).
    msg: message to print on error, containing one '%s' to be replaced by
        command_name.

  Raises:
    CommandException: if errors encountered.
  """
  # A singleton URI names an individual object, not a container; anything
  # else passes the check.
  if not uri.names_singleton():
    return
  raise CommandException(msg % command_name)
def __init__(self, command_runner, args, headers, debug, parallel_operations,
             gsutil_bin_dir, boto_lib_dir, config_file_list, gsutil_ver,
             bucket_storage_uri_class, test_method=None):
  """
  Args:
    command_runner: CommandRunner (for commands built atop other commands).
    args: Command-line args (arg0 = actual arg, not command name ala bash).
    headers: Dictionary containing optional HTTP headers to pass to boto.
    debug: Debug level to pass in to boto connection (range 0..3).
    parallel_operations: Should command operations be executed in parallel?
    gsutil_bin_dir: Bin dir from which gsutil is running.
    boto_lib_dir: Lib dir where boto runs.
    config_file_list: Config file list returned by _GetBotoConfigFileList().
    gsutil_ver: Version string of currently running gsutil command.
    bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
        Settable for testing/mocking.
    test_method: Optional general purpose method for testing purposes.
        Application and semantics of this method will vary by command and
        test type.

  Implementation note: subclasses shouldn't need to define an __init__
  method, and instead depend on the shared initialization that happens here.
  If you do define an __init__ method in a subclass you'll need to
  explicitly call super().__init__(). But you're encouraged not to do this,
  because it will make changing the __init__ interface more painful.
  """
  # Save class values from constructor params.
  self.command_runner = command_runner
  self.args = args
  # Keep a pristine copy: self.args is overwritten by getopt parsing below.
  self.unparsed_args = args
  self.headers = headers
  self.debug = debug
  self.parallel_operations = parallel_operations
  self.gsutil_bin_dir = gsutil_bin_dir
  self.boto_lib_dir = boto_lib_dir
  self.config_file_list = config_file_list
  self.gsutil_ver = gsutil_ver
  self.bucket_storage_uri_class = bucket_storage_uri_class
  self.test_method = test_method
  # Defaults consulted by commands that walk the local file system.
  self.exclude_symlinks = False
  self.recursion_requested = False

  # Process sub-command instance specifications.
  # First, ensure subclass implementation sets all required keys.
  for k in self.REQUIRED_SPEC_KEYS:
    if k not in self.command_spec or self.command_spec[k] is None:
      raise CommandException('"%s" command implementation is missing %s '
                             'specification' % (self.command_name, k))
  # Now override default command_spec with subclass-specified values.
  tmp = self._default_command_spec
  tmp.update(self.command_spec)
  self.command_spec = tmp
  del tmp

  # Make sure command provides a test specification.
  if not self.test_steps:
    # TODO: Uncomment following lines when test feature is ready.
    #raise CommandException('"%s" command implementation is missing test '
    #'specification' % self.command_name)
    pass

  # Parse and validate args.
  try:
    (self.sub_opts, self.args) = getopt.getopt(
        args, self.command_spec[SUPPORTED_SUB_ARGS])
  except GetoptError, e:
    raise CommandException('%s for "%s" command.' % (e.msg,
                                                     self.command_name))
  # NOTE(review): this chunk ends after arg parsing; the arg-count and URI
  # validation that follows in the full constructor is outside this chunk.
def __init__(self, command_runner, args, headers, debug, parallel_operations,
             gsutil_bin_dir, boto_lib_dir, config_file_list,
             bucket_storage_uri_class, test_method=None):
  """
  Args:
    command_runner: CommandRunner (for commands built atop other commands).
    args: command-line args (arg0 = actual arg, not command name ala bash).
    headers: dictionary containing optional HTTP headers to pass to boto.
    debug: debug level to pass in to boto connection (range 0..3).
    parallel_operations: Should command operations be executed in parallel?
    gsutil_bin_dir: bin dir from which gsutil is running.
    boto_lib_dir: lib dir where boto runs.
    config_file_list: config file list returned by _GetBotoConfigFileList().
    bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
        Settable for testing/mocking.
    test_method: Optional general purpose method for testing purposes.
        Application and semantics of this method will vary by command and
        test type.
  """
  # Save class values from constructor params.
  self.command_runner = command_runner
  self.args = args
  self.headers = headers
  self.debug = debug
  self.parallel_operations = parallel_operations
  self.gsutil_bin_dir = gsutil_bin_dir
  self.boto_lib_dir = boto_lib_dir
  self.config_file_list = config_file_list
  self.bucket_storage_uri_class = bucket_storage_uri_class
  self.test_method = test_method
  # Default consulted by commands that walk the local file system.
  self.ignore_symlinks = False

  # Process sub-command instance specifications.
  # First, ensure subclass implementation sets all required keys.
  for k in self.REQUIRED_SPEC_KEYS:
    if k not in self.command_spec or self.command_spec[k] is None:
      raise CommandException('"%s" command implementation is missing %s '
                             'specification' % (self.command_name, k))
  # Now override default command_spec with subclass-specified values.
  tmp = self._default_command_spec
  tmp.update(self.command_spec)
  self.command_spec = tmp
  del tmp

  # Parse and validate args.
  try:
    (self.sub_opts, self.args) = getopt.getopt(
        args, self.command_spec[SUPPORTED_SUB_ARGS])
  except GetoptError, e:
    raise CommandException('%s for "%s" command.' % (e.msg,
                                                     self.command_name))
# NOTE(review): this chunk begins inside the `if os.path.isfile(acl_arg):`
# branch of SetAclCommandHelper; the opening lines of the function fall
# outside this chunk.
    acl_txt = acl_file.read()
    acl_file.close()
    acl_obj = acl_class()
    # Parse the user's XML into an ACL object, reporting syntax errors with
    # their line/column location in the input file.
    h = handler.XmlHandler(acl_obj, storage_uri.get_bucket())
    try:
      xml.sax.parseString(acl_txt, h)
    except xml.sax._exceptions.SAXParseException, e:
      raise CommandException('Requested ACL is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))
    acl_arg = acl_obj
  else:
    # No file exists, so expect a canned ACL string.
    if acl_arg not in canned_acls:
      raise CommandException('Invalid canned ACL "%s".' % acl_arg)

  # Now iterate over URIs and set the ACL on each.
  for uri_str in uri_args:
    for uri in self.CmdWildcardIterator(uri_str):
      if self.command_name == 'setdefacl':
        print 'Setting default object ACL on %s...' % uri
        uri.set_def_acl(acl_arg, uri.object_name, False, self.headers)
      else:
        print 'Setting ACL on %s...' % uri
        uri.set_acl(acl_arg, uri.object_name, False, self.headers)

def GetAclCommandHelper(self):
  """Common logic for getting ACLs. Gets the standard ACL or the default
  object ACL depending on self.command_name."""
  # NOTE(review): chunk truncated here; the body of this function falls
  # outside this chunk.
class Command(object):
  # Global instance of a threaded logger object.
  THREADED_LOGGER = _ThreadedLogger()

  # command_spec keys every concrete subclass is required to define.
  REQUIRED_SPEC_KEYS = [COMMAND_NAME]

  # Each subclass must define the following map, minimally including the
  # keys in REQUIRED_SPEC_KEYS; other values below will be used as defaults,
  # although for readbility subclasses should specify the complete map.
  command_spec = {
    # Name of command.
    COMMAND_NAME: None,
    # List of command name aliases.
    COMMAND_NAME_ALIASES: [],
    # Min number of args required by this command.
    MIN_ARGS: 0,
    # Max number of args required by this command, or NO_MAX.
    MAX_ARGS: NO_MAX,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS: '',
    # True if file URIs are acceptable for this command.
    FILE_URIS_OK: False,
    # True if provider-only URIs are acceptable for this command.
    PROVIDER_URIS_OK: False,
    # Index in args of first URI arg.
    URIS_START_ARG: 0,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED: True,
  }
  _default_command_spec = command_spec
  help_spec = HelpProvider.help_spec

  """Define an empty test specification, which derived classes must populate.

  This is a list of tuples containing the following values:

    step_name - mnemonic name for test, displayed when test is run
    cmd_line - shell command line to run test
    expect_ret or None - expected return code from test (None means ignore)
    (result_file, expect_file) or None - tuple of result file and expected
      file to diff for additional test verification beyond the return code
      (None means no diff requested)

  Notes:

  - Setting expected_ret to None means there is no expectation and,
    hence, any returned value will pass.

  - Any occurrences of the string 'gsutil' in the cmd_line parameter
    are expanded to the full path to the gsutil command under test.

  - The cmd_line, result_file and expect_file parameters may contain
    the following special substrings:

    $Bn - converted to one of 10 unique-for-testing bucket names (n=0..9)
    $On - converted to one of 10 unique-for-testing object names (n=0..9)
    $Fn - converted to one of 10 unique-for-testing file names (n=0..9)

  - The generated file names are full pathnames, whereas the generated
    bucket and object names are simple relative names.

  - Tests with a non-None result_file and expect_file automatically
    trigger an implicit diff of the two files.

  - These test specifications, in combination with the conversion strings
    allow tests to be constructed parametrically. For example, here's an
    annotated subset of a test_steps for the cp command:

    # Copy local file to object, verify 0 return code.
    ('simple cp', 'gsutil cp $F1 gs://$B1/$O1', 0, None, None),
    # Copy uploaded object back to local file and diff vs. orig file.
    ('verify cp', 'gsutil cp gs://$B1/$O1 $F2', 0, '$F2', '$F1'),

  - After pattern substitution, the specs are run sequentially, in the
    order in which they appear in the test_steps list.
  """
  test_steps = []

  # Define a convenience property for command name, since it's used many places.
  def _GetDefaultCommandName(self):
    return self.command_spec[COMMAND_NAME]
  command_name = property(_GetDefaultCommandName)

  def __init__(self, command_runner, args, headers, debug,
               parallel_operations, gsutil_bin_dir, boto_lib_dir,
               config_file_list, gsutil_ver, bucket_storage_uri_class,
               test_method=None):
    """
    Args:
      command_runner: CommandRunner (for commands built atop other commands).
      args: Command-line args (arg0 = actual arg, not command name ala bash).
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      parallel_operations: Should command operations be executed in parallel?
      gsutil_bin_dir: Bin dir from which gsutil is running.
      boto_lib_dir: Lib dir where boto runs.
      config_file_list: Config file list returned by _GetBotoConfigFileList().
      gsutil_ver: Version string of currently running gsutil command.
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      test_method: Optional general purpose method for testing purposes.
          Application and semantics of this method will vary by command and
          test type.

    Implementation note: subclasses shouldn't need to define an __init__
    method, and instead depend on the shared initialization that happens
    here. If you do define an __init__ method in a subclass you'll need to
    explicitly call super().__init__(). But you're encouraged not to do
    this, because it will make changing the __init__ interface more painful.
    """
    # Save class values from constructor params.
    self.command_runner = command_runner
    self.args = args
    # Keep a pristine copy: self.args is overwritten by getopt parsing below.
    self.unparsed_args = args
    self.headers = headers
    self.debug = debug
    self.parallel_operations = parallel_operations
    self.gsutil_bin_dir = gsutil_bin_dir
    self.boto_lib_dir = boto_lib_dir
    self.config_file_list = config_file_list
    self.gsutil_ver = gsutil_ver
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.test_method = test_method
    # Defaults consulted by commands that walk the local file system.
    self.exclude_symlinks = False
    self.recursion_requested = False

    # Process sub-command instance specifications.
    # First, ensure subclass implementation sets all required keys.
    for k in self.REQUIRED_SPEC_KEYS:
      if k not in self.command_spec or self.command_spec[k] is None:
        raise CommandException('"%s" command implementation is missing %s '
                               'specification' % (self.command_name, k))
    # Now override default command_spec with subclass-specified values.
    tmp = self._default_command_spec
    tmp.update(self.command_spec)
    self.command_spec = tmp
    del tmp

    # Make sure command provides a test specification.
    if not self.test_steps:
      # TODO: Uncomment following lines when test feature is ready.
      #raise CommandException('"%s" command implementation is missing test '
      #'specification' % self.command_name)
      pass

    # Parse and validate args.
    try:
      (self.sub_opts, self.args) = getopt.getopt(
          args, self.command_spec[SUPPORTED_SUB_ARGS])
    except GetoptError, e:
      raise CommandException('%s for "%s" command.' % (e.msg,
                                                       self.command_name))
    if (len(self.args) < self.command_spec[MIN_ARGS]
        or len(self.args) > self.command_spec[MAX_ARGS]):
      raise CommandException('Wrong number of arguments for "%s" command.' %
                             self.command_name)
    if (not self.command_spec[FILE_URIS_OK]
        and self.HaveFileUris(self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support "file://" URIs. '
                             'Did you mean to use a gs:// URI?' %
                             self.command_name)
    if (not self.command_spec[PROVIDER_URIS_OK]
        and self._HaveProviderUris(
            self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support provider-only '
                             'URIs.' % self.command_name)
    if self.command_spec[CONFIG_REQUIRED]:
      self._ConfigureNoOpAuthIfNeeded()

    self.proj_id_handler = ProjectIdHandler()
    self.suri_builder = StorageUriBuilder(debug, bucket_storage_uri_class)

    # We're treating recursion_requested like it's used by all commands, but
    # only some of the commands accept the -R option.
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-r' or o == '-R':
          self.recursion_requested = True
          break

    self.exp_handler = NameExpansionHandler(self.command_name,
                                            self.proj_id_handler,
                                            self.headers, self.debug,
                                            self.bucket_storage_uri_class)
def Apply(self, func, src_uri_expansion, thr_exc_handler,
          have_existing_dest_subdir=None, shared_attrs=None):
  """Dispatch input URI assignments across a pool of parallel OS processes
  and/or Python threads, based on options (-m or not) and settings in the
  user's config file. If non-parallel mode or only one OS process
  requested, execute requests sequentially in the current OS process.

  Args:
    func: Function to call to process each URI.
    src_uri_expansion: gslib.name_expansion.NameExpansionResult.
    thr_exc_handler: Exception handler for ThreadPool class.
    have_existing_dest_subdir: bool indicator whether dest is an existing
      subdirectory. Only matters for cp/mv; pass None otherwise.
    shared_attrs: List of attributes to manage across sub-processes.

  Raises:
    CommandException if invalid config encountered.
  """
  # Set OS process and python thread count as a function of options
  # and config.
  if self.parallel_operations:
    process_count = boto.config.getint(
        'GSUtil', 'parallel_process_count',
        gslib.commands.config.DEFAULT_PARALLEL_PROCESS_COUNT)
    if process_count < 1:
      raise CommandException('Invalid parallel_process_count "%d".' %
                             process_count)
    thread_count = boto.config.getint(
        'GSUtil', 'parallel_thread_count',
        gslib.commands.config.DEFAULT_PARALLEL_THREAD_COUNT)
    if thread_count < 1:
      raise CommandException('Invalid parallel_thread_count "%d".' %
                             thread_count)
  else:
    # If -m not specified, then assume 1 OS process and 1 Python thread.
    process_count = 1
    thread_count = 1
  if self.debug:
    self.THREADED_LOGGER.info('process count: %d', process_count)
    self.THREADED_LOGGER.info('thread count: %d', thread_count)

  # Construct dictionary of assigned URIs containing one list per
  # OS process/shard. Assignments are stored as tuples containing
  # (src_uri to be copied,
  #  single URI from wildcard expansion of src_uri,
  #  bool indicator whether src_uri expands to multiple URIs,
  #  bool indicator whether this is a multi-source request,
  #  bool indicator whether dest is an existing subdir).
  shard = 0
  assigned_uris = {}
  have_multiple_srcs = src_uri_expansion.IsMultiSrcRequest()
  for src_uri in src_uri_expansion.GetSrcUris():
    src_uri_names_container = src_uri_expansion.NamesContainer(src_uri)
    for exp_src_bucket_listing_ref in (
        src_uri_expansion.IterExpandedBucketListingRefsFor(src_uri)):
      if shard not in assigned_uris:
        assigned_uris[shard] = []
      src_uri_expands_to_multi = (
          src_uri_expansion.SrcUriExpandsToMultipleSources(src_uri))
      assigned_uris[shard].append((src_uri,
                                   exp_src_bucket_listing_ref.GetUri(),
                                   src_uri_names_container,
                                   src_uri_expands_to_multi,
                                   have_multiple_srcs,
                                   have_existing_dest_subdir))
      # Round-robin assignment spreads work evenly across shards.
      shard = (shard + 1) % process_count

  if self.parallel_operations and (process_count > 1):
    procs = []
    # If any shared attributes passed by caller, create a dictionary of
    # shared memory variables for every element in the list of shared
    # attributes.
    shared_vars = None
    if shared_attrs:
      for name in shared_attrs:
        if not shared_vars:
          shared_vars = {}
        shared_vars[name] = multiprocessing.Value('i', 0)
    for shard in assigned_uris:
      # Spawn a separate OS process for each shard.
      if self.debug:
        self.THREADED_LOGGER.info('spawning process for shard %d', shard)
      p = multiprocessing.Process(target=self._ApplyThreads,
                                  args=(func, assigned_uris[shard], shard,
                                        thread_count, thr_exc_handler,
                                        shared_vars))
      procs.append(p)
      p.start()
    # Wait for all spawned OS processes to finish.
    failed_process_count = 0
    for p in procs:
      p.join()
      # Count number of procs that returned non-zero exit code.
      if p.exitcode != 0:
        failed_process_count += 1
    # Abort main process if one or more sub-processes failed.
    if failed_process_count:
      plural_str = ''
      if failed_process_count > 1:
        plural_str = 'es'
      raise Exception('unexpected failure in %d sub-process%s, '
                      'aborting...' % (failed_process_count, plural_str))
    # Propagate shared variables back to caller's attributes.
    if shared_vars:
      for (name, var) in shared_vars.items():
        setattr(self, name, var.value)
  else:
    # Only one OS process requested so perform request in current
    # OS process, in shard zero with thread_count threads.
    self._ApplyThreads(func, assigned_uris[0], 0, thread_count,
                       thr_exc_handler, None)
def Apply(self, func, src_uri_expansion, thr_exc_handler): """Dispatch input URI assignments across a pool of parallel OS processes and/or Python threads, based on options (-m or not) and settings in the user's config file. If non-parallel mode or only one OS process requested, execute requests sequentially in the current OS process. Args: func: function to call to process each URI. src_uri_expansion: dictionary of groups of URIs to process. thr_exc_handler: exception handler for ThreadPool class. """ # Set OS process and python thread count as a function of options # and config. if self.parallel_operations: process_count = boto.config.getint( 'GSUtil', 'parallel_process_count', gslib.commands.config.DEFAULT_PARALLEL_PROCESS_COUNT) if process_count < 1: raise CommandException('Invalid parallel_process_count "%d".' % process_count) thread_count = boto.config.getint( 'GSUtil', 'parallel_thread_count', gslib.commands.config.DEFAULT_PARALLEL_THREAD_COUNT) if thread_count < 1: raise CommandException('Invalid parallel_thread_count "%d".' % thread_count) else: # If -m not specified, then assume 1 OS process and 1 Python thread. process_count = 1 thread_count = 1 if self.debug: self.THREADED_LOGGER.info('process count: %d', process_count) self.THREADED_LOGGER.info('thread count: %d', thread_count) # Construct dictionary of assigned URIs containing one list per # OS process/shard. Assignments are stored as tuples containing # original source URI and expanded source URI. shard = 0 assigned_uris = {} for src_uri in iter(src_uri_expansion): for exp_src_uri in src_uri_expansion[src_uri]: if shard not in assigned_uris: assigned_uris[shard] = [] assigned_uris[shard].append((src_uri, exp_src_uri)) shard = (shard + 1) % process_count if self.parallel_operations and (process_count > 1): procs = [] byte_count = None # If the command calling this method keeps track of bytes transferred, # arrange to manage a global count across multiple OS processes. 
# TODO: The logic that manages the global byte_count is specific # to the cp command and should be refactored to be generic. if hasattr(self, 'total_bytes_transferred'): byte_count = multiprocessing.Value('i', 0) for shard in assigned_uris: # Spawn a separate OS process for each shard. if self.debug: self.THREADED_LOGGER.info('spawning process for shard %d', shard) p = multiprocessing.Process(target=self.ApplyThreads, args=(func, assigned_uris[shard], shard, thread_count, byte_count, thr_exc_handler)) procs.append(p) p.start() # Wait for all spawned OS processes to finish. for p in procs: p.join() # If tracking bytes processed, update the master process' count from # the global counter. if hasattr(self, 'total_bytes_transferred'): self.total_bytes_transferred = byte_count.value else: # Only one OS process requested so perform request in current # OS process, in shard zero with thread_count threads. self.ApplyThreads(func, assigned_uris[0], 0, thread_count, None, thr_exc_handler)
def RunCommand(self):
  """Abstract hook; every concrete command subclass must override this."""
  message = ('Command %s is missing its RunCommand() '
             'implementation' % self.command_name)
  raise CommandException(message)
class Command(object):
  # Global instance of a threaded logger object.
  THREADED_LOGGER = _ThreadedLogger()

  # command_spec keys every concrete subclass is required to define.
  REQUIRED_SPEC_KEYS = [COMMAND_NAME]

  # Each subclass must define the following map, minimally including the
  # keys in REQUIRED_SPEC_KEYS; other values below will be used as defaults,
  # although for readbility subclasses should specify the complete map.
  command_spec = {
    # Name of command.
    COMMAND_NAME: None,
    # List of command name aliases.
    COMMAND_NAME_ALIASES: [],
    # Min number of args required by this command.
    MIN_ARGS: 0,
    # Max number of args required by this command, or NO_MAX.
    MAX_ARGS: NO_MAX,
    # Getopt-style string specifying acceptable sub args.
    SUPPORTED_SUB_ARGS: '',
    # True if file URIs are acceptable for this command.
    FILE_URIS_OK: False,
    # True if provider-only URIs are acceptable for this command.
    PROVIDER_URIS_OK: False,
    # Index in args of first URI arg.
    URIS_START_ARG: 0,
    # True if must configure gsutil before running command.
    CONFIG_REQUIRED: True,
  }
  _default_command_spec = command_spec

  # Define a convenience property for command name, since it's used many places.
  def _get_command_name(self):
    return self.command_spec[COMMAND_NAME]
  command_name = property(_get_command_name)

  def __init__(self, command_runner, args, headers, debug,
               parallel_operations, gsutil_bin_dir, boto_lib_dir,
               config_file_list, bucket_storage_uri_class, test_method=None):
    """
    Args:
      command_runner: CommandRunner (for commands built atop other commands).
      args: command-line args (arg0 = actual arg, not command name ala bash).
      headers: dictionary containing optional HTTP headers to pass to boto.
      debug: debug level to pass in to boto connection (range 0..3).
      parallel_operations: Should command operations be executed in parallel?
      gsutil_bin_dir: bin dir from which gsutil is running.
      boto_lib_dir: lib dir where boto runs.
      config_file_list: config file list returned by _GetBotoConfigFileList().
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
          Settable for testing/mocking.
      test_method: Optional general purpose method for testing purposes.
          Application and semantics of this method will vary by command and
          test type.
    """
    # Save class values from constructor params.
    self.command_runner = command_runner
    self.args = args
    self.headers = headers
    self.debug = debug
    self.parallel_operations = parallel_operations
    self.gsutil_bin_dir = gsutil_bin_dir
    self.boto_lib_dir = boto_lib_dir
    self.config_file_list = config_file_list
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.test_method = test_method
    # Default consulted by commands that walk the local file system.
    self.ignore_symlinks = False

    # Process sub-command instance specifications.
    # First, ensure subclass implementation sets all required keys.
    for k in self.REQUIRED_SPEC_KEYS:
      if k not in self.command_spec or self.command_spec[k] is None:
        raise CommandException('"%s" command implementation is missing %s '
                               'specification' % (self.command_name, k))
    # Now override default command_spec with subclass-specified values.
    tmp = self._default_command_spec
    tmp.update(self.command_spec)
    self.command_spec = tmp
    del tmp

    # Parse and validate args.
    try:
      (self.sub_opts, self.args) = getopt.getopt(
          args, self.command_spec[SUPPORTED_SUB_ARGS])
    except GetoptError, e:
      raise CommandException('%s for "%s" command.' % (e.msg,
                                                       self.command_name))
    if (len(self.args) < self.command_spec[MIN_ARGS]
        or len(self.args) > self.command_spec[MAX_ARGS]):
      raise CommandException('Wrong number of arguments for "%s" command.' %
                             self.command_name)
    if (not self.command_spec[FILE_URIS_OK]
        and self._HaveFileUris(
            self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support "file://" URIs. '
                             'Did you mean to use a gs:// URI?' %
                             self.command_name)
    if (not self.command_spec[PROVIDER_URIS_OK]
        and self._HaveProviderUris(
            self.args[self.command_spec[URIS_START_ARG]:])):
      raise CommandException('"%s" command does not support provider-only '
                             'URIs.' % self.command_name)
    if self.command_spec[CONFIG_REQUIRED]:
      self._ConfigureNoOpAuthIfNeeded()

    self.proj_id_handler = ProjectIdHandler()
# NOTE(review): this chunk begins mid-way through the parallel variant of
# SetAclCommandHelper; the enclosing if/else headers fall outside this chunk.
      raise CommandException('No URIs matched')
    else:
      bucket_uri = storage_uri
    # Parse the user's XML into an ACL object, reporting syntax errors with
    # their line/column location in the input file.
    h = handler.XmlHandler(acl_obj, bucket_uri.get_bucket())
    try:
      xml.sax.parseString(acl_txt, h)
    except xml.sax._exceptions.SAXParseException, e:
      raise CommandException('Requested ACL is invalid: %s at line %s, '
                             'column %s' % (e.getMessage(), e.getLineNumber(),
                                            e.getColumnNumber()))
    acl_arg = acl_obj
  else:
    # No file exists, so expect a canned ACL string.
    if acl_arg not in canned_acls:
      raise CommandException('Invalid canned ACL "%s".' % acl_arg)

  # Used to track if any ACLs failed to be set.
  self.everything_set_okay = True

  def _SetAclExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.THREADED_LOGGER.error(str(e))
    self.everything_set_okay = False

  # Per-URI worker passed to Apply(); trailing _unused_* parameters match the
  # tuple shape produced by Apply's shard assignments.
  def _SetAclFunc(src_uri, exp_src_uri,
                  _unused_src_uri_names_container=None,
                  _unused_src_uri_expands_to_multi=None,
                  _unused_have_multiple_srcs=None,
                  _unused_have_existing_dest_subdir=None):
    # NOTE(review): chunk truncated here; the worker body falls outside this
    # chunk.