def call(self,  # pylint: disable=invalid-name
         total_bytes_processed,
         total_size):
  """Prints an overwriting line to stderr describing the operation progress.

  Args:
    total_bytes_processed: Number of bytes processed so far.
    total_size: Total size of the ongoing operation.
  """
  if not self._logger.isEnabledFor(logging.INFO):
    return
  # Handle streaming case specially where we don't know the total size:
  if total_size:
    total_size_string = '/%s' % MakeHumanReadable(total_size)
  else:
    total_size_string = ''
  # Use sys.stderr.write instead of self.logger.info so progress messages
  # output on a single continuously overwriting line.
  # TODO: Make this work with logging.Logger.
  sys.stderr.write('%s%s%s \r' % (self._announce_text,
                                  MakeHumanReadable(total_bytes_processed),
                                  total_size_string))
  if total_size and total_bytes_processed == total_size:
    sys.stderr.write('\n')
def call(self, total_bytes_transferred, total_size):
  sys.stderr.write('%s: %s/%s \r' % (
      self.announce_text,
      MakeHumanReadable(total_bytes_transferred),
      MakeHumanReadable(total_size)))
  if total_bytes_transferred == total_size:
    sys.stderr.write('\n')
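Both callbacks above, and nearly every snippet below, format byte counts with MakeHumanReadable. Its definition is not shown here; the following is a minimal sketch assuming binary (1024-based) units, with suffixes and rounding chosen for illustration rather than taken from gsutil:

def MakeHumanReadable(num_bytes):
  """Returns a human-readable string, e.g. '1.21 MiB', for a byte count."""
  suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
  value = float(num_bytes)  # assumption: 1024-based units
  i = 0
  while value >= 1024 and i < len(suffixes) - 1:
    value /= 1024
    i += 1
  return '%.2f %s' % (value, suffixes[i])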
def _PrintBucketInfo(self, bucket_uri, listing_style):
  """Print listing info for given bucket.

  Args:
    bucket_uri: StorageUri being listed.
    listing_style: ListingStyle enum describing type of output desired.

  Returns:
    Tuple (total objects, total bytes) in the bucket.
  """
  bucket_objs = 0
  bucket_bytes = 0
  if listing_style == ListingStyle.SHORT:
    print bucket_uri
  else:
    for obj in self.WildcardIterator(
        bucket_uri.clone_replace_name('**')).IterKeys():
      bucket_objs += 1
      bucket_bytes += obj.size
    if listing_style == ListingStyle.LONG:
      print '%s : %s objects, %s' % (
          bucket_uri, bucket_objs, MakeHumanReadable(bucket_bytes))
    else:  # listing_style == ListingStyle.LONG_LONG:
      location_constraint = bucket_uri.get_location(validate=False,
                                                    headers=self.headers)
      location_output = ''
      if location_constraint:
        location_output = '\n\tLocationConstraint: %s' % location_constraint
      self.proj_id_handler.FillInProjectHeaderIfNeeded(
          'get_acl', bucket_uri, self.headers)
      print '%s :\n\t%d objects, %s%s\n\tACL: %s\n\tDefault ACL: %s' % (
          bucket_uri, bucket_objs, MakeHumanReadable(bucket_bytes),
          location_output, bucket_uri.get_acl(False, self.headers),
          bucket_uri.get_def_acl(False, self.headers))
  return (bucket_objs, bucket_bytes)
def call(self,  # pylint: disable=invalid-name
         last_byte_processed,
         total_size):
  """Prints an overwriting line to stderr describing the operation progress.

  Args:
    last_byte_processed: The last byte processed in the file. For file
        components, this number should be in the range
        [start_byte:start_byte + override_total_size].
    total_size: Total size of the ongoing operation.
  """
  if self._last_byte_written:
    return

  if self._override_total_size:
    total_size = self._override_total_size

  if total_size:
    total_size_string = '/%s' % MakeHumanReadable(total_size)
  else:
    total_size_string = ''
  # Use sys.stderr.write instead of self.logger.info so progress messages
  # output on a single continuously overwriting line.
  # TODO: Make this work with logging.Logger.
  PutToQueueWithTimeout(
      self._status_queue,
      ('%s%s%s \r' % (self._announce_text,
                      MakeHumanReadable(last_byte_processed -
                                        self._start_byte),
                      total_size_string)))
  if total_size and last_byte_processed - self._start_byte == total_size:
    self._last_byte_written = True
    PutToQueueWithTimeout(self._status_queue, '\n')
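This variant routes progress messages through a status queue via PutToQueueWithTimeout instead of writing to stderr directly, so a separate UI thread can render them. Presumably that helper is a thin wrapper around Queue.put with a timeout; a sketch under that assumption (the constant name and its value are illustrative, not gsutil's):

STATUS_QUEUE_OP_TIMEOUT = 600  # seconds; illustrative value

def PutToQueueWithTimeout(queue, msg, timeout=STATUS_QUEUE_OP_TIMEOUT):
  """Puts msg on the queue, raising Queue.Full if it stays full too long."""
  queue.put(msg, timeout=timeout)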
def call(self, total_bytes_transferred, total_size):
  """Forcibly exits if the transfer has passed the halting point."""
  if total_bytes_transferred >= self._halt_at_byte:
    sys.stderr.write(
        'Halting transfer after byte %s. %s/%s transferred.\r\n' %
        (self._halt_at_byte, MakeHumanReadable(total_bytes_transferred),
         MakeHumanReadable(total_size)))
    if self._is_upload:
      raise ResumableUploadException('Artificially halting upload.')
    else:
      raise ResumableDownloadException('Artificially halting download.')
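For context, boto-style transfers invoke these handlers periodically with (bytes_transferred_so_far, total_size). The toy driver below simulates that contract against a local stand-in handler; it is illustrative only and uses none of gsutil's classes:

import sys

class EchoProgressHandler(object):
  """Stand-in progress handler mirroring the call() signature above."""

  announce_text = 'Uploading: '

  def call(self, total_bytes_transferred, total_size):
    sys.stderr.write('%s%d/%d\r' % (self.announce_text,
                                    total_bytes_transferred, total_size))
    if total_bytes_transferred == total_size:
      sys.stderr.write('\n')

handler = EchoProgressHandler()
total_size = 100
for processed in xrange(0, total_size + 1, 25):
  handler.call(processed, total_size)  # a real transfer calls this per chunk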
def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
  """Print listing info for given bucket_listing_ref.

  Args:
    bucket_listing_ref: BucketListing being listed.

  Returns:
    Tuple (number of objects, object size)

  Raises:
    Exception: if a bug in the calling code is encountered.
  """
  obj = bucket_listing_ref.root_object
  url_str = bucket_listing_ref.url_string
  if (obj.metadata and
      S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
    size_string = '0'
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    size_string = (MakeHumanReadable(obj.size)
                   if self.human_readable else str(obj.size))
    num_bytes = obj.size
    num_objs = 1

  if not self.summary_only:
    sys.stdout.write('%(size)-10s %(url)s%(ending)s' % {
        'size': size_string,
        'url': url_str.encode(UTF8),
        'ending': self.line_ending})
  return (num_objs, num_bytes)
def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
  """Print listing info for given bucket_listing_ref.

  Args:
    bucket_listing_ref: BucketListing being listed.

  Returns:
    Tuple (number of objects, object size)

  Raises:
    Exception: if a bug in the calling code is encountered.
  """
  uri = bucket_listing_ref.GetUri()
  obj = bucket_listing_ref.GetKey()
  uri_str = UriStrForObj(uri, obj, self.all_versions)
  if isinstance(obj, DeleteMarker):
    size_string = '0'
    numobjs = 0
    numbytes = 0
  else:
    size_string = (MakeHumanReadable(obj.size)
                   if self.human_readable else str(obj.size))
    numobjs = 1
    numbytes = obj.size
  if not self.summary_only:
    sys.stdout.write('%(size)-10s %(uri)s%(ending)s' % {
        'size': size_string,
        'uri': uri_str.encode('utf-8'),
        'ending': self.line_ending})
  return numobjs, numbytes
def RunCommand(self):
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-b':
        get_bucket_info = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        self.proj_id_handler.SetProjectId(a)

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0
  for uri_str in self.args:
    uri = self.StorageUri(uri_str)
    self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers)

    if not uri.bucket_name:
      # Provider URI: add bucket wildcard to list buckets.
      for uri in self.CmdWildcardIterator('%s://*' % uri.scheme):
        (bucket_objs, bucket_bytes) = self._PrintBucketInfo(uri,
                                                            listing_style)
        total_bytes += bucket_bytes
        total_objs += bucket_objs
    elif not uri.object_name:
      if get_bucket_info:
        # ls -b request on provider+bucket URI: List info about bucket(s).
        for uri in self.CmdWildcardIterator(uri):
          (bucket_objs, bucket_bytes) = self._PrintBucketInfo(uri,
                                                              listing_style)
          total_bytes += bucket_bytes
          total_objs += bucket_objs
      else:
        # ls request on provider+bucket URI: List objects in the bucket(s).
        for obj in self.CmdWildcardIterator(uri.clone_replace_name('*'),
                                            ResultType.KEYS):
          total_bytes += self._PrintObjectInfo(uri, obj, listing_style)
          total_objs += 1
    else:
      # Provider+bucket+object URI -> list the object(s).
      for obj in self.CmdWildcardIterator(uri, ResultType.KEYS):
        total_bytes += self._PrintObjectInfo(uri, obj, listing_style)
        total_objs += 1

  if listing_style != ListingStyle.SHORT:
    print('TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
def _PrintSummaryLine(self, num_bytes, name):
  size_string = (MakeHumanReadable(num_bytes)
                 if self.human_readable else str(num_bytes))
  sys.stdout.write('%(size)-10s %(name)s%(ending)s' % {
      'size': size_string,
      'name': name,
      'ending': self.line_ending})
def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref, listing_style):
  """Print listing info for given bucket_listing_ref.

  Args:
    bucket_listing_ref: BucketListing being listed.
    listing_style: ListingStyle enum describing type of output desired.

  Returns:
    Tuple (number of objects, object length, if listing_style is one of the
    long listing formats)

  Raises:
    Exception: if a bug in the calling code is encountered.
  """
  uri = bucket_listing_ref.GetUri()
  obj = bucket_listing_ref.GetKey()
  uri_str = UriStrForObj(uri, obj, self.all_versions)

  if listing_style == ListingStyle.SHORT:
    print uri_str.encode('utf-8')
    return (1, 0)
  elif listing_style == ListingStyle.LONG:
    # Exclude timestamp fractional secs (example: 2010-08-23T12:46:54.187Z).
    timestamp = TIMESTAMP_RE.sub(
        r'\1Z', obj.last_modified.decode('utf8').encode('ascii'))

    if isinstance(obj, DeleteMarker):
      size_string = '0'
      numbytes = 0
      numobjs = 0
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      numbytes = obj.size
      numobjs = 1

    printstr = '%(size)10s %(timestamp)s %(uri)s'
    if self.all_versions and hasattr(obj, 'metageneration'):
      printstr += ' metageneration=%(metageneration)s'
    if self.include_etag:
      printstr += ' etag=%(etag)s'
    format_args = {
        'size': size_string,
        'timestamp': timestamp,
        'uri': uri_str.encode('utf-8'),
        'metageneration': str(getattr(obj, 'metageneration', '')),
        'etag': obj.etag.encode('utf-8'),
    }
    print printstr % format_args
    return (numobjs, numbytes)
  elif listing_style == ListingStyle.LONG_LONG:
    return PrintFullInfoAboutUri(uri, True, self.headers)
  else:
    raise Exception('Unexpected ListingStyle(%s)' % listing_style)
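The LONG branch above strips fractional seconds with TIMESTAMP_RE.sub(r'\1Z', ...). A regex of roughly the following shape reproduces that behavior on the example in the comment; gsutil's actual pattern may differ:

import re

# Assumed shape: capture everything before the fractional seconds, drop them.
TIMESTAMP_RE = re.compile(r'(.*)\.[0-9]*Z')

print TIMESTAMP_RE.sub(r'\1Z', '2010-08-23T12:46:54.187Z')
# -> 2010-08-23T12:46:54Z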
def _RunLatencyTests(self):
  """Runs latency tests."""
  # Stores timing information for each category of operation.
  self.results['latency'] = defaultdict(list)

  for i in range(self.num_iterations):
    self.logger.info('\nRunning latency iteration %d...', i + 1)
    for fpath in self.latency_files:
      basename = os.path.basename(fpath)
      gsbucket = str(self.bucket_uri)
      gsuri = gsbucket + basename
      file_size = self.file_sizes[fpath]
      readable_file_size = MakeHumanReadable(file_size)

      self.logger.info(
          "\nFile of size %(size)s located on disk at '%(fpath)s' being "
          "diagnosed in the cloud at '%(gsuri)s'." % {
              'size': readable_file_size,
              'fpath': fpath,
              'gsuri': gsuri})

      k = self.bucket.key_class(self.bucket)
      k.BufferSize = self.KEY_BUFFER_SIZE
      k.key = basename

      def _Upload():
        with self._Time('UPLOAD_%d' % file_size, self.results['latency']):
          k.set_contents_from_string(self.file_contents[fpath],
                                     md5=self.file_md5s[fpath])
      self._RunOperation(_Upload)

      def _Metadata():
        with self._Time('METADATA_%d' % file_size, self.results['latency']):
          k.exists()
      self._RunOperation(_Metadata)

      def _Download():
        with self._Time('DOWNLOAD_%d' % file_size, self.results['latency']):
          k.get_contents_to_file(self.devnull,
                                 **self.get_contents_to_file_args)
      self._RunOperation(_Download)

      def _Delete():
        with self._Time('DELETE_%d' % file_size, self.results['latency']):
          k.delete()
      self._RunOperation(_Delete)
def _PrintBucketInfo(self, bucket_uri, listing_style):
  """Print listing info for given bucket.

  Args:
    bucket_uri: StorageUri being listed.
    listing_style: ListingStyle enum describing type of output desired.

  Returns:
    Tuple (total objects, total bytes) in the bucket.
  """
  bucket_objs = 0
  bucket_bytes = 0
  if listing_style == ListingStyle.SHORT:
    print bucket_uri
  else:
    try:
      for obj in self.CmdWildcardIterator(
          bucket_uri.clone_replace_name('*'), ResultType.KEYS):
        bucket_objs += 1
        bucket_bytes += obj.size
    except WildcardException, e:
      # Ignore non-matching wildcards, to allow empty bucket listings.
      if e.reason.find('No matches') == -1:
        raise e
    if listing_style == ListingStyle.LONG:
      print '%s : %s objects, %s' % (
          bucket_uri, bucket_objs, MakeHumanReadable(bucket_bytes))
    else:  # listing_style == ListingStyle.LONG_LONG:
      location_constraint = bucket_uri.get_location(validate=False,
                                                    headers=self.headers)
      location_output = ''
      if location_constraint:
        location_output = '\n\tLocationConstraint: %s' % location_constraint
      self.proj_id_handler.FillInProjectHeaderIfNeeded(
          'get_acl', bucket_uri, self.headers)
      print '%s :\n\t%d objects, %s%s\n\tACL: %s\n\tDefault ACL: %s' % (
          bucket_uri, bucket_objs, MakeHumanReadable(bucket_bytes),
          location_output, bucket_uri.get_acl(False, self.headers),
          bucket_uri.get_def_acl(False, self.headers))
  # The docstring (and the caller, which unpacks this result) requires the
  # totals to be returned.
  return (bucket_objs, bucket_bytes)
def RunCommand(self):
  """Called by gsutil when the command is being invoked."""
  self._ParseArgs()

  if self.input_file:
    self._DisplayResults()
    return 0

  print 'Number of iterations to run: %d' % self.num_iterations
  print 'Base bucket URI: %s' % self.bucket_uri
  print 'Concurrency level: %d' % self.concurrency
  print 'Throughput file size: %s' % MakeHumanReadable(self.thru_filesize)
  print 'Diagnostics to run: %s' % (', '.join(self.diag_tests))

  try:
    self._SetUp()

    # Collect generic system info.
    self._CollectSysInfo()
    # Collect netstat info and disk counters before tests (and again later).
    self.results['sysinfo']['netstat_start'] = self._GetTcpStats()
    if IS_LINUX:
      self.results['sysinfo']['disk_counters_start'] = self._GetDiskCounters()
    # Record bucket URI.
    self.results['bucket_uri'] = str(self.bucket_uri)

    if 'lat' in self.diag_tests:
      self._RunLatencyTests()
    if 'rthru' in self.diag_tests:
      self._RunReadThruTests()
    if 'wthru' in self.diag_tests:
      self._RunWriteThruTests()

    # Collect netstat info and disk counters after tests.
    self.results['sysinfo']['netstat_end'] = self._GetTcpStats()
    if IS_LINUX:
      self.results['sysinfo']['disk_counters_end'] = self._GetDiskCounters()

    self._DisplayResults()
  finally:
    self._TearDown()

  return 0
def testEstimateWithSize(self):
  """Tests SeekAheadThread providing an object count and total size."""

  class SeekAheadResultIteratorWithSize(object):
    """Yields dummy result of the given size."""

    def __init__(self, num_objects, size):
      self.num_objects = num_objects
      self.size = size
      self.yielded = 0

    def __iter__(self):
      while self.yielded < self.num_objects:
        yield SeekAheadResult(data_bytes=self.size)
        self.yielded += 1

  cancel_event = threading.Event()
  status_queue = Queue.Queue()
  stream = StringIO.StringIO()
  ui_controller = UIController()
  ui_thread = UIThread(status_queue, stream, ui_controller)

  num_objects = 5
  object_size = 10
  seek_ahead_iterator = SeekAheadResultIteratorWithSize(num_objects,
                                                        object_size)
  seek_ahead_thread = SeekAheadThread(seek_ahead_iterator, cancel_event,
                                      status_queue)
  seek_ahead_thread.join(self.thread_wait_time)
  status_queue.put(ZERO_TASKS_TO_DO_ARGUMENT)
  ui_thread.join(self.thread_wait_time)

  if seek_ahead_thread.isAlive():
    seek_ahead_thread.terminate = True
    self.fail('SeekAheadThread is still alive.')

  message = stream.getvalue()
  if not message:
    self.fail('Status queue empty but SeekAheadThread should have posted '
              'summary message')

  total_size = num_objects * object_size
  self.assertEqual(
      message,
      'Estimated work for this command: objects: %s, total size: %s\n' %
      (num_objects, MakeHumanReadable(total_size)))
def testWithLocalFiles(self):
  """Tests SeekAheadThread with an actual directory."""
  tmpdir = self.CreateTempDir()
  num_files = 5
  total_size = 0

  # Create 5 files with sizes 0, 1, 2, 3, 4.
  for i in xrange(num_files):
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj%s' % str(i),
                        contents='a' * i)
    total_size += i

  # Recursively "copy" tmpdir.
  seek_ahead_iterator = SeekAheadNameExpansionIterator('cp', 0, None,
                                                       [tmpdir], True)

  cancel_event = threading.Event()
  status_queue = Queue.Queue()
  stream = StringIO.StringIO()
  ui_controller = UIController()
  ui_thread = UIThread(status_queue, stream, ui_controller)

  seek_ahead_thread = SeekAheadThread(seek_ahead_iterator, cancel_event,
                                      status_queue)
  seek_ahead_thread.join(self.thread_wait_time)
  status_queue.put(ZERO_TASKS_TO_DO_ARGUMENT)
  ui_thread.join(self.thread_wait_time)

  if seek_ahead_thread.isAlive():
    seek_ahead_thread.terminate = True
    self.fail('SeekAheadThread is still alive.')

  message = stream.getvalue()
  if not message:
    self.fail('Status queue empty but SeekAheadThread should have posted '
              'summary message')

  self.assertEqual(
      message,
      'Estimated work for this command: objects: %s, total size: %s\n' %
      (num_files, MakeHumanReadable(total_size)))
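Both tests rely on each SeekAheadResult contributing one object and data_bytes bytes to the estimate. A class of roughly this shape would satisfy them; the est_num_ops field and the default values are assumptions, not gsutil's definition:

class SeekAheadResult(object):
  """One unit of estimated work: an operation count and a byte count."""

  def __init__(self, est_num_ops=1, data_bytes=0):
    self.est_num_ops = est_num_ops  # assumed field; the tests above only
    self.data_bytes = data_bytes    # exercise data_bytes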
def _RunLatencyTests(self):
  """Runs latency tests."""
  # Stores timing information for each category of operation.
  self.results['latency'] = defaultdict(list)

  for i in range(self.num_iterations):
    print
    print 'Running latency iteration %d...' % (i + 1)
    for fpath in self.latency_files:
      basename = os.path.basename(fpath)
      gsbucket = str(self.bucket_uri)
      gsuri = gsbucket + basename
      file_size = self.file_sizes[fpath]
      readable_file_size = MakeHumanReadable(file_size)

      print
      print ("File of size %(size)s located on disk at '%(fpath)s' being "
             "diagnosed in the cloud at '%(gsuri)s'." % {
                 'size': readable_file_size,
                 'fpath': fpath,
                 'gsuri': gsuri})

      k = self.bucket.key_class(self.bucket)
      k.key = basename

      with self._Time('UPLOAD_%d' % file_size, self.results['latency']):
        k.set_contents_from_string(self.file_contents[fpath])
      with self._Time('METADATA_%d' % file_size, self.results['latency']):
        k.exists()
      with self._Time('DOWNLOAD_%d' % file_size, self.results['latency']):
        k.get_contents_as_string()
      with self._Time('DELETE_%d' % file_size, self.results['latency']):
        k.delete()
def _HandleSeekAheadMessage(self, status_message, stream):
  """Handles a SeekAheadMessage.

  Args:
    status_message: The SeekAheadMessage to be processed.
    stream: Stream to print messages.
  """
  estimate_message = ('Estimated work for this command: objects: %s' %
                      status_message.num_objects)
  if status_message.size:
    estimate_message += (', total size: %s' %
                         MakeHumanReadable(status_message.size))
    if self.total_size_source >= EstimationSource.SEEK_AHEAD_THREAD:
      self.total_size_source = EstimationSource.SEEK_AHEAD_THREAD
      self.total_size = status_message.size

  if self.num_objects_source >= EstimationSource.SEEK_AHEAD_THREAD:
    self.num_objects_source = EstimationSource.SEEK_AHEAD_THREAD
    self.num_objects = status_message.num_objects

  estimate_message += '\n'
  if not self.quiet_mode:
    stream.write(estimate_message)
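The >= comparisons above imply an ordering on EstimationSource in which numerically smaller values are more authoritative: a new source may overwrite the totals only if the current source is the same or weaker. The names and values below are an illustrative sketch of that ordering, not gsutil's actual definition:

class EstimationSource(object):
  # Smaller value = stronger source; names and values are illustrative only.
  PRODUCER_THREAD_FINAL = 1     # exact count from the producer; never beaten
  SEEK_AHEAD_THREAD = 2         # estimate from the seek-ahead thread
  PRODUCER_THREAD_ESTIMATE = 3  # early producer estimate
  INDIVIDUAL_MESSAGES = 4       # weakest: totals accumulated per message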
def _PrintLongListing(self, bucket_listing_ref):
  """Prints an object with ListingStyle.LONG."""
  obj = bucket_listing_ref.root_object
  url_str = bucket_listing_ref.url_string
  if (obj.metadata and
      S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
    size_string = '0'
    num_bytes = 0
    num_objs = 0
    url_str += '<DeleteMarker>'
  else:
    size_string = (MakeHumanReadable(obj.size)
                   if self.human_readable else str(obj.size))
    num_bytes = obj.size
    num_objs = 1

  timestamp = JSON_TIMESTAMP_RE.sub(
      r'\1T\2Z', str(obj.updated).decode(UTF8).encode('ascii'))
  printstr = '%(size)10s %(timestamp)s %(url)s'
  encoded_etag = None
  encoded_metagen = None
  if self.all_versions:
    printstr += ' metageneration=%(metageneration)s'
    encoded_metagen = str(obj.metageneration).encode(UTF8)
  if self.include_etag:
    printstr += ' etag=%(etag)s'
    encoded_etag = obj.etag.encode(UTF8)
  format_args = {
      'size': size_string,
      'timestamp': timestamp,
      'url': url_str.encode(UTF8),
      'metageneration': encoded_metagen,
      'etag': encoded_etag
  }
  print printstr % format_args
  return (num_objs, num_bytes)
def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref, listing_style):
  """Print listing info for given bucket_listing_ref.

  Args:
    bucket_listing_ref: BucketListing being listed.
    listing_style: ListingStyle enum describing type of output desired.

  Returns:
    Tuple (number of objects, object length, if listing_style is one of the
    long listing formats)

  Raises:
    Exception: if a bug in the calling code is encountered.
  """
  uri = bucket_listing_ref.GetUri()
  obj = bucket_listing_ref.GetKey()
  uri_str = self._UriStrForObj(uri, obj)

  if listing_style == ListingStyle.SHORT:
    print uri_str.encode('utf-8')
    return (1, 0)
  elif listing_style == ListingStyle.LONG:
    # Exclude timestamp fractional secs (example: 2010-08-23T12:46:54.187Z).
    timestamp = TIMESTAMP_RE.sub(
        r'\1Z', obj.last_modified.decode('utf8').encode('ascii'))
    size_string = (MakeHumanReadable(obj.size)
                   if self.human_readable else str(obj.size))
    if not isinstance(obj, DeleteMarker):
      if self.all_versions:
        print '%10s %s %s metageneration=%s' % (
            size_string, timestamp, uri_str.encode('utf-8'),
            obj.metageneration)
      else:
        print '%10s %s %s' % (size_string, timestamp,
                              uri_str.encode('utf-8'))
      return (1, obj.size)
    else:
      if self.all_versions:
        print '%10s %s %s metageneration=%s' % (
            0, timestamp, uri_str.encode('utf-8'), obj.metageneration)
      else:
        print '%10s %s %s' % (0, timestamp, uri_str.encode('utf-8'))
      return (0, 1)
  elif listing_style == ListingStyle.LONG_LONG:
    # Run in a try/except clause so we can continue listings past
    # access-denied errors (which can happen because user may have READ
    # permission on object and thus see the bucket listing data, but lack
    # FULL_CONTROL over individual objects and thus not be able to read
    # their ACLs).
    try:
      print '%s:' % uri_str.encode('utf-8')
      suri = self.suri_builder.StorageUri(uri_str)
      obj = suri.get_key(False)
      print '\tCreation time:\t\t%s' % obj.last_modified
      if obj.cache_control:
        print '\tCache-Control:\t\t%s' % obj.cache_control
      if obj.content_disposition:
        print '\tContent-Disposition:\t\t%s' % obj.content_disposition
      if obj.content_encoding:
        print '\tContent-Encoding:\t\t%s' % obj.content_encoding
      if obj.content_language:
        print '\tContent-Language:\t%s' % obj.content_language
      print '\tContent-Length:\t\t%s' % obj.size
      print '\tContent-Type:\t\t%s' % obj.content_type
      if hasattr(obj, 'component_count') and obj.component_count:
        print '\tComponent-Count:\t%d' % obj.component_count
      if obj.metadata:
        prefix = uri.get_provider().metadata_prefix
        for name in obj.metadata:
          meta_string = '\t%s%s:\t\t%s' % (prefix, name, obj.metadata[name])
          print meta_string.encode('utf-8')
      if hasattr(obj, 'cloud_hashes'):
        for alg in obj.cloud_hashes:
          print '\tHash (%s):\t\t%s' % (
              alg, binascii.b2a_hex(obj.cloud_hashes[alg]))
      print '\tETag:\t\t\t%s' % obj.etag.strip('"\'')
      print '\tACL:\t\t%s' % (suri.get_acl(False, self.headers))
      return (1, obj.size)
    except boto.exception.GSResponseError as e:
      if e.status == 403:
        print ('\tACL:\t\t\tACCESS DENIED. Note: you need FULL_CONTROL '
               'permission\n\t\t\ton the object to read its ACL.')
        return (1, obj.size)
      else:
        raise e
  else:
    raise Exception('Unexpected ListingStyle(%s)' % listing_style)
def RunCommand(self):
  self.total_elapsed_time = self.total_bytes_transferred = 0
  if self.args[-1] == '-' or self.args[-1] == 'file://-':
    # Destination is <STDOUT>. Manipulate sys.stdout so as to redirect all
    # debug messages to <STDERR>.
    stdout_fp = sys.stdout
    sys.stdout = sys.stderr
    for uri_str in self.args[0:len(self.args) - 1]:
      for uri in self.CmdWildcardIterator(uri_str):
        if not uri.object_name:
          raise CommandException('Destination Stream requires that '
                                 'source URI %s should represent an '
                                 'object!' % uri)
        key = uri.get_key(False, self.headers)
        (elapsed_time, bytes_transferred) = self._PerformDownloadToStream(
            key, uri, stdout_fp)
        self.total_elapsed_time += elapsed_time
        self.total_bytes_transferred += bytes_transferred
    if self.debug == 3:
      if self.total_bytes_transferred != 0:
        sys.stderr.write(
            'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)\n' %
            (self.total_bytes_transferred, self.total_elapsed_time,
             MakeHumanReadable(float(self.total_bytes_transferred) /
                               float(self.total_elapsed_time))))
    return

  # Expand wildcards and containers in source StorageUris.
  src_uri_expansion = self._ExpandWildcardsAndContainers(
      self.args[0:len(self.args) - 1])

  # Check for various problems and determine base_dst_uri for the request.
  (base_dst_uri, multi_src_request) = self._ErrorCheckCopyRequest(
      src_uri_expansion, self.args[-1])
  # Rewrite base_dst_uri and create dest dir as needed for multi-source copy.
  if multi_src_request:
    base_dst_uri = self._HandleMultiSrcCopyRequst(src_uri_expansion,
                                                  base_dst_uri)

  # Should symbolic links be skipped?
  if self.sub_opts:
    for o, unused_a in self.sub_opts:
      if o == '-e':
        self.ignore_symlinks = True

  # To ensure statistics are accurate with threads we need to use a lock.
  stats_lock = threading.Lock()

  # Used to track if any files failed to copy over.
  self.everything_copied_okay = True

  def _CopyExceptionHandler(e):
    """Simple exception handler to allow post-completion status."""
    self.THREADED_LOGGER.error(str(e))
    self.everything_copied_okay = False

  def _CopyFunc(src_uri, exp_src_uri):
    """Worker function for performing the actual copy."""
    if exp_src_uri.is_file_uri() and exp_src_uri.is_stream():
      sys.stderr.write("Copying from <STDIN>...\n")
    else:
      self.THREADED_LOGGER.info('Copying %s...', exp_src_uri)
    dst_uri = self._ConstructDstUri(src_uri, exp_src_uri, base_dst_uri)
    (elapsed_time, bytes_transferred) = self._PerformCopy(exp_src_uri,
                                                          dst_uri)
    stats_lock.acquire()
    self.total_elapsed_time += elapsed_time
    self.total_bytes_transferred += bytes_transferred
    stats_lock.release()

  # Start the clock.
  start_time = time.time()

  # Perform copy requests in parallel (-m) mode, if requested, using
  # configured number of parallel processes and threads. Otherwise,
  # perform request with sequential function calls in current process.
  self.Apply(_CopyFunc, src_uri_expansion, _CopyExceptionHandler)

  end_time = time.time()
  self.total_elapsed_time = end_time - start_time

  if self.debug == 3:
    # Note that this only counts the actual GET and PUT bytes for the copy
    # - not any transfers for doing wildcard expansion, the initial HEAD
    # request boto performs when doing a bucket.get_key() operation, etc.
    if self.total_bytes_transferred != 0:
      sys.stderr.write(
          'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)\n' %
          (self.total_bytes_transferred, self.total_elapsed_time,
           MakeHumanReadable(float(self.total_bytes_transferred) /
                             float(self.total_elapsed_time))))
  if not self.everything_copied_okay:
    raise CommandException('Some files could not be transferred.')
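Apply above fans _CopyFunc out over the expanded sources, routing exceptions to the handler so one failed copy does not abort the rest. A sequential sketch of that contract, assuming src_uri_expansion maps each source URI to its expanded URIs (gsutil's real implementation also supports parallel processes and threads):

def Apply(func, src_uri_expansion, exception_handler):
  """Sequential sketch: call func per (src_uri, exp_src_uri), trap errors."""
  for src_uri, exp_src_uris in src_uri_expansion.iteritems():
    for exp_src_uri in exp_src_uris:
      try:
        func(src_uri, exp_src_uri)
      except Exception as e:
        exception_handler(e)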
def call(self, total_bytes_transferred, total_size):
  sys.stderr.write('Uploading: %s \r' %
                   (MakeHumanReadable(total_bytes_transferred)))
  if total_size and total_bytes_transferred == total_size:
    sys.stderr.write('\n')
def RunCommand(self):
  got_nomatch_errors = False
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  self.recursion_requested = False
  self.all_versions = False
  self.include_etag = False
  self.human_readable = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-e':
        self.include_etag = True
      elif o == '-b':
        get_bucket_info = True
      elif o == '-h':
        self.human_readable = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        self.proj_id_handler.SetProjectId(a)
      elif o == '-r' or o == '-R':
        self.recursion_requested = True

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0
  for uri_str in self.args:
    uri = self.suri_builder.StorageUri(uri_str)
    self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers)

    if uri.names_provider():
      # Provider URI: use bucket wildcard to list buckets.
      for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris():
        self._PrintBucketInfo(uri, listing_style)
    elif uri.names_bucket():
      # Bucket URI -> list the object(s) in that bucket.
      if get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        if (listing_style != ListingStyle.LONG_LONG and
            not ContainsWildcard(uri)):
          # At this point, we haven't done any validation that the bucket
          # URI actually exists. If the listing style is short, the
          # _PrintBucketInfo doesn't do any RPCs, so check to make sure the
          # bucket actually exists by fetching it.
          uri.get_bucket(validate=True)
        for uri in self.WildcardIterator(uri).IterUris():
          self._PrintBucketInfo(uri, listing_style)
      else:
        # Not -b request: List objects in the bucket(s).
        (no, nb) = self._ExpandUriAndPrintInfo(
            uri, listing_style, should_recurse=self.recursion_requested)
        if no == 0 and ContainsWildcard(uri):
          got_nomatch_errors = True
        total_objs += no
        total_bytes += nb
    else:
      # URI names an object or object subdir -> list matching object(s) /
      # subdirs.
      (exp_objs, exp_bytes) = self._ExpandUriAndPrintInfo(
          uri, listing_style, should_recurse=self.recursion_requested)
      if exp_objs == 0 and ContainsWildcard(uri):
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print('TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URIs matched no objects.')

  return 0
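For orientation, an illustrative session against this version of the command; the bucket name, object sizes, and timestamps are invented, and the human-readable total assumes the MakeHumanReadable sketch shown earlier:

$ gsutil ls -l gs://example-bucket
      1234 2013-02-01T10:00:01Z gs://example-bucket/obj1
       567 2013-02-01T10:00:02Z gs://example-bucket/obj2
TOTAL: 2 objects, 1801 bytes (1.76 KiB)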
def _PrintBucketInfo(self, bucket_uri, listing_style):
  """Print listing info for given bucket.

  Args:
    bucket_uri: StorageUri being listed.
    listing_style: ListingStyle enum describing type of output desired.

  Returns:
    Tuple (total objects, total bytes) in the bucket.
  """
  bucket_objs = 0
  bucket_bytes = 0
  if listing_style == ListingStyle.SHORT:
    print bucket_uri
  else:
    for obj in self.WildcardIterator(
        bucket_uri.clone_replace_name('**')).IterKeys():
      bucket_objs += 1
      bucket_bytes += obj.size
    if listing_style == ListingStyle.LONG:
      print '%s : %s objects, %s' % (
          bucket_uri, bucket_objs, MakeHumanReadable(bucket_bytes))
    else:  # listing_style == ListingStyle.LONG_LONG:
      location_constraint = bucket_uri.get_location(validate=False,
                                                    headers=self.headers)
      location_output = ''
      if location_constraint:
        location_output = '\n\tLocationConstraint: %s' % location_constraint
      storage_class = bucket_uri.get_storage_class(validate=False,
                                                   headers=self.headers)
      self.proj_id_handler.FillInProjectHeaderIfNeeded(
          'get_acl', bucket_uri, self.headers)
      fields = {
          "bucket": bucket_uri,
          "object_count": bucket_objs,
          "bytes": MakeHumanReadable(bucket_bytes),
          "storage_class": storage_class,
          "location_output": location_output,
          "versioning": bucket_uri.get_versioning_config(self.headers),
          "acl": bucket_uri.get_acl(False, self.headers),
          "default_acl": bucket_uri.get_def_acl(False, self.headers)
      }

      # Logging and website need a bit more work to make them human-readable.
      for message in [bucket_uri.get_website_config(self.headers),
                      bucket_uri.get_logging_config(self.headers)]:
        field, content = message.items()[0]  # Expect only one entry in dict.
        fields[field] = ", ".join(
            "%s: %s" % (property, value)
            for property, value in sorted(content.items()))

      print ("{bucket} :\n"
             "\t{object_count} objects, {bytes}\n"
             "\tStorageClass: {storage_class}{location_output}\n"
             "\tVersioning enabled: {versioning}\n"
             "\tLogging: {Logging}\n"
             "\tWebsiteConfiguration: {WebsiteConfiguration}\n"
             "\tACL: {acl}\n"
             "\tDefault ACL: {default_acl}".format(**fields))
  return (bucket_objs, bucket_bytes)
def RunCommand(self):
  got_nomatch_errors = False
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  self.recursion_requested = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-b':
        get_bucket_info = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        self.proj_id_handler.SetProjectId(a)
      elif o == '-r' or o == '-R':
        self.recursion_requested = True

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0
  for uri_str in self.args:
    uri = self.suri_builder.StorageUri(uri_str)
    self.proj_id_handler.FillInProjectHeaderIfNeeded('ls', uri, self.headers)

    if uri.names_provider():
      # Provider URI: use bucket wildcard to list buckets.
      for uri in self.WildcardIterator('%s://*' % uri.scheme).IterUris():
        (bucket_objs, bucket_bytes) = self._PrintBucketInfo(uri,
                                                            listing_style)
        total_bytes += bucket_bytes
        total_objs += bucket_objs
    elif uri.names_bucket():
      # Bucket URI -> list the object(s) in that bucket.
      if get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        for uri in self.WildcardIterator(uri).IterUris():
          (bucket_objs, bucket_bytes) = self._PrintBucketInfo(uri,
                                                              listing_style)
          total_bytes += bucket_bytes
          total_objs += bucket_objs
      else:
        # Not -b request: List objects in the bucket(s).
        (no, nb) = self._ExpandUriAndPrintInfo(
            uri, listing_style, should_recurse=self.recursion_requested)
        if no == 0 and ContainsWildcard(uri):
          got_nomatch_errors = True
        total_objs += no
        total_bytes += nb
    else:
      # URI names an object or object subdir -> list matching object(s) /
      # subdirs.
      (exp_objs, exp_bytes) = self._ExpandUriAndPrintInfo(
          uri, listing_style, should_recurse=self.recursion_requested)
      if exp_objs == 0 and ContainsWildcard(uri):
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print('TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URIs matched no objects.')
def _DisplayResults(self):
  """Displays results collected from diagnostic run."""
  print
  print '=' * 78
  print 'DIAGNOSTIC RESULTS'.center(78)
  print '=' * 78

  if 'latency' in self.results:
    print
    print '-' * 78
    print 'Latency'.center(78)
    print '-' * 78
    print ('Operation Size Trials Mean (ms) Std Dev (ms) '
           'Median (ms) 90th % (ms)')
    print ('========= ========= ====== ========= ============ '
           '=========== ===========')
    for key in sorted(self.results['latency']):
      trials = sorted(self.results['latency'][key])
      op, numbytes = key.split('_')
      numbytes = int(numbytes)
      if op == 'METADATA':
        print 'Metadata'.rjust(9), '',
        print MakeHumanReadable(numbytes).rjust(9), '',
        self._DisplayStats(trials)
      if op == 'DOWNLOAD':
        print 'Download'.rjust(9), '',
        print MakeHumanReadable(numbytes).rjust(9), '',
        self._DisplayStats(trials)
      if op == 'UPLOAD':
        print 'Upload'.rjust(9), '',
        print MakeHumanReadable(numbytes).rjust(9), '',
        self._DisplayStats(trials)
      if op == 'DELETE':
        print 'Delete'.rjust(9), '',
        print MakeHumanReadable(numbytes).rjust(9), '',
        self._DisplayStats(trials)

  if 'write_throughput' in self.results:
    print
    print '-' * 78
    print 'Write Throughput'.center(78)
    print '-' * 78
    write_thru = self.results['write_throughput']
    print 'Copied a %s file %d times for a total transfer size of %s.' % (
        MakeHumanReadable(write_thru['file_size']),
        write_thru['num_copies'],
        MakeHumanReadable(write_thru['total_bytes_copied']))
    print 'Write throughput: %s/s.' % (
        MakeBitsHumanReadable(write_thru['bytes_per_second'] * 8))

  if 'read_throughput' in self.results:
    print
    print '-' * 78
    print 'Read Throughput'.center(78)
    print '-' * 78
    read_thru = self.results['read_throughput']
    print 'Copied a %s file %d times for a total transfer size of %s.' % (
        MakeHumanReadable(read_thru['file_size']),
        read_thru['num_times'],
        MakeHumanReadable(read_thru['total_bytes_copied']))
    print 'Read throughput: %s/s.' % (
        MakeBitsHumanReadable(read_thru['bytes_per_second'] * 8))

  if 'sysinfo' in self.results:
    print
    print '-' * 78
    print 'System Information'.center(78)
    print '-' * 78
    info = self.results['sysinfo']
    print 'IP Address: \n %s' % info['ip_address']
    print 'Temporary Directory: \n %s' % info['tempdir']
    print 'Bucket URI: \n %s' % self.results['bucket_uri']
    print 'gsutil Version: \n %s' % self.results.get('gsutil_version',
                                                     'Unknown')
    print 'boto Version: \n %s' % self.results.get('boto_version', 'Unknown')

    if 'gmt_timestamp' in info:
      ts_string = info['gmt_timestamp']
      timetuple = None
      try:
        # Convert RFC 2822 string to Linux timestamp.
        timetuple = time.strptime(ts_string, '%a, %d %b %Y %H:%M:%S +0000')
      except ValueError:
        pass
      if timetuple:
        # Converts the GMT time tuple to local Linux timestamp.
        localtime = calendar.timegm(timetuple)
        localdt = datetime.datetime.fromtimestamp(localtime)
        print 'Measurement time: \n %s' % localdt.strftime(
            '%Y-%m-%d %I-%M-%S %p %Z')

    print 'Google Server: \n %s' % info['googserv_route']
    print ('Google Server IP Addresses: \n %s' %
           ('\n '.join(info['googserv_ips'])))
    print ('Google Server Hostnames: \n %s' %
           ('\n '.join(info['googserv_hostnames'])))
    print 'Google DNS thinks your IP is: \n %s' % info['dns_o-o_ip']
    print 'CPU Count: \n %s' % info['cpu_count']
    print 'CPU Load Average: \n %s' % info['load_avg']
    try:
      print ('Total Memory: \n %s' %
             MakeHumanReadable(info['meminfo']['mem_total']))
      # Free memory is really MemFree + Buffers + Cached.
      print 'Free Memory: \n %s' % MakeHumanReadable(
          info['meminfo']['mem_free'] +
          info['meminfo']['mem_buffers'] +
          info['meminfo']['mem_cached'])
    except TypeError:
      pass

    netstat_after = info['netstat_end']
    netstat_before = info['netstat_start']
    for tcp_type in ('sent', 'received', 'retransmit'):
      try:
        delta = (netstat_after['tcp_%s' % tcp_type] -
                 netstat_before['tcp_%s' % tcp_type])
        print 'TCP segments %s during test:\n %d' % (tcp_type, delta)
      except TypeError:
        pass

    if 'disk_counters_end' in info and 'disk_counters_start' in info:
      print 'Disk Counter Deltas:\n',
      disk_after = info['disk_counters_end']
      disk_before = info['disk_counters_start']
      print '', 'disk'.rjust(6),
      for colname in ['reads', 'writes', 'rbytes', 'wbytes', 'rtime',
                      'wtime']:
        print colname.rjust(8),
      print
      for diskname in sorted(disk_after):
        before = disk_before[diskname]
        after = disk_after[diskname]
        (reads1, writes1, rbytes1, wbytes1, rtime1, wtime1) = before
        (reads2, writes2, rbytes2, wbytes2, rtime2, wtime2) = after
        print '', diskname.rjust(6),
        deltas = [reads2 - reads1, writes2 - writes1, rbytes2 - rbytes1,
                  wbytes2 - wbytes1, rtime2 - rtime1, wtime2 - wtime1]
        for delta in deltas:
          print str(delta).rjust(8),
        print

    if 'tcp_proc_values' in info:
      print 'TCP /proc values:\n',
      for item in info['tcp_proc_values'].iteritems():
        print ' %s = %s' % item

    if 'boto_https_enabled' in info:
      print 'Boto HTTPS Enabled: \n %s' % info['boto_https_enabled']

  if 'request_errors' in self.results and 'total_requests' in self.results:
    print
    print '-' * 78
    print 'In-Process HTTP Statistics'.center(78)
    print '-' * 78
    total = int(self.results['total_requests'])
    numerrors = int(self.results['request_errors'])
    numbreaks = int(self.results['connection_breaks'])
    availability = (((total - numerrors) / float(total)) * 100
                    if total > 0 else 100)
    print 'Total HTTP requests made: %d' % total
    print 'HTTP 5xx errors: %d' % numerrors
    print 'HTTP connections broken: %d' % numbreaks
    print 'Availability: %.7g%%' % availability
    if 'error_responses_by_code' in self.results:
      sorted_codes = sorted(
          self.results['error_responses_by_code'].iteritems())
      if sorted_codes:
        print 'Error responses by code:'
        print '\n'.join(' %s: %s' % c for c in sorted_codes)

  if self.output_file:
    with open(self.output_file, 'w') as f:
      json.dump(self.results, f, indent=2)
    print
    print "Output file written to '%s'." % self.output_file

  print
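Throughput above is formatted with MakeBitsHumanReadable after multiplying bytes/s by 8. A sketch analogous to the earlier MakeHumanReadable sketch, with bit suffixes; the suffix set and rounding are assumptions, not gsutil's definition:

def MakeBitsHumanReadable(num_bits):
  """Returns a human-readable string, e.g. '8.53 Mibit', for a bit count."""
  suffixes = ['bit', 'Kibit', 'Mibit', 'Gibit', 'Tibit']  # assumed suffixes
  value = float(num_bits)
  i = 0
  while value >= 1024 and i < len(suffixes) - 1:
    value /= 1024
    i += 1
  return '%.2f %s' % (value, suffixes[i])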
def RunCommand(self):
  """Command entry point for the ls command."""
  got_nomatch_errors = False
  got_bucket_nomatch_errors = False
  listing_style = ListingStyle.SHORT
  get_bucket_info = False
  self.recursion_requested = False
  self.all_versions = False
  self.include_etag = False
  self.human_readable = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-a':
        self.all_versions = True
      elif o == '-e':
        self.include_etag = True
      elif o == '-b':
        get_bucket_info = True
      elif o == '-h':
        self.human_readable = True
      elif o == '-l':
        listing_style = ListingStyle.LONG
      elif o == '-L':
        listing_style = ListingStyle.LONG_LONG
      elif o == '-p':
        self.project_id = a
      elif o == '-r' or o == '-R':
        self.recursion_requested = True

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_objs = 0
  total_bytes = 0

  def MaybePrintBucketHeader(blr):
    if len(self.args) > 1:
      print '%s:' % blr.url_string.encode(UTF8)
  print_bucket_header = MaybePrintBucketHeader

  for url_str in self.args:
    storage_url = StorageUrlFromString(url_str)
    if storage_url.IsFileUrl():
      raise CommandException('Only cloud URLs are supported for %s' %
                             self.command_name)
    bucket_fields = None
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      bucket_fields = ['id']
    elif listing_style == ListingStyle.LONG_LONG:
      bucket_fields = ['location', 'storageClass', 'versioning', 'acl',
                       'defaultObjectAcl', 'website', 'logging', 'cors',
                       'lifecycle']
    if storage_url.IsProvider():
      # Provider URL: use bucket wildcard to list buckets.
      for blr in self.WildcardIterator(
          '%s://*' % storage_url.scheme).IterBuckets(
              bucket_fields=bucket_fields):
        self._PrintBucketInfo(blr, listing_style)
    elif storage_url.IsBucket() and get_bucket_info:
      # ls -b bucket listing request: List info about bucket(s).
      total_buckets = 0
      for blr in self.WildcardIterator(url_str).IterBuckets(
          bucket_fields=bucket_fields):
        if not ContainsWildcard(url_str) and not blr.root_object:
          # Iterator does not make an HTTP call for non-wildcarded
          # listings with fields=='id'. Ensure the bucket exists by calling
          # GetBucket.
          self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                    fields=['id'],
                                    provider=storage_url.scheme)
        self._PrintBucketInfo(blr, listing_style)
        total_buckets += 1
      if not ContainsWildcard(url_str) and not total_buckets:
        got_bucket_nomatch_errors = True
    else:
      # URL names a bucket, object, or object subdir ->
      # list matching object(s) / subdirs.
      def _PrintPrefixLong(blr):
        print '%-33s%s' % ('', blr.url_string.encode(UTF8))

      if listing_style == ListingStyle.SHORT:
        # ls helper by default readies us for a short listing.
        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             all_versions=self.all_versions,
                             print_bucket_header_func=print_bucket_header,
                             should_recurse=self.recursion_requested)
      elif listing_style == ListingStyle.LONG:
        bucket_listing_fields = ['name', 'updated', 'size']
        if self.all_versions:
          bucket_listing_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          bucket_listing_fields.append('etag')

        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             print_object_func=self._PrintLongListing,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields)
      elif listing_style == ListingStyle.LONG_LONG:
        # List all fields.
        bucket_listing_fields = None
        ls_helper = LsHelper(self.WildcardIterator, self.logger,
                             print_object_func=PrintFullInfoAboutObject,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields)
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
          storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print('TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')
  if got_bucket_nomatch_errors:
    raise NotFoundException('One or more bucket URLs matched no buckets.')

  return 0
def RunCommand(self):
  """Called by gsutil when the command is being invoked."""
  self._ParseArgs()

  if self.input_file:
    self._DisplayResults()
    return 0

  # We turn off retries in the underlying boto library because the
  # _RunOperation function handles errors manually so it can count them.
  boto.config.set('Boto', 'num_retries', '0')

  self.logger.info(
      'Number of iterations to run: %d\n'
      'Base bucket URI: %s\n'
      'Number of processes: %d\n'
      'Number of threads: %d\n'
      'Throughput file size: %s\n'
      'Diagnostics to run: %s',
      self.num_iterations, self.bucket_uri, self.processes, self.threads,
      MakeHumanReadable(self.thru_filesize), (', '.join(self.diag_tests)))

  try:
    self._SetUp()

    # Collect generic system info.
    self._CollectSysInfo()
    # Collect netstat info and disk counters before tests (and again later).
    self.results['sysinfo']['netstat_start'] = self._GetTcpStats()
    if IS_LINUX:
      self.results['sysinfo']['disk_counters_start'] = self._GetDiskCounters()
    # Record bucket URI.
    self.results['bucket_uri'] = str(self.bucket_uri)
    self.results['json_format'] = 'perfdiag'
    self.results['metadata'] = self.metadata_keys

    if 'lat' in self.diag_tests:
      self._RunLatencyTests()
    if 'rthru' in self.diag_tests:
      self._RunReadThruTests()
    if 'wthru' in self.diag_tests:
      self._RunWriteThruTests()

    # Collect netstat info and disk counters after tests.
    self.results['sysinfo']['netstat_end'] = self._GetTcpStats()
    if IS_LINUX:
      self.results['sysinfo']['disk_counters_end'] = self._GetDiskCounters()

    self.results['total_requests'] = self.total_requests
    self.results['request_errors'] = self.request_errors
    self.results['error_responses_by_code'] = self.error_responses_by_code
    self.results['connection_breaks'] = self.connection_breaks
    self.results['gsutil_version'] = gslib.VERSION
    self.results['boto_version'] = boto.__version__

    self._DisplayResults()
  finally:
    self._TearDown()

  return 0