def RunCommand(self): """Command entry point for the hash command.""" (calc_crc32c, calc_md5, format_func, output_format) = (self._ParseOpts(self.sub_opts, self.logger)) matched_one = False for url_str in self.args: if not StorageUrlFromString(url_str).IsFileUrl(): raise CommandException('"hash" command requires a file URL') for file_ref in self.WildcardIterator(url_str).IterObjects(): matched_one = True file_name = file_ref.storage_url.object_name file_size = os.path.getsize(file_name) callback_processor = ProgressCallbackWithBackoff( file_size, FileProgressCallbackHandler( ConstructAnnounceText('Hashing', file_name), self.logger).call) hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5) with open(file_name, 'rb') as fp: CalculateHashesFromContents( fp, hash_dict, callback_processor=callback_processor) print 'Hashes [%s] for %s:' % (output_format, file_name) for name, digest in hash_dict.iteritems(): print '\tHash (%s):\t\t%s' % (name, format_func(digest)) if not matched_one: raise CommandException('No files matched') return 0
def RunCommand(self):
  """Command entry point for the hash command."""
  (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
      self._ParseOpts(self.sub_opts, self.logger))

  matched_one = False
  for url_str in self.args:
    # Only the listing fields needed to report hashes are requested for
    # cloud objects; local files are hashed from their contents below.
    for file_ref in self.WildcardIterator(
        url_str).IterObjects(bucket_listing_fields=['crc32c', 'md5Hash',
                                                    'customerEncryption',
                                                    'size']):
      matched_one = True
      url = StorageUrlFromString(url_str)
      file_name = file_ref.storage_url.object_name
      if StorageUrlFromString(url_str).IsFileUrl():
        # Local file: compute the requested digests by reading the file,
        # bracketing the work with start/finish FILE_HASH status messages.
        file_size = os.path.getsize(file_name)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=False,
                        message_type=FileMessage.FILE_HASH))
        callback_processor = ProgressCallbackWithTimeout(
            file_size, FileProgressCallbackHandler(
                self.gsutil_api.status_queue,
                src_url=StorageUrlFromString(url_str),
                operation_name='Hashing').call)
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        self.gsutil_api.status_queue.put(
            FileMessage(url, None, time.time(), size=file_size,
                        finished=True,
                        message_type=FileMessage.FILE_HASH))
      else:
        # Cloud object: use the hash values already present in its
        # metadata rather than downloading the object.
        hash_dict = {}
        obj_metadata = file_ref.root_object
        file_size = obj_metadata.size
        md5_present = obj_metadata.md5Hash is not None
        crc32c_present = obj_metadata.crc32c is not None
        if not md5_present and not crc32c_present:
          # Nothing to report for this object (e.g. composite objects may
          # lack an MD5); warn and move on to the next match.
          logging.getLogger().warn('No hashes present for %s', url_str)
          continue
        if md5_present:
          hash_dict['md5'] = obj_metadata.md5Hash
        if crc32c_present:
          hash_dict['crc32c'] = obj_metadata.crc32c
      print 'Hashes [%s] for %s:' % (output_format, file_name)
      for name, digest in hash_dict.iteritems():
        # Local digests are digester objects needing format_func; cloud
        # values are metadata strings handled by cloud_format_func.
        print '\tHash (%s):\t\t%s' % (name, (format_func(digest)
                                             if url.IsFileUrl()
                                             else cloud_format_func(digest)))

  if not matched_one:
    raise CommandException('No files matched')
  # Signal the status queue that this command's output is complete.
  PutToQueueWithTimeout(self.gsutil_api.status_queue,
                        FinalMessage(time.time()))
  return 0
def testReadInChunks(self):
  """Checks chunked hashing through the wrapper matches a direct MD5."""
  path = self._GetTestFile()
  digesters = {'md5': md5()}
  with open(path, 'rb') as fp:
    # CalculateHashesFromContents reads in chunks, but does not seek.
    CalculateHashesFromContents(
        ResumableStreamingJsonUploadWrapper(fp, TRANSFER_BUFFER_SIZE,
                                            test_small_buffer=True),
        digesters)
  with open(path, 'rb') as fp:
    expected = CalculateMd5FromContents(fp)
  self.assertEqual(expected, digesters['md5'].hexdigest())