def _ComputeNeededFileChecksums(logger, src_url_str, src_size, src_crc32c,
                                src_md5, dst_url_str, dst_size, dst_crc32c,
                                dst_md5):
    """Computes any file checksums needed by _ObjectsMatch.

    Only local (file URL) sides are ever hashed here; checksums passed in for
    the other side are returned untouched. CRC32C is preferred: MD5 is only
    computed when the opposite side offers an MD5 but no CRC32C.

    Args:
      logger: logging.logger for outputting log messages.
      src_url_str: Source URL string.
      src_size: Source size; an info message is logged before hashing the
          source file when this exceeds TEN_MIB.
      src_crc32c: Source CRC32c, or _NA when not available.
      src_md5: Source MD5, or _NA when not available.
      dst_url_str: Destination URL string.
      dst_size: Destination size; an info message is logged before hashing
          the destination file when this exceeds TEN_MIB.
      dst_crc32c: Destination CRC32c, or _NA when not available.
      dst_md5: Destination MD5, or _NA when not available.

    Returns:
      (src_crc32c, src_md5, dst_crc32c, dst_md5)
    """
    src_url = StorageUrlFromString(src_url_str)
    dst_url = StorageUrlFromString(dst_url_str)

    if src_url.IsFileUrl():
        if dst_crc32c != _NA or dst_url.IsFileUrl():
            # Destination either already has a CRC32C or is a local file
            # (whose CRC32C gets computed below), so hash the source to match.
            if src_size > TEN_MIB:
                logger.info('Computing CRC32C for %s...', src_url_str)
            with open(src_url.object_name, 'rb') as fp:
                src_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
        elif dst_md5 != _NA:
            # The destination is not a file here (that case is handled by the
            # branch above), so only its MD5 availability matters.
            if src_size > TEN_MIB:
                logger.info('Computing MD5 for %s...', src_url_str)
            with open(src_url.object_name, 'rb') as fp:
                src_md5 = CalculateB64EncodedMd5FromContents(fp)

    if dst_url.IsFileUrl():
        # src_crc32c / src_md5 may have just been filled in above when the
        # source is a file as well.
        if src_crc32c != _NA:
            if dst_size > TEN_MIB:
                logger.info('Computing CRC32C for %s...', dst_url_str)
            with open(dst_url.object_name, 'rb') as fp:
                dst_crc32c = CalculateB64EncodedCrc32cFromContents(fp)
        elif src_md5 != _NA:
            if dst_size > TEN_MIB:
                logger.info('Computing MD5 for %s...', dst_url_str)
            with open(dst_url.object_name, 'rb') as fp:
                dst_md5 = CalculateB64EncodedMd5FromContents(fp)

    return (src_crc32c, src_md5, dst_crc32c, dst_md5)
def test_compute_needed_file_checksums(self):
    """Tests that we compute all/only needed file checksums."""
    size = 4
    logger = logging.getLogger()
    tmpdir = self.CreateTempDir()
    file_url_str = 'file://%s' % os.path.join(tmpdir, 'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    cloud_url_str = 'gs://whatever'
    # Reference checksums for the local file, computed independently of the
    # function under test.
    with open(os.path.join(tmpdir, 'obj1'), 'rb') as fp:
        crc32c = CalculateB64EncodedCrc32cFromContents(fp)
        fp.seek(0)
        md5 = CalculateB64EncodedMd5FromContents(fp)

    # Test case where source is a file and dest has CRC32C.
    (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
        logger, file_url_str, size, _NA, _NA, cloud_url_str, size, crc32c,
        _NA)
    self.assertEqual(crc32c, src_crc32c)
    self.assertEqual(_NA, src_md5)
    self.assertEqual(crc32c, dst_crc32c)
    self.assertEqual(_NA, dst_md5)

    # Test case where source is a file and dest has MD5 but not CRC32C.
    (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
        logger, file_url_str, size, _NA, _NA, cloud_url_str, size, _NA, md5)
    self.assertEqual(_NA, src_crc32c)
    self.assertEqual(md5, src_md5)
    self.assertEqual(_NA, dst_crc32c)
    self.assertEqual(md5, dst_md5)

    # Test case where dest is a file and src has CRC32C.
    (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
        logger, cloud_url_str, size, crc32c, _NA, file_url_str, size, _NA,
        _NA)
    self.assertEqual(crc32c, dst_crc32c)
    # The destination MD5 must not be computed when CRC32C is available.
    self.assertEqual(_NA, dst_md5)
    self.assertEqual(crc32c, src_crc32c)
    self.assertEqual(_NA, src_md5)

    # Test case where dest is a file and src has MD5 but not CRC32C.
    (src_crc32c, src_md5, dst_crc32c, dst_md5) = _ComputeNeededFileChecksums(
        logger, cloud_url_str, size, _NA, md5, file_url_str, size, _NA, _NA)
    self.assertEqual(_NA, dst_crc32c)
    # The destination MD5 is the value the function has to compute here.
    self.assertEqual(md5, dst_md5)
    self.assertEqual(_NA, src_crc32c)
    self.assertEqual(md5, src_md5)
def test_FilterExistingComponentsNonVersioned(self):
    """Tests upload with a variety of component states.

    Builds one component per state (uploaded correctly, not yet uploaded,
    uploaded with stale contents, missing remotely, no longer needed) in a
    non-versioned bucket and checks how FilterExistingComponents sorts them
    into upload / already-uploaded / delete lists.
    """
    mock_api = MockCloudApi()
    bucket_name = self.MakeTempName('bucket')
    tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
    tracker_file_lock = CreateLock()

    # dst_obj_metadata used for passing content-type.
    empty_object = apitools_messages.Object()

    # Already uploaded, contents still match, component still used.
    fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                   contents='1')
    fpath_uploaded_correctly_url = StorageUrlFromString(
        str(fpath_uploaded_correctly))
    object_uploaded_correctly_url = StorageUrlFromString(
        '%s://%s/%s' % (self.default_provider, bucket_name,
                        fpath_uploaded_correctly))
    # Hash the raw bytes; text mode is not appropriate for checksumming.
    with open(fpath_uploaded_correctly, 'rb') as f_in:
        fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
            f_in)
    mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name,
                                 name=fpath_uploaded_correctly,
                                 md5Hash=fpath_uploaded_correctly_md5),
        contents='1')

    args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
        fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
        object_uploaded_correctly_url, '', empty_object, tracker_file,
        tracker_file_lock, None)

    # Not yet uploaded, but needed.
    fpath_not_uploaded = self.CreateTempFile(file_name='foo2', contents='2')
    fpath_not_uploaded_url = StorageUrlFromString(str(fpath_not_uploaded))
    object_not_uploaded_url = StorageUrlFromString(
        '%s://%s/%s' % (self.default_provider, bucket_name,
                        fpath_not_uploaded))
    args_not_uploaded = PerformParallelUploadFileToObjectArgs(
        fpath_not_uploaded, 0, 1, fpath_not_uploaded_url,
        object_not_uploaded_url, '', empty_object, tracker_file,
        tracker_file_lock, None)

    # Already uploaded, but contents no longer match. Even though the contents
    # differ, we don't delete this since the bucket is not versioned and it
    # will be overwritten anyway.
    fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
    fpath_wrong_contents_url = StorageUrlFromString(
        str(fpath_wrong_contents))
    object_wrong_contents_url = StorageUrlFromString(
        '%s://%s/%s' % (self.default_provider, bucket_name,
                        fpath_wrong_contents))
    # The MD5 comes from a different file ('_'), so it deliberately does not
    # match the local contents ('4').
    with open(self.CreateTempFile(contents='_'), 'rb') as f_in:
        fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
    mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name,
                                 name=fpath_wrong_contents,
                                 md5Hash=fpath_wrong_contents_md5),
        contents='1')

    args_wrong_contents = PerformParallelUploadFileToObjectArgs(
        fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
        object_wrong_contents_url, '', empty_object, tracker_file,
        tracker_file_lock, None)

    # Exists in tracker file, but component object no longer exists.
    fpath_remote_deleted = self.CreateTempFile(file_name='foo5', contents='5')
    fpath_remote_deleted_url = StorageUrlFromString(
        str(fpath_remote_deleted))
    args_remote_deleted = PerformParallelUploadFileToObjectArgs(
        fpath_remote_deleted, 0, 1, fpath_remote_deleted_url, '', '',
        empty_object, tracker_file, tracker_file_lock, None)

    # Exists in tracker file and already uploaded, but no longer needed.
    fpath_no_longer_used = self.CreateTempFile(file_name='foo6', contents='6')
    with open(fpath_no_longer_used, 'rb') as f_in:
        file_md5 = CalculateB64EncodedMd5FromContents(f_in)
    mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name, name='foo6',
                                 md5Hash=file_md5),
        contents='6')

    # fpath_no_longer_used is intentionally absent from dst_args: it is only
    # known through the tracker entries below.
    dst_args = {
        fpath_uploaded_correctly: args_uploaded_correctly,
        fpath_not_uploaded: args_not_uploaded,
        fpath_wrong_contents: args_wrong_contents,
        fpath_remote_deleted: args_remote_deleted,
    }

    existing_components = [
        ObjectFromTracker(fpath_uploaded_correctly, ''),
        ObjectFromTracker(fpath_wrong_contents, ''),
        ObjectFromTracker(fpath_remote_deleted, ''),
        ObjectFromTracker(fpath_no_longer_used, ''),
    ]

    bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
                                                   bucket_name))

    (components_to_upload, uploaded_components,
     existing_objects_to_delete) = FilterExistingComponents(
         dst_args, existing_components, bucket_url, mock_api)

    for arg in [args_not_uploaded, args_wrong_contents, args_remote_deleted]:
        self.assertIn(arg, components_to_upload)
    self.assertEqual(1, len(uploaded_components))
    self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                     uploaded_components[0].url_string)
    self.assertEqual(1, len(existing_objects_to_delete))
    no_longer_used_url = StorageUrlFromString(
        '%s://%s/%s' % (self.default_provider, bucket_name,
                        fpath_no_longer_used))
    self.assertEqual(no_longer_used_url.url_string,
                     existing_objects_to_delete[0].url_string)
def test_FilterExistingComponentsVersioned(self):
    """Tests upload with versioned parallel components.

    Uses a versioned bucket holding a correctly uploaded component, a second
    generation of that same object name (a duplicate tracker entry), and a
    component whose contents no longer match, then checks how
    FilterExistingComponents sorts them into upload / already-uploaded /
    delete lists.
    """
    mock_api = MockCloudApi()
    bucket_name = self.MakeTempName('bucket')
    mock_api.MockCreateVersionedBucket(bucket_name)

    # dst_obj_metadata used for passing content-type.
    empty_object = apitools_messages.Object()

    tracker_file = self.CreateTempFile(file_name='foo', contents='asdf')
    tracker_file_lock = CreateLock()

    # Already uploaded, contents still match, component still used.
    fpath_uploaded_correctly = self.CreateTempFile(file_name='foo1',
                                                   contents='1')
    fpath_uploaded_correctly_url = StorageUrlFromString(
        str(fpath_uploaded_correctly))
    # Hash the raw bytes; text mode is not appropriate for checksumming.
    with open(fpath_uploaded_correctly, 'rb') as f_in:
        fpath_uploaded_correctly_md5 = CalculateB64EncodedMd5FromContents(
            f_in)
    object_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name,
                                 name=fpath_uploaded_correctly,
                                 md5Hash=fpath_uploaded_correctly_md5),
        contents='1')
    object_uploaded_correctly_url = StorageUrlFromString(
        '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                           fpath_uploaded_correctly,
                           object_uploaded_correctly.generation))
    args_uploaded_correctly = PerformParallelUploadFileToObjectArgs(
        fpath_uploaded_correctly, 0, 1, fpath_uploaded_correctly_url,
        object_uploaded_correctly_url, object_uploaded_correctly.generation,
        empty_object, tracker_file, tracker_file_lock, None)

    # Duplicate object name in tracker file, but uploaded correctly.
    fpath_duplicate = fpath_uploaded_correctly
    fpath_duplicate_url = StorageUrlFromString(str(fpath_duplicate))
    duplicate_uploaded_correctly = mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name,
                                 name=fpath_duplicate,
                                 md5Hash=fpath_uploaded_correctly_md5),
        contents='1')
    duplicate_uploaded_correctly_url = StorageUrlFromString(
        '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                           fpath_uploaded_correctly,
                           duplicate_uploaded_correctly.generation))
    args_duplicate = PerformParallelUploadFileToObjectArgs(
        fpath_duplicate, 0, 1, fpath_duplicate_url,
        duplicate_uploaded_correctly_url,
        duplicate_uploaded_correctly.generation, empty_object, tracker_file,
        tracker_file_lock, None)

    # Already uploaded, but contents no longer match.
    fpath_wrong_contents = self.CreateTempFile(file_name='foo4', contents='4')
    fpath_wrong_contents_url = StorageUrlFromString(
        str(fpath_wrong_contents))
    # The MD5 comes from a different file ('_'), so it deliberately does not
    # match the local contents ('4').
    with open(self.CreateTempFile(contents='_'), 'rb') as f_in:
        fpath_wrong_contents_md5 = CalculateB64EncodedMd5FromContents(f_in)
    object_wrong_contents = mock_api.MockCreateObjectWithMetadata(
        apitools_messages.Object(bucket=bucket_name,
                                 name=fpath_wrong_contents,
                                 md5Hash=fpath_wrong_contents_md5),
        contents='_')
    wrong_contents_url = StorageUrlFromString(
        '%s://%s/%s#%s' % (self.default_provider, bucket_name,
                           fpath_wrong_contents,
                           object_wrong_contents.generation))
    args_wrong_contents = PerformParallelUploadFileToObjectArgs(
        fpath_wrong_contents, 0, 1, fpath_wrong_contents_url,
        wrong_contents_url, '', empty_object, tracker_file,
        tracker_file_lock, None)

    # args_duplicate is not in dst_args: it shares its key with
    # args_uploaded_correctly and is only represented via the tracker list.
    dst_args = {
        fpath_uploaded_correctly: args_uploaded_correctly,
        fpath_wrong_contents: args_wrong_contents,
    }

    existing_components = [
        ObjectFromTracker(fpath_uploaded_correctly,
                          object_uploaded_correctly_url.generation),
        ObjectFromTracker(fpath_duplicate,
                          duplicate_uploaded_correctly_url.generation),
        ObjectFromTracker(fpath_wrong_contents,
                          wrong_contents_url.generation),
    ]

    bucket_url = StorageUrlFromString('%s://%s' % (self.default_provider,
                                                   bucket_name))

    (components_to_upload, uploaded_components,
     existing_objects_to_delete) = FilterExistingComponents(
         dst_args, existing_components, bucket_url, mock_api)

    self.assertEqual([args_wrong_contents], components_to_upload)
    self.assertEqual(args_uploaded_correctly.dst_url.url_string,
                     uploaded_components[0].url_string)
    expected_to_delete = [
        (args_wrong_contents.dst_url.object_name,
         args_wrong_contents.dst_url.generation),
        (args_duplicate.dst_url.object_name,
         args_duplicate.dst_url.generation),
    ]
    for uri in existing_objects_to_delete:
        self.assertIn((uri.object_name, uri.generation), expected_to_delete)
    self.assertEqual(len(expected_to_delete), len(existing_objects_to_delete))