def RunCommand(self): """Command entry point for the rsync command.""" self._ParseOpts() if self.compute_checksums and not UsingCrcmodExtension(crcmod): self.logger.warn(SLOW_CRCMOD_WARNING) src_url = self._InsistContainer(self.args[0]) dst_url = self._InsistContainer(self.args[1]) # Tracks if any copy or rm operations failed. self.op_failure_count = 0 # List of attributes to share/manage across multiple processes in # parallel (-m) mode. shared_attrs = ['op_failure_count'] # Perform sync requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. diff_iterator = _DiffIterator(self, src_url, dst_url) self.logger.info('Starting synchronization') try: self.Apply(_RsyncFunc, diff_iterator, _RsyncExceptionHandler, shared_attrs, arg_checker=_DiffToApplyArgChecker, fail_on_error=True) finally: diff_iterator.CleanUpTempFiles() if self.op_failure_count: plural_str = 's' if self.op_failure_count else '' raise CommandException( '%d file%s/object%s could not be copied/removed.' % (self.op_failure_count, plural_str, plural_str))
def RunCommand(self): """Command entry point for the version command.""" long_form = False if self.sub_opts: for o, _ in self.sub_opts: if o == '-l': long_form = True if GetConfigFilePaths(): config_paths = ', '.join(GetConfigFilePaths()) else: config_paths = 'no config found' shipped_checksum = gslib.CHECKSUM try: cur_checksum = self._ComputeCodeChecksum() except IOError: cur_checksum = 'MISSING FILES' if shipped_checksum == cur_checksum: checksum_ok_str = 'OK' else: checksum_ok_str = '!= %s' % shipped_checksum sys.stdout.write('gsutil version: %s\n' % gslib.VERSION) if long_form: long_form_output = ( 'checksum: {checksum} ({checksum_ok})\n' 'boto version: {boto_version}\n' 'python version: {python_version}\n' 'OS: {os_version}\n' 'multiprocessing available: {multiprocessing_available}\n' 'using cloud sdk: {cloud_sdk}\n' 'config path(s): {config_paths}\n' 'gsutil path: {gsutil_path}\n' 'compiled crcmod: {compiled_crcmod}\n' 'installed via package manager: {is_package_install}\n' 'editable install: {is_editable_install}\n') sys.stdout.write( long_form_output.format( checksum=cur_checksum, checksum_ok=checksum_ok_str, boto_version=boto.__version__, python_version=sys.version.replace('\n', ''), os_version='%s %s' % (platform.system(), platform.release()), multiprocessing_available=( CheckMultiprocessingAvailableAndInit().is_available), cloud_sdk=(os.environ.get('CLOUDSDK_WRAPPER') == '1'), config_paths=config_paths, gsutil_path=gslib.GSUTIL_PATH, compiled_crcmod=UsingCrcmodExtension(crcmod), is_package_install=gslib.IS_PACKAGE_INSTALL, is_editable_install=gslib.IS_EDITABLE_INSTALL, )) return 0
def Wrapper(*args, **kwargs):
  # Run the test normally once.
  func(*args, **kwargs)

  if not RUN_S3_TESTS and UsingCrcmodExtension(crcmod):
    # Try again, forcing parallel upload and sliced download.
    with SetBotoConfigForTest([
        ('GSUtil', 'parallel_composite_upload_threshold', '1'),
        ('GSUtil', 'sliced_object_download_threshold', '1'),
        ('GSUtil', 'sliced_object_download_max_components', '3'),
        ('GSUtil', 'check_hashes', 'always')]):
      func(*args, **kwargs)

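# For context, a sketch of how Wrapper is presumably exposed as the
# SequentialAndParallelTransfer decorator applied to tests elsewhere in this
# suite. The functools.wraps usage is an assumption made to keep wrapped test
# names intact for the test runner; the full two-pass Wrapper body is the one
# defined above.
import functools


def SequentialAndParallelTransfer(func):
  """Decorator for transfer tests that should also run with forced slicing."""
  @functools.wraps(func)
  def Wrapper(*args, **kwargs):
    func(*args, **kwargs)  # See the full two-pass body above.
  return Wrapper
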
def RunCommand(self):
  long_form = False
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-l':
        long_form = True

  config_path = GetConfigFilePath()

  shipped_checksum = gslib.CHECKSUM
  try:
    cur_checksum = self._ComputeCodeChecksum()
  except IOError:
    cur_checksum = 'MISSING FILES'
  if shipped_checksum == cur_checksum:
    checksum_ok_str = 'OK'
  else:
    checksum_ok_str = '!= %s' % shipped_checksum

  sys.stdout.write('gsutil version %s\n' % gslib.VERSION)

  if long_form:
    long_form_output = (
        'checksum {checksum} ({checksum_ok})\n'
        'boto version {boto_version}\n'
        'python version {python_version}\n'
        'config path: {config_path}\n'
        'gsutil path: {gsutil_path}\n'
        'compiled crcmod: {compiled_crcmod}\n'
        'installed via package manager: {is_package_install}\n'
        'editable install: {is_editable_install}\n')

    sys.stdout.write(
        long_form_output.format(
            checksum=cur_checksum,
            checksum_ok=checksum_ok_str,
            boto_version=boto.__version__,
            python_version=sys.version,
            config_path=config_path,
            gsutil_path=gslib.GSUTIL_PATH,
            compiled_crcmod=UsingCrcmodExtension(crcmod),
            is_package_install=gslib.IS_PACKAGE_INSTALL,
            is_editable_install=gslib.IS_EDITABLE_INSTALL,
        ))

  return 0

def _ParseOpts(cls, sub_opts, logger):
  """Returns behavior variables based on input options.

  Args:
    sub_opts: getopt sub-arguments for the command.
    logger: logging.Logger for the command.

  Returns:
    Tuple of
    calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
    calc_md5: Boolean, if True, command should calculate an MD5 hash.
    format_func: Function used for formatting the hash in the desired format.
    cloud_format_func: Function used for formatting the hash in the desired
                       format.
    output_format: String describing the hash output format.
  """
  calc_crc32c = False
  calc_md5 = False
  format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
  cloud_format_func = lambda digest: digest
  found_hash_option = False
  output_format = 'base64'

  if sub_opts:
    for o, unused_a in sub_opts:
      if o == '-c':
        calc_crc32c = True
        found_hash_option = True
      elif o == '-h':
        output_format = 'hex'
        format_func = lambda digest: digest.hexdigest()
        cloud_format_func = lambda digest: Base64ToHexHash(digest)
      elif o == '-m':
        calc_md5 = True
        found_hash_option = True

  if not found_hash_option:
    calc_crc32c = True
    calc_md5 = True

  if calc_crc32c and not UsingCrcmodExtension(crcmod):
    logger.warn(SLOW_CRCMOD_WARNING)

  return calc_crc32c, calc_md5, format_func, cloud_format_func, output_format

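# A minimal usage sketch for the values returned by _ParseOpts above, showing
# how the calc flags and format_func might drive hashing of a local file. The
# digester construction and the 1 MiB read size are illustrative assumptions,
# not the command's actual transfer logic; the imports are repeated here only
# to keep the sketch self-contained.
import hashlib

import crcmod


def _HashFileSketch(path, calc_crc32c, calc_md5, format_func):
  """Returns a dict of (algorithm name, formatted digest) for a local file."""
  digesters = {}
  if calc_crc32c:
    digesters['crc32c'] = crcmod.predefined.Crc('crc-32c')
  if calc_md5:
    digesters['md5'] = hashlib.md5()
  with open(path, 'rb') as fp:
    # Feed the file through every requested digester in 1 MiB chunks.
    for chunk in iter(lambda: fp.read(1024 * 1024), b''):
      for digester in digesters.values():
        digester.update(chunk)
  return dict((name, format_func(digester))
              for name, digester in digesters.items())
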
def GetDownloadHashAlgs(logger, consider_md5=False, consider_crc32c=False):
  """Returns a dict of hash algorithms for validating an object.

  Args:
    logger: logging.Logger for outputting log messages.
    consider_md5: If True, consider using an MD5 hash.
    consider_crc32c: If True, consider using a CRC32c hash.

  Returns:
    Dict of (string, hash algorithm).

  Raises:
    CommandException if hash algorithms satisfying the boto config file
    cannot be returned.
  """
  check_hashes_config = config.get('GSUtil', 'check_hashes',
                                   CHECK_HASH_IF_FAST_ELSE_FAIL)
  if check_hashes_config == CHECK_HASH_NEVER:
    return {}

  hash_algs = {}
  if consider_md5:
    hash_algs['md5'] = md5
  elif consider_crc32c:
    # If the cloud provider supplies a CRC, we'll compute a checksum to
    # validate if we're using a native crcmod installation and MD5 isn't
    # offered as an alternative.
    if UsingCrcmodExtension(crcmod):
      hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
    elif not hash_algs:
      if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL:
        raise CommandException(_SLOW_CRC_EXCEPTION_TEXT)
      elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP:
        logger.warn(_NO_HASH_CHECK_WARNING)
      elif check_hashes_config == CHECK_HASH_ALWAYS:
        logger.warn(_SLOW_CRCMOD_DOWNLOAD_WARNING)
        hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
      else:
        raise CommandException(
            'Your boto config \'check_hashes\' option is misconfigured.')

  return hash_algs

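# A minimal sketch of how the dict returned above might be consumed to
# validate a downloaded file. The expected_hashes argument and the Base64
# comparison are illustrative assumptions, not gsutil's actual download
# validation path; Base64EncodeHash and CommandException are the helpers
# referenced elsewhere in this module.
def _ValidateDownloadSketch(logger, file_path, expected_hashes, hash_algs):
  """Raises CommandException if a computed hash mismatches its expected value.

  Args:
    logger: logging.Logger for outputting log messages.
    file_path: Path of the downloaded file to check.
    expected_hashes: Dict of (alg name, Base64-encoded digest) assumed to
        have been fetched from the object's cloud metadata.
    hash_algs: Dict of (alg name, digester factory) as returned by
        GetDownloadHashAlgs.
  """
  for alg_name, alg_factory in hash_algs.items():
    digester = alg_factory()
    with open(file_path, 'rb') as fp:
      for chunk in iter(lambda: fp.read(1024 * 1024), b''):
        digester.update(chunk)
    if Base64EncodeHash(digester.hexdigest()) != expected_hashes.get(alg_name):
      raise CommandException(
          '%s hash of %s does not match cloud-supplied digest.' %
          (alg_name, file_path))
    logger.debug('Verified %s hash for %s', alg_name, file_path)
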
class TestMv(testcase.GsUtilIntegrationTestCase):
  """Integration tests for mv command."""

  def test_moving(self):
    """Tests moving objects between two buckets, one with 2 objects, one empty."""
    bucket1_uri = self.CreateBucket(test_objects=2)
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    bucket2_uri = self.CreateBucket()
    self.AssertNObjectsInBucket(bucket2_uri, 0)

    # Move two objects from bucket1 to bucket2.
    objs = [
        bucket1_uri.clone_replace_key(key).versionless_uri
        for key in bucket1_uri.list_bucket()
    ]
    cmd = (['-m', 'mv'] + objs + [suri(bucket2_uri)])
    stderr = self.RunGsUtil(cmd, return_stderr=True)
    # Rewrite API may output an additional 'Copying' progress notification.
    self.assertGreaterEqual(
        stderr.count('Copying'), 2,
        'stderr did not contain >= 2 "Copying" lines:\n%s' % stderr)
    self.assertLessEqual(
        stderr.count('Copying'), 4,
        'stderr did not contain <= 4 "Copying" lines:\n%s' % stderr)
    self.assertEqual(
        stderr.count('Copying') % 2, 0,
        'stderr did not contain an even number of "Copying" lines:\n%s' %
        stderr)
    self.assertEqual(
        stderr.count('Removing'), 2,
        'stderr did not contain 2 "Removing" lines:\n%s' % stderr)
    self.AssertNObjectsInBucket(bucket1_uri, 0)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Remove one of the objects.
    objs = [
        bucket2_uri.clone_replace_key(key).versionless_uri
        for key in bucket2_uri.list_bucket()
    ]
    obj1 = objs[0]
    self.RunGsUtil(['rm', obj1])
    self.AssertNObjectsInBucket(bucket1_uri, 0)
    self.AssertNObjectsInBucket(bucket2_uri, 1)

    # Move the 1 remaining object back.
    objs = [
        suri(bucket2_uri.clone_replace_key(key))
        for key in bucket2_uri.list_bucket()
    ]
    cmd = (['-m', 'mv'] + objs + [suri(bucket1_uri)])
    stderr = self.RunGsUtil(cmd, return_stderr=True)
    # Rewrite API may output an additional 'Copying' progress notification.
    self.assertGreaterEqual(
        stderr.count('Copying'), 1,
        'stderr did not contain >= 1 "Copying" lines:\n%s' % stderr)
    self.assertLessEqual(
        stderr.count('Copying'), 2,
        'stderr did not contain <= 2 "Copying" lines:\n%s' % stderr)
    self.assertEqual(stderr.count('Removing'), 1)
    self.AssertNObjectsInBucket(bucket1_uri, 1)
    self.AssertNObjectsInBucket(bucket2_uri, 0)

  def test_move_bucket_to_dir(self):
    """Tests moving bucket objects to a local directory."""
    bucket_uri = self.CreateBucket(test_objects=2)
    self.AssertNObjectsInBucket(bucket_uri, 2)
    tmpdir = self.CreateTempDir()
    self.RunGsUtil(['mv', suri(bucket_uri, '*'), tmpdir])
    dir_list = []
    for dirname, _, filenames in os.walk(tmpdir):
      for filename in filenames:
        dir_list.append(os.path.join(dirname, filename))
    self.assertEqual(len(dir_list), 2)
    self.AssertNObjectsInBucket(bucket_uri, 0)

  def test_move_dir_to_bucket(self):
    """Tests moving a local directory to a bucket."""
    bucket_uri = self.CreateBucket()
    dir_to_move = self.CreateTempDir(test_files=2)
    self.RunGsUtil(['mv', dir_to_move, suri(bucket_uri)])
    self.AssertNObjectsInBucket(bucket_uri, 2)

  @SequentialAndParallelTransfer
  def test_stdin_args(self):
    """Tests mv with the -I option."""
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
    fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
    bucket_uri = self.CreateBucket()
    self.RunGsUtil(['mv', '-I', suri(bucket_uri)],
                   stdin='\n'.join((fpath1, fpath2)))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
      self.assertIn(os.path.basename(fpath1), stdout)
      self.assertIn(os.path.basename(fpath2), stdout)
      self.assertNumLines(stdout, 2)
    _Check1()

  def test_mv_no_clobber(self):
    """Tests mv with the -n option."""
    fpath1 = self.CreateTempFile(contents='data1')
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
    stderr = self.RunGsUtil(['mv', '-n', fpath1, suri(object_uri)],
                            return_stderr=True)
    # Copy should be skipped and source file should not be removed.
    self.assertIn('Skipping existing item: %s' % suri(object_uri), stderr)
    self.assertNotIn('Removing %s' % suri(fpath1), stderr)
    # Object content should be unchanged.
    contents = self.RunGsUtil(['cat', suri(object_uri)], return_stdout=True)
    self.assertEqual(contents, 'data2')

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_mv_preserve_posix_bucket_to_dir_no_errors(self):
    """Tests use of the -P flag with mv from a bucket to a local dir.

    Specifically tests combinations of POSIX attributes in metadata that will
    pass validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    TestCpMvPOSIXBucketToLocalNoErrors(self, bucket_uri, tmpdir, is_cp=False)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_mv_preserve_posix_bucket_to_dir_errors(self):
    """Tests use of the -P flag with mv from a bucket to a local dir.

    Specifically, combinations of POSIX attributes in metadata that will fail
    validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    obj = self.CreateObject(bucket_uri=bucket_uri, object_name='obj',
                            contents='obj')
    TestCpMvPOSIXBucketToLocalErrors(self, bucket_uri, obj, tmpdir,
                                     is_cp=False)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_mv_preserve_posix_dir_to_bucket_no_errors(self):
    """Tests use of the -P flag with mv from a local dir to a bucket."""
    bucket_uri = self.CreateBucket()
    TestCpMvPOSIXLocalToBucketNoErrors(self, bucket_uri, is_cp=False)

  @SkipForS3('Test is only relevant for gs storage classes.')
  def test_mv_early_deletion_warning(self):
    """Tests that mv on a recent nearline object warns about early deletion."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip('boto does not return object storage class')

    bucket_uri = self.CreateBucket(storage_class='NEARLINE')
    object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='obj')
    stderr = self.RunGsUtil(
        ['mv', suri(object_uri), suri(bucket_uri, 'foo')],
        return_stderr=True)
    self.assertIn(
        'Warning: moving nearline object %s may incur an early deletion '
        'charge, because the original object is less than 30 days old '
        'according to the local system time.' % suri(object_uri), stderr)

class TestRsync(testcase.GsUtilIntegrationTestCase):
  """Integration tests for rsync command."""

  @staticmethod
  def _FlatListDir(directory):
    """Performs a flat listing over directory.

    Args:
      directory: The directory to list.

    Returns:
      Listing with path separators canonicalized to '/', to make assertions
      easier for Linux vs Windows.
    """
    result = []
    for dirpath, _, filenames in os.walk(directory):
      for f in filenames:
        result.append(os.path.join(dirpath, f))
    return '\n'.join(result).replace('\\', '/')

  def _FlatListBucket(self, bucket_url_string):
    """Performs a flat listing over bucket_url_string."""
    return self.RunGsUtil(['ls', suri(bucket_url_string, '**')],
                          return_stdout=True)

  def test_invalid_args(self):
    """Tests various invalid argument cases."""
    bucket_uri = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                             contents='obj1')
    tmpdir = self.CreateTempDir()

    # rsync object to bucket.
    self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
    # rsync bucket to object.
    self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
    # rsync bucket to non-existent bucket.
    self.RunGsUtil(['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
                   expected_status=1)
    # rsync object to dir.
    self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
    # rsync dir to object.
    self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
    # rsync dir to non-existent bucket.
    self.RunGsUtil(['rsync', tmpdir, self.nonexistent_bucket_name],
                   expected_status=1)

  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d
  # (test_bucket_to_bucket) as representative of handling without the -d
  # option. This provides reasonable test coverage because the -d handling is
  # src/dest URI-type independent, and keeps the test case combinations more
  # manageable.

  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we
    # detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have new objects added from the source bucket
      # (without removing the extraneous object found in the dest bucket),
      # and without the subdir objects synchronized.
      self.assertEquals(listing2,
                        set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', suri(bucket1_uri), suri(bucket2_uri)], return_stderr=True))

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have objects that were newly added to the first
      # bucket (without removing extraneous dest bucket objects), including
      # the subdir objects synchronized by -r.
      self.assertEquals(
          listing2,
          set(['/obj1', '/obj2', '/obj4', '/obj6', '/obj7', '/subdir/obj3',
               '/subdir/obj5']))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  def test_bucket_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d between 2 buckets works."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each. Make the overlapping
    # objects named the same but with different content, to test that we
    # detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Second bucket should have content like the first bucket but without
      # the subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket sync uses
      # checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket1_uri, 'obj2')], return_stdout=True))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket2_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj6',
                      contents='obj6')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj7',
                      contents='obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket1_uri),
                      suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should mirror the first bucket, including the subdir
      # objects synchronized by -r.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  # Test sequential upload as well as parallel composite upload case.
  @PerformsFileToObjectUpload
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync dir to bucket works correctly."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3',
                        contents='subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (dir to bucket sync doesn't use
      # checksums unless you specify -c).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('OBJ2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket sync with -c uses checksums).
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-c', tmpdir, suri(bucket_uri)],
        return_stderr=True))

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj7',
                      contents='obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Bucket should mirror the dir, including the subdir objects
      # synchronized by -r.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', tmpdir, suri(bucket_uri)],
        return_stderr=True))

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_dir_to_dir_minus_d(self):
    """Tests that flat and recursive rsync dir to dir works correctly."""
    # Create 2 dirs with 1 overlapping file, 1 extra file at root
    # level in each, and 1 extra file 1 level down in each. Make the
    # overlapping files named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    subdir1 = os.path.join(tmpdir1, 'subdir1')
    subdir2 = os.path.join(tmpdir2, 'subdir2')
    os.mkdir(subdir1)
    os.mkdir(subdir2)
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir1, file_name='obj3',
                        contents='subdir1/obj3')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir2, file_name='obj5',
                        contents='subdir2/obj5')

    self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 files
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest files that had same length but different
    # checksums were not synchronized (dir to dir sync doesn't use checksums
    # unless you specify -c).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('OBJ2', '\n'.join(f.readlines()))

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))

    # Now rerun the sync with the -c option.
    self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir1 files
    # synchronized.
    self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
    # Assert that the src/dest files that had same length but different
    # content were synchronized (dir to dir sync with -c uses checksums).
    with open(os.path.join(tmpdir1, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj2')) as f:
      self.assertEquals('obj2', '\n'.join(f.readlines()))

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-c', tmpdir1, tmpdir2], return_stderr=True))

    # Now add and remove some files in both dirs and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, 'obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should mirror dir1, including the subdir1 files synchronized
    # by -r.
    self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', tmpdir1, tmpdir2], return_stderr=True))

  def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
    """Tests concurrently building listing from multiple tmp file ranges."""
    # Create 2 dirs, where each dir has 1000 objects and differing names.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    for i in range(0, 1000):
      self.CreateTempFile(tmpdir=tmpdir1, file_name='d1-%s' % i, contents='x')
      self.CreateTempFile(tmpdir=tmpdir2, file_name='d2-%s' % i, contents='y')

    # We open a new temp file each time we reach rsync_buffer_lines of
    # listing output. On Windows, this can result in a 'too many open file
    # handles' error, so choose a larger buffer size so as not to open so
    # many files.
    rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                            '50' if IS_WINDOWS else '2')]
    # Run gsutil with a config option that makes the buffer size << # files.
    with SetBotoConfigForTest(rsync_buffer_config):
      self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
    listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
    self.assertEquals(listing1, listing2)

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))

  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync bucket to dir works correctly."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each. Make the
    # overlapping objects named the same but with different content, to test
    # that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='obj2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj3',
                      contents='subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
    self.CreateTempFile(tmpdir=subdir, file_name='obj5',
                        contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were not synchronized (bucket to dir sync doesn't use
      # checksums unless you specify -c).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('OBJ2', '\n'.join(f.readlines()))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))

    # Now rerun the sync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir sync with -c uses checksums).
      self.assertEquals('obj2', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'obj2')], return_stdout=True))
      with open(os.path.join(tmpdir, 'obj2')) as f:
        self.assertEquals('obj2', '\n'.join(f.readlines()))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-c', suri(bucket_uri), tmpdir],
        return_stderr=True))

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj6',
                      contents='obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, 'obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir/obj3']))
      # Dir should mirror the bucket, including the subdir objects
      # synchronized by -r.
      self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir/obj3']))
    _Check3()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-r', suri(bucket_uri), tmpdir],
        return_stderr=True))

  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that name case changes work correctly.

    Example: Windows filenames are case-preserving in what you wrote, but
    case-insensitive when compared. If you synchronize from FS to cloud and
    then change case-naming in local files, you could end up with this
    situation:

    Cloud copy is called .../TiVo/...
    FS copy is called .../Tivo/...
    Then, if you sync from cloud to FS, if rsync doesn't recognize that on
    Windows these names are identical, each rsync run will cause both a copy
    and a delete to be executed.
    """
    # Create bucket and dir with same objects, but dir copy has different
    # name case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir],
          return_stderr=True)
      # Nothing should be copied or removed under Windows.
      if IS_WINDOWS:
        self.assertEquals(NO_CHANGES, output)
      else:
        self.assertNotEquals(NO_CHANGES, output)
    _Check1()

  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj1',
                      contents='obj1')
    # Create a placeholder like what can be left over by web GUI tools.
    key_uri = bucket_uri.clone_replace_name('/')
    key_uri.set_contents_from_string('')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir],
          return_stderr=True)
      listing1 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '//']))
      # Dir should not have the placeholder object.
      self.assertEquals(listing2, set(['/obj1']))
      # Output should report what happened.
      self.assertRegexpMatches(output, r'.*Skipping cloud sub-directory.*')
    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, file_name='obj1',
                                 contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3',
                        contents='subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket_uri, object_name='obj4',
                      contents='obj4')
    self.CreateObject(bucket_uri=bucket_uri, object_name='subdir/obj5',
                      contents='subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensures listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1,
          set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlinks, and
      # without the subdir objects synchronized.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj5']))
    _Check1()

    # Now remove the invalid symlink and run without -e, and see that the
    # symlink gets copied (as the file to which it points). Use @Retry as
    # hedge against bucket listing eventual consistency.
    os.unlink(bad_symlink_path)

    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(suri(bucket_uri), self._FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEquals(
          listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should now also have the symlink (copied as the file it
      # points to), but still without the subdir objects synchronized.
      self.assertEquals(
          listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
      self.assertEquals('obj1', self.RunGsUtil(
          ['cat', suri(bucket_uri, 'symlink1')], return_stdout=True))
    _Check2()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True))

  @SkipForS3('S3 does not support composite objects')
  def test_bucket_to_bucket_minus_d_with_composites(self):
    """Tests that rsync works with composite objects (which have no MD5s)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')
    self.RunGsUtil(
        ['compose', suri(bucket1_uri, 'obj1'), suri(bucket1_uri, 'obj2'),
         suri(bucket1_uri, 'obj3')])
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj4',
                      contents='obj4')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Second bucket should mirror the first bucket.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))
    _Check()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  def test_bucket_to_bucket_minus_d_empty_dest(self):
    """Tests working with empty dest bucket (iter runs out before src iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1', '/obj2']))
      self.assertEquals(listing2, set(['/obj1', '/obj2']))
    _Check()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  def test_bucket_to_bucket_minus_d_empty_src(self):
    """Tests working with empty src bucket (iter runs out before dst iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj1',
                      contents='obj1')
    self.CreateObject(bucket_uri=bucket2_uri, object_name='obj2',
                      contents='obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
      stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                              expected_status=1, return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
    _Check()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  def test_rsync_minus_d_minus_p(self):
    """Tests that rsync -p preserves ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri, object_name='obj1',
                      contents='obj1')
    # Set public-read (non-default) ACL so we can verify that rsync -p works.
    self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      """Tests rsync -p works as expected."""
      self.RunGsUtil(['rsync', '-d', '-p', suri(bucket1_uri),
                      suri(bucket2_uri)])
      listing1 = _TailSet(suri(bucket1_uri), self._FlatListBucket(bucket1_uri))
      listing2 = _TailSet(suri(bucket2_uri), self._FlatListBucket(bucket2_uri))
      self.assertEquals(listing1, set(['/obj1']))
      self.assertEquals(listing2, set(['/obj1']))
      acl1_json = self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')],
                                 return_stdout=True)
      acl2_json = self.RunGsUtil(['acl', 'get', suri(bucket2_uri, 'obj1')],
                                 return_stdout=True)
      self.assertEquals(acl1_json, acl2_json)
    _Check()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-d', '-p', suri(bucket1_uri), suri(bucket2_uri)],
        return_stderr=True))

  def test_rsync_to_nonexistent_bucket_subdir(self):
    """Tests that rsync to non-existent bucket subdir works."""
    # Create dir with some objects and empty bucket.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_url = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
    self.CreateTempFile(tmpdir=subdir, file_name='obj3',
                        contents='subdir/obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
      listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
      listing2 = _TailSet(
          suri(bucket_url, 'subdir'),
          self._FlatListBucket(bucket_url.clone_replace_name('subdir')))
      # Dir should have un-altered content.
      self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir/obj3']))
      # Bucket subdir should have content like dir.
      self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir/obj3']))
    _Check1()

    # Check that re-running the same rsync command causes no more changes.
    self.assertEquals(NO_CHANGES, self.RunGsUtil(
        ['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')],
        return_stderr=True))

class TestMv(testcase.GsUtilIntegrationTestCase):
  """Integration tests for mv command."""

  def test_moving(self):
    """Tests moving objects between two buckets, one with 2 objects, one empty."""
    bucket1_uri = self.CreateBucket(test_objects=2)
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    bucket2_uri = self.CreateBucket()
    self.AssertNObjectsInBucket(bucket2_uri, 0)

    # Move two objects from bucket1 to bucket2.
    objs = [
        bucket1_uri.clone_replace_key(key).versionless_uri
        for key in bucket1_uri.list_bucket()
    ]
    cmd = (['-m', 'mv'] + objs + [suri(bucket2_uri)])
    stderr = self.RunGsUtil(cmd, return_stderr=True)
    # Rewrite API may output an additional 'Copying' progress notification.
    self.assertGreaterEqual(stderr.count('Copying'), 2)
    self.assertLessEqual(stderr.count('Copying'), 4)
    self.assertEqual(stderr.count('Copying') % 2, 0)
    self.assertEqual(stderr.count('Removing'), 2)
    self.AssertNObjectsInBucket(bucket1_uri, 0)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Remove one of the objects.
    objs = [
        bucket2_uri.clone_replace_key(key).versionless_uri
        for key in bucket2_uri.list_bucket()
    ]
    obj1 = objs[0]
    self.RunGsUtil(['rm', obj1])
    self.AssertNObjectsInBucket(bucket1_uri, 0)
    self.AssertNObjectsInBucket(bucket2_uri, 1)

    # Move the 1 remaining object back.
    objs = [
        suri(bucket2_uri.clone_replace_key(key))
        for key in bucket2_uri.list_bucket()
    ]
    cmd = (['-m', 'mv'] + objs + [suri(bucket1_uri)])
    stderr = self.RunGsUtil(cmd, return_stderr=True)
    # Rewrite API may output an additional 'Copying' progress notification.
    self.assertGreaterEqual(stderr.count('Copying'), 1)
    self.assertLessEqual(stderr.count('Copying'), 2)
    self.assertEqual(stderr.count('Removing'), 1)
    self.AssertNObjectsInBucket(bucket1_uri, 1)
    self.AssertNObjectsInBucket(bucket2_uri, 0)

  def test_move_bucket_to_dir(self):
    """Tests moving bucket objects to a local directory."""
    bucket_uri = self.CreateBucket(test_objects=2)
    self.AssertNObjectsInBucket(bucket_uri, 2)
    tmpdir = self.CreateTempDir()
    self.RunGsUtil(['mv', suri(bucket_uri, '*'), tmpdir])
    dir_list = []
    for dirname, _, filenames in os.walk(tmpdir):
      for filename in filenames:
        dir_list.append(os.path.join(dirname, filename))
    self.assertEqual(len(dir_list), 2)
    self.AssertNObjectsInBucket(bucket_uri, 0)

  def test_move_dir_to_bucket(self):
    """Tests moving a local directory to a bucket."""
    bucket_uri = self.CreateBucket()
    dir_to_move = self.CreateTempDir(test_files=2)
    self.RunGsUtil(['mv', dir_to_move, suri(bucket_uri)])
    self.AssertNObjectsInBucket(bucket_uri, 2)

  @SequentialAndParallelTransfer
  def test_stdin_args(self):
    """Tests mv with the -I option."""
    tmpdir = self.CreateTempDir()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
    fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
    bucket_uri = self.CreateBucket()
    self.RunGsUtil(['mv', '-I', suri(bucket_uri)],
                   stdin='\n'.join((fpath1, fpath2)))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
      self.assertIn(os.path.basename(fpath1), stdout)
      self.assertIn(os.path.basename(fpath2), stdout)
      self.assertNumLines(stdout, 2)
    _Check1()

  def test_mv_no_clobber(self):
    """Tests mv with the -n option."""
    fpath1 = self.CreateTempFile(contents='data1')
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
    stderr = self.RunGsUtil(['mv', '-n', fpath1, suri(object_uri)],
                            return_stderr=True)
    # Copy should be skipped and source file should not be removed.
    self.assertIn('Skipping existing item: %s' % suri(object_uri), stderr)
    self.assertNotIn('Removing %s' % suri(fpath1), stderr)
    # Object content should be unchanged.
    contents = self.RunGsUtil(['cat', suri(object_uri)], return_stdout=True)
    self.assertEqual(contents, 'data2')

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                       'Test requires fast crcmod.')
  def test_mv_preserve_posix_no_errors(self):
    """Tests use of the -P flag with mv.

    Specifically tests combinations of POSIX attributes in metadata that will
    pass validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    TestCpMvPOSIXNoErrors(self, bucket_uri, tmpdir, is_cp=False)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_mv_preserve_posix_errors(self):
    """Tests use of the -P flag with mv.

    Specifically, combinations of POSIX attributes in metadata that will fail
    validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    obj = self.CreateObject(bucket_uri=bucket_uri, object_name='obj',
                            contents='obj')
    TestCpMvPOSIXErrors(self, bucket_uri, obj, tmpdir, is_cp=False)

# Load the gsutil version number and append it to boto.UserAgent so the value
# is set before anything instantiates boto. (If parts of boto were
# instantiated first those parts would have the old value of boto.UserAgent,
# so we wouldn't be guaranteed that all code paths send the correct user
# agent.)
boto.UserAgent += ' gsutil/%s (%s)' % (gslib.VERSION, sys.platform)

from gslib.util import UsingCrcmodExtension
from gslib.util import IS_OSX

CRCMOD_PATH = os.path.join(THIRD_PARTY_DIR, 'crcmod', 'python2')
CRCMOD_OSX_PATH = os.path.join(THIRD_PARTY_DIR, 'crcmod_osx')

try:
  import crcmod
except ImportError:
  crcmod = None
if not UsingCrcmodExtension(crcmod):
  local_crcmod_path = CRCMOD_OSX_PATH if IS_OSX else CRCMOD_PATH
  sys.path.insert(0, local_crcmod_path)


def RunMain():
  import gslib.__main__
  sys.exit(gslib.__main__.main())


if __name__ == '__main__':
  RunMain()
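# For reference, a minimal sketch of the check UsingCrcmodExtension performs.
# This is an assumption based on crcmod's layout (crcmod.crcmod._usingExtension
# is set when the compiled C extension loaded), not a copy of gslib.util: it
# returns False for a missing module or a pure-Python crcmod, which is what
# triggers the bundled-crcmod path insertion above.
def _UsingCrcmodExtensionSketch(crcmod_module):
  return bool(crcmod_module and
              getattr(crcmod_module, 'crcmod', None) and
              getattr(crcmod_module.crcmod, '_usingExtension', False))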