Example #1
  def RunCommand(self):
    """Command entry point for the rsync command."""
    self._ParseOpts()
    if self.compute_checksums and not UsingCrcmodExtension(crcmod):
      self.logger.warn(SLOW_CRCMOD_WARNING)

    src_url = self._InsistContainer(self.args[0])
    dst_url = self._InsistContainer(self.args[1])

    # Tracks if any copy or rm operations failed.
    self.op_failure_count = 0

    # List of attributes to share/manage across multiple processes in
    # parallel (-m) mode.
    shared_attrs = ['op_failure_count']

    # Perform sync requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    diff_iterator = _DiffIterator(self, src_url, dst_url)
    self.logger.info('Starting synchronization')
    try:
      self.Apply(_RsyncFunc, diff_iterator, _RsyncExceptionHandler,
                 shared_attrs, arg_checker=_DiffToApplyArgChecker,
                 fail_on_error=True)
    finally:
      diff_iterator.CleanUpTempFiles()

    if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException(
          '%d file%s/object%s could not be copied/removed.' %
          (self.op_failure_count, plural_str, plural_str))
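
For reference, a minimal sketch of an exception handler compatible with the Apply call above; assuming, per the shared_attrs comment, that it increments the shared op_failure_count (names and body are illustrative, not necessarily gsutil's exact implementation):

def _RsyncExceptionHandler(cls, e):
  """Simple handler that logs the error and records the failure (sketch)."""
  cls.logger.error(str(e))
  # op_failure_count appears in shared_attrs above, so this increment is
  # aggregated across processes/threads in parallel (-m) mode.
  cls.op_failure_count += 1
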
Example #2
    def RunCommand(self):
        """Command entry point for the version command."""
        long_form = False
        if self.sub_opts:
            for o, _ in self.sub_opts:
                if o == '-l':
                    long_form = True

        if GetConfigFilePaths():
            config_paths = ', '.join(GetConfigFilePaths())
        else:
            config_paths = 'no config found'

        shipped_checksum = gslib.CHECKSUM
        try:
            cur_checksum = self._ComputeCodeChecksum()
        except IOError:
            cur_checksum = 'MISSING FILES'
        if shipped_checksum == cur_checksum:
            checksum_ok_str = 'OK'
        else:
            checksum_ok_str = '!= %s' % shipped_checksum

        sys.stdout.write('gsutil version: %s\n' % gslib.VERSION)

        if long_form:

            long_form_output = (
                'checksum: {checksum} ({checksum_ok})\n'
                'boto version: {boto_version}\n'
                'python version: {python_version}\n'
                'OS: {os_version}\n'
                'multiprocessing available: {multiprocessing_available}\n'
                'using cloud sdk: {cloud_sdk}\n'
                'config path(s): {config_paths}\n'
                'gsutil path: {gsutil_path}\n'
                'compiled crcmod: {compiled_crcmod}\n'
                'installed via package manager: {is_package_install}\n'
                'editable install: {is_editable_install}\n')

            sys.stdout.write(
                long_form_output.format(
                    checksum=cur_checksum,
                    checksum_ok=checksum_ok_str,
                    boto_version=boto.__version__,
                    python_version=sys.version.replace('\n', ''),
                    os_version='%s %s' %
                    (platform.system(), platform.release()),
                    multiprocessing_available=(
                        CheckMultiprocessingAvailableAndInit().is_available),
                    cloud_sdk=(os.environ.get('CLOUDSDK_WRAPPER') == '1'),
                    config_paths=config_paths,
                    gsutil_path=gslib.GSUTIL_PATH,
                    compiled_crcmod=UsingCrcmodExtension(crcmod),
                    is_package_install=gslib.IS_PACKAGE_INSTALL,
                    is_editable_install=gslib.IS_EDITABLE_INSTALL,
                ))

        return 0
Example #3
  def Wrapper(*args, **kwargs):
    # Run the test normally once.
    func(*args, **kwargs)

    if not RUN_S3_TESTS and UsingCrcmodExtension(crcmod):
      # Try again, forcing parallel upload and sliced download.
      with SetBotoConfigForTest([
          ('GSUtil', 'parallel_composite_upload_threshold', '1'),
          ('GSUtil', 'sliced_object_download_threshold', '1'),
          ('GSUtil', 'sliced_object_download_max_components', '3'),
          ('GSUtil', 'check_hashes', 'always')]):
        func(*args, **kwargs)
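
Wrapper above is the inner function of a test decorator; a hedged sketch of the enclosing definition (the functools.wraps detail is an assumption):

import functools

def SequentialAndParallelTransfer(func):
  """Runs the decorated test once normally, then with forced parallelism."""
  @functools.wraps(func)  # Preserves the wrapped test's name for test runners.
  def Wrapper(*args, **kwargs):
    func(*args, **kwargs)  # Full body as in the example above.
  return Wrapper
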
Example #4
    def RunCommand(self):
        """Command entry point for the version command."""
        long_form = False
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-l':
                    long_form = True

        config_path = GetConfigFilePath()

        shipped_checksum = gslib.CHECKSUM
        try:
            cur_checksum = self._ComputeCodeChecksum()
        except IOError:
            cur_checksum = 'MISSING FILES'
        if shipped_checksum == cur_checksum:
            checksum_ok_str = 'OK'
        else:
            checksum_ok_str = '!= %s' % shipped_checksum

        sys.stdout.write('gsutil version %s\n' % gslib.VERSION)

        if long_form:

            long_form_output = (
                'checksum {checksum} ({checksum_ok})\n'
                'boto version {boto_version}\n'
                'python version {python_version}\n'
                'config path: {config_path}\n'
                'gsutil path: {gsutil_path}\n'
                'compiled crcmod: {compiled_crcmod}\n'
                'installed via package manager: {is_package_install}\n'
                'editable install: {is_editable_install}\n')

            sys.stdout.write(
                long_form_output.format(
                    checksum=cur_checksum,
                    checksum_ok=checksum_ok_str,
                    boto_version=boto.__version__,
                    python_version=sys.version,
                    config_path=config_path,
                    gsutil_path=gslib.GSUTIL_PATH,
                    compiled_crcmod=UsingCrcmodExtension(crcmod),
                    is_package_install=gslib.IS_PACKAGE_INSTALL,
                    is_editable_install=gslib.IS_EDITABLE_INSTALL,
                ))

        return 0
Example #5
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    Args:
      sub_opts: getopt sub-arguments for the command.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      cloud_format_func: Function used for formatting the hash in the desired
                         format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    cloud_format_func = lambda digest: digest
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
          cloud_format_func = lambda digest: Base64ToHexHash(digest)
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      logger.warn(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, cloud_format_func, output_format
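
A hedged usage sketch (assumed helper name, not gsutil's actual hash command code) showing how a caller might turn the returned flags into hashlib-style digesters:

import crcmod.predefined
from hashlib import md5

def _BuildDigesters(calc_crc32c, calc_md5):
  """Maps the _ParseOpts flags onto digester objects."""
  digesters = {}
  if calc_md5:
    digesters['md5'] = md5()
  if calc_crc32c:
    # crcmod's predefined Crc objects mimic the hashlib update/digest API.
    digesters['crc32c'] = crcmod.predefined.Crc('crc-32c')
  return digesters
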
Example #6
def GetDownloadHashAlgs(logger, consider_md5=False, consider_crc32c=False):
    """Returns a dict of hash algorithms for validating an object.

  Args:
    logger: logging.Logger for outputting log messages.
    consider_md5: If True, consider using a md5 hash.
    consider_crc32c: If True, consider using a crc32c hash.

  Returns:
    Dict of (string, hash algorithm).

  Raises:
    CommandException if hash algorithms satisfying the boto config file
    cannot be returned.
  """
    check_hashes_config = config.get('GSUtil', 'check_hashes',
                                     CHECK_HASH_IF_FAST_ELSE_FAIL)
    if check_hashes_config == CHECK_HASH_NEVER:
        return {}

    hash_algs = {}
    if consider_md5:
        hash_algs['md5'] = md5
    elif consider_crc32c:
        # If the cloud provider supplies a CRC, we'll compute a checksum to
        # validate if we're using a native crcmod installation and MD5 isn't
        # offered as an alternative.
        if UsingCrcmodExtension(crcmod):
            hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
        elif not hash_algs:
            if check_hashes_config == CHECK_HASH_IF_FAST_ELSE_FAIL:
                raise CommandException(_SLOW_CRC_EXCEPTION_TEXT)
            elif check_hashes_config == CHECK_HASH_IF_FAST_ELSE_SKIP:
                logger.warn(_NO_HASH_CHECK_WARNING)
            elif check_hashes_config == CHECK_HASH_ALWAYS:
                logger.warn(_SLOW_CRCMOD_DOWNLOAD_WARNING)
                hash_algs['crc32c'] = lambda: crcmod.predefined.Crc('crc-32c')
            else:
                raise CommandException(
                    'Your boto config \'check_hashes\' option is misconfigured.'
                )

    return hash_algs
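
A hedged sketch (illustrative name) of how the returned dict of algorithm factories might be applied to validate downloaded bytes:

def _ComputeHexDigests(data, hash_algs):
  """Instantiates each factory, feeds it the data, and returns hex digests."""
  digesters = dict((alg, factory()) for alg, factory in hash_algs.items())
  for digester in digesters.values():
    digester.update(data)
  return dict((alg, d.hexdigest()) for alg, d in digesters.items())
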
Example #7
class TestMv(testcase.GsUtilIntegrationTestCase):
    """Integration tests for mv command."""
    def test_moving(self):
        """Tests moving two buckets, one with 2 objects and one with 0 objects."""
        bucket1_uri = self.CreateBucket(test_objects=2)
        self.AssertNObjectsInBucket(bucket1_uri, 2)
        bucket2_uri = self.CreateBucket()
        self.AssertNObjectsInBucket(bucket2_uri, 0)

        # Move two objects from bucket1 to bucket2.
        objs = [
            bucket1_uri.clone_replace_key(key).versionless_uri
            for key in bucket1_uri.list_bucket()
        ]
        cmd = (['-m', 'mv'] + objs + [suri(bucket2_uri)])
        stderr = self.RunGsUtil(cmd, return_stderr=True)
        # Rewrite API may output an additional 'Copying' progress notification.
        self.assertGreaterEqual(
            stderr.count('Copying'), 2,
            'stderr did not contain 2 "Copying" lines:\n%s' % stderr)
        self.assertLessEqual(
            stderr.count('Copying'), 4,
            'stderr did not contain <= 4 "Copying" lines:\n%s' % stderr)
        self.assertEqual(
            stderr.count('Copying') % 2, 0,
            'stderr did not contain even number of "Copying" lines:\n%s' %
            stderr)
        self.assertEqual(
            stderr.count('Removing'), 2,
            'stderr did not contain 2 "Removing" lines:\n%s' % stderr)

        self.AssertNObjectsInBucket(bucket1_uri, 0)
        self.AssertNObjectsInBucket(bucket2_uri, 2)

        # Remove one of the objects.
        objs = [
            bucket2_uri.clone_replace_key(key).versionless_uri
            for key in bucket2_uri.list_bucket()
        ]
        obj1 = objs[0]
        self.RunGsUtil(['rm', obj1])

        self.AssertNObjectsInBucket(bucket1_uri, 0)
        self.AssertNObjectsInBucket(bucket2_uri, 1)

        # Move the 1 remaining object back.
        objs = [
            suri(bucket2_uri.clone_replace_key(key))
            for key in bucket2_uri.list_bucket()
        ]
        cmd = (['-m', 'mv'] + objs + [suri(bucket1_uri)])
        stderr = self.RunGsUtil(cmd, return_stderr=True)
        # Rewrite API may output an additional 'Copying' progress notification.
        self.assertGreaterEqual(
            stderr.count('Copying'), 1,
            'stderr did not contain >= 1 "Copying" lines:\n%s' % stderr)
        self.assertLessEqual(
            stderr.count('Copying'), 2,
            'stderr did not contain <= 2 "Copying" lines:\n%s' % stderr)
        self.assertEqual(stderr.count('Removing'), 1)

        self.AssertNObjectsInBucket(bucket1_uri, 1)
        self.AssertNObjectsInBucket(bucket2_uri, 0)

    def test_move_bucket_to_dir(self):
        """Tests moving a local directory to a bucket."""
        bucket_uri = self.CreateBucket(test_objects=2)
        self.AssertNObjectsInBucket(bucket_uri, 2)
        tmpdir = self.CreateTempDir()
        self.RunGsUtil(['mv', suri(bucket_uri, '*'), tmpdir])
        dir_list = []
        for dirname, _, filenames in os.walk(tmpdir):
            for filename in filenames:
                dir_list.append(os.path.join(dirname, filename))
        self.assertEqual(len(dir_list), 2)
        self.AssertNObjectsInBucket(bucket_uri, 0)

    def test_move_dir_to_bucket(self):
        """Tests moving a local directory to a bucket."""
        bucket_uri = self.CreateBucket()
        dir_to_move = self.CreateTempDir(test_files=2)
        self.RunGsUtil(['mv', dir_to_move, suri(bucket_uri)])
        self.AssertNObjectsInBucket(bucket_uri, 2)

    @SequentialAndParallelTransfer
    def test_stdin_args(self):
        """Tests mv with the -I option."""
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
        fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
        bucket_uri = self.CreateBucket()
        self.RunGsUtil(['mv', '-I', suri(bucket_uri)],
                       stdin='\n'.join((fpath1, fpath2)))

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            stdout = self.RunGsUtil(['ls', suri(bucket_uri)],
                                    return_stdout=True)
            self.assertIn(os.path.basename(fpath1), stdout)
            self.assertIn(os.path.basename(fpath2), stdout)
            self.assertNumLines(stdout, 2)

        _Check1()

    def test_mv_no_clobber(self):
        """Tests mv with the -n option."""
        fpath1 = self.CreateTempFile(contents='data1')
        bucket_uri = self.CreateBucket()
        object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
        stderr = self.RunGsUtil(
            ['mv', '-n', fpath1, suri(object_uri)], return_stderr=True)
        # Copy should be skipped and source file should not be removed.
        self.assertIn('Skipping existing item: %s' % suri(object_uri), stderr)
        self.assertNotIn('Removing %s' % suri(fpath1), stderr)
        # Object content should be unchanged.
        contents = self.RunGsUtil(['cat', suri(object_uri)],
                                  return_stdout=True)
        self.assertEqual(contents, 'data2')

    @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
    @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                         'Test requires fast crcmod.')
    def test_mv_preserve_posix_bucket_to_dir_no_errors(self):
        """Tests use of the -P flag with mv from a bucket to a local dir.

    Specifically tests combinations of POSIX attributes in metadata that will
    pass validation.
    """
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()
        TestCpMvPOSIXBucketToLocalNoErrors(self,
                                           bucket_uri,
                                           tmpdir,
                                           is_cp=False)

    @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
    def test_mv_preserve_posix_bucket_to_dir_errors(self):
        """Tests use of the -P flag with mv from a bucket to a local dir.

    Specifically, combinations of POSIX attributes in metadata that will fail
    validation.
    """
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()

        obj = self.CreateObject(bucket_uri=bucket_uri,
                                object_name='obj',
                                contents='obj')
        TestCpMvPOSIXBucketToLocalErrors(self,
                                         bucket_uri,
                                         obj,
                                         tmpdir,
                                         is_cp=False)

    @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
    def test_mv_preserve_posix_dir_to_bucket_no_errors(self):
        """Tests use of the -P flag with mv from a local dir to a bucket."""
        bucket_uri = self.CreateBucket()
        TestCpMvPOSIXLocalToBucketNoErrors(self, bucket_uri, is_cp=False)

    @SkipForS3('Test is only relevant for gs storage classes.')
    def test_mv_early_deletion_warning(self):
        """Tests that mv on a recent nearline object warns about early deletion."""
        if self.test_api == ApiSelector.XML:
            return unittest.skip('boto does not return object storage class')

        bucket_uri = self.CreateBucket(storage_class='NEARLINE')
        object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='obj')
        stderr = self.RunGsUtil(
            ['mv', suri(object_uri),
             suri(bucket_uri, 'foo')],
            return_stderr=True)
        self.assertIn(
            'Warning: moving nearline object %s may incur an early deletion '
            'charge, because the original object is less than 30 days old '
            'according to the local system time.' % suri(object_uri), stderr)
Example #8
class TestRsync(testcase.GsUtilIntegrationTestCase):
    """Integration tests for rsync command."""
    @staticmethod
    def _FlatListDir(directory):
        """Perform a flat listing over directory.

    Args:
      directory: The directory to list

    Returns:
      Listings with path separators canonicalized to '/', to make assertions
      easier for Linux vs Windows.
    """
        result = []
        for dirpath, _, filenames in os.walk(directory):
            for f in filenames:
                result.append(os.path.join(dirpath, f))
        return '\n'.join(result).replace('\\', '/')

    def _FlatListBucket(self, bucket_url_string):
        """Perform a flat listing over bucket_url_string."""
        return self.RunGsUtil(['ls', suri(bucket_url_string, '**')],
                              return_stdout=True)

    def test_invalid_args(self):
        """Tests various invalid argument cases."""
        bucket_uri = self.CreateBucket()
        obj1 = self.CreateObject(bucket_uri=bucket_uri,
                                 object_name='obj1',
                                 contents='obj1')
        tmpdir = self.CreateTempDir()
        # rsync object to bucket.
        self.RunGsUtil(
            ['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
        # rsync bucket to object.
        self.RunGsUtil(
            ['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
        # rsync bucket to non-existent bucket.
        self.RunGsUtil(
            ['rsync', suri(bucket_uri), self.nonexistent_bucket_name],
            expected_status=1)
        # rsync object to dir.
        self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
        # rsync dir to object.
        self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
        # rsync dir to non-existent bucket.
        self.RunGsUtil(
            ['rsync', tmpdir,
             suri(obj1), self.nonexistent_bucket_name],
            expected_status=1)

    # Note: The tests below exercise the cases
    # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
    # all the cases but have just one test without -d (test_bucket_to_bucket)
    # as representative of handling without the -d option. This provides
    # reasonable test coverage because the -d handling is src/dest URI-type
    # independent, and keeps the test case combinations more manageable.

    def test_bucket_to_bucket(self):
        """Tests that flat and recursive rsync between 2 buckets works correctly."""
        # Create 2 buckets with 1 overlapping object, 1 extra object at root level
        # in each, and 1 extra object 1 level down in each. Make the overlapping
        # objects named the same but with different content, to test that we detect
        # and properly copy in that case.
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj2',
                          contents='obj2')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='subdir/obj3',
                          contents='subdir/obj3')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj2',
                          contents='OBJ2')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj4',
                          contents='obj4')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='subdir/obj5',
                          contents='subdir/obj5')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            # First bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Second bucket should have new objects added from source bucket
            # (without removing extraneous objects found in dest bucket), and
            # without the subdir objects synchronized.
            self.assertEquals(listing2,
                              set(['/obj1', '/obj2', '/obj4', '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were correctly synchronized (bucket to bucket sync uses
            # checksums).
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket1_uri, 'obj2')],
                               return_stdout=True))
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket2_uri, 'obj2')],
                               return_stdout=True))

        _Check1()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

        # Now add and remove some objects in each bucket and test rsync -r.
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj6',
                          contents='obj6')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj7',
                          contents='obj7')
        self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
        self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check2():
            self.RunGsUtil(
                ['rsync', '-r',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            # First bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))
            # Second bucket should have objects that were newly added to the
            # first bucket (without removing extraneous dest bucket objects),
            # and without the subdir objects synchronized.
            self.assertEquals(
                listing2,
                set([
                    '/obj1', '/obj2', '/obj4', '/obj6', '/obj7',
                    '/subdir/obj3', '/subdir/obj5'
                ]))

        _Check2()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-r',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

    def test_bucket_to_bucket_minus_d(self):
        """Tests that flat and recursive rsync between 2 buckets works correctly."""
        # Create 2 buckets with 1 overlapping object, 1 extra object at root level
        # in each, and 1 extra object 1 level down in each. Make the overlapping
        # objects named the same but with different content, to test that we detect
        # and properly copy in that case.
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj2',
                          contents='obj2')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='subdir/obj3',
                          contents='subdir/obj3')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj2',
                          contents='OBJ2')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj4',
                          contents='obj4')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='subdir/obj5',
                          contents='subdir/obj5')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            # First bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Second bucket should have content like first bucket but without the
            # subdir objects synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were correctly synchronized (bucket to bucket sync uses
            # checksums).
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket1_uri, 'obj2')],
                               return_stdout=True))
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket2_uri, 'obj2')],
                               return_stdout=True))

        _Check1()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

        # Now add and remove some objects in each bucket and test rsync -r.
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj6',
                          contents='obj6')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj7',
                          contents='obj7')
        self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
        self.RunGsUtil(['rm', suri(bucket2_uri, 'obj2')])

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check2():
            self.RunGsUtil(
                ['rsync', '-d', '-r',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            # First bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))
            # Second bucket should have content like first bucket but without the
            # subdir objects synchronized.
            self.assertEquals(listing2, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))

        _Check2()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d', '-r',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

    # Test sequential upload as well as parallel composite upload case.
    @PerformsFileToObjectUpload
    @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                         'Test requires fast crcmod.')
    def test_dir_to_bucket_minus_d(self):
        """Tests that flat and recursive rsync dir to bucket works correctly."""
        # Create dir and bucket with 1 overlapping object, 1 extra object at root
        # level in each, and 1 extra object 1 level down in each. Make the
        # overlapping objects named the same but with different content, to test
        # that we detect and properly copy in that case.
        tmpdir = self.CreateTempDir()
        subdir = os.path.join(tmpdir, 'subdir')
        os.mkdir(subdir)
        bucket_uri = self.CreateBucket()
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
        self.CreateTempFile(tmpdir=subdir,
                            file_name='obj3',
                            contents='subdir/obj3')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj2',
                          contents='OBJ2')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj4',
                          contents='obj4')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='subdir/obj5',
                          contents='subdir/obj5')

        # Need to make sure the bucket listing is caught up; otherwise the
        # first rsync may not see obj2 and would overwrite it.
        self.AssertNObjectsInBucket(bucket_uri, 3)

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            # Dir should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Bucket should have content like dir but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were not synchronized (dir to bucket sync doesn't use checksums
            # unless you specify -c).
            with open(os.path.join(tmpdir, 'obj2')) as f:
                self.assertEquals('obj2', '\n'.join(f.readlines()))
            self.assertEquals(
                'OBJ2',
                self.RunGsUtil(['cat', suri(bucket_uri, 'obj2')],
                               return_stdout=True))

        _Check1()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', tmpdir,
                            suri(bucket_uri)],
                           return_stderr=True))

        # Now rerun the sync with the -c option.
        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check2():
            """Tests rsync -c works as expected."""
            self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            # Dir should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Bucket should have content like dir but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were synchronized (dir to bucket sync with -c uses checksums).
            with open(os.path.join(tmpdir, 'obj2')) as f:
                self.assertEquals('obj2', '\n'.join(f.readlines()))
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket_uri, 'obj2')],
                               return_stdout=True))

        _Check2()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-c', tmpdir,
                            suri(bucket_uri)],
                           return_stderr=True))

        # Now add and remove some objects in dir and bucket and test rsync -r.
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents='obj6')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj7',
                          contents='obj7')
        os.unlink(os.path.join(tmpdir, 'obj1'))
        self.RunGsUtil(['rm', suri(bucket_uri, 'obj2')])

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check3():
            self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            # Dir should have un-altered content.
            self.assertEquals(listing1, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))
            # Bucket should have content like dir but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))

        _Check3()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-r', tmpdir,
                            suri(bucket_uri)],
                           return_stderr=True))

    @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                         'Test requires fast crcmod.')
    def test_dir_to_dir_minus_d(self):
        """Tests that flat and recursive rsync dir to dir works correctly."""
        # Create 2 dirs with 1 overlapping file, 1 extra file at root
        # level in each, and 1 extra file 1 level down in each. Make the
        # overlapping files named the same but with different content, to test
        # that we detect and properly copy in that case.
        tmpdir1 = self.CreateTempDir()
        tmpdir2 = self.CreateTempDir()
        subdir1 = os.path.join(tmpdir1, 'subdir1')
        subdir2 = os.path.join(tmpdir2, 'subdir2')
        os.mkdir(subdir1)
        os.mkdir(subdir2)
        self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents='obj1')
        self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents='obj2')
        self.CreateTempFile(tmpdir=subdir1,
                            file_name='obj3',
                            contents='subdir1/obj3')
        self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents='OBJ2')
        self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents='obj4')
        self.CreateTempFile(tmpdir=subdir2,
                            file_name='obj5',
                            contents='subdir2/obj5')

        self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
        listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
        listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
        # dir1 should have un-altered content.
        self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
        # dir2 should have content like dir1 but without the subdir1 objects
        # synchronized.
        self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
        # Assert that the src/dest objects that had same length but different
        # checksums were not synchronized (dir to dir sync doesn't use checksums
        # unless you specify -c).
        with open(os.path.join(tmpdir1, 'obj2')) as f:
            self.assertEquals('obj2', '\n'.join(f.readlines()))
        with open(os.path.join(tmpdir2, 'obj2')) as f:
            self.assertEquals('OBJ2', '\n'.join(f.readlines()))

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2],
                           return_stderr=True))

        # Now rerun the sync with the -c option.
        self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
        listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
        listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
        # dir1 should have un-altered content.
        self.assertEquals(listing1, set(['/obj1', '/obj2', '/subdir1/obj3']))
        # dir2 should have content like dir but without the subdir objects
        # synchronized.
        self.assertEquals(listing2, set(['/obj1', '/obj2', '/subdir2/obj5']))
        # Assert that the src/dest objects that had same length but different
        # content were synchronized (dir to dir sync with -c uses checksums).
        with open(os.path.join(tmpdir1, 'obj2')) as f:
            self.assertEquals('obj2', '\n'.join(f.readlines()))
        with open(os.path.join(tmpdir2, 'obj2')) as f:
            self.assertEquals('obj2', '\n'.join(f.readlines()))

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2],
                           return_stderr=True))

        # Now add and remove some objects in both dirs and test rsync -r.
        self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents='obj6')
        self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents='obj7')
        os.unlink(os.path.join(tmpdir1, 'obj1'))
        os.unlink(os.path.join(tmpdir2, 'obj2'))

        self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
        listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
        listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
        # dir1 should have un-altered content.
        self.assertEquals(listing1, set(['/obj2', '/obj6', '/subdir1/obj3']))
        # dir2 should have content like dir but without the subdir objects
        # synchronized.
        self.assertEquals(listing2, set(['/obj2', '/obj6', '/subdir1/obj3']))

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2],
                           return_stderr=True))

    def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
        """Tests concurrently building listing from multiple tmp file ranges."""
        # Create 2 dirs, where each dir has 1000 objects and differing names.
        tmpdir1 = self.CreateTempDir()
        tmpdir2 = self.CreateTempDir()
        for i in range(0, 1000):
            self.CreateTempFile(tmpdir=tmpdir1,
                                file_name='d1-%s' % i,
                                contents='x')
            self.CreateTempFile(tmpdir=tmpdir2,
                                file_name='d2-%s' % i,
                                contents='y')

        # We open a new temp file each time we reach rsync_buffer_lines of
        # listing output. On Windows, this will result in a 'too many open file
        # handles' error, so choose a larger value so as not to open so many files.
        rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                                '50' if IS_WINDOWS else '2')]
        # Run gsutil with config option to make buffer size << # files.
        with SetBotoConfigForTest(rsync_buffer_config):
            self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
        listing1 = _TailSet(tmpdir1, self._FlatListDir(tmpdir1))
        listing2 = _TailSet(tmpdir2, self._FlatListDir(tmpdir2))
        self.assertEquals(listing1, listing2)

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2],
                           return_stderr=True))

    @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                         'Test requires fast crcmod.')
    def test_bucket_to_dir_minus_d(self):
        """Tests that flat and recursive rsync bucket to dir works correctly."""
        # Create bucket and dir with 1 overlapping object, 1 extra object at root
        # level in each, and 1 extra object 1 level down in each. Make the
        # overlapping objects named the same but with different content, to test
        # that we detect and properly copy in that case.
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()
        subdir = os.path.join(tmpdir, 'subdir')
        os.mkdir(subdir)
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj2',
                          contents='obj2')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='subdir/obj3',
                          contents='subdir/obj3')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='OBJ2')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents='obj4')
        self.CreateTempFile(tmpdir=subdir,
                            file_name='obj5',
                            contents='subdir/obj5')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
            listing1 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            # Bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Dir should have content like bucket but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were not synchronized (bucket to dir sync doesn't use checksums
            # unless you specify -c).
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket_uri, 'obj2')],
                               return_stdout=True))
            with open(os.path.join(tmpdir, 'obj2')) as f:
                self.assertEquals('OBJ2', '\n'.join(f.readlines()))

        _Check1()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True))

        # Now rerun the sync with the -c option.
        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check2():
            """Tests rsync -c works as expected."""
            self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
            listing1 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            # Bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Dir should have content like bucket but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))
            # Assert that the src/dest objects that had same length but different
            # content were synchronized (bucket to dir sync with -c uses checksums).
            self.assertEquals(
                'obj2',
                self.RunGsUtil(['cat', suri(bucket_uri, 'obj2')],
                               return_stdout=True))
            with open(os.path.join(tmpdir, 'obj2')) as f:
                self.assertEquals('obj2', '\n'.join(f.readlines()))

        _Check2()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-c',
                            suri(bucket_uri), tmpdir],
                           return_stderr=True))

        # Now add and remove some objects in dir and bucket and test rsync -r.
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj6',
                          contents='obj6')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents='obj7')
        self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
        os.unlink(os.path.join(tmpdir, 'obj2'))

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check3():
            self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
            listing1 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            # Bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))
            # Dir should have content like bucket but without the subdir objects
            # synchronized.
            self.assertEquals(listing2, set(['/obj2', '/obj6',
                                             '/subdir/obj3']))

        _Check3()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', '-r',
                            suri(bucket_uri), tmpdir],
                           return_stderr=True))

    def test_bucket_to_dir_minus_d_with_fname_case_change(self):
        """Tests that name case changes work correctly.

    Example:

    Windows filenames are case-preserving in what you wrote, but case-
    insensitive when compared. If you synchronize from FS to cloud and then
    change case-naming in local files, you could end up with this situation:

    Cloud copy is called .../TiVo/...
    FS copy is called      .../Tivo/...

    Then, if you sync from cloud to FS, if rsync doesn't recognize that on
    Windows these names are identical, each rsync run will cause both a copy
    and a delete to be executed.
    """
        # Create bucket and dir with same objects, but dir copy has different name
        # case.
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents='obj1')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            output = self.RunGsUtil(
                ['rsync', '-d', '-r',
                 suri(bucket_uri), tmpdir],
                return_stderr=True)
            # Nothing should be copied or removed under Windows.
            if IS_WINDOWS:
                self.assertEquals(NO_CHANGES, output)
            else:
                self.assertNotEquals(NO_CHANGES, output)

        _Check1()

    def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
        """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj1',
                          contents='obj1')
        # Create a placeholder like what can be left over by web GUI tools.
        key_uri = bucket_uri.clone_replace_name('/')
        key_uri.set_contents_from_string('')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            output = self.RunGsUtil(
                ['rsync', '-d', '-r',
                 suri(bucket_uri), tmpdir],
                return_stderr=True)
            listing1 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            listing2 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            # Bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '//']))
            # Dir should not have the placeholder object.
            self.assertEquals(listing2, set(['/obj1']))
            # Stderr should report what happened.
            self.assertRegexpMatches(output,
                                     r'.*Skipping cloud sub-directory.*')

        _Check1()

    @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
    def test_rsync_minus_d_minus_e(self):
        """Tests that rsync -e ignores symlinks."""
        tmpdir = self.CreateTempDir()
        subdir = os.path.join(tmpdir, 'subdir')
        os.mkdir(subdir)
        bucket_uri = self.CreateBucket()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir,
                                     file_name='obj1',
                                     contents='obj1')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
        self.CreateTempFile(tmpdir=subdir,
                            file_name='obj3',
                            contents='subdir/obj3')
        good_symlink_path = os.path.join(tmpdir, 'symlink1')
        os.symlink(fpath1, good_symlink_path)
        # Make a symlink that points to a non-existent path to test that -e also
        # handles that case.
        bad_symlink_path = os.path.join(tmpdir, 'symlink2')
        os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj2',
                          contents='OBJ2')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='obj4',
                          contents='obj4')
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name='subdir/obj5',
                          contents='subdir/obj5')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Ensure listings match the commented expectations."""
            self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            # Dir should have un-altered content.
            self.assertEquals(
                listing1,
                set([
                    '/obj1', '/obj2', '/subdir/obj3', '/symlink1', '/symlink2'
                ]))
            # Bucket should have content like dir but without the symlink, and
            # without subdir objects synchronized.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj5']))

        _Check1()

        # Now remove invalid symlink and run without -e, and see that symlink gets
        # copied (as file to which it points). Use @Retry as hedge against bucket
        # listing eventual consistency.
        os.unlink(bad_symlink_path)

        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check2():
            """Tests rsync works as expected."""
            self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(suri(bucket_uri),
                                self._FlatListBucket(bucket_uri))
            # Dir should have un-altered content.
            self.assertEquals(
                listing1, set(['/obj1', '/obj2', '/subdir/obj3', '/symlink1']))
            # Bucket should have content like dir but without the symlink, and
            # without subdir objects synchronized.
            self.assertEquals(
                listing2, set(['/obj1', '/obj2', '/subdir/obj5', '/symlink1']))
            self.assertEquals(
                'obj1',
                self.RunGsUtil(['cat', suri(bucket_uri, 'symlink1')],
                               return_stdout=True))

        _Check2()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-d', tmpdir,
                            suri(bucket_uri)],
                           return_stderr=True))

    @SkipForS3('S3 does not support composite objects')
    def test_bucket_to_bucket_minus_d_with_composites(self):
        """Tests that rsync works with composite objects (which don't have MD5s)."""
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj2',
                          contents='obj2')
        self.RunGsUtil([
            'compose',
            suri(bucket1_uri, 'obj1'),
            suri(bucket1_uri, 'obj2'),
            suri(bucket1_uri, 'obj3')
        ])
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj2',
                          contents='OBJ2')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj4',
                          contents='obj4')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check():
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            # First bucket should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2', '/obj3']))
            # Second bucket should now match the first, including the
            # composite obj3 (which has a CRC32C checksum but no MD5).
            self.assertEquals(listing2, set(['/obj1', '/obj2', '/obj3']))

        _Check()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))
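
    # Note (an assumption, for illustration): `gsutil compose src1 src2 dst`
    # creates dst whose content is the concatenation of src1 and src2.
    # Composite objects carry a CRC32C checksum but no MD5, which is why
    # this test exercises rsync's CRC32C comparison path.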

    def test_bucket_to_bucket_minus_d_empty_dest(self):
        """Tests working with empty dest bucket (iter runs out before src iter)."""
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj2',
                          contents='obj2')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check():
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            self.assertEquals(listing1, set(['/obj1', '/obj2']))
            self.assertEquals(listing2, set(['/obj1', '/obj2']))

        _Check()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

    def test_bucket_to_bucket_minus_d_empty_src(self):
        """Tests working with empty src bucket (iter runs out before dst iter)."""
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj1',
                          contents='obj1')
        self.CreateObject(bucket_uri=bucket2_uri,
                          object_name='obj2',
                          contents='obj2')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check():
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                                    expected_status=1,
                                    return_stderr=True)
            self.assertIn('One or more URLs matched no objects', stderr)
            stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                                    expected_status=1,
                                    return_stderr=True)
            self.assertIn('One or more URLs matched no objects', stderr)

        _Check()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

    def test_rsync_minus_d_minus_p(self):
        """Tests that rsync -p preserves ACLs."""
        bucket1_uri = self.CreateBucket()
        bucket2_uri = self.CreateBucket()
        self.CreateObject(bucket_uri=bucket1_uri,
                          object_name='obj1',
                          contents='obj1')
        # Set public-read (non-default) ACL so we can verify that rsync -p works.
        self.RunGsUtil(
            ['acl', 'set', 'public-read',
             suri(bucket1_uri, 'obj1')])

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check():
            """Tests rsync -p works as expected."""
            self.RunGsUtil(
                ['rsync', '-d', '-p',
                 suri(bucket1_uri),
                 suri(bucket2_uri)])
            listing1 = _TailSet(suri(bucket1_uri),
                                self._FlatListBucket(bucket1_uri))
            listing2 = _TailSet(suri(bucket2_uri),
                                self._FlatListBucket(bucket2_uri))
            self.assertEquals(listing1, set(['/obj1']))
            self.assertEquals(listing2, set(['/obj1']))
            acl1_json = self.RunGsUtil(
                ['acl', 'get', suri(bucket1_uri, 'obj1')], return_stdout=True)
            acl2_json = self.RunGsUtil(
                ['acl', 'get', suri(bucket2_uri, 'obj1')], return_stdout=True)
            self.assertEquals(acl1_json, acl2_json)

        _Check()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(
                ['rsync', '-d', '-p',
                 suri(bucket1_uri),
                 suri(bucket2_uri)],
                return_stderr=True))

    def test_rsync_to_nonexistent_bucket_subdir(self):
        """Tests that rsync to non-existent bucket subdir works."""
        # Create dir with some objects and empty bucket.
        tmpdir = self.CreateTempDir()
        subdir = os.path.join(tmpdir, 'subdir')
        os.mkdir(subdir)
        bucket_url = self.CreateBucket()
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents='obj1')
        self.CreateTempFile(tmpdir=tmpdir, file_name='obj2', contents='obj2')
        self.CreateTempFile(tmpdir=subdir,
                            file_name='obj3',
                            contents='subdir/obj3')

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            """Tests rsync works as expected."""
            self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
            listing1 = _TailSet(tmpdir, self._FlatListDir(tmpdir))
            listing2 = _TailSet(
                suri(bucket_url, 'subdir'),
                self._FlatListBucket(bucket_url.clone_replace_name('subdir')))
            # Dir should have un-altered content.
            self.assertEquals(listing1, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))
            # Bucket subdir should have content like dir.
            self.assertEquals(listing2, set(['/obj1', '/obj2',
                                             '/subdir/obj3']))

        _Check1()

        # Check that re-running the same rsync command causes no more changes.
        self.assertEquals(
            NO_CHANGES,
            self.RunGsUtil(['rsync', '-r', tmpdir,
                            suri(bucket_url, 'subdir')],
                           return_stderr=True))
Example #9
class TestMv(testcase.GsUtilIntegrationTestCase):
    """Integration tests for mv command."""
    def test_moving(self):
        """Tests moving two buckets, one with 2 objects and one with 0 objects."""
        bucket1_uri = self.CreateBucket(test_objects=2)
        self.AssertNObjectsInBucket(bucket1_uri, 2)
        bucket2_uri = self.CreateBucket()
        self.AssertNObjectsInBucket(bucket2_uri, 0)

        # Move two objects from bucket1 to bucket2.
        objs = [
            bucket1_uri.clone_replace_key(key).versionless_uri
            for key in bucket1_uri.list_bucket()
        ]
        cmd = (['-m', 'mv'] + objs + [suri(bucket2_uri)])
        stderr = self.RunGsUtil(cmd, return_stderr=True)
        # Rewrite API may output an additional 'Copying' progress notification.
        self.assertGreaterEqual(stderr.count('Copying'), 2)
        self.assertLessEqual(stderr.count('Copying'), 4)
        self.assertEqual(stderr.count('Copying') % 2, 0)
        self.assertEqual(stderr.count('Removing'), 2)

        self.AssertNObjectsInBucket(bucket1_uri, 0)
        self.AssertNObjectsInBucket(bucket2_uri, 2)

        # Remove one of the objects.
        objs = [
            bucket2_uri.clone_replace_key(key).versionless_uri
            for key in bucket2_uri.list_bucket()
        ]
        obj1 = objs[0]
        self.RunGsUtil(['rm', obj1])

        self.AssertNObjectsInBucket(bucket1_uri, 0)
        self.AssertNObjectsInBucket(bucket2_uri, 1)

        # Move the 1 remaining object back.
        objs = [
            suri(bucket2_uri.clone_replace_key(key))
            for key in bucket2_uri.list_bucket()
        ]
        cmd = (['-m', 'mv'] + objs + [suri(bucket1_uri)])
        stderr = self.RunGsUtil(cmd, return_stderr=True)
        # Rewrite API may output an additional 'Copying' progress notification.
        self.assertGreaterEqual(stderr.count('Copying'), 1)
        self.assertLessEqual(stderr.count('Copying'), 2)
        self.assertEqual(stderr.count('Removing'), 1)

        self.AssertNObjectsInBucket(bucket1_uri, 1)
        self.AssertNObjectsInBucket(bucket2_uri, 0)

    def test_move_bucket_to_dir(self):
        """Tests moving a local directory to a bucket."""
        bucket_uri = self.CreateBucket(test_objects=2)
        self.AssertNObjectsInBucket(bucket_uri, 2)
        tmpdir = self.CreateTempDir()
        self.RunGsUtil(['mv', suri(bucket_uri, '*'), tmpdir])
        dir_list = []
        for dirname, _, filenames in os.walk(tmpdir):
            for filename in filenames:
                dir_list.append(os.path.join(dirname, filename))
        self.assertEqual(len(dir_list), 2)
        self.AssertNObjectsInBucket(bucket_uri, 0)

    def test_move_dir_to_bucket(self):
        """Tests moving a local directory to a bucket."""
        bucket_uri = self.CreateBucket()
        dir_to_move = self.CreateTempDir(test_files=2)
        self.RunGsUtil(['mv', dir_to_move, suri(bucket_uri)])
        self.AssertNObjectsInBucket(bucket_uri, 2)

    @SequentialAndParallelTransfer
    def test_stdin_args(self):
        """Tests mv with the -I option."""
        tmpdir = self.CreateTempDir()
        fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents='data1')
        fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents='data2')
        bucket_uri = self.CreateBucket()
        self.RunGsUtil(['mv', '-I', suri(bucket_uri)],
                       stdin='\n'.join((fpath1, fpath2)))

        # Use @Retry as hedge against bucket listing eventual consistency.
        @Retry(AssertionError, tries=3, timeout_secs=1)
        def _Check1():
            stdout = self.RunGsUtil(['ls', suri(bucket_uri)],
                                    return_stdout=True)
            self.assertIn(os.path.basename(fpath1), stdout)
            self.assertIn(os.path.basename(fpath2), stdout)
            self.assertNumLines(stdout, 2)

        _Check1()

    def test_mv_no_clobber(self):
        """Tests mv with the -n option."""
        fpath1 = self.CreateTempFile(contents='data1')
        bucket_uri = self.CreateBucket()
        object_uri = self.CreateObject(bucket_uri=bucket_uri, contents='data2')
        stderr = self.RunGsUtil(
            ['mv', '-n', fpath1, suri(object_uri)], return_stderr=True)
        # Copy should be skipped and source file should not be removed.
        self.assertIn('Skipping existing item: %s' % suri(object_uri), stderr)
        self.assertNotIn('Removing %s' % suri(fpath1), stderr)
        # Object content should be unchanged.
        contents = self.RunGsUtil(['cat', suri(object_uri)],
                                  return_stdout=True)
        self.assertEqual(contents, 'data2')

    @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
    @unittest.skipUnless(UsingCrcmodExtension(crcmod),
                         'Test requires fast crcmod.')
    def test_mv_preserve_posix_no_errors(self):
        """Tests use of the -P flag with mv.

    Specifically tests combinations of POSIX attributes in metadata that will
    pass validation.
    """
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()
        TestCpMvPOSIXNoErrors(self, bucket_uri, tmpdir, is_cp=False)

    @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
    def test_mv_preserve_posix_errors(self):
        """Tests use of the -P flag with mv.

    Specifically, combinations of POSIX attributes in metadata that will fail
    validation.
    """
        bucket_uri = self.CreateBucket()
        tmpdir = self.CreateTempDir()

        obj = self.CreateObject(bucket_uri=bucket_uri,
                                object_name='obj',
                                contents='obj')
        TestCpMvPOSIXErrors(self, bucket_uri, obj, tmpdir, is_cp=False)
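

# Illustrative sketch (not from the original test file): the `-I` pattern
# exercised in test_stdin_args above, driven directly via subprocess.
# Assumes `gsutil` is on PATH and `dst_url` names a writable bucket.
import subprocess


def MvFromStdin(paths, dst_url):
    """Moves each local path in `paths` to `dst_url` via `gsutil mv -I`."""
    proc = subprocess.Popen(['gsutil', 'mv', '-I', dst_url],
                            stdin=subprocess.PIPE)
    proc.communicate('\n'.join(paths).encode('utf-8'))
    return proc.returncode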
Example #10
# Imports used below (present earlier in the original script).
import os
import sys

import boto

import gslib

# Load the gsutil version number and append it to boto.UserAgent so the value
# is set before anything instantiates boto. (If parts of boto were
# instantiated first those parts would have the old value of boto.UserAgent,
# so we wouldn't be guaranteed that all code paths send the correct user
# agent.)
boto.UserAgent += ' gsutil/%s (%s)' % (gslib.VERSION, sys.platform)

from gslib.util import UsingCrcmodExtension
from gslib.util import IS_OSX

# THIRD_PARTY_DIR is defined earlier in the original script (the path to
# gsutil's bundled third_party directory); it is assumed here.
CRCMOD_PATH = os.path.join(THIRD_PARTY_DIR, 'crcmod', 'python2')
CRCMOD_OSX_PATH = os.path.join(THIRD_PARTY_DIR, 'crcmod_osx')

try:
    import crcmod
except ImportError:
    crcmod = None

if not UsingCrcmodExtension(crcmod):
    local_crcmod_path = CRCMOD_OSX_PATH if IS_OSX else CRCMOD_PATH
    sys.path.insert(0, local_crcmod_path)
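    # The bundled copy is prepended to sys.path so that crcmod imports
    # performed later (inside gslib, via RunMain below) presumably resolve
    # to the bundled build rather than a missing or slow system install.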


def RunMain():
    import gslib.__main__
    sys.exit(gslib.__main__.main())


if __name__ == '__main__':
    RunMain()
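

# Illustrative sketch (an assumption, not gsutil's actual implementation) of
# the UsingCrcmodExtension check used above: crcmod ships a C extension, and
# the package records whether it loaded via a private flag.
def _SketchUsingCrcmodExtension(crcmod_module):
    """Returns True if crcmod's compiled C extension appears active."""
    return bool(crcmod_module and
                getattr(getattr(crcmod_module, 'crcmod', None),
                        '_usingExtension', False))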