Example #1
0
 def testInvalidCSEKConfigurationRaises(self):
   """Verifies that a malformed CSEK value raises CommandException."""
   # Base64-like string that resembles a CSEK but is not a valid AES256 key.
   malformed_csek = 'aP7KbmxLqDw1SWHeKvlfKOVgNRNNZc8L2sFz8ybLN==='
   expected_snippet = (
       'Configured encryption_key or decryption_key looked like a CSEK')
   with self.assertRaises(CommandException) as caught:
     CryptoKeyWrapperFromKey(malformed_csek)
   self.assertIn(expected_snippet, caught.exception.reason)
Example #2
0
 def testInvalidCMEKConfigurationRaises(self):
   """Verifies that a malformed CMEK resource name raises CommandException."""
   # Resembles a CMEK resource path but has a corrupted cryptoKeys portion.
   malformed_cmek = (
       'projects/my-project/locations/some-location/keyRings/keyring/'
       'cryptoKeyWHOOPS-INVALID-RESOURCE-PORTION/somekey')
   expected_snippet = (
       'Configured encryption_key or decryption_key looked like a CMEK')
   with self.assertRaises(CommandException) as caught:
     CryptoKeyWrapperFromKey(malformed_cmek)
   self.assertIn(expected_snippet, caught.exception.reason)
Example #3
0
    def _test_rewrite_resume_or_restart(self,
                                        initial_dec_key,
                                        initial_enc_key,
                                        new_dec_key=None,
                                        new_enc_key=None):
        """Tests that rewrite resumes, or restarts if the object's key changed.

        Halts a rewrite partway through (leaving a tracker file), optionally
        changes the object's encryption key, then re-runs the rewrite and
        asserts that it either resumed (same keys) or restarted (keys changed)
        and that the rewritten object's hash matches the source.

        Args:
          initial_dec_key: Initial key the object is encrypted with, used as
              decryption key in the first rewrite call.
          initial_enc_key: Initial encryption key to rewrite the object with,
              used as encryption key in the first rewrite call.
          new_dec_key: Decryption key for the second rewrite call; if
              specified, the object is overwritten with a different encryption
              key in between the first and second rewrite calls, and this key
              is used for the second rewrite call.
          new_enc_key: Encryption key for the second rewrite call; if
              specified, this key is used for the second rewrite call,
              otherwise the initial key is used.

        Returns:
          None
        """
        # The rewrite API only exists in JSON; skip under the XML test API.
        if self.test_api == ApiSelector.XML:
            return unittest.skip('Rewrite API is only supported in JSON.')
        bucket_uri = self.CreateBucket()
        # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
        # need 2 response from the service: 1 success, 1 failure prior to
        # completion.
        object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                       object_name='foo',
                                       contents=('12' * ONE_MIB) + 'bar',
                                       prefer_json_api=True,
                                       encryption_key=initial_dec_key)
        gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                                DiscardMessagesQueue(), self.default_provider)
        # Fetch metadata while the object's CSEK is configured for decryption.
        with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                    initial_dec_key)]):
            src_obj_metadata = gsutil_api.GetObjectMetadata(
                object_uri.bucket_name,
                object_uri.object_name,
                provider=self.default_provider,
                fields=['bucket', 'contentType', 'etag', 'name'])
        # Rewrite in place: source and destination are the same object.
        dst_obj_metadata = src_obj_metadata
        tracker_file_name = GetRewriteTrackerFilePath(src_obj_metadata.bucket,
                                                      src_obj_metadata.name,
                                                      dst_obj_metadata.bucket,
                                                      dst_obj_metadata.name,
                                                      self.test_api)
        # Key wrappers for the first call; the "2" variants fall back to the
        # initial keys when no new key was supplied for the second call.
        decryption_tuple = CryptoKeyWrapperFromKey(initial_dec_key)
        decryption_tuple2 = CryptoKeyWrapperFromKey(new_dec_key
                                                    or initial_dec_key)
        encryption_tuple = CryptoKeyWrapperFromKey(initial_enc_key)
        encryption_tuple2 = CryptoKeyWrapperFromKey(new_enc_key
                                                    or initial_enc_key)

        try:
            # First rewrite: the halting callback aborts the copy after 2 MiB
            # so that a resumable tracker file is left behind.
            try:
                gsutil_api.CopyObject(
                    src_obj_metadata,
                    dst_obj_metadata,
                    progress_callback=HaltingRewriteCallbackHandler(ONE_MIB *
                                                                    2).call,
                    max_bytes_per_call=ONE_MIB,
                    decryption_tuple=decryption_tuple,
                    encryption_tuple=encryption_tuple)
                self.fail('Expected RewriteHaltException.')
            except RewriteHaltException:
                pass

            # Tracker file should be left over.
            self.assertTrue(os.path.exists(tracker_file_name))

            if new_dec_key:
                # Recreate the object with a different encryption key.
                self.CreateObject(bucket_uri=bucket_uri,
                                  object_name='foo',
                                  contents=('12' * ONE_MIB) + 'bar',
                                  prefer_json_api=True,
                                  encryption_key=new_dec_key,
                                  gs_idempotent_generation=urigen(object_uri))

            with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                        new_dec_key or initial_dec_key)]):
                original_md5 = gsutil_api.GetObjectMetadata(
                    src_obj_metadata.bucket,
                    src_obj_metadata.name,
                    fields=['customerEncryption', 'md5Hash']).md5Hash

            if new_dec_key or new_enc_key:
                # Keys changed, rewrite should be restarted.
                progress_callback = EnsureRewriteRestartCallbackHandler(
                    ONE_MIB).call
            else:
                # Keys are the same, rewrite should be resumed.
                progress_callback = EnsureRewriteResumeCallbackHandler(
                    ONE_MIB * 2).call

            # Now resume. Callback ensures the appropriate resume/restart behavior.
            gsutil_api.CopyObject(src_obj_metadata,
                                  dst_obj_metadata,
                                  progress_callback=progress_callback,
                                  max_bytes_per_call=ONE_MIB,
                                  decryption_tuple=decryption_tuple2,
                                  encryption_tuple=encryption_tuple2)

            # Copy completed; tracker file should be deleted.
            self.assertFalse(os.path.exists(tracker_file_name))

            final_enc_key = new_enc_key or initial_enc_key

            # Verify content integrity under the final encryption key.
            with SetBotoConfigForTest([('GSUtil', 'encryption_key',
                                        final_enc_key)]):
                self.assertEqual(
                    original_md5,
                    gsutil_api.GetObjectMetadata(
                        dst_obj_metadata.bucket,
                        dst_obj_metadata.name,
                        fields=['customerEncryption', 'md5Hash']).md5Hash,
                    'Error: Rewritten object\'s hash doesn\'t match source object.'
                )
        finally:
            # Clean up if something went wrong.
            DeleteTrackerFile(tracker_file_name)
Example #4
0
    def CatUrlStrings(self,
                      url_strings,
                      show_header=False,
                      start_byte=0,
                      end_byte=None,
                      cat_out_fd=None):
        """Prints each of the url strings to stdout.

        Args:
          url_strings: String iterable.
          show_header: If true, print a header per file.
          start_byte: Starting byte of the file to print, used for
              constructing range requests.
          end_byte: Ending byte of the file to print; used for constructing
              range requests. If this is negative, the start_byte is ignored
              and an end range is sent over HTTP (such as range: bytes -9).
          cat_out_fd: File descriptor to which output should be written.
              Defaults to stdout if no file descriptor is supplied.

        Returns:
          0 on success.

        Raises:
          CommandException if no URLs can be found.
        """
        printed_one = False
        # This should refer to whatever sys.stdin refers to when this method is
        # run, not when this method is defined, so we do the initialization here
        # rather than define sys.stdin as the cat_out_fd parameter's default value.
        if cat_out_fd is None:
            cat_out_fd = sys.stdout
        # We manipulate the stdout so that all other data other than the Object
        # contents go to stderr.
        old_stdout = sys.stdout
        sys.stdout = sys.stderr
        try:
            # '-' (optionally with a file:// scheme) means "cat stdin".
            if url_strings and url_strings[0] in ('-', 'file://-'):
                self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
            else:
                for url_str in url_strings:
                    did_some_work = False
                    # TODO: Get only the needed fields here.
                    for blr in self.command_obj.WildcardIterator(
                            url_str
                    ).IterObjects(
                            bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
                        decryption_keywrapper = None
                        # If the object is CSEK-encrypted, look up a matching
                        # decryption key in the boto config by its SHA256 hash.
                        if (blr.root_object
                                and blr.root_object.customerEncryption and
                                blr.root_object.customerEncryption.keySha256):
                            decryption_key = FindMatchingCSEKInBotoConfig(
                                blr.root_object.customerEncryption.keySha256,
                                config)
                            if not decryption_key:
                                raise EncryptionException(
                                    'Missing decryption key with SHA256 hash %s. No decryption '
                                    'key matches object %s' %
                                    (blr.root_object.customerEncryption.
                                     keySha256, blr.url_string))
                            decryption_keywrapper = CryptoKeyWrapperFromKey(
                                decryption_key)

                        did_some_work = True
                        if show_header:
                            # NOTE: Python 2 print statements; headers go to
                            # stderr because stdout was redirected above.
                            if printed_one:
                                print
                            print '==> %s <==' % blr
                            printed_one = True
                        cat_object = blr.root_object
                        storage_url = StorageUrlFromString(blr.url_string)
                        if storage_url.IsCloudUrl():
                            compressed_encoding = ObjectIsGzipEncoded(
                                cat_object)
                            self.command_obj.gsutil_api.GetObjectMedia(
                                cat_object.bucket,
                                cat_object.name,
                                cat_out_fd,
                                compressed_encoding=compressed_encoding,
                                start_byte=start_byte,
                                end_byte=end_byte,
                                object_size=cat_object.size,
                                generation=storage_url.generation,
                                decryption_tuple=decryption_keywrapper,
                                provider=storage_url.scheme)
                        else:
                            # Local file URL: stream the file contents directly.
                            with open(storage_url.object_name, 'rb') as f:
                                self._WriteBytesBufferedFileToFile(
                                    f, cat_out_fd)
                    if not did_some_work:
                        raise CommandException(NO_URLS_MATCHED_TARGET %
                                               url_str)
        finally:
            # Always restore stdout, even if an exception was raised.
            sys.stdout = old_stdout

        return 0
Example #5
0
    def RunCommand(self):
        """Command entry point for the rewrite command.

        Parses sub-options, collects URL arguments (from argv, or stdin when
        -I is given), caches CSEK decryption keys from the boto config so
        workers don't recompute their SHA256 hashes, then performs rewrite
        requests (in parallel when -m is used).

        Returns:
          0 on success.

        Raises:
          CommandException: If arguments are invalid, no transformation flag
              was supplied, or any object could not be rewritten.
        """
        self.continue_on_error = self.parallel_operations
        self.csek_hash_to_keywrapper = {}
        self.dest_storage_class = None
        self.no_preserve_acl = False
        self.read_args_from_stdin = False
        self.supported_transformation_flags = ['-k', '-s']
        self.transform_types = set()

        self.op_failure_count = 0
        self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
        self.boto_file_encryption_sha256 = (
            self.boto_file_encryption_keywrapper.crypto_key_sha256
            if self.boto_file_encryption_keywrapper else None)

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.add(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True
                elif o == '-s':
                    self.transform_types.add(_TransformTypes.STORAGE_CLASS)
                    self.dest_storage_class = NormalizeStorageClass(a)

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        url_strs_generator = GenerationCheckGenerator(url_strs)

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs_generator = ConvertRecursiveToFlatWildcard(
                url_strs_generator)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs_generator,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations,
            bucket_listing_fields=['name', 'size'])

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            # Perform the same recursive-to-flat conversion on original url_strs so
            # that it is as true to the original iterator as possible.
            seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                seek_ahead_url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Rather than have each worker repeatedly calculate the sha256 hash for each
        # decryption_key in the boto config, do this once now and cache the results.
        for i in range(0, MAX_DECRYPTION_KEYS):
            key_number = i + 1
            keywrapper = CryptoKeyWrapperFromKey(
                config.get('GSUtil', 'decryption_key%s' % str(key_number),
                           None))
            if keywrapper is None:
                # Stop at first attribute absence in lexicographical iteration.
                break
            if keywrapper.crypto_type == CryptoKeyType.CSEK:
                self.csek_hash_to_keywrapper[
                    keywrapper.crypto_key_sha256] = keywrapper
        # Also include the encryption_key, since it should be used to decrypt and
        # then encrypt if the object's CSEK should remain the same.
        if self.boto_file_encryption_sha256 is not None:
            self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
                self.boto_file_encryption_keywrapper)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'],
                   seek_ahead_iterator=seek_ahead_iterator)

        if self.op_failure_count:
            # Only pluralize for multiple failures. (The previous check,
            # `'s' if self.op_failure_count else ''`, was inside this branch
            # and thus always yielded 's', printing "1 files/objects".)
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
Example #6
0
    def __init__(self,
                 src_url,
                 src_obj_size,
                 gsutil_api,
                 compressed_encoding=False,
                 progress_callback=None,
                 download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE,
                 decryption_key=None):
        """Initializes the daisy chain wrapper.

        Args:
          src_url: Source CloudUrl to copy from.
          src_obj_size: Size of source object.
          gsutil_api: gsutil Cloud API to use for the copy.
          compressed_encoding: If true, source object has
              content-encoding: gzip.
          progress_callback: Optional callback function for progress
              notifications for the download thread. Receives calls with
              arguments (bytes_transferred, total_size).
          download_chunk_size: Integer number of bytes to download per
              GetObjectMedia request. This is the upper bound of bytes that
              may be unnecessarily downloaded if there is a break in the
              resumable upload.
          decryption_key: Base64-encoded decryption key for the source
              object, if any.

        Raises:
          Exception: If the download thread does not signal that it started
              within 60 seconds.
        """
        # Current read position for the upload file pointer.
        self.position = 0
        self.buffer = deque()

        self.bytes_buffered = 0
        # Maximum amount of bytes in memory at a time.
        self.max_buffer_size = 1024 * 1024  # 1 MiB

        self._download_chunk_size = download_chunk_size

        # We save one buffer's worth of data as a special case for boto,
        # which seeks back one buffer and rereads to compute hashes. This is
        # unnecessary because we can just compare cloud hash digests at the end,
        # but it allows this to work without modifying boto.
        self.last_position = 0
        self.last_data = None

        # Protects buffer, position, bytes_buffered, last_position, and last_data.
        self.lock = CreateLock()

        # Protects download_exception.
        self.download_exception_lock = CreateLock()

        self.src_obj_size = src_obj_size
        self.src_url = src_url
        self.compressed_encoding = compressed_encoding
        self.decryption_tuple = CryptoKeyWrapperFromKey(decryption_key)

        # This is safe to use the upload and download thread because the download
        # thread calls only GetObjectMedia, which creates a new HTTP connection
        # independent of gsutil_api. Thus, it will not share an HTTP connection
        # with the upload.
        self.gsutil_api = gsutil_api

        # If self.download_thread dies due to an exception, it is saved here so
        # that it can also be raised in the upload thread.
        self.download_exception = None
        self.download_thread = None
        self.progress_callback = progress_callback
        self.download_started = threading.Event()
        self.stop_download = threading.Event()
        self.StartDownloadThread(progress_callback=self.progress_callback)
        # Event.wait returns True iff the event was set within the timeout;
        # test truthiness directly rather than the non-idiomatic `== False`
        # comparison used previously.
        if not self.download_started.wait(60):
            raise Exception(
                'Could not start download thread after 60 seconds.')
Example #7
0
    def CreateObjectJson(self,
                         contents,
                         bucket_name=None,
                         object_name=None,
                         encryption_key=None,
                         mtime=None,
                         storage_class=None,
                         gs_idempotent_generation=None,
                         kms_key_name=None):
        """Creates a test object (GCS provider only) using the JSON API.

        Args:
          contents: The contents to write to the object.
          bucket_name: Name of bucket to place the object in. If not
              specified, a new temporary bucket is created.
          object_name: The name to use for the object. If not specified, a
              temporary test object name is constructed.
          encryption_key: AES256 encryption key to use when creating the
              object, if any.
          mtime: The modification time of the file in POSIX time (seconds
              since UTC 1970-01-01). If not specified, this defaults to the
              current system time.
          storage_class: String representing the storage class to use for the
              object.
          gs_idempotent_generation: For use when overwriting an object for
              which you know the previously uploaded generation. Create GCS
              object idempotently by supplying this generation number as a
              precondition and assuming the current object is correct on
              precondition failure. Defaults to 0 (new object); to disable,
              set to None.
          kms_key_name: Fully-qualified name of the KMS key that should be
              used to encrypt the object. Note that this is currently only
              valid for 'gs' objects.

        Returns:
          An apitools Object for the created object.
        """
        bucket_name = bucket_name or self.CreateBucketJson().name
        object_name = object_name or self.MakeTempName('obj')
        preconditions = Preconditions(gen_match=gs_idempotent_generation)
        # Store mtime (if given) in the object's custom metadata.
        custom_metadata = apitools_messages.Object.MetadataValue(
            additionalProperties=[])
        if mtime is not None:
            CreateCustomMetadata({MTIME_ATTR: mtime}, custom_metadata)
        object_metadata = apitools_messages.Object(
            name=object_name,
            metadata=custom_metadata,
            bucket=bucket_name,
            contentType='application/octet-stream',
            storageClass=storage_class,
            kmsKeyName=kms_key_name)
        encryption_keywrapper = CryptoKeyWrapperFromKey(encryption_key)
        try:
            return self.json_api.UploadObject(
                cStringIO.StringIO(contents),
                object_metadata,
                provider='gs',
                encryption_tuple=encryption_keywrapper,
                preconditions=preconditions)
        except PreconditionException:
            # Precondition failed: the object at that generation already
            # exists. If a known generation was supplied, treat this as the
            # idempotent-success case and return the existing object's
            # metadata (decrypting with the supplied key); otherwise re-raise.
            if gs_idempotent_generation is None:
                raise
            with SetBotoConfigForTest([('GSUtil', 'decryption_key1',
                                        encryption_key)]):
                return self.json_api.GetObjectMetadata(bucket_name,
                                                       object_name)