Пример #1
0
    def _testSeekForward(self, initial_seek):
        """Seeks forward from the start of the file, then reads the remainder.

        Models an upload that is resumed after a process break: the wrapper is
        moved from offset zero to initial_seek (as if the server already held
        those bytes) and the rest of the file is read. Afterwards the
        digester's MD5 must equal the MD5 computed over the file's contents.

        Args:
          initial_seek: Offset, in bytes, to seek to before reading.

        Raises:
          AssertionError: if initial_seek is not smaller than the test file,
            the reported position or amount of remaining data is wrong, or
            the hashes do not match.
        """
        test_path = self._GetTestFile()
        file_size = os.path.getsize(test_path)

        too_large_msg = ('initial_seek must be less than test file size %s '
                         '(but was actually: %s)' % (file_size, initial_seek))
        self.assertLess(initial_seek, file_size, too_large_msg)

        digesters = {'md5': GetMd5()}
        with open(test_path, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(
                stream, digesters, {'md5': GetMd5}, self._dummy_url,
                self.logger)
            wrapper.seek(initial_seek)
            self.assertEqual(wrapper.tell(), initial_seek)

            # Everything after the seek target should still be readable.
            remainder = wrapper.read()
            self.assertEqual(len(remainder), file_size - initial_seek)

        # Compare against a digest taken from a fresh pass over the file.
        with open(test_path, 'rb') as stream:
            expected_md5 = CalculateMd5FromContents(stream)
        self.assertEqual(expected_md5, digesters['md5'].hexdigest())
Пример #2
0
 def testGetsMd5HashOnRedHatSystem(self, mock_md5):
     """Checks GetMd5 retries with usedforsecurity=False after a ValueError."""
     # The output is not compared with a real hashlib.md5 call because that
     # call could trigger an error on a non-Red Hat system.
     # The first mocked call raises ValueError to simulate a FIPS-mode
     # distribution; the second call returns the stand-in value.
     stand_in = 'hash'
     mock_md5.side_effect = [ValueError, stand_in]

     result = GetMd5(b'')

     self.assertEqual(result, stand_in)
     expected_calls = [
         mock.call(b''),
         mock.call(b'', usedforsecurity=False),
     ]
     self.assertEqual(mock_md5.mock_calls, expected_calls)
Пример #3
0
 def testReadInChunks(self):
     """Hashes the test file through the streaming wrapper and checks MD5."""
     source_path = self._GetTestFile()
     md5_digesters = {'md5': GetMd5()}
     with open(source_path, 'rb') as stream:
         streaming_wrapper = ResumableStreamingJsonUploadWrapper(
             stream, TRANSFER_BUFFER_SIZE, test_small_buffer=True)
         # CalculateHashesFromContents reads in chunks, but does not seek.
         CalculateHashesFromContents(streaming_wrapper, md5_digesters)

     # Reference digest computed directly from the file.
     with open(source_path, 'rb') as stream:
         expected_md5 = CalculateMd5FromContents(stream)
     self.assertEqual(expected_md5, md5_digesters['md5'].hexdigest())
Пример #4
0
 def testReadToEOF(self):
     """Reads a whole file through the hashing wrapper and checks the MD5."""
     digesters = {'md5': GetMd5()}
     # Four transfer buffers' worth of a single repeated byte.
     payload = b'a' * TRANSFER_BUFFER_SIZE * 4
     source_path = self.CreateTempFile(contents=payload)

     with open(source_path, 'rb') as stream:
         hashing_wrapper = HashingFileUploadWrapper(
             stream, digesters, {'md5': GetMd5}, self._dummy_url,
             self.logger)
         # A single unbounded read consumes the file to EOF.
         hashing_wrapper.read()

     with open(source_path, 'rb') as stream:
         expected_md5 = CalculateMd5FromContents(stream)
     self.assertEqual(expected_md5, digesters['md5'].hexdigest())
Пример #5
0
    def _testSeekBack(self, initial_position, seek_back_amount):
        """Tests reading then seeking backwards.

        This function simulates an upload that is resumed after a connection
        break. It reads one transfer buffer at a time until it reaches
        initial_position, then seeks backwards (as if the server did not
        receive some of the bytes) and reads to the end of the file, ensuring
        the hash matches the original file upon completion.

        Args:
          initial_position: Initial number of bytes to read before seek. Must
            be less than the size of the test file.
          seek_back_amount: Number of bytes to seek backward. Must not exceed
            initial_position.

        Raises:
          AssertionError on invalid arguments, wrong amount of data remaining
          or hash mismatch.
        """
        tmp_file = self._GetTestFile()
        tmp_file_len = os.path.getsize(tmp_file)

        # Seeking back by exactly initial_position (i.e. to offset 0) is
        # allowed, so the check is >= and the message says "must not exceed".
        self.assertGreaterEqual(
            initial_position, seek_back_amount,
            'seek_back_amount must not exceed initial position %s '
            '(but was actually: %s)' % (initial_position, seek_back_amount))
        self.assertLess(
            initial_position, tmp_file_len,
            'initial_position must be less than test file size %s '
            '(but was actually: %s)' % (tmp_file_len, initial_position))

        # Offset the wrapper is rewound to after the initial reads.
        resume_position = initial_position - seek_back_amount

        digesters = {'md5': GetMd5()}
        with open(tmp_file, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(stream, digesters,
                                               {'md5': GetMd5},
                                               self._dummy_url, self.logger)
            # Read buffer-sized chunks until at most one buffer remains before
            # initial_position, then read the remainder in a single call.
            position = 0
            while position < initial_position - TRANSFER_BUFFER_SIZE:
                data = wrapper.read(TRANSFER_BUFFER_SIZE)
                position += len(data)
            wrapper.read(initial_position - position)

            wrapper.seek(resume_position)
            self.assertEqual(wrapper.tell(), resume_position)
            data = wrapper.read()
            self.assertEqual(len(data), tmp_file_len - resume_position)
        with open(tmp_file, 'rb') as stream:
            actual = CalculateMd5FromContents(stream)
        self.assertEqual(actual, digesters['md5'].hexdigest())
Пример #6
0
 def testInvalidSeekAway(self):
     """Tests seeking to EOF and then reading without first doing a SEEK_SET.

     After a partial read the wrapper is moved to the end of the file; a read
     issued from there is expected to raise CommandException with a message
     about the hashing file pointer being in an unknown position.
     """
     tmp_file = self._GetTestFile()
     digesters = {'md5': GetMd5()}
     with open(tmp_file, 'rb') as stream:
         wrapper = HashingFileUploadWrapper(stream, digesters,
                                            {'md5': GetMd5},
                                            self._dummy_url, self.logger)
         wrapper.read(TRANSFER_BUFFER_SIZE)
         wrapper.seek(0, os.SEEK_END)
         # assertRaises fails the test by itself if no CommandException is
         # raised, so no explicit self.fail() call is needed.
         with self.assertRaises(CommandException) as raised:
             wrapper.read()
     self.assertIn(
         'Read called on hashing file pointer in an unknown position',
         str(raised.exception))
Пример #7
0
    def _testSeekAway(self, initial_read):
        """Reads a prefix, seeks to EOF and back, then reads the remainder.

        Simulates a size check on the input file: after reading initial_read
        bytes, the wrapper is moved to the end of the file and then back to
        the position where reading stopped. The rest of the file is read and
        the digester's MD5 is compared with the MD5 of the file's contents.

        Args:
          initial_read: Number of bytes to read before the seek to EOF.

        Raises:
          AssertionError: if initial_read is not smaller than the test file,
            a reported position or the amount of remaining data is wrong, or
            the hashes do not match.
        """
        test_path = self._GetTestFile()
        file_size = os.path.getsize(test_path)

        too_large_msg = ('initial_read must be less than test file size %s '
                         '(but was actually: %s)' % (file_size, initial_read))
        self.assertLess(initial_read, file_size, too_large_msg)

        digesters = {'md5': GetMd5()}
        with open(test_path, 'rb') as stream:
            wrapper = HashingFileUploadWrapper(
                stream, digesters, {'md5': GetMd5}, self._dummy_url,
                self.logger)
            wrapper.read(initial_read)
            self.assertEqual(wrapper.tell(), initial_read)

            # Jump to EOF, as a size probe would, and confirm the position.
            wrapper.seek(0, os.SEEK_END)
            self.assertEqual(wrapper.tell(), file_size)

            # Return to where reading stopped and consume the rest.
            wrapper.seek(initial_read, os.SEEK_SET)
            remainder = wrapper.read()
            self.assertEqual(len(remainder), file_size - initial_read)

        with open(test_path, 'rb') as stream:
            expected_md5 = CalculateMd5FromContents(stream)
        self.assertEqual(expected_md5, digesters['md5'].hexdigest())
Пример #8
0
def HashRewriteParameters(src_obj_metadata,
                          dst_obj_metadata,
                          projection,
                          src_generation=None,
                          gen_match=None,
                          meta_gen_match=None,
                          canned_acl=None,
                          max_bytes_per_call=None,
                          src_dec_key_sha256=None,
                          dst_enc_key_sha256=None,
                          fields=None):
    """Returns an MD5 hex digest summarizing the inputs of a rewrite call.

    A rewrite can only be resumed when its input parameters are identical, so
    the rewrite tracker file has to represent those parameters. Hashing them
    makes the comparison cheap. If a user performs a same-source/
    same-destination rewrite via a different command (for example, with a
    changed ACL), the hashes will not match and the rewrite restarts from the
    beginning.

    Args:
      src_obj_metadata: apitools Object describing source object. Must include
        bucket, name, and etag.
      dst_obj_metadata: apitools Object describing destination object. Must
        include bucket and object name.
      projection: Projection used for the API call.
      src_generation: Optional source generation.
      gen_match: Optional generation precondition.
      meta_gen_match: Optional metageneration precondition.
      canned_acl: Optional canned ACL string.
      max_bytes_per_call: Optional maximum bytes rewritten per call.
      src_dec_key_sha256: Optional SHA256 hash string of decryption key for
        source object.
      dst_enc_key_sha256: Optional SHA256 hash string of encryption key for
        destination object.
      fields: Optional fields to include in response to call.

    Returns:
      MD5 hex digest of the input parameters, or None if required parameters
      are missing.
    """
    # Guard clauses: every required piece must be present and truthy.
    source_complete = (src_obj_metadata and src_obj_metadata.bucket
                       and src_obj_metadata.name and src_obj_metadata.etag)
    if not source_complete:
        return None
    destination_complete = (dst_obj_metadata and dst_obj_metadata.bucket
                            and dst_obj_metadata.name)
    if not destination_complete or not projection:
        return None

    # The sequence below determines the digest, so it must stay exactly as
    # is; note that fields comes before max_bytes_per_call here.
    hashed_params = (
        src_obj_metadata,
        dst_obj_metadata,
        projection,
        src_generation,
        gen_match,
        meta_gen_match,
        canned_acl,
        fields,
        max_bytes_per_call,
        src_dec_key_sha256,
        dst_enc_key_sha256,
    )

    digest = GetMd5()
    for value in hashed_params:
        # Tracker file matching changed between gsutil 4.15 -> 4.16 and will
        # cause rewrites to start over from the beginning on a gsutil version
        # upgrade.
        if value is None:
            # Unset optional parameters contribute nothing to the digest.
            continue
        digest.update(six.text_type(value).encode('UTF8'))
    return digest.hexdigest()
Пример #9
0
 def testGetsMd5HashOnNonRedHatSystem(self, mock_md5):
     """Checks GetMd5 returns the mocked md5 result after a single call."""
     # The output is not compared with a real hashlib.md5 call because that
     # call could trigger an error on a Red Hat system.
     stand_in = 'hash'
     mock_md5.return_value = stand_in

     result = GetMd5(b'')

     self.assertEqual(result, stand_in)
     mock_md5.assert_called_once_with(b'')