import base64
import hashlib
import io
import logging

# is_fips_mode, MD5, InternalError, BufferedPartReader, and READ_BUFFER_SIZE are
# SDK-internal names assumed to be defined or imported elsewhere in this module.
logger = logging.getLogger(__name__)


def calculate_md5(file_path, offset, chunk):
    """
    Calculate the base64 encoded MD5 hash for a part of a file.

    :param str file_path: Path to the file
    :param int offset: Offset where the part starts in the file
    :param int chunk: Number of bytes in the part
    :return: Base64 encoded MD5 hash
    :rtype: str
    """
    # Determine whether we can use the hashlib version of md5 or the bundled
    # version of md5.
    if is_fips_mode():
        try:
            m = MD5.md5()
        except InternalError as ex:
            logger.warning(
                "An exception occurred due to {}. Falling back to hashlib.new('md5', usedforsecurity=False) for MD5."
                .format(ex))
            m = hashlib.new('md5', usedforsecurity=False)
    else:
        m = hashlib.md5()
    with io.open(file_path, mode='rb') as f:
        bpr = BufferedPartReader(f, offset, chunk)
        while True:
            part = bpr.read(READ_BUFFER_SIZE)
            if part == b'':
                break
            m.update(part)
    return base64.b64encode(m.digest()).decode("utf-8")
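
# A minimal usage sketch (not from the SDK source): computing per-part hashes for
# consecutive chunks of a file, e.g. to pre-validate a multipart upload. The
# helper name and the default part size are illustrative assumptions.
def _example_part_hashes(file_path, part_size=8 * 1024 * 1024):
    import os
    file_size = os.path.getsize(file_path)
    hashes = []
    offset = 0
    while offset < file_size:
        # The final part may be smaller than part_size.
        chunk = min(part_size, file_size - offset)
        hashes.append(calculate_md5(file_path, offset, chunk))
        offset += chunk
    return hashes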
def _upload_stream_part(self, part_num, part_bytes, **kwargs):
    try:
        if is_fips_mode():
            try:
                m = MD5.md5()
            except InternalError as ex:
                logger.warning(
                    "An exception occurred due to {}. Falling back to hashlib.new('md5', usedforsecurity=False) for MD5."
                    .format(ex))
                m = hashlib.new('md5', usedforsecurity=False)
        else:
            m = hashlib.md5()
        m.update(part_bytes)
        new_kwargs = {
            'content_md5': base64.b64encode(m.digest()).decode("utf-8")
        }
        if 'opc_client_request_id' in kwargs:
            new_kwargs['opc_client_request_id'] = kwargs['opc_client_request_id']

        # A call to upload_part can be retried because we've already read() the data,
        # so the request can be re-sent with the same part_bytes.
        remaining_tries = self.max_retries
        while remaining_tries > 0:
            try:
                response = self.object_storage_client.upload_part(
                    self.manifest["namespace"],
                    self.manifest["bucketName"],
                    self.manifest["objectName"],
                    self.manifest["uploadId"],
                    part_num + 1,  # Internally this is 0-based but Object Storage is 1-based
                    io.BytesIO(part_bytes),
                    **new_kwargs)
            except Exception as e:
                if self._is_exception_retryable(e) and remaining_tries > 1:
                    remaining_tries -= 1
                else:
                    if 'shared_dict' in kwargs:
                        kwargs['shared_dict']['should_continue'] = False
                        kwargs['shared_dict']['exceptions'].put(e)
                    raise
            else:
                break

        if response.status == 200:
            self.manifest['parts'].append({
                'etag': response.headers['etag'],
                'opc_md5': str(response.headers['opc-content-md5']),
                'part_num': part_num
            })
            if 'progress_callback' in kwargs:
                kwargs['progress_callback'](len(part_bytes))
    except Exception as e:
        if 'shared_dict' in kwargs:
            kwargs['shared_dict']['should_continue'] = False
            kwargs['shared_dict']['exceptions'].put(e)
        raise
    finally:
        if 'semaphore' in kwargs:
            kwargs['semaphore'].release()
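
# A minimal sketch (not the SDK's actual dispatch code) of how a caller might fan
# _upload_stream_part out across worker threads. The semaphore bounds in-flight
# parts and shared_dict carries cross-thread failure state, matching the kwargs
# the method inspects; assembler, parts, and max_workers are assumptions here.
import queue
import threading


def _example_upload_parts(assembler, parts, max_workers=4):
    semaphore = threading.Semaphore(max_workers)
    shared_dict = {'should_continue': True, 'exceptions': queue.Queue()}
    threads = []
    for part_num, part_bytes in enumerate(parts):
        semaphore.acquire()  # released by _upload_stream_part's finally block
        if not shared_dict['should_continue']:
            # A worker already failed; release the slot we just took and stop.
            semaphore.release()
            break
        t = threading.Thread(
            target=assembler._upload_stream_part,
            args=(part_num, part_bytes),
            kwargs={'semaphore': semaphore, 'shared_dict': shared_dict})
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
    # Surface the first failure captured by any worker thread.
    if not shared_dict['exceptions'].empty():
        raise shared_dict['exceptions'].get()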