def _get_first_null_byte_index(destination_url, offset, length):
  """Checks to see how many bytes in range have already been downloaded.

  Args:
    destination_url (storage_url.FileUrl): Has path of file being downloaded.
    offset (int): For components, index to start reading bytes at.
    length (int): For components, where to stop reading bytes.

  Returns:
    Int byte count of size of partially-downloaded file. Returns 0 if file is
    an invalid size, empty, or non-existent.
  """
  if not destination_url.exists():
    return 0

  # Component is slice of larger file. Find how much of slice is downloaded.
  first_null_byte = offset
  end_of_range = offset + length
  with files.BinaryFileReader(destination_url.object_name) as file_reader:
    file_reader.seek(offset)
    while first_null_byte < end_of_range:
      data = file_reader.read(_READ_SIZE)
      if not data:
        break
      null_byte_index = data.find(NULL_BYTE)
      if null_byte_index != -1:
        first_null_byte += null_byte_index
        break
      first_null_byte += len(data)
  return first_null_byte
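# Illustrative sketch (not part of the SDK): the helper above assumes each
# component file is pre-allocated and zero-filled, so the first NULL byte in
# the component's slice marks how far the download got. The standalone demo
# below reproduces that chunked scan with plain builtins; the chunk size,
# file contents, and names are made up for the example.
import io
import tempfile

_DEMO_READ_SIZE = 4  # Tiny chunk size so the loop iterates; real code uses far more.
_DEMO_NULL_BYTE = b'\x00'

with tempfile.NamedTemporaryFile(delete=False) as demo_file:
  # A 16-byte slice where only the first 10 bytes were actually downloaded.
  demo_file.write(b'0123456789' + _DEMO_NULL_BYTE * 6)
  demo_path = demo_file.name

offset, length = 0, 16
first_null_byte = offset
with io.open(demo_path, 'rb') as reader:
  reader.seek(offset)
  while first_null_byte < offset + length:
    data = reader.read(_DEMO_READ_SIZE)
    if not data:
      break
    index = data.find(_DEMO_NULL_BYTE)
    if index != -1:
      first_null_byte += index
      break
    first_null_byte += len(data)

assert first_null_byte == 10  # Resume the component download from byte 10.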
def execute(self, task_status_queue=None):
  """Validates and cleans up after a sliced download."""
  # Clean up master and component tracker files.
  tracker_file_util.delete_download_tracker_files(
      self._destination_resource.storage_url)

  # Validate final product of sliced download.
  # TODO(b/181340192): See if sharing and concatenating task hashes is faster.
  with files.BinaryFileReader(
      self._destination_resource.storage_url.object_name) as downloaded_file:
    # TODO(b/172048376): Test other hash algorithms.
    downloaded_file_hash_object = util.get_hash_from_file_stream(
        downloaded_file, util.HashAlgorithms.MD5)
    downloaded_file_hash_digest = util.get_base64_hash_digest_string(
        downloaded_file_hash_object)

  try:
    util.validate_object_hashes_match(
        self._destination_resource.storage_url,
        self._source_resource.md5_hash, downloaded_file_hash_digest)
  except errors.HashMismatchError:
    os.remove(self._destination_resource.storage_url.object_name)
    raise

  if _should_decompress_gzip(self._source_resource,
                             self._destination_resource):
    _ungzip_file(self._destination_resource.storage_url.object_name)
def _perform_resumable_download(self, digesters, progress_callback):
  """Resumes or starts a download that supports being resumed."""
  destination_url = self._destination_resource.storage_url
  existing_file_size = _get_valid_downloaded_byte_count(
      destination_url, self._source_resource)

  if existing_file_size:
    with files.BinaryFileReader(destination_url.object_name) as file_reader:
      # Get hash of partially-downloaded file as start for validation.
      for hash_algorithm in digesters:
        digesters[hash_algorithm] = util.get_hash_from_file_stream(
            file_reader, hash_algorithm)

  tracker_file_path, start_byte = (
      tracker_file_util.read_or_create_download_tracker_file(
          self._source_resource,
          destination_url,
          existing_file_size=existing_file_size))
  end_byte = self._source_resource.size

  self._perform_download(digesters, progress_callback,
                         cloud_api.DownloadStrategy.RESUMABLE, start_byte,
                         end_byte)
  tracker_file_util.delete_tracker_file(tracker_file_path)
def execute(self, task_status_queue=None):
  """Performs upload."""
  progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
      status_queue=task_status_queue,
      size=self._length,
      source_url=self._source_resource.storage_url,
      destination_url=self._destination_resource.storage_url,
      component_number=self._component_number,
      total_components=self._total_components,
      operation_name=task_status.OperationName.UPLOADING,
      process_id=os.getpid(),
      thread_id=threading.get_ident(),
  )

  source_stream = files.BinaryFileReader(
      self._source_resource.storage_url.object_name)
  provider = self._destination_resource.storage_url.scheme

  with file_part.FilePart(source_stream, self._offset,
                          self._length) as upload_stream:
    api_factory.get_api(provider).upload_object(
        upload_stream,
        self._destination_resource,
        request_config=cloud_api.RequestConfig(
            md5_hash=self._source_resource.md5_hash, size=self._length),
        progress_callback=progress_callback)
def FindSentinel(filename, blocksize=2**16):
  """Return the sentinel line from the output file.

  Args:
    filename: The filename of the output file. (We'll read this file.)
    blocksize: Optional block size for buffering, for unit testing.

  Returns:
    The contents of the last line in the file that doesn't start with a tab,
    with its trailing newline stripped; or None if the file couldn't be opened
    or no such line could be found by inspecting the last 'blocksize' bytes of
    the file.
  """
  try:
    fp = files.BinaryFileReader(filename)
  except files.Error as err:
    log.warning('Append mode disabled: can\'t read [%r]: %s', filename, err)
    return None
  try:
    fp.seek(0, 2)  # EOF
    fp.seek(max(0, fp.tell() - blocksize))
    lines = fp.readlines()
    del lines[:1]  # First line may be partial, throw it away.
    sentinel = None
    for line in lines:
      if not line.startswith('\t'):
        sentinel = line
    if not sentinel:
      return None
    return sentinel.rstrip('\n')
  finally:
    fp.close()
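# Illustrative sketch (not part of the SDK): FindSentinel treats the last line
# that does not start with a tab as the resume point, because continuation
# lines in the appended log output are tab-indented; it also drops a possibly
# partial first line and only inspects the last `blocksize` bytes. The sample
# file contents below are made up, and plain text-mode I/O stands in for
# files.BinaryFileReader to keep the demo self-contained.
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.log', delete=False) as log_file:
  log_file.write('request-1 completed\n'
                 '\tdetail line for request-1\n'
                 'request-2 completed\n'
                 '\tdetail line for request-2\n')
  sample_path = log_file.name

with open(sample_path) as fp:
  sentinel = None
  for line in fp:
    if not line.startswith('\t'):
      sentinel = line

assert sentinel.rstrip('\n') == 'request-2 completed'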
def _MultiStepsDigest(digest, file_to_digest):
  # TODO(b/77481291) Refactor this to allow reading from stdin.
  with files.BinaryFileReader(file_to_digest) as f:
    while True:
      chunk = f.read(_READ_SIZE)
      if not chunk:
        break
      digest.update(chunk)
  return digest
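# Illustrative sketch (not part of the SDK): updating a digest in fixed-size
# chunks produces the same result as hashing all the bytes at once, which is
# why the helper above can stream arbitrarily large files. The chunk size and
# sample payload are made up; hashlib stands in for the digest object that the
# caller would pass in.
import hashlib
import io

_DEMO_READ_SIZE = 8192
payload = b'example payload' * 1000

chunked = hashlib.sha256()
stream = io.BytesIO(payload)
while True:
  chunk = stream.read(_DEMO_READ_SIZE)
  if not chunk:
    break
  chunked.update(chunk)

assert chunked.hexdigest() == hashlib.sha256(payload).hexdigest()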
def execute(self, callback=None):
  destination_url = self._destination_resource.storage_url
  provider = destination_url.scheme
  with files.BinaryFileReader(
      self._source_resource.storage_url.object_name) as upload_stream:
    # TODO(b/162069479): Support all of upload_object's parameters.
    api_factory.get_api(provider).upload_object(upload_stream,
                                                self._destination_resource)
def _catch_up_digesters(self, digesters, start_byte, end_byte):
  with files.BinaryFileReader(
      self._destination_resource.storage_url.object_name) as file_reader:
    # Get hash of partially-downloaded file as start for validation.
    for hash_algorithm in digesters:
      digesters[hash_algorithm] = hash_util.get_hash_from_file_stream(
          file_reader, hash_algorithm, start=start_byte, stop=end_byte)
def get_stream(source_resource,
               length,
               offset=0,
               digesters=None,
               task_status_queue=None,
               destination_resource=None,
               component_number=None,
               total_components=None):
  """Gets a stream to use for an upload.

  Args:
    source_resource (resource_reference.FileObjectResource): Contains a path
      to the source file.
    length (int): The total number of bytes to be uploaded.
    offset (int): The position of the first byte to be uploaded.
    digesters (dict[hash_util.HashAlgorithm, hash object]): Hash objects to be
      populated as bytes are read.
    task_status_queue (multiprocessing.Queue|None): Used for sending progress
      messages. If None, no messages will be generated or sent.
    destination_resource (resource_reference.ObjectResource): The upload
      destination. Used for progress reports, and should be specified if
      task_status_queue is.
    component_number (int|None): Identifies a component in composite uploads.
    total_components (int|None): The total number of components used in a
      composite upload.

  Returns:
    An UploadStream wrapping the file specified by source_resource.
  """
  if task_status_queue:
    progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
        status_queue=task_status_queue,
        offset=offset,
        length=length,
        source_url=source_resource.storage_url,
        destination_url=destination_resource.storage_url,
        component_number=component_number,
        total_components=total_components,
        operation_name=task_status.OperationName.UPLOADING,
        process_id=os.getpid(),
        thread_id=threading.get_ident(),
    )
  else:
    progress_callback = None

  source_stream = files.BinaryFileReader(
      source_resource.storage_url.object_name)
  return upload_stream.UploadStream(
      source_stream,
      offset,
      length,
      digesters=digesters,
      progress_callback=progress_callback)
def download_object(self,
                    cloud_resource,
                    download_stream,
                    compressed_encoding=False,
                    decryption_wrapper=None,
                    digesters=None,
                    download_strategy=cloud_api.DownloadStrategy.ONE_SHOT,
                    progress_callback=None,
                    start_byte=0,
                    end_byte=None):
  """See super class."""
  extra_args = {}
  if cloud_resource.generation:
    extra_args['VersionId'] = cloud_resource.generation

  if download_strategy == cloud_api.DownloadStrategy.RESUMABLE:
    response = self.client.get_object(
        Bucket=cloud_resource.bucket,
        Key=cloud_resource.name,
        Range='bytes={}-'.format(start_byte),
    )
    processed_bytes = start_byte
    for chunk in response['Body'].iter_chunks(
        scaled_integer.ParseInteger(
            properties.VALUES.storage.download_chunk_size.Get())):
      download_stream.write(chunk)
      processed_bytes += len(chunk)
      if progress_callback:
        progress_callback(processed_bytes)
  else:
    # TODO(b/172480278) Conditionally call get_object for smaller object.
    self.client.download_fileobj(
        cloud_resource.bucket,
        cloud_resource.name,
        download_stream,
        Callback=progress_callback,
        ExtraArgs=extra_args)

    # Download callback doesn't give us streaming data, so we have to
    # read whole downloaded file to update digests.
    if digesters:
      with files.BinaryFileReader(
          download_stream.name) as completed_download_stream:
        completed_download_stream.seek(0)
        for hash_algorithm in digesters:
          digesters[hash_algorithm] = hash_util.get_hash_from_file_stream(
              completed_download_stream, hash_algorithm)

  return self._get_content_encoding(cloud_resource)
def ReportMetrics(metrics_file_path):
  """Sends the specified anonymous usage event to the given analytics endpoint.

  Args:
    metrics_file_path: str, File with pickled metrics (list of tuples).
  """
  with files.BinaryFileReader(metrics_file_path) as metrics_file:
    metrics = pickle.load(metrics_file)
  os.remove(metrics_file_path)

  http = httplib2.Http(timeout=TIMEOUT_IN_SEC,
                       proxy_info=http_proxy.GetHttpProxyInfo())
  for metric in metrics:
    http.request(metric[0], method=metric[1], body=metric[2],
                 headers=metric[3])
def execute(self, callback=None):
  destination_url = self._destination_resource.storage_url
  provider = destination_url.scheme
  source_stream = files.BinaryFileReader(
      self._source_resource.storage_url.object_name)

  with file_part.FilePart(source_stream, self._offset,
                          self._length) as upload_stream:
    api_factory.get_api(provider).upload_object(
        upload_stream,
        self._destination_resource,
        request_config=cloud_api.RequestConfig(
            md5_hash=self._source_resource.md5_hash, size=self._length))
def GetDigest(digest_algorithm, filename):
  """Digest the file at filename based on digest_algorithm.

  Args:
    digest_algorithm: The algorithm used to digest the file, can be one of
      'sha256', 'sha384', or 'sha512'.
    filename: A valid file path over which a digest will be calculated.

  Returns:
    The digest of the provided file.

  Raises:
    InvalidArgumentException: The provided digest_algorithm is invalid.
  """
  with files.BinaryFileReader(filename) as f:
    return GetDigestOfFile(digest_algorithm, f)
def execute(self, callback=None):
  with files.BinaryFileWriter(
      self._destination_resource.storage_url.object_name,
      create_path=True) as download_stream:
    provider = self._source_resource.storage_url.scheme
    # TODO(b/162264437): Support all of download_object's parameters.
    api_factory.get_api(provider).download_object(self._source_resource,
                                                  download_stream)

  with files.BinaryFileReader(
      self._destination_resource.storage_url.object_name
  ) as completed_download_stream:
    downloaded_file_hash = util.get_hash_digest_from_file_stream(
        completed_download_stream, util.HashAlgorithms.MD5)

  util.validate_object_hashes_match(self._source_resource.storage_url,
                                    self._source_resource.md5_hash,
                                    downloaded_file_hash)
def ReportMetrics(metrics_file_path):
  """Sends the specified anonymous usage event to the given analytics endpoint.

  Args:
    metrics_file_path: str, File with pickled metrics (list of tuples).
  """
  with files.BinaryFileReader(metrics_file_path) as metrics_file:
    metrics = pickle.load(metrics_file)
  os.remove(metrics_file_path)

  session = requests.Session()
  for metric in metrics:
    session.request(metric[1], metric[0], data=metric[2], headers=metric[3],
                    timeout=TIMEOUT_IN_SEC)
def UploadArchive(upload_url, zip_file):
  """Uploads the specified zip file with a PUT request to the provided URL.

  Args:
    upload_url: A string of the URL to send the PUT request to. Required to be
      a signed URL from GCS.
    zip_file: A string of the file path to the zip file to upload.

  Returns:
    A requests.Response object.
  """
  sess = requests.GetSession()

  # Required headers for the Apigee generated signed URL.
  headers = {
      'content-type': 'application/zip',
      'x-goog-content-length-range': '0,1073741824'
  }

  with files.BinaryFileReader(zip_file) as data:
    response = sess.put(upload_url, data=data, headers=headers)

  return response
def _get_upload_stream(self, digesters, task_status_queue):
  if task_status_queue:
    progress_callback = progress_callbacks.FilesAndBytesProgressCallback(
        status_queue=task_status_queue,
        offset=self._offset,
        length=self._length,
        source_url=self._source_resource.storage_url,
        destination_url=self._destination_resource.storage_url,
        component_number=self._component_number,
        total_components=self._total_components,
        operation_name=task_status.OperationName.UPLOADING,
        process_id=os.getpid(),
        thread_id=threading.get_ident(),
    )
  else:
    progress_callback = None

  source_stream = files.BinaryFileReader(self._source_path)
  return upload_stream.UploadStream(
      source_stream,
      self._offset,
      self._length,
      digesters=digesters,
      progress_callback=progress_callback)
def __init__(self, name, create=True, timeout=None, version=None):
  super(Cache, self).__init__(
      _Table, name, create=create, timeout=timeout, version=version)
  self._persistent = False

  # Check if the db file exists and is an sqlite3 db.
  # Surprise, we have to do the heavy lifting. That stops here.
  try:
    with files.BinaryFileReader(name) as f:
      actual_magic = f.read(len(self._EXPECTED_MAGIC))
      if actual_magic != self._EXPECTED_MAGIC:
        raise exceptions.CacheInvalid(
            '[{}] is not a persistent cache.'.format(self.name))
    self._persistent = True
  except files.MissingFileError:
    if not create:
      raise exceptions.CacheNotFound(
          'Persistent cache [{}] not found.'.format(self.name))
  except files.Error:
    raise exceptions.CacheInvalid(
        '[{}] is not a persistent cache.'.format(self.name))

  self._db = sqlite3.connect(name)
  self.cursor = self._db.cursor()
  self._restricted = set(['__lock__'])
  self._tables = {}
  self._metadata = None
  self._start = persistent_cache_base.Now()
  try:
    self.InitializeMetadata()
  except exceptions.Error:
    # Make sure we clean up any dangling resources.
    self.Close(commit=False)
    raise
def get_temporary_gzipped_file(file_path):
  zipped_file_path = file_path + storage_url.TEMPORARY_FILE_SUFFIX
  with files.BinaryFileReader(file_path) as file_reader:
    with gzip.open(zipped_file_path, 'wb') as gzip_file_writer:
      shutil.copyfileobj(file_reader, gzip_file_writer)
  return zipped_file_path
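# Illustrative sketch (not part of the SDK): the helper above writes a
# gzip-compressed sibling of the source file by streaming it through
# shutil.copyfileobj. The standalone round trip below uses the same pattern
# with plain builtins; the '.gztmp' suffix and sample contents are made up.
import gzip
import shutil
import tempfile

with tempfile.NamedTemporaryFile(delete=False) as source:
  source.write(b'hello world\n' * 100)
  source_path = source.name

gzipped_path = source_path + '.gztmp'
with open(source_path, 'rb') as reader:
  with gzip.open(gzipped_path, 'wb') as writer:
    shutil.copyfileobj(reader, writer)

# Decompressing the temporary file recovers the original bytes.
with gzip.open(gzipped_path, 'rb') as gzipped:
  assert gzipped.read() == b'hello world\n' * 100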