def test_digest(self):
    """Check the digest and size `read_checksum_digest` reports for a PNG.

    Reads `6pixels.png` from `self.test_data` with `hashlib.sha256` and
    compares the returned hex digest and size against fixed expected values.
    """
    png_path = os.path.join(self.test_data, '6pixels.png')
    expected_hexdigest = (
        '04f38ebed34d3b027d2683193766155912fba647158c583c3bdb4597ad8af34c')

    actual_hexdigest, num_bytes = py_utils.read_checksum_digest(
        png_path, hashlib.sha256)

    self.assertEqual(actual_hexdigest, expected_hexdigest)
    self.assertEqual(102, num_bytes)
def _sync_download(self, url, destination_path):
    """Synchronous version of `download` method.

    First tries to copy `url` with `tf.io.gfile.copy`. If that raises
    `tf.errors.UnimplementedError`, the url is instead fetched over HTTP with
    `requests`, streamed to disk block by block while the checksum is updated.

    Args:
      url: Location of the resource to fetch.
      destination_path: Directory in which the fetched file is written.

    Returns:
      A `(hexdigest, size)` tuple. On the HTTP path, `size` is the total
      number of bytes written and `hexdigest` the digest of those bytes; on
      the copy path both come from `py_utils.read_checksum_digest`.

    Raises:
      DownloadError: If the HTTP response status code is not 200.
    """
    try:
        # If url is on a filesystem that gfile understands, use copy.
        # Otherwise, use requests.
        path = os.path.join(destination_path, os.path.basename(url))
        tf.io.gfile.copy(url, path)
        hexdigest, size = py_utils.read_checksum_digest(
            path, checksum_cls=self._checksumer)
        return hexdigest, size
    except tf.errors.UnimplementedError:
        pass

    checksum = self._checksumer()
    # Use the session as a context manager so that it is closed on every exit
    # path (normal return, DownloadError, or an error while streaming).
    with requests.Session() as session:
        if _DRIVE_URL.match(url):
            url = self._get_drive_url(url, session)
        response = session.get(url, stream=True)
        if response.status_code != 200:
            raise DownloadError('Failed to get url %s. HTTP code: %d.' %
                                (url, response.status_code))
        fname = _get_filename(response)
        path = os.path.join(destination_path, fname)
        size = 0

        # Bytes received but not yet reported to the progress bar.
        unreported_bytes = 0
        # NOTE(review): presumably the number of bytes in one MiB -- confirm
        # against the `units` module.
        unit_mb = units.MiB
        self._pbar_dl_size.update_total(
            int(response.headers.get('Content-length', 0)) // unit_mb)
        with tf.io.gfile.GFile(path, 'wb') as file_:
            for block in response.iter_content(
                    chunk_size=io.DEFAULT_BUFFER_SIZE):
                block_len = len(block)
                size += block_len

                # Update the progress bar. `>=` (rather than `>`) so that a
                # buffer holding exactly one unit is reported right away
                # instead of being held back, possibly forever if it was the
                # last block.
                unreported_bytes += block_len
                if unreported_bytes >= unit_mb:
                    self._pbar_dl_size.update(unreported_bytes // unit_mb)
                    unreported_bytes %= unit_mb
                checksum.update(block)
                # TODO(pierrot): Test this is faster than doing checksum in
                # the end and document results here.
                file_.write(block)
    self._pbar_url.update(1)
    return checksum.hexdigest(), size
def _sync_file_copy(self, filepath, destination_path):
    """Copy `filepath` into `destination_path` and checksum the copy.

    The copy keeps the base name of `filepath`. The digest and size are read
    back from the copied file using `self._checksumer`.

    Returns:
      A `(hexdigest, size)` tuple from `py_utils.read_checksum_digest`.
    """
    target_name = os.path.basename(filepath)
    target_path = os.path.join(destination_path, target_name)
    tf.io.gfile.copy(filepath, target_path)

    digest, num_bytes = py_utils.read_checksum_digest(
        target_path, checksum_cls=self._checksumer)
    return digest, num_bytes