def get_data_http(self, filename):
    """Yield the data returned from `filename` of `pipeline_context` in manageable chunks.

    Parameters
    ----------
    filename : str
        Name of the archive file to download; resolved to a URL via ``self.get_url``.

    Yields
    ------
    bytes
        Successive chunks of at most ``config.CRDS_DATA_CHUNK_SIZE`` bytes.

    Raises
    ------
    CrdsDownloadError
        If opening the URL or reading any chunk fails; chains the original exception.
    """
    url = self.get_url(filename)
    # Explicit sentinel instead of catching UnboundLocalError in `finally`:
    # `infile` stays None if urlopen() itself raises, so cleanup can test it directly.
    infile = None
    try:
        infile = request.urlopen(url)
        file_size = utils.human_format_number(
            self.catalog_file_size(filename)).strip()
        stats = utils.TimingStats()
        data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
        while data:
            stats.increment("bytes", len(data))
            status = stats.status("bytes")
            # status[0] looks like "<number> bytes"; keep only the number part.
            bytes_so_far = " ".join(status[0].split()[:-1])
            log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/", file_size,
                        "bytes at", status[1], verbosity=20)
            yield data
            data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
    except Exception as exc:
        raise CrdsDownloadError("Failed downloading", srepr(filename),
                                "from url", srepr(url), ":", str(exc)) from exc
    finally:
        if infile is not None:
            infile.close()
def _start_stats(self):
    """Set up and return a started TimingStats tracker for the ingest,
    emitting the ingest header and the total upload size to the log."""
    byte_total = utils.total_size(self.files)
    timer = utils.TimingStats(output=log.verbose)
    timer.start()
    log.divider(name="ingest files", char="=")
    log.info("Uploading", len(self.files), "file(s) totalling",
             utils.human_format_number(byte_total), "bytes")
    log.divider(func=log.verbose)
    return timer
def main(self):
    """Check files for availability from the archive."""
    self.require_server_connection()
    log.info("Mapping URL:", repr(self.mapping_url))
    log.info("Reference URL:", repr(self.reference_url))
    timing = utils.TimingStats()
    self.init_files(self.files)
    # Verify each requested file against the archive, counting as we go.
    for name in self.files:
        self.verify_archive_file(name)
        timing.increment("files")
    self.print_files()
    timing.report_stat("files")
    log.standard_status()