Example #1
0
    def ingest_files(self):
        """Copy each file in self.files into the user's ingest directory on the
        CRDS server, logging per-file progress and overall timing statistics.

        When --keep-existing-files is specified, files already present on the
        server with matching sizes are skipped.
        """
        stats = self._start_stats()
        destination = self.submission_info.ingest_dir
        host, path = destination.split(":")
        total_size = utils.total_size(self.files)

        ingest_info = self.get_ingested_files()
        self.scan_for_nonsubmitted_ingests(ingest_info)

        # Optionally prune files that are already correctly ingested server-side.
        if self.args.keep_existing_files:
            remaining_files = self.keep_existing_files(ingest_info, self.files)
        else:
            remaining_files = self.files

        for nth, filename in enumerate(remaining_files, start=1):
            file_size = utils.file_size(filename)
            # len(self.files) is evaluated inside the loop on purpose:  the
            # list may have been pruned in place by keep_existing_files().
            log.info("Copy started", repr(filename), "[", nth, "/", len(self.files), " files ]",
                     "[", utils.human_format_number(file_size),
                     "/", utils.human_format_number(total_size), " bytes ]")
            self.copy_file(filename, path, destination)
            stats.increment("bytes", file_size)
            stats.increment("files", 1)
            stats.log_status("files", "Copy complete", len(self.files))
            stats.log_status("bytes", "Copy complete", total_size)

        log.divider(func=log.verbose)
        stats.report()
        log.divider(char="=")
Example #2
0
File: submit.py Project: nden/crds
    def ingest_files(self):
        """Copy self.files into the user's ingest directory on the CRDS server,
        emitting progress messages and timing statistics as each file is sent.
        """
        stats = self._start_stats()
        destination = self.submission_info.ingest_dir
        host, path = destination.split(":")
        total_size = utils.total_size(self.files)

        ingest_info = self.get_ingested_files()
        self.scan_for_nonsubmitted_ingests(ingest_info)

        # Skip already-ingested files only when the user asked for it.
        remaining_files = self.files
        if self.args.keep_existing_files:
            remaining_files = self.keep_existing_files(ingest_info, self.files)

        index = 0
        for filename in remaining_files:
            index += 1
            file_size = utils.file_size(filename)
            log.info("Copy started", repr(filename), "[", index, "/",
                     len(self.files), " files ]", "[",
                     utils.human_format_number(file_size), "/",
                     utils.human_format_number(total_size), " bytes ]")
            self.copy_file(filename, path, destination)
            stats.increment("bytes", file_size)
            stats.increment("files", 1)
            stats.log_status("files", "Copy complete", len(self.files))
            stats.log_status("bytes", "Copy complete", total_size)

        log.divider(func=log.verbose)
        stats.report()
        log.divider(char="=")
Example #3
0
File: api.py Project: nden/crds
def file_progress(activity, name, path, bytes, bytes_so_far, total_bytes, nth_file, total_files):
    """Return a one-line progress message for `activity` on file `name` at `path`.

    `nth_file` is 0-based and rendered 1-based.  NOTE: `name` is unused here but
    retained for interface compatibility with callers;  `bytes` shadows the
    builtin but cannot be renamed without breaking keyword callers.
    """
    human = utils.human_format_number
    return (
        f"{activity}  {path!s:<55}  {human(bytes)} bytes  "
        f"({nth_file + 1} / {total_files} files) "
        f"({human(bytes_so_far).strip()} / {human(total_bytes).strip()} bytes)"
    )
Example #4
0
File: submit.py Project: nden/crds
 def keep_existing_files(self, ingest_info, files):
     """Keep files which have already been copied and have the correct server side
     length.  This can save *hours* of copy time for repeat submissions.

     Parameters
     ----------
     ingest_info : dict
         Maps file basename -> info dict with at least "size" and "delete_url".
     files : list of str
         Local file paths;  mutated in place as already-ingested files are dropped.

     Returns
     -------
     list of str
         The (pruned) `files` list containing only files still needing copy.
     """
     for filename in files[:]:   # iterate a copy;  `files` is mutated below
         local_size = utils.file_size(filename)
         basename = os.path.basename(filename)
         try:
             existing_size = int(ingest_info[basename]["size"])
         except (KeyError, ValueError, TypeError):
             # Narrowed from a bare `except:`:  missing entry or unparseable
             # size means the file is not (correctly) ingested yet.
             log.info(
                 "File", repr(filename),
                 "does not exist in ingest directory and will be copied to CRDS server."
             )
             continue
         if local_size == existing_size:
             log.info(
                 "File", repr(filename),
                 "has already been copied and has correct length on CRDS server",
                 utils.human_format_number(existing_size))
             files.remove(filename)
         else:
             log.info(
                 "File", repr(filename),
                 "exists but has incorrect size and must be recopied.  Deleting old ingest."
             )
             self.connection.get(ingest_info[basename]["delete_url"])
     return files
Example #5
0
 def _start_stats(self):
     """Create, start, and return a TimingStats tracker for the ingest,
     logging a banner with the file count and total byte size.
     """
     byte_total = utils.total_size(self.files)
     timer = utils.TimingStats(output=log.verbose)
     timer.start()
     log.divider(name="ingest files", char="=")
     log.info("Copying", len(self.files), "file(s) totalling", utils.human_format_number(byte_total), "bytes")
     log.divider(func=log.verbose)
     return timer
Example #6
0
File: submit.py Project: nden/crds
 def _start_stats(self):
     """Initialize and return timing statistics for the file ingest,
     emitting the ingest header to the log.
     """
     size_in_bytes = utils.total_size(self.files)
     ingest_stats = utils.TimingStats(output=log.verbose)
     ingest_stats.start()
     log.divider(name="ingest files", char="=")
     log.info("Copying", len(self.files), "file(s) totalling",
              utils.human_format_number(size_in_bytes), "bytes")
     log.divider(func=log.verbose)
     return ingest_stats
Example #7
0
 def fetch_references(self, references):
     """Fetch all `references` required to support `only_contexts` into the
     local CRDS cache.

     With --readonly-cache nothing is downloaded;  instead the files and total
     bytes that *would* be transferred are logged.
     """
     if not self.contexts:
         return
     if not self.args.readonly_cache:
         self.dump_files(self.contexts[0], references)
         return
     # Readonly mode:  report prospective downloads without transferring.
     already_have = set(rmap.list_references("*", self.observatory))
     needed = set(references) - already_have
     fetched = [name for name in sorted(needed) if not name.startswith("NOT FOUND")]
     if not fetched:
         return
     log.info("READONLY CACHE would fetch references:", repr(fetched))
     with log.info_on_exception("Reference size information not available."):
         info_map = api.get_file_info_map(self.observatory, fetched, fields=["size"])
         total_bytes = api.get_total_bytes(info_map)
         log.info("READONLY CACHE would download", len(fetched), "references totaling",
                  utils.human_format_number(total_bytes).strip(), "bytes.")
Example #8
0
File: sync.py Project: nden/crds
 def fetch_references(self, references):
     """Download any of `references` required by `only_contexts`, honoring
     --readonly-cache by only *logging* what would be transferred.
     """
     if not self.contexts:
         return
     if self.args.readonly_cache:
         cached = set(rmap.list_references("*", self.observatory))
         missing = sorted(set(references) - cached)
         would_fetch = [ref for ref in missing if not ref.startswith("NOT FOUND")]
         if would_fetch:
             log.info("READONLY CACHE would fetch references:", repr(would_fetch))
             with log.info_on_exception("Reference size information not available."):
                 sizes = api.get_file_info_map(self.observatory, would_fetch, fields=["size"])
                 nbytes = api.get_total_bytes(sizes)
                 log.info("READONLY CACHE would download", len(would_fetch), "references totaling",
                          utils.human_format_number(nbytes).strip(), "bytes.")
     else:
         self.dump_files(self.contexts[0], references)
Example #9
0
File: api.py Project: nden/crds
 def get_data_http(self, filename):
     """Yield the data returned from `filename` of `pipeline_context` in manageable chunks.

     Downloads over HTTP in blocks of CRDS_DATA_CHUNK_SIZE bytes, logging
     transfer progress.  Raises CrdsDownloadError (chained to the underlying
     exception via `from exc`) if the download fails.
     """
     url = self.get_url(filename)
     infile = None   # so `finally` can close safely even if urlopen() itself fails
     try:
         infile = urlopen(url)
         file_size = utils.human_format_number(self.catalog_file_size(filename)).strip()
         stats = utils.TimingStats()
         data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
         while data:
             stats.increment("bytes", len(data))
             status = stats.status("bytes")
             bytes_so_far = " ".join(status[0].split()[:-1])
             log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/", file_size, "bytes at", status[1], verbosity=20)
             yield data
             data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
     except Exception as exc:
         # Chain the original exception so the root cause isn't lost.
         raise CrdsDownloadError("Failed downloading", srepr(filename), "from url", srepr(url), ":", str(exc)) from exc
     finally:
         if infile is not None:
             infile.close()
Example #10
0
 def keep_existing_files(self, ingest_info, files):
     """Keep files which have already been copied and have the correct server side
     length.  This can save *hours* of copy time for repeat submissions.

     `ingest_info` maps basename -> info dict with "size" and "delete_url".
     `files` (local paths) is mutated in place:  correctly ingested files are
     removed;  files present server-side with the wrong size are deleted there
     so they will be recopied.  Returns the pruned `files` list.
     """
     for filename in files[:]:   # copy:  `files` shrinks during iteration
         local_size = utils.file_size(filename)
         basename = os.path.basename(filename)
         try:
             existing_size = int(ingest_info[basename]["size"])
         except (KeyError, ValueError, TypeError):   # narrowed from bare `except:`
             log.info("File", repr(filename),
                      "does not exist in ingest directory and will be copied to CRDS server.")
             continue
         if local_size == existing_size:
             log.info("File", repr(filename),
                      "has already been copied and has correct length on CRDS server",
                      utils.human_format_number(existing_size))
             files.remove(filename)
         else:
             log.info("File", repr(filename),
                      "exists but has incorrect size and must be recopied.  Deleting old ingest.")
             self.connection.get(ingest_info[basename]["delete_url"])
     return files