Exemplo n.º 1
0
    def print_stats():
        """Print a statistics report about the simulation to stderr.

        Reads the simulation parameters from ``args`` and the running
        counters (``files_in_storage``, ``files_uploaded``,
        ``data_in_storage``, ``data_uploaded``) from the surrounding scope.
        ``tmr`` is reset after its elapsed time has been captured, so the
        next report's ``chunk_time`` starts from this call; ``tmr_start`` is
        left untouched.
        """

        def ddp(stored, uploaded):
            # DDP is reported as 1 - stored / uploaded. Before anything has
            # been uploaded that ratio is undefined; report 0.0 rather than
            # letting a ZeroDivisionError abort the caller.
            if not uploaded:
                return 0.0
            return 1 - stored / uploaded

        data = (
            args.rlc,
            args.rlu,
            args.max_threshold,
            args.offline_rate,
            utils.num_fmt(files_in_storage),
            utils.num_fmt(files_uploaded),
            ddp(files_in_storage, files_uploaded),
            utils.sizeof_fmt(data_in_storage),
            utils.sizeof_fmt(data_uploaded),
            ddp(data_in_storage, data_uploaded),
            utils.get_mem_info(),
            tmr.elapsed_str,
            tmr_start.elapsed_str,
        )

        tmpl = ("Statistics: \n"
                "  Params: RLc=%s, RLu=%s, max_threshold=%s, offline_rate=%s\n"
                "  Files: files_in_storage=%s, files_uploaded=%s, DDP=%s\n"
                "  Data: data_in_storage=%s, data_uploaded=%s, DDP=%s\n"
                "  Execution: memory[%s], chunk_time=%s, total_time=%s")

        # Reset only after ``tmr.elapsed_str`` has been read into ``data``.
        tmr.reset()

        print(tmpl % data, file=sys.stderr)
Exemplo n.º 2
0
 def _import(self, deflated_text):
     """Inflate *deflated_text*, import its contents and log the throughput.

     The inflated text is handed to ``_import_crawl`` when
     ``self.data_set_name`` is ``'crawl'`` and to ``_import_csv`` otherwise.
     ``self.batcher`` is flushed before the elapsed time is taken, so the
     logged rate includes the flush.
     """
     text = self._inflate(deflated_text)
     log('%s: Importing...' % self.file_name)
     start = time.time()
     if self.data_set_name == 'crawl':
         count = self._import_crawl(text)
     else:
         count = self._import_csv(text)
     self.batcher.flush()
     elapsed = time.time() - start
     # time.time() can return the same value twice on coarse clocks (or go
     # backwards if the system clock is adjusted); avoid a ZeroDivisionError
     # or a negative rate and report 0 when no rate can be computed.
     speed = count / elapsed if elapsed > 0 else 0
     log('%s: Imported %s docs (%s per sec)' % (self.file_name,
             num_fmt(count), num_fmt(speed)))
Exemplo n.º 3
0
    def print_stats():
        """Print a statistics report about the simulation to stderr.

        Reads the running counters (``files_in_storage``,
        ``files_uploaded``, ``data_in_storage``, ``data_uploaded``) from the
        surrounding scope. ``tmr`` is reset after its elapsed time has been
        captured, so the next report's ``chunk_time`` starts from this call.
        """

        def ddp(stored, uploaded):
            # DDP is reported as 1 - stored / uploaded. Before anything has
            # been uploaded that ratio is undefined; report 0.0 rather than
            # letting a ZeroDivisionError abort the caller.
            if not uploaded:
                return 0.0
            return 1 - stored / uploaded

        data = (utils.num_fmt(files_in_storage), utils.num_fmt(files_uploaded),
                ddp(files_in_storage, files_uploaded),
                utils.sizeof_fmt(data_in_storage),
                utils.sizeof_fmt(data_uploaded),
                ddp(data_in_storage, data_uploaded), utils.get_mem_info(),
                tmr.elapsed_str)

        tmpl = ("Statistics: \n"
                "  Files: files_in_storage=%s, files_uploaded=%s, DDP=%s\n"
                "  Data: data_in_storage=%s, data_uploaded=%s, DDP=%s\n"
                "  Execution: memory[%s], chunk_time=%s")

        # Reset only after ``tmr.elapsed_str`` has been read into ``data``.
        tmr.reset()

        print(tmpl % data, file=sys.stderr)
Exemplo n.º 4
0
    def _check_collection(self, collection_name):
        """Report whether *collection_name* is ready to be imported into.

        When ``self.opts.drop_collection`` is set the collection is dropped
        first. Returns ``True`` if the collection then reports a count of
        zero, and ``False`` (after logging that it is skipped) otherwise.
        """
        if self.opts.drop_collection:
            log("Dropping collection '%s'" % collection_name)
            self.db[collection_name].drop()

        # NOTE(review): if ``self.db`` is a PyMongo database, ``count()`` is
        # no longer available in PyMongo 4 -- confirm the driver version.
        doc_count = self.db[collection_name].count()
        if doc_count == 0:
            log("Importing into collection '%s'" % collection_name)
            return True

        skip_msg = "Skipping non-empty collection '%s' (%s docs)" % (
            collection_name, num_fmt(doc_count))
        log(skip_msg)
        return False