def __call__(self, chunk: TransformationChunk):
        """Process every task in *chunk*: read the entity, prune its
        hierarchy in place, and write it back, logging progress roughly
        twenty times over the run together with cumulative I/O and
        working times.
        """
        self._tasks = chunk.tasks
        self._context = chunk.context

        io_seconds = 0
        work_seconds = 0
        total = len(self._tasks)
        # Report about every 5% of the tasks, but at least every task.
        progress_every = max(1, int(total / 20.0))
        report = "  {:>5} / {}   io: {:0.0f}s working: {:0.0f}s"
        for position, task in enumerate(self._tasks):
            if position % progress_every == 0:
                logger.info(report.format(
                    position, total, io_seconds, work_seconds))

            started = time.time()
            entity = read_json(task.in_path)
            io_seconds += time.time() - started

            started = time.time()
            self._prune_hierarchy(entity)
            work_seconds += time.time() - started

            started = time.time()
            write_json(task.out_path, entity)
            io_seconds += time.time() - started

        # Final line always shows the completed count.
        logger.info(report.format(total, total, io_seconds, work_seconds))
    def _save_hierarchy(self, hierarchy):
        """Merge *hierarchy* into every task's entity and write it back.

        For each task: read the entity JSON from ``task.in_path``, add the
        hierarchy information via ``self._add_hierarchy``, and write the
        result to ``task.out_path``.  Progress (with cumulative I/O and
        working times) is logged roughly twenty times over the run.
        """
        logger.info("Saving hierarchy ...")

        io_time = 0
        working_time = 0

        # Log about every 5% of the tasks, but at least every task.
        log_step = max(1, int(len(self._tasks) / 20.0))
        for index, task in enumerate(self._tasks):
            # BUG FIX: the original guard was
            # ``index % int(len(self._tasks) / log_step) == 0``, which
            # re-divides the task count by the precomputed step and so logs
            # far less often than intended, inconsistently with the sibling
            # loops (``__call__``, ``_write_terms_mapping``) that use
            # ``index % log_step == 0``.
            if index % log_step == 0:
                logger.info(
                    "  {:>5} / {}   io: {:0.0f}s working: {:0.0f}s".format(
                        index, len(self._tasks), io_time, working_time))

            io_start = time.time()
            entity = read_json(task.in_path)
            io_time += time.time() - io_start

            add_time = time.time()
            self._add_hierarchy(entity, hierarchy)
            working_time += time.time() - add_time

            io_start = time.time()
            write_json(task.out_path, entity)
            io_time += time.time() - io_start

        logger.info("  {:>5} / {}   io: {:0.0f}s working: {:0.0f}s".format(
            len(self._tasks), len(self._tasks), io_time, working_time))
    def _write_terms_mapping(self, terms_to_entities: WikidataEntityMap):
        """For every task, read its entity, feed each mapping selector into
        ``self._add_mappings_for_selector`` (accumulating into
        *terms_to_entities*), and write the entity back out, logging
        progress roughly twenty times over the run.
        """
        progress_line = "  {:>5} / {}   io: {:0.0f}s working: {:0.0f}s"
        # Report about every 5% of the tasks, but at least every task.
        step = max(1, int(len(self._tasks) / 20.0))

        time_in_io = 0
        time_in_work = 0

        for position, task in enumerate(self._tasks):
            if position % step == 0:
                logger.info(progress_line.format(
                    position, len(self._tasks), time_in_io, time_in_work))
            checkpoint = time.time()
            entity = read_json(task.in_path)
            time_in_io += time.time() - checkpoint

            checkpoint = time.time()
            for selector in self._context.mapping_selector(entity):
                self._add_mappings_for_selector(selector, terms_to_entities)
            time_in_work += time.time() - checkpoint

            checkpoint = time.time()
            write_json(task.out_path, entity)
            time_in_io += time.time() - checkpoint

        logger.info(progress_line.format(
            len(self._tasks), len(self._tasks), time_in_io, time_in_work))
 def __call__(self, input_dir: str, output_dir: str):
     """Merge each record from the input file into its per-IRI JSON file
     under *output_dir*, combining old and new content via
     ``self._data_selector``.

     NOTE(review): ``input_dir`` is unused in this body; presumably
     ``self._iterate_input_file`` reads it — confirm against the class.
     """
     with TransformationContext(output_dir) as context:
         for new_content in self._iterate_input_file():
             target = context.get_or_create(new_content["iri"])
             # Missing file means there is nothing to merge yet.
             existing = read_json(target) if os.path.exists(target) else {}
             write_json(target, self._data_selector(existing, new_content))