class BulkLoadChecker(Checker):
    """Check bulk load operations in a dependent thread.

    Each run creates (or re-checks) a collection, submits a bulk load of
    ``self.files`` into it, and waits up to 30s for the tasks to complete.
    Failed collection names are queued in ``self.failed_tasks`` so they can
    be re-checked later (e.g. after a chaos event) when
    ``self.recheck_failed_task`` is enabled.
    """

    def __init__(self, collection_name=None, files=None):
        """
        :param collection_name: name for the checker's collection; a unique
            name is generated when None.
        :param files: list of data files to import. Defaults to an empty
            list. (Was a mutable default ``files=[]`` — fixed so the list
            is not shared between instances.)
        """
        if collection_name is None:
            collection_name = cf.gen_unique_str("BulkLoadChecker_")
        super().__init__(collection_name=collection_name)
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        # fresh list per instance — never share a mutable default
        self.files = files if files is not None else []
        self.row_based = True
        self.recheck_failed_task = False
        self.failed_tasks = []  # collection names whose import did not complete
        self.c_name = None      # collection targeted by the current task

    def update(self, files=None, schema=None, row_based=None):
        """Override import files / schema / row-based flag for later tasks.

        Only the arguments that are not None are applied.
        """
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    @trace()
    def bulk_load(self):
        """Submit one bulk load and wait (30s timeout) for completion.

        :return: (task_ids, completed) where ``completed`` is True only if
            all tasks finished within the timeout.
        """
        task_ids, result = self.utility_wrap.bulk_load(collection_name=self.c_name,
                                                       row_based=self.row_based,
                                                       files=self.files)
        completed, result = self.utility_wrap.wait_for_bulk_load_tasks_completed(task_ids=task_ids,
                                                                                 timeout=30)
        return task_ids, completed

    @exception_handler()
    def run_task(self):
        """Run one bulk-load cycle; re-check a failed collection first if enabled."""
        if self.recheck_failed_task and self.failed_tasks:
            # retry a previously failed collection instead of creating a new one
            self.c_name = self.failed_tasks.pop(0)
            log.debug(f"check failed task: {self.c_name}")
        else:
            self.c_name = cf.gen_unique_str("BulkLoadChecker_")
            self.c_wrap.init_collection(name=self.c_name, schema=self.schema)
        # import data
        task_ids, completed = self.bulk_load()
        if not completed:
            # remember the collection so it can be re-checked after chaos
            self.failed_tasks.append(self.c_name)
        return task_ids, completed

    def keep_running(self):
        """Loop run_task until the base-class stop flag is cleared."""
        while self._keep_running:
            self.run_task()
            sleep(constants.WAIT_PER_OP / 10)
class BulkLoadChecker(Checker):
    """Check bulk load operations in a dependent thread.

    Each iteration creates (or re-checks) a collection, bulk loads
    ``self.files`` into it, waits up to 30s for completion, and records
    success/failure statistics. With ``flush=True`` it also reads
    ``num_entities`` (which triggers a flush) before and after each load.
    """

    def __init__(self, flush=False):
        """
        :param flush: when True, read ``num_entities`` before and after
            each bulk load to force a flush and log its cost.
        """
        super().__init__()
        self.utility_wrap = ApiUtilityWrapper()
        self.schema = cf.gen_default_collection_schema()
        self.flush = flush
        self.files = ["bulk_load_data_source.json"]
        self.row_based = True
        self.recheck_failed_task = False
        self.failed_tasks = []  # collection names whose import did not complete

    def update(self, files=None, schema=None, row_based=None):
        """Override import files / schema / row-based flag for later tasks.

        Only the arguments that are not None are applied.
        """
        if files is not None:
            self.files = files
        if schema is not None:
            self.schema = schema
        if row_based is not None:
            self.row_based = row_based

    def keep_running(self):
        """Loop bulk-load cycles until the base-class stop flag is cleared."""
        # honor the Checker stop flag instead of `while True`, consistent
        # with the other checker implementation in this file
        while self._keep_running:
            if self.recheck_failed_task and self.failed_tasks:
                # retry a previously failed collection first
                c_name = self.failed_tasks.pop(0)
                log.info(f"check failed task: {c_name}")
            else:
                c_name = cf.gen_unique_str("BulkLoadChecker_")
                self.c_wrap.init_collection(name=c_name, schema=self.schema)
            if self.flush:
                t0 = time.time()
                pre_entities_num = self.c_wrap.num_entities  # triggers flush
                tt = time.time() - t0
                log.info(f"flush before bulk load, cost time: {tt:.4f}")
            # import data
            t0 = time.time()
            task_ids, res_1 = self.utility_wrap.bulk_load(collection_name=c_name,
                                                          row_based=self.row_based,
                                                          files=self.files)
            log.info(f"bulk load task ids:{task_ids}")
            completed, res_2 = self.utility_wrap.wait_for_bulk_load_tasks_completed(
                task_ids=task_ids, timeout=30)
            tt = time.time() - t0
            # added_num = sum(res_2[task_id].row_count for task_id in task_ids)
            if completed:
                self.rsp_times.append(tt)
                # incremental running average over successful loads
                self.average_time = (tt + self.average_time * self._succ) / (self._succ + 1)
                self._succ += 1
                # format-spec typo fixed: `:4f` (width 4) -> `:.4f` (4 decimals)
                log.info(f"bulk load success for collection {c_name}, "
                         f"time: {tt:.4f}, average_time: {self.average_time:.4f}")
                if self.flush:
                    t0 = time.time()
                    cur_entities_num = self.c_wrap.num_entities  # triggers flush
                    tt = time.time() - t0
                    log.info(f"flush after bulk load, cost time: {tt:.4f}")
            else:
                self._fail += 1
                # if the task failed, store the failed collection name
                # for further checking after chaos
                self.failed_tasks.append(c_name)
                log.info(f"bulk load failed for collection {c_name} "
                         f"time: {tt:.4f}, average_time: {self.average_time:.4f}")
            sleep(constants.WAIT_PER_OP / 10)