def itask_slices_generator(cls, isc_config_file):
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_worker_queue_manager()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        ht_task_number = it.ht_task_number
        batch_size = 100
        # slice the index range [0, ht_task_number] into contiguous [start, end) batches
        ht_task_slices = [i for i in range(0, ht_task_number + 1, batch_size)]
        if ht_task_slices[-1] < ht_task_number:
            ht_task_slices.append(ht_task_number)

        for i in range(1, len(ht_task_slices)):
            ts = (tid, (ht_task_slices[i - 1], ht_task_slices[i]))
            ht_task_queue.put(ts)

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        ht_task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
    msg.send_message("all itasks have been dispatched")
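# A minimal stand-alone sketch of the batching scheme above: split the index
# range [0, total) into (start, end) pairs of at most batch_size items.
# make_batches is a hypothetical helper, not part of the project code.
def make_batches(total, batch_size=100):
    bounds = list(range(0, total + 1, batch_size))
    if bounds[-1] < total:
        bounds.append(total)
    return [(bounds[i - 1], bounds[i]) for i in range(1, len(bounds))]

# e.g. make_batches(250) -> [(0, 100), (100, 200), (200, 250)]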
def init_kmn_isc_task_workers(cls, isc_config_file="isets-tasks.json",
                              is_check_valid_rules=True, result_queue=None):
    payload = config.worker_payload
    worker_pool = Pool(payload)
    pathlib.Path(config.task_host_lock_file).touch()

    if result_queue is None:
        manager, task_queue, ht_task_queue, result_queue = \
            SearchQueueManager.init_task_worker_queue_manager()

    host_ip = ssh.get_host_ip()
    result_queue.put((ITaskSignal.add_worker_signal, config.worker_host_name, host_ip))
    logging.info("task worker host %s start ..." % config.worker_host_name)

    # initialize the non-equivalent condition directory files
    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    cls.init_worker_host_nse_envs(isc_tasks)

    for i in range(payload):
        worker_pool.apply_async(cls.kmn_isc_task_worker,
                                args=(cls, isc_config_file, i + 1, is_check_valid_rules))
    worker_pool.close()
    return worker_pool, result_queue, host_ip
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    task_queue = manager_tuple[1]
    result_queue = manager_tuple[3]

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name
    processed_task_slices_number = 0

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    is_process_task_queue = False

    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        if task_queue.empty():
            if is_process_task_queue:
                logging.info("%s:%s waiting for task queue ... " % (worker_host_name, worker_name))
                is_process_task_queue = False
            time.sleep(1)
            continue

        task_slice_cache = task_queue.get()
        processed_task_slices_number += 1
        is_process_task_queue = True
        itask_id = task_slice_cache[0]
        if itask_id == ITaskSignal.kill_signal:
            break

        itask = isc_tasks[itask_id]
        task_slice = task_slice_cache[1]
        i4_ne_isets, task_number = cls.process_semi_valid_task_slices(
            cls, itask_id, itask, task_slice)
        result_queue.put((ITaskSignal.se_condition_signal, itask_id, i4_ne_isets))
        result_queue.put((ITaskSignal.stat_signal, itask_id, task_number))

    logging.info("%s processes %d isc task slices ... " % (worker_name, processed_task_slices_number))
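# The worker loops in this module all stop on a sentinel tuple whose first
# element is ITaskSignal.kill_signal (the generators enqueue a generous
# surplus of them). A self-contained sketch of the same poison-pill pattern,
# with an illustrative KILL constant standing in for ITaskSignal.kill_signal:
import queue

KILL = "kill"

def drain(q):
    processed = 0
    while True:
        item = q.get()
        if item[0] == KILL:
            break
        processed += 1  # stand-in for real task processing
    return processed

q = queue.Queue()
q.put((0, "task-slice"))
q.put((KILL, -1))
assert drain(q) == 1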
def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    SearchWorkerQueueManger.register("get_task_queue")
    SearchWorkerQueueManger.register("get_result_queue")
    manager = SearchWorkerQueueManger(address=(config.task_host, config.task_host_port),
                                      authkey=bytes(config.task_host_key, encoding="utf-8"))
    manager.connect()
    task_queue = manager.get_task_queue()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        min_ne = it.min_ne
        max_ne = it.max_ne

        search_iset_ids = it.meta_data.search_space_iset_ids
        unknown_iset_number = len(search_iset_ids)
        left_length = unknown_iset_number // 2
        if left_length > 12:
            left_length = 12
        right_length = unknown_iset_number - left_length
        left_zone_isets = search_iset_ids[0:left_length]

        for ne_iset_number in range(min_ne, max_ne + 1):
            for left_iset_number in range(ne_iset_number + 1):
                right_iset_number = ne_iset_number - left_iset_number
                if left_iset_number > left_length or right_iset_number > right_length:
                    continue
                task_iter = itertools.combinations(left_zone_isets, left_iset_number)
                for left_ti in task_iter:
                    task_item = (tid, (ne_iset_number, set(left_zone_isets), list(left_ti)))
                    task_queue.put(task_item)

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
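# Why the left/right split above covers the whole space: choosing
# ne_iset_number items from left + right decomposes by the Vandermonde
# identity C(l + r, n) = sum_k C(l, k) * C(r, n - k). A self-contained check
# of that accounting (names here are illustrative, not project code):
import itertools
import math

def count_split_tasks(left, right, n):
    total = 0
    for k in range(n + 1):
        if k > len(left) or n - k > len(right):
            continue
        for _ in itertools.combinations(left, k):
            total += math.comb(len(right), n - k)
    return total

left, right = list(range(5)), list(range(5, 12))
assert count_split_tasks(left, right, 4) == math.comb(12, 4)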
def itask_slices_generator(cls, isc_config_file):
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_worker_queue_manager()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        min_ne = it.min_ne
        max_ne = it.max_ne

        right_zone_iset_ids = set(copy.deepcopy(it.meta_data.search_space_iset_ids))
        left_zone_iset_ids = set(it.meta_data.search_i4_composed_iset_ids)
        max_left_zone_length = 12
        if len(left_zone_iset_ids) > max_left_zone_length:
            left_zone_iset_ids = set(list(left_zone_iset_ids)[0:max_left_zone_length])
        right_zone_iset_ids = right_zone_iset_ids.difference(left_zone_iset_ids)

        for ne_iset_number in range(min_ne, max_ne + 1):
            if not cls.check_itask_terminate_status(it):
                task_slices = CombinationSearchingSpaceSplitter.near_uniform_vandermonde_generator(
                    left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
                ts_cnt = 0
                for ts in task_slices:
                    new_ts = (set(ts[0]), set(ts[1]), ts[2])
                    task_queue.put((tid, new_ts))
                    ts_cnt += 1
                    if ts_cnt % 10000 == 0 and cls.check_itask_terminate_status(it):
                        break

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
def itask_slices_generator(cls, isc_config_file):
    max_space_size = 10000000000
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    task_queue = manager_tuple[1]

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        min_ne = it.min_ne
        max_ne = it.max_ne
        rule_number = sum(it.k_m_n)
        isnse.clear_task_space_layer_finish_flag_files(*it.k_m_n, min_ne, max_ne)

        for ne_iset_number in range(min_ne, max_ne + 1):
            msg_text = "generating %d-%d-%d %d layer task slices" % (*it.k_m_n, ne_iset_number)
            logging.info(msg_text)
            cls.itask_slice_generator_by_i4_meta(ne_iset_number, tid, it,
                                                 max_space_size, manager_tuple)
            # if ne_iset_number <= rule_number:
            #     cls.itask_slice_generator_by_i4_meta(ne_iset_number, tid, it, max_space_size, manager_tuple)
            # else:
            #     if not cls.check_itask_terminate_status(it):
            #         flag_file = isnse.get_task_space_layer_finish_flag_file(*it.k_m_n, ne_iset_number - 2)
            #         while not pathlib.Path(flag_file).exists():
            #             if cls.check_itask_terminate_status(it):
            #                 break
            #             time.sleep(1)
            #     cls.itask_slice_generator_by_i4_meta(ne_iset_number, tid, it, max_space_size, manager_tuple)

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
def itask_slices_generator(cls, isc_config_file):
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_worker_queue_manager()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        i4_iset_ids = list(it.meta_data.search_i4_composed_iset_ids)
        i4_iset_size = len(i4_iset_ids)
        min_ne = 1
        max_ne = len(i4_iset_ids)

        max_left_zone_length = 10
        left_zone_size = max_left_zone_length
        if i4_iset_size < left_zone_size:
            left_zone_size = i4_iset_size // 2
        left_zone_iset_ids = i4_iset_ids[0:left_zone_size]
        right_zone_iset_ids = i4_iset_ids[left_zone_size:]

        for ne_iset_number in range(min_ne, max_ne + 1):
            task_slices = CombinationSearchingSpaceSplitter.vandermonde_generator(
                left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
            for ts in task_slices:
                task_queue.put((tid, ts))

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
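# CombinationSearchingSpaceSplitter is project-internal; as an assumption, a
# generator in its spirit can be sketched as follows: yield one slice per
# concrete left-zone combination, leaving the right-zone choice count to the
# worker. The (left_isets, right_zone, right_choice_number) layout here is
# illustrative, not necessarily the project's actual slice format.
import itertools

def vandermonde_slices(left_zone, right_zone, ne_iset_number):
    for k in range(ne_iset_number + 1):
        if k > len(left_zone) or ne_iset_number - k > len(right_zone):
            continue
        for left_isets in itertools.combinations(left_zone, k):
            yield (list(left_isets), list(right_zone), ne_iset_number - k)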
def init_kmn_isc_task_workers(cls, isc_config_file="isets-tasks.json",
                              is_check_valid_rules=True):
    payload = config.worker_payload
    worker_pool = Pool(payload)
    pathlib.Path(config.task_host_lock_file).touch()

    SearchWorkerQueueManger.register("get_task_queue")
    SearchWorkerQueueManger.register("get_result_queue")
    manager = SearchWorkerQueueManger(address=(config.task_host, config.task_host_port),
                                      authkey=bytes(config.task_host_key, encoding="utf-8"))
    manager.connect()
    result_queue = manager.get_result_queue()

    host_ip = ssh.get_host_ip()
    result_queue.put((ITaskSignal.add_worker_signal, config.worker_host_name, host_ip))
    logging.info("task worker host %s start ..." % config.worker_host_name)

    # initialize the non-equivalent condition directory files
    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    cls.init_worker_host_nse_envs(isc_tasks)

    for i in range(payload):
        worker_pool.apply_async(cls.kmn_isc_task_worker,
                                args=(cls, isc_config_file, i + 1, is_check_valid_rules))
    worker_pool.close()
    worker_pool.join()

    # if pathlib.Path(task_worker_host_lock_file).exists():
    result_queue.put((ITaskSignal.kill_signal, config.worker_host_name, host_ip))
    logging.info("task worker host %s sends kill signal ..." % config.worker_host_name)
    logging.info("task worker host %s exit ..." % config.worker_host_name)
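# A self-contained sketch of the pool lifecycle used above: apply_async to
# fan out workers, close() to stop accepting work, join() to wait for exit.
# The worker body is illustrative only.
from multiprocessing import Pool
import os

def demo_worker(worker_id):
    return "worker-%d on pid %d" % (worker_id, os.getpid())

if __name__ == "__main__":
    pool = Pool(4)
    results = [pool.apply_async(demo_worker, args=(i + 1,)) for i in range(4)]
    pool.close()   # no more tasks may be submitted
    pool.join()    # block until every worker has finished
    for r in results:
        print(r.get())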
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    ht_task_queue = manager_tuple[2]
    result_queue = manager_tuple[3]

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name
    processed_ht_task_slices_number = 0

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    is_process_ht_task_queue = False

    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        is_task_finish = cls.check_itasks_finish_status(isc_tasks)
        if is_task_finish:
            logging.info("%s:%s all itasks terminate ..." % (worker_host_name, worker_name))
            break

        if ht_task_queue.empty():
            if is_process_ht_task_queue:
                is_process_ht_task_queue = False
                logging.info("%s:%s waiting for ht task queue ..." % (worker_host_name, worker_name))
            time.sleep(1)
            continue

        is_process_ht_task_queue = True
        processed_ht_task_slices_number += 1
        ts = ht_task_queue.get()
        start_time = datetime.now()
        itask_id = ts[0]
        # guard against the kill-signal sentinel, mirroring the other ht worker
        if itask_id == ITaskSignal.kill_signal:
            break
        itask = isc_tasks[itask_id]

        ne_iset_number, task_check_number, se_conditions_cache, nse_conditions_cache = \
            cls.search_ht_task_slice(cls, itask, ts[1])
        end_time = datetime.now()

        if ne_iset_number > itask.rule_number and len(nse_conditions_cache) > 0:
            print("debug info (wrong nse conditions): ", ts)

        if len(se_conditions_cache) > 0:
            result_queue.put((ITaskSignal.se_condition_signal, itask_id, se_conditions_cache))
        if len(nse_conditions_cache) > 0:
            result_queue.put((ITaskSignal.nse_condition_signal, itask_id, nse_conditions_cache))

        result_tuple = (ITaskSignal.stat_signal, itask_id, ne_iset_number,
                        task_check_number, task_check_number, 0, (start_time, end_time))
        result_queue.put(result_tuple)

    logging.info("%s processes %d ht itask slices" % (worker_name, processed_ht_task_slices_number))
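# The result tuples above are tagged with a signal in slot 0. A minimal
# dispatch sketch for the consumer side; the handler bodies are placeholders,
# and the stat layout shown is the one used by the ht worker above (other
# workers in this module use shorter stat tuples).
def dispatch_result(item):
    signal = item[0]
    if signal == ITaskSignal.se_condition_signal:
        pass  # item = (signal, itask_id, se_conditions)
    elif signal == ITaskSignal.nse_condition_signal:
        pass  # item = (signal, itask_id, nse_conditions)
    elif signal == ITaskSignal.stat_signal:
        pass  # item = (signal, itask_id, ne_iset_number, checked, total, skipped, (start, end))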
def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    SearchWorkerQueueManger.register("get_task_queue")
    SearchWorkerQueueManger.register("get_result_queue")
    manager = SearchWorkerQueueManger(address=(config.task_host, config.task_host_port),
                                      authkey=bytes(config.task_host_key, encoding="utf-8"))
    manager.connect()
    task_queue = manager.get_task_queue()
    result_queue = manager.get_result_queue()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        min_ne = it.min_ne
        max_ne = it.max_ne
        unknown_iset_number = len(it.meta_data.search_space_iset_ids)
        rule_number = it.rule_number

        left_zone_iset_ids = it.meta_data.search_i4_composed_iset_ids
        left_zone_length = len(left_zone_iset_ids)
        right_zone_length = unknown_iset_number - left_zone_length
        is_use_extended_rules = it.is_use_extended_rules

        for ne_iset_number in range(min_ne, max_ne + 1):
            for left_iset_number in range(ne_iset_number + 1):
                right_iset_number = ne_iset_number - left_iset_number
                if left_iset_number > left_zone_length or right_iset_number > right_zone_length:
                    continue
                task_iter = itertools.combinations(left_zone_iset_ids, left_iset_number)
                for left_ti in task_iter:
                    left_iset_ids = list(left_ti)
                    is_contain_semi_valid_rule = iscm.check_contain_rules_without_i_n_iset(
                        4, left_iset_ids, rule_number, is_use_extended_rules)
                    if is_contain_semi_valid_rule:
                        # the whole right-zone extension, C(right_zone_length,
                        # right_iset_number) items, can be skipped; report it as a stat
                        check_cnt = 0
                        task_number = CombinaryCounter.compute_comb(right_zone_length,
                                                                    right_iset_number)
                        semi_valid_skip_cnt = task_number
                        stat_item = (ITaskSignal.stat_signal, tid, ne_iset_number,
                                     check_cnt, task_number, semi_valid_skip_cnt, None)
                        result_queue.put(stat_item)
                    else:
                        task_item = (tid, (ne_iset_number, set(left_zone_iset_ids), left_iset_ids))
                        task_queue.put(task_item)

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    task_queue = manager_tuple[1]
    result_queue = manager_tuple[3]
    start_time = datetime.now()

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name
    processed_task_slices_number = 0

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks

    raw_condition_files = list()
    for itask in isc_tasks:
        rule_number = sum(itask.k_m_n)
        search_space_iset_number = len(itask.meta_data.search_space_iset_ids)
        itask.loaded_non_se_condition_files.add(0)
        for i in range(rule_number + 1, search_space_iset_number + 1):
            itask.loaded_non_se_condition_files.add(i)
        rf = riu.get_empty_raw_icondition_file(*itask.k_m_n, itask.lp_type,
                                               itask.is_use_extended_rules, str(worker_id))
        raw_condition_files.append(rf)

    is_process_task_queue = False
    task_slice_cache = None
    last_nse_iset_number = 0
    result_queue_cache = list()
    single_round_processed_task_number = 0
    sleep_cnt = 0

    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        is_task_finish = cls.check_itasks_finish_status(isc_tasks)
        if is_task_finish:
            logging.info("%s:%s all itasks terminate ..." % (worker_host_name, worker_name))
            break

        if task_slice_cache is None:
            if task_queue.empty():
                if is_process_task_queue:
                    logging.info("%s:%s waiting for task queue ... " % (worker_host_name, worker_name))
                    is_process_task_queue = False
                time.sleep(1)
                sleep_cnt += 1
                continue
            else:
                task_slice_cache = task_queue.get()
                processed_task_slices_number += 1
                single_round_processed_task_number += 1
                is_process_task_queue = True

        itask_id = task_slice_cache[0]
        if itask_id == ITaskSignal.kill_signal:
            break

        itask = isc_tasks[itask_id]
        if itask.is_task_finish:
            task_slice_cache = None
            continue

        """
        task_slice = (left_split, left, left_zone_length, right_choice_number)
        if left_split = True, left is left_isets
        else left is left_isets_size
        """
        task_slice = task_slice_cache[1]
        rule_number = sum(itask.k_m_n)
        ne_iset_number = cls.compute_ne_iset_number(task_slice)
        nse_ne_iset_number = ne_iset_number - 1
        load_nse_complete = cls.task_worker_load_nse_conditions(itask, ne_iset_number)

        if not load_nse_complete:
            if last_nse_iset_number != nse_ne_iset_number:
                last_nse_iset_number = nse_ne_iset_number
                logging.info((task_slice,
                              "%s:%s waiting for %d-%d-%d nse complete file %d" %
                              (worker_host_name, worker_name, *itask.k_m_n, nse_ne_iset_number)))
            time.sleep(1)
            sleep_cnt += 1
            continue

        rq_cache, ht_check_items = cls.process_merge_small_task_slices(
            cls, itask_id, itask, task_slice, manager_tuple)
        result_queue_cache.extend(rq_cache)
        task_slice_cache = None

        raw_data_file = raw_condition_files[itask_id]
        ht_stat = cls.process_ht_tasks(cls, ht_check_items, itask_id, itask,
                                       ne_iset_number, result_queue, raw_data_file)
        if ht_stat is not None:
            result_queue_cache.append(ht_stat)

        if ne_iset_number <= rule_number:
            result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                cls, result_queue_cache, result_queue, start_time)
        else:
            if task_queue.qsize() < 1000 or len(result_queue_cache) > 100000:
                result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                    cls, result_queue_cache, result_queue, start_time)

        if single_round_processed_task_number == 10000:
            msg_text = "%s:%s processes %d isc task slices, new round process %d task slices ... " % (
                worker_host_name, worker_name, processed_task_slices_number,
                single_round_processed_task_number)
            single_round_processed_task_number = 0
            logging.info(msg_text)

    msg_text = "%s:%s processes %d isc task slices, new round process %d task slices ... " % (
        worker_host_name, worker_name, processed_task_slices_number,
        single_round_processed_task_number)
    logging.info(msg_text)
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    ht_task_queue = manager_tuple[2]
    result_queue = manager_tuple[3]

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name
    processed_ht_task_slices_number = 0

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks

    data_files = list()
    for itask in isc_tasks:
        file_path = riu.get_complete_raw_icondition_file(
            *itask.k_m_n, itask.lp_type, itask.is_use_extended_rules)
        data_files.append(file_path)

    is_process_ht_task_queue = False
    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        if ht_task_queue.empty():
            if is_process_ht_task_queue:
                is_process_ht_task_queue = False
                logging.info("%s:%s waiting for ht task queue ..." % (worker_host_name, worker_name))
            time.sleep(1)
            continue

        is_process_ht_task_queue = True
        processed_ht_task_slices_number += 1
        ts = ht_task_queue.get()
        start_time = datetime.now()
        itask_id = ts[0]
        if itask_id == ITaskSignal.kill_signal:
            break

        itask = isc_tasks[itask_id]
        data_file = data_files[itask_id]
        task_check_number, se_conditions_cache, nse_conditions_cache = \
            cls.verify_ht_tasks_from_file_data(cls, itask, ts[1], data_file)
        end_time = datetime.now()

        if len(se_conditions_cache) > 0:
            result_queue.put((ITaskSignal.se_condition_signal, itask_id, se_conditions_cache))
        if len(nse_conditions_cache) > 0:
            result_queue.put((ITaskSignal.nse_condition_signal, itask_id, nse_conditions_cache))

        result_tuple = (ITaskSignal.stat_signal, itask_id, task_check_number,
                        (start_time, end_time))
        result_queue.put(result_tuple)

    logging.info("%s processes %d ht itask slices" % (worker_name, processed_ht_task_slices_number))
def load_itasks(isc_config_file="isets-tasks.json"):
    isc_tasks = ITaskConfig(isc_config_file)
    return isc_tasks.isc_tasks
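# Usage sketch for load_itasks; the default path mirrors the other entry
# points in this module, and the attribute access assumes only what the
# surrounding code already uses (k_m_n, min_ne, max_ne).
for itask in load_itasks("isets-tasks.json"):
    print("task %d-%d-%d, ne range [%d, %d]" % (*itask.k_m_n, itask.min_ne, itask.max_ne))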
def init_kmn_isc_task_master_from_config(cls, isc_config_file="isets-tasks.json",
                                         sleep_time=30):
    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_master_queue_manager()
    manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    localhost_ip = ssh.get_host_ip()

    ts_generator_pool = cls.init_task_slices_generator_pool(cls, isc_config_file)
    ht_pool = cls.init_pre_task_worker_pool(cls, isc_config_file, result_queue)
    working_hosts_number = 0

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks
    for itask in isc_tasks:
        itask.init_task_numbers()
        if os.path.exists(itask.result_file):
            os.remove(itask.result_file)

    msg_text = "isc task master start, load %d isc tasks from %s" % (len(isc_tasks), isc_config_file)
    logging.info(msg_text)
    msg.send_message(msg_text)

    sleep_cnt = 0
    online_hosts = set()
    progress_msg_cnt = 10
    task_finish = False
    print_loop = 100
    print_cnt = 0

    while not task_finish:
        print_cnt += 1
        if print_cnt == print_loop:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0
        if sleep_cnt == progress_msg_cnt:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0

        task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts,
                                              manager_tuple, working_hosts_number)
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue

        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        whn_number = whn_diff[0]
        host_ip = whn_diff[1]
        working_hosts_number += whn_number
        if whn_number == 1:
            if host_ip != localhost_ip:
                online_hosts.add(host_ip)
        elif whn_number == -1:
            if host_ip != localhost_ip:
                online_hosts.remove(host_ip)

    ts_generator_pool.join()
    ht_pool.join()
    HTCheckingWorker.send_worker_terminate_info(HTCheckingWorker, localhost_ip, result_queue)

    while working_hosts_number > 0:
        if sleep_cnt == 10:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, True)
            sleep_cnt = 0
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue
        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        working_hosts_number += whn_diff[0]

    msg_text = "isc tasks finish!"
    logging.info(msg_text)
    msg.send_message(msg=msg_text)

    for it in isc_tasks:
        it.dump_tmp_se_condition_saving_mem()
        msg_text = it.get_final_detail_progress_info()
        logging.info(msg_text)
        msg.send_message(msg=msg_text)

    return isc_tasks
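# The master loops in this module share one skeleton: poll the result queue,
# sleep when idle, and emit progress after every few sleeps. A stripped-down,
# self-contained sketch of that control flow; report and process_one are
# hypothetical stand-ins for the progress and result handlers.
import time

def report():
    pass  # hypothetical progress hook

def process_one(item):
    return item is None  # hypothetical handler; True ends the loop

def poll_loop(result_queue, sleep_time=30, progress_every=10):
    sleep_cnt = 0
    finished = False
    while not finished:
        if sleep_cnt == progress_every:
            report()
            sleep_cnt = 0
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue
        finished = process_one(result_queue.get())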
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
    # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    task_queue = manager_tuple[1]
    start_time = datetime.now()

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name
    processed_task_slices_number = 0

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    for itask in isc_tasks:
        # itask.loaded_non_se_condition_files.add(1)
        itask.loaded_non_se_condition_files.add(0)

    is_process_task_queue = False
    task_slice_cache = None
    last_nse_iset_number = 0
    result_queue_cache = list()

    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        is_task_finish = cls.check_itasks_finish_status(isc_tasks)
        if is_task_finish:
            logging.info("%s:%s all itasks terminate ..." % (worker_host_name, worker_name))
            break

        if task_slice_cache is None:
            if task_queue.empty():
                if is_process_task_queue:
                    logging.info("%s:%s waiting for task queue ... " % (worker_host_name, worker_name))
                    is_process_task_queue = False
                time.sleep(1)
                continue
            else:
                task_slice_cache = task_queue.get()
                processed_task_slices_number += 1
                is_process_task_queue = True

        itask_id = task_slice_cache[0]
        if itask_id == ITaskSignal.kill_signal:
            break

        itask = isc_tasks[itask_id]
        if itask.is_task_finish:
            task_slice_cache = None
            continue

        task_slice = task_slice_cache[1]
        nse_iset_number = task_slice[2] + len(task_slice[0]) - 1
        load_nse_complete = cls.task_worker_load_nse_conditions(itask, nse_iset_number + 1)

        if not load_nse_complete:
            if last_nse_iset_number != nse_iset_number:
                last_nse_iset_number = nse_iset_number
                logging.info((task_slice,
                              "%s:%s waiting for %d-%d-%d nse complete file %d" %
                              (worker_host_name, worker_name, *itask.k_m_n, nse_iset_number)))
            result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                cls, result_queue_cache, manager_tuple[3], start_time)
            time.sleep(1)
            continue

        rq_cache = cls.process_task_slice(cls, itask_id, itask, task_slice, manager_tuple)
        result_queue_cache.extend(rq_cache)
        task_slice_cache = None

        if len(result_queue_cache) > 2000:
            result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                cls, result_queue_cache, manager_tuple[3], start_time)

    logging.info("%s processes %d isc task slices ... " % (worker_name, processed_task_slices_number))
def init_kmn_isc_task_master_from_config(cls, isc_config_file="isets-tasks.json",
                                         sleep_time=30):
    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_master_queue_manager()
    manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    localhost_ip = ssh.get_host_ip()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    result_record = list()
    for itask in isc_tasks:
        isnse.clear_task_terminate_flag_files(*itask.k_m_n)
        i4_iset_size = len(itask.meta_data.search_i4_composed_iset_ids)
        file = i4u.get_kmn_i4_all_result_file(*itask.k_m_n)
        if os.path.exists(file):
            os.remove(file)
        record = [2 ** i4_iset_size - 1, 0, list(), file]
        result_record.append(record)

    ts_generator_pool = cls.init_task_slices_generator_pool(cls, isc_config_file)
    pre_task_pool = cls.init_pre_task_worker_pool(cls, isc_config_file, result_queue)
    working_hosts_number = 0

    msg_text = "isc task master start, load %d isc tasks from %s" % (len(isc_tasks), isc_config_file)
    logging.info(msg_text)
    msg.send_message(msg_text)

    sleep_cnt = 0
    online_hosts = set()
    progress_msg_cnt = 10
    task_finish = False
    print_loop = 10
    print_cnt = 0

    while not task_finish:
        print_cnt += 1
        if print_cnt == print_loop:
            cls.send_itasks_progress_info(cls, result_record, manager_tuple, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0
        if sleep_cnt == progress_msg_cnt:
            cls.send_itasks_progress_info(cls, result_record, manager_tuple, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0

        task_finish = cls.check_i4_tasks_status(cls, result_record)
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue

        whn_diff = cls.process_i4_result_queue(cls, result_queue, result_record)
        whn_number = whn_diff[0]
        host_ip = whn_diff[1]
        working_hosts_number += whn_number
        if whn_number == 1:
            if host_ip != localhost_ip:
                online_hosts.add(host_ip)
        elif whn_number == -1:
            if host_ip != localhost_ip:
                online_hosts.remove(host_ip)

    ts_generator_pool.join()
    pre_task_pool.join()
    I4SearchWorker.send_worker_terminate_info(I4SearchWorker, localhost_ip, result_queue)

    while working_hosts_number > 0:
        if sleep_cnt == 10:
            cls.send_itasks_progress_info(cls, result_record, manager_tuple, working_hosts_number, True)
            sleep_cnt = 0
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue
        whn_diff = cls.process_i4_result_queue(cls, result_queue, result_record)
        working_hosts_number += whn_diff[0]

    msg_text = "isc tasks finish!"
    logging.info(msg_text)
    msg.send_message(msg=msg_text)
    cls.send_itasks_progress_info(cls, result_record, manager_tuple, working_hosts_number, True)
    return isc_tasks
def init_kmn_isc_task_master_from_config(cls, isc_config_file="isets-tasks.json",
                                         sleep_time=30):
    start_time = datetime.now()
    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_master_queue_manager()
    manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
    localhost_ip = ssh.get_host_ip()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks
    for itask in isc_tasks:
        itask.init_task_numbers()
        isnse.clear_task_terminate_flag_files(*itask.k_m_n)

    ts_generator_pool = cls.init_task_slices_generator_pool(cls, isc_config_file)
    pre_pool = cls.init_pre_task_worker_pool(cls, isc_config_file, result_queue)
    working_hosts_number = 0

    msg_text = "isc task master start, load %d isc tasks from %s" % (len(isc_tasks), isc_config_file)
    logging.info(msg_text)
    msg.send_message(msg_text)

    sleep_cnt = 0
    online_hosts = set()
    progress_msg_cnt = 10
    task_finish = False
    print_loop = 100
    print_cnt = 0

    while not task_finish:
        print_cnt += 1
        if print_cnt == print_loop:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, False)
            for it in isc_tasks:
                it.save_progress_info()
            sleep_cnt = 0
            print_cnt = 0
        if sleep_cnt == progress_msg_cnt:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, False)
            for it in isc_tasks:
                it.save_progress_info()
            sleep_cnt = 0
            print_cnt = 0

        task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts,
                                              manager_tuple, working_hosts_number)
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue

        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        whn_number = whn_diff[0]
        host_ip = whn_diff[1]
        working_hosts_number += whn_number
        if whn_number == 1:
            if host_ip != localhost_ip:
                online_hosts.add(host_ip)
                cls.update_nse_files_to_new_host(host_ip, isc_tasks)
        elif whn_number == -1:
            if host_ip != localhost_ip:
                online_hosts.remove(host_ip)

    ts_generator_pool.join()
    pre_pool.join()
    RawIConditionSearchWorker.send_worker_terminate_info(RawIConditionSearchWorker,
                                                         localhost_ip, result_queue)

    while working_hosts_number > 0:
        if sleep_cnt == 10:
            cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, True)
            sleep_cnt = 0
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue
        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        working_hosts_number += whn_diff[0]

    end_time = datetime.now()
    msg_text = "isc tasks finish, running time: %s" % str(end_time - start_time)
    logging.info(msg_text)
    msg.send_message(msg=msg_text)

    for it in isc_tasks:
        it.task_finish()
        msg_text = it.get_final_detail_progress_info()
        logging.info(msg_text)
        msg.send_message(msg=msg_text)
        # attached_files.append(it.result_file)

    return isc_tasks
def init_kmn_isc_task_master_from_config(cls, isc_config_file="isets-tasks.json",
                                         sleep_time=30):
    start_time = datetime.now()
    SearchMasterQueueManger.register("get_task_queue", callable=cls.get_global_task_queue)
    SearchMasterQueueManger.register("get_result_queue", callable=cls.get_global_result_queue)
    manager = SearchMasterQueueManger(address=(config.task_host, config.task_host_port),
                                      authkey=bytes(config.task_host_key, encoding="utf-8"))
    manager.start()
    task_queue = manager.get_task_queue()
    result_queue = manager.get_result_queue()
    localhost_ip = ssh.get_host_ip()

    task_generator = Pool(2)
    task_generator.apply_async(cls.itask_slices_generator, args=(cls, isc_config_file))
    task_generator.close()
    working_hosts_number = 0

    msg_text = "isc task master start, load isc tasks from %s" % isc_config_file
    logging.info(msg_text)
    msg.send_message(msg_text)

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks
    for itask in isc_tasks:
        itask.init_task_numbers()

    sleep_cnt = 0
    online_hosts = set()
    progress_msg_cnt = 10
    task_finish = False
    print_loop = 100000
    print_cnt = 0

    while not task_finish:
        print_cnt += 1
        if print_cnt == print_loop:
            cls.send_itasks_progress_info(cls, isc_tasks, task_queue, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0
        if sleep_cnt == progress_msg_cnt:
            cls.send_itasks_progress_info(cls, isc_tasks, task_queue, working_hosts_number, False)
            sleep_cnt = 0
            print_cnt = 0

        task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts,
                                              task_queue, working_hosts_number)
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue

        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        whn_number = whn_diff[0]
        host_ip = whn_diff[1]
        working_hosts_number += whn_number
        if whn_number == 1:
            if host_ip != localhost_ip:
                online_hosts.add(host_ip)
                cls.update_nse_files_to_new_host(host_ip, isc_tasks)
        elif whn_number == -1:
            if host_ip != localhost_ip:
                online_hosts.remove(host_ip)

    task_generator.join()

    while working_hosts_number > 0:
        if sleep_cnt == 10:
            cls.send_itasks_progress_info(cls, isc_tasks, task_queue, working_hosts_number, True)
            sleep_cnt = 0
        if result_queue.empty():
            time.sleep(sleep_time)
            sleep_cnt += 1
            continue
        whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
        working_hosts_number += whn_diff[0]

    msg_texts = []
    attached_files = []
    for it in isc_tasks:
        it.task_finish()
        msg_texts.append(it.get_final_detail_progress_info())
        attached_files.append(it.result_file)

    msg_text = "isc tasks finish! \n\t\t%s" % "\n\t\t".join(msg_texts)
    logging.info(msg_text)
    msg.send_message(msg=msg_text, attached_files=attached_files)
    return isc_tasks
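# The SearchMasterQueueManger / SearchWorkerQueueManger pair follows the
# standard multiprocessing.managers recipe: the master registers callables
# that expose real queues and calls start(); workers register the same names
# without callables and call connect(). A minimal sketch under those
# assumptions (address and authkey values are illustrative):
from multiprocessing.managers import BaseManager
import queue

task_q, result_q = queue.Queue(), queue.Queue()

class MasterManager(BaseManager):
    pass

MasterManager.register("get_task_queue", callable=lambda: task_q)
MasterManager.register("get_result_queue", callable=lambda: result_q)

def start_master():
    m = MasterManager(address=("", 50000), authkey=b"demo-key")
    m.start()  # serve the registered queues from a child process
    return m

class WorkerManager(BaseManager):
    pass

WorkerManager.register("get_task_queue")
WorkerManager.register("get_result_queue")

def connect_worker(host):
    m = WorkerManager(address=(host, 50000), authkey=b"demo-key")
    m.connect()  # attach to the master's server
    return m.get_task_queue(), m.get_result_queue()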
def itask_slices_generator(cls, isc_config_file):
    max_space_size = 100000000000
    msg_text = "%s init task slices generator ..." % str(cls)
    logging.info(msg_text)
    msg.send_message(msg_text)

    manager, task_queue, ht_task_queue, result_queue = \
        SearchQueueManager.init_task_worker_queue_manager()

    isc_tasks_cfg = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks_cfg.isc_tasks

    for tid in range(len(isc_tasks)):
        it = isc_tasks[tid]
        min_ne = it.min_ne
        max_ne = it.max_ne
        isnse.clear_task_space_layer_finish_flag_files(*it.k_m_n, min_ne, max_ne)

        left_zone_length = len(it.meta_data.search_i4_composed_iset_ids)
        search_isets = copy.deepcopy(it.meta_data.search_space_iset_ids)
        search_isets_length = len(search_isets)
        max_left_zone_length = 12
        if left_zone_length > max_left_zone_length:
            left_zone_length = max_left_zone_length

        rule_number = sum(it.k_m_n)
        left_zone_iset_ids = search_isets[0:left_zone_length]
        right_zone_iset_ids = search_isets[left_zone_length:]

        for ne_iset_number in range(min_ne, max_ne + 1):
            msg_text = "generating %d-%d-%d %d layer task slices" % (*it.k_m_n, ne_iset_number)
            logging.info(msg_text)

            if ne_iset_number <= rule_number:
                left_split = True
                task_slices = CombinationSearchingSpaceSplitter.vandermonde_generator(
                    left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
                for ts in task_slices:
                    new_ts = (left_split, set(ts[0]), left_zone_length, ts[2])
                    task_queue.put((tid, new_ts))
            else:
                # wait until the layer two levels below has finished before
                # generating merged slices for this layer
                if not cls.check_itask_terminate_status(it):
                    flag_file = isnse.get_task_space_layer_finish_flag_file(
                        *it.k_m_n, ne_iset_number - 2)
                    while not pathlib.Path(flag_file).exists():
                        if cls.check_itask_terminate_status(it):
                            break
                        time.sleep(1)
                task_slices = CombinationSearchingSpaceSplitter.merge_small_near_uniform_vandermonde_generator(
                    left_zone_iset_ids, right_zone_iset_ids, ne_iset_number,
                    max_space_size=max_space_size)
                ts_cnt = 0
                for ts in task_slices:
                    task_queue.put((tid, ts))
                    ts_cnt += 1
                    if ts_cnt % 10000 == 0 and cls.check_itask_terminate_status(it):
                        break

    working_hosts_number = 5
    for i in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))

    logging.info("all itasks have been dispatched")
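# A stand-alone sketch of the flag-file gate used above: block until another
# process creates the marker file, polling once per second. The path and the
# terminate predicate are illustrative.
import pathlib
import time

def wait_for_flag(flag_file, should_terminate=lambda: False, poll_s=1):
    flag = pathlib.Path(flag_file)
    while not flag.exists():
        if should_terminate():
            return False  # give up if the task was cancelled meanwhile
        time.sleep(poll_s)
    return True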
def kmn_isc_task_worker(cls, isc_config_file="isets-tasks.json", worker_id=1,
                        is_check_valid_rules=True):
    SearchWorkerQueueManger.register("get_task_queue")
    SearchWorkerQueueManger.register("get_result_queue")
    manager = SearchWorkerQueueManger(address=(config.task_host, config.task_host_port),
                                      authkey=bytes(config.task_host_key, encoding="utf-8"))
    # note: the incoming is_check_valid_rules argument is forced off here
    is_check_valid_rules = False
    manager.connect()
    task_queue = manager.get_task_queue()
    result_queue = manager.get_result_queue()

    worker_name = "worker-%d" % worker_id
    worker_host_name = config.worker_host_name

    msg_text = "task worker %s start!" % worker_name
    logging.info(msg_text)

    isc_tasks = ITaskConfig(isc_config_file)
    isc_tasks = isc_tasks.isc_tasks
    processed_task_slices_number = 0
    for itask in isc_tasks:
        itask.loaded_non_se_condition_files.add(1)
        itask.loaded_non_se_condition_files.add(0)

    first_print_debug_log = True
    while True:
        if not pathlib.Path(config.task_host_lock_file).exists():
            break

        if task_queue.empty():
            if first_print_debug_log:
                logging.info("waiting for isc task slices")
                first_print_debug_log = False
            time.sleep(2)
            continue

        first_print_debug_log = True
        task_slice = task_queue.get()
        if task_slice[0] == ITaskSignal.kill_signal:
            msg_text = "%s:%s isc task worker terminate ..." % (worker_host_name, worker_name)
            logging.info(msg_text)
            break

        itask = isc_tasks[task_slice[0]]
        task_name = worker_name + ("-task-%d" % processed_task_slices_number)
        cls.process_kmn_itask_slice(cls, itask, task_slice, task_name,
                                    result_queue, is_check_valid_rules)
        processed_task_slices_number += 1

    logging.info("%s processes %d isc task slices" % (worker_name, processed_task_slices_number))