Пример #1
0
    def itask_slices_generator(cls, isc_config_file):
        """Cut every task's ht-task index range into batches and enqueue them.

        For each task loaded from ``isc_config_file`` the range
        ``0..ht_task_number`` is split at multiples of 100 (plus the end
        point) and each consecutive ``(begin, end)`` boundary pair is put on
        the ht task queue as ``(task_id, (begin, end))``.  Afterwards
        ``working_hosts_number * 200`` ``(ITaskSignal.kill_signal, -1)`` items
        are appended to the same queue.
        """
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        # Only the ht task queue of the returned 4-tuple is used here.
        _, _, ht_task_queue, _ = \
            SearchQueueManager.init_task_worker_queue_manager()

        batch_size = 100
        for task_id, task in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            total = task.ht_task_number
            boundaries = list(range(0, total + 1, batch_size))
            # Close the final, shorter batch when total is not a multiple of
            # batch_size.
            if boundaries[-1] < total:
                boundaries.append(total)

            for begin, end in zip(boundaries, boundaries[1:]):
                ht_task_queue.put((task_id, (begin, end)))

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            ht_task_queue.put((ITaskSignal.kill_signal, -1))

        done_text = "all itasks has been dispatched"
        logging.info(done_text)
        msg.send_message(done_text)
Пример #2
0
    def init_kmn_isc_task_workers(cls,
                                  isc_config_file="isets-tasks.json",
                                  is_check_valid_rules=True,
                                  result_queue=None):
        """Start this host's worker pool and announce the host on the result queue.

        Args:
            isc_config_file: task configuration file, given to ``ITaskConfig``
                and to every worker.
            is_check_valid_rules: forwarded unchanged to every worker.
            result_queue: queue to report on; when ``None`` it is taken from
                ``SearchQueueManager.init_task_worker_queue_manager()``.

        Returns:
            ``(worker_pool, result_queue, host_ip)``.  The pool has been
            closed but not joined.
        """
        worker_count = config.worker_payload
        pool = Pool(worker_count)
        # Create the host lock file.
        pathlib.Path(config.task_host_lock_file).touch()

        if result_queue is None:
            _, _, _, result_queue = \
                SearchQueueManager.init_task_worker_queue_manager()

        host_ip = ssh.get_host_ip()
        host_name = config.worker_host_name
        result_queue.put((ITaskSignal.add_worker_signal, host_name, host_ip))
        logging.info("task worker host %s start ..." % host_name)

        # Initialise the non-equivalent condition directory files.
        cls.init_worker_host_nse_envs(ITaskConfig(isc_config_file).isc_tasks)

        # Worker ids are 1-based.
        for worker_id in range(1, worker_count + 1):
            pool.apply_async(cls.kmn_isc_task_worker,
                             args=(cls, isc_config_file, worker_id,
                                   is_check_valid_rules))
        pool.close()
        return pool, result_queue, host_ip
Пример #3
0
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Consume task slices from the task queue and report the results.

        Each fetched item is ``(itask_id, task_slice)``.  The slice is handed
        to ``cls.process_semi_valid_task_slices`` and its two results are put
        on the result queue as an ``se_condition_signal`` item and a
        ``stat_signal`` item.  The loop stops when the host lock file no
        longer exists or when the fetched ``itask_id`` equals
        ``ITaskSignal.kill_signal``.

        ``is_check_valid_rules`` is accepted but not read here.
        """
        # queues = (manager, task_queue, ht_task_queue, result_queue)
        queues = SearchQueueManager.init_task_worker_queue_manager()
        task_queue, result_queue = queues[1], queues[3]

        worker_name = "worker-%d" % worker_id
        host_name = config.worker_host_name
        handled = 0

        logging.info("task worker %s start!" % (worker_name))
        isc_tasks = ITaskConfig(isc_config_file).isc_tasks

        # True while the previous iteration fetched an item; used to log the
        # "waiting" message only once per idle period.
        was_busy = False
        while pathlib.Path(config.task_host_lock_file).exists():
            if task_queue.empty():
                if was_busy:
                    logging.info("%s:%s waiting for task queue ... " %
                                 (host_name, worker_name))
                    was_busy = False
                time.sleep(1)
                continue

            item = task_queue.get()
            handled += 1
            was_busy = True

            itask_id = item[0]
            if itask_id == ITaskSignal.kill_signal:
                break

            i4_ne_isets, task_number = cls.process_semi_valid_task_slices(
                cls, itask_id, isc_tasks[itask_id], item[1])
            result_queue.put(
                (ITaskSignal.se_condition_signal, itask_id, i4_ne_isets))
            result_queue.put((ITaskSignal.stat_signal, itask_id, task_number))

        logging.info("%s processes %d isc task slices ... " %
                     (worker_name, handled))
Пример #4
0
    def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
        """Enqueue one task item per left-zone combination of every isc task.

        The left zone is the first ``min(int(n / 2), 12)`` ids of the task's
        ``search_space_iset_ids`` (``n`` being their count).  For every
        ``ne_iset_number`` in ``min_ne..max_ne`` and every feasible split of
        that number between left and right zone, each combination of
        left-zone ids is put on the task queue as
        ``(tid, (ne_iset_number, set(left_zone), list(combination)))``.
        Finally ``working_hosts_number * 200`` kill-signal items are appended.
        """
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        for getter_name in ("get_task_queue", "get_result_queue"):
            SearchWorkerQueueManger.register(getter_name)
        manager = SearchWorkerQueueManger(
            address=(config.task_host, config.task_host_port),
            authkey=bytes(config.task_host_key, encoding="utf-8"))
        manager.connect()
        task_queue = manager.get_task_queue()

        max_left_length = 12
        for tid, it in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            min_ne, max_ne = it.min_ne, it.max_ne
            search_iset_ids = it.meta_data.search_space_iset_ids
            total = len(search_iset_ids)

            left_length = min(int(total / 2), max_left_length)
            right_length = total - left_length
            left_zone_isets = search_iset_ids[0:left_length]

            for ne_iset_number in range(min_ne, max_ne + 1):
                for left_pick in range(ne_iset_number + 1):
                    right_pick = ne_iset_number - left_pick
                    # Only splits that fit into both zones are feasible.
                    if left_pick <= left_length and right_pick <= right_length:
                        for combo in itertools.combinations(
                                left_zone_isets, left_pick):
                            task_queue.put(
                                (tid, (ne_iset_number, set(left_zone_isets),
                                       list(combo))))

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
Пример #5
0
    def itask_slices_generator(cls, isc_config_file):
        """Enqueue near-uniform Vandermonde task slices for every isc task.

        The left zone is the set of ``search_i4_composed_iset_ids`` (cut down
        to 12 elements when larger); the right zone is the remainder of
        ``search_space_iset_ids``.  For every ``ne_iset_number`` in
        ``min_ne..max_ne`` the slices yielded by
        ``CombinationSearchingSpaceSplitter.near_uniform_vandermonde_generator``
        are put on the task queue as ``(tid, (set, set, ts[2]))``.  A layer is
        skipped when ``cls.check_itask_terminate_status(it)`` is true, and the
        same check is repeated after every 10000 enqueued slices.  Finally
        ``working_hosts_number * 200`` kill-signal items are appended.
        """
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        # Only the task queue of the returned 4-tuple is used here.
        _, task_queue, _, _ = \
            SearchQueueManager.init_task_worker_queue_manager()

        max_left_zone_length = 12
        for tid, it in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            min_ne = it.min_ne
            max_ne = it.max_ne

            search_space_ids = set(
                copy.deepcopy(it.meta_data.search_space_iset_ids))
            left_zone_iset_ids = set(it.meta_data.search_i4_composed_iset_ids)
            if len(left_zone_iset_ids) > max_left_zone_length:
                left_zone_iset_ids = set(
                    list(left_zone_iset_ids)[0:max_left_zone_length])

            right_zone_iset_ids = search_space_ids - left_zone_iset_ids

            for ne_iset_number in range(min_ne, max_ne + 1):
                if cls.check_itask_terminate_status(it):
                    continue

                task_slices = CombinationSearchingSpaceSplitter.near_uniform_vandermonde_generator(
                    left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
                for ts_cnt, ts in enumerate(task_slices, start=1):
                    task_queue.put((tid, (set(ts[0]), set(ts[1]), ts[2])))
                    # Re-check the terminate status every 10000 slices.
                    if ts_cnt % 10000 == 0 and \
                            cls.check_itask_terminate_status(it):
                        break

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
Пример #6
0
    def itask_slices_generator(cls, isc_config_file):
        """Generate task slices layer by layer for every isc task.

        For each task the layer finish-flag files are cleared through
        ``isnse.clear_task_space_layer_finish_flag_files``; then, for every
        ``ne_iset_number`` in ``min_ne..max_ne``,
        ``cls.itask_slice_generator_by_i4_meta`` is called with the queue
        manager tuple.  Finally ``working_hosts_number * 200`` kill-signal
        items are put on the task queue.
        """
        max_space_size = 10000000000
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        task_queue = manager_tuple[1]

        for tid, it in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            min_ne, max_ne = it.min_ne, it.max_ne
            # Not read below; it was only used by a disabled variant that,
            # for layers above rule_number, waited for the finish-flag file
            # of layer ne_iset_number - 2 before generating slices.
            rule_number = sum(it.k_m_n)
            isnse.clear_task_space_layer_finish_flag_files(
                *it.k_m_n, min_ne, max_ne)

            for ne_iset_number in range(min_ne, max_ne + 1):
                logging.info("generating %d-%d-%d %d layer task slices" %
                             (*it.k_m_n, ne_iset_number))
                cls.itask_slice_generator_by_i4_meta(ne_iset_number, tid, it,
                                                     max_space_size,
                                                     manager_tuple)

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
Пример #7
0
    def itask_slices_generator(cls, isc_config_file):
        """Enqueue Vandermonde task slices over each task's i4-composed iset ids.

        The id list is split into a left zone of 10 ids (or half of the list
        when it holds fewer than 10) and a right zone with the rest.  For
        every ``ne_iset_number`` from 1 to the list length, each slice
        yielded by ``CombinationSearchingSpaceSplitter.vandermonde_generator``
        is put on the task queue as ``(tid, slice)``.  Finally
        ``working_hosts_number * 200`` kill-signal items are appended.
        """
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        # Only the task queue of the returned 4-tuple is used here.
        _, task_queue, _, _ = \
            SearchQueueManager.init_task_worker_queue_manager()

        max_left_zone_length = 10
        for tid, it in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            i4_iset_ids = list(it.meta_data.search_i4_composed_iset_ids)
            i4_iset_size = len(i4_iset_ids)

            if i4_iset_size < max_left_zone_length:
                left_zone_size = i4_iset_size // 2
            else:
                left_zone_size = max_left_zone_length

            left_zone_iset_ids = i4_iset_ids[:left_zone_size]
            right_zone_iset_ids = i4_iset_ids[left_zone_size:]

            for ne_iset_number in range(1, i4_iset_size + 1):
                slices = CombinationSearchingSpaceSplitter.vandermonde_generator(
                    left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
                for ts in slices:
                    task_queue.put((tid, ts))

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
Пример #8
0
    def init_kmn_isc_task_workers(cls,
                                  isc_config_file="isets-tasks.json",
                                  is_check_valid_rules=True):
        """Run this host's worker pool to completion and report on the result queue.

        Connects to the queue manager at ``config.task_host``, puts an
        ``add_worker_signal`` item on the result queue, starts
        ``config.worker_payload`` workers, waits for all of them to finish and
        then puts a ``kill_signal`` item for this host on the result queue.

        Args:
            isc_config_file: task configuration file, given to ``ITaskConfig``
                and to every worker.
            is_check_valid_rules: forwarded unchanged to every worker.
        """
        worker_count = config.worker_payload
        pool = Pool(worker_count)
        # Create the host lock file.
        pathlib.Path(config.task_host_lock_file).touch()

        for getter_name in ("get_task_queue", "get_result_queue"):
            SearchWorkerQueueManger.register(getter_name)
        manager = SearchWorkerQueueManger(
            address=(config.task_host, config.task_host_port),
            authkey=bytes(config.task_host_key, encoding="utf-8"))
        manager.connect()
        result_queue = manager.get_result_queue()
        host_ip = ssh.get_host_ip()

        result_queue.put(
            (ITaskSignal.add_worker_signal, config.worker_host_name, host_ip))
        logging.info("task worker host %s start ..." % config.worker_host_name)

        # Initialise the non-equivalent condition directory files.
        cls.init_worker_host_nse_envs(ITaskConfig(isc_config_file).isc_tasks)

        # Worker ids are 1-based.
        for worker_id in range(1, worker_count + 1):
            pool.apply_async(cls.kmn_isc_task_worker,
                             args=(cls, isc_config_file, worker_id,
                                   is_check_valid_rules))
        pool.close()
        pool.join()

        # All workers have returned: report that this host is done.
        result_queue.put(
            (ITaskSignal.kill_signal, config.worker_host_name, host_ip))
        logging.info("task worker host %s send kill signal ..." %
                     config.worker_host_name)
        logging.info("task worker host %s exit ..." % config.worker_host_name)
Пример #9
0
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Consume ht task slices, search them and report the results.

        Each fetched item is ``(itask_id, slice)``.  The slice is handed to
        ``cls.search_ht_task_slice``; non-empty se / nse condition caches are
        put on the result queue, followed by a ``stat_signal`` tuple that
        carries the check count and the start / end timestamps.

        The loop stops when the host lock file no longer exists, when
        ``cls.check_itasks_finish_status`` reports true, or when the fetched
        ``itask_id`` equals ``ITaskSignal.kill_signal``.

        Args:
            isc_config_file: task configuration file given to ``ITaskConfig``.
            worker_id: number used to build the worker name.
            is_check_valid_rules: accepted but not read here.
        """
        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        ht_task_queue = manager_tuple[2]
        result_queue = manager_tuple[3]

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name
        processed_ht_task_slices_number = 0

        msg_text = "task worker %s start!" % (worker_name)
        logging.info(msg_text)
        isc_tasks = ITaskConfig(isc_config_file)
        isc_tasks = isc_tasks.isc_tasks

        # True while the previous iteration fetched an item; used to log the
        # "waiting" message only once per idle period.
        is_process_ht_task_queue = False

        while True:
            if not pathlib.Path(config.task_host_lock_file).exists():
                break

            is_task_finish = cls.check_itasks_finish_status(isc_tasks)
            if is_task_finish:
                logging.info("%s:%s all itasks terminate ..." %
                             (worker_host_name, worker_name))
                break

            if ht_task_queue.empty():
                if is_process_ht_task_queue:
                    is_process_ht_task_queue = False
                    logging.info("%s:%s waiting for ht task queue ..." %
                                 (worker_host_name, worker_name))
                time.sleep(1)
                continue

            is_process_ht_task_queue = True

            ts = ht_task_queue.get()
            start_time = datetime.now()
            itask_id = ts[0]

            # Slice generators append (ITaskSignal.kill_signal, -1) sentinels
            # after the real slices.  Stop here instead of using the sentinel
            # as an index into isc_tasks.
            if itask_id == ITaskSignal.kill_signal:
                break

            # Count real slices only, not the kill sentinel.
            processed_ht_task_slices_number += 1
            itask = isc_tasks[itask_id]

            ne_iset_number, task_check_number, se_conditions_cache, nse_conditions_cache = \
                cls.search_ht_task_slice(cls, itask, ts[1])
            end_time = datetime.now()

            # Debug aid: print the slice when nse conditions show up for a
            # layer above the task's rule number.
            if ne_iset_number > itask.rule_number and len(
                    nse_conditions_cache) > 0:
                print("debug info (wrong nse conditions): ", ts)

            if len(se_conditions_cache) > 0:
                result_queue.put((ITaskSignal.se_condition_signal, itask_id,
                                  se_conditions_cache))

            if len(nse_conditions_cache) > 0:
                result_queue.put((ITaskSignal.nse_condition_signal, itask_id,
                                  nse_conditions_cache))

            # task_check_number is deliberately sent in two positions of the
            # stat tuple, followed by a constant 0.
            result_tuple = (ITaskSignal.stat_signal, itask_id, ne_iset_number,
                            task_check_number, task_check_number, 0,
                            (start_time, end_time))
            result_queue.put(result_tuple)

        logging.info("%s processes  %d ht itask slices" %
                     (worker_name, processed_ht_task_slices_number))
    def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
        """Enqueue left-zone combinations, skipping those flagged by the rule check.

        The left zone is the task's ``search_i4_composed_iset_ids``.  For
        every ``ne_iset_number`` in ``min_ne..max_ne`` and every feasible
        left / right split, each left-zone combination is tested with
        ``iscm.check_contain_rules_without_i_n_iset``:

        * when the check is true, no task is enqueued; instead a
          ``stat_signal`` item carrying ``C(right_zone_length,
          right_iset_number)`` as both task number and skip count is put on
          the result queue;
        * otherwise ``(tid, (ne_iset_number, set(left_zone), combination))``
          is put on the task queue.

        Finally ``working_hosts_number * 200`` kill-signal items are appended
        to the task queue.
        """
        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        for getter_name in ("get_task_queue", "get_result_queue"):
            SearchWorkerQueueManger.register(getter_name)
        manager = SearchWorkerQueueManger(
            address=(config.task_host, config.task_host_port),
            authkey=bytes(config.task_host_key, encoding="utf-8"))
        manager.connect()
        task_queue = manager.get_task_queue()
        result_queue = manager.get_result_queue()

        for tid, it in enumerate(ITaskConfig(isc_config_file).isc_tasks):
            min_ne = it.min_ne
            max_ne = it.max_ne
            unknown_iset_number = len(it.meta_data.search_space_iset_ids)
            rule_number = it.rule_number

            left_zone_iset_ids = it.meta_data.search_i4_composed_iset_ids
            left_zone_length = len(left_zone_iset_ids)
            right_zone_length = unknown_iset_number - left_zone_length
            is_use_extended_rules = it.is_use_extended_rules

            for ne_iset_number in range(min_ne, max_ne + 1):
                for left_pick in range(ne_iset_number + 1):
                    right_pick = ne_iset_number - left_pick
                    # Skip splits that do not fit into both zones.
                    if not (left_pick <= left_zone_length
                            and right_pick <= right_zone_length):
                        continue

                    for combo in itertools.combinations(left_zone_iset_ids,
                                                        left_pick):
                        left_iset_ids = list(combo)
                        flagged = iscm.check_contain_rules_without_i_n_iset(
                            4, left_iset_ids, rule_number,
                            is_use_extended_rules)

                        if not flagged:
                            task_queue.put(
                                (tid, (ne_iset_number,
                                       set(left_zone_iset_ids),
                                       left_iset_ids)))
                            continue

                        # C(right_zone_length, right_pick) right-zone choices
                        # are reported as skipped, with zero checks done.
                        skipped = CombinaryCounter.compute_comb(
                            right_zone_length, right_pick)
                        result_queue.put(
                            (ITaskSignal.stat_signal, tid, ne_iset_number, 0,
                             skipped, skipped, None))

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Worker loop: fetch task slices, process them and send batched results.

        A fetched slice is kept in ``task_slice_cache`` until it has actually
        been processed, so a slice whose nse conditions are not loaded yet is
        retried on the next iteration instead of being dropped.

        The loop ends when the host lock file no longer exists, when
        ``cls.check_itasks_finish_status`` reports true, or when the cached
        slice's first element equals ``ITaskSignal.kill_signal``.

        Args:
            isc_config_file: task configuration file given to ``ITaskConfig``.
            worker_id: number used to build the worker name; also passed (as a
                string) to ``riu.get_empty_raw_icondition_file``.
            is_check_valid_rules: accepted but not read here.
        """
        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        task_queue = manager_tuple[1]
        result_queue = manager_tuple[3]
        # Passed to every batch_send_stat_info_2_result_queue call below.
        start_time = datetime.now()

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name
        processed_task_slices_number = 0

        msg_text = "task worker %s start!" % (worker_name)
        logging.info(msg_text)
        isc_tasks = ITaskConfig(isc_config_file)
        isc_tasks = isc_tasks.isc_tasks

        # One raw condition file per task, indexed by task id below.
        raw_condition_files = list()
        for itask in isc_tasks:
            rule_number = sum(itask.k_m_n)
            search_space_iset_number = len(
                itask.meta_data.search_space_iset_ids)
            # Registers 0 and every number above rule_number in
            # loaded_non_se_condition_files.
            # NOTE(review): presumably this marks those layers as already
            # loaded so they are never waited for -- confirm.
            itask.loaded_non_se_condition_files.add(0)
            for i in range(rule_number + 1, search_space_iset_number + 1):
                itask.loaded_non_se_condition_files.add(i)

            rf = riu.get_empty_raw_icondition_file(*itask.k_m_n, itask.lp_type,
                                                   itask.is_use_extended_rules,
                                                   str(worker_id))
            raw_condition_files.append(rf)

        # True while the previous fetch succeeded; limits the "waiting" log
        # message to once per idle period.
        is_process_task_queue = False
        # The slice currently being worked on; None means "fetch a new one".
        task_slice_cache = None
        # Last layer for which the "waiting for nse complete file" message was
        # logged; avoids repeating it every second.
        last_nse_iset_number = 0
        # Result items collected locally and sent in batches.
        result_queue_cache = list()
        single_round_processed_task_number = 0
        # Incremented on every sleep; only read by the disabled code below.
        sleep_cnt = 0
        while True:

            # if sleep_cnt == 10:
            #     sleep_cnt = 0
            #     # logging.error(("result queue cache size ", len(result_queue_cache)))
            #     # logging.error("result queue cache has %d items, send sleep cnt 10", len(result_queue_cache))
            #     result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache,
            #                                                                  result_queue, start_time)

            if not pathlib.Path(config.task_host_lock_file).exists():
                break

            is_task_finish = cls.check_itasks_finish_status(isc_tasks)
            if is_task_finish:
                logging.info("%s:%s all itasks terminate ..." %
                             (worker_host_name, worker_name))
                break

            # Fetch a new slice only when the previous one has been consumed.
            if task_slice_cache is None:
                if task_queue.empty():
                    # logging.error("result queue cache has %d items, send task queue empty",
                    #               len(result_queue_cache))
                    # result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache,
                    #                                                              result_queue, start_time)
                    if is_process_task_queue:
                        logging.info("%s:%s waiting for task queue ... " %
                                     (worker_host_name, worker_name))
                        is_process_task_queue = False
                    time.sleep(1)
                    sleep_cnt += 1
                    continue
                else:
                    task_slice_cache = task_queue.get()
                    processed_task_slices_number += 1
                    single_round_processed_task_number += 1
                    is_process_task_queue = True

            itask_id = task_slice_cache[0]
            if itask_id == ITaskSignal.kill_signal:
                break

            itask = isc_tasks[itask_id]

            # Slices of a task already flagged as finished are discarded.
            if itask.is_task_finish:
                task_slice_cache = None
                continue
            # The bare string below is the original author's note on the
            # task_slice layout; it is an expression statement with no effect.
            """
            task_slice = (left_split, left, left_zone_length, right_choice_number)
            if left_split = True, left is left_isets
            else left is left_isets_size
            """

            task_slice = task_slice_cache[1]

            rule_number = sum(itask.k_m_n)
            ne_iset_number = cls.compute_ne_iset_number(task_slice)
            nse_ne_iset_number = ne_iset_number - 1

            load_nse_complete = cls.task_worker_load_nse_conditions(
                itask, ne_iset_number)
            if not load_nse_complete:
                # task_slice_cache is left untouched here, so the same slice
                # is retried after the sleep.
                # # logging.error("result queue cache has %d items, send load nse not complete", len(result_queue_cache))
                # result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache, result_queue, start_time)
                if last_nse_iset_number != nse_ne_iset_number:
                    last_nse_iset_number = nse_ne_iset_number
                    logging.info(
                        (task_slice,
                         "%s:%s waiting for %d-%d-%d nse complete file %d" %
                         (worker_host_name, worker_name, *itask.k_m_n,
                          nse_ne_iset_number)))
                time.sleep(1)
                sleep_cnt += 1
                continue

            rq_cache, ht_check_items = cls.process_merge_small_task_slices(
                cls, itask_id, itask, task_slice, manager_tuple)
            result_queue_cache.extend(rq_cache)
            # The slice is consumed; the next iteration fetches a new one.
            task_slice_cache = None

            raw_data_file = raw_condition_files[itask_id]
            ht_stat = cls.process_ht_tasks(cls, ht_check_items, itask_id,
                                           itask, ne_iset_number, result_queue,
                                           raw_data_file)
            if ht_stat is not None:
                result_queue_cache.append(ht_stat)

            # if len(result_queue_cache) > 2000:
            # logging.error("result queue cache has %d items, send cache size > 20000", len(result_queue_cache))

            # Layers up to rule_number are sent after every slice.  Higher
            # layers are sent only when the task queue holds fewer than 1000
            # items or the local cache exceeds 100000 items.
            if ne_iset_number <= rule_number:
                result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                    cls, result_queue_cache, result_queue, start_time)
            else:
                if task_queue.qsize() < 1000 or len(
                        result_queue_cache) > 100000:
                    result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                        cls, result_queue_cache, result_queue, start_time)

            # Progress message every 10000 fetched slices.
            if single_round_processed_task_number == 10000:
                msg_text = "%s:%s processes %d isc task slices, new round process %d task slices ... " % (
                    worker_host_name, worker_name,
                    processed_task_slices_number,
                    single_round_processed_task_number)
                single_round_processed_task_number = 0
                logging.info(msg_text)

        # NOTE(review): result_queue_cache is not sent after the loop; items
        # still cached when the loop breaks are not sent by this function.
        msg_text = "%s:%s processes %d isc task slices, new round process %d task slices ... " % (
            worker_host_name, worker_name, processed_task_slices_number,
            single_round_processed_task_number)
        logging.info(msg_text)
Пример #12
0
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Consume ht task slices and verify them against each task's raw condition file.

        Each fetched item is ``(itask_id, slice)``.  The slice is handed to
        ``cls.verify_ht_tasks_from_file_data`` together with the path returned
        by ``riu.get_complete_raw_icondition_file`` for that task.  Non-empty
        se / nse condition caches are put on the result queue, followed by a
        ``stat_signal`` tuple with the check count and start / end timestamps.

        The loop stops when the host lock file no longer exists or when the
        fetched ``itask_id`` equals ``ITaskSignal.kill_signal``.

        ``is_check_valid_rules`` is accepted but not read here.
        """
        # queues = (manager, task_queue, ht_task_queue, result_queue)
        queues = SearchQueueManager.init_task_worker_queue_manager()
        ht_task_queue, result_queue = queues[2], queues[3]

        worker_name = "worker-%d" % worker_id
        host_name = config.worker_host_name
        handled = 0

        logging.info("task worker %s start!" % (worker_name))
        isc_tasks = ITaskConfig(isc_config_file).isc_tasks

        # Path of the complete raw condition file of every task, by task id.
        data_files = [
            riu.get_complete_raw_icondition_file(
                *itask.k_m_n, itask.lp_type, itask.is_use_extended_rules)
            for itask in isc_tasks
        ]

        # True while the previous iteration fetched an item; used to log the
        # "waiting" message only once per idle period.
        was_busy = False
        while pathlib.Path(config.task_host_lock_file).exists():
            if ht_task_queue.empty():
                if was_busy:
                    was_busy = False
                    logging.info("%s:%s waiting for ht task queue ..." %
                                 (host_name, worker_name))
                time.sleep(1)
                continue

            was_busy = True
            handled += 1

            ts = ht_task_queue.get()
            start_time = datetime.now()
            itask_id = ts[0]
            if itask_id == ITaskSignal.kill_signal:
                break

            task_check_number, se_conditions_cache, nse_conditions_cache = \
                cls.verify_ht_tasks_from_file_data(
                    cls, isc_tasks[itask_id], ts[1], data_files[itask_id])
            end_time = datetime.now()

            for signal, conditions in (
                    (ITaskSignal.se_condition_signal, se_conditions_cache),
                    (ITaskSignal.nse_condition_signal, nse_conditions_cache)):
                if len(conditions) > 0:
                    result_queue.put((signal, itask_id, conditions))

            result_queue.put((ITaskSignal.stat_signal, itask_id,
                              task_check_number, (start_time, end_time)))

        logging.info("%s processes  %d ht itask slices" %
                     (worker_name, handled))
def load_itasks():
    """Return the isc task list described by ``isc_config_file``.

    ``isc_config_file`` is not a parameter of this function; the name is
    looked up in the enclosing scope when the function is called.
    """
    return ITaskConfig(isc_config_file).isc_tasks
Пример #14
0
    def init_kmn_isc_task_master_from_config(
            cls, isc_config_file="isets-tasks.json", sleep_time=30):
        """Drive the master side of an isc task run until all hosts finish.

        Obtains the manager and queues from
        ``SearchQueueManager.init_task_master_queue_manager()``, creates the
        task-slice generator pool and the pre-task worker pool, and then
        polls ``result_queue``: first until ``cls.check_itasks_status``
        returns a true value, then until ``working_hosts_number`` is no
        longer positive.

        Args:
            cls: Class object; forwarded explicitly as the first argument of
                the ``cls.*`` helpers called here.
            isc_config_file: Config file passed to ``ITaskConfig`` and to the
                pool initialisers.
            sleep_time: Seconds to sleep each time ``result_queue`` is empty.

        Returns:
            The ``isc_tasks`` list loaded from ``isc_config_file``.
        """
        manager, task_queue, ht_task_queue, result_queue = \
            SearchQueueManager.init_task_master_queue_manager()
        manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        localhost_ip = ssh.get_host_ip()
        ts_generator_pool = cls.init_task_slices_generator_pool(
            cls, isc_config_file)
        ht_pool = cls.init_pre_task_worker_pool(cls, isc_config_file,
                                                result_queue)
        working_hosts_number = 0

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        for itask in isc_tasks:
            itask.init_task_numbers()
            # Remove a result file that already exists before the run starts.
            if os.path.exists(itask.result_file):
                os.remove(itask.result_file)

        msg_text = "isc task master start, load %d isc tasks from %s" % (
            len(isc_tasks), isc_config_file)
        logging.info(msg_text)
        msg.send_message(msg_text)

        sleep_cnt = 0
        online_hosts = set()

        progress_msg_cnt = 10
        task_finish = False
        print_loop = 100
        print_cnt = 0

        while not task_finish:
            print_cnt += 1

            # Send a progress report every print_loop iterations or after
            # progress_msg_cnt idle sleeps, whichever is reached first; both
            # counters restart after a report.
            if print_cnt == print_loop or sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple,
                                              working_hosts_number, False)
                sleep_cnt = 0
                print_cnt = 0

            task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts,
                                                  manager_tuple,
                                                  working_hosts_number)
            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            # whn_diff[0] is added to the working-host count and whn_diff[1]
            # is treated as a host ip (presumably the host that joined or
            # left -- confirm against process_result_queue).
            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            whn_number = whn_diff[0]
            host_ip = whn_diff[1]
            working_hosts_number += whn_number

            # Only hosts other than the master's own are tracked.
            if host_ip != localhost_ip:
                if whn_number == 1:
                    online_hosts.add(host_ip)
                elif whn_number == -1:
                    # discard() instead of remove(): a terminate signal for a
                    # host that was never recorded must not raise KeyError
                    # and abort the master loop.
                    online_hosts.discard(host_ip)

        ts_generator_pool.join()
        ht_pool.join()
        HTCheckingWorker.send_worker_terminate_info(HTCheckingWorker,
                                                    localhost_ip, result_queue)

        # Keep processing results until every registered host has signed off.
        while working_hosts_number > 0:
            if sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple,
                                              working_hosts_number, True)
                sleep_cnt = 0

            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            working_hosts_number += whn_diff[0]

        msg_text = "isc tasks finish!"
        logging.info(msg_text)
        msg.send_message(msg=msg_text)

        for it in isc_tasks:
            it.dump_tmp_se_condition_saving_mem()
            msg_text = it.get_final_detail_progress_info()
            logging.info(msg_text)
            msg.send_message(msg=msg_text)

        return isc_tasks
Пример #15
0
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Worker loop: fetch task slices from the task queue and process them.

        The loop ends when the host lock file disappears, when
        ``cls.check_itasks_finish_status`` reports completion, or when a
        kill signal is taken from the queue.  A fetched slice is kept until
        ``cls.task_worker_load_nse_conditions`` returns a true value for it;
        results are accumulated locally and handed to
        ``cls.batch_send_stat_info_2_result_queue`` while waiting or once
        more than 2000 entries are cached.

        Args:
            cls: Class object; forwarded explicitly to the ``cls.*`` helpers.
            isc_config_file: Config file passed to ``ITaskConfig``.
            worker_id: Integer used to build the worker's log name.
            is_check_valid_rules: Accepted but not read in this body.
        """
        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        # Tuple layout: (manager, task_queue, ht_task_queue, result_queue).
        task_queue = manager_tuple[1]
        result_queue = manager_tuple[3]
        start_time = datetime.now()

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name
        handled_slices = 0

        logging.info("task worker %s start!" % (worker_name))
        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        for itask in isc_tasks:
            itask.loaded_non_se_condition_files.add(0)

        was_busy = False          # True after a slice was fetched, until idle is logged
        pending_slice = None      # slice fetched but not yet processed
        last_waited_layer = 0     # last layer number reported in the wait log
        pending_results = list()  # results not yet sent to the result queue

        # The lock file is re-checked at the start of every iteration.
        while pathlib.Path(config.task_host_lock_file).exists():
            if cls.check_itasks_finish_status(isc_tasks):
                logging.info("%s:%s all itasks terminate ..." %
                             (worker_host_name, worker_name))
                break

            if pending_slice is None:
                if task_queue.empty():
                    # Log the idle transition once, then poll every second.
                    if was_busy:
                        logging.info("%s:%s waiting for task queue ... " %
                                     (worker_host_name, worker_name))
                        was_busy = False
                    time.sleep(1)
                    continue

                pending_slice = task_queue.get()
                handled_slices += 1
                was_busy = True

            itask_id = pending_slice[0]
            if itask_id == ITaskSignal.kill_signal:
                break

            itask = isc_tasks[itask_id]
            if itask.is_task_finish:
                # Drop slices that belong to an already finished task.
                pending_slice = None
                continue

            task_slice = pending_slice[1]
            nse_iset_number = task_slice[2] + len(task_slice[0]) - 1

            nse_ready = cls.task_worker_load_nse_conditions(
                itask, nse_iset_number + 1)
            if not nse_ready:
                if last_waited_layer != nse_iset_number:
                    last_waited_layer = nse_iset_number
                    wait_text = (
                        "%s:%s waiting for %d-%d-%d nse complete file %d" %
                        (worker_host_name, worker_name, *itask.k_m_n,
                         nse_iset_number))
                    logging.info((task_slice, wait_text))
                # Use the waiting time to hand over cached results.
                pending_results = cls.batch_send_stat_info_2_result_queue(
                    cls, pending_results, result_queue, start_time)
                time.sleep(1)
                continue

            pending_results.extend(
                cls.process_task_slice(cls, itask_id, itask, task_slice,
                                       manager_tuple))
            pending_slice = None

            if len(pending_results) > 2000:
                pending_results = cls.batch_send_stat_info_2_result_queue(
                    cls, pending_results, result_queue, start_time)

        # NOTE(review): entries still held in pending_results are not sent
        # after the loop ends -- confirm this is intended.
        logging.info("%s processes %d isc task slices ... " %
                     (worker_name, handled_slices))
Пример #16
0
    def init_kmn_isc_task_master_from_config(
            cls, isc_config_file="isets-tasks.json", sleep_time=30):
        """Drive the master side of an I4 search run until all hosts finish.

        Builds one result record per isc task, creates the task-slice
        generator pool and the pre-task worker pool, and then polls
        ``result_queue``: first until ``cls.check_i4_tasks_status`` returns
        a true value, then until ``working_hosts_number`` is no longer
        positive.

        Args:
            cls: Class object; forwarded explicitly as the first argument of
                the ``cls.*`` helpers called here.
            isc_config_file: Config file passed to ``ITaskConfig`` and to the
                pool initialisers.
            sleep_time: Seconds to sleep each time ``result_queue`` is empty.

        Returns:
            The ``isc_tasks`` list loaded from ``isc_config_file``.
        """
        manager, task_queue, ht_task_queue, result_queue = \
            SearchQueueManager.init_task_master_queue_manager()
        manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        localhost_ip = ssh.get_host_ip()

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        result_record = list()
        for itask in isc_tasks:
            isnse.clear_task_terminate_flag_files(*itask.k_m_n)
            i4_iset_size = len(itask.meta_data.search_i4_composed_iset_ids)
            result_file = i4u.get_kmn_i4_all_result_file(*itask.k_m_n)
            # Remove a result file that already exists before the run starts.
            if os.path.exists(result_file):
                os.remove(result_file)
            # Record layout: [2**size - 1, 0, empty list, result file path];
            # the meaning of each slot is defined by the helpers that
            # consume result_record.
            result_record.append(
                [2**i4_iset_size - 1, 0, list(), result_file])

        ts_generator_pool = cls.init_task_slices_generator_pool(
            cls, isc_config_file)

        pre_task_pool = cls.init_pre_task_worker_pool(cls, isc_config_file,
                                                      result_queue)

        working_hosts_number = 0

        msg_text = "isc task master start, load %d isc tasks from %s" % (
            len(isc_tasks), isc_config_file)
        logging.info(msg_text)
        msg.send_message(msg_text)

        sleep_cnt = 0
        online_hosts = set()

        progress_msg_cnt = 10
        task_finish = False
        print_loop = 10
        print_cnt = 0
        while not task_finish:
            print_cnt += 1

            # Send a progress report every print_loop iterations or after
            # progress_msg_cnt idle sleeps, whichever is reached first; both
            # counters restart after a report.
            if print_cnt == print_loop or sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, result_record,
                                              manager_tuple,
                                              working_hosts_number, False)
                sleep_cnt = 0
                print_cnt = 0

            task_finish = cls.check_i4_tasks_status(cls, result_record)
            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            # whn_diff[0] is added to the working-host count and whn_diff[1]
            # is treated as a host ip (presumably the host that joined or
            # left -- confirm against process_i4_result_queue).
            whn_diff = cls.process_i4_result_queue(cls, result_queue,
                                                   result_record)
            whn_number = whn_diff[0]
            host_ip = whn_diff[1]
            working_hosts_number += whn_number

            # Only hosts other than the master's own are tracked.
            if host_ip != localhost_ip:
                if whn_number == 1:
                    online_hosts.add(host_ip)
                elif whn_number == -1:
                    # discard() instead of remove(): a terminate signal for a
                    # host that was never recorded must not raise KeyError
                    # and abort the master loop.
                    online_hosts.discard(host_ip)

        ts_generator_pool.join()
        pre_task_pool.join()

        I4SearchWorker.send_worker_terminate_info(I4SearchWorker, localhost_ip,
                                                  result_queue)

        # Keep processing results until every registered host has signed off.
        while working_hosts_number > 0:
            if sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, result_record,
                                              manager_tuple,
                                              working_hosts_number, True)
                sleep_cnt = 0

            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            whn_diff = cls.process_i4_result_queue(cls, result_queue,
                                                   result_record)
            working_hosts_number += whn_diff[0]

        msg_text = "isc tasks finish!"
        logging.info(msg_text)
        msg.send_message(msg=msg_text)
        cls.send_itasks_progress_info(cls, result_record, manager_tuple,
                                      working_hosts_number, True)

        return isc_tasks
    def init_kmn_isc_task_master_from_config(cls, isc_config_file="isets-tasks.json", sleep_time=30):
        """Drive the master side of an isc task run and report its running time.

        Initialises every task, creates the task-slice generator pool and
        the pre-task worker pool, and then polls ``result_queue``: first
        until ``cls.check_itasks_status`` returns a true value, then until
        ``working_hosts_number`` is no longer positive.  Progress is sent
        and saved periodically while the first loop runs.

        Args:
            cls: Class object; forwarded explicitly as the first argument of
                most ``cls.*`` helpers called here.
            isc_config_file: Config file passed to ``ITaskConfig`` and to the
                pool initialisers.
            sleep_time: Seconds to sleep each time ``result_queue`` is empty.

        Returns:
            The ``isc_tasks`` list loaded from ``isc_config_file``.
        """
        start_time = datetime.now()
        manager, task_queue, ht_task_queue, result_queue = \
            SearchQueueManager.init_task_master_queue_manager()
        manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        localhost_ip = ssh.get_host_ip()

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        for itask in isc_tasks:
            itask.init_task_numbers()
            isnse.clear_task_terminate_flag_files(*itask.k_m_n)

        ts_generator_pool = cls.init_task_slices_generator_pool(cls, isc_config_file)
        pre_pool = cls.init_pre_task_worker_pool(cls, isc_config_file, result_queue)
        working_hosts_number = 0

        msg_text = "isc task master start, load %d isc tasks from %s" % (len(isc_tasks), isc_config_file)
        logging.info(msg_text)
        msg.send_message(msg_text)

        sleep_cnt = 0
        online_hosts = set()

        progress_msg_cnt = 10
        task_finish = False
        print_loop = 100
        print_cnt = 0
        while not task_finish:
            print_cnt += 1

            # Send and save progress every print_loop iterations or after
            # progress_msg_cnt idle sleeps, whichever is reached first; both
            # counters restart after a report.
            if print_cnt == print_loop or sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, False)
                for it in isc_tasks:
                    it.save_progress_info()
                sleep_cnt = 0
                print_cnt = 0

            task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts, manager_tuple, working_hosts_number)
            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            # whn_diff[0] is added to the working-host count and whn_diff[1]
            # is treated as a host ip (presumably the host that joined or
            # left -- confirm against process_result_queue).
            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            whn_number = whn_diff[0]
            host_ip = whn_diff[1]
            working_hosts_number += whn_number

            # Only hosts other than the master's own are tracked.
            if host_ip != localhost_ip:
                if whn_number == 1:
                    online_hosts.add(host_ip)
                    cls.update_nse_files_to_new_host(host_ip, isc_tasks)
                elif whn_number == -1:
                    # discard() instead of remove(): a terminate signal for a
                    # host that was never recorded must not raise KeyError
                    # and abort the master loop.
                    online_hosts.discard(host_ip)

        ts_generator_pool.join()
        pre_pool.join()
        RawIConditionSearchWorker.send_worker_terminate_info(RawIConditionSearchWorker, localhost_ip, result_queue)

        # Keep processing results until every registered host has signed off.
        while working_hosts_number > 0:
            if sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, manager_tuple, working_hosts_number, True)
                sleep_cnt = 0

            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            working_hosts_number += whn_diff[0]

        end_time = datetime.now()
        msg_text = "isc tasks finish, running time: %s" % str(end_time - start_time)
        logging.info(msg_text)
        msg.send_message(msg=msg_text)

        for it in isc_tasks:
            it.task_finish()
            msg_text = it.get_final_detail_progress_info()
            logging.info(msg_text)
            msg.send_message(msg=msg_text)

        return isc_tasks
Пример #18
0
    def init_kmn_isc_task_master_from_config(
            cls, isc_config_file="isets-tasks.json", sleep_time=30):
        """Start the queue manager and run the master loop for the isc tasks.

        Registers the task and result queues on ``SearchMasterQueueManger``,
        starts the manager, submits ``cls.itask_slices_generator`` to a
        process pool, and then polls ``result_queue``: first until
        ``cls.check_itasks_status`` returns a true value, then until
        ``working_hosts_number`` is no longer positive.  A single summary
        message with the tasks' result files attached is sent at the end.

        Args:
            cls: Class object; forwarded explicitly as the first argument of
                most ``cls.*`` helpers called here.
            isc_config_file: Config file passed to ``ITaskConfig`` and to the
                slice generator.
            sleep_time: Seconds to sleep each time ``result_queue`` is empty.

        Returns:
            The ``isc_tasks`` list loaded from ``isc_config_file``.
        """
        SearchMasterQueueManger.register("get_task_queue",
                                         callable=cls.get_global_task_queue)
        SearchMasterQueueManger.register("get_result_queue",
                                         callable=cls.get_global_result_queue)
        manager = SearchMasterQueueManger(address=(config.task_host,
                                                   config.task_host_port),
                                          authkey=bytes(config.task_host_key,
                                                        encoding="utf-8"))
        manager.start()
        task_queue = manager.get_task_queue()
        result_queue = manager.get_result_queue()
        localhost_ip = ssh.get_host_ip()

        # The slice generator runs asynchronously in the pool; close() is
        # called right away so join() can be used once the main loop ends.
        task_generator = Pool(2)
        task_generator.apply_async(cls.itask_slices_generator,
                                   args=(cls, isc_config_file))
        task_generator.close()

        working_hosts_number = 0
        msg_text = "isc task master start, load isc tasks from %s" % (
            isc_config_file)
        logging.info(msg_text)
        msg.send_message(msg_text)

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        for itask in isc_tasks:
            itask.init_task_numbers()

        sleep_cnt = 0
        online_hosts = set()

        progress_msg_cnt = 10
        task_finish = False
        print_loop = 100000
        print_cnt = 0
        while not task_finish:
            print_cnt += 1

            # Send a progress report every print_loop iterations or after
            # progress_msg_cnt idle sleeps, whichever is reached first; both
            # counters restart after a report.
            if print_cnt == print_loop or sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, task_queue,
                                              working_hosts_number, False)
                sleep_cnt = 0
                print_cnt = 0

            task_finish = cls.check_itasks_status(cls, isc_tasks, online_hosts,
                                                  task_queue,
                                                  working_hosts_number)
            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            # whn_diff[0] is added to the working-host count and whn_diff[1]
            # is treated as a host ip (presumably the host that joined or
            # left -- confirm against process_result_queue).
            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            whn_number = whn_diff[0]
            host_ip = whn_diff[1]
            working_hosts_number += whn_number

            # Only hosts other than the master's own are tracked.
            if host_ip != localhost_ip:
                if whn_number == 1:
                    online_hosts.add(host_ip)
                    cls.update_nse_files_to_new_host(host_ip, isc_tasks)
                elif whn_number == -1:
                    # discard() instead of remove(): a terminate signal for a
                    # host that was never recorded must not raise KeyError
                    # and abort the master loop.
                    online_hosts.discard(host_ip)

        task_generator.join()

        # Keep processing results until every registered host has signed off.
        while working_hosts_number > 0:
            if sleep_cnt == progress_msg_cnt:
                cls.send_itasks_progress_info(cls, isc_tasks, task_queue,
                                              working_hosts_number, True)
                sleep_cnt = 0

            if result_queue.empty():
                time.sleep(sleep_time)
                sleep_cnt += 1
                continue

            whn_diff = cls.process_result_queue(cls, result_queue, isc_tasks)
            working_hosts_number += whn_diff[0]

        msg_texts = []
        attached_files = []
        for it in isc_tasks:
            it.task_finish()
            msg_texts.append(it.get_final_detail_progress_info())
            attached_files.append(it.result_file)

        msg_text = "isc tasks finish! \n\t\t%s" % "\n\t\t".join(msg_texts)
        logging.info(msg_text)
        msg.send_message(msg=msg_text, attached_files=attached_files)
        return isc_tasks
    def itask_slices_generator(cls, isc_config_file):
        """Generate task slices for every isc task and put them on the task queue.

        For each task the search iset ids are split into a left zone (at
        most 12 ids) and a right zone.  For each layer from ``min_ne`` to
        ``max_ne``: layers not larger than ``sum(k_m_n)`` are split with
        ``vandermonde_generator``; larger layers first wait for the finish
        flag file of layer ``ne_iset_number - 2`` (unless the task reports
        termination) and are then split with
        ``merge_small_near_uniform_vandermonde_generator``.  Finally 1000
        kill signals are queued.

        Args:
            cls: Class object used to reach ``check_itask_terminate_status``.
            isc_config_file: Config file passed to ``ITaskConfig``.
        """
        max_space_size = 100000000000
        max_left_zone_length = 12

        start_text = "%s init task slices generator ..." % str(cls)
        logging.info(start_text)
        msg.send_message(start_text)

        # Keep the whole returned tuple referenced for the duration of the
        # call; only the task queue (index 1) is used here.
        queue_handles = SearchQueueManager.init_task_worker_queue_manager()
        task_queue = queue_handles[1]

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks

        for tid, it in enumerate(isc_tasks):
            min_ne = it.min_ne
            max_ne = it.max_ne
            isnse.clear_task_space_layer_finish_flag_files(*it.k_m_n, min_ne, max_ne)

            left_zone_length = min(
                len(it.meta_data.search_i4_composed_iset_ids),
                max_left_zone_length)
            search_isets = copy.deepcopy(it.meta_data.search_space_iset_ids)

            rule_number = sum(it.k_m_n)
            left_zone_iset_ids = search_isets[:left_zone_length]
            right_zone_iset_ids = search_isets[left_zone_length:]

            for ne_iset_number in range(min_ne, max_ne + 1):
                logging.info("generating %d-%d-%d %d layer task slices" % (*it.k_m_n, ne_iset_number))

                if ne_iset_number <= rule_number:
                    small_layer_slices = CombinationSearchingSpaceSplitter.vandermonde_generator(
                        left_zone_iset_ids, right_zone_iset_ids, ne_iset_number)
                    for ts in small_layer_slices:
                        # Slice layout: (left-split flag, ts[0] as a set,
                        # left zone length, ts[2]).
                        task_queue.put(
                            (tid, (True, set(ts[0]), left_zone_length, ts[2])))
                elif not cls.check_itask_terminate_status(it):
                    # Block until the flag file of layer ne_iset_number - 2
                    # exists, or the task reports termination.
                    flag_file = isnse.get_task_space_layer_finish_flag_file(*it.k_m_n, ne_iset_number - 2)
                    while not pathlib.Path(flag_file).exists():
                        if cls.check_itask_terminate_status(it):
                            break
                        time.sleep(1)

                    large_layer_slices = CombinationSearchingSpaceSplitter.merge_small_near_uniform_vandermonde_generator(
                        left_zone_iset_ids, right_zone_iset_ids, ne_iset_number, max_space_size=max_space_size)
                    for ts_cnt, ts in enumerate(large_layer_slices, start=1):
                        task_queue.put((tid, ts))
                        # Re-check termination once every 10000 slices.
                        if ts_cnt % 10000 == 0 and cls.check_itask_terminate_status(it):
                            break

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
Пример #20
0
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        """Worker loop: connect to the queue manager and process task slices.

        Connects a ``SearchWorkerQueueManger`` to the configured task host,
        then takes slices from the task queue and passes each one to
        ``cls.process_kmn_itask_slice`` until the host lock file disappears
        or a kill signal is received.

        Args:
            cls: Class object; forwarded explicitly to
                ``process_kmn_itask_slice``.
            isc_config_file: Config file passed to ``ITaskConfig``.
            worker_id: Integer used to build the worker's log and task names.
            is_check_valid_rules: Overwritten with ``False`` below, so the
                caller's value has no effect.
        """
        SearchWorkerQueueManger.register("get_task_queue")
        SearchWorkerQueueManger.register("get_result_queue")
        host_address = (config.task_host, config.task_host_port)
        host_key = bytes(config.task_host_key, encoding="utf-8")
        manager = SearchWorkerQueueManger(address=host_address,
                                          authkey=host_key)

        # NOTE(review): the argument is unconditionally overridden here --
        # confirm whether this is intended.
        is_check_valid_rules = False

        manager.connect()
        task_queue = manager.get_task_queue()
        result_queue = manager.get_result_queue()

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name

        logging.info("task worker %s start!" % (worker_name))
        isc_tasks = ITaskConfig(isc_config_file).isc_tasks
        handled_slices = 0

        for itask in isc_tasks:
            itask.loaded_non_se_condition_files.add(1)
            itask.loaded_non_se_condition_files.add(0)

        idle_log_pending = True  # log the idle message once per idle period
        # The lock file is re-checked at the start of every iteration.
        while pathlib.Path(config.task_host_lock_file).exists():
            if task_queue.empty():
                if idle_log_pending:
                    logging.info("waiting for isc task slices")
                    idle_log_pending = False
                time.sleep(2)
                continue

            idle_log_pending = True

            task_slice = task_queue.get()
            if task_slice[0] == ITaskSignal.kill_signal:
                logging.info("%s:%s isc task worker terminate ..." % (
                    worker_host_name, worker_name))
                break

            itask = isc_tasks[task_slice[0]]
            task_name = worker_name + ("-task-%d" % handled_slices)
            cls.process_kmn_itask_slice(cls, itask, task_slice, task_name,
                                        result_queue, is_check_valid_rules)
            handled_slices += 1

        logging.info("%s processes %d isc task slices" %
                     (worker_name, handled_slices))