Example #1
def split_kmn_raw_conditions_by_ne_iset_numbers(k_size, m_size, n_size, lp_type, is_use_extended_rules):
    # Split the complete raw-condition file into one file per ne-iset count.
    data_file = riu.get_complete_raw_icondition_file(k_size, m_size, n_size, lp_type, is_use_extended_rules)
    data_file = open(data_file, encoding="utf-8", mode="r")
    data_dir = riu.get_raw_condition_split_data_dir(k_size, m_size, n_size)
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)

    split_files = dict()

    for data in data_file:
        data_item = data.strip("\r\n ").split(",")
        ne = len(data_item)
        if ne not in split_files:
            outf = os.path.join(data_dir, str(ne))
            outf = open(outf, mode="w", encoding="utf-8")
            split_files[ne] = outf
        else:
            outf = split_files[ne]

        outf.write(data)

    for ne in split_files:
        split_files[ne].close()

    data_file.close()
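
A minimal invocation sketch, assuming the riu and config helpers from this repository are importable; the 1-1-0 lpmln arguments are borrowed from Example #6 and purely illustrative:

# Hypothetical call: split the complete 1-1-0 lpmln raw-condition file
# into per-ne-iset-count files under the split data directory.
split_kmn_raw_conditions_by_ne_iset_numbers(1, 1, 0, "lpmln", False)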
Example #2
def merge_worker_kmn_raw_conditions(k_size, m_size, n_size, lp_type, is_use_extended_rules):
    # Concatenate the per-worker files (1..payload) into a single file
    # named after this worker host.
    payload = config.worker_payload
    hostname = config.worker_host_name
    outf = riu.get_raw_icondition_file_path(k_size, m_size, n_size, lp_type, is_use_extended_rules, hostname)
    outf = open(outf, encoding="utf-8", mode="w")

    for i in range(1, payload + 1):
        dataf = riu.get_raw_icondition_file_path(k_size, m_size, n_size, lp_type, is_use_extended_rules, str(i))
        print("merge %s ..." % dataf)
        with open(dataf, encoding="utf-8", mode="r") as df:
            for data in df:
                outf.write(data)
    outf.close()
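
Note that, unlike Example #5 below, this variant does not check os.path.exists before opening each per-worker file, so any gap in the 1..payload sequence raises FileNotFoundError.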
Example #3
def count_raw_condition_split_data(k_size, m_size, n_size):
    files = riu.get_raw_condition_split_data_files(k_size, m_size, n_size)
    cnt = 0
    for f in files:
        print("counting %s ..." % f)
        with open(f, encoding="utf-8", mode="r") as df:
            for _ in df:
                cnt += 1

    print("has %d data " % cnt)
Example #4
def merge_all_kmn_raw_conditions(k_size, m_size, n_size, lp_type, is_use_extended_rules):
    # Append every per-host raw data file onto the single complete file.
    data_files = get_all_kmn_raw_data_files(k_size, m_size, n_size)
    complete_data_file = riu.get_complete_raw_icondition_file(k_size, m_size, n_size, lp_type, is_use_extended_rules)
    outf = open(complete_data_file, mode="a", encoding="utf-8")
    for df in data_files:
        print("merge %s ..." % df)
        with open(df, mode="r", encoding="utf-8") as sub_data:
            for data in sub_data:
                outf.write(data)

    outf.close()
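
Because the complete file is opened in append mode ("a"), rerunning merge_all_kmn_raw_conditions appends the same rows again. A minimal guard sketch that could run before the open call; the guard is an assumption, not code from the repository:

import os

# Hypothetical guard: remove any stale complete file before opening it,
# so that reruns do not append duplicate rows.
if os.path.exists(complete_data_file):
    os.remove(complete_data_file)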
Example #5
def merge_and_clean_worker_kmn_raw_conditions(k_size, m_size, n_size, lp_type, is_use_extended_rules, clean_ne_iset_numbers):
    # Like merge_worker_kmn_raw_conditions, but skips missing per-worker
    # files and drops conditions whose ne-iset count appears in
    # clean_ne_iset_numbers; returns how many conditions were kept.
    payload = config.worker_payload
    hostname = config.worker_host_name
    outf = riu.get_raw_icondition_file_path(k_size, m_size, n_size, lp_type, is_use_extended_rules, hostname)
    outf = open(outf, encoding="utf-8", mode="w")
    cnt = 0
    for i in range(1, payload + 1):
        dataf = riu.get_raw_icondition_file_path(k_size, m_size, n_size, lp_type, is_use_extended_rules, str(i))
        if not os.path.exists(dataf):
            continue

        print("merge %s ..." % dataf)

        with open(dataf, encoding="utf-8", mode="r") as df:
            for data in df:
                ne_isets = data.strip("\r\n ").split(",")
                if len(ne_isets) not in clean_ne_iset_numbers:
                    cnt += 1
                    outf.write(data)
    outf.close()
    print("worker %s has %d raw conditions" % (hostname, cnt))
    return cnt
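
A hedged call sketch; clean_ne_iset_numbers is any container of ne-iset counts to drop, so a set gives O(1) membership tests (the argument values here are illustrative):

# Hypothetical: merge worker files for the 1-1-0 lpmln task, dropping
# conditions made of exactly 1 or 2 ne-isets; returns the kept count.
kept = merge_and_clean_worker_kmn_raw_conditions(1, 1, 0, "lpmln", False, {1, 2})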
Example #6
def test_line_cache():
    data_file = riu.get_complete_raw_icondition_file(1, 1, 0, "lpmln", False)
    # linecache.getline is 1-indexed; requesting line 0 always returns "".
    for i in range(1, 1201):
        print(i, linecache.getline(data_file, i))
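
A quick self-contained check of linecache's 1-indexing, independent of the repository data files:

import linecache

print(repr(linecache.getline(__file__, 0)))  # '' - there is no line 0
print(repr(linecache.getline(__file__, 1)))  # first line of this script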
Example #7
    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        ht_task_queue = manager_tuple[2]
        result_queue = manager_tuple[3]

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name
        processed_ht_task_slices_number = 0

        msg_text = "task worker %s start!" % (worker_name)
        logging.info(msg_text)
        isc_tasks = ITaskConfig(isc_config_file)
        isc_tasks = isc_tasks.isc_tasks

        data_files = list()
        data_file_current_lines = list()

        for itask in isc_tasks:
            # data_file_current_lines.append(-1)
            file_path = riu.get_complete_raw_icondition_file(
                *itask.k_m_n, itask.lp_type, itask.is_use_extended_rules)
            # file = open(file_path, mode="r", encoding="utf-8")
            data_files.append(file_path)

        is_process_ht_task_queue = False

        while True:
            if not pathlib.Path(config.task_host_lock_file).exists():
                break

            if ht_task_queue.empty():
                if is_process_ht_task_queue:
                    is_process_ht_task_queue = False
                    logging.info("%s:%s waiting for ht task queue ..." %
                                 (worker_host_name, worker_name))
                time.sleep(1)
                continue

            is_process_ht_task_queue = True
            processed_ht_task_slices_number += 1

            ts = ht_task_queue.get()
            start_time = datetime.now()
            itask_id = ts[0]

            if itask_id == ITaskSignal.kill_signal:
                break

            itask = isc_tasks[itask_id]

            data_file = data_files[itask_id]

            task_check_number, se_conditions_cache, nse_conditions_cache = \
                cls.verify_ht_tasks_from_file_data(cls, itask, ts[1], data_file)

            end_time = datetime.now()

            if len(se_conditions_cache) > 0:
                result_queue.put((ITaskSignal.se_condition_signal, itask_id,
                                  se_conditions_cache))

            if len(nse_conditions_cache) > 0:
                result_queue.put((ITaskSignal.nse_condition_signal, itask_id,
                                  nse_conditions_cache))

            result_tuple = (ITaskSignal.stat_signal, itask_id,
                            task_check_number, (start_time, end_time))
            result_queue.put(result_tuple)

        logging.info("%s processes  %d ht itask slices" %
                     (worker_name, processed_ht_task_slices_number))

    def kmn_isc_task_worker(cls,
                            isc_config_file="isets-tasks.json",
                            worker_id=1,
                            is_check_valid_rules=True):
        manager_tuple = SearchQueueManager.init_task_worker_queue_manager()
        # manager_tuple = (manager, task_queue, ht_task_queue, result_queue)
        task_queue = manager_tuple[1]
        result_queue = manager_tuple[3]
        start_time = datetime.now()

        worker_name = "worker-%d" % worker_id
        worker_host_name = config.worker_host_name
        processed_task_slices_number = 0

        msg_text = "task worker %s start!" % (worker_name)
        logging.info(msg_text)
        isc_tasks = ITaskConfig(isc_config_file)
        isc_tasks = isc_tasks.isc_tasks

        raw_condition_files = list()
        for itask in isc_tasks:
            rule_number = sum(itask.k_m_n)
            search_space_iset_number = len(
                itask.meta_data.search_space_iset_ids)
            itask.loaded_non_se_condition_files.add(0)
            for i in range(rule_number + 1, search_space_iset_number + 1):
                itask.loaded_non_se_condition_files.add(i)

            rf = riu.get_empty_raw_icondition_file(*itask.k_m_n, itask.lp_type,
                                                   itask.is_use_extended_rules,
                                                   str(worker_id))
            raw_condition_files.append(rf)

        is_process_task_queue = False
        task_slice_cache = None
        last_nse_iset_number = 0
        result_queue_cache = list()
        single_round_processed_task_number = 0
        sleep_cnt = 0
        while True:

            # if sleep_cnt == 10:
            #     sleep_cnt = 0
            #     # logging.error(("result queue cache size ", len(result_queue_cache)))
            #     # logging.error("result queue cache has %d items, send sleep cnt 10", len(result_queue_cache))
            #     result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache,
            #                                                                  result_queue, start_time)

            if not pathlib.Path(config.task_host_lock_file).exists():
                break

            is_task_finish = cls.check_itasks_finish_status(isc_tasks)
            if is_task_finish:
                logging.info("%s:%s all itasks terminate ..." %
                             (worker_host_name, worker_name))
                break

            if task_slice_cache is None:
                if task_queue.empty():
                    # logging.error("result queue cache has %d items, send task queue empty",
                    #               len(result_queue_cache))
                    # result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache,
                    #                                                              result_queue, start_time)
                    if is_process_task_queue:
                        logging.info("%s:%s waiting for task queue ... " %
                                     (worker_host_name, worker_name))
                        is_process_task_queue = False
                    time.sleep(1)
                    sleep_cnt += 1
                    continue
                else:
                    task_slice_cache = task_queue.get()
                    processed_task_slices_number += 1
                    single_round_processed_task_number += 1
                    is_process_task_queue = True

            itask_id = task_slice_cache[0]
            if itask_id == ITaskSignal.kill_signal:
                break

            itask = isc_tasks[itask_id]

            if itask.is_task_finish:
                task_slice_cache = None
                continue
            """
            task_slice = (left_split, left, left_zone_length, right_choice_number)
            if left_split = True, left is left_isets
            else left is left_isets_size
            """

            task_slice = task_slice_cache[1]

            rule_number = sum(itask.k_m_n)
            ne_iset_number = cls.compute_ne_iset_number(task_slice)
            nse_ne_iset_number = ne_iset_number - 1

            load_nse_complete = cls.task_worker_load_nse_conditions(
                itask, ne_iset_number)
            if not load_nse_complete:
                # # logging.error("result queue cache has %d items, send load nse not complete", len(result_queue_cache))
                # result_queue_cache = cls.batch_send_stat_info_2_result_queue(cls, result_queue_cache, result_queue, start_time)
                if last_nse_iset_number != nse_ne_iset_number:
                    last_nse_iset_number = nse_ne_iset_number
                    logging.info(
                        (task_slice,
                         "%s:%s waiting for %d-%d-%d nse complete file %d" %
                         (worker_host_name, worker_name, *itask.k_m_n,
                          nse_ne_iset_number)))
                time.sleep(1)
                sleep_cnt += 1
                continue

            rq_cache, ht_check_items = cls.process_merge_small_task_slices(
                cls, itask_id, itask, task_slice, manager_tuple)
            result_queue_cache.extend(rq_cache)
            task_slice_cache = None

            raw_data_file = raw_condition_files[itask_id]
            ht_stat = cls.process_ht_tasks(cls, ht_check_items, itask_id,
                                           itask, ne_iset_number, result_queue,
                                           raw_data_file)
            if ht_stat is not None:
                result_queue_cache.append(ht_stat)

            # if len(result_queue_cache) > 2000:
            # logging.error("result queue cache has %d items, send cache size > 20000", len(result_queue_cache))

            if ne_iset_number <= rule_number:
                result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                    cls, result_queue_cache, result_queue, start_time)
            else:
                if task_queue.qsize() < 1000 or len(result_queue_cache) > 100000:
                    result_queue_cache = cls.batch_send_stat_info_2_result_queue(
                        cls, result_queue_cache, result_queue, start_time)

            if single_round_processed_task_number == 10000:
                msg_text = "%s:%s has processed %d isc task slices in total, %d in this round ... " % (
                    worker_host_name, worker_name,
                    processed_task_slices_number,
                    single_round_processed_task_number)
                single_round_processed_task_number = 0
                logging.info(msg_text)

        msg_text = "%s:%s processes %d isc task slices, new round process %d task slices ... " % (
            worker_host_name, worker_name, processed_task_slices_number,
            single_round_processed_task_number)
        logging.info(msg_text)
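
A minimal launch sketch; the owning class name (SearchWorker here) and the multiprocessing wiring are assumptions for illustration, since the source only shows the worker method bodies:

import multiprocessing

# Hypothetical driver: run several worker ids in parallel, assuming
# kmn_isc_task_worker is exposed as a classmethod on SearchWorker.
if __name__ == "__main__":
    procs = [
        multiprocessing.Process(
            target=SearchWorker.kmn_isc_task_worker,
            kwargs={"isc_config_file": "isets-tasks.json", "worker_id": i})
        for i in range(1, 5)
    ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()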