def merge_small_near_uniform_vandermonde_generator(left_zone_elements,
                                                   right_zone_elements,
                                                   choice_number,
                                                   max_space_size=2000000000):
    """Yield slices covering every way to pick ``choice_number`` elements.

    For each feasible ``left_choice_number`` (elements taken from the left
    zone) the sub-space holds C(left, i) * C(right, choice_number - i)
    combinations.  Sub-spaces smaller than ``100 * max_space_size`` are
    yielded as one merged slice; larger ones are split into one slice per
    concrete left-zone combination.

    :param left_zone_elements: sized iterable of left-zone elements
    :param right_zone_elements: sized container of right-zone elements
    :param choice_number: total number of elements to pick
    :param max_space_size: size unit used for the split threshold
    :return: generator of 4-tuples
        ``(False, left_choice_number, left_zone_length, right_choice_number)``
        for merged slices, or
        ``(True, left_choice_list, left_zone_length, right_choice_number)``
        for split slices
    :raises RuntimeError: if ``choice_number`` exceeds the number of
        available elements (raised on first iteration)
    """
    left_zone_length = len(left_zone_elements)
    right_zone_length = len(right_zone_elements)
    if choice_number > left_zone_length + right_zone_length:
        raise RuntimeError(
            "choice_number > left_zone_length + right_zone_length")

    # Compare in exact integer arithmetic: combination counts can be far
    # larger than a float can represent, so dividing first could lose
    # precision or raise OverflowError.
    split_threshold = 100 * max_space_size

    for left_choice_number in range(choice_number + 1):
        right_choice_number = choice_number - left_choice_number
        if (left_choice_number > left_zone_length
                or right_choice_number > right_zone_length):
            continue

        sub_space_size = (
            CombinaryCounter.compute_comb(left_zone_length,
                                          left_choice_number) *
            CombinaryCounter.compute_comb(right_zone_length,
                                          right_choice_number))

        if sub_space_size < split_threshold:
            # Small enough: hand out the whole sub-space as one slice.
            yield (False, left_choice_number, left_zone_length,
                   right_choice_number)
        else:
            # Too large: fix the left-zone choice, one slice per combination.
            for left_combination in itertools.combinations(
                    left_zone_elements, left_choice_number):
                left_choice = list(copy.deepcopy(left_combination))
                yield (True, left_choice, left_zone_length,
                       right_choice_number)

    # Kept for backward compatibility (becomes StopIteration.value).
    return True
def yanghui_split_checker(max_elements_size=10):
    """Self-check for ``CombinationSearchingSpaceSplitter.yanghui_split``.

    Draws a random set of "split" elements (values may fall outside the
    element range, since they are drawn from ``-1 .. max_elements_size``) and,
    for every choice number, verifies that the sizes of the returned slices
    add up to C(max_elements_size, choice_number).

    :param max_elements_size: number of elements in the universe
    :raises RuntimeError: if the slice sizes do not sum to the full space
    """
    universe = set(range(max_elements_size))
    draw_count = random.randint(0, max_elements_size + 1)
    drawn = set()
    for _ in range(draw_count):
        drawn.add(random.randint(-1, max_elements_size))
    print("split elements: ", drawn)

    report_format = ("C(%d, %d), search slices number %d: real = %d, "
                     "slices sum = %d, is same %s, is split %s")

    for pick in range(max_elements_size + 1):
        expected = CombinaryCounter.compute_comb(max_elements_size, pick)
        was_split, pieces = CombinationSearchingSpaceSplitter.yanghui_split(
            universe, pick, drawn)

        # Each piece is read as (_, zone, pick_from_zone).
        covered = 0
        for piece in pieces:
            covered += CombinaryCounter.compute_comb(len(piece[1]), piece[2])

        msg_text = report_format % (max_elements_size, pick, len(pieces),
                                    expected, covered,
                                    str(expected == covered), str(was_split))
        print(msg_text)
        if expected != covered:
            raise RuntimeError(msg_text)
def near_uniform_vandermonde_generator_checker(max_elements_size=10):
    """Self-check for the near-uniform Vandermonde slice generator.

    Splits ``range(max_elements_size)`` into a left zone (first third, capped
    at 12 elements) and a right zone, then for every choice number checks
    that the sizes of all generated slices (with ``max_space_size=20``) sum
    to C(max_elements_size, choice_number).

    :param max_elements_size: number of elements in the universe
    :raises RuntimeError: if the slice sizes do not sum to the full space
    """
    universe = list(range(max_elements_size))
    cut = min(max_elements_size // 3, 12)
    left_zone, right_zone = universe[0:cut], universe[cut:]

    report_format = ("C(%d, %d), search slices number %d: real = %d, "
                     "slices sum = %d, is same %s")

    for pick in range(max_elements_size + 1):
        expected = CombinaryCounter.compute_comb(max_elements_size, pick)
        pieces = CombinationSearchingSpaceSplitter.near_uniform_vandermonde_generator(
            left_zone, right_zone, pick, 20)

        piece_count = 0
        covered = 0
        for piece in pieces:
            piece_count += 1
            piece_size = CombinaryCounter.compute_comb(
                len(piece[1]), piece[2])
            print(piece, "size: ", piece_size)
            covered += piece_size

        msg_text = report_format % (max_elements_size, pick, piece_count,
                                    expected, covered,
                                    str(expected == covered))
        print(msg_text)
        if expected != covered:
            raise RuntimeError(msg_text)
def near_uniform_vandermonde_generator(left_zone_elements,
                                       right_zone_elements,
                                       choice_number,
                                       max_space_size=2000000000):
    """Yield Vandermonde slices, re-splitting any that are still too large.

    Works level by level: every pending space is split with
    ``CombinationSearchingSpaceSplitter.vandermonde_generator``.  A resulting
    slice ``(chosen, zone, pick)`` whose size C(len(zone), pick) is within
    ``max_space_size`` is yielded (with the already-fixed elements of its
    parent appended to ``chosen``); otherwise its zone is cut at index
    ``pick`` into a new left/right zone and queued for the next round.

    :param left_zone_elements: initial left zone (sliceable sequence)
    :param right_zone_elements: initial right zone (sliceable sequence)
    :param choice_number: number of elements to pick
    :param max_space_size: largest slice size that is yielded unsplit
    :return: generator of slices produced by ``vandermonde_generator``
    """
    pending = [([], left_zone_elements, right_zone_elements, choice_number)]

    while pending:
        deferred = []
        for fixed_prefix, left_zone, right_zone, pick in pending:
            pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
                left_zone, right_zone, pick)
            for piece in pieces:
                piece_size = CombinaryCounter.compute_comb(
                    len(piece[1]), piece[2])
                if piece_size > max_space_size:
                    # Still too big: cut its zone and split again next round.
                    zone = piece[1]
                    head = zone[0:piece[2]]
                    tail = zone[piece[2]:]
                    piece[0].extend(fixed_prefix)
                    deferred.append((piece[0], head, tail, piece[2]))
                else:
                    piece[0].extend(fixed_prefix)
                    yield piece
        pending = deferred
def yanghui_triangle_number_sets(nse_isets, original_left_isets, all_isets,
                                 pick_number):
    """Split one search slice so combinations containing ``nse_isets`` are skipped.

    The slice is "``original_left_isets`` already chosen, pick
    ``pick_number`` more from ``all_isets``".  Combinations that would
    contain every element of ``nse_isets`` are counted as skipped; the rest
    is returned as smaller slices.

    :param nse_isets: set of iset ids (must support ``difference``)
    :param original_left_isets: iset ids already fixed for this slice
    :param all_isets: candidate iset ids that may still be picked
    :param pick_number: how many ids to pick from ``all_isets``
    :return: ``(skip_number, task_slices)`` where every task slice is
        ``(left_isets, right_zone_isets, right_pick_number)``
    """
    task_slices = []
    remain_nse_isets = nse_isets.difference(original_left_isets)

    if not remain_nse_isets:
        # The fixed part already contains all of nse: skip the whole slice.
        skip_number = CombinaryCounter.compute_comb(len(all_isets),
                                                    pick_number)
        return skip_number, task_slices

    can_complete_nse = (remain_nse_isets.issubset(all_isets)
                        and len(remain_nse_isets) <= pick_number)
    if not can_complete_nse:
        # nse can never be completed inside this slice: keep it untouched.
        task_slices.append((original_left_isets, all_isets, pick_number))
        return 0, task_slices

    nse_size = len(remain_nse_isets)
    remain_nse_isets = list(remain_nse_isets)
    eliminate_atoms = set()
    right_zone_isets = copy.deepcopy(all_isets)

    for atom in remain_nse_isets:
        # Slice: the atoms handled so far are chosen, `atom` is excluded.
        left_isets = copy.deepcopy(eliminate_atoms)
        eliminate_atoms.add(atom)
        right_zone_isets.remove(atom)
        right_isets_number = pick_number - len(left_isets)
        left_isets = left_isets.union(original_left_isets)
        task_slices.append((left_isets, copy.deepcopy(right_zone_isets),
                            right_isets_number))

    # Remaining case: every atom of nse is chosen -> skipped.
    skip_number = CombinaryCounter.compute_comb(len(right_zone_isets),
                                                pick_number - nse_size)
    return skip_number, task_slices
def compute_search_space_size(search_isets_size, choice_number):
    """Print the size of each left/right sub-space of a search space.

    The space C(search_isets_size, choice_number) is decomposed with a fixed
    left zone of 12 elements; for each number ``i`` of left-zone picks the
    sub-space size and its ratio to a limit of 1e11 are printed.  When the
    ratio exceeds 100 the right-zone part is reported as well.  Finally the
    sum of the sub-spaces is compared with the full space size.

    :param search_isets_size: total number of elements
    :param choice_number: number of elements to pick
    """
    size_limit = 100000000000
    left_size = 12
    right_size = search_isets_size - left_size

    whole_space = CombinaryCounter.compute_comb(search_isets_size,
                                                choice_number)
    accumulated = 0
    for left_pick in range(choice_number + 1):
        left_count = CombinaryCounter.compute_comb(left_size, left_pick)
        right_count = CombinaryCounter.compute_comb(
            right_size, choice_number - left_pick)
        part = left_count * right_count
        accumulated += part

        exceeds_limit = part > size_limit
        part_ratio = part / size_limit
        print("i = %d, subspace size = %d, is bigger than max size: %s, %.3f"
              % (left_pick, part, str(exceeds_limit), part_ratio))

        if part_ratio <= 100:
            continue
        print("\t right choice size %d, is bigger than max size: %s, %.3f"
              % (right_count, right_count > size_limit,
                 right_count / size_limit))

    if whole_space != accumulated:
        print("wrong case!")
def meta_data_checker(k_size, m_size, n_size):
    """Print, per pick count, how many tuples remain after the i4 meta filter.

    Looks up ``I4RawSearchMaster.i4_meta`` for the given (k, m, n) key and,
    for every index ``i`` from 1 to ``len(meta) - 1``, prints
    C(len(meta) - 1, i), the stored count ``meta[i]`` and their difference.

    :param k_size: k component of the meta key
    :param m_size: m component of the meta key
    :param n_size: n component of the meta key
    """
    meta_key = I4RawSearchMaster.get_kmn_meta_key(k_size, m_size, n_size)
    counts = I4RawSearchMaster.i4_meta[meta_key]
    entry_count = len(counts)

    report_format = ("choose %d elements, has %d tuples, "
                     "%d non-semi-valid tuples, remain %d tuples")
    for pick in range(1, entry_count):
        all_tuples = CombinaryCounter.compute_comb(entry_count - 1, pick)
        listed_tuples = counts[pick]
        print(report_format % (pick, all_tuples, listed_tuples,
                               all_tuples - listed_tuples))
def process_one_nse_subpart_task_slice(cls, nse_isets, task_slice):
    """
    Split one task slice so that combinations containing all of
    ``nse_isets`` are counted as skipped instead of being searched.

    :param cls: unused by the body
    :param nse_isets: set of iset ids (must support ``difference``)
    :param task_slice: (left_iset_ids, right_zone_iset_ids, right_zone_choice_number)
    :return: (skip_number, task_slices); each returned slice has the same
        three-part layout as ``task_slice``
    """
    fixed_left = set(task_slice[0])
    pending_nse = nse_isets.difference(fixed_left)

    if not pending_nse:
        # The left part already holds every nse id: the whole slice is skipped.
        skipped = CombinaryCounter.compute_comb(
            len(task_slice[1]), task_slice[2])
        return skipped, []

    if not pending_nse.issubset(task_slice[1]):
        # nse cannot be completed from the right zone: slice stays as it is.
        return 0, [task_slice]

    pending_size = len(pending_nse)
    free_zone = task_slice[1].difference(pending_nse)
    pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
        pending_nse, free_zone, task_slice[2])

    skipped = 0
    kept_slices = []
    for piece in pieces:
        if len(piece[0]) == pending_size:
            # All pending nse ids were chosen in this piece -> skip it.
            skipped += CombinaryCounter.compute_comb(len(piece[1]), piece[2])
        else:
            piece[0].extend(fixed_left)
            kept_slices.append((set(piece[0]), set(piece[1]), piece[2]))
    return skipped, kept_slices
def yanghui_triangle_number_sets_2(minmal_i4_isets_tuples, left_iset_ids,
                                   right_zone_isets, right_iset_number):
    """Apply ``yanghui_triangle_number_sets`` for a sequence of nse sets.

    Starts from the single slice
    ``(left_iset_ids, right_zone_isets, right_iset_number)`` and, for each
    nse in ``minmal_i4_isets_tuples``, re-splits every current slice.  The
    intermediate slices are printed, and at the end the skipped count plus
    the sizes of the remaining slices is checked against
    C(len(right_zone_isets), right_iset_number).

    :param minmal_i4_isets_tuples: iterable of nse sets
    :param left_iset_ids: iterable of already chosen iset ids
    :param right_zone_isets: iterable of candidate iset ids
    :param right_iset_number: number of ids to pick from the right zone
    :return: ``(skip_task_number, task_slices)``
    :raises RuntimeError: if the skipped and remaining sizes do not add up
    """
    left_iset_ids = set(left_iset_ids)
    right_zone_isets = set(right_zone_isets)
    task_slices = [(left_iset_ids, right_zone_isets, right_iset_number)]
    skip_task_number = 0

    for cnt, nse in enumerate(minmal_i4_isets_tuples, start=1):
        nse_new_task_slices = []
        for ts in task_slices:
            ts_skip_task_number, new_task_slices = \
                yanghui_triangle_number_sets(nse, *ts)
            skip_task_number += ts_skip_task_number
            nse_new_task_slices.extend(new_task_slices)
        task_slices = nse_new_task_slices

        print("nse %d: " % cnt, nse)
        for ts in task_slices:
            print("\t", ts)
        print("\n")

    real = CombinaryCounter.compute_comb(len(right_zone_isets),
                                         right_iset_number)
    compute = skip_task_number
    for ti in task_slices:
        compute += CombinaryCounter.compute_comb(len(ti[1]), ti[2])

    print(real, compute, compute == real)
    print("skip number ", skip_task_number)
    print("compute tasks: ")
    for ti in task_slices:
        print(ti)

    if compute != real:
        # The original referenced an undefined name here (`pick_number`),
        # which turned the intended RuntimeError into a NameError.
        raise RuntimeError("wrong case: ", right_iset_number)

    return skip_task_number, task_slices
def eliminate_one_nse_condition(cls, nse_isets, original_left_isets,
                                all_isets, pick_number):
    """Split a slice so that combinations containing ``nse_isets`` are skipped.

    The slice means "``original_left_isets`` are chosen, pick
    ``pick_number`` more from ``all_isets``".

    :param cls: unused by the body
    :param nse_isets: set of iset ids (must support ``difference``)
    :param original_left_isets: iset ids already fixed for this slice
    :param all_isets: candidate iset ids that may still be picked
    :param pick_number: how many ids to pick from ``all_isets``
    :return: ``(skip_number, task_slices)``; each task slice is
        ``(left_isets, right_zone_isets, right_pick_number)``
    """
    pending_nse = nse_isets.difference(original_left_isets)

    # Every nse id is already fixed on the left: nothing remains to search.
    if len(pending_nse) == 0:
        whole = CombinaryCounter.compute_comb(len(all_isets), pick_number)
        return whole, list()

    # nse cannot be completed within this slice: pass it through unchanged.
    if not pending_nse.issubset(all_isets) or len(pending_nse) > pick_number:
        return 0, [(original_left_isets, all_isets, pick_number)]

    nse_size = len(pending_nse)
    ordered_nse = list(pending_nse)
    already_chosen = set()
    candidates = copy.deepcopy(all_isets)
    produced = list()

    for atom in ordered_nse:
        # This slice: earlier atoms are chosen, the current atom is excluded.
        chosen_before = copy.deepcopy(already_chosen)
        already_chosen.add(atom)
        candidates.remove(atom)
        produced.append((chosen_before.union(original_left_isets),
                         copy.deepcopy(candidates),
                         pick_number - len(chosen_before)))

    # What is left contains all atoms of nse and is therefore skipped.
    skipped = CombinaryCounter.compute_comb(
        len(candidates), pick_number - nse_size)
    return skipped, produced
def process_semi_valid_task_slices(cls, itask_id, itask, task_slice):
    """Split a task slice on its i4 isets and drop the parts that can be skipped.

    The right zone is divided into ids listed in
    ``itask.meta_data.search_i4_composed_iset_ids`` and the rest.  If any such
    ids exist, the slice is split with
    ``CombinationSearchingSpaceSplitter.vandermonde_generator``.  Each piece
    whose extended left part makes
    ``iscm.check_contain_rules_without_i_n_iset(4, ...)`` return a truthy
    value is counted as skipped; all other pieces are returned.

    :param cls: unused by the body
    :param itask_id: id placed into the skip result tuple
    :param itask: task object providing ``meta_data``, ``rule_number`` and
        ``is_use_extended_rules``
    :param task_slice: (left_isets, right_zone_isets, right_zone_choice_number);
        the first two must be sets
    :return: ``(new_task_slices, valid_skip_result)`` where
        ``valid_skip_result`` is ``None`` when nothing was skipped, otherwise
        ``(itask_id, ne_iset_number, 0, skip_number, skip_number)``
    """
    fixed_left, right_zone, right_pick = (task_slice[0], task_slice[1],
                                          task_slice[2])
    ne_iset_number = len(fixed_left) + right_pick

    i4_ids = set(itask.meta_data.search_i4_composed_iset_ids)
    i4_in_zone = right_zone.intersection(i4_ids)
    if i4_in_zone:
        other_in_zone = right_zone.difference(i4_in_zone)
        pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
            i4_in_zone, other_in_zone, right_pick)
    else:
        # No i4 ids on the right: examine the slice as a single piece.
        pieces = [task_slice]

    skipped = 0
    kept_slices = []
    for piece in pieces:
        candidate_left = fixed_left.union(set(piece[0]))
        has_rule = iscm.check_contain_rules_without_i_n_iset(
            4, candidate_left, itask.rule_number,
            itask.is_use_extended_rules)
        if has_rule:
            skipped += CombinaryCounter.compute_comb(len(piece[1]), piece[2])
        else:
            kept_slices.append((candidate_left, set(piece[1]), piece[2]))

    if skipped > 0:
        skip_report = (itask_id, ne_iset_number, 0, skipped, skipped)
    else:
        skip_report = None
    return kept_slices, skip_report
def init_task_numbers(self):
    """Fill in the per-level and total task counts.

    For every ``i`` in ``min_ne .. max_ne`` the count
    C(len(meta_data.search_space_iset_ids), i) is stored in
    ``hierarchical_task_number[i]`` and added to ``task_total_number``.
    """
    iset_total = len(self.meta_data.search_space_iset_ids)
    for ne_count in range(self.min_ne, self.max_ne + 1):
        level_tasks = CombinaryCounter.compute_comb(iset_total, ne_count)
        self.task_total_number += level_tasks
        self.hierarchical_task_number[ne_count] = level_tasks
def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
    """Generate task slices for every configured itask and enqueue them.

    Connects to the queue manager configured in ``config``, loads the tasks
    from ``isc_config_file`` and, for every task, every ne-iset number in
    ``min_ne .. max_ne`` and every feasible left/right split, enumerates the
    combinations of left-zone iset ids.  Combinations for which
    ``iscm.check_contain_rules_without_i_n_iset(4, ...)`` is truthy are
    reported on the result queue as skipped statistics; all others are put
    on the task queue.  Finally ``5 * 200`` kill signals are enqueued.

    :param cls: only used in the start-up log message
    :param isc_config_file: path passed to ``ITaskConfig``
    """
    start_text = "%s init task slices generator ..." % str(cls)
    logging.info(start_text)
    msg.send_message(start_text)

    for queue_getter in ("get_task_queue", "get_result_queue"):
        SearchWorkerQueueManger.register(queue_getter)
    manager = SearchWorkerQueueManger(
        address=(config.task_host, config.task_host_port),
        authkey=bytes(config.task_host_key, encoding="utf-8"))
    manager.connect()
    task_queue = manager.get_task_queue()
    result_queue = manager.get_result_queue()

    isc_tasks = ITaskConfig(isc_config_file).isc_tasks
    for tid, itask in enumerate(isc_tasks):
        search_space_size = len(itask.meta_data.search_space_iset_ids)
        rule_number = itask.rule_number
        left_zone_iset_ids = itask.meta_data.search_i4_composed_iset_ids
        left_zone_length = len(left_zone_iset_ids)
        right_zone_length = search_space_size - left_zone_length
        is_use_extended_rules = itask.is_use_extended_rules

        for ne_iset_number in range(itask.min_ne, itask.max_ne + 1):
            for left_iset_number in range(ne_iset_number + 1):
                right_iset_number = ne_iset_number - left_iset_number
                split_is_feasible = (
                    left_iset_number <= left_zone_length
                    and right_iset_number <= right_zone_length)
                if not split_is_feasible:
                    continue

                for left_combination in itertools.combinations(
                        left_zone_iset_ids, left_iset_number):
                    left_iset_ids = list(left_combination)
                    has_rule = iscm.check_contain_rules_without_i_n_iset(
                        4, left_iset_ids, rule_number, is_use_extended_rules)

                    if not has_rule:
                        task_queue.put(
                            (tid, (ne_iset_number, set(left_zone_iset_ids),
                                   left_iset_ids)))
                        continue

                    # All C(right_zone_length, right_iset_number) completions
                    # of this left choice are reported as skipped.
                    task_number = CombinaryCounter.compute_comb(
                        right_zone_length, right_iset_number)
                    result_queue.put(
                        (ITaskSignal.stat_signal, tid, ne_iset_number, 0,
                         task_number, task_number, None))

    working_hosts_number = 5
    for _ in range(working_hosts_number * 200):
        task_queue.put((ITaskSignal.kill_signal, -1))
    logging.info("all itasks has been dispatched")
def itask_slice_generator_by_i4_meta(ne_iset_number, itask_id, itask,
                                     max_space_size, manager_tuple):
    """Enqueue task slices for one ne-iset number, driven by the i4 meta data.

    For every feasible number ``left_choice`` of left-zone picks, the
    ``i4_meta[left_choice]`` listed left-zone tuples are cut into index
    ranges and each range is put on the task queue as
    ``(itask_id, (left_choice, begin, end, right_choice))``.  Left-zone
    tuples not covered by the meta data (and the case of picking nothing from
    the left zone) are reported on the result queue as skipped statistics.

    :param ne_iset_number: total number of isets to pick
    :param itask_id: id of the itask, placed in every queued tuple
    :param itask: task object providing ``k_m_n`` and ``meta_data``
    :param max_space_size: size hint used to derive the slice granularity
    :param manager_tuple: indexable; item 1 is used as the task queue and
        item 3 as the result queue
    """
    task_queue = manager_tuple[1]
    result_queue = manager_tuple[3]

    kmn_key = I4RawSearchMaster.get_kmn_meta_key(*itask.k_m_n)
    i4_meta = I4RawSearchMaster.i4_meta[kmn_key]

    left_zone_length = len(itask.meta_data.search_i4_composed_iset_ids)
    search_isets_length = len(itask.meta_data.search_space_iset_ids)
    right_zone_length = search_isets_length - left_zone_length
    task_slice_cnt = 0

    if ne_iset_number <= right_zone_length:
        # left_choice == 0: everything comes from the right zone; reported
        # as skipped without being queued.
        valid_skip_number = CombinaryCounter.compute_comb(
            right_zone_length, ne_iset_number)
        result_queue.put((ITaskSignal.stat_signal, itask_id, ne_iset_number,
                          0, valid_skip_number, valid_skip_number, None))

    for left_choice in range(1, left_zone_length + 1):
        right_choice = ne_iset_number - left_choice
        if right_choice > right_zone_length or left_choice > ne_iset_number:
            continue

        single_slice_right_task_number = CombinaryCounter.compute_comb(
            right_zone_length, right_choice)
        task_i4_slice_number = (
            max_space_size // single_slice_right_task_number + 1)
        non_semi_valid_i4_slices_size = i4_meta[left_choice]

        # Number of queue items for this left_choice, clamped as before.
        itask_sizes = non_semi_valid_i4_slices_size // task_i4_slice_number
        if itask_sizes < 92 or itask_sizes > 1200:
            itask_sizes = 1200
        if itask_sizes > non_semi_valid_i4_slices_size:
            itask_sizes = non_semi_valid_i4_slices_size

        # When the meta data lists no tuples, itask_sizes is 0; dividing by
        # it raised ZeroDivisionError.  Skip the division so the empty-list
        # handling below takes effect and no slice is queued.
        if itask_sizes > 0:
            task_i4_slice_number = (
                non_semi_valid_i4_slices_size // itask_sizes)

        itask_splitting_points = [
            i * task_i4_slice_number for i in range(itask_sizes)
        ]
        if not itask_splitting_points:
            itask_splitting_points.append(0)
        if itask_splitting_points[-1] < non_semi_valid_i4_slices_size:
            itask_splitting_points.append(non_semi_valid_i4_slices_size)

        for begin, end in zip(itask_splitting_points,
                              itask_splitting_points[1:]):
            task_queue.put((itask_id, (left_choice, begin, end,
                                       right_choice)))
            task_slice_cnt += 1

        total_i4_silces_size = CombinaryCounter.compute_comb(
            left_zone_length, left_choice)
        semi_valid_i4_slices_size = (
            total_i4_silces_size - non_semi_valid_i4_slices_size)
        if semi_valid_i4_slices_size > 0:
            valid_skip_number = (
                semi_valid_i4_slices_size * single_slice_right_task_number)
            result_queue.put((ITaskSignal.stat_signal, itask_id,
                              ne_iset_number, 0, valid_skip_number,
                              valid_skip_number, None))

    msg_text = "itask %d-%d-%d ne iset number %d, put %d task slices" % (
        *itask.k_m_n, ne_iset_number, task_slice_cnt)
    logging.info(msg_text)
    msg.send_message(msg_text)