def inclose4path(file, process, threshold=1):
    """In-Close4-style concept miner that prunes candidate children by
    subset checks against already-queued states (the "path" variant).

    Pops (rows, cols, P, y) states off a LIFO work list.  For each
    unprocessed column j it intersects the current extent with the column
    support and either records an empty column in P, absorbs a full column
    into cols, or treats the reduced extent as a candidate child.  A
    candidate is dropped when its extent is a subset of the extent of any
    state already queued (globally in `todo` or locally in `todo_inside`).

    Args:
        file: dataset handle passed straight to compute_supports() (defined
            elsewhere in this file), which must return
            (supports, n_rows, n_cols) with supports[j] the set of row
            indices supporting column j — confirm against its definition.
        process: callback invoked as process(rows, cols) for each concept.
        threshold: minimum extent size for a candidate to be explored.

    Returns:
        (NB_CHECKS, NB_SUB_CHECKS, NB_COL_CHECKS) instrumentation counters.
    """
    NB_CHECKS = 0      # candidate extents examined
    NB_SUB_CHECKS = 0  # subset tests against queued extents
    NB_COL_CHECKS = 0  # columns inspected
    supports, n_rows, n_cols = compute_supports(file)
    max_todo_size = 0  # high-water mark of the work list (diagnostic only)
    todo = deque()
    todo.append((set(range(n_rows)), set(), set(), 0))
    while todo:
        max_todo_size = max(max_todo_size, len(todo))
        rows, cols, P, y = todo.pop()
        P = set(P)  # copy before modifying: the parent's P is shared by siblings
        todo_inside = []
        for j in range(y, n_cols):
            if j in cols or j in P:
                continue
            NB_COL_CHECKS += 1
            g = rows.intersection(supports[j])
            if len(g) == 0:
                P.add(j)          # column empty on this extent: mark as failed
            elif len(g) == len(rows):
                cols.add(j)       # column full: absorb into the intent
            else:
                if len(g) < threshold:
                    continue
                NB_CHECKS += 1
                # Fix: the original wrapped the first scan in
                # `found_one = False; if not found_one:` — a guard that is
                # always true.  The scans themselves are unchanged.
                found_one = False
                for A, _, _, _ in todo:
                    NB_SUB_CHECKS += 1
                    if g.issubset(A):
                        found_one = True
                        break
                if not found_one:
                    for A, _ in todo_inside:
                        NB_SUB_CHECKS += 1
                        if g.issubset(A):
                            found_one = True
                            break
                if not found_one:
                    todo_inside.append((g, j))
        process(rows, cols)
        for g, j in todo_inside:
            h = cols.union({j})
            todo.append((g, h, P, j + 1))
    return NB_CHECKS, NB_SUB_CHECKS, NB_COL_CHECKS
def inclose5(file, process, threshold=1):
    """In-Close5 concept miner with a canonicity test and propagation of
    failed columns (the P set).

    Pops (rows, cols, P, y) states off a LIFO work list; each remaining
    column either lands in P (empty on this extent), is absorbed into cols
    (full on this extent), or spawns a child state when the reduced extent
    passes the canonicity test.

    Args:
        file: dataset handle passed to compute_supports() (defined elsewhere
            in this file), which must return (supports, n_rows, n_cols) with
            supports[j] the set of row indices supporting column j.
        process: callback invoked as process(rows, cols) for each concept.
        threshold: minimum extent size for a candidate to be explored.

    Returns:
        (n_checks, n_sub_checks, n_col_checks) instrumentation counters.
    """
    # Fix: counters were stored as function attributes on is_canonical
    # (is_canonical.n_checks, ...), a non-idiomatic mutable-state hack.
    # Plain locals mutated via `nonlocal` carry exactly the same values.
    n_checks = 0      # canonicity tests performed
    n_sub_checks = 0  # subset tests inside the canonicity tests
    n_col_checks = 0  # columns inspected

    def is_canonical(supports, g: Set[int], cols: Set[int], j):
        """Return the first column jj < j outside `cols` whose support
        contains g, or -1 when g is canonical at column j."""
        nonlocal n_checks, n_sub_checks
        n_checks += 1
        for jj in range(j):
            if jj not in cols:
                n_sub_checks += 1
                if g.issubset(supports[jj]):
                    return jj
        return -1

    supports, n_rows, n_cols = compute_supports(file)
    max_todo_size = 0  # high-water mark of the work list (diagnostic only)
    todo = deque()
    todo.append((set(range(n_rows)), set(), set(), 0))
    while todo:
        max_todo_size = max(max_todo_size, len(todo))
        rows, cols, P, y = todo.pop()
        P = set(P)  # copy before modifying: the parent's P is shared by siblings
        todo_inside = []
        for j in range(y, n_cols):
            if j not in cols and j not in P:
                n_col_checks += 1
                g = rows.intersection(supports[j])
                if len(g) == 0:
                    P.add(j)        # column empty on this extent
                elif len(g) == len(rows):
                    cols.add(j)     # column full: absorb into the intent
                else:
                    if len(g) < threshold:
                        continue
                    canonical = is_canonical(supports, g, cols, j)
                    if canonical == -1:
                        todo_inside.append((g, j))  # canonical: new child state
                    elif canonical < y:
                        # Non-canonical because of a column processed by an
                        # ancestor: this column can never contribute below here.
                        P.add(j)
        process(rows, cols)
        for g, j in todo_inside:
            h = cols.union({j})
            todo.append((g, h, P, j + 1))
    return n_checks, n_sub_checks, n_col_checks
def inclose2(file, process, threshold=1):
    """In-Close2: enumerate the formal concepts of the context described by
    `file`, invoking process(extent, intent) for each concept with a
    non-empty extent.

    A LIFO stack holds (extent, intent, first_col) states.  Each state scans
    the remaining columns: a column whose support covers the whole extent is
    absorbed into the intent; otherwise the reduced extent becomes a child
    state when it is large enough and passes the canonicity test.

    Returns the instrumentation triple
    (canonicity tests, subset tests inside them, columns inspected).
    """

    def is_canonical(supports, extent: Set[int], intent: Set[int], col):
        # Canonical iff no earlier non-intent column's support contains extent.
        is_canonical.n_checks += 1
        for earlier in range(col):
            if earlier in intent:
                continue
            is_canonical.n_sub_checks += 1
            if extent.issubset(supports[earlier]):
                return False
        return True

    is_canonical.n_checks = 0
    is_canonical.n_sub_checks = 0
    col_checks = 0

    supports, n_rows, n_cols = compute_supports(file)
    peak_stack = 0  # high-water mark of the stack, kept for diagnostics
    stack = deque([(set(range(n_rows)), set(), 0)])
    while stack:
        peak_stack = max(peak_stack, len(stack))
        extent, intent, first_col = stack.pop()
        children = []
        for col in range(first_col, n_cols):
            if col in intent:
                continue
            col_checks += 1
            reduced = extent.intersection(supports[col])
            if len(reduced) == len(extent):
                # Column fully supported by the current extent: absorb it.
                intent.add(col)
            elif len(reduced) >= threshold and is_canonical(
                    supports, reduced, intent, col):
                children.append((reduced, col))
        if extent:
            process(extent, intent)
        for reduced, col in children:
            stack.append((reduced, intent.union({col}), col + 1))
    return is_canonical.n_checks, is_canonical.n_sub_checks, col_checks
# Compute, for every frequent pattern mined in the first class (T), its
# support in the second class (N), then rank patterns by growth ratio.

# NOTE(review): DataFrame.size is rows * columns, not the row count; if a
# row count is intended downstream, len(dfN) is the usual spelling — confirm.
nrowN = dfN.size

# Build the emerging-patterns table from the class-T frequent itemsets.
# Fix: .copy() makes this an independent frame so the column rename and the
# .at[] writes below cannot trigger SettingWithCopyWarning or silently
# no-op on a view of frequent_itemsets_T.
emerging_patterns = frequent_itemsets_T[['itemsets', 'length', 'support']].copy()
emerging_patterns.columns = ['Pattern', 'Size', 'supportT']

# New columns: support in class N (default 0.0) and growth ratio
# supportT / supportN (infinite while the pattern never occurs in N).
emerging_patterns = emerging_patterns.assign(supportN=0.0, GrowthRatio=np.inf)

for pattern in emerging_patterns.itertuples():
    # compute_supports here takes (pattern, dataframe) — a different arity
    # from the single-argument uses elsewhere in this file; presumably a
    # different helper of the same name — verify.
    supportN = compute_supports(pattern.Pattern, dfN)
    if supportN > 0:
        emerging_patterns.at[pattern.Index, 'supportN'] = supportN
        emerging_patterns.at[pattern.Index, 'GrowthRatio'] = emerging_patterns.at[
            pattern.Index, 'supportT'] / supportN

# Rank: highest growth ratio first, ties broken by support in T, then by size.
emerging_patterns.sort_values(['GrowthRatio', 'supportT', 'Size'],
                              ascending=[False, False, False], inplace=True)
emerging_patterns.to_csv("../results/emerging_patterns.csv", index=False)
print("Done!")
def fcbo(matrix, process):
    """FCbO (Fast Close-by-One) formal-concept miner over a 0/1 matrix.

    Recursively enumerates concepts (A, B) — A a set of row indices, B the
    set of columns fully supported by A — calling process(A, B) for every
    concept with a non-empty extent.  N carries, per column, the closure of
    a previously failed attempt, letting canon_check prune descendants
    without recomputing closures.

    Returns the instrumentation triple (NB_CANON, NB_INCL, NB_COLCHECK).
    """
    NB_CANON = 0     # closure computations (calls to supported)
    NB_INCL = 0      # inclusion / comparison tests
    NB_COLCHECK = 0  # columns inspected during expansion
    n = matrix.shape[0]
    m = matrix.shape[1]
    # compute_supports is defined elsewhere in this file; called with the
    # matrix alone, so supports[j] is presumably the set of rows supporting
    # column j — confirm against its definition.
    supports = compute_supports(matrix)

    def canon_check(Nj, B, j):
        # Prune when the recorded failed closure Nj contains a column < j
        # that the current intent B does not.
        nonlocal NB_INCL
        NB_INCL += 1
        for i in range(0, j):
            if i in Nj and i not in B:
                return False
        return True

    def canon_check_eq(B, D, j):
        # Canonicity: B and the closure D must agree on all columns < j.
        nonlocal NB_INCL
        NB_INCL += 1
        for i in range(0, j):
            if (i in B) != (i in D):
                return False
        return True

    def supported(A):
        # Closure operator: every column supported by all rows in A.
        nonlocal NB_CANON
        nonlocal NB_INCL
        NB_CANON += 1
        cols = set()
        for j in range(0, m):
            NB_INCL += 1
            if all(matrix[i, j] != 0 for i in A):
                cols.add(j)
        return cols

    def compute(A, B, y, N):
        # Expand concept (A, B) over columns >= y; N is the failed-closure table.
        nonlocal NB_COLCHECK
        if len(A) != 0:
            process(A, B)
        M = [s for s in N]  # per-branch copy so siblings don't see our failures
        todo = []
        for j in range(y, m):
            if j not in B:
                NB_COLCHECK += 1
                if canon_check(N[j], B, j):
                    C = A.intersection(supports[j])
                    D = supported(C)
                    if canon_check_eq(B, D, j):
                        todo.append((C, D, j))
                    else:
                        M[j] = D  # remember the failed closure for descendants
        # Recurse only after the scan: children inherit the updated table M.
        for C, D, j in todo:
            compute(C, D, j+1, M)

    compute(set(range(0, n)), supported(set(range(0, n))), 0,
            [set() for _ in range(0, m)])
    return NB_CANON, NB_INCL, NB_COLCHECK