示例#1
0
def inclose4path(file, process, threshold=1):
    """Explore (rows, cols) pairs derived from ``file`` and report each to ``process``.

    The name suggests a variant of the In-Close4 concept-enumeration
    algorithm (NOTE(review): confirm against the project's references); the
    description below only states what the code does.

    A LIFO work list holds entries ``(rows, cols, P, y)``. For each entry the
    columns ``y .. n_cols - 1`` that are in neither ``cols`` nor ``P`` are
    intersected with ``rows``:

    * an empty intersection adds the column to ``P`` (skipped from then on,
      also by the entries spawned from this one);
    * an intersection equal in size to ``rows`` adds the column to ``cols``;
    * otherwise, if the intersection has at least ``threshold`` rows and is
      not a subset of the row set of any entry still pending (globally or
      collected for the current entry), it is queued as a new entry.

    Args:
        file: Passed unchanged to ``compute_supports``, which must return
            ``(supports, n_rows, n_cols)``. ``supports[j]`` is intersected
            with the current row set (presumably the rows that have column
            ``j`` -- confirm against ``compute_supports``).
        process: Callable invoked as ``process(rows, cols)`` once for every
            entry taken from the work list.
        threshold: Partial intersections with fewer rows than this are not
            explored further.

    Returns:
        Tuple ``(n_checks, n_sub_checks, n_col_checks)``: the number of
        partial intersections tested against pending entries, the number of
        individual subset tests performed, and the number of column/row-set
        intersections computed.
    """
    n_checks = 0
    n_sub_checks = 0
    n_col_checks = 0

    supports, n_rows, n_cols = compute_supports(file)

    def covered_by(g, candidates):
        """Return True if ``g`` is a subset of the row set heading any candidate."""
        nonlocal n_sub_checks
        for entry in candidates:
            n_sub_checks += 1
            if g.issubset(entry[0]):
                return True
        return False

    todo = deque()
    todo.append((set(range(n_rows)), set(), set(), 0))
    while todo:
        rows, cols, P, y = todo.pop()
        P = set(P)  # sibling entries share the same P object; copy before modifying
        todo_inside = []
        for j in range(y, n_cols):
            if j in cols or j in P:
                continue
            n_col_checks += 1
            g = rows.intersection(supports[j])
            if not g:
                P.add(j)
                continue
            if len(g) == len(rows):
                cols.add(j)
                continue
            if len(g) < threshold:
                continue

            n_checks += 1
            # Globally pending entries are scanned first, then the ones
            # collected for the current entry; the scan stops at the first hit.
            if covered_by(g, todo) or covered_by(g, todo_inside):
                continue
            todo_inside.append((g, j))

        process(rows, cols)
        # cols is final for this entry only now, so child column sets are built here.
        for g, j in todo_inside:
            todo.append((g, cols.union({j}), P, j + 1))

    return n_checks, n_sub_checks, n_col_checks
示例#2
0
def inclose5(file, process, threshold=1):
    """Explore (rows, cols) pairs derived from ``file`` and report each to ``process``.

    The name suggests the In-Close5 concept-enumeration algorithm
    (NOTE(review): confirm against the project's references); the description
    below only states what the code does.

    A LIFO work list holds entries ``(rows, cols, P, y)``. For each entry the
    columns ``y .. n_cols - 1`` that are in neither ``cols`` nor ``P`` are
    intersected with ``rows``:

    * an empty intersection adds the column to ``P``;
    * an intersection equal in size to ``rows`` adds the column to ``cols``;
    * otherwise, if the intersection ``g`` has at least ``threshold`` rows,
      the lowest column ``jj < j`` outside ``cols`` whose support contains
      ``g`` is looked up. If there is none, ``(g, j)`` is queued as a new
      entry; if there is one and it lies below ``y``, ``j`` is added to ``P``.

    Args:
        file: Passed unchanged to ``compute_supports``, which must return
            ``(supports, n_rows, n_cols)``. ``supports[j]`` is intersected
            with the current row set (presumably the rows that have column
            ``j`` -- confirm against ``compute_supports``).
        process: Callable invoked as ``process(rows, cols)`` once for every
            entry taken from the work list.
        threshold: Partial intersections with fewer rows than this are not
            explored further.

    Returns:
        Tuple ``(n_checks, n_sub_checks, n_col_checks)``: the number of
        canonicity look-ups, the number of individual subset tests performed
        inside them, and the number of column/row-set intersections computed.
    """
    n_checks = 0
    n_sub_checks = 0
    n_col_checks = 0

    supports, n_rows, n_cols = compute_supports(file)

    def is_canonical(g: Set[int], cols: Set[int], j):
        """Return the first column below ``j`` outside ``cols`` whose support contains ``g``, else -1."""
        nonlocal n_checks, n_sub_checks
        n_checks += 1
        for jj in range(j):
            if jj in cols:
                continue
            n_sub_checks += 1
            if g.issubset(supports[jj]):
                return jj
        return -1

    todo = deque()
    todo.append((set(range(n_rows)), set(), set(), 0))
    while todo:
        rows, cols, P, y = todo.pop()
        P = set(P)  # sibling entries share the same P object; copy before modifying
        todo_inside = []
        for j in range(y, n_cols):
            if j in cols or j in P:
                continue
            n_col_checks += 1
            g = rows.intersection(supports[j])
            if not g:
                P.add(j)
                continue
            if len(g) == len(rows):
                cols.add(j)
                continue
            if len(g) < threshold:
                continue

            canonical = is_canonical(g, cols, j)
            if canonical == -1:
                todo_inside.append((g, j))
            elif canonical < y:
                P.add(j)

        process(rows, cols)
        # cols is final for this entry only now, so child column sets are built here.
        for g, j in todo_inside:
            todo.append((g, cols.union({j}), P, j + 1))

    return n_checks, n_sub_checks, n_col_checks
def inclose2(file, process, threshold=1):
    """Explore (rows, cols) pairs derived from ``file`` and report each to ``process``.

    The name suggests the In-Close2 concept-enumeration algorithm
    (NOTE(review): confirm against the project's references); the description
    below only states what the code does.

    A LIFO work list holds entries ``(rows, cols, y)``. For each entry the
    columns ``y .. n_cols - 1`` not already in ``cols`` are intersected with
    ``rows``:

    * an intersection equal in size to ``rows`` adds the column to ``cols``;
    * otherwise, if the intersection ``g`` has at least ``threshold`` rows and
      no column ``jj < j`` outside ``cols`` has a support containing ``g``,
      ``(g, j)`` is queued as a new entry.

    Args:
        file: Passed unchanged to ``compute_supports``, which must return
            ``(supports, n_rows, n_cols)``. ``supports[j]`` is intersected
            with the current row set (presumably the rows that have column
            ``j`` -- confirm against ``compute_supports``).
        process: Callable invoked as ``process(rows, cols)`` for every entry
            taken from the work list whose row set is non-empty.
        threshold: Partial intersections with fewer rows than this are not
            explored further.

    Returns:
        Tuple ``(n_checks, n_sub_checks, n_col_checks)``: the number of
        canonicity tests, the number of individual subset tests performed
        inside them, and the number of column/row-set intersections computed.
    """
    n_checks = 0
    n_sub_checks = 0
    n_col_checks = 0

    supports, n_rows, n_cols = compute_supports(file)

    def is_canonical(g: Set[int], cols: Set[int], j):
        """Return False if some column below ``j`` outside ``cols`` has a support containing ``g``."""
        nonlocal n_checks, n_sub_checks
        n_checks += 1
        for jj in range(j):
            if jj in cols:
                continue
            n_sub_checks += 1
            if g.issubset(supports[jj]):
                return False
        return True

    todo = deque()
    todo.append((set(range(n_rows)), set(), 0))
    while todo:
        rows, cols, y = todo.pop()
        todo_inside = []
        for j in range(y, n_cols):
            if j in cols:
                continue
            n_col_checks += 1
            g = rows.intersection(supports[j])
            if len(g) == len(rows):
                cols.add(j)
                continue
            if len(g) < threshold:
                continue

            if is_canonical(g, cols, j):
                todo_inside.append((g, j))

        # Entries with no rows are still expanded above but never reported.
        if rows:
            process(rows, cols)
        # cols is final for this entry only now, so child column sets are built here.
        for g, j in todo_inside:
            todo.append((g, cols.union({j}), j + 1))

    return n_checks, n_sub_checks, n_col_checks
示例#4
0
# For every pattern found in the first class, look up its support in the
# second class (dfN) and derive the growth ratio supportT / supportN.
# NOTE(review): if dfN is a pandas DataFrame, .size is rows * columns rather
# than the row count; the value is not used anywhere in this section.
nrowN = dfN.size

# Emerging-pattern table: the first-class itemsets under new column names,
# plus supportN / GrowthRatio columns. Their defaults (0.0 and infinity)
# remain for patterns whose support in dfN is not positive.
emerging_patterns = (
    frequent_itemsets_T[['itemsets', 'length', 'support']]
    .rename(columns={'itemsets': 'Pattern',
                     'length': 'Size',
                     'support': 'supportT'})
    .assign(supportN=0.0, GrowthRatio=np.inf)
)

for pattern in emerging_patterns.itertuples():
    supportN = compute_supports(pattern.Pattern, dfN)

    # Leave the defaults untouched unless the pattern has positive support in dfN.
    if not supportN > 0:
        continue

    emerging_patterns.at[pattern.Index, 'supportN'] = supportN
    emerging_patterns.at[pattern.Index, 'GrowthRatio'] = pattern.supportT / supportN

# Order by growth ratio first, then by first-class support, then by size,
# all in descending order.
emerging_patterns.sort_values(by=['GrowthRatio', 'supportT', 'Size'],
                              ascending=False,
                              inplace=True)

emerging_patterns.to_csv("../results/emerging_patterns.csv", index=False)

print("Done!")
示例#5
0
def fcbo(matrix, process):
    """Recursively enumerate (rows, cols) pairs of ``matrix`` and report them to ``process``.

    The name suggests the FCbO concept-enumeration algorithm (NOTE(review):
    confirm against the project's references); the description below only
    states what the code does.

    Args:
        matrix: Object exposing ``shape[0]`` / ``shape[1]`` and ``matrix[i, j]``
            indexing; an entry counts as present when it is ``!= 0``. It is
            also passed to ``compute_supports``, whose result is indexed by
            column and intersected with row sets (presumably the rows that
            have each column -- confirm against ``compute_supports``).
        process: Callable invoked as ``process(A, B)`` for every visited pair
            whose row set ``A`` is non-empty.

    Returns:
        Tuple ``(NB_CANON, NB_INCL, NB_COLCHECK)``: the number of column-set
        computations, the number of canonicity tests plus one per column
        scanned in each column-set computation, and the number of candidate
        columns examined.
    """
    closures = 0
    inclusion_tests = 0
    column_checks = 0

    n_rows, n_cols = matrix.shape[0], matrix.shape[1]

    supports = compute_supports(matrix)

    def canon_check(Nj, B, j):
        # True when every index below j that is in Nj is also in B.
        nonlocal inclusion_tests
        inclusion_tests += 1
        return not any(i in Nj and i not in B for i in range(j))

    def canon_check_eq(B, D, j):
        # True when B and D contain exactly the same indices below j.
        nonlocal inclusion_tests
        inclusion_tests += 1
        return all((i in B) == (i in D) for i in range(j))

    def supported(A):
        # Columns whose entry is non-zero in every row of A.
        nonlocal closures, inclusion_tests
        closures += 1
        inclusion_tests += n_cols  # one test counted per column scanned
        return {j for j in range(n_cols)
                if all(matrix[i, j] != 0 for i in A)}

    def compute(A, B, y, N):
        nonlocal column_checks

        if A:
            process(A, B)

        # Per-column sets handed to the children; starts as a copy of the
        # list received from the caller and records the failed column sets.
        failed = list(N)
        children = []
        for j in range(y, n_cols):
            if j in B:
                continue
            column_checks += 1
            if not canon_check(N[j], B, j):
                continue
            C = A.intersection(supports[j])
            D = supported(C)
            if canon_check_eq(B, D, j):
                children.append((C, D, j))
            else:
                failed[j] = D

        # Recurse only after the whole column scan, so every child sees the
        # complete `failed` list.
        for C, D, j in children:
            compute(C, D, j + 1, failed)

    all_rows = set(range(n_rows))
    compute(all_rows, supported(all_rows), 0, [set() for _ in range(n_cols)])
    return closures, inclusion_tests, column_checks