示例#1
0
def attribute_exploration_pps(tuples):
    """Explore the attributes of ``tuples`` and print the dependencies found.

    Every column is re-encoded through ``build_pli``, the columns are sorted
    by that encoding, and the main loop then walks candidate attribute sets
    with ``fast_next_closure``.  For each candidate ``X`` a closure estimate
    ``XJJ`` is read from the sampled context (``g_prime`` / ``m_prime``).
    When the estimate is larger than ``X`` it is handed to ``check`` together
    with the data; afterwards either ``fdt.add_fd(X, XJJ)`` is called or one
    entry of ``cache`` is appended to the sampled context.

    Nothing is returned: progress and the final statistics are written with
    ``print``.

    Args:
        tuples: Sequence of rows.  ``tuples[0]`` fixes the number of
            attributes and every row is indexed at each attribute position,
            so the input must be non-empty (presumably rectangular -- TODO
            confirm with the caller).  The argument is only read; the name
            is rebound locally to a freshly built table.
    """
    U = range(len(tuples[0]))  # Attribute positions 0..n_atts-1
    n_atts = len(U)
    # m_prime[m]: set of sampled-object indices attached to attribute m.
    m_prime = [set([]) for i in range(len(U))]
    # g_prime[g]: attribute collection of sampled object g.
    g_prime = []
    stats = Stats()

    # NOTE(review): fctx is never read again in this function; it is kept
    # because the constructor's side effects are not visible from here.
    fctx = FormalContext(g_prime, m_prime)

    # Extra, initially empty slot at index -1.  The main loop starts with
    # m_i == -1, so m_prime[m_i] resolves to this set on the first pass.
    m_prime.append(set(
        []))  # THIS SHOULD BE AFTER DECLARING THE FORMAL CONTEXT

    print("Processing data... ", end='')
    sys.stdout.flush()
    # Transpose: representations[j] is column j of the input.
    representations = [[row[j] for row in tuples] for j in U]
    print("done")

    # ATTRIBUTE ORDERING
    print("Building representations... ", end='')
    sys.stdout.flush()
    # Pair each column's build_pli() result with its original position.
    plis = [(build_pli(r), ri) for ri, r in enumerate(representations)]
    print("done")

    print("Ordering... ", end='')
    sys.stdout.flush()
    # ATTRIBUTE ORDERING
    # ex_order = [290, 17, 7, 7, 489, 14, 10, 31, 509, 6, 341, 151, 16, 28, 49, 4, 1, 19, 571, 810, 6, 8, 17]
    # plis.sort(key=lambda k: ex_order[k[1]], reverse=False) # Lexicographic
    # Sort by the build_pli() value itself (lexicographic if those values are
    # lists -- TODO confirm what build_pli returns).
    plis.sort(key=lambda k: k[0], reverse=False)  # Lexicographic
    order = {j[1]: i for i, j in enumerate(plis)}  # Original position -> new position
    inv_order = {i: j
                 for j, i in order.items()
                 }  # At position i should be attribute j
    # print(order)
    # print(inv_order)
    # exit()

    # reco_order = { }

    # Drop the original-position tag; plis[i] is now the i-th sorted column.
    plis = [
        i[0] for i in plis
    ]  # build_pli(representations[ inv_order[i] ]) for i in range(n_atts) ]
    print("done")

    print("Reconverting... ", end='')
    # Rebuild the table from the PLIs: each cell becomes the index of the
    # cluster that contains the row; rows in no cluster keep None.
    tuples = [[None] * n_atts for i in range(len(tuples))]
    # print(plis)
    # not_none = [0 for i in range(len(tuples))]

    for att in range(n_atts):
        # inv_order is a permutation of 0..n_atts-1, and the mapped value is
        # used for both plis[...] and the output column, so this only changes
        # the order in which columns are visited, not the resulting table.
        att = inv_order[att]
        for i, cluster in enumerate(plis[att]):
            for row in cluster:
                tuples[row][att] = i

    print("done")

    # print(records)
    # print(tuples)
    # for ti, t in enumerate(tuples):
    #     tuples[ti] = [t[inv_order[i]] if any(ti in part for part in plis[i]) else None for i in range(len(t))]
    # tuples[ti] = [t[inv_order[i]] for i in range(len(t))]
    # print(tuples[ti], ti,  )
    # print (tuples)
    # tuples=records
    # records[]

    # print(plis)
    # # END ORDERING

    # VARIABLES FOR FAST STACKED NEXT CLOSURE
    Mjs = [set() for i in range(n_atts)]
    # Stack entries are [m_i, object set, (optional) per-attribute cache].
    # The bottom entry shares the extra m_prime[-1] set created above.
    stack = [[None, m_prime[-1]], [None, set([]), Mjs]]

    # INITIALIZATION VARIABLES
    X = set([])
    fdt = FDTree(U)
    m_i = -1  # WE START WITH THE EMPTY INTENT REPRESENTED BY THIS

    # COUNTERS TO KEEP SOME PERFORMANCE STATISTICS
    cycles = 0  # iterations of the main loop
    cycles2 = 0  # iterations that called check()
    avoided_closures = 0  # iterations where len(X) == len(XJJ)
    ncls = 0  # NOTE(review): never updated or printed in this variant
    # NOTE(review): X starts as a set and U is a range, which never compare
    # equal; the loop therefore ends only once fast_next_closure returns
    # something equal to U -- verify against its implementation.
    while X != U:
        cycles += 1
        if cycles % 1000 == 0:
            # Progress line: FD count / cycles / checks / sampled objects /
            # rounded mean size of the m_prime sets, then the current X.
            print("\rFDs:{}/{}/{}/{}/{} - {: <100}".format(
                fdt.n_fds, cycles, cycles2, len(g_prime),
                round((sum([len(mp) for mp in m_prime])) / len(m_prime)),
                ','.join(map(str, sorted(X)))),
                  end='')  #stack
            sys.stdout.flush()

        # Sampled objects shared by the previous stack level and attribute m_i.
        XJ = stack[-2][1].intersection(m_prime[m_i])
        if bool(XJ):
            # XJJ = reduce(set.intersection, (g_prime[g] for g in XJ))
            # Attributes common to all of those objects; set.intersection
            # always returns a new set, so the stored entries are not aliased.
            XJJ = set.intersection(*[g_prime[g] for g in XJ])
            # if len(XJ) == 1:
            #     XJJ = set(XJJ)
        else:
            # No sampled object constrains X: start from every attribute.
            XJJ = set(U)

        # AT THIS POINT WE HAVE XJJ WHICH IS OUR ESTIMATION OF THE CLOSURE
        # USING THE REPRESENTATION CONTEXT CALCULATED SO FAR
        # THE ACTUAL CLOSURE SHOULD BE XSS, HOWEVER IF
        # X = XJJ WE KNOW THAT XSS = XJJ AND WE CAN AVOID ITS
        # CALCULATION

        # XSS = None
        n_x = len(X)

        # bool adds as 0/1: counts the iterations that skip check().
        avoided_closures += n_x == len(XJJ)

        if n_x < len(XJJ):  # CHECKS WHETHER X==XJJ
            cycles2 += 1
            cache = []
            # Return value is ignored; the size re-test right below only makes
            # sense if check() shrinks XJJ in place and fills cache --
            # presumably so, TODO confirm.
            check(X, XJJ, tuples, n_atts, cache, plis, stats)

            if n_x < len(XJJ):
                # XJJ is still larger than X after the check: record X -> XJJ.
                fdt.add_fd(X, XJJ)
                # break
            else:
                # XJJ collapsed to the size of X: take the longest cache entry
                # and add it to the sampled context as a new object.
                cache.sort(key=len)
                gp = cache.pop()

                n_gp = len(g_prime)  # index the new object will get
                XJ.add(n_gp)
                # Register the object on every stack level except the bottom.
                for i in stack[1:]:
                    i[1].add(n_gp)
                # gp's members index m_prime, i.e. they are attribute positions.
                for x in gp:
                    m_prime[x].add(n_gp)

                g_prime.append(gp)
                # XJJ.intersection_update(gp)

        new_atts = XJJ - X

        # Accept XJJ as the next set when it adds nothing, or adds no
        # attribute smaller than m_i.
        if not bool(new_atts) or m_i <= min(new_atts):
            m_i = U[-1]
            X = XJJ
        else:
            # print(stack)
            # Rejected: remember XJJ for attribute m_i on the previous level
            # and strip from X (in place) every attribute greater than m_i.
            stack[-2][2][m_i] = XJJ
            # print('\t',m_i, XJJ)
            X.difference_update([m for m in X if m > m_i])

        # Hand the current object set to the top stack level before advancing.
        stack[-1][1] = XJ

        X, m_i = fast_next_closure(X, U, fdt.l_close, m_i, stats, stack)

        # ncls += c

        stack[-1][0] = m_i

    # print ('--')
    # for g in g_prime:
    #    print (g)

    L = list(fdt.read_fds())
    print("\nNUMBER OF FDS:{}".format(len(L)))
    print("SAMPLING CONTEXT SIZE:{}".format(len(g_prime)))
    print("CYCLES:", cycles)
    print("DB CHECKS:", cycles2)
    print("GOOD CLOSURES:", avoided_closures)
    print("Closures:", stats.closures)
    print("Failures:", stats.failures)
    print("Row check:", stats.row_check)
    # Per-attribute counters, listed by original attribute position via order.
    print("Conflicting Attributes:",
          [stats.conflicting_attributes[order[i]] for i in range(n_atts)])
    print("Non Conflicting Attributes:",
          [stats.non_conflicting[order[i]] for i in range(n_atts)])
    # print("EFF:", [abs(stats.conflicting_attributes[order[i]]-stats.non_conflicting[order[i]]) for i in range(n_atts)])
    print(order)
示例#2
0
def attribute_exploration_pps(tuples):
    """Explore the attributes of ``tuples`` while maintaining an AddIntent structure.

    Columns are reordered by ascending number of distinct values, then the
    main loop walks candidate attribute sets with ``fast_next_closure``.  For
    each candidate ``X`` whose closure estimate ``XJJ`` is larger than ``X``,
    ``check`` is called once and the estimate is then refined: entries popped
    from ``cache`` are appended to the sampled context (``g_prime`` /
    ``m_prime``) and registered with an ``AddIntentAlgorithm`` instance until
    ``XJJ`` either matches the size of ``check``'s result (then
    ``fdt.add_fd(X, XJJ)`` is called) or shrinks to the size of ``X``.

    Nothing is returned: progress and final statistics are written with
    ``print``.

    Args:
        tuples: Mutable sequence of rows; ``tuples[0]`` fixes the number of
            attributes, so the input must be non-empty.
            NOTE(review): every ``tuples[ti]`` is replaced with a
            column-permuted list, so the caller's object is modified.
    """
    alg = AddIntentAlgorithm()
    U = range(len(tuples[0]))  # Attribute positions 0..n_atts-1
    n_atts = len(U)
    # m_prime[m]: set of sampled-object indices attached to attribute m.
    m_prime = [set([]) for i in range(len(U))]
    # g_prime[g]: attribute set of sampled object g.
    g_prime = []

    # Row indices ordered by the number of distinct values in each row.
    rand_tuples = list(range(len(tuples)))
    # random.shuffle(rand_tuples)
    rand_tuples.sort(key=lambda i: len(set(tuples[i])))

    fctx = FormalContext(g_prime, m_prime)
    sampled_tuples = []  # NOTE(review): never used afterwards

    # Transpose: representations[j] is column j of the input.
    representations = [[row[j] for row in tuples] for j in U]

    # ATTRIBUTE ORDERING
    # (distinct-value count, original position) per column, ascending by count.
    order = [(len(set(r)), ri) for ri, r in enumerate(representations)]
    order.sort(key=lambda k: k[0], reverse=False)
    # print (order)
    order = {j[1]: i
             for i, j in enumerate(order)}  # Original position -> new position
    inv_order = {i: j for j, i in order.items()}  # New position -> original
    # Permute the columns of every row; this writes into the caller's list.
    for ti, t in enumerate(tuples):
        tuples[ti] = [t[inv_order[i]] for i in range(len(t))]

    # # END ORDERING
    Mjs = [set() for i in range(n_atts)]
    # Stack entries are [m_i, object set, (optional) per-attribute cache].
    stack = [[None, None], [None, set([]), Mjs]]

    X = set([])

    fdt = FDTree(U)
    m_i = -1

    cycles = 0  # iterations of the main loop
    cycles2 = 0  # iterations of the inner refinement loop
    XJ = set([])
    # Initial closure estimate; used on the first pass because the m_i >= 0
    # guard below skips the stack-based computation while m_i == -1.
    XJJ = fctx.closed_set(X)
    avoided_closures = 0  # iterations where len(X) == len(XJJ)
    ncls = 0  # accumulates the third value returned by fast_next_closure
    g_sub = []  # NOTE(review): never used afterwards
    # NOTE(review): X starts as a set and U is a range, which never compare
    # equal; the loop therefore ends only once fast_next_closure returns
    # something equal to U -- verify against its implementation.
    while X != U:

        cycles += 1
        if cycles % 1000 == 0:
            # Progress line: FD count / cycles / refinements / sampled objects /
            # len(alg.jip_objects) / len(alg.elements) / mean m_prime set size.
            print("\rFDs:{}/{}/{}/{}/{}/{}/{} - {: <100}".format(
                fdt.n_fds, cycles, cycles2, len(g_prime), len(alg.jip_objects),
                len(alg.elements),
                (sum([len(mp) for mp in m_prime])) / len(m_prime),
                ','.join(map(str, sorted(X)))),
                  end='')  #stack
            sys.stdout.flush()

        if m_i >= 0:
            # Sampled objects shared by the previous stack level and attribute m_i.
            XJ = stack[-2][1].intersection(m_prime[m_i])
            if bool(XJ):
                # NOTE(review): requires reduce to be in scope
                # (functools.reduce on Python 3) -- import not visible here.
                XJJ = reduce(set.intersection, (g_prime[g] for g in XJ))
                if len(XJ) == 1:
                    # With a single operand reduce returns the stored set
                    # itself; copy it so intersection_update below does not
                    # modify the entry held in g_prime.
                    XJJ = set(XJJ)
            else:
                # No sampled object constrains X: start from every attribute.
                XJJ = set(range(len(m_prime)))

        # cache = {}
        XSS = None  # result of check(), computed at most once per candidate
        n_x = len(X)

        # bool adds as 0/1: counts the iterations that skip check().
        avoided_closures += n_x == len(XJJ)

        while n_x < len(XJJ):
            cycles2 += 1
            # sys.stdout.flush()
            if XSS is None:
                cache = []
                XSS = check(X, XJJ, tuples, n_atts, cache, rand_tuples)
                # Ascending by length, so pop() below yields the longest entry.
                cache.sort(key=len)
                # cache = sorted(cache.items(), key=lambda k: len(k[1]))

            # sys.stdout.flush()

            if len(XJJ) == len(XSS):
                # The estimate has reached the size of check()'s result:
                # record X -> XJJ and stop refining.
                fdt.add_fd(X, XJJ)
                break
            else:
                # Estimate still too large: add the longest remaining cache
                # entry to the sampled context.  pop() raises IndexError if
                # check() supplied too few entries.
                gp = cache.pop()

                n_gp = len(g_prime)  # index the new object will get
                XJ.add(n_gp)
                # Register the object on every stack level except the bottom.
                for i in stack[1:]:
                    i[1].add(n_gp)
                # gp's members index m_prime, i.e. they are attribute positions.
                for x in gp:
                    m_prime[x].add(n_gp)
                # print ('\t', gp)
                g_prime.append(gp)
                # Register the new object with the AddIntent structure.
                nid = alg.add_intent_iteration(gp)
                alg.add_object(n_gp, nid)
                # alg.objects[nid].append(n_gp)
                # print(len(list(alg.get_jips())), '/', n_gp+1)
                # Remove the objects reported by non_jip_objects() from every
                # attribute's object set.
                rem = set(alg.non_jip_objects())
                for m in range(n_atts):
                    m_prime[m].difference_update(rem)

                # print()
                # print (alg.inv_lat[nid])
                # Narrow the estimate (in place) to what gp also contains.
                XJJ.intersection_update(gp)

        # Accept XJJ as the next set when it adds nothing, or adds no
        # attribute smaller than m_i.
        if not bool(XJJ - X) or m_i <= min(XJJ - X):
            m_i = U[-1]
            X = XJJ
        else:
            # Rejected: strip from X (in place) every attribute above m_i.
            X.difference_update([m for m in X if m > m_i])

        # Hand the current object set to the top stack level before advancing.
        stack[-1][1] = XJ

        X, m_i, c = fast_next_closure(X, U, fdt.l_close, m_i, stack)

        ncls += c

        stack[-1][0] = m_i
    # print ('--')
    # for g in g_prime:
    #    print (g)

    L = list(fdt.read_fds())
    print("\nN_FDS:{}".format(len(L)))
    print("SAMPLING CONTEXT SIZE:{}".format(len(g_prime)))
    print("CYCLES:", cycles)
    print("GOOD CLOSURES:", avoided_closures)
    print("Closures:", ncls)
    print(fdt.recursions)
    # print(alg.elements)
    jip = alg.jip_objects
    print(jip)
    # print ([alg.objects[i] for i in jip])
    print("JIP", len(jip))
示例#3
0
def attribute_exploration_pps(tuples):
    """Explore the attributes of ``tuples``, checking every candidate against the data.

    Every column is re-encoded through ``build_pli``, the columns are sorted
    by that encoding, and the main loop walks candidate attribute sets with
    ``fast_next_closure``.  Unlike a sampled-context approach, each candidate
    ``X`` is passed straight to ``check`` with ``XSS`` initialised to all
    attributes; when ``XSS`` ends up with a different size than ``X``,
    ``fdt.add_fd(X, XSS)`` is called.

    Nothing is returned: progress and final counts are written with ``print``.

    Args:
        tuples: Sequence of rows.  ``tuples[0]`` fixes the number of
            attributes, so the input must be non-empty.  The argument is only
            read; the name is rebound locally to a freshly built table.
    """
    U = range(len(tuples[0]))  # Attributes
    n_atts = len(U)  # Number of attributes

    # rand_tuples = list(range(len(tuples)))
    # rand_tuples.sort(key=lambda i: len(set(tuples[i])))

    print("Processing data... ", end='')
    sys.stdout.flush()
    # Transpose: representations[j] is column j of the input.
    representations = [[row[j] for row in tuples] for j in U]
    print("done")
    # Pair each column's build_pli() result with its original position.
    plis = [(build_pli(r), ri) for ri, r in enumerate(representations)]
    stats = Stats()

    # ORDERING
    print("Ordering... ", end='')
    sys.stdout.flush()
    # ATTRIBUTE ORDERING
    # Sort by the build_pli() value itself (lexicographic if those values are
    # lists -- TODO confirm what build_pli returns).
    plis.sort(key=lambda k: k[0], reverse=False)  # Lexicographic
    order = {j[1]: i for i, j in enumerate(plis)}  # Original position -> new position
    inv_order = {i: j
                 for j, i in order.items()
                 }  # At position i should be attribute j

    # Drop the original-position tag; plis[i] is now the i-th sorted column.
    plis = [i[0] for i in plis]
    print("done")

    print("Reconverting... ", end='')
    # Rebuild the table from the PLIs: each cell becomes the index of the
    # cluster that contains the row; rows in no cluster keep None.
    tuples = [[None] * n_atts for i in range(len(tuples))]
    # print(plis)
    # not_none = [0 for i in range(len(tuples))]

    for att in range(n_atts):
        # inv_order is a permutation of 0..n_atts-1, and the mapped value is
        # used for both plis[...] and the output column, so this only changes
        # the order in which columns are visited, not the resulting table.
        att = inv_order[att]
        for i, cluster in enumerate(plis[att]):
            for row in cluster:
                tuples[row][att] = i

    print("done")
    # # END ORDERING

    Mjs = [set()
           for i in range(n_atts)]  # Needed by fast version of next_closure
    stack = [[None, None], [None, set([]), Mjs]]  # Stack for next_closure

    X = set([])

    fdt = FDTree(U)
    m_i = -1

    cycles = 0  # iterations of the main loop
    cycles2 = 0  # NOTE(review): never updated or printed in this variant
    XJ = set([])  # never modified here; the same set is re-stored every pass

    ncls = 0  # NOTE(review): never updated, so "Closures:" always prints 0
    sU = set(U)  # NOTE(review): unused
    # NOTE(review): X starts as a set and U is a range, which never compare
    # equal; the loop therefore ends only once fast_next_closure returns
    # something equal to U -- verify against its implementation.
    while X != U:

        # Feedback Output
        cycles += 1
        # NOTE(review): cycles % 1 is always 0, so this prints on every pass.
        if cycles % 1 == 0:
            print("\rFDs:{}/{}".format(fdt.n_fds, cycles),
                  ','.join(map(str, sorted(X))),
                  end='')  #stack
            sys.stdout.flush()

        # Stack re-use
        cache = []
        # Start from every attribute.  The return value of check() is
        # ignored, so it presumably shrinks XSS in place -- TODO confirm.
        XSS = set(U)
        check(X, XSS, tuples, n_atts, cache, plis, stats)

        if len(X) != len(XSS):
            fdt.add_fd(X, XSS)
            # Store XSS in slot m_i of the top stack entry's last element
            # (m_i is -1 on the first pass, i.e. the final slot).
            stack[-1][-1][m_i] = XSS

        # Accept XSS as the next set when it adds nothing, or adds no
        # attribute smaller than m_i.
        if not bool(XSS - X) or m_i <= min(XSS - X):
            m_i = U[-1]
            X = XSS
        else:
            # Rejected: strip from X (in place) every attribute above m_i.
            X.difference_update([m for m in X if m > m_i])

        stack[-1][1] = XJ

        X, m_i = fast_next_closure(X, U, fdt.l_close, m_i, stats, stack)
        stack[-1][0] = m_i

    L = list(fdt.read_fds())
    print("\nN_FDS:{}".format(len(L)))
    print("CYCLES:", cycles)
    print("Closures:", ncls)
    print(fdt.recursions)
示例#4
0
def attribute_exploration_pps(tuples):
    """Explore the attributes of ``tuples`` with per-cycle debug output.

    Columns are reordered by ascending number of distinct values, then the
    main loop walks candidate attribute sets with ``fast_next_closure``.  For
    each candidate ``X`` whose closure estimate ``XJJ`` is larger than ``X``,
    ``check`` is called once and the estimate is then refined: entries popped
    from ``cache`` are appended to the sampled context (``g_prime`` /
    ``m_prime``) until ``XJJ`` either matches the size of ``check``'s result
    (then ``fdt.add_fd(X, XJJ)`` is called) or shrinks to the size of ``X``.
    Stack entries carry an extra trailing boolean that is reset to ``False``
    whenever a dependency is recorded.

    Nothing is returned: a status line is printed on every cycle, followed by
    final statistics.

    Args:
        tuples: Mutable sequence of rows; ``tuples[0]`` fixes the number of
            attributes, so the input must be non-empty.
            NOTE(review): every ``tuples[ti]`` is replaced with a
            column-permuted list, so the caller's object is modified.
    """
    U = range(len(tuples[0])) # Attribute positions 0..n_atts-1
    n_atts = len(U)
    # m_prime[m]: set of sampled-object indices attached to attribute m.
    m_prime = [set([]) for i in range(len(U))]
    # g_prime[g]: attribute set of sampled object g.
    g_prime = []

    # Row indices ordered by the number of distinct values in each row.
    rand_tuples = list(range(len(tuples)))
    # random.shuffle(rand_tuples)
    rand_tuples.sort(key=lambda i: len(set(tuples[i])))

    # NOTE(review): fctx is never read again in this function; it is kept
    # because the constructor's side effects are not visible from here.
    fctx = FormalContext(g_prime, m_prime)

    # Extra, initially empty slot at index -1.  The main loop starts with
    # m_i == -1, so m_prime[m_i] resolves to this set on the first pass.
    m_prime.append(set([])) # THIS SHOULD BE AFTER DECLARING THE FORMAL CONTEXT

    # Transpose: representations[j] is column j of the input.
    representations = [[row[j] for row in tuples] for j in U]

    # ATTRIBUTE ORDERING
    # (distinct-value count, original position) per column, ascending by count.
    order = [(len(set(r)), ri) for ri, r in enumerate(representations)]
    order.sort(key=lambda k: k[0], reverse=False)
    #print (order)
    order = {j[1]:i for i,j in enumerate(order)} # Original position -> new position
    inv_order = {i:j for j,i in order.items()} # New position -> original
    # Permute the columns of every row; this writes into the caller's list.
    for ti, t in enumerate(tuples):
        tuples[ti] = [t[inv_order[i]] for i in range(len(t))]

    # # END ORDERING

    # VARIABLES FOR FAST STACKED NEXT CLOSURE
    Mjs = [set() for i in range(n_atts)]
    # The bottom entry shares the extra m_prime[-1] set; the second entry
    # ends with the boolean flag described in the docstring.
    stack = [[None, m_prime[-1]],[None, set([]), Mjs, True]]

    # INITIALIZATION VARIABLES
    X = set([])
    fdt = FDTree(U)
    m_i = -1 # WE START WITH THE EMPTY INTENT REPRESENTED BY THIS

    # COUNTERS TO KEEP SOME PERFORMANCE STATISTICS
    cycles = 0  # iterations of the main loop
    cycles2 = 0  # iterations of the inner refinement loop
    avoided_closures = 0  # iterations where len(X) == len(XJJ)
    ncls = 0  # accumulates the third value returned by fast_next_closure
    # NOTE(review): X starts as a set and U is a range, which never compare
    # equal; the loop therefore ends only once fast_next_closure returns
    # something equal to U -- verify against its implementation.
    while X != U:
        cycles += 1
        # if cycles%1000 == 0:
        # Debug status on every cycle: last element of the previous stack
        # entry, FD count, cycles, refinements, sampled objects, mean
        # m_prime set size, then the current X.
        print ("{}||FDs:{}/{}/{}/{}/{} - {: <100}".format(stack[-2][-1], fdt.n_fds, cycles, cycles2, len(g_prime), (sum([len(mp) for mp in m_prime]))/len(m_prime), ','.join(map(str, sorted(X)))), end='\n') #stack
        sys.stdout.flush()

        # Sampled objects shared by the previous stack level and attribute m_i.
        XJ = stack[-2][1].intersection(m_prime[m_i])
        if bool(XJ):
            # XJJ = reduce(set.intersection, (g_prime[g] for g in XJ))
            XJJ = set.intersection(*[g_prime[g] for g in XJ])
            # set.intersection already returns a new set, so this copy is
            # redundant here (it mattered for the reduce-based form above).
            if len(XJ) == 1:
                XJJ = set(XJJ)
        else:
            # No sampled object constrains X: start from every attribute.
            XJJ = set(U)

        # AT THIS POINT WE HAVE XJJ WHICH IS OUR ESTIMATION OF THE CLOSURE
        # USING THE REPRESENTATION CONTEXT CALCULATED SO FAR
        # THE ACTUAL CLOSURE SHOULD BE XSS, HOWEVER IF
        # X = XJJ WE KNOW THAT XSS = XJJ AND WE CAN AVOID ITS
        # CALCULATION

        XSS = None  # result of check(), computed at most once per candidate
        n_x = len(X)

        # bool adds as 0/1: counts the iterations that skip check().
        avoided_closures += n_x == len(XJJ)

        while n_x < len(XJJ): # CHECKS WHETHER X==XJJ

            cycles2 += 1

            if XSS is None:
                cache = []
                XSS = check(X, XJJ, tuples, n_atts, cache, rand_tuples)
                # Ascending by length, so pop() below yields the longest entry.
                cache.sort(key=len)

            if len(XJJ) == len(XSS):
                # The estimate has reached the size of check()'s result:
                # record X -> XJJ and stop refining.
                fdt.add_fd(X, XJJ)
                print('\t', X, XJJ-X)
                # Reset the last element of every stack entry above the bottom.
                # NOTE(review): this assumes each such entry ends with the
                # boolean flag -- confirm for entries pushed by
                # fast_next_closure.
                for i in stack[1:]:
                    i[-1] = False
                # print(stack)
                break
            else:
                # Estimate still too large: add the longest remaining cache
                # entry to the sampled context.  pop() raises IndexError if
                # check() supplied too few entries.
                gp = cache.pop()

                n_gp = len(g_prime)  # index the new object will get


                XJ.add(n_gp)
                # Register the object on every stack level except the bottom.
                for i in stack[1:]:
                    i[1].add(n_gp)
                # gp's members index m_prime, i.e. they are attribute positions.
                for x in gp:
                    m_prime[x].add(n_gp)

                g_prime.append(gp)
                # Narrow the estimate (in place) to what gp also contains.
                XJJ.intersection_update(gp)



        new_atts = XJJ - X

        # Accept XJJ as the next set when it adds nothing, or adds no
        # attribute smaller than m_i.
        if not bool(new_atts) or m_i <= min(new_atts):
            m_i = U[-1]
            X = XJJ
        else:
            # print(stack)
            # Rejected: remember XJJ for attribute m_i on the previous level
            # and strip from X (in place) every attribute greater than m_i.
            stack[-2][2][m_i] = XJJ
            X.difference_update([m for m in X if m > m_i])

        # Hand the current object set to the top stack level before advancing.
        stack[-1][1] = XJ


        X, m_i,c = fast_next_closure(X, U, fdt.l_close, m_i, stack)
        # NOTE(review): appends a flag to the top entry on every cycle, even
        # when that entry already carries one -- confirm this is intended.
        stack[-1].append(True)

        ncls += c

        stack[-1][0] = m_i

    # print ('--')
    # for g in g_prime:
    #    print (g)

    L = list(fdt.read_fds())
    print ("\nNUMBER OF FDS:{}".format(len(L)))
    print ("SAMPLING CONTEXT SIZE:{}".format(len(g_prime)))
    print ("CYCLES:",cycles)
    print ("GOOD CLOSURES:", avoided_closures)
    print ("Closures:", ncls)
    print(fdt.recursions)