def relim(rinput, min_support=2):
    '''Finds frequent item sets of items appearing in a list of transactions
    based on Recursive Elimination algorithm by Christian Borgelt.

    In my synthetic tests, Relim outperforms other algorithms by a large
    margin. This is unexpected as FP-Growth is supposed to be superior, but
    this may be due to my implementation of these algorithms.

    The mining itself is destructive, so it is run on a copy of the
    structure held in `rinput`. The caller's `rinput` is left untouched and
    can be reused, for instance with a different `min_support`.

    :param rinput: The input of the algorithm. Must come from
        `get_relim_input`. It is a pair `(relim_input, key_map)`.
    :param min_support: The minimal support of a set to be included.
    :rtype: A dict mapping each frequent item set (a frozenset) to its
        support.
    '''
    (relim_input, key_map) = rinput
    # `_relim` pops every entry off the list it receives and appends to the
    # per-entry rest lists. Copying those two levels is enough to isolate
    # the caller's data: the tuples inside are never modified in place.
    working = [(head, list(rests)) for (head, rests) in relim_input]
    report = {}
    _relim((working, key_map), set(), report, min_support)
    return report
def test_relim(should_print=False, ts=None, support=2):
    '''Run `_relim` on a list of transactions and return its raw results.

    :param should_print: When true, print the count returned by `_relim`
        and then the report dict.
    :param ts: The transactions to mine. When `None`, the result of
        `get_default_transactions()` is used instead.
    :param support: The minimal support passed on to `_relim`.
    :rtype: A tuple `(n, report)` where `n` is the value returned by
        `_relim` and `report` is the dict it filled in.
    '''
    transactions = get_default_transactions() if ts is None else ts
    # The identity function is passed as the key function.
    prepared = get_relim_input(transactions, lambda e: e)
    found = {}
    visited = _relim(prepared, set(), found, support)
    if should_print:
        print(visited)
        print(found)
    return (visited, found)
def _relim_push_rests(rest_lists, target, key_map):
    '''Hand each non-empty rest over to the `target` entry of its first key.

    For every `(count, rest)` pair, the entry of `target` at position
    `key_map[rest[0]]` gets its counter increased by `count`. If anything
    remains after dropping the first key, that remainder is appended to the
    entry's own rest lists with the same `count`.
    '''
    for (count, rest) in rest_lists:
        if not rest:
            continue
        slot = key_map[rest[0]]
        tail = rest[1:]
        ((slot_count, slot_key), bucket) = target[slot]
        # Only keep the remainder when there is something left in it.
        if tail:
            bucket.append((count, tail))
        target[slot] = ((slot_count + count, slot_key), bucket)


def _relim(rinput, fis, report, min_support):
    '''Recursive worker behind `relim`.

    Entries are taken from the end of the list one at a time. Each entry
    has the shape `((support, key), rest_lists)`. When its support reaches
    `min_support`, `key[1]` is added to the current prefix `fis`, the prefix
    is recorded in `report`, and the entry's rests are mined recursively.
    The rests are then pushed back onto the remaining entries, whether or
    not the threshold was met, and the entry is dropped.

    :param rinput: A pair `(relim_input, key_map)`. `relim_input` is the
        list of entries and is emptied by this call. `key_map` maps a key
        to the index of its entry.
    :param fis: The set of items making up the current prefix. It is
        modified during the call and restored before returning.
    :param report: A dict updated in place with `frozenset(fis) -> support`
        for every reported item set.
    :param min_support: The minimal support of a set to be reported.
    :rtype: The number of times an entry met `min_support`, recursive calls
        included.
    '''
    (pending, key_map) = rinput
    reported = 0
    while pending:
        ((support, key), rest_lists) = pending[-1]
        if support >= min_support:
            member = key[1]
            fis.add(member)
            report[frozenset(fis)] = support
            # NOTE(review): `_new_relim_input` is defined elsewhere;
            # presumably it returns fresh, empty entries for the first
            # `len(pending) - 1` keys. Confirm against its definition.
            projected = _new_relim_input(len(pending) - 1, key_map)
            _relim_push_rests(rest_lists, projected, key_map)
            reported += 1 + _relim((projected, key_map), fis, report,
                                   min_support)
            fis.remove(member)
        # Eliminate the last entry: its rests move to the entries before it.
        _relim_push_rests(rest_lists, pending, key_map)
        pending.pop()
    return reported