def sam(sam_input, min_support=2):
    '''Mine frequent item sets with the Split and Merge (SaM) algorithm
       by Christian Borgelt.

       Thin public wrapper: it creates the working state (an empty prefix
       set and an empty result dict), hands it to `_sam` and returns the
       filled-in result dict.

       :param sam_input: The input of the algorithm. Must come from
                         `get_sam_input`.
       :param min_support: The minimal support an item set needs in order
                           to be included in the result.
       :rtype: A dict mapping each frequent item set (as a frozenset) to
               its support.
    '''
    itemsets = {}
    # The second argument is the (initially empty) prefix that `_sam`
    # grows and shrinks while it recurses.
    _sam(sam_input, set(), itemsets, min_support)
    return itemsets
def test_sam(should_print=False, ts=None, support=2):
    '''Run `_sam` on a list of transactions and return what it produced.

       :param should_print: When true, print the number of item sets found
                            and then the report dict.
       :param ts: The transactions to mine. When None, the result of
                  `get_default_transactions()` is used instead.
       :param support: The minimal support passed on to `_sam`.
       :rtype: A tuple `(n, report)` where `n` is the value returned by
               `_sam` and `report` is the dict it filled in.
    '''
    transactions = get_default_transactions() if ts is None else ts
    prepared = get_sam_input(transactions, lambda e: e)
    report = {}
    n_found = _sam(prepared, set(), report, support)
    if should_print:
        print(n_found)
        print(report)
    return (n_found, report)
def _sam(sam_input, fis, report, min_support): n = 0 a = deque(sam_input) while len(a) > 0 and len(a[0][1]) > 0: b = deque() s = 0 i = a[0][1][0] while len(a) > 0 and len(a[0][1]) > 0 and a[0][1][0] == i: s = s + a[0][0] a[0] = (a[0][0], a[0][1][1:]) if len(a[0][1]) > 0: b.append(a.popleft()) else: a.popleft() c = deque(b) d = deque() while len(a) > 0 and len(b) > 0: if a[0][1] > b[0][1]: d.append(b.popleft()) elif a[0][1] < b[0][1]: d.append(a.popleft()) else: b[0] = (b[0][0] + a[0][0], b[0][1]) d.append(b.popleft()) a.popleft() while len(a) > 0: d.append(a.popleft()) while len(b) > 0: d.append(b.popleft()) a = d if s >= min_support: fis.add(i[1]) report[frozenset(fis)] = s #print('{0} with support {1}'.format(fis, s)) n = n + 1 + _sam(c, fis, report, min_support) fis.remove(i[1]) return n