def fis1(baskets, threshold):
    """Count single items and feed every item pair of each basket to a filter.

    Args:
        baskets: Iterable of baskets; each basket is a sequence (it is
            indexed and measured with ``len``) of hashable, mutually
            comparable items.
        threshold: An item is recorded as frequent as soon as its running
            count is strictly greater than this value.

    Returns:
        A tuple ``(freq, frequent_items, cf)`` where ``freq`` maps every item
        seen to its total count, ``frequent_items`` maps each item whose count
        exceeded ``threshold`` to that count, and ``cf`` is the
        ``count_filter.CountPair`` instance that received every
        ``(smaller, larger)`` pair through ``add``.
    """
    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source. The pair loop is assumed to run for every item, not only for
    # items already above the threshold -- confirm against the original file.
    freq = {}
    frequent_items = {}
    # 50000 is passed straight to CountPair; presumably the filter size.
    cf = count_filter.CountPair(50000)

    for basket in baskets:
        size = len(basket)
        for pos, item in enumerate(basket):
            count = freq.get(item, 0) + 1
            freq[item] = count
            if count > threshold:
                frequent_items[item] = count

            # Pair the current item with every later item in the basket,
            # always ordered (smaller, larger).
            for other_pos in range(pos + 1, size):
                other = basket[other_pos]
                cf.add((min(item, other), max(item, other)))

    return freq, frequent_items, cf
def fis2(baskets, f1, cf, threshold):
    """Count item pairs whose members are both in ``f1`` and feed triples to a filter.

    Args:
        baskets: Iterable of baskets; each basket is a sequence (it is
            indexed and measured with ``len``) of hashable, mutually
            comparable items.
        f1: Container tested with ``in`` for each item of a pair; presumably
            the frequent-item mapping produced by ``fis1`` -- confirm with
            the caller.
        cf: Accepted but not referenced anywhere in this function body.
        threshold: A pair is recorded as frequent as soon as its running
            count is strictly greater than this value.

    Returns:
        A tuple ``(freq, frequent_items, cf2)`` where ``freq`` maps each
        counted ``(smaller, larger)`` pair to its count, ``frequent_items``
        maps each pair whose count exceeded ``threshold`` to that count, and
        ``cf2`` is the ``count_filter.CountPair`` instance that received
        every sorted triple through ``add3``.
    """
    # NOTE(review): nesting was reconstructed from whitespace-collapsed
    # source. The triple loop is assumed to run for every (i, j) pair, outside
    # the ``f1`` membership check -- confirm against the original file.
    freq = {}
    frequent_items = {}
    # 50000 is passed straight to CountPair; presumably the filter size.
    cf2 = count_filter.CountPair(50000)

    for basket in baskets:
        size = len(basket)
        for i in range(size):
            first = basket[i]
            for j in range(i + 1, size):
                second = basket[j]
                # Built before the membership test, as in the original, so
                # any comparison error surfaces at the same point.
                pair = (min(first, second), max(first, second))

                both_in_f1 = first in f1 and second in f1
                if both_in_f1:
                    count = freq.get(pair, 0) + 1
                    freq[pair] = count
                    if count > threshold:
                        frequent_items[pair] = count

                # Every triple (i < j < k) is passed on in ascending order.
                for k in range(j + 1, size):
                    triple = tuple(sorted([first, second, basket[k]]))
                    cf2.add3(triple)

    return freq, frequent_items, cf2
import count_filter

cf = count_filter.CountPair(50000)
# cf.print()


def gen_baskets(filename):
    """Read a basket file into a list of integer lists.

    Each non-blank line of the file is one basket: integer item ids separated
    by whitespace. Lines are split on any run of whitespace, so repeated
    spaces or tabs between items are accepted, and blank lines (for example a
    trailing empty line at the end of the file) are skipped instead of
    raising ``ValueError`` from ``int('')``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one ``list[int]`` per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token is not a valid integer literal.
    """
    baskets = []
    with open(filename) as f:
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank or whitespace-only line: there is no basket to record.
                continue
            baskets.append([int(token) for token in tokens])
    return baskets


# NOTE(review): the visible source ends partway through the definition below;
# its statements are reproduced unchanged.
def fis1(baskets, threshold):
    freq = {}
    frequent_items = {}
    cf = count_filter.CountPair(50000)
    for basket in baskets:
        for i in range(len(basket)):
            item = basket[i]
            if item not in freq:
                freq[item] = 0
            freq[item] += 1
            if freq[item] > threshold:
                frequent_items[item] = freq[item]