def Apriori_main(data_fname, minSupport, out_fname='Apriori_results.txt'):
    # Read the dataset: number of transactions and the list of transactions
    lines, tid = readDataset(data_fname)
    t1 = clock()
    # Mine all frequent itemsets; the miner expects the minimum support as a
    # percentage, so the relative threshold is scaled by 100 (target='s' asks
    # for frequent itemsets rather than association rules)
    temp_freq = apriori(tid, target='s', supp=float(minSupport * 100), conf=100)
    CPU_time = clock() - t1
    # Convert the raw miner output into a dictionary keyed by itemset and report it
    freq_items = convert2dic(temp_freq, lines)
    printResults(data_fname, minSupport, CPU_time, freq_items, out_fname)
    return (freq_items, CPU_time)
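# --- Illustrative note (added sketch, not part of the original module) -----------
# Apriori_main wraps a PyFIM-style apriori() call: target='s' requests frequent
# itemsets only and supp is given as a percentage, hence minSupport * 100 above.
# The self-contained demo below mirrors that call on a small in-memory dataset;
# the 'fim' import, the toy transactions and the default threshold are assumptions
# made purely for illustration.
def _apriori_demo(minSupport=0.5):
    from fim import apriori as fim_apriori
    tracts = [['a', 'b', 'c'], ['a', 'c'], ['a', 'b'], ['b', 'd']]
    # a relative threshold of 0.5 becomes supp=50 (per cent) for the miner
    return fim_apriori(tracts, target='s', supp=float(minSupport * 100), conf=100)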
def Heuristic_Coeff_main(fname1, fname2, fname3, sup, mod_name):
    change_raw_data = 0
    L = []
    solution = None
    k = 0
    # Read dataset and identify discrete items
    lines, tid = myiolib.readDataset(fname3)
    I = hcba_ext.get_1itemsets(tid)
    # Calculate support count
    abs_supp = ceil(sup * lines - 0.5)
    # Load F from file
    F = myiolib.readLargeData(fname1)
    # Load S from file
    S = minSet(myiolib.readSensitiveSet(fname2))
    # Calculate the revised F
    start_time = clock()
    SS = supersets(S, F)
    Rev_Fd = list(set(F) - SS)
    rev_t = clock() - start_time
    Rev_Fd.sort(key=len, reverse=True)
    # Collect the indices of transactions that support at least one sensitive itemset
    sens_ind = []
    for i in xrange(lines):
        for itemset in S:
            if itemset.issubset(tid[i]):
                sens_ind.append(i)
                break
    start_time = clock()
    coeffs, rem = hcba_ext.calculateCoeffs(tid, sup, sens_ind, S, F, Rev_Fd)
    # The initial objective => Elastic filtering
    cpx = cplex.Cplex()
    cpx.set_results_stream(None)
    # Add obj. sense and columns
    cpx.objective.set_sense(cpx.objective.sense.minimize)
    cpx.variables.add(obj=coeffs, lb=[0] * len(coeffs), ub=[1] * len(coeffs),
                      types=[cpx.variables.type.integer] * len(coeffs))
    # Build constraints for minimal S
    for itemset in S:
        ind = []
        cur_supp = 0
        for i in xrange(len(sens_ind)):
            if itemset.issubset(tid[sens_ind[i]]):
                ind.append(i)
                cur_supp += 1
        cpx.linear_constraints.add(
            lin_expr=[SparsePair(ind=ind, val=[1] * len(ind))],
            senses=["G"],
            rhs=[cur_supp - abs_supp + 1],
            names=["c" + str(k)])
        k += 1
    cpx.solve()
    solution = map(int, cpx.solution.get_values())
    # Apply sanitization
    for i in hcba_ext.get_indices(solution, 1):
        tid[sens_ind[i]] = tid[sens_ind[i]] - rem[i]
        change_raw_data += len(rem[i])
    coeffs = None
    cpx = None
    F = None
    Rev_Fd = None
    exec_time = clock() - start_time
    # ----- create output file -----
    out_file = open(mod_name + '_results.txt', 'w')
    for i in xrange(lines):
        k = ' '.join(sorted(tid[i]))
        print(k, file=out_file)
    out_file.close()
    tid = None
    return ("Not Applicable", change_raw_data, rev_t + exec_time)
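# --- Illustrative sketches (added, not part of the original module) --------------
# minSet() and supersets() are imported from elsewhere in the project and their
# implementations are not shown in this file. Minimal versions that are consistent
# with how they are called above (S and F as collections of frozensets) might look
# like the following; the real helpers may differ.
def _minSet_sketch(S):
    # Keep only the minimal sensitive itemsets: an itemset that is a proper
    # superset of another sensitive itemset is dropped, since hiding the subset
    # necessarily hides the superset as well.
    S = list(S)
    return [s for s in S if not any(other < s for other in S)]

def _supersets_sketch(S, F):
    # Frequent itemsets in F that contain at least one sensitive itemset in S;
    # subtracting this set from F yields the revised set Rev_Fd used above.
    return set(f for f in F if any(s <= f for s in S))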
def Coeff_Max_Accuracy_main(fname1, fname2, fname3, sup, mod_name):
    change_raw_data = 0
    # Read dataset
    lines, tid = myiolib.readDataset(fname3)
    # Absolute support count corresponding to the relative threshold
    abs_supp = int(ceil(sup * lines))
    # Load the frequent itemsets F and the (minimal) sensitive itemsets S
    F = myiolib.readLargeData(fname1)
    S = minSet(myiolib.readSensitiveSet(fname2))
    SS = supersets(S, F)
    # Collect the indices of transactions that support at least one sensitive itemset
    sens_ind = []
    for i in xrange(lines):
        for itemset in S:
            if itemset.issubset(tid[i]):
                sens_ind.append(i)
                break
    start_time = clock()
    N = len(sens_ind)
    # One objective coefficient and one removal set per sensitive-supporting transaction
    coeffs, rem = cbma_ext.calculateCoeffs(tid, sup, sens_ind, S, sorted(F, key=len))
    cpx = cplex.Cplex()
    cpx.set_results_stream(None)
    # Binary variable per transaction: 1 means the transaction gets sanitized
    cpx.objective.set_sense(cpx.objective.sense.minimize)
    cpx.variables.add(obj=coeffs, lb=(0,) * N, ub=(1,) * N,
                      types=(cpx.variables.type.binary,) * N)
    del coeffs
    # One covering constraint per sensitive itemset: enough of its supporting
    # transactions must be sanitized to push its support below the threshold
    k = 0
    for itemset in S:
        ind = []
        cur_supp = 0
        for i in xrange(N):
            if itemset.issubset(tid[sens_ind[i]]):
                ind.append(i)
                cur_supp += 1
        cpx.linear_constraints.add(
            lin_expr=[SparsePair(ind=ind, val=(1,) * len(ind))],
            senses=["G"],
            rhs=[cur_supp - abs_supp + 1],
            names=["c" + str(k)])
        k += 1
    cpx.solve()
    solution = map(int, cpx.solution.get_values())
    # Apply sanitization: remove the selected items from the chosen transactions
    for i in cbma_ext.get_indices(solution, 1):
        tid[sens_ind[i]] = tid[sens_ind[i]] - rem[i]
        change_raw_data += len(rem[i])
    cpx = None
    exec_time = clock() - start_time
    # ----- create output file -----
    out_file = open(mod_name + '_results.txt', 'w')
    for i in xrange(lines):
        k = ' '.join(sorted(tid[i]))
        print(k, file=out_file)
    out_file.close()
    tid = None
    F = None
    return ("Not Applicable", change_raw_data, exec_time)
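# --- Example driver (illustrative sketch, not part of the original module) -------
# Both sanitization entry points share the same signature: a frequent-itemset file,
# a sensitive-itemset file, the raw transaction file, the relative support
# threshold, and a module name used to label the '<mod_name>_results.txt' output.
# The file names, threshold and labels below are placeholders. Each call returns a
# placeholder string, the number of removed item occurrences, and the CPU time.
def _run_sanitization_demo():
    _, changed, t = Coeff_Max_Accuracy_main('frequent.dat', 'sensitive.dat',
                                            'dataset.dat', 0.05, 'CBMA')
    print('CBMA removed %d item occurrences in %.3f s' % (changed, t))
    _, changed, t = Heuristic_Coeff_main('frequent.dat', 'sensitive.dat',
                                         'dataset.dat', 0.05, 'HCBA')
    print('HCBA removed %d item occurrences in %.3f s' % (changed, t))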