def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val): cr = dbOpen(db_filename) yob_gentable = build_numeric_dict(cr, 'YoB_bins') forum_gentable = build_numeric_dict(cr, 'nforum_posts_bins') cgtable = get_pickled_table(geo_suppress) class_suppress = get_pickled_table(cl_suppress) prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable, class_suppress) count_dict = make_count_dict(prop_dict) full_suppress_list = class_suppress suppress_total = len(class_suppress) print 'Number of suppressed records due to class identification is', suppress_total for i in range(1, k_val): count = 0 if i not in count_dict: print 'No properties with only ', str(i), 'records' continue for id_pair in count_dict[i]: for e in prop_dict[id_pair]: full_suppress_list.add(e) count += len(prop_dict[id_pair]) suppress_total += len(prop_dict[id_pair]) print 'Suppress records for value ', str(i), 'is', str(count) print 'Total suppressed records is ', suppress_total outf = open(suppress_out, 'w') pickle.dump(full_suppress_list, outf) outf.close()
def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val): cr = dbOpen(db_filename) yob_gentable = build_numeric_dict(cr, "YoB_bins") forum_gentable = build_numeric_dict(cr, "nforum_posts_bins") cgtable = get_pickled_table(geo_suppress) class_suppress = get_pickled_table(cl_suppress) prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable, class_suppress) full_suppress_list = list(class_suppress) suppress_total = len(class_suppress) print "Number of suppressed records due to class identification is", suppress_total for k, v in prop_dict.iteritems(): if len(v) < k_val: full_suppress_list.extend(v) print "Total number of records suppressed =", str(len(full_suppress_list)) outf = open(suppress_out, "w") full_suppress_set = set(full_suppress_list) pickle.dump(full_suppress_set, outf) outf.close()
def main(db_filename, cl_suppress, geo_suppress, suppress_out, k_val): cr = dbOpen(db_filename) yob_gentable = build_numeric_dict(cr, 'YoB_bins') forum_gentable = build_numeric_dict(cr, 'nforum_posts_bins') cgtable = get_pickled_table(geo_suppress) class_suppress = get_pickled_table(cl_suppress) prop_dict = make_list_dict(cr, yob_gentable, forum_gentable, cgtable, class_suppress) full_suppress_list = list(class_suppress) suppress_total = len(class_suppress) print 'Number of suppressed records due to class identification is', suppress_total for k, v in prop_dict.iteritems(): if len(v) < k_val: full_suppress_list.extend(v) print 'Total number of records suppressed =', str(len(full_suppress_list)) outf = open(suppress_out, 'w') full_suppress_set = set(full_suppress_list) pickle.dump(full_suppress_set, outf) outf.close()
def main(dbfname, suppress_fname, outfname):
    """
    Driver for the program; creates a triple of dictionaries for percentage
    participation by class and writes them to a single csv file.

    The dictionaries created are for participation in the course in the full
    data set, in the de-identified data set, and in the records that are
    suppressed by the de-identification. All three are written to the same
    csv file, in the order suppressed, non-suppressed, original.

    :param dbfname: The sqlite database containing the original data set
    :param suppress_fname: The file containing the set of suppressed records
        for de-identification
    :param outfname: The name of the file to create for the output. Note that
        if the file already exists, it will be overwritten
    :return: None
    """
    cr = dbOpen(dbfname)
    try:
        suppress_set = get_pickled_table(suppress_fname)
        select_str = 'Select course_id, user_id, registered, viewed, explored, certified from source'
        cr.execute(select_str)

        orig_dict = {}
        suppress_dict = {}
        anon_dict = {}
        for row in cr.fetchall():
            ckey = row[0]
            # Suppressed records are identified by course_id + user_id.
            key = ckey + row[1]
            add_participation(orig_dict, ckey, row)
            if key in suppress_set:
                add_participation(suppress_dict, ckey, row)
            else:
                add_participation(anon_dict, ckey, row)

        # The context manager closes the output file even if a write fails.
        # NOTE(review): the Python 2 csv docs recommend 'wb' (Python 3:
        # newline='') to avoid doubled line endings on Windows; the mode is
        # left as 'w' here to keep existing behaviour.
        with open(outfname, 'w') as outf:
            csout = csv.writer(outf)
            csout.writerow(['Course ID', '% registered', '% viewed', '% explored', '% certified'])
            csout.writerow(['Suppressed records'])
            write_dictionary(csout, suppress_dict)
            csout.writerow(['Non-suppressed records'])
            write_dictionary(csout, anon_dict)
            csout.writerow(['Original records'])
            write_dictionary(csout, orig_dict)
    finally:
        # Release the cursor on every exit path, not only on success.
        cr.close()