def churn(adset_file, results_dir):
    '''INTERFACE: Churn is the number of ads per number of trials.

    For every user in the config, the ads of successive trials are
    accumulated with adOps.union(). After each trial one row is recorded
    with the user, the 1-based trial number, the running number of
    accumulated ads and the per-type counts as formatted by
    types_count_str(). Each user also gets an initial all-zero row.

    Files written into results_dir:
        <user>.txt: the ads accumulated for that user over all trials.
        churn.txt: the tab-separated table of all rows, with a header.

    Args:
        adset_file: Config file like "accounts.cf".
        results_dir: Directory path to save experiment results.
    '''
    file_set_lists = adParser.parse_conf(adset_file)
    ad_truth = adLib.true_ds_of_ads("dbs/adTruth.db")
    make_dir(results_dir)

    # Rows are collected in a list and joined once at the end instead of
    # growing one string with "+=" inside the nested loops.
    rows = ["User\tTrials\tAll\tD\tR\tX"]

    for user in file_set_lists:
        ad_list = []
        rows.append("\t".join([user, "0", "0", "0", "0", "0"]))

        # Trials are processed in config order, numbered from 1.
        for trial, file_set in enumerate(file_set_lists[user], 1):
            trial_ads = adParser.parse_html_set(file_set)
            ad_list = adOps.union([ad_list, trial_ads])
            type_counts = adLib.ad_types_count(ad_list, ad_truth)
            rows.append("\t".join([user, str(trial), str(len(ad_list)),
                                   types_count_str(type_counts)]))

        # "with" closes the file even if formatting or writing raises.
        with open(results_dir + "/" + user + ".txt", "w") as fd:
            fd.write(adOps.get_ads_str(ad_list))

    with open(results_dir + "/churn.txt", "w") as fd:
        fd.write("\n".join(rows) + "\n")
'''
Name: Sravan Bhamidipati
Date: 18th January, 2013
Purpose: Global variables used throughout.
'''

import adLib
import os

# Paths of the ground-truth databases.
ACCOUNT_TRUTH_DB = "dbs/accountTruth.db"
AD_TRUTH_DB = "dbs/adTruth.db"

# Ground truth loaded once at import time. DS_TRUTH is derived from
# ACCOUNT_TRUTH, so the order of these statements matters.
ACCOUNT_TRUTH = adLib.true_ds_of_accounts(ACCOUNT_TRUTH_DB)
DS_TRUTH = adLib.true_accounts_of_ds(ACCOUNT_TRUTH)
AD_TRUTH = adLib.true_ds_of_ads(AD_TRUTH_DB)

# Names of the available models; the second half repeats the first half
# with a "wt_" prefix.
MODELS = [
    "p_agg", "p_exp", "p_r_agg", "p_r_agg2", "p_r_exp", "p_r_exp2",
    "p1_r1_agg", "p1_r1_exp", "p1_r2_agg", "p1_r2_exp",
    "p2_r1_agg", "p2_r1_exp", "p2_r2_agg", "p2_r2_exp",
    "r_agg", "r_exp",
    "wt_p_agg", "wt_p_exp", "wt_p_r_agg", "wt_p_r_agg2",
    "wt_p_r_exp", "wt_p_r_exp2",
    "wt_p1_r1_agg", "wt_p1_r1_exp", "wt_p1_r2_agg", "wt_p1_r2_exp",
    "wt_p2_r1_agg", "wt_p2_r1_exp", "wt_p2_r2_agg", "wt_p2_r2_exp",
    "wt_r_agg", "wt_r_exp",
]
# TODO: p_harmonic, pr_harmonic, wt_p_harmonic, wt_pr_harmonic

# Parameter sweeps built with adLib.float_range(start, stop, step).
# NOTE(review): whether the stop value is included depends on
# adLib.float_range -- confirm there.
ALPHAS = adLib.float_range(0, 1, 0.1)
BETAS = adLib.float_range(0, 1, 0.1)
THRESHOLDS = adLib.float_range(0, 0.1, 0.01)

# These two assignments replace the ALPHAS and BETAS sweeps above with a
# single fixed value each; THRESHOLDS keeps its full range.
ALPHAS = [0.1]
BETAS = [0.72]
def compare_accounts(adset_file, results_dir):
    '''INTERFACE: Compare a "base" and "other" account to see which of the
    ads in "base" are found in "other".

    For every base trial, the "other" trials are unioned one at a time and
    subtracted from the base ads. A row is recorded each time the number
    of base ads still missing from "other" changes, and at least once per
    base trial (on the last "other" trial examined). Scanning of "other"
    trials stops early once no base ads are left unmatched.

    Columns of <results_dir>/results.txt (tab-separated):
        Base, Count, Ds, Rs, Xs: base trial index, its ad count and the
            per-type counts as formatted by types_count_str().
        Other, NF, Ds, Rs, Xs: index of the last "other" trial included,
            number of base ads not found so far and their type counts.
        Common, Ds, Rs, Xs: number of base ads found so far and their
            type counts (base counts minus not-found counts).

    If the config lacks a "base" or an "other" entry, an error message is
    printed and nothing is written.

    Args:
        adset_file: Config file like "accounts.cf" specifying "base" and
            "other".
        results_dir: Directory path to save experiment results.
    '''
    file_sets = adParser.parse_conf(adset_file)
    ad_truth = adLib.true_ds_of_ads("dbs/adTruth.db")

    if "base" not in file_sets or "other" not in file_sets:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("ERROR: %s doesn't specify base and other accounts."
              % (adset_file,))
        return

    base_file_sets = file_sets["base"]
    other_file_sets = file_sets["other"]
    last_other = len(other_file_sets) - 1

    make_dir(results_dir)
    rows = ["Base\tCount\tDs\tRs\tXs\tOther\tNF\tDs\tRs\tXs\tCommon\tDs\tRs\tXs"]

    for b, base_file_set in enumerate(base_file_sets):
        base_ads = adParser.parse_html_set(base_file_set)
        base_count = len(base_ads)
        base_tc = adLib.ad_types_count(base_ads, ad_truth)

        other_ads = []
        prev_diff = base_count
        printed = False

        for o, other_file_set in enumerate(other_file_sets):
            other_ads = adOps.union(
                [other_ads, adParser.parse_html_set(other_file_set)])
            # base_ads shrinks to the ads not yet seen in any "other" trial.
            base_ads = adOps.difference(base_ads, other_ads)
            diff = len(base_ads)
            diff_tc = adLib.ad_types_count(base_ads, ad_truth)
            common = base_count - diff
            common_tc = {key: base_tc[key] - diff_tc[key] for key in base_tc}

            # The loop ends here either on the last "other" trial or when
            # nothing is left to find. Treating both as final guarantees a
            # row even for a base trial that starts with zero ads, which
            # previously broke out before any row was written.
            final_trial = o == last_other or diff == 0
            if diff != prev_diff or (final_trial and not printed):
                printed = True
                rows.append("\t".join([
                    str(b), str(base_count), types_count_str(base_tc),
                    str(o), str(diff), types_count_str(diff_tc),
                    str(common), types_count_str(common_tc)]))

            prev_diff = diff
            if diff == 0:
                break

    # "with" closes the file even if the write raises.
    with open(results_dir + "/results.txt", "w") as fd:
        fd.write("\n".join(rows) + "\n")