def run_k_loc_sim(trace_file_name, use_homo_DS_cost=True):
    """
    Run a simulation where the running parameter is k_loc.

    The sweep currently holds a single point: k_loc = 3, uInterval = 256 and
    algorithm sim.ALG_OPT, using 8 DSs. Results are appended to
    "../res/<first path component of trace_file_name>_k_loc.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); only its
                        first "/"-separated component is used afterwards as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
    """
    max_num_of_req = 4300000  # Shorten the num of requests for debugging / shorter runs
    num_of_DSs = 8
    # In this sim', each item's location will be calculated as a hash of the key
    # (use_given_loc_per_item = False below). Hence we actually don't use the
    # k_loc pre-computed entries requested here.
    precomputed_k_loc = 1
    requests = gen_requests(trace_file_name, max_num_of_req, precomputed_k_loc)
    trace_name = trace_file_name.split("/")[0]
    num_of_req = requests.shape[0]

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/" + trace_name + "_k_loc.res", "a") as output_file:
        if num_of_req < max_num_of_req:
            print('Note: you used only {} requests for a k_loc sim'.format(num_of_req))

        for k_loc in [3]:
            for uInterval in [256]:
                DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)
                # NOTE(review): a miss penalty of 50 * np.average(DS_cost) used to be
                # computed here but was never passed to the simulator -- confirm
                # whether it should be forwarded as missp.
                # Alternatives left by the author: sim.ALG_PGM_FNA_MR1_BY_ANALYSIS,
                # sim.ALG_PGM_FNO_MR1_BY_HIST
                for alg_mode in [sim.ALG_OPT]:
                    print("now = ", datetime.now(), 'running k_loc sim')
                    tic()
                    sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                       uInterval=uInterval, k_loc=k_loc,
                                       use_given_loc_per_item=False)
                    sm.run_simulator()
                    toc()
def run_num_of_caches_sim(trace_file_name, use_homo_DS_cost=True):
    """
    Run a simulation where the running parameter is the num of caches (DSs).

    For every num_of_DSs in 1..8 the simulator is run with uInterval = 1024 and
    algorithm sim.ALG_PGM_FNO_MR1_BY_ANALYSIS. Results are appended to
    "../res/<first path component of trace_file_name>_num_of_caches.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); only its
                        first "/"-separated component is used afterwards as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
    """
    max_num_of_req = 4300000  # Shorten the num of requests for debugging / shorter runs
    requests = gen_requests(trace_file_name, max_num_of_req)
    trace_name = trace_file_name.split("/")[0]
    num_of_req = requests.shape[0]

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/" + trace_name + "_num_of_caches.res", "a") as output_file:
        if num_of_req < max_num_of_req:
            print('Note: you used only {} requests for a num of caches sim'.format(num_of_req))

        for num_of_DSs in [1, 2, 3, 4, 5, 6, 7, 8]:
            for uInterval in [1024]:
                DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)
                # NOTE(review): DS_size = 10000 and a miss penalty of
                # 50 * np.average(DS_cost) used to be computed in this function but
                # were never passed to the simulator -- confirm whether they should
                # be forwarded.
                # Alternatives left by the author: sim.ALG_OPT,
                # sim.ALG_PGM_FNO_MR1_BY_HIST, sim.ALG_PGM_FNA_MR1_BY_HIST
                for alg_mode in [sim.ALG_PGM_FNO_MR1_BY_ANALYSIS]:
                    print("now = ", datetime.now(), 'running num of caches sim')
                    tic()
                    sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                       uInterval=uInterval, use_given_loc_per_item=False)
                    sm.run_simulator()
                    toc()
def run_sim_collection(DS_size, BF_size, beta, requests, client_DS_dist, client_DS_BW, bw_regularization):
    """
    Run one simulator per (k_loc, algorithm) pair and collect the simulators.

    Returns a nested dict: result[k_loc][alg_mode] is the sim.Simulator object
    on which start_simulator() has been called. The sweep currently covers
    k_loc = 1 and sim.ALG_OPT only.

    NOTE(review): the visible source defines run_sim_collection more than once
    with different signatures; a later definition shadows an earlier one.
    """
    insert_mode = 1

    def simulate(alg_mode, k_loc):
        # Time a single run and hand back the finished simulator.
        tic()
        simulator = sim.Simulator(alg_mode, insert_mode, requests, client_DS_dist, client_DS_BW,
                                  bw_regularization, beta, k_loc, DS_size=DS_size, BF_size=BF_size)
        simulator.start_simulator()
        toc()
        return simulator

    collected = {}
    for k_loc in [1]:  # , 3, 5
        print('k_loc = ', k_loc)
        # Other modes left by the author: sim.ALG_PGM, sim.ALG_CHEAP, sim.ALG_ALL,
        # sim.ALG_KNAP, sim.ALG_POT
        collected[k_loc] = {alg_mode: simulate(alg_mode, k_loc) for alg_mode in [sim.ALG_OPT]}
    return collected
def run_sim_collection(DS_size, FP_rate_vals, beta, k_loc, requests, client_DS_dist, client_DS_BW, bw_regularization):
    """
    Run every algorithm once per false-positive rate and collect the simulators.

    For each FP_rate in FP_rate_vals the BF size is looked up in
    BF_size_for_DS_size[FP_rate][DS_size]; then one sim.Simulator is built and
    started for each of ALG_OPT, ALG_PGM, ALG_CHEAP, ALG_ALL, ALG_KNAP, ALG_POT.

    Returns a nested dict: result[FP_rate][alg_mode] is the simulator object.

    NOTE(review): the visible source defines run_sim_collection more than once
    with different signatures; a later definition shadows an earlier one.
    """
    insert_mode = 1
    results_by_fp_rate = {}

    for fp_rate in FP_rate_vals:
        print('FP_rate = ', fp_rate)
        bf_size = BF_size_for_DS_size[fp_rate][DS_size]
        algorithms = [sim.ALG_OPT, sim.ALG_PGM, sim.ALG_CHEAP, sim.ALG_ALL, sim.ALG_KNAP, sim.ALG_POT]

        results_by_alg = {}
        for alg_mode in algorithms:
            tic()
            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
            print(timestamp)
            simulator = sim.Simulator(alg_mode, insert_mode, requests, client_DS_dist, client_DS_BW,
                                      bw_regularization, beta, k_loc, DS_size=DS_size, BF_size=bf_size)
            simulator.start_simulator()
            toc()
            results_by_alg[alg_mode] = simulator

        results_by_fp_rate[fp_rate] = results_by_alg

    return results_by_fp_rate
def run_sim_collection(DS_size_vals, FP_rate, beta, k_loc, requests, client_DS_dist, client_DS_BW, bw_regularization):
    """
    Run the simulator once per DS size and collect the simulators.

    For each DS_size in DS_size_vals the BF size is looked up in
    BF_size_for_DS_size[FP_rate][DS_size]; then a sim.Simulator is built and
    started for each selected algorithm (currently sim.ALG_OPT only).

    Returns a nested dict: result[DS_size][alg_mode] is the simulator object.

    NOTE(review): the visible source defines run_sim_collection more than once
    with different signatures; this definition shadows any earlier one.
    """
    insert_mode = 1
    results_by_size = {}

    for ds_size in DS_size_vals:
        bf_size = BF_size_for_DS_size[FP_rate][ds_size]
        print('DS_size = ', ds_size)
        results_by_size[ds_size] = results_by_alg = {}

        # Other modes left by the author: sim.ALG_ALL, sim.ALG_CHEAP, sim.ALG_POT, sim.ALG_PGM.
        # Author's note: in the homogeneous setting, no need to run Knap since it is
        # equivalent to 6 (Pot).
        for alg_mode in (sim.ALG_OPT,):
            tic()
            print(datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
            simulator = sim.Simulator(alg_mode, insert_mode, requests, client_DS_dist, client_DS_BW,
                                      bw_regularization, beta, k_loc, DS_size=ds_size, BF_size=bf_size)
            simulator.start_simulator()
            toc()
            results_by_alg[alg_mode] = simulator

    return results_by_size
def run_FN_by_uInterval_sim(trace_file_name):
    """
    Run sim.ALG_MEAURE_FP_FN for every (bpe, uInterval) pair on a single DS.

    bpe takes the values 4, 8, 16 and uInterval the powers of two from 2 to
    8192. Each bpe value gets its own results file,
    "../res/<first path component of trace_file_name>_FN_by_uInterval_bpe<bpe>.res",
    opened in append mode.

    trace_file_name -- path of the trace, forwarded to gen_requests(); only its
                       first "/"-separated component is used afterwards as the
                       trace's short name.
    """
    max_num_of_req = 1000000  # Shorten the num of requests for debugging / shorter runs
    # In this sim', each item's location will be calculated as a hash of the key
    # (use_given_loc_per_item = False below). Hence we actually don't use the
    # k_loc pre-computed entries.
    requests = gen_requests(trace_file_name, max_num_of_req)
    DS_cost = calc_DS_cost(num_of_DSs=1)
    trace_name = trace_file_name.split("/")[0]

    print("now = ", datetime.now(), 'running FN_by_uInterval_sim sim')
    for bpe in [4, 8, 16]:
        res_path = "../res/" + trace_name + "_FN_by_uInterval_bpe" + str(bpe) + ".res"
        # One file per bpe; the context manager closes it before the next bpe
        # starts (previously every file stayed open until interpreter exit).
        with open(res_path, "a") as output_file:
            for uInterval in [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]:
                tic()
                sm = sim.Simulator(output_file, trace_name, sim.ALG_MEAURE_FP_FN, requests, DS_cost,
                                   verbose=0, bpe=bpe, uInterval=uInterval,
                                   use_given_loc_per_item=False)
                sm.run_simulator()
                toc()
def run_FN_by_staleness_sim():
    """
    Run sim.ALG_PGM_FNO_MR1_BY_HIST with verbose = sim.CNT_FN_BY_STALENESS on two traces.

    For each of the two hard-coded traces (scarab and umass) a header is written
    to "../res/FN_by_staleness.res" (append mode), and the simulator is run for
    bpe in 2, 4, 8, 16 with uInterval = 8192.
    """
    max_num_of_req = 1000000  # Shorten the num of requests for debugging / shorter runs
    DS_cost = calc_DS_cost()
    trace_paths = ['scarab/scarab.recs.trace.20160808T073231Z.15M_req_1000K_3DSs.csv',
                   'umass/storage/F2.3M_req_1000K_3DSs.csv']

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/FN_by_staleness.res", "a") as output_file:
        print("now = ", datetime.now(), 'running FN_by_staleness sim')
        for trace_path in trace_paths:
            # NOTE(review): the original comment here said each item's location is
            # computed as a hash of the key, yet use_given_loc_per_item = True is
            # passed below -- confirm which one is intended.
            requests = gen_requests(trace_path, max_num_of_req)
            trace_name = trace_path.split("/")[0]  # short name: first path component
            printf(output_file, '\n\ntrace = {}\n///////////////////\n'.format(trace_name))
            for bpe in [2, 4, 8, 16]:
                tic()
                sm = sim.Simulator(output_file, trace_name, sim.ALG_PGM_FNO_MR1_BY_HIST, requests,
                                   DS_cost, bpe=bpe, verbose=sim.CNT_FN_BY_STALENESS,
                                   uInterval=8192, use_given_loc_per_item=True)
                sm.run_simulator()
                toc()
def run_uInterval_sim(trace_file_name, use_homo_DS_cost=False):
    """
    Run a simulation where the running parameter is uInterval.

    Uses 3 DSs and algorithm sim.ALG_PGM_FNA_MR1_BY_ANALYSIS. uInterval goes over
    the powers of two from 8192 down to 16, but values below 50 are skipped for
    that algorithm, so 32 and 16 are not simulated. Results are appended to
    "../res/<first path component of trace_file_name>_uInterval.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); only its
                        first "/"-separated component is used afterwards as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
    """
    max_num_of_req = 1000000  # Shorten the num of requests for debugging / shorter runs
    num_of_DSs = 3
    requests = gen_requests(trace_file_name, max_num_of_req)
    trace_name = trace_file_name.split("/")[0]
    DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/" + trace_name + "_uInterval.res", "a") as output_file:
        print("now = ", datetime.now(), 'running uInterval sim')
        for alg_mode in [sim.ALG_PGM_FNA_MR1_BY_ANALYSIS]:
            for uInterval in [8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 16]:
                # When uInterval < parameters updates interval, FNO and FNA are
                # identical, so no need to run also FNA
                if alg_mode == sim.ALG_PGM_FNA_MR1_BY_ANALYSIS and uInterval < 50:
                    continue
                tic()
                sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                   uInterval=uInterval)
                sm.run_simulator()
                toc()
def run_cache_size_sim(trace_file_name, use_homo_DS_cost=False):
    """
    Run a simulation where the running parameter is cache_size (DS_size).

    Uses 3 DSs and algorithm sim.ALG_PGM_FNO_MR1_BY_ANALYSIS. DS_size goes over
    1000, 2000, 4000, 8000, 16000, 32000, each with uInterval 1024 and 256.
    Results are appended to
    "../res/<first path component of trace_file_name>_cache_size.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); only its
                        first "/"-separated component is used afterwards as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
    """
    max_num_of_req = 4300000  # Shorten the num of requests for debugging / shorter runs
    num_of_DSs = 3
    requests = gen_requests(trace_file_name, max_num_of_req)
    trace_name = trace_file_name.split("/")[0]
    num_of_req = requests.shape[0]
    DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/" + trace_name + "_cache_size.res", "a") as output_file:
        if num_of_req < max_num_of_req:
            print('Note: you used only {} requests for a cache size sim'.format(num_of_req))

        for DS_size in [1000, 2000, 4000, 8000, 16000, 32000]:
            for uInterval in [1024, 256]:
                # Alternatives left by the author: sim.ALG_PGM_FNA_MR1_BY_HIST,
                # sim.ALG_OPT, sim.ALG_PGM_FNO_MR1_BY_HIST
                for alg_mode in [sim.ALG_PGM_FNO_MR1_BY_ANALYSIS]:
                    print("now = ", datetime.now(), 'running cache_size sim')
                    tic()
                    sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                       uInterval=uInterval, DS_size=DS_size)
                    sm.run_simulator()
                    toc()
def run_bpe_sim(trace_file_name, use_homo_DS_cost=False):
    """
    Run a simulation where the running parameter is bpe.

    Uses 3 DSs and algorithm sim.ALG_PGM_FNO_MR1_BY_ANALYSIS. bpe goes over
    5..15, each with uInterval 1024 and 256. Results are appended to
    "../res/<first path component of trace_file_name>_bpe.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); only its
                        first "/"-separated component is used afterwards as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
                        NOTE(review): the previous docstring stated that a true
                        value means uniform access costs of 1 with miss penalty
                        300/7, and otherwise costs 1, 2, 4 with miss penalty 100;
                        no miss penalty is set in this function -- confirm
                        against calc_DS_cost() and sim.Simulator.
    """
    max_num_of_req = 1000000  # Shorten the num of requests for debugging / shorter runs
    num_of_DSs = 3
    requests = gen_requests(trace_file_name, max_num_of_req)
    trace_name = trace_file_name.split("/")[0]
    DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/" + trace_name + "_bpe.res", "a") as output_file:
        print("now = ", datetime.now(), 'running bpe sim')
        for bpe in [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]:
            for uInterval in [1024, 256]:
                # Alternative left by the author: sim.ALG_PGM_FNO_MR1_BY_HIST
                for alg_mode in [sim.ALG_PGM_FNO_MR1_BY_ANALYSIS]:
                    tic()
                    sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                       bpe=bpe, uInterval=uInterval)
                    sm.run_simulator()
                    toc()
def run_var_missp_sim(trace_file_name, use_homo_DS_cost=False, print_est_mr=True, print_real_mr=False, max_num_of_req=700000):
    """
    Run a simulation with different miss penalties for the initial table.

    Uses 3 DSs, uInterval = 1000, DS_size = 10000 and algorithm
    sim.ALG_PGM_FNA_MR1_BY_ANALYSIS. The miss-penalty sweep currently holds the
    single value 50. Results are appended to "../res/tbl.res".

    trace_file_name  -- path of the trace, forwarded to gen_requests(); its first
                        "/"-separated component is passed to the simulator as the
                        trace's short name.
    use_homo_DS_cost -- forwarded unchanged to calc_DS_cost().
    print_est_mr, print_real_mr
                     -- currently ignored: the simulator is always created with
                        print_est_vs_real_mr = True. Kept for backward
                        compatibility with existing callers.
    max_num_of_req   -- forwarded to gen_requests().
    """
    num_of_DSs = 3
    uInterval = 1000
    requests = gen_requests(trace_file_name, max_num_of_req)  # Generate a dataframe of requests from the input trace file
    DS_cost = calc_DS_cost(num_of_DSs, use_homo_DS_cost)
    trace_name = trace_file_name.split("/")[0]

    # The context manager guarantees the results file is flushed and closed,
    # even if a simulation raises.
    with open("../res/tbl.res", "a") as output_file:
        print("now = ", datetime.now(), 'running tbl sim')
        for missp in [50]:  # , 100, 500
            for alg_mode in [sim.ALG_PGM_FNA_MR1_BY_ANALYSIS]:
                tic()
                sm = sim.Simulator(output_file, trace_name, alg_mode, requests, DS_cost,
                                   uInterval=uInterval, missp=missp,
                                   print_est_vs_real_mr=True, DS_size=10000)
                sm.run_simulator()
                toc()
# Some dummy params' values, just for generating the simulator, which will calculate the costs
DS_size = 1000
FP_rate = 0.02
BF_size = 8000
beta = 100
alg_mode = sim.ALG_OPT
DS_insert_mode = 1
file_index = 6
k_loc = 1

# Only the first 100 requests are loaded: the simulator is built solely to read
# its client_DS_cost attribute, and is never started.
trace_df = pd.read_csv('../../Python_Infocom19/trace_5m_%d.csv' % file_index)
req_df = trace_df.head(100)
sm = sim.Simulator(alg_mode, DS_insert_mode, req_df, client_DS_dist, client_DS_BW, bw_regularization,
                   beta, k_loc, DS_size=DS_size, BF_size=BF_size)
cost_array = sm.client_DS_cost

# build histogram data
# Despite the name, this holds all the non-zero costs (duplicates included).
all_unique_costs = cost_array[np.nonzero(cost_array)]
print(all_unique_costs)
max_val = np.ceil(np.max(all_unique_costs)).astype('int')
# max_val unit-width bins covering [1, max_val + 1), normalized to fractions of
# the non-zero costs.
hist_values = np.histogram(all_unique_costs, bins=max_val, range=(1, max_val + 1))[0] / float(all_unique_costs.size)
# One bar per bin. The x positions were previously hard-coded to range(1, 30),
# which only matches hist_values when max_val happens to be 29.
plt.bar(range(1, max_val + 1), hist_values)