def analyze_non_strided_delinq_loads(global_pc_smptrace_hist,
                                     global_pc_stride_hist,
                                     prefetch_decisions,
                                     exec_file,
                                     num_samples,
                                     avg_mem_latency,
                                     max_delinq_loads=15):
    """Rank the pointer-type delinquent loads and analyze the top ones.

    Steps, in order:

    1. Collect every address in ``prefetch_decisions`` whose ``pf_type``
       contains ``"ptr"`` and build a ``Conf1`` from the sorted list.
    2. Score each of those loads as ``sample_freq * l3_mr``, where
       ``sample_freq`` is the number of keys in
       ``global_pc_smptrace_hist[pc]`` divided by ``num_samples``.
    3. Report every scored load on stderr, highest score first, and keep
       the first ``max_delinq_loads`` of them.
    4. For each kept load tagged ``'Read'`` or ``'Write'`` in its function
       disassembly, run pointer-chasing detection followed by
       ``analyze_pointer_prefetch``; loads that end up in
       ``conf.indirect_pref_decisions`` also get a cost/benefit analysis.
    5. Decide the prefetch schedules and print the indirect prefetch
       decisions.

    Args:
        global_pc_smptrace_hist: mapping indexed by load PC; each value must
            provide ``keys()``. Also forwarded to ``detect_pointer_chasing``.
        global_pc_stride_hist: forwarded unchanged to
            ``detect_pointer_chasing``.
        prefetch_decisions: mapping from load PC to an object exposing
            ``pf_type``, ``l1_mr``, ``l2_mr`` and ``l3_mr`` (presumably
            per-cache-level miss ratios -- confirm against the producer).
        exec_file: forwarded to ``Conf1``.
        num_samples: denominator of the sample frequency; also forwarded to
            ``Conf1``.
        avg_mem_latency: forwarded to ``Conf1``.
        max_delinq_loads: how many of the highest-scoring loads are analyzed.
            Defaults to 15, the limit that used to be hard-coded.

    Returns:
        None. Results are recorded through ``conf`` / ``prefetch_decisions``
        by the helpers that are called, and diagnostics go to stderr.
    """
    # Only loads whose prefetch type mentions "ptr" are handled here.
    delinq_load_address_list = sorted(
        addr for addr, pref_param in prefetch_decisions.items()
        if "ptr" in pref_param.pf_type)

    conf = Conf1(exec_file, delinq_load_address_list, num_samples,
                 avg_mem_latency)

    print >> sys.stderr, "\nSample freq irregular accesses!\n"

    # One tuple per candidate:
    # (pc, sample_freq, l3mr, l2mr, l1mr, score)
    irr_list = []
    for pc in delinq_load_address_list:
        decision = prefetch_decisions[pc]
        sample_freq = (float(len(global_pc_smptrace_hist[pc].keys()))
                       / float(num_samples))
        score = sample_freq * float(decision.l3_mr)
        irr_list.append((pc, sample_freq, decision.l3_mr, decision.l2_mr,
                         decision.l1_mr, score))

    # Highest score first; sorted() is stable, so ties keep address order.
    sorted_irr_list = sorted(irr_list, key=operator.itemgetter(5),
                             reverse=True)

    trimmed_delinq_load_addr_list = []
    for rank, entry in enumerate(sorted_irr_list):
        (pc, sample_freq, l3mr, l2mr, l1mr, score) = entry
        if rank < max_delinq_loads:
            trimmed_delinq_load_addr_list.append(pc)
        # NOTE(review): the original text was whitespace-mangled; the report
        # is assumed to be printed for every candidate, not only the kept
        # ones -- confirm.
        # The stride-histogram lookup that used to sit here was removed: its
        # results were never used, and it could raise on a PC with a missing
        # or empty stride histogram.
        print >> sys.stderr, "\npc:%lx freq:%lf l3mr:%lf l2mr:%lf l1mr:%lf score:%lf"%(pc, sample_freq, l3mr, l2mr, l1mr, score)

    # NOTE: an earlier per-load sample-frequency cut-off (< 0.005) is
    # disabled; the top-N ranking above is what limits the work now.
    cfg = None
    for delinq_load_addr in trimmed_delinq_load_addr_list:
        cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr)

        # Skip instructions that are not tagged as memory reads or writes.
        if cfg.ins_tags_dict[delinq_load_addr] not in ('Read', 'Write'):
            continue

        (pointer_update_addr_dict, pointer_update_time_dict,
         time_to_update_dict, delinq_loads_till_update,
         delinq_loads_till_use, all_BBs_in_loop, is_ind,
         stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(
             global_pc_smptrace_hist, global_pc_stride_hist,
             delinq_load_addr, None, cfg, conf)

        analyze_pointer_prefetch(pointer_update_addr_dict,
                                 prefetch_decisions,
                                 pointer_update_time_dict,
                                 time_to_update_dict, delinq_load_addr,
                                 delinq_loads_till_update,
                                 delinq_loads_till_use, all_BBs_in_loop,
                                 cfg, conf, is_ind, stride)

        # Cost/benefit analysis only applies to loads for which an indirect
        # prefetch decision was recorded in conf.
        if delinq_load_addr in conf.indirect_pref_decisions:
            do_cost_benefit_analysis(cfg, conf, delinq_load_addr,
                                     prefetch_decisions)

    # cfg stays None when there was nothing to analyze; calling the
    # scheduler unconditionally used to raise UnboundLocalError then.
    # NOTE(review): this is assumed to run once after the loop, receiving
    # the cfg of the last load examined -- confirm that this is intended.
    if cfg is not None:
        decide_prefetch_schedules(cfg, conf)

    print_indirect_prefetch_decisions(conf)
for (pc_rdist_hist, pc_stride_hist, pc_freq_hist, pc_time_hist, pc_corr_hist, pc_fwd_rdist_hist, pc_smptrace_hist) in burst_hists: continue ins_trace_ptr_nobj_analysis.add_trace_to_global_pc_smptrace_hist(global_pc_smptrace_hist, pc_smptrace_hist) ins_trace_ptr_nobj_analysis.add_to_pc_stride_hist(pc_stride_hist, global_pc_stride_hist) print >> sys.stderr, "Starting trace analysis..." for delinq_load_addr in delinq_load_address_list: cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr) if not (cfg.ins_tags_dict[delinq_load_addr] == 'Read' or cfg.ins_tags_dict[delinq_load_addr] == 'Write'): continue print >> sys.stderr, "Sample frequency %lx: %lf"%(delinq_load_addr, float(len(pc_smptrace_hist.keys()))/float(conf.num_samples)) (pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, is_ind, stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr, None, cfg, conf) # analyze_pointer_prefetch(pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride) analyze_pointer_prefetch(pointer_update_addr_dict, [], pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride) decide_prefetch_schedules(cfg, conf) print_indirect_prefetch_decisions(conf) if __name__ == "__main__": main()