def get_and_log_mst_weight_from_checker(input_graph, force_recompute=False, inputslogfn=None):
    """Returns a 2-tuple of (input, weight).  If force_recompute is not True,
    then it will check the input log cache to see if we already know the
    answer first.  Logs the result.

    input_graph   -- path/handle of the graph whose MST weight is wanted
    inputslogfn   -- input-solution log file to consult/update; if None, the
                     default path for this input's category is used
    """
    ti = __get_ti(input_graph)

    # load in the inputs in the category of input_graph
    if inputslogfn is None:
        logfn = InputSolution.get_path_to(ti.prec, ti.dims, ti.min, ti.max)
    else:
        logfn = inputslogfn
    ds = DataSet.read_from_file(InputSolution, logfn)

    # fix: use "in" rather than the deprecated dict.has_key()
    if ti in ds.dataset:
        input_soln = ds.dataset[ti]
        do_log = True
        # see if we already know the answer
        if not force_recompute:
            if input_soln.has_mst_weight():
                return (ti, input_soln.mst_weight)  # cache hit!
    else:
        # if we weren't tracking the input before, don't start now
        do_log = False

    # compute the answer and (if specified) save it
    w = compute_mst_weight(input_graph)
    if do_log:
        if input_soln.update_mst_weight(w):
            ds.save_to_file(logfn)
    return (ti, w)
def gather_weight_data(wtype): # get the results results = {} # maps |V| to ResultAccumulator ds = DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) for data in ds.dataset.values(): result = results.get(data.input().num_verts) if result is None: result = ResultAccumulator(data.mst_weight) results[data.input().num_verts] = result else: result.add_data(data.mst_weight) try: # open a file to output to fh = open(DATA_PATH + wtype + '.dat', 'w') # compute relevant stats and output them print >> fh, '#|V|\tLower\tAverage\tUpper (Lower/Upper from 99% CI)' keys = results.keys() keys.sort() for num_verts in keys: r = results[num_verts] r.compute_stats() if len(r.values) > 1: print >> fh, '%u\t%.3f\t%.3f\t%.3f\t%u' % ( num_verts, r.lower99, r.mean, r.upper99, len(r.values)) fh.close() return 0 except IOError, e: print sys.stderr, "failed to write file: " + str(e) return -1
def gather_weight_data(wtype): # get the results results = {} # maps |V| to ResultAccumulator ds = DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) for data in ds.dataset.values(): result = results.get(data.input().num_verts) if result is None: result = ResultAccumulator(data.mst_weight) results[data.input().num_verts] = result else: result.add_data(data.mst_weight) try: # open a file to output to fh = open(DATA_PATH + wtype + '.dat', 'w') # compute relevant stats and output them print >> fh, '#|V|\tLower\tAverage\tUpper (Lower/Upper from 99% CI)' keys = results.keys() keys.sort() for num_verts in keys: r = results[num_verts] r.compute_stats() if len(r.values) > 1: print >> fh, '%u\t%.3f\t%.3f\t%.3f\t%u' % (num_verts, r.lower99, r.mean, r.upper99, len(r.values)) fh.close() return 0 except IOError, e: print sys.stderr, "failed to write file: " + str(e) return -1
def gather_perf_data(alg, rev, index, latest): """Gathers performance data for a single revision of an algorithm""" print 'gathering perf data for %s (rev=%s index=%u latest=%s)' % (alg, rev, index, str(latest)) # get the results results = {} # maps (|V|, |E|) to ResultAccumulator ds = DataSet.read_from_file(PerfResult, PerfResult.get_path_to(rev)) for data in ds.dataset.values(): key = (data.input().num_verts, data.input().num_edges) result = results.get(key) if result is None: result = ResultAccumulator(data.time_sec) result.defaultCI = DEFAULT_CI results[key] = result else: result.add_data(data.time_sec) # put the results in order keys_density = results.keys() keys_density.sort(density_compare) keys_pom = results.keys() keys_pom.sort(pom_compare) keys = {} keys['density'] = keys_density keys['pom'] = keys_pom # compute stats for all the results for num_verts in results.keys(): results[num_verts].compute_stats() # generate dat files for each x-axis cross important vertex counts for xaxis in keys: if xaxis == 'pom': computex = lambda v, e : get_percent_of_max(v, e) elif xaxis == 'density': computex = lambda v, e : get_density(v, e) else: print >> sys.stderr, "unexpected x-axis value: " + str(xaxis) sys.exit(-1) header_txt = '#|V|\t|E|\t' + xaxis + '\tLower\tAverage\tUpper\t#Runs (Lower/Upper from ' + str(DEFAULT_CI) + '% CI)' for vip in IMPORTANT_VERTS: # open a file to output to dat = get_output_dat_name(xaxis, alg, rev, index, vip) print 'creating ' + dat if latest: latest_fn = make_latest(xaxis, alg, rev, index, vip) try: fh = open(dat, 'w') # compute relevant stats and output them print >> fh, header_txt count = 0 for (v, e) in keys[xaxis]: if vip=='all' or vip==v: count += 1 r = results[(v, e)] x = computex(v, e) print >> fh, '%u\t%u\t%.6f\t%.3f\t%.3f\t%.3f\t%u' % (v, e, x, r.lower99, r.mean, r.upper99, len(r.values)) fh.close() # don't create empty files if count == 0: quiet_remove(dat) if latest: quiet_remove(latest_fn) except IOError, e: print sys.stderr, 
"failed to write file: " + str(e) return -1
def main(): usage = """usage: %prog [options] Searches for missing results and uses run_test.py to collect it.""" parser = OptionParser(usage) parser.add_option("-i", "--input_graph", metavar="FILE", help="restrict the missing data check to the specified input graph") parser.add_option("-l", "--inputs-list-file", metavar="FILE", help="collect data for all inputs in the specified log file") parser.add_option("--list-only", action="store_true", default=False, help="only list missing data (do not collect it)") parser.add_option("-n", "--num-runs", type="int", default="1", help="number of desired runs per revision-input combination [default: 1]") parser.add_option("-r", "--rev", help="restrict the missing data check to the specified revision, or 'all' [default: current]") group = OptionGroup(parser, "Data Collection Options") group.add_option("-p", "--performance", action="store_true", default=True, help="collect performance data (this is the default)") group.add_option("-c", "--correctness", action="store_true", default=False, help="collect correctness data") parser.add_option_group(group) group2 = OptionGroup(parser, "Weight (Part II) Data Collection Options") group2.add_option("-v", "--num_vertices", metavar="V", type="int", default=0, help="collect weight data for V vertices (requires -d or -e)") group2.add_option("-d", "--dims", metavar="D", type="int", default=0, help="collect weight data for randomly positioned vertices in D-dimensional space (requires -v)") group2.add_option("-e", "--edge", action="store_true", default=False, help="collect weight data for random uniform edge weights in the range (0, 1] (requires -v)") parser.add_option_group(group2) (options, args) = parser.parse_args() if len(args) > 0: parser.error("too many arguments") if options.num_runs < 1: parser.error("-n must be at least 1") input_solns = None # prepare for a weight data collection num_on = 0 weight_test = False if options.num_vertices > 0: weight_test = True if options.input_graph or 
options.inputs_list_file: parser.error('-i, -l, and -v are mutually exclusive') if options.dims > 0: num_on += 1 wtype = 'loc%u' % options.dims if options.edge: num_on += 1 wtype = 'edge' if num_on == 0: parser.error('-v requires either -d or -e be specified too') if options.num_runs > 1: options.num_runs = 1 print 'warning: -v truncates the number of runs to 1 (weight should not change b/w runs)' input_path = InputSolution.get_path_to(15, options.dims, 0.0, 1.0) print 'reading inputs to run on from ' + input_path input_solns = DataSet.read_from_file(InputSolution, input_path) revs = [None] # not revision-specific (assuming our alg is correct) get_results_for_rev = lambda _ : DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) collect_missing_data = collect_missing_weight_data elif options.dims > 0 or options.edge: parser.error('-v is required whenever -d or -e is used') # handle -i, -l: collect data for a particular graph(s) if options.input_graph and options.inputs_list_file: parser.error('-i and -l are mutually exclusive') if options.input_graph is not None: try: i = extract_input_footer(options.input_graph) except ExtractInputFooterError, e: parser.error(e) input_solns = DataSet({0:InputSolution(i.prec,i.dims,i.min,i.max,i.num_verts,i.num_edges,i.seed)})
get_results_for_rev = lambda _ : DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) collect_missing_data = collect_missing_weight_data elif options.dims > 0 or options.edge: parser.error('-v is required whenever -d or -e is used') # handle -i, -l: collect data for a particular graph(s) if options.input_graph and options.inputs_list_file: parser.error('-i and -l are mutually exclusive') if options.input_graph is not None: try: i = extract_input_footer(options.input_graph) except ExtractInputFooterError, e: parser.error(e) input_solns = DataSet({0:InputSolution(i.prec,i.dims,i.min,i.max,i.num_verts,i.num_edges,i.seed)}) elif options.inputs_list_file is not None: input_solns = DataSet.read_from_file(InputSolution, options.inputs_list_file) # prepare for a correctness data collection if options.correctness: num_on += 1 get_results_for_rev = lambda rev : DataSet.read_from_file(CorrResult, CorrResult.get_path_to(rev)) options.inputs_list_file_arg = '' if options.inputs_list_file is None else ' -l ' + options.inputs_list_file collect_missing_data = lambda w,x,y,z: collect_missing_correctness_data(w,x,y,z,options.inputs_list_file_arg) # make sure no more than 1 type of data collection was specified if num_on > 1: parser.error('at most one of -c, -d, and -e may be specified') elif num_on == 0: # prepare for a performance data collection (default if nothing else is specified) get_results_for_rev = lambda rev : DataSet.read_from_file(PerfResult, PerfResult.get_path_to(rev)) collect_missing_data = collect_missing_performance_data
#!/usr/bin/env python from data import DataSet, InputSolution from check_output import get_and_log_mst_weight_from_checker from generate_input import main as generate_input import sys, time if len(sys.argv) != 2: print 'usage: gather_correctness.py LOG_FN' sys.exit(-1) # get the file to read inputs from logfn = sys.argv[1] ds = DataSet.read_from_file(InputSolution, logfn) # compute correctness for each input inputs = ds.dataset.keys() # Input objects inputs.sort() on = 0 for i in inputs: on += 1 # figure out how to generate the graph and where it will be sotred argstr = '-mt ' + i.make_args_for_generate_input() input_graph = generate_input(argstr.split(), get_output_name_only=True) print time.ctime(time.time()) + ' input # ' + str( on) + ' => gathering correctness data for ' + argstr # generate the graph generate_input(argstr.split()) # compute the weight for the graph
#!/usr/bin/env python from data import DataSet, InputSolution from check_output import get_and_log_mst_weight_from_checker from generate_input import main as generate_input import sys, time if len(sys.argv) != 2: print 'usage: gather_correctness.py LOG_FN' sys.exit(-1) # get the file to read inputs from logfn = sys.argv[1] ds = DataSet.read_from_file(InputSolution, logfn) # compute correctness for each input inputs = ds.dataset.keys() # Input objects inputs.sort() on = 0 for i in inputs: on += 1 # figure out how to generate the graph and where it will be sotred argstr = '-mt ' + i.make_args_for_generate_input() input_graph = generate_input(argstr.split(), get_output_name_only=True) print time.ctime(time.time()) + ' input # ' + str(on) + ' => gathering correctness data for ' + argstr # generate the graph generate_input(argstr.split()) # compute the weight for the graph get_and_log_mst_weight_from_checker(input_graph, force_recompute=False, inputslogfn=logfn)