def main(argv=sys.argv[1:]):
    """Rank the fits produced by one of the part-2 gnuplot scripts.

    Runs gnuplot on ``writeup/figures/part2-fit-<argv[0]>.gnuplot`` (relative
    to the project root), keeps only the output lines that contain '%' or
    'reduced', parses those lines into Fit objects and prints the fits in
    sorted order.

    argv -- command-line arguments; argv[0] selects the gnuplot script.
    """
    gplot = get_path_to_project_root() + 'writeup/figures/part2-fit-%s.gnuplot' % argv[0]

    # stderr is merged into stdout so that egrep sees everything gnuplot prints
    os.system("gnuplot %s 2>&1 | egrep '%%|reduced' > tmp" % gplot)
    with open('tmp', 'r') as fh:
        lines = fh.readlines()
    os.remove('tmp')

    res = []
    ri = 0
    fit = None
    for line in lines:
        var = sapinn(pat_var, line.rstrip(), None)
        if var is not None:
            # a pat_var match begins a new fit; bank the one in progress first
            if fit is not None:
                ri += 1
                res.append(fit)
            fit = Fit(rates[ri], var[0])
        if fit is not None:
            fit.parse_line(line)

    # bank the last fit -- but only if some line actually started one,
    # otherwise None would end up in the list and break update_score() below
    if fit is not None:
        res.append(fit)

    # print from best to worst
    for r in res:
        r.update_score()
    print('best to worst fit for %s:' % gplot)
    print('%-17s\tRCS\tAvgErr\tMaxErr\tFit' % 'Fit Type')
    res.sort()
    for r in res:
        print(str(r))
def main(argv=sys.argv[1:]):
    """Rank the fits produced by one of the part-2 gnuplot scripts.

    Runs gnuplot on ``writeup/figures/part2-fit-<argv[0]>.gnuplot`` (relative
    to the project root), keeps only the output lines that contain '%' or
    'reduced', parses those lines into Fit objects and prints the fits in
    sorted order.

    argv -- command-line arguments; argv[0] selects the gnuplot script.
    """
    gplot = get_path_to_project_root(
    ) + 'writeup/figures/part2-fit-%s.gnuplot' % argv[0]

    # stderr is merged into stdout so that egrep sees everything gnuplot prints
    os.system("gnuplot %s 2>&1 | egrep '%%|reduced' > tmp" % gplot)
    with open('tmp', 'r') as fh:
        lines = fh.readlines()
    os.remove('tmp')

    res = []
    ri = 0
    fit = None
    for line in lines:
        var = sapinn(pat_var, line.rstrip(), None)
        if var is not None:
            # a pat_var match begins a new fit; bank the one in progress first
            if fit is not None:
                ri += 1
                res.append(fit)
            fit = Fit(rates[ri], var[0])
        if fit is not None:
            fit.parse_line(line)

    # bank the last fit -- but only if some line actually started one,
    # otherwise None would end up in the list and break update_score() below
    if fit is not None:
        res.append(fit)

    # print from best to worst
    for r in res:
        r.update_score()
    print('best to worst fit for %s:' % gplot)
    print('%-17s\tRCS\tAvgErr\tMaxErr\tFit' % 'Fit Type')
    res.sort()
    for r in res:
        print(str(r))
def main(argv=sys.argv):
    """Check our MST for one input graph against the correctness checker.

    Runs the mst binary and the checker binary on the graph named by argv[1],
    compares the MST weights they report (formatted to one decimal place) and,
    when those differ, sorts both outputs with sort_and_order.py and diffs
    them.  The temporary output files are removed on every path, including
    when parsing or comparing the outputs raises.

    argv -- full argument vector; must be [program_name, INPUT_GRAPH].

    Returns 0 whether or not the weights match.  A bad argument count, a
    missing input file, or a failing binary is reported through die().
    """
    if len(argv) != 2:
        die("""usage: check_with_diff.py INPUT_GRAPH
Checks the current mst for a given input file and reports the diff, if any. It
does not do anything smart in terms of caching the correctness checker's output,
so it will be recomputed every time this is run.
""")
    else:
        input_graph = argv[1]

    if not os.path.exists(input_graph):
        die("%s not found" % input_graph)

    # get our mst output
    mst = get_path_to_mst_binary(make_sure_it_exists=False)
    mst_out = random_tmp_filename(10)
    if os.system('%s %s > %s' % (mst, input_graph, mst_out)) != 0:
        quiet_remove(mst_out)
        die("failed to run mst (or exited with an error code)")

    # get the checker's output
    checker = get_path_to_checker_binary(make_sure_it_exists=True)
    checker_out = random_tmp_filename(10)
    if os.system('%s %s true > %s' % (checker, input_graph, checker_out)) != 0:
        quiet_remove(mst_out)
        quiet_remove(checker_out)
        die("failed to run checker (or exited with an error code)")

    # everything created from here on is cleaned up in one place, even if
    # extract_answer() or the comparison below raises
    tmp_files = [mst_out, checker_out]
    try:
        # check just the MST weight first
        mst_w = extract_answer(mst_out)
        checker_w = extract_answer(checker_out)
        fmt = '%.1f'  # tolerance to 1 decimal place
        str_mst_w = fmt % mst_w
        str_checker_w = fmt % checker_w
        if str_mst_w == str_checker_w:
            print('Weights match! %s %s' % (str_mst_w, str_checker_w))
            return 0

        print('Weight mismatch, comparing vertices!! (checker_mst (%s) - our_mst (%s) = %s)' % (
            str(checker_w), str(mst_w), str(checker_w - mst_w)))

        # sort them
        mst_out2 = random_tmp_filename(10)
        checker_out2 = random_tmp_filename(10)
        # NOTE(review): assumes quiet_remove() tolerates a path that was never
        # created (e.g. if a later step raises before the sort runs) -- confirm
        tmp_files.extend([mst_out2, checker_out2])
        sort_and_order = get_path_to_project_root() + 'src/input/sort_and_order.py '
        os.system(sort_and_order + mst_out + ' 1 > ' + mst_out2)
        os.system(sort_and_order + checker_out + ' 1 > ' + checker_out2)

        # compare them
        os.system('diff %s %s && echo Edges are the same!' % (checker_out2, mst_out2))
        return 0
    finally:
        for tmp_path in tmp_files:
            quiet_remove(tmp_path)
def main(argv=sys.argv):
    """Check our MST for one input graph against the correctness checker.

    Runs the mst binary and the checker binary on the graph named by argv[1],
    compares the MST weights they report (formatted to one decimal place) and,
    when those differ, sorts both outputs with sort_and_order.py and diffs
    them.  The temporary output files are removed on every path, including
    when parsing or comparing the outputs raises.

    argv -- full argument vector; must be [program_name, INPUT_GRAPH].

    Returns 0 whether or not the weights match.  A bad argument count, a
    missing input file, or a failing binary is reported through die().
    """
    if len(argv) != 2:
        die("""usage: check_with_diff.py INPUT_GRAPH
Checks the current mst for a given input file and reports the diff, if any. It
does not do anything smart in terms of caching the correctness checker's output,
so it will be recomputed every time this is run.
""")
    else:
        input_graph = argv[1]

    if not os.path.exists(input_graph):
        die("%s not found" % input_graph)

    # get our mst output
    mst = get_path_to_mst_binary(make_sure_it_exists=False)
    mst_out = random_tmp_filename(10)
    if os.system('%s %s > %s' % (mst, input_graph, mst_out)) != 0:
        quiet_remove(mst_out)
        die("failed to run mst (or exited with an error code)")

    # get the checker's output
    checker = get_path_to_checker_binary(make_sure_it_exists=True)
    checker_out = random_tmp_filename(10)
    if os.system('%s %s true > %s' % (checker, input_graph, checker_out)) != 0:
        quiet_remove(mst_out)
        quiet_remove(checker_out)
        die("failed to run checker (or exited with an error code)")

    # everything created from here on is cleaned up in one place, even if
    # extract_answer() or the comparison below raises
    tmp_files = [mst_out, checker_out]
    try:
        # check just the MST weight first
        mst_w = extract_answer(mst_out)
        checker_w = extract_answer(checker_out)
        fmt = '%.1f'  # tolerance to 1 decimal place
        str_mst_w = fmt % mst_w
        str_checker_w = fmt % checker_w
        if str_mst_w == str_checker_w:
            print('Weights match! %s %s' % (str_mst_w, str_checker_w))
            return 0

        print('Weight mismatch, comparing vertices!! (checker_mst (%s) - our_mst (%s) = %s)' % (
            str(checker_w), str(mst_w), str(checker_w - mst_w)))

        # sort them
        mst_out2 = random_tmp_filename(10)
        checker_out2 = random_tmp_filename(10)
        # NOTE(review): assumes quiet_remove() tolerates a path that was never
        # created (e.g. if a later step raises before the sort runs) -- confirm
        tmp_files.extend([mst_out2, checker_out2])
        sort_and_order = get_path_to_project_root() + 'src/input/sort_and_order.py '
        os.system(sort_and_order + mst_out + ' 1 > ' + mst_out2)
        os.system(sort_and_order + checker_out + ' 1 > ' + checker_out2)

        # compare them
        os.system('diff %s %s && echo Edges are the same!' % (checker_out2, mst_out2))
        return 0
    finally:
        for tmp_path in tmp_files:
            quiet_remove(tmp_path)
def get_path_to(prec, dims, min_val, max_val):
    """Pick the inputs-list file that matches the given generation parameters.

    dims == 0 selects one of the '*-redge' lists (or 'perf.inputs' for
    precision 1 with weights in [0, 100000]); any other dims selects one of
    the '*-rvert' lists.  The 'p2-*' lists are used only for precision 15
    with values in [0, 1] (and, for vertex inputs, 2 to 4 dimensions).

    Returns the path of the chosen file inside the project's input/ directory.
    """
    looks_like_part2 = (prec == 15) and (min_val == 0) and (max_val == 1)

    if dims != 0:
        if looks_like_part2 and 2 <= dims <= 4:
            list_name = 'p2-rvert-%ud.inputs' % dims
        else:
            list_name = 'other-rvert.inputs'
    elif prec == 1 and min_val == 0 and max_val == 100000:
        list_name = 'perf.inputs'
    elif looks_like_part2:
        list_name = 'p2-redge.inputs'
    else:
        list_name = 'other-redge.inputs'

    return get_path_to_project_root() + 'input/' + list_name
def get_path_to(prec, dims, min_val, max_val):
    """Pick the inputs-list file that matches the given generation parameters.

    dims == 0 selects one of the '*-redge' lists (or 'perf.inputs' for
    precision 1 with weights in [0, 100000]); any other dims selects one of
    the '*-rvert' lists.  The 'p2-*' lists are used only for precision 15
    with values in [0, 1] (and, for vertex inputs, 2 to 4 dimensions).

    Returns the path of the chosen file inside the project's input/ directory.
    """
    looks_like_part2 = (prec == 15) and (min_val == 0) and (max_val == 1)

    if dims != 0:
        if looks_like_part2 and 2 <= dims <= 4:
            list_name = 'p2-rvert-%ud.inputs' % dims
        else:
            list_name = 'other-rvert.inputs'
    elif prec == 1 and min_val == 0 and max_val == 100000:
        list_name = 'perf.inputs'
    elif looks_like_part2:
        list_name = 'p2-redge.inputs'
    else:
        list_name = 'other-redge.inputs'

    return get_path_to_project_root() + 'input/' + list_name
# NOTE(review): this is a fragment -- `variations`, `edges` and `num_versions`
# are bound by enclosing code that is not visible here.
fmt = 'got %u variations, now adding %u complete graphs for a total of %u variations'
print fmt % (len(variations), len(edges), len(variations) + len(edges))

# add the complete graphs
for e in edges:
    # round the vertex count up, then recompute the edge count so that the
    # (e, v) pair describes the complete graph on v vertices
    # NOTE(review): if ceil() returns a float here, str(v) in the size
    # estimate below would also count the '.0' -- confirm
    v = ceil(vertices_in_complete_undirected_graph(e))
    e = edges_in_complete_undirected_graph(v)
    variations.append((e, v))

# compute expected size
tot_sz = 0
max_sz = 0
for (e, v) in variations:
    # per edge size cost is chars for two vertices, an edge weight, and spacing
    my_sz = (e * (2 * len(str(v)) + 8 + 3))
    max_sz = max(max_sz, my_sz)
    tot_sz += my_sz
# scale the total to GB and the largest single file to MB for the report line
tot_sz /= (1024 * 1024 * 1024)
max_sz /= (1024 * 1024)
print 'aggregate input size is %uGB (max input file size is %uMB)' % (
    int(tot_sz), int(max_sz))

# generate the graphs
for i in range(1, num_versions + 1):
    # one inputs-list file per version; every variation is passed to
    # generate_input with --dont-generate and -l pointing at that file
    ifl = get_path_to_project_root() + 'input/nperf-%u.inputs' % i
    print 'generating inputs for ' + ifl
    for (e, v) in variations:
        argstr = '--dont-generate -q -l %s -n %u %u' % (ifl, e, v)
        generate_input(argstr.split())
#!/usr/bin/env python
from data import DataSet, PerfResult, get_tracked_algs_and_revs
from generate_input import get_density, get_percent_of_max
from result import ResultAccumulator
from mstutil import get_path_to_project_root, quiet_remove
import os, sys

DATA_PATH = get_path_to_project_root() + 'writeup/data/perf/'

# figure out which revisions correspond to which algorithms
TRACKED = get_tracked_algs_and_revs()

# vertex values to create data files for
IMPORTANT_VERTS = [250, 700, 4473, 'all']

# confidence interval to use
DEFAULT_CI = 99

def get_output_dat_name(xaxis, alg, rev, index, num_verts):
    """Builds the path (under DATA_PATH) of the data file for one revision of an algorithm"""
    basename = '%s-%s-%s-%u-%s' % (xaxis, alg, str(num_verts), index, rev)
    return DATA_PATH + basename

def make_latest(xaxis, alg, rev, index, num_verts):
    """Re-points the 'latest' symlink for an algorithm at the given data file and returns the link's path"""
    dat_path = get_output_dat_name(xaxis, alg, rev, index, num_verts)
    # the link lives one directory below DATA_PATH, so its target is written
    # relative to that directory
    target = '../' + dat_path[len(DATA_PATH):]
    link_basename = 'latest/%s-%s-%s-latest' % (xaxis, alg, str(num_verts))
    linkname = DATA_PATH + link_basename
    quiet_remove(linkname)
    os.symlink(target, linkname)
    return linkname
#!/usr/bin/env python from data import DataSet, WeightResult from result import ResultAccumulator from mstutil import get_path_to_project_root import os, sys DATA_PATH = get_path_to_project_root() + 'writeup/data/weight/' def gather_weight_data(wtype): # get the results results = {} # maps |V| to ResultAccumulator ds = DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) for data in ds.dataset.values(): result = results.get(data.input().num_verts) if result is None: result = ResultAccumulator(data.mst_weight) results[data.input().num_verts] = result else: result.add_data(data.mst_weight) try: # open a file to output to fh = open(DATA_PATH + wtype + '.dat', 'w') # compute relevant stats and output them print >> fh, '#|V|\tLower\tAverage\tUpper (Lower/Upper from 99% CI)' keys = results.keys() keys.sort() for num_verts in keys:
def main(argv=sys.argv[1:]):
    """Generate inputs over a range of edge densities for a fixed |V|.

    Parses the options and the single NUM_VERTICES argument, validates the
    requested edge:vertex ratio range against what a complete graph on that
    many vertices allows, then walks the ratio from --min to --max (stepping
    additively with -a, multiplicatively otherwise) and calls generate_input
    --num-per-step times at every step.

    argv -- command-line arguments without the program name.

    Returns -1 as soon as generate_input raises or returns non-zero; returns
    None when every input was generated.  Invalid arguments are reported via
    parser.error().
    """
    usage = """usage: %prog [options] NUM_VERTICES
Generates input for evaluating performance at different edge densities for a
given |V|."""
    parser = OptionParser(usage)
    parser.add_option("-a", "--additive-step-type",
                      action="store_true", default=False,
                      help="use additive stepping between numbers of vertices [default: multiplicative]")
    parser.add_option("-l", "--min",
                      type="float", default=4.0,
                      help="minimum edge:vertex ratio to generate a graph for [default: %default]")
    parser.add_option("-n", "--num-per-step",
                      type="int", default=1,
                      metavar="n",
                      help="number of inputs to generate for each step [default: %default]")
    parser.add_option("-s", "--step",
                      type="float", default=2.0,
                      help="step amount between edge:vertex ratios (see -a) [default: %default]")
    parser.add_option("-u", "--max",
                      type="float", default=1500.0,
                      help="maximum edge:vertex ratio to generate a graph for [default: %default]")

    (options, args) = parser.parse_args(argv)
    if len(args) < 1:
        parser.error("missing NUM_VERTICES")
    elif len(args) > 1:
        parser.error("too many arguments")

    # only the conversion itself can raise the ValueError we want to report
    try:
        num_verts = int(args[0])
    except ValueError:
        parser.error('NUM_VERTICES must be a positive integer')

    if num_verts < 1:
        parser.error('NUM_VERTICES must be at least 1')

    inputs_list_file = get_path_to_project_root() + 'input/density-%u.inputs' % num_verts

    if options.min < 1 or options.min > options.max:
        parser.error('-l must be in the range [1, max]')

    max_edges = edges_in_complete_undirected_graph(num_verts)
    # force true division: an integer quotient would truncate the limit and
    # reject ratios that a complete graph can actually reach
    max_ratio = max_edges / float(num_verts)
    if options.max < 1:
        parser.error('-u must be at least 1')
    elif options.max > max_ratio:
        fmt = 'max edge:vertex ratio for %u vertices is %.1f, but -u specified a ratio of %.1f'
        parser.error(fmt % (num_verts, max_ratio, options.max))

    if options.additive_step_type:
        if options.step < 1.0:
            parser.error('-s must be greater than or equal to 1 when -a is used')
    elif options.step <= 1.0:
        parser.error('-s must be greater than 1 when -a is not used')

    d = options.min
    while True:
        for _ in range(options.num_per_step):
            num_edges = int(d * num_verts)

            # stay within a complete graph, but round up to one if we get very close
            if num_edges > max_edges or (num_edges / float(max_edges)) > 0.995:
                num_edges = max_edges
                d = options.max + 1  # stop after this iteration

            gen_args = '-p1 -l %s -n %u %u' % (inputs_list_file, num_edges, num_verts)
            try:
                print('generating new input: ' + gen_args)
                ret = generate_input(gen_args.split())
            except Exception as errstr:
                sys.stderr.write('generate_density_inputs failed for: ' + gen_args + ': ' + str(errstr) + '\n')
                return -1
            if ret != 0:
                sys.stderr.write('generate_density_inputs failed for: ' + gen_args + '\n')
                return -1

        if d >= options.max:
            break

        if options.additive_step_type:
            d += options.step
        else:
            d *= options.step

        # never overshoot: the final step lands exactly on the maximum ratio
        if d > options.max:
            d = options.max
def get_path_to(rev):
    """Return the result/perf/ path for `rev` under the project root, creating that directory first if it does not exist."""
    perf_dir = get_path_to_project_root() + 'result/perf/'
    if os.path.exists(perf_dir):
        return perf_dir + rev
    os.makedirs(perf_dir)
    return perf_dir + rev
#!/usr/bin/env python from mstutil import die, get_path_to_project_root from shutil import copy2, copytree, move from time import strftime import os def sh_or_die(cmd, msg): if os.system(cmd) != 0: die(cmd + ' failed: ' + msg) root = get_path_to_project_root() submit = root + 'submit/' sh_or_die('rm -rf ' + submit, 'could not remove the old submit dir: %s' % submit) # build the binaries src = root + 'src/' srcmst = src + 'mst' sh_or_die('make -C %s && test -f %s' % (src, srcmst), 'unable to build the binaries') copy2(srcmst, root + 'mst') sh_or_die('make -C %s clean' % src, 'unable to cleanup the build byproducts') # copy in the source and make files copytree(src, submit) move(root + 'mst', submit + 'mst') # build the report
# NOTE(review): this is a fragment -- `poms`, `v`, `variations` and
# `num_versions` are bound by enclosing code that is not visible here, and the
# first loop below presumably sits inside a loop over `v`.
for p in poms:
    e = int(get_edges_from_percent_of_max(v, p))
    density = e / float(v)
    # graphs with fewer than 500 vertices and a density strictly between 55
    # and 135 are reported but not added
    if density > 55 and density < 135 and v < 500:
        what = 'skipped: '
    else:
        what = 'added: '
        variations.append( (e, v) )
    print '%sv=%u pom=%.2f e=%u => density=%.2f' % (what, v, p, e, e/float(v))

# compute expected size
tot_sz = 0
max_sz = 0
for (e, v) in variations:
    # per edge size cost is chars for two vertices, an edge weight, and spacing
    my_sz = (e * (2 * len(str(v)) + 8 + 3))
    max_sz = max(max_sz, my_sz)
    tot_sz += my_sz
# both figures are scaled to MB for the report line
tot_sz /= (1024 * 1024)
max_sz /= (1024 * 1024)
print 'aggregate input size is %uMB (max input file size is %uMB)' % (int(tot_sz), int(max_sz))

# generate the graphs
for i in range(1, num_versions+1):
    # one inputs-list file per version; every variation is passed to
    # generate_input with --dont-generate and -l pointing at that file
    ifl = get_path_to_project_root() + 'input/cperf-%u.inputs' % i
    print 'generating inputs for ' + ifl
    for (e, v) in variations:
        argstr = '--dont-generate -q -l %s -n %u %u' % (ifl, e, v)
        generate_input(argstr.split())
def get_path_to(wtype):
    """Return the result/weight/ path for `wtype` under the project root, creating that directory first if it does not exist."""
    weight_dir = get_path_to_project_root() + 'result/weight/'
    if os.path.exists(weight_dir):
        return weight_dir + wtype
    os.makedirs(weight_dir)
    return weight_dir + wtype
def main(argv=sys.argv[1:]):
    """Generate inputs over a range of edge densities for a fixed |V|.

    Parses the options and the single NUM_VERTICES argument, validates the
    requested edge:vertex ratio range against what a complete graph on that
    many vertices allows, then walks the ratio from --min to --max (stepping
    additively with -a, multiplicatively otherwise) and calls generate_input
    --num-per-step times at every step.

    argv -- command-line arguments without the program name.

    Returns -1 as soon as generate_input raises or returns non-zero; returns
    None when every input was generated.  Invalid arguments are reported via
    parser.error().
    """
    usage = """usage: %prog [options] NUM_VERTICES
Generates input for evaluating performance at different edge densities for a
given |V|."""
    parser = OptionParser(usage)
    parser.add_option(
        "-a",
        "--additive-step-type",
        action="store_true",
        default=False,
        help="use additive stepping between numbers of vertices [default: multiplicative]")
    parser.add_option(
        "-l",
        "--min",
        type="float",
        default=4.0,
        help="minimum edge:vertex ratio to generate a graph for [default: %default]")
    parser.add_option(
        "-n",
        "--num-per-step",
        type="int",
        default=1,
        metavar="n",
        help="number of inputs to generate for each step [default: %default]")
    parser.add_option(
        "-s",
        "--step",
        type="float",
        default=2.0,
        help="step amount between edge:vertex ratios (see -a) [default: %default]")
    parser.add_option(
        "-u",
        "--max",
        type="float",
        default=1500.0,
        help="maximum edge:vertex ratio to generate a graph for [default: %default]")

    (options, args) = parser.parse_args(argv)
    if len(args) < 1:
        parser.error("missing NUM_VERTICES")
    elif len(args) > 1:
        parser.error("too many arguments")

    # only the conversion itself can raise the ValueError we want to report
    try:
        num_verts = int(args[0])
    except ValueError:
        parser.error('NUM_VERTICES must be a positive integer')

    if num_verts < 1:
        parser.error('NUM_VERTICES must be at least 1')

    inputs_list_file = get_path_to_project_root(
    ) + 'input/density-%u.inputs' % num_verts

    if options.min < 1 or options.min > options.max:
        parser.error('-l must be in the range [1, max]')

    max_edges = edges_in_complete_undirected_graph(num_verts)
    # force true division: an integer quotient would truncate the limit and
    # reject ratios that a complete graph can actually reach
    max_ratio = max_edges / float(num_verts)
    if options.max < 1:
        parser.error('-u must be at least 1')
    elif options.max > max_ratio:
        fmt = 'max edge:vertex ratio for %u vertices is %.1f, but -u specified a ratio of %.1f'
        parser.error(fmt % (num_verts, max_ratio, options.max))

    if options.additive_step_type:
        if options.step < 1.0:
            parser.error(
                '-s must be greater than or equal to 1 when -a is used')
    elif options.step <= 1.0:
        parser.error('-s must be greater than 1 when -a is not used')

    d = options.min
    while True:
        for _ in range(options.num_per_step):
            num_edges = int(d * num_verts)

            # stay within a complete graph, but round up to one if we get very close
            if num_edges > max_edges or (num_edges / float(max_edges)) > 0.995:
                num_edges = max_edges
                d = options.max + 1  # stop after this iteration

            gen_args = '-p1 -l %s -n %u %u' % (inputs_list_file, num_edges,
                                               num_verts)
            try:
                print('generating new input: ' + gen_args)
                ret = generate_input(gen_args.split())
            except Exception as errstr:
                sys.stderr.write('generate_density_inputs failed for: ' +
                                 gen_args + ': ' + str(errstr) + '\n')
                return -1
            if ret != 0:
                sys.stderr.write('generate_density_inputs failed for: ' +
                                 gen_args + '\n')
                return -1

        if d >= options.max:
            break

        if options.additive_step_type:
            d += options.step
        else:
            d *= options.step

        # never overshoot: the final step lands exactly on the maximum ratio
        if d > options.max:
            d = options.max
#!/usr/bin/env python from data import DataSet, WeightResult from result import ResultAccumulator from mstutil import get_path_to_project_root import os, sys DATA_PATH = get_path_to_project_root() + 'writeup/data/weight/' def gather_weight_data(wtype): # get the results results = {} # maps |V| to ResultAccumulator ds = DataSet.read_from_file(WeightResult, WeightResult.get_path_to(wtype)) for data in ds.dataset.values(): result = results.get(data.input().num_verts) if result is None: result = ResultAccumulator(data.mst_weight) results[data.input().num_verts] = result else: result.add_data(data.mst_weight) try: # open a file to output to fh = open(DATA_PATH + wtype + '.dat', 'w') # compute relevant stats and output them print >> fh, '#|V|\tLower\tAverage\tUpper (Lower/Upper from 99% CI)' keys = results.keys() keys.sort() for num_verts in keys: r = results[num_verts]
#!/usr/bin/env python from mstutil import die, get_path_to_project_root from shutil import copy2, copytree, move from time import strftime import os def sh_or_die(cmd, msg): if os.system(cmd) != 0: die(cmd + ' failed: ' + msg) root = get_path_to_project_root() submit = root + 'submit/' sh_or_die('rm -rf ' + submit, 'could not remove the old submit dir: %s' % submit) # build the binaries src = root + 'src/' srcmst = src + 'mst' sh_or_die('make -C %s && test -f %s' % (src, srcmst), 'unable to build the binaries') copy2(srcmst, root + 'mst') sh_or_die('make -C %s clean' % src, 'unable to cleanup the build byproducts') # copy in the source and make files copytree(src, submit) move(root + 'mst', submit + 'mst') # build the report writeup = root + 'writeup/' report = writeup + 'report.pdf' sh_or_die('make -C %s && test -f %s' % (writeup, report), 'unable to build the report')