import csv
from optparse import OptionParser


def main():
    # Command Line Arguments
    # Read name of CSV file from command line
    usage = "\n%prog results.csv tune.conf table.csv\nWhere results.csv was generated by the auto-tuner, tune.conf was the configuration file used for tuning, and table.csv is a new results file to be created."
    parser = OptionParser(usage=usage)
    options, args = parser.parse_args()

    if len(args) != 3:
        parser.error("Expected three arguments: a CSV log file, a configuration file and a CSV file to be created.")

    csv_file = args[0]
    conf_file = args[1]
    output_file = args[2]

    # Import the CSV data
    print "Reading '" + csv_file + "'"
    tests, vars, possValues, repeat = readCSV(csv_file)

    # Get the VarTree from the conf file.
    print "Reading '" + conf_file + "'"
    vartree = readVarTree(conf_file)

    # Check the conf file and results file match.
    if vars != vartree.flatten():
        print "The variable tree in the conf file does not match the variables in the CSV file."
        exit()

    # Use the variable independence info to generate the results of brute-force
    # testing using only those from the CSV file.
    print "Reconstructing exhaustive list of tests"
    all_data = reconstruct_data(vartree, possValues, tests)

    # all_data is a huge mapping from valuations to scores.
    # We should put it in order and in the correct format before outputting.
    #print "Raw Data:"
    #for val, score in all_data.iteritems():
    #    print str(val) + " -> " + str(score)

    # Calculate the exhaustive list of tests.
    # reversed(vars) causes the early variables to vary slowest, which seems nicest (?)
    varVals = [[(var, val) for val in possValues[var]] for var in reversed(vars)]
    valuations = map(dict, crossproduct(varVals))

    # Write out a new CSV file with the exhaustive results.
    # Would also be good to merge in the individual test scores for those
    # tests which WERE actually run.
    print "Writing '" + output_file + "'"

    try:
        with open(output_file, 'wb') as f:
            writer = csv.writer(f)

            header = ["TestNo"] + vars + ["Score_" + str(i) for i in range(1, repeat + 1)] + ["Score_Overall"]
            #print ", ".join(header)
            writer.writerow(header)

            for idx, valuation in enumerate(valuations):
                overall = [""]
                scores = [""] * repeat

                if dict2key(valuation) in all_data:
                    overall = [str(all_data[dict2key(valuation)])]

                matching_tests = [t for t in tests if t[1] == valuation]
                if len(matching_tests) > 0:
                    scores = matching_tests[0][2]

                row = [str(idx + 1)] + [str(valuation[var]) for var in vars] + scores + overall
                #print ", ".join(row)
                writer.writerow(row)

    except IOError:
        print "Could not write to file: '" + output_file + "'"
        exit()

    print "Done"
    exit()
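
# The helpers used above (readCSV, readVarTree, reconstruct_data, crossproduct,
# dict2key, ...) are defined elsewhere in the tuner and are not shown in this
# listing. The sketch below is only an illustration of how crossproduct is
# assumed to behave, inferred from its call sites: it takes a list of lists and
# returns every combination containing one element from each inner list, so
# that map(dict, crossproduct(varVals)) yields one valuation per combination.
# It is not the tuner's actual implementation.
def crossproduct(lists):
    import itertools
    # Each element of the product is a tuple of (var, val) pairs,
    # which dict() turns into a single valuation.
    return [list(combo) for combo in itertools.product(*lists)]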
def mix_data(data_1, data_2):
    # To mix two sets of data, do the following:
    # 1. There must be at least one common row in the data sets.
    # 2. Calculate an 'offset' for each element of data_2 from the score of
    #    this common row.
    # 3. The missing combinations of data_1 and data_2's tests can be found
    #    by applying the offset to the test scores already in data_1.

    data_overall = data_1.copy()
    data_overall.update(data_2)

    # Find common row(s)
    common = [v for v in data_1.keys() if v in data_2.keys()]

    if len(common) == 0:
        print "Not enough tests in the input file to reconstruct the exhaustive list."
        exit()

    # In general, common may have many elements, but for CSV files generated
    # by the tuner, it will only have one.
    common = common[0]
    common_score = data_overall[common]

    #print "Mixing Data:"
    #print "data_1:"
    #for v, s in data_1.iteritems():
    #    print str(v) + " -> " + str(s)
    #print "data_2:"
    #for v, s in data_2.iteritems():
    #    print str(v) + " -> " + str(s)
    #print "common:"
    #print common

    # Calculate the differences from the common result,
    # map them to score offsets which are caused by that difference.
    offsets = {}
    for valuation, score in data_2.iteritems():
        diff = dict2key(valuation_diff(common, valuation))
        offsets[diff] = score - common_score
    del offsets[()]

    #print "offsets (of data_2 from common):"
    #print offsets

    # Now use these offsets to create new tests in data_overall.
    # For every test in data_1, see if applying any of the offsets to it would
    # result in some new data points to add to data_overall.
    for valuation, score in data_1.iteritems():
        for val_diff, score_diff in offsets.iteritems():
            new_valuation = dict(valuation)
            new_valuation.update(val_diff)
            new_valuation = dict2key(new_valuation)
            if new_valuation not in data_overall:
                if score is not None:
                    data_overall[new_valuation] = score + score_diff
                    #print "Found new data point: "
                    #print str(new_valuation) + " -> " + str(data_overall[new_valuation])

    #print "result: "
    #for v, s in data_overall.iteritems():
    #    print str(v) + " -> " + str(s)
    #print

    return data_overall
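
# dict2key and valuation_diff are part of the tuner and not shown here. The
# sketches below are assumptions inferred from how mix_data uses them, given
# for illustration only. dict2key is assumed to turn a valuation dict into a
# canonical, hashable key: a tuple of (variable, value) pairs sorted by
# variable name (so dict2key({}) == (), which is why mix_data deletes the ()
# offset). valuation_diff is assumed to return the entries of its second
# argument whose values differ from the first; both arguments may be canonical
# keys as produced by dict2key.
def dict2key(valuation):
    # Canonical hashable form of a valuation (assumed format).
    return tuple(sorted(dict(valuation).items()))


def valuation_diff(base, other):
    # Entries of 'other' that differ from 'base' (assumed behaviour).
    base, other = dict(base), dict(other)
    return dict((var, val) for var, val in other.items() if base.get(var) != val)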
def reconstruct(vt, ancestors):
    # vt        - The VarTree to generate an exhaustive list of tests for.
    # ancestors - A valuation of the current setting of any ancestor variables.
    #             N.B. this is DIFFERENT to 'presets' from the optimisation algorithm.

    # Calculate the list of top-level tests
    varVals = [[(var, val) for val in possValues[var]] for var in vt.vars]
    valuations = map(dict, crossproduct(varVals))

    # data is the mapping of valuations to scores.
    data = {}

    if len(vt.subtrees) == 0:
        # At a leaf node, we should already have an exhaustive list of
        # tests, so simply return these.

        # The problem is to find which tests in the log were generated at
        # this point in the tree optimisation. To do this, we search for
        # all tests which have the same setting of ancestor variables and
        # which have a constant setting of all variables outside the
        # subtree. These will be the tests where this node was being
        # optimised this time through. In general, there may be more than
        # one such set, but for our optimisation algorithm, there will only
        # be one.
        relevant_tests = {}
        for t in tests:
            # Get the valuation of the OTHER parameters.
            outside_valuation = dict(t[1])  # copies
            for v in vt.flatten():
                del outside_valuation[v]

            # We are only interested in tests with the correct setting of ancestor variables.
            if not match_anc(outside_valuation, ancestors):
                continue

            # Canonical format
            outside_valuation = dict2key(outside_valuation)

            # Add this test to the log
            if outside_valuation not in relevant_tests:
                relevant_tests[outside_valuation] = []
            relevant_tests[outside_valuation].append(t)

        # Remove singletons (these are when other parameters are varying)
        for val in relevant_tests.keys():
            if len(relevant_tests[val]) <= 1:
                del relevant_tests[val]

        #print "At node: " + str(vt)
        #print "ancestors: " + str(ancestors)
        #for val, ts in relevant_tests.iteritems():
        #    print "outside_valuation: " + str(val)
        #    print "relevant tests: " + str([t[0] for t in ts])
        #print

        # If the CSV file used was generated by the tuner's optimisation
        # algorithm, then relevant_tests should now only have one entry.
        # This is because we will never change variables outside of the
        # node then re-test it without first changing the ancestor values.
        if len(relevant_tests) == 0:
            print "Not enough information in CSV file to reconstruct an exhaustive list of tests."
            exit()
        if len(relevant_tests) > 1:
            print "Warning: More tests were found than expected, was this CSV file generated by the tuner? Continuing anyway."

        # If there are multiple entries, combine them.
        # If not, just use the one we have.
        relevant_tests_list = []
        for ts in relevant_tests.itervalues():
            relevant_tests_list += ts

        # Add the tests we have found to the table for this node.
        for t in relevant_tests_list:
            try:
                score = float(t[3])
            except ValueError:
                score = None
            data[dict2key(t[1])] = score

        # Hopefully those were all the combinations we needed. The missing
        # ones will cause a failure or be added (as 'None') later.

    else:
        # At a tree node, for each top-level valuation we recursively
        # reconstruct the subtrees, then combine them to give an exhaustive
        # set of subtree tests. Once we have done this for all top-level
        # valuations, we have an exhaustive list of valuations of this node.
        for val in valuations:
            # Add the current top-level valuation to ancestors for the recursive calls.
            anc = ancestors.copy()
            anc.update(val)

            # Get the exhaustive tables for each subtree.
            subtree_data_list = [reconstruct(st, anc) for st in vt.subtrees]

            # Now combine the subtree tables to give one big table for all
            # subtrees for this valuation of the top-level variables.
            subtree_data = reduce(mix_data, subtree_data_list)

            # Now combine this into the overall table, for an exhaustive
            # set of tests for this entire node in the end.
            data.update(subtree_data)

    # Done
    return data
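
# match_anc is another tuner helper that is not shown in this listing. From
# its use in reconstruct() it is assumed to check that a (partial) valuation
# agrees with the given ancestor settings on every ancestor variable; this is
# a minimal sketch under that assumption, not the tuner's own code.
def match_anc(valuation, ancestors):
    return all(valuation.get(var) == val for var, val in ancestors.items())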
def main():
    # Command line arguments
    usage = "\n%prog results.csv tune.conf\nWhere results.csv was generated by the auto-tuner and tune.conf is the configuration file used for the tuning."
    parser = OptionParser(usage=usage)
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.error("Expected two arguments.")

    csv_file = args[0]
    conf_file = args[1]

    # Import the CSV data
    print "Reading '" + csv_file + "'"
    tests, vars, possValues, repeat = readCSV(csv_file)
    # tests is a list of tuples (test_no, valuation, score_list, score_overall)

    # Get the VarTree from the conf file.
    print "Reading '" + conf_file + "'"
    vartree = readVarTree(conf_file)

    # Check the conf file and results file match.
    if vars != vartree.flatten():
        print "The variable tree in the conf file does not match the variables in the CSV file."
        exit()

    # For each node in the tree, find its importance.
    subtrees = get_subtrees(vartree)

    #print "%d subtrees: " % len(subtrees)
    #for st in subtrees:
    #    print st

    # For each subtree, we want to find scores in the log which were recorded
    # while that subtree was varied, i.e. collect all scores where the other
    # variables were constant. In general, there may be more than one setting
    # of "other variables constant", so we'll take an average over these. For
    # the system's optimisation algorithm however, there should only be one.
    importance = {}
    for st in subtrees:
        for t in tests:
            # Get the valuation of the OTHER parameters, in a canonical format.
            outsideValuation = dict(t[1])  # copies
            for v in st.flatten():
                del outsideValuation[v]
            outsideValuation = dict2key(outsideValuation)

            # Get the score. Use overall so some noise is reduced.
            try:
                score = float(t[3])
            except ValueError:
                continue

            # Add the scores
            if importance.has_key(st):
                if importance[st].has_key(outsideValuation):
                    importance[st][outsideValuation].append(score)
                else:
                    importance[st][outsideValuation] = [score]
            else:
                importance[st] = {}
                importance[st][outsideValuation] = [score]

    # Remove singletons (these are when other parameters are varying)
    for st in importance.keys():
        for val in importance[st].keys():
            if len(importance[st][val]) <= 1:
                del importance[st][val]

    # Now importance[st] is a dict mapping possible other valuations to lists
    # of scores at that outside valuation.
    #print
    #for st in subtrees:
    #    print "NODE: " + str(st)
    #    print "SCORES: " + str(importance[st])
    #print

    # Convert the lists of scores into ranges,
    # and the ranges into an average.
    for st in importance.keys():
        for val in importance[st].keys():
            importance[st][val] = score_range(importance[st][val])
        importance[st] = avg_range(importance[st].values())

    # Print the importances sorted:
    print
    for st, imp in sorted(importance.items(), key=lambda x: x[1], reverse=True):
        print "Node: " + str(st)
        print "Variation: %#.3g" % imp  # (average range)
        print
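
# get_subtrees is defined elsewhere in the tuner. From its use above it is
# assumed to return every node in the variable tree (the root plus all nested
# subtrees), so that each node's importance can be measured separately. A
# minimal recursive sketch, assuming each node has a 'subtrees' attribute as
# used in reconstruct():
def get_subtrees(vt):
    nodes = [vt]
    for st in vt.subtrees:
        nodes += get_subtrees(st)
    return nodes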
def main():
    # Command line arguments
    usage = "\n%prog results.csv\nWhere results.csv was generated by the auto-tuner."
    parser = OptionParser(usage=usage)
    options, args = parser.parse_args()

    if len(args) != 1:
        parser.error("Expected a single CSV file argument.")

    csv_file = args[0]

    # Import the CSV data
    print "Reading '" + csv_file + "'"
    tests, vars, possValues, repeat = readCSV(csv_file)
    # tests is a list of tuples (test_no, valuation, score_list, score_overall)

    # Idea:
    # For each parameter, and each test, save the score to a list.
    # The lists are indexed by the valuation of all other parameters.
    # Remove all lists with only a single entry.
    # This gives, for each setting of outside parameters, the range of possible scores.
    importance = {}
    for var in vars:
        for t in tests:
            # Get the valuation of the OTHER parameters, in a canonical format.
            outsideValuation = dict(t[1])  # copies
            del outsideValuation[var]
            outsideValuation = dict2key(outsideValuation)

            # Get the score
            try:
                score = float(t[3])
            except ValueError:
                continue

            # Add the scores
            if importance.has_key(var):
                if importance[var].has_key(outsideValuation):
                    importance[var][outsideValuation].append(score)
                else:
                    importance[var][outsideValuation] = [score]
            else:
                importance[var] = {}
                importance[var][outsideValuation] = [score]

    # Remove singletons (these are when other parameters are varying)
    for var in importance.keys():
        for val in importance[var].keys():
            if len(importance[var][val]) <= 1:
                del importance[var][val]

    # Print data
    #for var, lists in importance.iteritems():
    #    for val, l in lists.iteritems():
    #        print "VAR: " + str(var)
    #        print "Valuation: " + str(val)
    #        print "Scores: " + str(l)
    #        print

    # Convert the lists of scores to ranges.
    for var in importance.keys():
        for val in importance[var].keys():
            importance[var][val] = score_range(importance[var][val])

    # Print data
    #print
    #for var, ranges in importance.iteritems():
    #    print "Parameter: " + str(var)
    #    print "Score Ranges: " + str(ranges.values())
    #    print "Average Range: %.2f" % avg_range(ranges.values())
    #    print

    # Convert the lists of ranges into an average
    for var in importance.keys():
        importance[var] = avg_range(importance[var].values())

    # Print the importances sorted:
    print
    for var, imp in sorted(importance.items(), key=lambda x: x[1], reverse=True):
        print "Parameter: " + str(var)
        print "Variation: %#.3g" % imp  # (average range)
        print
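
# score_range and avg_range are tuner helpers not shown in this listing. From
# the comments above ("Variation" reported as an "average range") they are
# assumed to reduce a list of scores to its spread (max - min) and a list of
# such spreads to their mean. These are illustrative sketches under that
# assumption, not the tuner's actual implementations.
def score_range(scores):
    return max(scores) - min(scores)


def avg_range(ranges):
    return sum(ranges) / float(len(ranges))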