worst_list = [True] if options.do_best_worst: worst_list = [False, True] start_description = "@@ Starting " + sys.argv[0] + " at " + time.ctime() + " with args " + str(sys.argv[1:]) print start_description print "base file", base_file print "algo keys", algo_keys print "worst list", worst_list logging.info(start_description) global base_relation global base_comments base_relation, base_comments, base_attrs, base_data = arff.readArff(base_file) base_data.sort() print "base_data", len(base_data), len(base_data[0]) print "classifer algorithms used:", algo_keys def doOneRun(): print "-------------------- START -------------------- " + time.ctime() print "input file:", base_file print "test fraction:", options.test_fraction print "algorithms:", getAlgoDictKeys() print "class index:", class_index print "do worst:", getDoWorst() logging.info("-------------------- START -------------------- " + time.ctime()) logging.info("input file: " + base_file) logging.info("test fraction: " + str(options.test_fraction))
# Set random seed so that each run gives same results random.seed(555) global output_dir parser = optparse.OptionParser('usage: python ' + sys.argv[0] + ' [options] <input file>') parser.add_option('-o', '--output', dest='output_dir', default='output', help='output directory') # parser.add_option('-d', '--dict', action='store_true', dest='dict_only', default=False, help='use dictionaray look-up only') #parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='show details in output') (options, args) = parser.parse_args() if len(args) < 1: print parser.usage print 'options:', options print 'args', args exit() filename = args[0] output_dir = options.output_dir misc.mkDir(output_dir) relation, comments, attributes, data = arff.readArff(filename) print 'Algorithms to test:', WC.all_algo_keys for algo_key in WC.all_algo_keys: print '======================= findBestAttributes:', filename, algo_key findBestAttributes(filename, algo_key, data, attributes)
def select_attribute_indexes(attribute_index_map, names_subset):
    """Return the column indexes of the named attributes in ascending order.

    attribute_index_map maps attribute name -> column index, and names_subset
    is an iterable of the attribute names to keep. Names that are not in the
    map are ignored. The result is sorted so that the selected columns keep
    the relative order they have in the base file; iterating the dict directly
    would yield them in arbitrary order under Python 2 / Jython.
    """
    wanted = set(names_subset)  # O(1) membership tests instead of list scans
    return sorted([index for name, index in attribute_index_map.items()
                   if name in wanted])


if __name__ == '__main__':
    # Single-argument print(...) behaves the same under Python 2 and 3.
    if len(sys.argv) < 3:
        print('Usage: jython get_attribute_subset.py <base-arff-file> <attrs-arff-file>')
        sys.exit()

    base_filename = sys.argv[1]
    attrs_filename = sys.argv[2]
    # Output name: the base file name with '.attr_subset' inserted before the
    # extension.
    base_root, base_ext = os.path.splitext(base_filename)
    out_filename = base_root + '.attr_subset' + base_ext

    print(base_filename)
    print(attrs_filename)
    print(out_filename)

    relation, comments, attributes, data = arff.readArff(base_filename)
    # Only the attribute list of the second file is used.
    _, _, attributes_subset, _ = arff.readArff(attrs_filename)

    # Attribute name -> column index in the base file (last one wins if a
    # name is repeated).
    attribute_index_map = dict((a['name'], i) for i, a in enumerate(attributes))
    names_subset = [a['name'] for a in attributes_subset]

    # Base-file columns whose names also appear in the attrs file, in
    # base-file order.
    indexes_subset = select_attribute_indexes(attribute_index_map, names_subset)

    out_attributes = [attributes[i] for i in indexes_subset]
    out_data = [[d[i] for i in indexes_subset] for d in data]