def makeWekaOptions(learning_rate, momentum, number_hidden, num_cv, costs = None):
    """Return Weka option string for specified values.

    The values are collected into a map keyed by option letter
    ('L' learning_rate, 'M' momentum, 'H' number_hidden, 'x' num_cv) and
    handed to mapToWekaOptions(), which produces the returned value.

    costs -- optional mapping indexed with the string keys 'True' and
             'False'.  When truthy, a 2x2 cost matrix file is written to a
             temp path ending in '.cost' and that path is added to the
             options under 'm'.  The zero diagonal suggests the two values
             are misclassification costs -- TODO confirm which class each
             key refers to.
    """
    options_map = {'M': momentum, 'L': learning_rate, 'H': number_hidden, 'x': num_cv}
    if costs:
        cost_matrix_path = csv.makeTempPath('cost') + '.cost'
        options_map['m'] = cost_matrix_path
        # The header strings are written verbatim: no % formatting is applied,
        # so the doubled percent signs end up in the file as-is.
        cost_matrix = ['%% Rows Columns',
                       spaceSeparatedLine([2, 2]),
                       '%% Matrix elements',
                       spaceSeparatedLine([0.0, costs['True']]),
                       spaceSeparatedLine([costs['False'], 0.0])]
        # Close the file deterministically so its contents are flushed to disk
        # before the returned options (which reference the path) are used.
        with open(cost_matrix_path, 'w') as cost_file:
            cost_file.write('\n'.join(cost_matrix))
    return mapToWekaOptions(options_map)
def testMatrixMLP(matrix, columns, opts = mlp_opts):
    """Run MLP on attributes with index in columns.

    matrix  -- list of rows; the last element of each row is the outcome
    columns -- list of attribute indexes to keep (must be a list, since it
               is concatenated with [-1])
    opts    -- options passed through to runMLPTrain()

    Returns the tuple (accuracy, temp_csv, temp_results, dt), where dt is
    the second value returned by runMLPTrain() (presumably the elapsed
    time -- confirm against runMLPTrain).

    When the module-level flag is_testing is set, nothing is written or
    trained.  A synthetic accuracy is returned instead: the reciprocal of the
    summed distance of the chosen column indexes from num_attributes/2, with
    dt fixed at 0.1.  That sum can be zero (e.g. empty columns), in which
    case the division raises ZeroDivisionError.
    """
    picked = columns + [-1]  # -1 keeps the outcome column
    sub_matrix = []
    for row in matrix:
        sub_matrix.append([row[i] for i in picked])

    # Temp file names encode the size of the attribute subset
    stem = csv.makeTempPath('subset' + ('%03d' % len(columns)) + '_')
    temp_csv = stem + '.csv'
    temp_results = stem + '.results'

    if not is_testing:
        csv.writeCsv(temp_csv, sub_matrix)
        accuracy, dt = runMLPTrain(temp_csv, temp_results, opts)
    else:
        num_attributes = len(matrix[0]) - 1
        spread = sum([abs(x - num_attributes/2) for x in columns])
        accuracy, dt = 1.0/float(spread), 0.1
    return (accuracy, temp_csv, temp_results, dt)
# Each experiment below is guarded by `if False:` and therefore never runs
# as written.

# Experiment: attribute selection via selectAttibutesGA().
if False:
    selectAttibutesGA()

# Experiment: try every hidden-layer size from 1 to num_subset - 1 on the
# 'subset.best025' data, appending one summary row per size to a summary file
# and keeping a copy of the results file of the most accurate run so far.
if False:
    num_subset = 25
    in_filename = csv.makeCsvPath('subset.best' + ('%03d'%num_subset))
    csv_results_name = csv.makePath('hidden.layer.results')
    csv_summary_name = csv.makeCsvPath('hidden.layer.summary')
    csv_best_name = csv.makeCsvPath('hidden.layer.best')
    # The summary file is closed deterministically, even if a run raises.
    with open(csv_summary_name, 'w') as csv_summary:
        best_accuracy = 0.0
        for num_hidden in range(1, num_subset):
            opts = '-H ' + str(num_hidden) + ' -x 4'
            out_filename = csv.makeCsvPath('num.hidden' + ('%03d'%num_hidden))
            temp_base = csv.makeTempPath('num.hidden' + ('%03d'%num_hidden))
            temp_results = temp_base + '.results'
            accuracy, duration = runMLPTrain(in_filename, temp_results, opts)
            # best_accuracy is recorded *before* it is updated for this run
            summary = [num_hidden, accuracy, best_accuracy, duration, temp_results]
            # Single-argument parenthesised form prints the same text under
            # the Python 2 print statement and the print function.
            print(summary)
            csv_line = ','.join([str(e) for e in summary])
            csv_summary.write(csv_line + '\n')
            # Flush after every row so partial results survive an interrupted run
            csv_summary.flush()
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                shutil.copyfile(temp_results, csv_results_name)

# Experiment setup writing to 'learning.rate.results' (presumably a
# learning-rate search, judging by the file name -- confirm).
if False:
    num_subset = 25
    in_filename = csv.makeCsvPath('subset.best' + ('%03d'%num_subset))
    csv_results_name = csv.makePath('learning.rate.results')