def exp_sol(args, dt_train, dt_test, opts, cache_data_path):
    res = {}
    if args.retrain or not osp.exists(cache_data_path):
        for algo, opt in opts.iteritems():
            if 'lambda' in opt and algo != 'liblinear':
                opt['lambda'] = np.hstack(([0], opt['lambda']))
            # one matrix per metric: [0] sparsity, [1] test accuracy
            res[algo] = [
                np.zeros((args.shuffle, len(opts[algo]['lambda']))),
                np.zeros((args.shuffle, len(opts[algo]['lambda'])))
            ]
        for rid in xrange(args.shuffle):
            logging.info('random pass %d', rid)
            # regenerate shuffled copies of the training data (default and binary formats)
            dt_train.rand_path(force=True)
            dt_train.rand_path(tgt_type='bin', force=True)
            for algo, opt in opts.iteritems():
                algo_res = run_sol(dt_train, dt_test, opt)
                res[algo][0][rid, :] = algo_res[0]
                res[algo][1][rid, :] = algo_res[1]
        if not osp.exists(cache_data_path):
            with open(cache_data_path, 'wb') as fh:
                cPickle.dump(res, fh)
    else:
        with open(cache_data_path, 'rb') as fh:
            res = cPickle.load(fh)

    #average sparsity and test accuracy over the random passes
    algo_list = []
    ave_sparsity_list = []
    ave_test_accu_list = []
    for algo, vals in res.iteritems():
        algo_list.append(algo)
        ave_sparsity_list.append(np.average(vals[0], axis=0))
        ave_test_accu_list.append(np.average(vals[1], axis=0))

    #draw sparsity vs test accuracy
    fig.plot(ave_sparsity_list, algo_list, ave_test_accu_list, 'Sparsity',
             'Test Accuracy', dt_train.name + '-sparsity-test-error.pdf')

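# Minimal usage sketch for exp_sol (hypothetical, not a prescribed invocation):
# MockArgs, the dataset names, the cache file name, and the lambda grids below
# are placeholders; only the attributes actually read by exp_sol are provided.
# DataSet is the dataset wrapper referenced in the docstrings below; its
# constructor signature is assumed here.
def _example_exp_sol():
    class MockArgs(object):
        retrain = False   # reuse cached results if the pickle already exists
        shuffle = 3       # number of random passes over the training data

    dt_train = DataSet('a9a', 'a9a.train')   # assumed DataSet constructor
    dt_test = DataSet('a9a', 'a9a.test')
    opts = {
        'stg': {'lambda': np.logspace(-5, -1, 9)},
        'liblinear': {'lambda': np.logspace(-5, -1, 9)},
    }
    exp_sol(MockArgs(), dt_train, dt_test, opts, 'a9a-sol-cache.pkl')
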
def main():
    import mnist_loader
    training_data, validation_data, test_data = mnist_loader.load_data()

    import network
    import utils
    net = network.Network([784, 30, 30, 30, 10],
                          init=utils.NormalWeightInitializer,
                          cost=utils.CrossEntropyCost,
                          norm=utils.L2Regularizer(lmbda=0.0001))
    _, evaluation_accuracy, _, _ = net.SGD(
        30, 10, .14, training_data, test_data,
        # early_stop=utils.NoImprovementInN(10),
        # learning_rate_adjustment=utils.NoImprovementInN(10),
        monitor_evaluation_accuracy=True)

    from fig import plot
    plot(evaluation_accuracy)

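# Assumed entry point: run the MNIST demo when this module is executed
# directly as a script.
if __name__ == '__main__':
    main()
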
def exp_fs(dtrain,
           dtest,
           opts,
           output_path,
           repeat=1,
           retrains=None,
           fold_num=5,
           cv_process_num=1,
           draw_opts={}):
    """Experiment to run all feature-selection algorithms.

    Parameters
    ----------
    dtrain: DataSet
        training dataset
    dtest: DataSet
        test dataset
    opts: dict
        options for each algorithm
    output_path: str
        output path to save the results
    repeat: int
        number of repeats to run the algorithms
    retrains: list[str]
        which algorithms should be retrained, even if they have been trained before
    fold_num: int
        number of folds to do cross validation
    cv_process_num: int
        number of processes to do cross validation
    """
    if osp.exists(output_path):
        with open(output_path, 'rb') as rfh:
            res = cPickle.load(rfh)
    else:
        res = {}

    retrains = [] if retrains is None else retrains
    if len(retrains) == 1 and retrains[0].lower() == 'all':
        retrains = opts.keys()
    retrains = [v.lower() for v in retrains]

    for algo_ori, opt in opts.iteritems():
        algo = algo_ori.lower()
        # normalize keys of previously cached results to lower case
        if algo_ori in res and algo_ori != algo:
            res[algo] = res[algo_ori]
            del res[algo_ori]
        if (algo in res) and (algo not in retrains):
            continue
        res_len = len(opt['lambda'])
        # [0] selected feature numbers, [1] test accuracy, [2] training time
        res[algo] = [np.zeros((repeat, res_len)) for i in xrange(3)]
        for rid in xrange(repeat):
            if rid > 0 and (algo == 'liblinear' or algo == 'fgm'):
                # deterministic batch solvers: reuse the result of the previous pass
                for i in xrange(3):
                    res[algo][i][rid, :] = res[algo][i][rid - 1, :]
            else:
                logging.info('random pass %d', rid)
                dtrain.rand_path(tgt_type='svm', force=True)
                algo_res = run_fs(dtrain, dtest, algo, opt, fold_num,
                                  cv_process_num)
                for i in xrange(3):
                    res[algo][i][rid, :] = algo_res[i]

        #save results after each algorithm
        with open(output_path, 'wb') as wfh:
            cPickle.dump(res, wfh)

    algo_list = []
    ave_feat_nums = []
    ave_test_accuracy = []
    for algo, opt in opts.iteritems():
        if algo.lower() == 'gpu-mrmr':
            continue
        algo_list.append(algo)
        vals = res[algo.lower()]
        ave_feat_nums.append(np.average(vals[0], axis=0))
        ave_test_accuracy.append(np.average(vals[1], axis=0))

    #draw number of selected features vs test accuracy
    # fs_task is assumed to be defined at module level (feature-selection task name)
    fig.plot(
        ave_feat_nums, ave_test_accuracy, '#Selected Features',
        'Test Accuracy (%)', algo_list,
        osp.join(
            dtrain.work_dir,
            dtrain.name.replace('_', '-') + '-%s-test-accuracy.pdf' % (fs_task)),
        **(draw_opts['accu']))

    algo_list = []
    ave_feat_nums = []
    ave_train_time = []
    for algo, opt in opts.iteritems():
        algo_list.append(algo)
        vals = res[algo.lower()]
        ave_feat_nums.append(np.average(vals[0], axis=0))
        ave_train_time.append(np.average(vals[2], axis=0))

    #draw number of selected features vs training time
    fig.plot(
        ave_feat_nums, ave_train_time, '#Selected Features',
        'Training Time (s)', algo_list,
        osp.join(
            dtrain.work_dir,
            dtrain.name.replace('_', '-') + '-%s-train-time.pdf' % (fs_task)),
        **(draw_opts['time']))

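# Minimal usage sketch for exp_fs (hypothetical): the dataset names, result
# file, algorithm names, and parameter grids are placeholders; 'lambda' lists
# the feature-selection budgets / regularization values swept per algorithm.
# DataSet is the wrapper referenced in the docstring; its constructor is assumed.
def _example_exp_fs():
    dtrain = DataSet('rcv1', 'rcv1.train')   # assumed DataSet constructor
    dtest = DataSet('rcv1', 'rcv1.test')
    opts = {
        'fofs': {'lambda': [10, 20, 50, 100]},
        'liblinear': {'lambda': [0.1, 1, 10]},
    }
    draw_opts = {'accu': {}, 'time': {}}     # forwarded to fig.plot as kwargs
    exp_fs(dtrain, dtest, opts, 'rcv1-fs-results.pkl',
           repeat=3, retrains=['fofs'], fold_num=5, cv_process_num=2,
           draw_opts=draw_opts)
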
def exp_online(args, dt_train, dt_test, opts, cache_data_path):
    if args.retrain or not osp.exists(cache_data_path):
        res = {}
        res_log = {}
        for algo, opt in opts.iteritems():
            # per pass: train accuracy, train time, test accuracy, test time
            res[algo] = np.zeros((args.shuffle, 4))
            if algo != 'liblinear' and algo != 'vw':
                res_log[algo] = [None for i in xrange(args.shuffle)]
        for rid in xrange(args.shuffle):
            logging.info('random pass %d', rid)
            dt_train.rand_path(force=True)
            for algo, opt in opts.iteritems():
                algo_res = run_ol(dt_train, dt_test, opt, args.retrain,
                                  args.fold_num)
                res[algo][rid, :] = algo_res[0:4]
                if algo != 'liblinear' and algo != 'vw':
                    res_log[algo][rid] = algo_res[4]
        if not osp.exists(cache_data_path):
            cache_data = {'res': res, 'res_log': res_log}
            with open(cache_data_path, 'wb') as fh:
                cPickle.dump(cache_data, fh)
    else:
        with open(cache_data_path, 'rb') as fh:
            cache_data = cPickle.load(fh)
        res = cache_data['res']
        res_log = cache_data['res_log']

    #save accuracy and time cost
    out_file = open(args.output, 'w')
    line = '{0: <12}{1:<16}{1:<16}{2:<16}{2:<15}'.format(
        'algorithm', 'train', 'test')
    print line
    out_file.write('%s\n' % (line))
    line = '{0: <12}{1:<16}{2:<16}{1:<16}{2:<15}'.format(
        '', 'accuracy', 'time(s)')
    print line
    out_file.write('%s\n' % (line))
    ddof = 1 if args.shuffle > 1 else 0
    for algo, vals in res.iteritems():
        ave_vals = np.average(vals, axis=0)
        std_vals = np.std(vals, axis=0, ddof=ddof)
        line = '{0: <12}{1:.4f}+/-{2:.4f} {3:.4f}+/-{4:.4f} {5:.4f}+/-{6:.4f} {7:.4f}+/-{8:.4f}'.format(
            algo, ave_vals[0], std_vals[0], ave_vals[1], std_vals[1],
            ave_vals[2], std_vals[2], ave_vals[3], std_vals[3])
        print line
        out_file.write('%s\n' % (line))
    out_file.close()

    #draw training log
    xs = []
    error_rates = []
    update_nums = []
    algo_list = []
    for algo, log in res_log.iteritems():
        algo_list.append(algo)
        xs.append(log[0][:, 0].astype(np.int))
        ave_update_nums = np.zeros(log[0][:, 2].shape)
        ave_error_rates = np.zeros(log[0][:, 3].shape)
        for rid in xrange(args.shuffle):
            ave_update_nums = ave_update_nums + log[rid][:, 2]
            ave_error_rates = ave_error_rates + log[rid][:, 3]
        error_rates.append(ave_error_rates / args.shuffle)
        update_nums.append(ave_update_nums / args.shuffle)

    fig.plot(xs, algo_list, error_rates, 'Number of samples',
             'Cumulative Error Rate', dt_train.name + '-error-rate.pdf')
    fig.plot(xs, algo_list, update_nums, 'Number of samples',
             'Cumulative Number of Updates', dt_train.name + '-update-num.pdf')

def exp_ol(dtrain,
           dtest,
           opts,
           output_path,
           repeat=1,
           retrains=None,
           fold_num=5,
           cv_process_num=1,
           draw_opts={}):
    """Experiment to run all online-learning algorithms.

    Parameters
    ----------
    dtrain: DataSet
        training dataset
    dtest: DataSet
        test dataset
    opts: dict
        options for each algorithm
    output_path: str
        output path to save the results
    repeat: int
        number of repeats to run the algorithms
    retrains: list[str]
        which algorithms should be retrained, even if they have been trained before
    fold_num: int
        number of folds to do cross validation
    cv_process_num: int
        number of processes to do cross validation
    """
    if osp.exists(output_path):
        with open(output_path, 'rb') as rfh:
            save_obj = cPickle.load(rfh)
    else:
        save_obj = {'res': {}, 'train_log': {}}
    res = save_obj['res']
    train_log = save_obj['train_log']

    retrains = [] if retrains is None else retrains
    if len(retrains) == 1 and retrains[0].lower() == 'all':
        retrains = opts.keys()
    retrains = [v.lower() for v in retrains]

    for algo, opt in opts.iteritems():
        algo = algo.lower()
        if algo in res and algo not in retrains:
            continue
        if algo not in retrains:
            retrains.append(algo)
        # per pass: train accuracy, train time, test accuracy, test time
        res[algo] = np.zeros((repeat, 4))
        if algo != 'liblinear' and algo != 'vw':
            train_log[algo] = [None for i in xrange(repeat)]

    for rid in xrange(repeat):
        logging.info('random pass %d', rid)
        dtrain.rand_path(force=True)
        for algo in retrains:
            # note: assumes the keys of `opts` are already lower case
            opt = opts[algo]
            algo_res = run_ol(dtrain,
                              dtest,
                              algo,
                              opt,
                              fold_num=fold_num,
                              cv_process_num=cv_process_num)
            res[algo][rid, :] = algo_res[0:4]
            if algo != 'liblinear' and algo != 'vw':
                train_log[algo][rid] = algo_res[4]

        #save results after each random pass
        save_obj['res'] = res
        save_obj['train_log'] = train_log
        with open(output_path, 'wb') as wfh:
            cPickle.dump(save_obj, wfh)

    #print train and test results
    line = '{0: <12}{1:<16}{1:<16}{2:<16}{2:<15}'.format(
        'algorithm', 'test', 'train')
    print line
    line = '{0: <12}{1:<16}{2:<16}{1:<16}{2:<15}'.format(
        '', 'accuracy', 'time(s)')
    print line
    ddof = 1 if repeat > 1 else 0
    for algo, opt in opts.iteritems():
        vals = res[algo.lower()]
        ave_vals = np.average(vals, axis=0)
        std_vals = np.std(vals, axis=0, ddof=ddof)
        line = ('{0: <12}{1:.4f}+/-{2:.4f} {3:.4f}+/-{4:.4f} '
                '{5:.4f}+/-{6:.4f} {7:.4f}+/-{8:.4f}').format(
                    algo, ave_vals[0], std_vals[0], ave_vals[1], std_vals[1],
                    ave_vals[2], std_vals[2], ave_vals[3], std_vals[3])
        print line

    #draw training log
    data_nums = []
    update_nums = []
    error_rates = []
    algo_list = []
    for algo, opt in opts.iteritems():
        if algo.lower() == 'liblinear' or algo.lower() == 'vw':
            continue
        log = train_log[algo.lower()]
        algo_list.append(algo)
        data_nums.append(log[0][:, 0].astype(np.int))
        ave_update_nums = np.zeros(log[0][:, 2].shape)
        ave_error_rates = np.zeros(log[0][:, 3].shape)
        for rid in xrange(repeat):
            ave_update_nums = ave_update_nums + log[rid][:, 2]
            ave_error_rates = ave_error_rates + log[rid][:, 3]
        error_rates.append(ave_error_rates / repeat)
        update_nums.append(ave_update_nums / repeat)

    fig.plot(data_nums, error_rates, 'Number of samples',
             'Cumulative Error Rate', algo_list,
             osp.join(dtrain.work_dir,
                      dtrain.name.replace('_', '-') + '-error-rate.pdf'),
             **(draw_opts['train-error']))
    fig.plot(data_nums, update_nums, 'Number of samples',
             'Cumulative Number of Updates', algo_list,
             osp.join(dtrain.work_dir,
                      dtrain.name.replace('_', '-') + '-update-num.pdf'),
             **(draw_opts['update-num']))

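# Minimal usage sketch for exp_ol (hypothetical): algorithm names, dataset
# names, and option contents are placeholders; exp_ol expects draw_opts to
# carry the 'train-error' and 'update-num' keyword dictionaries forwarded to
# fig.plot above. DataSet's constructor is assumed as in the earlier sketches.
def _example_exp_ol():
    dtrain = DataSet('mnist', 'mnist.train')   # assumed DataSet constructor
    dtest = DataSet('mnist', 'mnist.test')
    opts = {
        'ogd': {'params': {'eta': 1.0}},       # placeholder option contents
        'liblinear': {},
    }
    draw_opts = {'train-error': {}, 'update-num': {}}
    exp_ol(dtrain, dtest, opts, 'mnist-ol-results.pkl',
           repeat=5, retrains=['all'], fold_num=5, cv_process_num=4,
           draw_opts=draw_opts)
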