Example #1
def exp_sol(args, dt_train, dt_test, opts, cache_data_path):
    res = {}
    if args.retrain or not osp.exists(cache_data_path):
        for algo, opt in opts.iteritems():
            # prepend lambda=0 to each regularization path
            if 'lambda' in opt and algo != 'liblinear':
                opt['lambda'] = np.hstack(([0], opt['lambda']))
            # one (shuffle x lambda) matrix for sparsity, one for test accuracy
            res[algo] = [
                np.zeros((args.shuffle, len(opt['lambda']))),
                np.zeros((args.shuffle, len(opt['lambda'])))
            ]

        for rid in xrange(args.shuffle):
            logging.info('random pass %d', rid)
            # reshuffle the training data in both the default and binary formats
            dt_train.rand_path(force=True)
            dt_train.rand_path(tgt_type='bin', force=True)
            for algo, opt in opts.iteritems():
                algo_res = run_sol(dt_train, dt_test, opt)
                res[algo][0][rid, :] = algo_res[0]
                res[algo][1][rid, :] = algo_res[1]

        # always refresh the cache after retraining
        with open(cache_data_path, 'wb') as fh:
            cPickle.dump(res, fh)
    else:
        with open(cache_data_path, 'rb') as fh:
            res = cPickle.load(fh)

    #average sparsity and test accuracy over the random passes

    algo_list = []
    ave_sparsity_list = []
    ave_test_accu_list = []
    for algo, vals in res.iteritems():
        algo_list.append(algo)
        ave_sparsity = np.average(vals[0], axis=0)
        ave_test_accu = np.average(vals[1], axis=0)
        ave_sparsity_list.append(ave_sparsity)
        ave_test_accu_list.append(ave_test_accu)

    #draw sparsity vs test accuracy
    fig.plot(ave_sparsity_list, algo_list, ave_test_accu_list, 'Sparsity',
             'Test Accuracy', dt_train.name + '-sparsity-test-error.pdf')
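
For context, here is a minimal sketch of how exp_sol might be driven. The argparse flags and the opts layout (a per-algorithm 'lambda' grid) are inferred from how the function reads args and opts; the algorithm names, cache path, and the DataSet objects are placeholders, not the project's documented interface.

import argparse
import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--retrain', action='store_true')
parser.add_argument('--shuffle', type=int, default=3)  # number of random passes
args = parser.parse_args()

# dt_train and dt_test would be DataSet objects prepared elsewhere in the project
opts = {
    'stg': {'lambda': np.logspace(-5, -1, 9)},        # placeholder algorithm names
    'liblinear': {'lambda': np.logspace(-5, -1, 9)},
}
exp_sol(args, dt_train, dt_test, opts, 'cache/exp_sol.pkl')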
Example #2
def main():
    import mnist_loader
    import network
    import utils

    training_data, validation_data, test_data = mnist_loader.load_data()
    # 784 input pixels, three hidden layers of 30 units, 10 output classes
    net = network.Network([784, 30, 30, 30, 10],
                          init=utils.NormalWeightInitializer,
                          cost=utils.CrossEntropyCost,
                          norm=utils.L2Regularizer(lmbda=0.0001))
    _, evaluation_accuracy, _, _ = net.SGD(
        30,    # epochs
        10,    # mini-batch size
        0.14,  # learning rate
        training_data,
        test_data,
        # early_stop=utils.NoImprovementInN(10),
        # learning_rate_adjustment=utils.NoImprovementInN(10),
        monitor_evaluation_accuracy=True)

    from fig import plot
    plot(evaluation_accuracy)
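
The two commented-out SGD arguments suggest an early-stopping/learning-rate policy keyed to stagnating accuracy. Below is a minimal sketch of what a NoImprovementInN-style helper could look like; this is an assumption about its contract (called once per epoch with the latest accuracy, returns True to stop), not the actual utils implementation.

class NoImprovementInN(object):
    """Signal a stop when the monitored accuracy has not improved
    for n consecutive epochs (assumed contract)."""

    def __init__(self, n):
        self.n = n
        self.best = None
        self.stale_epochs = 0

    def __call__(self, accuracy):
        if self.best is None or accuracy > self.best:
            self.best = accuracy
            self.stale_epochs = 0
        else:
            self.stale_epochs += 1
        return self.stale_epochs >= self.n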
Example #3
def exp_fs(dtrain,
           dtest,
           opts,
           output_path,
           repeat=1,
           retrains=None,
           fold_num=5,
           cv_process_num=1,
           draw_opts=None):
    """
    Experiment to run all algorithms

    Parameters
    ----------
    dtrain: DataSet
        training dataset
    dtest: DataSet
        test dataset
    opts: dict
        options for each algorithm
    output_path: str
        output path to save the results
    repeat: int
        number of times to repeat each algorithm
    retrains: list[str]
        which algorithms should be retrained, even if they have been trained before
    fold_num: int
        number of folds for cross validation
    cv_process_num: int
        number of processes for cross validation
    """

    draw_opts = {} if draw_opts is None else draw_opts

    if osp.exists(output_path):
        with open(output_path, 'rb') as rfh:
            res = cPickle.load(rfh)
    else:
        res = {}

    retrains = [] if retrains is None else retrains
    if len(retrains) == 1 and retrains[0].lower() == 'all':
        retrains = opts.keys()
    retrains = [v.lower() for v in retrains]

    for algo_ori, opt in opts.iteritems():
        algo = algo_ori.lower()
        if algo_ori in res and algo_ori != algo:
            res[algo] = res[algo_ori]
            del res[algo_ori]

        if (algo in res) and (algo not in retrains):
            continue

        res_len = len(opt['lambda'])

        # res[algo]: [#selected features, test accuracy, training time],
        # each a (repeat x lambda) matrix
        res[algo] = [np.zeros((repeat, res_len)) for i in xrange(3)]

        for rid in xrange(repeat):
            if rid > 0 and algo in ('liblinear', 'fgm'):
                # these results do not vary across shuffles; copy the previous pass
                for i in xrange(3):
                    res[algo][i][rid, :] = res[algo][i][rid - 1, :]
            else:
                logging.info('random pass %d', rid)
                dtrain.rand_path(tgt_type='svm', force=True)
                algo_res = run_fs(dtrain, dtest, algo, opt, fold_num,
                                  cv_process_num)
                for i in xrange(3):
                    res[algo][i][rid, :] = algo_res[i]

    #save results
    with open(output_path, 'wb') as wfh:
        cPickle.dump(res, wfh)

    algo_list = []
    ave_feat_nums = []
    ave_test_accuracy = []
    for algo, opt in opts.iteritems():
        if algo.lower() == 'gpu-mrmr':
            continue
        algo_list.append(algo)
        vals = res[algo.lower()]
        ave_feat_nums.append(np.average(vals[0], axis=0))
        ave_test_accuracy.append(np.average(vals[1], axis=0))

    #draw number of selected features vs test accuracy
    # fs_task is assumed to be defined at module level
    fig.plot(
        ave_feat_nums, ave_test_accuracy, '#Selected Features',
        'Test Accuracy (%)', algo_list,
        osp.join(
            dtrain.work_dir,
            dtrain.name.replace('_', '-') + '-%s-test-accuracy.pdf' %
            (fs_task)), **draw_opts.get('accu', {}))

    algo_list = []
    ave_feat_nums = []
    ave_train_time = []
    for algo, opt in opts.iteritems():
        algo_list.append(algo)
        vals = res[algo.lower()]
        ave_feat_nums.append(np.average(vals[0], axis=0))
        ave_train_time.append(np.average(vals[2], axis=0))

    fig.plot(
        ave_feat_nums, ave_train_time, '#Selected Features',
        'Training Time (s)', algo_list,
        osp.join(
            dtrain.work_dir,
            dtrain.name.replace('_', '-') + '-%s-train-time.pdf' % (fs_task)),
        **draw_opts.get('time', {}))
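
A minimal sketch of a call into exp_fs. The opts layout follows how the function indexes it (a 'lambda' grid per algorithm); the algorithm names, output path, and draw_opts contents are placeholders, and draw_opts['accu'] / draw_opts['time'] are simply forwarded to fig.plot as keyword arguments.

# dtrain and dtest are DataSet objects prepared elsewhere in the project
opts = {
    'pet': {'lambda': [50, 100, 150, 200]},   # placeholder algorithm names
    'fgm': {'lambda': [50, 100, 150, 200]},
}
draw_opts = {'accu': {}, 'time': {}}
exp_fs(dtrain, dtest, opts, 'fs-results.pkl',
       repeat=3, retrains=['all'], fold_num=5, cv_process_num=2,
       draw_opts=draw_opts)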
Example #4
def exp_online(args, dt_train, dt_test, opts, cache_data_path):
    if args.retrain or not osp.exists(cache_data_path):
        res = {}
        res_log = {}
        for algo, opt in opts.iteritems():
            # four summary statistics per pass: accuracy and time on train and test
            res[algo] = np.zeros((args.shuffle, 4))
            # liblinear and vw do not produce an online training log
            if algo not in ('liblinear', 'vw'):
                res_log[algo] = [None for i in xrange(args.shuffle)]

        for rid in xrange(args.shuffle):
            logging.info('random pass %d', rid)
            dt_train.rand_path(force=True)
            for algo, opt in opts.iteritems():
                algo_res = run_ol(dt_train, dt_test, opt, args.retrain,
                                  args.fold_num)
                res[algo][rid, :] = algo_res[0:4]
                if algo not in ('liblinear', 'vw'):
                    res_log[algo][rid] = algo_res[4]

        # always refresh the cache after retraining
        cache_data = {'res': res, 'res_log': res_log}
        with open(cache_data_path, 'wb') as fh:
            cPickle.dump(cache_data, fh)
    else:
        with open(cache_data_path, 'rb') as fh:
            cache_data = cPickle.load(fh)
        res = cache_data['res']
        res_log = cache_data['res_log']

    #write the accuracy and time-cost table
    out_file = open(args.output, 'w')
    line = '{0: <12}{1:<16}{1:<16}{2:<16}{2:<15}'.format(
        'algorithm', 'train', 'test')
    print line
    out_file.write('%s\n' % (line))
    line = '{0: <12}{1:<16}{2:<16}{1:<16}{2:<15}'.format(
        '', 'accuracy', 'time(s)')
    print line
    out_file.write('%s\n' % (line))
    ddof = 1 if args.shuffle > 1 else 0
    for algo, vals in res.iteritems():
        ave_vals = np.average(vals, axis=0)
        std_vals = np.std(vals, axis=0, ddof=ddof)
        line = '{0: <12}{1:.4f}+/-{2:.4f} {3:.4f}+/-{4:.4f} {5:.4f}+/-{6:.4f} {7:.4f}+/-{8:.4f}'.format(
            algo, ave_vals[0], std_vals[0], ave_vals[1], std_vals[1],
            ave_vals[2], std_vals[2], ave_vals[3], std_vals[3])
        print line
        out_file.write('%s\n' % (line))
    out_file.close()

    #draw training log

    xs = []
    error_rates = []
    update_nums = []
    algo_list = []
    for algo, log in res_log.iteritems():
        algo_list.append(algo)
        # log columns (per snapshot): 0 = #samples seen, 2 = #updates, 3 = error rate
        xs.append(log[0][:, 0].astype(np.int))
        ave_update_nums = np.zeros(log[0][:, 2].shape)
        ave_error_rates = np.zeros(log[0][:, 3].shape)
        for rid in xrange(args.shuffle):
            ave_update_nums += log[rid][:, 2]
            ave_error_rates += log[rid][:, 3]
        error_rates.append(ave_error_rates / args.shuffle)
        update_nums.append(ave_update_nums / args.shuffle)

    fig.plot(xs, algo_list, error_rates, 'Number of samples',
             'Cumulative Error Rate', dt_train.name + '-error-rate.pdf')
    fig.plot(xs, algo_list, update_nums, 'Number of samples',
             'Cumulative Number of Updates', dt_train.name + '-update-num.pdf')
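
exp_online reads several attributes off args; here is a sketch of an argparse setup that would satisfy it. The flag names are inferred from the attribute accesses (retrain, shuffle, fold_num, output), not taken from the project's actual command line.

import argparse

parser = argparse.ArgumentParser(description='online learning experiments')
parser.add_argument('--retrain', action='store_true',
                    help='rerun the algorithms even if cached results exist')
parser.add_argument('--shuffle', type=int, default=3,
                    help='number of random passes over the training data')
parser.add_argument('--fold_num', type=int, default=5,
                    help='number of cross-validation folds')
parser.add_argument('--output', default='ol-results.txt',
                    help='file to receive the accuracy/time table')
args = parser.parse_args()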
Example #5
def exp_ol(dtrain,
           dtest,
           opts,
           output_path,
           repeat=1,
           retrains=None,
           fold_num=5,
           cv_process_num=1,
           draw_opts=None):
    """
    Experiment to run all algorithms

    Parameters
    ----------
    dtrain: DataSet
        training dataset
    dtest: DataSet
        test dataset
    opts: dict
        options for each algorithm
    output_path: str
        output path to save the results
    repeat: int
        number of times to repeat each algorithm
    retrains: list[str]
        which algorithms should be retrained, even if they have been trained before
    fold_num: int
        number of folds for cross validation
    cv_process_num: int
        number of processes for cross validation
    """

    draw_opts = {} if draw_opts is None else draw_opts

    if osp.exists(output_path):
        with open(output_path, 'rb') as rfh:
            save_obj = cPickle.load(rfh)
    else:
        save_obj = {'res': {}, 'train_log': {}}

    res = save_obj['res']
    train_log = save_obj['train_log']

    retrains = [] if retrains is None else retrains
    if len(retrains) == 1 and retrains[0].lower() == 'all':
        retrains = opts.keys()
    retrains = [v.lower() for v in retrains]

    for algo, opt in opts.iteritems():
        algo = algo.lower()
        if algo in res and algo not in retrains:
            continue

        # anything without cached results must also be (re)trained
        if algo not in retrains:
            retrains.append(algo)

        # four summary statistics per pass: accuracy and time on train and test
        res[algo] = np.zeros((repeat, 4))

        # liblinear and vw do not produce an online training log
        if algo not in ('liblinear', 'vw'):
            train_log[algo] = [None for i in xrange(repeat)]

    # case-insensitive view of opts, since algorithm names were lowercased above
    opts_lower = dict((k.lower(), v) for k, v in opts.iteritems())

    for rid in xrange(repeat):
        logging.info('random pass %d', rid)
        dtrain.rand_path(force=True)
        for algo in retrains:
            if algo not in opts_lower:
                # skip retrain names that have no configuration
                continue
            opt = opts_lower[algo]
            algo_res = run_ol(dtrain, dtest,
                              algo,
                              opt,
                              fold_num=fold_num,
                              cv_process_num=cv_process_num)
            res[algo][rid, :] = algo_res[0:4]
            if algo not in ('liblinear', 'vw'):
                train_log[algo][rid] = algo_res[4]

    #save results
    save_obj['res'] = res
    save_obj['train_log'] = train_log
    with open(output_path, 'wb') as wfh:
        cPickle.dump(save_obj, wfh)

    #print train and test results
    line = '{0: <12}{1:<16}{1:<16}{2:<16}{2:<15}'.format('algorithm', 'test', 'train')
    print line
    line = '{0: <12}{1:<16}{2:<16}{1:<16}{2:<15}'.format('', 'accuracy', 'time(s)')
    print line
    ddof = 1 if repeat > 1 else 0
    for algo, opt in opts.iteritems():
        vals = res[algo.lower()]
        ave_vals = np.average(vals, axis=0)
        std_vals = np.std(vals, axis=0, ddof=ddof)
        line = ('{0: <12}{1:.4f}+/-{2:.4f} {3:.4f}+/-{4:.4f} '
                '{5:.4f}+/-{6:.4f} {7:.4f}+/-{8:.4f}').format(
                    algo, ave_vals[0], std_vals[0], ave_vals[1], std_vals[1],
                    ave_vals[2], std_vals[2], ave_vals[3], std_vals[3])
        print line

    #draw training log
    data_nums = []
    update_nums = []
    error_rates = []
    algo_list = []

    for algo, opt in opts.iteritems():
        if algo.lower() in ('liblinear', 'vw'):
            continue
        log = train_log[algo.lower()]
        algo_list.append(algo)
        # log columns (per snapshot): 0 = #samples seen, 2 = #updates, 3 = error rate
        data_nums.append(log[0][:, 0].astype(np.int))
        ave_update_nums = np.zeros(log[0][:, 2].shape)
        ave_error_rates = np.zeros(log[0][:, 3].shape)
        for rid in xrange(repeat):
            ave_update_nums += log[rid][:, 2]
            ave_error_rates += log[rid][:, 3]
        error_rates.append(ave_error_rates / repeat)
        update_nums.append(ave_update_nums / repeat)

    fig.plot(data_nums,
             error_rates,
             'Number of samples',
             'Cumulative Error Rate',
             algo_list,
             osp.join(dtrain.work_dir, dtrain.name.replace('_', '-') + '-error-rate.pdf'),
             **draw_opts.get('train-error', {}))

    fig.plot(data_nums,
             update_nums,
             'Number of samples',
             'Cumulative Number of Updates',
             algo_list,
             osp.join(dtrain.work_dir, dtrain.name.replace('_', '-') + '-update-num.pdf'),
             **draw_opts.get('update-num', {}))
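
Finally, a minimal sketch of invoking exp_ol. The algorithm names, the per-algorithm option dicts, and the draw_opts keys ('train-error', 'update-num', forwarded to fig.plot) are placeholders; dtrain and dtest are DataSet objects prepared elsewhere in the project.

opts = {
    'ogd': {},        # placeholder algorithm names and options
    'liblinear': {},
}
draw_opts = {'train-error': {}, 'update-num': {}}
exp_ol(dtrain, dtest, opts, 'ol-results.pkl',
       repeat=3, retrains=['all'], fold_num=5, cv_process_num=2,
       draw_opts=draw_opts)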