示例#1
0
def impute_gain(x, odir):
    """Run the GAIN script on ``x`` and return the table it writes.

    ``x`` is saved gzip-compressed as ``<odir>/xmissing.csv.gz``, the
    script ``<proj_dir>/gain/gain.py`` is launched through
    ``utilmlab.exe_cmd`` with ``--testall 1``, and the file it is asked to
    produce, ``<odir>/ximputed.csv``, is loaded with ``pd.read_csv``.

    Args:
        x: object exposing ``to_csv`` (presumably a pandas DataFrame
            containing missing values -- confirm against the callers).
        odir: directory that holds the intermediate input/output files.

    Returns:
        Whatever ``pd.read_csv`` yields for ``<odir>/ximputed.csv``.
    """
    interpreter = 'python3'
    gain_script = '{}/gain/gain.py'.format(utilmlab.get_proj_dir())
    missing_path = '{}/xmissing.csv.gz'.format(odir)
    imputed_path = '{}/ximputed.csv'.format(odir)

    x.to_csv(missing_path, compression='gzip', index=False)

    # Remove any pre-existing output file before the script is launched.
    if os.path.isfile(imputed_path):
        os.remove(imputed_path)

    command = '{} {} -i {} -o {} --testall 1'.format(
        interpreter, gain_script, missing_path, imputed_path)
    utilmlab.exe_cmd(logger, command)

    return pd.read_csv(imputed_path)
    numexp = args.numexp

    proj_dir = utilmlab.get_proj_dir() \
        if args.projdir is None else args.projdir

    alg = 'asac'

    if args.exe is not None:
        python_exe = args.exe
    else:
        python_exe = 'python' if sys.version_info[0] < 3 else 'python3'

    niter = args.it
    version = 1

    resdir = '{}/result/{}/v_{}/h_{}'.format(
        proj_dir, alg, version,
        os.environ['HOSTNAME'] if 'HOSTNAME' in os.environ else 'unknown')

    utilmlab.ensure_dir(resdir)

    logger = utilmlab.init_logger(resdir, 'log_test_{}.txt'.format(alg))

    result_lst = []

    script = Path('{}/alg/asac/Main_Synthetic_Exp1.py'.format(proj_dir))

    utilmlab.exe_cmd(
        logger, '{} {} {}'.format(python_exe, script,
                                  '--it {} -n {}'.format(niter, numexp)))
        if dataset == 'bc':
            # from sklearn: A copy of UCI ML Breast
            # Cancer Wisconsin (Diagnostic) dataset
            x, y = load_breast_cancer(return_X_y=True)
        elif dataset == 'cover':
            x, y = fetch_covtype(return_X_y=True)
        else:
            assert 0
        lbl = 'target'
        df = pd.DataFrame(x)
        df[lbl] = y
        df.to_csv(fn_csv, index=False, compression='gzip', sep=sep)

        utilmlab.exe_cmd(
            logger,
            '{} {} -i {} --target {} -o {} --verbose {} --it {} -n {} --separator {}'
            .format(python_exe, script, fn_csv, lbl, resdir, verbose, niter,
                    nsample, sep))

        utilmlab.exe_cmd(
            logger,
            '{} {} -i {} -o {} --verbose {} --it {} --target {} --model {} --separator {}'
            ' --nstage {} -n {}'.format(
                python_exe,
                script,
                fn_csv,
                resdir,
                verbose,
                niter,
                lbl,
                fn_model,
示例#4
0
    dataset = 'bc'

    odir = '{}/misc/dataset_{}'.format(resdir, dataset)

    fn_csv, fn_missing_csv, fn_imputed_csv = set_filenames(odir)

    script_create_missing = Path(
        '{}/alg/gain/create_missing.py'.format(proj_dir))
    script = Path('{}/alg/gain/gain.py'.format(proj_dir))
    script_ana = Path('{}/alg/gain/gain_ana.py'.format(proj_dir))

    for islabel in [0, 1]:
        for autocat in [0, 1, 2]:
            utilmlab.exe_cmd(
                logger, '{} {} --dataset {} -o {} '
                '--oref {} --istarget {}'.format(python_exe,
                                                 script_create_missing,
                                                 dataset, fn_missing_csv,
                                                 fn_csv, islabel))

            utilmlab.exe_cmd(
                logger, '{} {} -i {} {} '
                '-o {} --it {} --testall 1 --autocategorical {}'.format(
                    python_exe, script, fn_missing_csv,
                    '--target target' if islabel else '', fn_imputed_csv,
                    niter, autocat))

    result_lst = []

    dataset_prop = [('spambase', None), ('spambase', 'label'), ('bc', None),
                    ('spam', None), ('letter-recognition', None),
                    ('letter-recognition', 'lettr'), ('letter', None)]
示例#5
0
    if not os.path.isfile('{}/alg/gcit/ccle_experiments/data/mutation.txt.gz'.
                          format(proj_dir)):
        print('warning: data files for ccle_experiments not found')
        sys.exit(0)

    alg = 'gcit'
    version = 1
    if args.exe is not None:
        python_exe = args.exe
    else:
        python_exe = 'python' if sys.version_info[0] < 3 else 'python3'

    if args.o is None:
        resdir = '{}/result/{}/v_{}/h_{}'.format(
            proj_dir, alg, version,
            os.environ['HOSTNAME'] if 'HOSTNAME' in os.environ else 'unknown')
    else:
        resdir = args.o

    utilmlab.ensure_dir(resdir)

    logger = utilmlab.init_logger(resdir, 'log_test_{}.txt'.format(alg))

    script = Path('{}/alg/gcit/ccle_experiments/'
                  'ccle_experiment.py'.format(proj_dir))

    odir = resdir

    utilmlab.exe_cmd(logger, '{} {}'.format(python_exe, script))
示例#6
0
        # execute all unit tests

        f_lst = utilmlab.find_file_dir(
            '{}/alg'.format(proj_dir),
            'test_*.py')

        logger.info('Unit tests found:{}'.format(f_lst))

        for fpy in f_lst:
            if 'test_alg.py' in fpy:
                continue
            utilmlab.exe_cmd(
                logger,
                '{} {} {} {}'.format(
                    python_exe,
                    Path(fpy),
                    '--it {}'.format(args.it) if args.it is not None else '',
                    '--exe {}'.format(args.exe) if args.exe is not None else ''
                )
            )

    # execute all notebooks

    f_lst = utilmlab.find_file_dir(
        '{}/alg'.format(utilmlab.get_proj_dir()),
        '*.ipynb')

    logger.info('notebooks found:{}'.format(f_lst))

    cwd = os.getcwd()
    if not is_only_notebook:

        # execute all unit tests

        f_lst = utilmlab.find_file_dir(test_dir, 'test_*.py')

        logger.info('Unit tests found:{}'.format(f_lst))

        for fpy in f_lst:
            if 'test_alg.py' in fpy:
                continue
            time_start = time.time()
            utilmlab.exe_cmd(
                logger, '{} {} {} {}'.format(
                    python_exe, Path(fpy),
                    '--it {}'.format(args.it) if args.it is not None else '',
                    '--exe {}'.format(args.exe)
                    if args.exe is not None else ''))
            logger.info('time={}'.format(time.time() - time_start))

    # execute all notebooks

    f_lst = utilmlab.find_file_dir(test_dir, '*.ipynb')

    logger.info('notebooks found:{}'.format(f_lst))

    cwd = os.getcwd()

    fn_html_lst = []
    for fnb in f_lst:
示例#8
0
        elif dataset == 'cover':
            x, y = fetch_covtype(return_X_y=True)
        else:
            assert 0

        lbl = 'target'
        df = pd.DataFrame(x)
        df[lbl] = y
        df.to_csv(fn_csv, index=False, compression='gzip', sep=sep)

        try:
            utilmlab.exe_cmd(
                logger,
                'Rscript {} -i {} --target {} --exe {} --it {} '
                ' --replication {} --projdir {}'.format(
                    script, fn_csv, lbl, python_exe, niter, nreplication,
                    proj_dir),
                assert_on_error=
                not generate_error  # assert if an error is not expected
            )
        except:
            if generate_error:
                logger.info('expected error generated')
                pass
            assert 0

    fn_data_csv = '{}/data.csv'.format(resdir)
    fn_json = '{}/generated_data_properties.json'.format(resdir)

    utilmlab.exe_cmd(
        logger, 'Rscript {}/alg/knockoffgan/gen_data.r -o {} --target {} '
示例#9
0
    for dataset in ['csv', 'bc', 'spambase']:

        odir = '{}/dataset_{}'.format(resdir, dataset)

        utilmlab.ensure_dir(odir)

        fn_feature_score = '{}/feature_score.csv.gz'.format(odir)
        fn_json = '{}/feature_score.csv.json'.format(odir)
        fn_plot_sample = '{}/sample.png'.format(odir)
        fn_plot_global = '{}/global.png'.format(odir)

        if dataset == 'csv':
            fn_csv = '{}/spambase.csv.gz'.format(utilmlab.get_data_dir())
            utilmlab.exe_cmd(
                logger, '{} {} -i {} --target label --it {} -o {}'.format(
                    python_exe, script, fn_csv, nepoch, fn_feature_score))
        else:
            if not data_loader_mlab.is_available(dataset):
                continue
            utilmlab.exe_cmd(
                logger, '{} {} --dataset {} --it {} -o {}'.format(
                    python_exe, script, dataset, nepoch, fn_feature_score))

        utilmlab.exe_cmd(
            logger, '{} {} -i {} -o {}'.format(python_exe, script_ana,
                                               fn_feature_score, fn_json))

        utilmlab.exe_cmd(
            logger, '{} {} -i {} -oglobal {} -osample {}'.format(
                python_exe, script_plot, fn_feature_score, fn_plot_global,
            '--{} {}'.format(el, args_d[el]) for el in args_d.keys()])

        dataset = args_d['dataset']
        
        odir = '{}/dataset_{}'.format(
            odir,
            dataset)

        utilmlab.ensure_dir(odir)

        utilmlab.exe_cmd(
            logger,
            '{} {} {} {} {} {} {}'.format(
                python_exe,
                script,
                cmd_arg,
                '--it {}'.format(niter),
                '-o {}'.format(odir),
                '--itout {}'.format(
                    args.itout) if args.itout is not None else '',
                '--itrs {}'.format(
                    args.itrs) if args.itrs is not None else ''))

        utilmlab.exe_cmd(
            logger,
            '{} {} {} {} {}'.format(
                python_exe,
                script_ana,
                cmd_arg,
                '-o {}'.format(odir),
                '--itout {}'.format(
                    args.itout) if args.itout is not None else ''))