Exemplo n.º 1
0
def create_run_script(
        main_filepath,
        argname_lst,
        argvalue_lst,
        script_filepath,
        # entry_folderpath=None,
        output_filepath=None,
        profile_filepath=None):
    """Write an executable bash script that invokes a Python main file.

    The script starts with '#!/bin/bash' and 'set -e' (abort on first error),
    followed by the call lines produced by generate_call_lines for the given
    argument names/values and optional output/profile file paths.
    The script is written to script_filepath and marked executable.
    """
    lines = ['#!/bin/bash', 'set -e']
    # call the main function.
    lines.extend(
        generate_call_lines(main_filepath=main_filepath,
                            argname_lst=argname_lst,
                            argvalue_lst=argvalue_lst,
                            output_filepath=output_filepath,
                            profile_filepath=profile_filepath))
    tb_io.write_textfile(script_filepath, lines, with_newline=True)
    # grant execute permission to user, group and others.
    mode = os.stat(script_filepath).st_mode
    os.chmod(script_filepath,
             mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
Exemplo n.º 2
0
def write_server_run_script():
    """Write an executable SLURM batch script (run.sh) into local_folderpath.

    Only supported for the 'bridges' server; the resource configuration below
    (time, memory, partition, CPUs/GPUs) is meant to be edited by hand.
    Relies on module-level globals: servertype, jobtype, remote_folderpath,
    main_relfilepath, local_folderpath.
    """
    assert servertype == 'bridges'
    # NOTE: to edit according to the configuration needed.
    jobname = jobtype
    time_budget_in_hours = 48  # max 48 hours
    mem_budget_in_gb = 16
    partition_name = 'GPU-shared'
    num_cpus = 1  # probably ask a CPU for each GPU (or more if you have data loaders)
    num_gpus = 1  # up to 4 if k80, up to 2 if p100
    gpu_type = 'k80'  # in ['k80', 'p100']

    script_header = [
        '#!/bin/bash',
        '#SBATCH --nodes=1',
        '#SBATCH --partition=%s' % partition_name,
        '#SBATCH --cpus-per-task=%d' % num_cpus,
        '#SBATCH --gres=gpu:%s:%d' % (gpu_type, num_gpus),
        '#SBATCH --mem=%dM' % tb_rs.convert_between_byte_units(
            mem_budget_in_gb, src_units='gigabytes', dst_units='megabytes'),
        '#SBATCH --time=%d' % tb_lg.convert_between_time_units(
            time_budget_in_hours, src_units='hours', dst_units='minutes'),
        '#SBATCH --job-name=%s' % jobname,
    ]
    # NOTE: changes to the environment can be put in the run script.
    script_body = [
        'module load tensorflow/1.5_gpu',
        'PYTHONPATH=%s:$PYTHONPATH' % remote_folderpath,
        'python -u %s > log_%s.txt' % (main_relfilepath, jobname)
    ]

    script_filepath = tb_fs.join_paths([local_folderpath, "run.sh"])
    tb_io.write_textfile(script_filepath, script_header + [''] + script_body)
    # add run permissions via os.chmod, consistent with the other script
    # helpers in this file (avoids spawning a chmod subprocess).
    st = os.stat(script_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(script_filepath, st.st_mode | exec_bits)
Exemplo n.º 3
0
def create_project_folder(folderpath, project_name, initialize_git_repo=False):
    """Create the standard folder/file skeleton for a new project.

    Creates <folderpath>/<project_name> with typical subfolders (code package,
    analyses, data, experiments, notes, temp), empty starter code files,
    placeholder readme files, a .gitignore, and optionally initializes a git
    repository with an initial commit.
    """
    in_project = lambda xs: tb_fs.join_paths([folderpath, project_name] + xs)

    tb_fs.create_folder(in_project([]))
    # typical directories
    for dirname in [
            project_name, "analyses", "data", "experiments", "notes", "temp"
    ]:
        tb_fs.create_folder(in_project([dirname]))

    # code files (in order): data, preprocessing, model definition, model training,
    # model evaluation, main to generate the results with different relevant
    # parameters, setting up different experiments, analyze the results and
    # generate plots and tables.
    for filename in [
            "__init__.py", "data.py", "preprocess.py", "model.py", "train.py",
            "evaluate.py", "main.py", "experiment.py", "analyze.py"
    ]:
        tb_fs.create_file(in_project([project_name, filename]))

    # add an empty script that can be used to download data.
    tb_fs.create_file(in_project(["data", "download_data.py"]))

    # common notes to keep around.
    for notename in ["journal.txt", "reading_list.txt", "todos.txt"]:
        tb_fs.create_file(in_project(["notes", notename]))

    # placeholders
    tb_io.write_textfile(in_project(["experiments", "readme.txt"]),
                         ["All experiments will be placed under this folder."])

    tb_io.write_textfile(in_project(["temp", "readme.txt"]), [
        "Here lie temporary files that are relevant or useful for the project "
        "but that are not kept under version control."
    ])

    tb_io.write_textfile(in_project(["analyses", "readme.txt"]), [
        "Here lie files containing information extracted from the "
        "results of the experiments. Tables and plots are typical examples."
    ])

    # typical git ignore file.
    tb_io.write_textfile(
        in_project([".gitignore"]),
        ["data", "experiments", "temp", "*.pyc", "*.pdf", "*.aux"])

    if initialize_git_repo:
        subprocess.call("cd %s && git init && git add -f .gitignore * && "
                        "git commit -a -m \"Initial commit for %s.\" && cd -" %
                        (in_project([]), project_name),
                        shell=True)
def create_runall_script(experiment_folderpath):
    """Write an executable run.sh that runs every config's run script in order.

    Counts the 'cfg*' subfolders of experiment_folderpath and emits one line
    per config invoking <experiment_folderpath>/cfg<i>/run.sh, then marks the
    resulting script executable.
    """
    fo_names = tb_fs.list_folders(
        experiment_folderpath, recursive=False, use_relative_paths=True)
    # experiments live in subfolders named cfg0, cfg1, ...
    num_exps = len(
        [n for n in fo_names if tb_fs.path_last_element(n).startswith('cfg')])

    # creating the script; range (not the Python-2-only xrange) keeps this
    # working under both Python 2 and Python 3.
    sc_lines = ['#!/bin/bash']
    sc_lines += [
        tb_fs.join_paths([experiment_folderpath,
                          "cfg%d" % i, 'run.sh']) for i in range(num_exps)
    ]

    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)
    # add run permissions.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
Exemplo n.º 5
0
def create_runall_script_with_parallelization(experiment_folderpath):
    """Write an executable run.sh that runs the per-config scripts, optionally
    partitioned across multiple workers.

    The generated bash script accepts:
        run.sh [worker_id num_workers] [--force-rerun]
    Worker i runs the configs whose index is congruent to i modulo
    num_workers, skipping configs that already have a results.json unless
    --force-rerun is passed.
    """
    fo_names = tb_fs.list_folders(experiment_folderpath,
                                  recursive=False,
                                  use_relative_paths=True)
    # experiments live in subfolders named cfg0, cfg1, ...
    num_exps = len(
        [n for n in fo_names if tb_fs.path_last_element(n).startswith('cfg')])

    # creating the script.
    sc_lines = [
        '#!/bin/bash',
        # bug fix: the original guard ([ "$#" -lt 0 ] && [ "$#" -gt 3 ]) could
        # never be true, so calls with more than 3 arguments were not rejected.
        'if [ "$#" -gt 3 ]; then',
        '    echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '    exit 1', 'fi', 'force_rerun=0',
        'if [ $# -eq 0 ] || [ $# -eq 1 ]; then', '    worker_id=0',
        '    num_workers=1', '    if [ $# -eq 1 ]; then',
        '        if [ "$1" != "--force-rerun" ]; then',
        '            echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '            exit 1', '        else', '            force_rerun=1',
        '        fi', '    fi', 'else', '    worker_id=$1',
        '    num_workers=$2', '    if [ $# -eq 3 ]; then',
        '        if [ "$3" != "--force-rerun" ]; then',
        '            echo "Usage: run.sh [worker_id num_workers] [--force-rerun]"',
        '            exit 1', '        else', '            force_rerun=1',
        '        fi', '    fi', 'fi',
        'if [ $num_workers -le $worker_id ] || [ $worker_id -lt 0 ]; then',
        '    echo "Invalid call: requires 0 <= worker_id < num_workers."',
        # bug fix: a missing comma merged 'fi' and '' into one string,
        # silently dropping the intended blank line after the validation block.
        '    exit 1', 'fi',
        '',
        'num_exps=%d' % num_exps, 'i=0', 'while [ $i -lt $num_exps ]; do',
        '    if [ $(($i % $num_workers)) -eq $worker_id ]; then',
        '        if [ ! -f %s ] || [ $force_rerun -eq 1 ]; then' %
        tb_fs.join_paths([experiment_folderpath, "cfg$i", 'results.json']),
        '            echo cfg$i',
        '            %s' %
        tb_fs.join_paths([experiment_folderpath, "cfg$i", 'run.sh']),
        '        fi', '    fi', '    i=$(($i + 1))', 'done'
    ]
    # creating the run all script.
    out_filepath = tb_fs.join_paths([experiment_folderpath, 'run.sh'])
    tb_io.write_textfile(out_filepath, sc_lines, with_newline=True)
    # add run permissions.
    st = os.stat(out_filepath)
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(out_filepath, st.st_mode | exec_bits)
Exemplo n.º 6
0
def create_table_from_experiment(experiment_name,
                                 rows,
                                 columns,
                                 values,
                                 abort_if_incomplete_configs=True,
                                 use_checkpoints=False,
                                 single_row_multitable=False,
                                 print_to_terminal=True,
                                 max_column_width=10**9,
                                 abort_if_different_keys=True):
    """Summarize an experiment's results as pivot tables and write them to disk.

    Loads (config, result) pairs from 'experiments/<experiment_name>',
    summarizes each completed run, optionally recomputes CoNLL chunking
    metrics with the conlleval script, pivots the resulting dataframe on
    `rows` x `columns` for each entry in `values`, and writes the tables to
    'analyses/<experiment_name>/results.txt' plus a CSV of the raw summaries.

    Args:
        experiment_name: name of the folder under 'experiments/' to load.
        rows: config keys used as the pivot-table index (truncated to
            max_column_width characters).
        columns: config keys used as the pivot-table columns.
        values: result keys to tabulate; if any CoNLL metric name is present
            (e.g. 'dev_fb1'), metrics are recomputed from prediction files.
        abort_if_incomplete_configs: if True, assert-fails on any config
            without a result.
        use_checkpoints: forwarded to explore_experiment.
        single_row_multitable: if True, builds one table with all `values`;
            otherwise one table per value.
        print_to_terminal: if True, also print the tables to stdout.
        max_column_width: truncation length for row-key column names.
        abort_if_different_keys: forwarded to tb_ut.create_dataframe and
            tb_io.write_csvfile.
    """
    # (config, result) pairs for every run found under the experiment folder.
    _, xs = explore_experiment('experiments/%s' % experiment_name,
                               use_checkpoints)

    # keep only completed runs (those with a result).
    cfgs = []
    res = []
    for (c, r) in xs:
        if r is not None:
            cfgs.append(c)
            res.append(r)
        else:
            # a missing result means the run is incomplete.
            assert not abort_if_incomplete_configs
    xs = tb_ut.zip_toggle([cfgs, res])

    # keys whose values vary across configs; c keeps the shared (constant)
    # part of the configuration for display.
    ks = keys_with_variation(cfgs)
    c = dict(cfgs[0])
    for k in ks:
        c.pop(k)

    # NOTE(review): assumes ks is dict-like (pop by key 'out_folder') — confirm
    # against keys_with_variation's return type.
    ks.pop('out_folder')
    print("***%s***" % experiment_name)
    pprint(ks)
    print()

    # one summary dict per run, merging its config and result.
    ds = [summarize_results(tb_ut.merge_dicts(x)) for x in xs]

    # if any CoNLL-style metric was requested, recompute it by running the
    # conlleval script over the prediction files of each run.
    if any([
            v in values for v in [
                'dev_precision', 'dev_recall', 'dev_fb1', 'test_precision',
                'test_recall', 'test_fb1'
            ]
    ]):

        def _extract_fn(fpath):
            # Pipe the prediction file through the CoNLL-2000 evaluation
            # script and parse (precision, recall, fb1) from its output.
            out = subprocess.check_output(
                ["cat %s | data/conll_2000/conlleval.txt" % fpath], shell=True)

            # NOTE(review): treats `out` as str and subscripts map(...) below —
            # Python 2 semantics; Python 3 would need out.decode() and
            # list(map(...)). Also f1 is computed but never used.
            res_line = out.split('\n')[1]
            f1 = float(res_line.split(';')[-1].split(": ")[1])

            # precision, recall, fb1 as fractions in [0, 1] (conlleval prints
            # percentages).
            p, r, fb1 = map(lambda x: 0.01 * float(x.split(': ')[1]),
                            res_line.split('%; '))[1:]

            return p, r, fb1

        # add the test and dev performances to the file.
        for d in ds:
            (d['dev_precision'], d['dev_recall'], d['dev_fb1']) = _extract_fn(
                tb_fs.join_paths([d['out_folder'], 'pred_dev.txt']))

            (d['test_precision'], d['test_recall'],
             d['test_fb1']) = _extract_fn(
                 tb_fs.join_paths([d['out_folder'], 'pred_test.txt']))

            # this is the final, last run for conll2000
            fpath = tb_fs.join_paths([d['out_folder'], 'final_pred_test.txt'])
            if tb_fs.file_exists(fpath):

                (d['final_test_precision'], d['final_test_recall'],
                 d['final_test_fb1']) = _extract_fn(fpath)

    df = tb_ut.create_dataframe(ds, abort_if_different_keys)

    # shorten the row-key column names to at most max_column_width chars.
    df = df.rename(columns={k: k[:max_column_width] for k in rows})
    rows = [k[:max_column_width] for k in rows]

    # determines the table layout: one table per value, or a single table
    # spanning all values.
    if not single_row_multitable:

        ts = [
            df.pivot_table(index=rows, columns=columns, values=[v])
            for v in values
        ]

    else:
        ts = [
            df.pivot_table(
                index=rows, columns=columns,
                values=values)  #.sort_values('dev_accuracy', ascending=False)
        ]

    # render: shared config first, then each table as formatted text.
    tb_fs.create_folder('analyses/%s' % experiment_name, abort_if_exists=False)
    s_c = pformat(c)
    ss_df = [
        t.to_string(float_format=get_float_formatter(2, 100.0)) for t in ts
    ]

    lines = [s_c]
    for s in ss_df:
        lines.append('')
        lines.append(s)

    if print_to_terminal:
        # print to terminal
        for s in lines:
            print(s)

    # write to file
    tb_io.write_textfile('analyses/%s/results.txt' % experiment_name, lines)
    tb_io.write_csvfile(ds,
                        'analyses/%s/results.csv' % experiment_name,
                        sort_keys=True,
                        abort_if_different_keys=abort_if_different_keys)