Example #1
def gen_qsubs():
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]

    num_per_run = get_num_per_run()

    num_scripts = 0
    for start in range(0, len(fns), num_per_run):
        end = start + num_per_run
        command = f'python {NAME}.py run_single {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00 -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)

    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
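Every example on this page relies on a small ensure_dir_exists helper whose exact behaviour varies between projects (some variants return the path so the call can be used inline, as in Examples #8, #15 and #17, and some accept a file path and create its parent directory). A minimal sketch of the most common case, not any one project's implementation:

import os

def ensure_dir_exists(path):
    # Create the directory (and any missing parents) if it does not exist yet.
    os.makedirs(path, exist_ok=True)
    # Returning the path lets callers use the call inline, e.g. as a mount point.
    return path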
Example #2
 def __init__(self, **options):
   """options: Override any of the defaults in settings.py by passing in
   key/value pairs where the key is in lower case. For instance:
   workspace_def_path=/tmp/my_file.xml will override the workspace_def_path
   default.
   """
   check_dependencies.check_dependencies()
   self._options = self._set_options(**options)
   self._command_processor = command_processor.CommandProcessor(self._options)
   util.ensure_dir_exists(self._options['workspace_cache_root'])
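The docstring above implies that _set_options merges lower-case keyword overrides over the defaults defined in settings.py. A minimal sketch of that merge, using a hypothetical merge_options helper and assuming the defaults are upper-case constants in a settings module (the project's real implementation may differ):

import settings  # assumed module holding the defaults

def merge_options(**options):
    # Key the upper-case defaults from settings.py in lower case, then let
    # explicit keyword overrides win; _set_options presumably does something similar.
    defaults = {name.lower(): value for name, value in vars(settings).items()
                if name.isupper()}
    defaults.update(options)
    return defaults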
Example #3
def gen_qsubs_remainder(NAME, chart_fnm, extension):
    # Only generate qsub scripts for jobs whose output is not yet present.
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]

    out_dir = _config.OUT_PLACE + NAME + '/'
    fns = set(os.listdir(out_dir))

    qdf = pd.read_csv(inp_dir + f'{chart_fnm}.csv', index_col=0)
    names = qdf['Name (unique)']
    num_per_run = len(names) // MAX_QSUB_PROCESSES
    if num_per_run * MAX_QSUB_PROCESSES < len(names):
        num_per_run += 1

    num_scripts = 0
    for i in range(0, len(qdf), num_per_run):
        out_fn = names[i] + extension
        if out_fn in fns:
            continue

        start = i
        end = start + num_per_run
        command = f'python {NAME}.py run_qsubs {chart_fnm} {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{chart_fnm}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        if NAME == 'c_dijkstra':
            if 'highlevel' in chart_fnm:
                vmem = 'h_vmem=8G'
            else:
                vmem = 'h_vmem=4G'
        else:
            vmem = 'h_vmem=1G'

        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00,{vmem} -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)

    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
Example #4
def main():
    sns.set()
    plt.rcParams['figure.figsize'] = (16.0, 8.0)
    document = CollegeDocumentLoader().load(join_curdir("data", "data.meta"))
    geodata = GeoLoader() \
        .load(join_curdir("data", "ne_110m_admin_1_states_provinces.shp"))
    data = CollegeLoader(document).load(join_curdir("data", "data.csv"))
    ensure_dir_exists(join_curdir("results"))
    plot_p1(data, document)
    plot_p2(data, document, geodata)
    plot_p3(data)
Example #5
def upload_file():
    try:
        filename = request.headers.get('FILE_NAME')
        if 'process' in request.args:
            auto_process_scan = request.args.get('process').lower() in [
                'true', '1'
            ]
        else:
            auto_process_scan = cfg.AUTOPROCESS
        log.info('Receiving %s, autoprocess=%s', filename, auto_process_scan)

        if allowed_file(filename):
            filename = secure_filename(filename)
            # determine final staging path for file and check if the file already exists
            basename = os.path.splitext(filename)[0]
            stagingdir = os.path.join(cfg.STAGING_FOLDER, basename)
            stagingpath = os.path.join(stagingdir, filename)
            if os.path.exists(stagingpath):
                log.info('File already exists on server: %s', stagingpath)
                receive_file(request, filename)
                return util.ret_ok('File already exists on server')
            # temp location to receive stream
            tmppath = os.path.join(cfg.TEMP_FOLDER, filename)
            with open(tmppath, 'wb') as f:
                receive_file(request, filename, f)

            # move to staging area dir and return
            util.ensure_dir_exists(stagingdir)
            shutil.move(tmppath, stagingpath)
            # TODO: check if move succeeded and log error if not
            log.info('Staged ' + filename + ' to ' + stagingdir)
            # If uploading is complete try to trigger processing
            if scan_done_uploading(stagingdir):
                log.info('Scan done uploading to ' + stagingdir)
                if auto_process_scan:
                    #NOTE: Comment out lines below to disable automated scan processing trigger
                    indexThread = threading.Thread(target=preprocess,
                                                   args=(basename, log))
                    indexThread.start()
                    processThread = threading.Thread(target=trigger_processing,
                                                     args=(basename, log))
                    processThread.start()
            return util.ret_ok()
        else:
            log.error('File type not allowed: ' + filename)
            log.error(request)
            raise Error(message=('File type not allowed: ' + filename),
                        status_code=415)
    except Exception as e:
        log.error(traceback.format_exc())
        #raise Error(message=('Unknown exception encountered %s' % str(e)), status_code=500)
        raise e
Example #6
def gen_qsubs(modelexp_nm=''):
    # Generate qsub shell scripts and commands for easy parallelization
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    if modelexp_nm == '':
        modelexp_nm = 'modelexp_simple'

    print(f'Writing qsubs for {modelexp_nm}. OK?')
    input()

    exp_design = pd.read_csv(_config.DATA_DIR + f'{modelexp_nm}.csv')
    hyperparam_cols = [col for col in exp_design.columns if col != 'Name']

    # Parse df into dict
    hyperparam_combinations = dict()
    for idx, row in exp_design.iterrows():
        nm = row['Name']
        hps = '+'.join([f'{hp}:{row[hp]}' for hp in hyperparam_cols])
        hyperparam_combinations[nm] = hps

    # Generate qsubs
    num_scripts = 0
    for hyperparam_nm in hyperparam_combinations:
        hyperparam_setting = hyperparam_combinations[hyperparam_nm]

        command = f'python {NAME}.py {modelexp_nm} {hyperparam_nm} {hyperparam_setting}'
        script_id = NAME.split('_')[0]

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{modelexp_nm}_{hyperparam_nm}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        qsub_commands.append(
            f'qsub -V -P regevlab -l h_rt=16:00:00,h_vmem=4G -l os=RedHat7 -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))

    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)
    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
Example #7
def gen_qsubs(NAME, chart_fnm):
    # Generate qsub shell scripts and commands for easy parallelization
    print('Generating qsub scripts...')
    qsubs_dir = _config.QSUBS_DIR + NAME + '/'
    util.ensure_dir_exists(qsubs_dir)
    qsub_commands = []

    script_id = NAME.split('_')[0]

    qdf = pd.read_csv(inp_dir + chart_fnm + '.csv', index_col=0)
    names = qdf['Name (unique)']
    num_per_run = len(names) // MAX_QSUB_PROCESSES
    if num_per_run * MAX_QSUB_PROCESSES < len(names):
        num_per_run += 1

    num_scripts = 0
    for start in range(0, len(qdf), num_per_run):
        end = start + num_per_run
        command = f'python {NAME}.py run_qsubs {chart_fnm} {start} {end}'

        # Write shell scripts
        sh_fn = qsubs_dir + f'q_{script_id}_{chart_fnm}_{start}_{end}.sh'
        with open(sh_fn, 'w') as f:
            f.write(f'#!/bin/bash\n{command}\n')
        num_scripts += 1

        # Write qsub commands
        if NAME == 'c_dijkstra':
            if 'highlevel' in chart_fnm:
                vmem = 'h_vmem=8G'
            else:
                vmem = 'h_vmem=4G'
        else:
            vmem = 'h_vmem=1G'

        qsub_commands.append(
            f'qsub -j y -V -P regevlab -l h_rt=1:00:00,{vmem} -wd {_config.SRC_DIR} {sh_fn} &'
        )

    # Save commands
    commands_fn = qsubs_dir + '_commands.sh'
    with open(commands_fn, 'w') as f:
        f.write('\n'.join(qsub_commands))
    subprocess.check_output(f'chmod +x {commands_fn}', shell=True)

    print(f'Wrote {num_scripts} shell scripts to {qsubs_dir}')
    return
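The gen_qsubs variants above write shell scripts that call back into the same file with a command like python {NAME}.py run_qsubs {chart_fnm} {start} {end}. A hypothetical command-line dispatch consistent with that convention (run_qsubs and the default chart name are assumptions, not the project's actual entry point):

import sys

if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'run_qsubs':
        # Invoked by one of the generated shell scripts.
        chart_fnm = sys.argv[2]
        start, end = int(sys.argv[3]), int(sys.argv[4])
        run_qsubs(chart_fnm, start, end)  # hypothetical worker function
    else:
        gen_qsubs(NAME, chart_fnm='example_chart')  # hypothetical default chart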
Example #8
    def download(self, track):
        codec = self.extension[1:]
        bitrate = 192

        track.ym_track.download(ensure_dir_exists(track.cache_entry), codec,
                                bitrate)

        return self.read(track)
Example #9
def run_inference(batch_nm='', exp_nm='', custom_hyperparams=''):
    parse_custom_hyperparams(custom_hyperparams)
    set_random_seed()

    # Load data
    dataset_nm = hparams['dataset']
    print(f'Loading dataset {dataset_nm} ...')
    data, data_stats = load_data(dataset_nm)
    print(data_stats)

    # Set up environment
    global out_dir
    if batch_nm == '':
        batch_nm = 'unnamed'
    out_dir = out_dir + batch_nm + '/'
    util.ensure_dir_exists(out_dir)
    global fold_nm
    fold_nm = exp_nm

    # Set up model
    print('Setting up model ...')
    model = LowLevelGPModel(data_stats).to(device)

    print('Created parameters:')
    total_params = sum(p.numel() for p in model.parameters()
                       if p.requires_grad)
    for param in model.parameters():
        print(type(param.data), param.shape)

    # Set up optimizers
    sgd_optimizer = pyro.optim.SGD({
        'lr': hparams['learning_rate'],
        'momentum': hparams['momentum'],
        'weight_decay': hparams['weight_decay'],
    })
    optimizer = sgd_optimizer

    create_model_dir()
    copy_model_script()
    print_and_log(f'hparams: {custom_hyperparams}')
    print_and_log(f'Total num. model parameters: {total_params}')
    print_and_log(f'Custom folder name: {exp_nm}')
    print_and_log(f'Dataset name: {dataset_nm}')

    train_model(model, optimizer, data)
    return
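run_inference reads its configuration from a module-level hparams dict that parse_custom_hyperparams fills in. Hypothetical defaults consistent with the keys used above (the actual values belong to the project and are not shown here):

# Hypothetical defaults; parse_custom_hyperparams would overwrite these.
hparams = {
    'dataset': 'example_dataset',
    'learning_rate': 0.1,
    'momentum': 0.9,
    'weight_decay': 1e-4,
}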
Example #10
def propose_genotypes(obs_marginals, out_fn, options=''):
    '''
    Proposes genotypes.
    '''
    if options:
        print(f'Using custom hyperparameters: {options}')
        parse_custom_hyperparams(options)

    setup(obs_marginals['Symbols and linkage group index'])
    groups = util.parse_read_groups(obs_marginals)

    print(f'Proposing genotypes ...')
    gts = get_default_genotypes(obs_marginals, groups)

    out_dir = os.path.dirname(out_fn)
    util.ensure_dir_exists(out_dir)
    print(f'Writing {len(gts)} genotypes to {out_fn} ...')
    with open(out_fn, 'w') as f:
        for gt in gts:
            f.write(f'{gt}\n')

    print('Done.')
    return gts
Example #11
def command_package_init(*args):
    """
    admin package init package_name

    Initialize a new package of Sirikata. Packages are a build of
    Sirikata you might want to execute multiple services from. This
    command sets up the basic directory structure for a package,
    including a customizable configuration file which you probably
    want to edit after running this command.
    """

    if len(args) == 0:
        print('No package name specified')
        return 1
    packname = args[0]

    # Setup build, install, and data directories
    util.ensure_dir_exists(package_path(packname))

    # Touch an empty config.py where the user can adjust settings
    config_py_file = open(package_path(packname, 'config.py'), 'w')
    config_py_file.close()

    return 0
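command_package_init relies on a package_path helper that is not shown. A plausible sketch, assuming packages live under a single configured root directory (both the root and the helper body are assumptions):

import os

PACKAGES_ROOT = os.path.expanduser('~/sirikata/packages')  # assumed location

def package_path(packname, *parts):
    # Join the package name (and any sub-paths) onto the packages root.
    return os.path.join(PACKAGES_ROOT, packname, *parts)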
Example #12
# if cmd_line_opts.mode in ['simo', 'mimo']:
#     ax_params.append({
#         "name": "num_models",
#         "type": "range",
#         "bounds": [2, 8],
#     })

ax = AxClient()
ax.create_experiment(
    name="ensemble_net_tuning",
    parameters=ax_params,
    objective_name="final_loss",
    minimize=True,
)

u.ensure_dir_exists("logs/%s" % cmd_line_opts.group)
log = open("logs/%s/ax_trials.tsv" % cmd_line_opts.group, "w")
print("trial_index\tparameters\truntime\tfinal_loss", file=log)

end_time = time.time() + cmd_line_opts.run_time_sec

while time.time() < end_time:

    parameters, trial_index = ax.get_next_trial()
    log_record = [trial_index, json.dumps(parameters)]
    print("starting", log_record)

    class Opts(object):
        pass
    opts = Opts()
Example #13
fitness_log = u.Log(opts.fitness_log_file)

generation = 0
while not es.stop():
    # fetch next set of trials
    trial_weights = es.ask()

    # run eval
    fitnesses = []
    for member_idx, weights in enumerate(trial_weights):
        agent.set_weights_of_model(weights)
        fitness = cartpole.fitness(agent)
        fitnesses.append(fitness)

    # update es
    # note: cma es is trying to minimise
    es.tell(trial_weights, -1 * np.array(fitnesses))

    # save best weights
    if opts.weights_dir is not None:
        u.ensure_dir_exists(opts.weights_dir)
        np.save("%s/%05d.npy" % (opts.weights_dir, generation), es.result[0])

    # give results
    generation += 1
    fitness_log.log(generation, np.max(fitnesses))
    es.result_pretty()
    sys.stdout.flush()

es.disp()
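The generation loop in this example follows the ask/tell interface of the cma package; a hypothetical setup consistent with it (the initial weight vector and step size are assumptions, normally derived from the agent's model and command-line options):

import cma
import numpy as np

initial_weights = np.zeros(64)  # assumed flat weight vector for the agent
es = cma.CMAEvolutionStrategy(initial_weights, 0.5)  # sigma0 = 0.5 is illustrative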
Example #14
from PIL import Image, ImageDraw
import os
from data import H, W
import numpy as np
import util as u

# TODO: add opts

# generate synthetic toy data where each frame is its own colour.
for frame_num, colour in enumerate(
    ['#ff0000', '#ffff00', '#00ff00', '#00ffff', '#0000ff', '#ff00ff']):
    for run_id in range(2):
        for camera_id in range(5):
            output_dir = "imgs/00_rgb/c%02d/r%02d/" % (camera_id, run_id)
            u.ensure_dir_exists(output_dir)

            img = Image.new('RGB', (W, H), (0, 0, 0))
            canvas = ImageDraw.Draw(img)

            # choose rectangle of min size
            rectangle_area = 0
            while rectangle_area < 1000:
                x0, x1 = np.random.randint(0, W, size=2)
                y0, y1 = np.random.randint(0, H, size=2)
                rectangle_area = np.abs((x1 - x0) * (y1 - y0))
                print("frame_num", frame_num, "rectangle_area", rectangle_area)
            canvas.rectangle([x0, y0, x1, y1], fill=colour)

            img.save("%s/f%03d.png" % (output_dir, frame_num))
Example #15
#!/usr/bin/python3

from fuse import FUSE
from mpd_fs import MpdFilesystem
from util import ensure_dir_exists

if __name__ == "__main__":
    FUSE(MpdFilesystem(), ensure_dir_exists("Music/"), foreground=True)
Example #16
import _config, util
import sys, os, pickle, fnmatch, datetime, subprocess, functools, re
import numpy as np, pandas as pd
from collections import defaultdict, Counter

import b_graph, segment, _qsub
import _notelines, _movement, _stepcharts

# Default params
inp_dir_b = _config.OUT_PLACE + 'b_graph/'
inp_dir_segment = _config.OUT_PLACE + 'segment/'
inp_dir_c = _config.OUT_PLACE + 'c_dijkstra/'
NAME = util.get_fn(__file__)
out_dir = _config.OUT_PLACE + NAME + '/'
util.ensure_dir_exists(out_dir)

mover = None

import _annotate_local, _annotate_global, _annotate_post

annot_types = _annotate_local.annot_types
annot_types.update(_annotate_global.annot_types)
annot_types.update(_annotate_post.annot_types)

add_annots = {
    'Notes per second since downpress': float,
}
annot_types.update(add_annots)
Example #17
 def __init__(self):
     self.cache_dir = ensure_dir_exists("Cache/")
     self.extension = ".mp3"
Example #18
feature_matrix_filename = "/var/shared/openstream/examples/jacobi-2d/14_11_1_instances.csv"

if experiment_folder.startswith("/"):
    output_schedule_filename = str(
        experiment_folder) + "/schedules/schedule.csv"
    MODEL_SAVE_FILENAME = experiment_folder + "/" + MODEL_SAVE_FILENAME
else:  # relative
    p = subprocess.Popen("echo `pwd`/" + str(experiment_folder) +
                         "/schedules/schedule.csv",
                         stdout=subprocess.PIPE,
                         shell=True)
    output, err = p.communicate()
    output_schedule_filename = output.decode("utf-8").replace("\n", "")
    MODEL_SAVE_FILENAME = "./" + experiment_folder + "/" + MODEL_SAVE_FILENAME

util.ensure_dir_exists(output_schedule_filename)
util.ensure_dir_exists(MODEL_SAVE_FILENAME)

logging.debug("Initializing...")

logging.debug("Agent...")
agent = Agent(saved_execution_times_prefix,
              adjacency_matrix_filename,
              feature_matrix_filename,
              benchmark,
              execution_features,
              output_schedule_filename=output_schedule_filename,
              adjacency_is_sparse=SPARSE_ADJ,
              num_repeats=NUM_REPEATS)

logging.debug("Replay Memory...")
Example #19
    full_rgbs_t1.append(full_rgb)
    full_dithers_t1.append(full_true_dither)
full_rgbs_t1 = np.stack(full_rgbs_t1)
full_dithers_t0 = np.stack(full_dithers_t0)
full_dithers_t1 = np.stack(full_dithers_t1)


# jit the generator now (we'll use it for predicting against the full res
# images) and also the two loss fns
if JIT:
    generator = objax.Jit(generator)
    generator_loss = objax.Jit(generator_loss, generator.vars())
    discriminator_loss = objax.Jit(discriminator_loss, discriminator.vars())

# setup output directory for full res samples
u.ensure_dir_exists("full_res_samples/%s" % RUN)
if os.path.exists("full_res_samples/latest"):
    os.remove("full_res_samples/latest")
os.symlink(RUN, "full_res_samples/latest")

# init dataset iterator
dataset = data.dataset(manifest_file=opts.manifest_file,
                       batch_size=opts.batch_size,
                       patch_size=opts.patch_size)

# set up ckpting for G and D
generator_ckpt = objax.io.Checkpoint(
    logdir=f"ckpts/{RUN}/generator/", keep_ckpts=20)
discriminator_ckpt = objax.io.Checkpoint(
    logdir=f"ckpts/{RUN}/discriminator/", keep_ckpts=20)
Example #20
parser.add_argument('--epochs', type=int, default=20)
parser.add_argument('--steps-per-epoch', type=int, default=20)
parser.add_argument(
    '--run',
    type=str,
    default='.',
    help='run name to use as postfix for model saving, tb output')
parser.add_argument('--model-input',
                    type=str,
                    default=None,
                    help='if set, load weights from this model file')

opts = parser.parse_args()
print(opts)

u.ensure_dir_exists("runs/%s" % opts.run)

triplet_selector = triplet_selection.TripletSelection(
    opts.img_dir, opts.negative_frame_range, opts.negative_selection_mode)

examples = data.a_p_n_iterator(opts.batch_size, triplet_selector)

model, inputs, loss_fn = m.construct_model(opts.embedding_dim,
                                           opts.model_input,
                                           opts.learning_rate, opts.margin)


class NumZeroLossCB(callbacks.Callback):
    def __init__(self, batch_size=16):
        self.batch_size = batch_size
        self.sess = tf.Session()
Example #21
def predict(obs_reads_df, proposed_gts, out_dir, options=''):
    '''
    Main public-facing function.
    '''
    read_segments = util.parse_read_groups(obs_reads_df)
    check_valid_input(obs_reads_df, read_segments, proposed_gts)

    if options:
        print(f'Using custom hyperparameters: {options}')
        parse_custom_hyperparams(options)

    set_random_seed()

    # Load data and setup
    dataset = MarginalDirectedEvolutionDataset(obs_reads_df, proposed_gts,
                                               read_segments)
    updated_proposed_genotypes = dataset.genotypes

    print('Setting up ...')
    model = MarginalFitnessModel(dataset.package).to(device)

    # for param in model.parameters():
    #   print(type(param.data), param.shape)

    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=hparams['learning_rate'],
        weight_decay=hparams['weight_decay'],
    )

    schedulers = {
        'plateau':
        torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            patience=hparams['plateau_patience'],
            threshold=hparams['plateau_threshold'],
            factor=hparams['plateau_factor'],
            verbose=True,
            threshold_mode='rel',
        )
    }

    global model_dir
    model_dir = out_dir
    if out_dir[-1] != '/':
        out_dir += '/'
    util.ensure_dir_exists(out_dir)

    global log_fn
    log_fn = out_dir + '_log.out'
    with open(log_fn, 'w') as f:
        pass
    print_and_log('model dir: ' + model_dir)

    # Inference
    print(f'Running inference ...')
    fitness, fq_mat, pred_marginals = train_model(model, optimizer, schedulers,
                                                  dataset)

    package = {
        'fitness': fitness,
        'fq_mat': fq_mat,
        'pred_marginals': pred_marginals,
        'updated_proposed_gts': updated_proposed_genotypes,
        'num_updated_proposed_gts': len(updated_proposed_genotypes),
        'fitness_df': pd.DataFrame({
            'Genotype': list(updated_proposed_genotypes),
            'Inferred fitness': fitness,
        }),
        'genotype_matrix': pd.DataFrame(
            fq_mat.T,
            index=updated_proposed_genotypes,
            columns=dataset.timepoints,
        ),
    }

    # Save results
    for fn, df in {
            '_final_fitness.csv': package['fitness_df'],
            '_final_genotype_matrix.csv': package['genotype_matrix'],
    }.items():
        print(f'Saving {out_dir}{fn} ...')
        df.to_csv(out_dir + fn)

    out_fn = out_dir + '_final_package.pkl'
    print(f'Saving {out_fn} ...')
    with open(out_fn, 'wb') as f:
        pickle.dump(package, f)

    print('Done.')
    return package
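A hypothetical call to predict, assuming an observed-reads table on disk and a few proposed genotype strings (the file name, column layout and genotype values are illustrative only):

import pandas as pd

obs_reads_df = pd.read_csv('obs_reads.csv')   # assumed input table
proposed_gts = ['AAB', 'ABB', 'BBB']          # assumed genotype strings
package = predict(obs_reads_df, proposed_gts, out_dir='out/run1')
print(package['fitness_df'])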