def test_taskgenerator_map():
    """Run the ``mapgenerator.py`` jugfile and check the mapped outputs.

    The ``s`` entry is checked with ``can_run()`` before anything is
    executed; after execution both ``v2s`` and ``v4s`` must hold 16 values.
    """
    jugfile_path = os.path.join(_jugdir, 'mapgenerator.py')
    _store, jugspace = jug.jug.init(jugfile_path, 'dict_store')

    s_runnable = jugspace['s'].can_run()
    assert not s_runnable

    simple_execute()

    for key in ('v2s', 'v4s'):
        assert len(value(jugspace[key])) == 16
def test_nfoldcrossvalidation():
    """Execute the jugparallel cross-validation jugfile and check result sizes."""
    backend, jugspace = jug.jug.init(
        'milk/tests/data/jugparallel_jugfile.py',
        'dict_store',
    )
    run_options = jug.options.default_options
    jug.jug.execute(backend, run_options)

    classified = jug.value(jugspace['classified'])
    assert len(classified) == 2
    classified_wpred = jug.value(jugspace['classified_wpred'])
    assert len(classified_wpred) == 3
def test_util_timed_path():
    """Check that the hash of a ``timed_path`` changes when the file is rewritten.

    Two ``timed_path`` objects for the same file must hash identically.
    After the file is rewritten (more than a second later) the hash must
    differ from the one taken earlier, and the value of both objects must
    still be the path itself.

    The file lives in a private temporary directory that is removed at the
    end, so the test neither depends on an external ``touch`` command nor
    leaves ``test_file`` behind in the current working directory.
    """
    import os
    import shutil
    import tempfile
    from jug.hash import hash_one

    def touch(fname):
        # Portable stand-in for shelling out to `touch`: rewriting the file
        # refreshes its modification time.
        with open(fname, 'w') as out:
            out.write("Hello World")

    jug.task.Task.store = dict_store()
    workdir = tempfile.mkdtemp()
    try:
        test_file = os.path.join(workdir, 'test_file')
        touch(test_file)
        t0 = jug.utils.timed_path(test_file)
        t1 = jug.utils.timed_path(test_file)
        h0 = hash_one(t0)
        h1 = hash_one(t1)
        assert h0 == h1

        # Wait long enough for the second write to get a different timestamp.
        sleep(1.1)
        touch(test_file)
        h1 = hash_one(t1)
        assert h0 != h1

        assert value(t0) == test_file
        assert value(t1) == test_file
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_kmeans():
    """Execute the jugparallel k-means jugfile and check the ``clustered`` result.

    The jugfile is loaded into an in-memory ``dict_store`` and executed with
    jug's default options; ``clustered`` must then contain two items.
    """
    # The former `from jug.task import alltasks` was never used here.
    store, space = jug.jug.init(
        'milk/tests/data/jugparallel_kmeans_jugfile.py',
        'dict_store',
    )
    options = jug.options.default_options
    jug.jug.execute(store, options)
    assert len(value(space['clustered'])) == 2
def index_metadata(container):
    """Chain the indexing steps for a container and return the final step's result.

    Parameters
    ----------
    container : str or object with a ``name`` attribute
        A string is used directly as the file name; for anything else the
        ``name`` attribute is used.

    Returns
    -------
    Whatever ``_append_moov`` returns for the container, the external moov
    file and the final mdat end offset.
    """
    container_filename = container if isinstance(container, str) else container.name

    # Init mdat size: run the step now unless its result can already be loaded.
    offset_task = _init_mdat_size(container_filename)
    must_run = not offset_task.can_load() and offset_task.can_run()
    if must_run:
        offset_task.run()
    mdat_input_offset = jug.value(offset_task)

    # Parse samples mp4 boxes headers
    # NOTE(review): the original carried the literal
    # b'4c04f56f18ab3a4cd039c28535df69471abab913' at this point; it has no
    # effect on execution -- presumably a leftover debugging note, confirm.
    samples_headers = _get_samples_headers(container_filename, mdat_input_offset)

    # Create external moov file to avoid corruptions in the container
    moov_filename = _create_moov_file(container_filename, samples_headers)

    # Each step receives the end offset produced by the previous one.
    input_end = _index_bzna_input(container_filename, moov_filename, mdat_input_offset)
    target_end = _append_index_bzna_target(container_filename, moov_filename, input_end)
    fname_end = _append_index_bzna_fname(container_filename, moov_filename, target_end)
    thumb_end = _index_bzna_thumb(container_filename, moov_filename, fname_end)
    data_end = _update_mdat_size(container_filename, thumb_end)

    return _append_moov(container_filename, moov_filename, data_end)
def test_currymap():
    """``currymap`` of ``mapper2`` over pairs ``(a, a)`` must be close to ``2 * a``."""
    np.random.seed(33)
    jug.task.Task.store = dict_store()

    samples = np.random.rand(100)
    paired = list(zip(samples, samples))
    tasks = jug.mapreduce.currymap(mapper2, paired)
    simple_execute()

    computed = np.array(value(tasks))
    expected = samples * 2
    assert np.allclose(computed, expected)
def test_currymap():
    """``currymap`` of ``mapper2`` over pairs ``(a, a)`` must be close to ``2 * a``."""
    np.random.seed(33)
    jug.task.Task.store = dict_store()

    samples = np.random.rand(100)
    paired = list(zip(samples, samples))
    tasks = jug.mapreduce.currymap(mapper2, paired)
    simple_execute()

    computed = np.array(value(tasks))
    expected = samples * 2
    assert np.allclose(computed, expected)
def test_map():
    """``jug.mapreduce.map`` must give the same values as applying ``mapper`` directly."""
    np.random.seed(33)
    jug.task.Task.store = dict_store()

    inputs = np.random.rand(10000)
    mapped = jug.mapreduce.map(mapper, inputs)
    simple_execute()

    computed = value(mapped)
    expected = np.array([mapper(item) for item in inputs])
    assert np.all(computed == expected)
def test_map():
    """``jug.mapreduce.map`` must give the same values as applying ``mapper`` directly."""
    np.random.seed(33)
    jug.task.Task.store = dict_store()

    inputs = np.random.rand(10000)
    mapped = jug.mapreduce.map(mapper, inputs)
    simple_execute()

    computed = value(mapped)
    expected = np.array([mapper(item) for item in inputs])
    assert np.all(computed == expected)
def test_map():
    """``jug.mapreduce.map`` must give the same values as applying ``mapper`` directly.

    The mapped task is run with ``dfs_run`` and its value is compared
    element-wise against ``mapper`` applied to every input.
    """
    np.random.seed(33)
    jug.task.Task.store = dict_store()
    A = np.random.rand(10000)
    t = jug.mapreduce.map(mapper, A)
    dfs_run(t)
    ts = value(t)
    # On Python 3 the builtin map() returns an iterator; np.array() would wrap
    # the iterator object itself instead of its elements, so materialise it.
    expected = np.array(list(map(mapper, A)))
    assert np.all(ts == expected)
def test_nld_access_results(jugdir):
    """Run the primes jugfile through gridjug with the NLD parameters and read ``primes10``."""
    gridjug.grid_jug(
        jugfile=PRIMES_JUGFILE,
        jugdir=jugdir,
        **NLD_GRIDMAP_PARAMS
    )

    _store, jugspace = jug.init(jugfile=PRIMES_JUGFILE, jugdir=jugdir)
    expected = [True, True, False, True, False, True, False, False, False]
    assert jug.value(jugspace['primes10']) == expected
def test_kmeans():
    """Load the k-means jugfile, check the task count, execute, check ``clustered``."""
    from jug.task import alltasks

    backend, jugspace = jug.jug.init(
        'milk/tests/data/jugparallel_kmeans_jugfile.py',
        'dict_store',
    )
    run_options = jug.options.default_options

    # The task count is checked after loading and before execution.
    n_tasks = len(alltasks)
    assert n_tasks == 5

    jug.jug.execute(backend, run_options)
    clustered = value(jugspace['clustered'])
    assert len(clustered) == 2
def test_access_results(tmpdir):
    """Run the primes jugfile locally through gridjug and read back ``primes10``."""
    jugdir_path = tmpdir.strpath
    gridjug.grid_jug(jugfile=PRIMES_JUGFILE, jugdir=jugdir_path, local=True)

    _store, jugspace = jug.init(jugfile=PRIMES_JUGFILE, jugdir=jugdir_path)
    expected = [True, True, False, True, False, True, False, False, False]
    assert jug.value(jugspace['primes10']) == expected
def test_nld_access_results(jugdir):
    """Run the primes jugfile through gridjug with the NLD parameters and read ``primes10``."""
    gridjug.grid_jug(jugfile=PRIMES_JUGFILE, jugdir=jugdir, **NLD_GRIDMAP_PARAMS)

    _store, jugspace = jug.init(jugfile=PRIMES_JUGFILE, jugdir=jugdir)
    expected = [True, True, False, True, False, True, False, False, False]
    assert jug.value(jugspace['primes10']) == expected
def test_util_timed_path(tmpdir):
    """Check that the hash of a ``timed_path`` changes when the file is rewritten.

    Two ``timed_path`` objects for the same file hash identically; after the
    file is rewritten 1.1 seconds later the hash differs from the earlier
    one. The value of both objects is the file path.
    """
    from jug.hash import hash_one

    def rewrite(fname):
        # Write the same fixed content each time; only the rewrite matters.
        with open(fname, 'wt') as out:
            out.write("Hello World")

    jug.task.Task.store = dict_store()
    tmpdir = str(tmpdir)
    test_file = path.join(tmpdir, 'test_file')
    rewrite(test_file)

    t0 = jug.utils.timed_path(test_file)
    t1 = jug.utils.timed_path(test_file)
    h0 = hash_one(t0)
    h1 = hash_one(t1)
    assert h0 == h1

    sleep(1.1)
    rewrite(test_file)
    h1 = hash_one(t1)
    assert h0 != h1

    for timed in (t0, t1):
        assert value(timed) == test_file
def assemble_sasa_h5(sasas, filename):
    """Write the value of every entry in ``sasas`` into a new HDF5 file.

    Each entry's ``.t`` is resolved with ``jug.value`` and stored as a
    zlib-compressed carray named ``sasas_<zero-padded index>``; the entry is
    unloaded again after it has been written.

    Parameters
    ----------
    sasas : sequence
        Objects exposing a ``.t`` attribute that ``jug.value`` can resolve
        and that supports ``.unload()``.
    filename : str
        Path of the HDF5 file to create. Missing parent directories are
        created; a bare file name (no directory part) is accepted.

    Returns
    -------
    str
        ``filename``, unchanged.

    Raises
    ------
    FileExistsError
        If ``filename`` already exists as a file.
    AssertionError
        If a later array's second dimension differs from the first array's.
    """
    import os
    import tables
    from tqdm import tqdm

    out_dir = os.path.dirname(filename)
    # A bare file name has an empty dirname and needs no directory; makedirs
    # also copes with several missing levels, which os.mkdir does not.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    if os.path.isfile(filename):
        raise FileExistsError(f"File '{filename}' already exists.")

    compression = tables.Filters(complevel=9, complib='zlib', shuffle=True)
    # Width used to zero-pad the index inside each node name.
    n_zeros = len(str(len(sasas))) + 1

    print(filename)
    with tables.open_file(filename, 'a') as handle:
        shape = None

        for i, sasa in enumerate(tqdm(sasas)):
            data = jug.value(sasa.t)
            atom = tables.Atom.from_dtype(data.dtype)
            tag = 'sasas_' + str(i).zfill(n_zeros)

            if tag in handle.root:
                logger.warning('Tag %s already existed in %s. Overwriting.',
                               tag, filename)
                handle.remove_node('/', name=tag)

            # The first array fixes the reference shape; later arrays must
            # agree with it on the second dimension.
            if shape is None:
                shape = data.shape
            elif len(shape) > 1:
                assert shape[1] == data.shape[1], (
                    "We had %s residues, but then loaded trajectory %s "
                    "and it had %s." % (shape[1], i, data.shape[1]))

            node = handle.create_carray(
                where='/', name=tag, atom=atom,
                shape=data.shape, filters=compression)
            node[:] = data

            sasa.t.unload()

    return filename
def dump(jugfile):
    """Collect the finished results of a jugfile and pickle them.

    The jugfile is loaded as a module and its ``results`` mapping
    (name -> key -> iterable of runs) is walked. Every run whose value can
    be obtained with ``jug.value`` is collected; keys and names left without
    any value are dropped.

    Parameters
    ----------
    jugfile : str
        Path to the jug script. Its data directory is taken to be the same
        path with the extension replaced by ``.jugdata``.

    Returns
    -------
    OrderedDict
        ``name -> key -> list of values`` for everything that could be
        loaded. When non-empty it is also pickled next to the jugfile as
        ``<stem>.pkl``, or ``<stem>.tmp.pkl`` if at least one run failed to
        load.
    """
    # get absolute path and import the Tasks from the associated source code
    jugpath, ext = os.path.splitext(os.path.abspath(jugfile))
    modulename = os.path.basename(jugpath)

    # set path to results and load execution script
    # NOTE: jugdir must be set *before* loading the source
    jug.set_jugdir(jugpath + '.jugdata')
    results = imp.load_source(modulename, jugpath + ext).results

    # initialize dictionary of completed results to be incrementally built below
    completed = OrderedDict()
    incomplete = False

    for name, experiment in results.items():
        completed[name] = OrderedDict()
        for key, task in experiment.items():
            completed[name][key] = []
            for run in task:
                try:
                    completed[name][key].append(jug.value(run))
                except Exception:
                    # Best effort: a run whose value cannot be obtained only
                    # marks the dump as partial. Catching Exception rather
                    # than everything lets Ctrl-C / SystemExit propagate.
                    incomplete = True

            # get rid of empty keys
            if len(completed[name][key]) == 0:
                completed[name].pop(key)

        if not completed[name]:
            completed.pop(name)

    # if dictionary is not empty, dump to pickle file
    if completed:
        if incomplete:
            jugpath += '.tmp'
        # Pickle data is binary; a text-mode handle fails on Python 3.
        with open(jugpath + '.pkl', 'wb') as fp:
            pickle.dump(completed, fp)

    return completed
def _index_bzna_input(filename, moov_filename, mdat_input_offset):
    """Rebuild the ``bzna_input`` trak in the external moov file.

    Loads the moov from ``moov_filename``, drops an existing
    ``bzna_input`` trak if one is found, computes one size per sample from
    the box headers, appends a freshly made trak and writes the moov back
    to ``moov_filename``.

    Parameters:
        filename: passed to ``_get_samples_headers`` together with
            ``mdat_input_offset``.
        moov_filename: path the moov is loaded from and written back to.
        mdat_input_offset: offset handed to ``_get_samples_headers`` and
            ``_make_bzna_input_trak``.

    Returns:
        ``mdat_input_offset`` plus the sum of all sample sizes.
    """
    moov = _load_moov(moov_filename)

    mvhd = next(find_boxes(moov.boxes, [b"mvhd"]))

    samples_headers = _get_samples_headers(filename, mdat_input_offset)
    # _get_samples_headers' result is resolved through jug before iterating.
    samples_headers = jug.value(samples_headers)

    # bzna_input trak
    # If such a trak already exists, filter it out of the moov's boxes (by
    # identity) so that it is replaced rather than duplicated.
    if next(find_traks(moov.boxes, [b"bzna_input\0"]), None) is not None:
        trak = next(find_traks(moov.boxes, [b"bzna_input\0"]))
        moov.boxes = [box for box in moov.boxes if box is not trak]

    samples_size = 0
    # -1 marks "no sample started yet", so nothing is flushed at the first ftyp.
    sample_size = -1
    sizes = []

    for sample_header in samples_headers:
        # Every sample starts with a ftyp box
        if sample_header.type == b"ftyp":
            # Flush the previous sample's accumulated size before starting anew.
            if sample_size >= 0:
                sizes.append(sample_size)
                samples_size += sample_size
            sample_size = 0

        sample_size += sample_header.box_size

    # Flush the last sample, which has no following ftyp to trigger it.
    # NOTE(review): if no header was seen, sample_size is still -1 here and
    # is appended as is -- confirm that empty input cannot occur.
    sizes.append(sample_size)
    samples_size += sample_size

    # MOOV.TRAK
    trak = _make_bzna_input_trak(sizes, mdat_input_offset, mvhd.next_track_id)
    moov.append(trak)
    mvhd.next_track_id += 1

    moov.refresh_box_size()

    with open(moov_filename, "wb") as moov_file:
        moov_file.write(bytes(moov))

    return mdat_input_offset + samples_size
def test_kmeans():
    """Execute the jugparallel k-means jugfile and check ``clustered`` has two items."""
    _store, jugspace = jug.jug.init(
        "milk/tests/data/jugparallel_kmeans_jugfile.py",
        "dict_store",
    )
    simple_execute()
    clustered = value(jugspace["clustered"])
    assert len(clustered) == 2
def test_empty_mapreduce():
    """After executing ``empty_mapreduce.py`` the ``two`` entry must be an empty list."""
    _store, jugspace = jug.jug.init(
        'jug/tests/jugfiles/empty_mapreduce.py',
        'dict_store',
    )
    simple_execute()
    result = value(jugspace['two'])
    assert result == []
def twice(x):
    """Apply ``double`` two times, with a ``barrier()`` between the two applications."""
    first_pass = double(x)
    barrier()
    # The first result is resolved with value() before it is doubled again.
    first_value = value(first_pass)
    return double(first_value)
def sum_partials(ts):
    """Return the running float sum of ``jug.value(t.t)`` over every ``t`` in ``ts``."""
    accumulated = 0.0
    for partial in ts:
        partial_value = jug.value(partial.t)
        accumulated = accumulated + partial_value
    return accumulated
def test_taskgenerator_map():
    """After executing ``mapgenerator.py`` the ``v2s`` entry must hold 16 values."""
    _store, jugspace = jug.jug.init(
        'jug/tests/jugfiles/mapgenerator.py',
        'dict_store',
    )
    simple_execute()
    v2s = value(jugspace['v2s'])
    assert len(v2s) == 16
args = parser.parse_args() # set where the data is stored path = os.path.realpath(args.path) jugdata = os.path.join(path, 'execute.jugdata') set_jugdir(jugdata) # import data place holder execute_path = os.path.join(path, 'execute.py') data = imp.load_source('execute', execute_path).data for function in data.keys(): ax = pl.figure(1).gca() ax.cla() for method in data[function].keys(): runs = np.array(value(data[function][method])) N = runs.shape[0] T = runs.shape[1] x = np.arange(1, T+1) y = runs.mean(axis=0) e = runs.std(axis=0) / np.sqrt(N) * 3 ax.plot(x, y, lw=2, label=method) ax.fill_between(x, y-e, y+e, color=ax.lines[-1].get_color(), alpha=0.1) ax.axis('tight') ax.axis(xmin=0, xmax=T) ax.set_xlabel('iterations') ax.set_ylabel('function value') ax.legend(loc='best') ax.figure.canvas.draw()
# Plot, for every experiment in `example.results`, the mean objective value
# obtained by each method; one subplot per experiment.
import benchfunk
import matplotlib.pyplot as plt
import numpy as np
from example import results
import jug

jug.set_jugdir('example.jugdata')

# squeeze=False makes `axs` a 2-D array even when there is only a single
# experiment; by default plt.subplots would return a bare Axes object in that
# case and the zip() below would fail.
fig, axs = plt.subplots(1, len(results), figsize=(5 * len(results), 4),
                        sharex=True, squeeze=False)

for ax, (name, res) in zip(axs[0], results.items()):
    # Experiment names have the form "<func>(<sn2>)".
    func, sn2 = name.split('(')
    sn2 = float(sn2[:-1])
    obj = getattr(benchfunk.functions, func)(sn2)

    for key, xbest in res.items():
        xbest = jug.value(xbest)
        xbest = np.array(xbest)
        # Evaluate the objective on every run, then average across runs.
        ybest = np.array([obj.get_f(run) for run in xbest])
        ax.plot(ybest.mean(0), label=key)

    ax.set_title(name)

plt.show()
def test_empty_mapreduce():
    """After executing ``empty_mapreduce.py`` the ``two`` entry must be an empty list."""
    jugfile_path = os.path.join(_jugdir, 'empty_mapreduce.py')
    _store, jugspace = jug.jug.init(jugfile_path, 'dict_store')
    simple_execute()
    result = value(jugspace['two'])
    assert result == []
# This tests an important regression:
# adding the module to the module map *before* execfile()ing the jugfile makes
# this not work.
#
# The module-level layout (globals used from inside `double`, the import inside
# `product`) is kept exactly as it was.

from jug import barrier, Task, value
import jug.mapreduce
import math
# `reduce` is no longer a builtin on Python 3; functools.reduce is available
# on Python 2.6+ as well.
from functools import reduce


def double(x):
    """Return ``x`` times ``sqrt(2) * sqrt(2)``."""
    val = math.sqrt(2.)*math.sqrt(2.)
    return x*val


two = jug.mapreduce.map(double, range(20))
barrier()


def product(vals):
    """Return the product of all items in ``vals``."""
    import operator
    return reduce(operator.mul, vals)


values = product(value(two))
def test_taskgenerator_map():
    """After executing ``mapgenerator.py`` the ``v2s`` entry must hold 16 values."""
    jugfile_path = os.path.join(_jugdir, 'mapgenerator.py')
    _store, jugspace = jug.jug.init(jugfile_path, 'dict_store')
    simple_execute()
    v2s = value(jugspace['v2s'])
    assert len(v2s) == 16
def __call__(self, i):
    """Run ``milk.nfoldcrossvalidation`` for the single fold ``i``.

    The stored features, labels and keyword arguments are each resolved
    with ``value`` before being passed on.
    """
    features = value(self.features)
    labels = value(self.labels)
    extra_kwargs = value(self.kwargs)
    return milk.nfoldcrossvalidation(features, labels, folds=[i], **extra_kwargs)
# This tests an important regression: # adding the module to the module map *before* execfile()ing the jugfile makes # this not work. from jug import barrier, Task, value import jug.mapreduce import math from functools import reduce def double(x): val = math.sqrt(2.)*math.sqrt(2.) return x*val two = jug.mapreduce.map(double, list(range(20))) barrier() def product(vals): import operator return reduce(operator.mul, vals) values = product(value(two))
def test_kmeans():
    """Execute the jugparallel k-means jugfile and check ``clustered`` has two items."""
    jugfile = 'milk/tests/data/jugparallel_kmeans_jugfile.py'
    _store, jugspace = jug.jug.init(jugfile, 'dict_store')
    simple_execute()
    clustered = value(jugspace['clustered'])
    assert len(clustered) == 2
def test_nfoldcrossvalidation():
    """Execute the jugparallel cross-validation jugfile and check result sizes."""
    jugfile = 'milk/tests/data/jugparallel_jugfile.py'
    _store, jugspace = jug.jug.init(jugfile, 'dict_store')
    simple_execute()

    classified = jug.value(jugspace['classified'])
    assert len(classified) == 2
    classified_wpred = jug.value(jugspace['classified_wpred'])
    assert len(classified_wpred) == 3
# Plot, for every experiment in `example.results`, the mean objective value
# obtained by each method; one subplot per experiment.
import benchfunk
import matplotlib.pyplot as plt
import numpy as np
from example import results
import jug

jug.set_jugdir('example.jugdata')

# squeeze=False makes `axs` a 2-D array even when there is only a single
# experiment; by default plt.subplots would return a bare Axes object in that
# case and the zip() below would fail.
fig, axs = plt.subplots(1, len(results), figsize=(5*len(results), 4),
                        sharex=True, squeeze=False)

for ax, (name, res) in zip(axs[0], results.items()):
    # Experiment names have the form "<func>(<sn2>)".
    func, sn2 = name.split('(')
    sn2 = float(sn2[:-1])
    obj = getattr(benchfunk.functions, func)(sn2)

    for key, xbest in res.items():
        xbest = jug.value(xbest)
        xbest = np.array(xbest)
        # Evaluate the objective on every run, then average across runs.
        ybest = np.array([obj.get_f(run) for run in xbest])
        ax.plot(ybest.mean(0), label=key)

    ax.set_title(name)

plt.show()
git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD'])[:6] jug_dict = jug.init('experiments_explore.py', 'experiments_explore.jugdata/')[1] exs_stut = jug_dict['stut_experiments'] exs_gauss = jug_dict['gauss_experiments'] exs_survival = jug_dict['survival_experiments'] exs = exs_stut + exs_gauss + exs_survival filename = 'results/experiments_explore_data_all-{}-{}.pkl'.format(datetime.datetime.now().strftime('%d%m%y_%H:%M:%S'), git_hash) es = [] i = 0 for e in exs: try: es.append(jug.value(e.__dict__)) except Exception, e: print "Something went wrong" print e print i i += 1 #es are all our experiments wrapped up #Lets make a dataframe of results import pandas as pd exs_df = pd.DataFrame(columns=['fold', 'seed', 'num_inducing', 'fixZ', 'f_bias', 'g_bias', 'f_rbf_len', 'g_rbf_len', 'g_mean', 'f_rbf_var']) for e in es: default_d = {'fold':e['fold'], 'seed':e['seed'], 'num_inducing':e['num_inducing'], 'fixZ':e['fixZ'],
def _run_tasks(tasks) -> list:
    """Run every task after its recursive dependencies and return the values.

    A task is run only when its result cannot be loaded and it reports that
    it can run. The return value is ``jug.value(tasks)``.
    """
    for current in tasks:
        # Dependencies first; the value returned by the recursive call is unused.
        _run_tasks(recursive_dependencies(current))
        if current.can_load():
            continue
        if current.can_run():
            current.run()
    return jug.value(tasks)
# Load the `data` placeholder of an experiment's execute.py, resolve it through
# jug and pickle the result to `results/<experiment name>.pkl` next to this
# script.
try:
    import cPickle as pkl
except ImportError:
    # Python 3: cPickle no longer exists; pickle takes its place.
    import pickle as pkl
import argparse
import os.path
import imp

from jug import set_jugdir, value

# parse the path of the experiment to plot
parser = argparse.ArgumentParser()
parser.add_argument('path')
args = parser.parse_args()

# set where the data is stored
# (the trailing slash is stripped so that basename() below yields the
# experiment directory's name)
path = os.path.realpath(args.path.rstrip('/'))
jugdata = os.path.join(path, 'execute.jugdata')
set_jugdir(jugdata)

# import data place holder and fill in values
execute_path = os.path.join(path, 'execute.py')
data = imp.load_source('execute', execute_path).data
data = value(data)

# save to results directory
root_path = os.path.dirname(os.path.realpath(__file__))
expt_name = os.path.basename(path)
results_path = os.path.join(root_path, 'results')
if not os.path.isdir(results_path):
    os.mkdir(results_path)
pkl_file = os.path.join(results_path, expt_name)
with open(pkl_file + '.pkl', 'wb') as f:
    pkl.dump(data, f)
def test_nfoldcrossvalidation():
    """Execute the jugparallel cross-validation jugfile and check result sizes."""
    backend, jugspace = jug.jug.init(
        'milk/tests/data/jugparallel_jugfile.py',
        'dict_store',
    )
    run_options = jug.options.default_options
    jug.jug.execute(backend, run_options)

    classified = jug.value(jugspace['classified'])
    assert len(classified) == 2
    classified_wpred = jug.value(jugspace['classified_wpred'])
    assert len(classified_wpred) == 3
def sum_partials(ts):
    """Return the running float sum of ``jug.value(t.base)`` over every ``t`` in ``ts``."""
    accumulated = 0.0
    for partial in ts:
        partial_value = jug.value(partial.base)
        accumulated = accumulated + partial_value
    return accumulated
def test_nfoldcrossvalidation():
    """Execute the jugparallel cross-validation jugfile and check result sizes."""
    jugfile = "milk/tests/data/jugparallel_jugfile.py"
    _store, jugspace = jug.jug.init(jugfile, "dict_store")
    simple_execute()

    classified = jug.value(jugspace["classified"])
    assert len(classified) == 2
    classified_wpred = jug.value(jugspace["classified_wpred"])
    assert len(classified_wpred) == 3