def prob_data(self):
    return invoke_in_process_pool(
        3,
        Callable(predict_estimator, self.best_model,
                 self.data_loader.train_x, self.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.data_loader.validation_x, self.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.data_loader.test_x, self.data_loader.test_y))
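# These snippets lean on an `asynch` helper pair, Callable and
# invoke_in_process_pool. A minimal sketch of the assumed contract follows
# (a hypothetical re-implementation, not the real asynch module): Callable
# bundles a picklable function with its arguments, and the pool runs each
# bundle in a worker process, returning results in submission order. Some
# call sites also pass a leading tag string (e.g. "show_samples"); that
# variant is assumed to differ only by the extra label.
import concurrent.futures


class Callable:
    """Bundle a function and its arguments so the pair can be pickled."""

    def __init__(self, fn, *args, **kwargs):
        self.fn, self.args, self.kwargs = fn, args, kwargs

    def __call__(self):
        return self.fn(*self.args, **self.kwargs)


def invoke_in_process_pool(num_workers, *callables):
    if num_workers <= 0:  # run inline, useful for debugging
        return [c() for c in callables]
    with concurrent.futures.ProcessPoolExecutor(num_workers) as pool:
        futures = [pool.submit(c) for c in callables]
        return [f.result() for f in futures]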
def show_samples(self):
    # Note: the original condition used bitwise `&` between comparisons,
    # which only worked by accident of operator precedence; `and` is meant.
    if (self.data_loader.train_x.shape[1] == 1
            and self.data_loader.train_y.shape[1] == 1):
        x_grid = np.linspace(self.data_loader.min_x, self.data_loader.max_x,
                             1000).reshape(-1, 1)
        try:
            samples_res = invoke_in_process_pool(
                "show_samples", 1,
                Callable(
                    self.loaded_exp.train_eval_model_factory.predict_estimator,
                    self.best_model, x_grid, None))[0]
        except NotImplementedError:
            print("not supported")
            return
        plt.figure(figsize=(16, 8))
        plt.plot(self.data_loader.train_x, self.data_loader.train_y, 'r.',
                 alpha=0.3, label="train")
        plt.plot(x_grid, samples_res["samples"], 'g.', alpha=0.3,
                 label="predicted")
        plt.legend()
        plt.show()
    elif self.data_loader.train_x.shape[1] == 0:
        samples_res = invoke_in_process_pool(
            "show_samples", 1,
            Callable(
                self.loaded_exp.train_eval_model_factory.predict_estimator,
                self.best_model, None, None))[0]
        data = samples_res["samples"][:, :]
        df = pd.DataFrame(data)
        fig, ax = plt.subplots(1, 1, figsize=(16, 8))
        axes = pd.plotting.scatter_matrix(df, alpha=0.2, ax=ax, color="black",
                                          label="train", diagonal='kde',
                                          density_kwds={'color': 'black'})
        plt.tight_layout()
        # NOTE: '{ROOT}' is written literally here; it was presumably meant
        # to be interpolated with a root directory (e.g. via str.format).
        plt.savefig('{ROOT}/samples.png')
        # if show:
        #     plt.show()
        return axes
    else:
        print("not supported")
def eval_best_model(self, data=["train", "validation", "test"]):
    callables = {
        "train": Callable(eval_estimator, self.best_model,
                          self.data_loader.train_x, self.data_loader.train_y),
        "validation": Callable(eval_estimator, self.best_model,
                               self.data_loader.validation_x,
                               self.data_loader.validation_y),
        "test": Callable(eval_estimator, self.best_model,
                         self.data_loader.test_x, self.data_loader.test_y)}
    selected = [callables[name] for name in data]
    return OrderedDict(zip(data, invoke_in_process_pool(len(data), *selected)))
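# Example usage (hypothetical `exp` instance): evaluate only two splits in
# parallel; the result is an OrderedDict keyed by the requested split names.
#
#   metrics = exp.eval_best_model(data=["train", "test"])
#   print(metrics["test"])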
def prob_data(self):
    return invoke_in_process_pool(
        "prob_data", 3,
        Callable(self.loaded_exp.train_eval_model_factory.predict_estimator,
                 self.best_model, self.data_loader.train_x,
                 self.data_loader.train_y),
        Callable(self.loaded_exp.train_eval_model_factory.predict_estimator,
                 self.best_model, self.data_loader.validation_x,
                 self.data_loader.validation_y),
        Callable(self.loaded_exp.train_eval_model_factory.predict_estimator,
                 self.best_model, self.data_loader.test_x,
                 self.data_loader.test_y))
def show_prob_data(self):
    res = invoke_in_process_pool(
        3,
        Callable(predict_estimator, self.best_model,
                 self.data_loader.train_x, self.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.data_loader.validation_x, self.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.data_loader.test_x, self.data_loader.test_y))
    splits = [("train", self.data_loader.train_x, self.data_loader.train_y),
              ("validation", self.data_loader.validation_x,
               self.data_loader.validation_y),
              ("test", self.data_loader.test_x, self.data_loader.test_y)]
    # One figure per predicted pdf component, one subplot per data split.
    for i in range(self.data_loader.test_y.shape[1]):
        pdf_name = 'pdf%d' % i
        if pdf_name in res[0]:
            fig, axes = plt.subplots(3, 1, figsize=(12, 8))
            for ax, (name, _, _), split_res in zip(axes, splits, res):
                ax.plot(split_res[pdf_name], alpha=0.3)
                ax.set_title("%s - %s data" % (pdf_name, name))
            plt.tight_layout()
    # Model log-likelihood per point, with the true log-likelihood overlaid
    # when the data loader can compute it.
    fig, axes = plt.subplots(3, 1, figsize=(12, 8))
    for ax, (name, x, y), split_res in zip(axes, splits, res):
        ax.plot(split_res['log_likelihood'], label="model", alpha=0.3)
        if self.data_loader.can_compute_ll():
            ax.plot(self.data_loader.ll(x, y), label="True", alpha=0.3)
        ax.set_title("LL - %s data" % name)
        ax.legend()
    plt.tight_layout()
    plt.show()
def eval_best_model_and_save(self):
    predict_train, predict_valid, predict_test = invoke_in_process_pool(
        self.num_workers,
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.train_x,
                 self.train_eval_model.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.validation_x,
                 self.train_eval_model.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.test_x,
                 self.train_eval_model.data_loader.test_y))
    self.best_model_train_ll = predict_train["log_likelihood"]
    self.best_model_valid_ll = predict_valid["log_likelihood"]
    self.best_model_test_ll = predict_test["log_likelihood"]
    self.save()
def show_distribution_slices(self, x_vals=None):
    # As in show_samples, `and` replaces the original bitwise `&`.
    if (self.data_loader.train_x.shape[1] == 1
            and self.data_loader.train_y.shape[1] == 1):
        y_data = self.data_loader.train_y
        x_data = self.data_loader.train_x
        y_vals = np.linspace(np.min(y_data), np.max(y_data), 100)
        if x_vals is None:
            x_vals = np.linspace(np.min(x_data), np.max(x_data), 10)
        plots = []
        res = invoke_in_process_pool(
            4, *[Callable(predict_estimator, self.best_model,
                          np.ones((len(y_vals), 1)) * x_val,
                          y_vals.reshape(-1, 1)) for x_val in x_vals])
        if self.data_loader.can_compute_ll():
            true_pdf = invoke_in_process_pool(
                8, *[Callable(self.compute_true_pdf,
                              np.ones((len(y_vals), 1)) * x_val,
                              y_vals.reshape(-1, 1)) for x_val in x_vals])
        for i, x_val in enumerate(x_vals):
            cdf_val = res[i].get('cdf0')
            pdf_val = np.exp(res[i]['log_likelihood'])
            p = figure(title="pdf(x=%f)" % x_val, x_axis_label='y',
                       y_axis_label='pdf0')
            if cdf_val is not None:
                p.extra_y_ranges = {
                    "cdf_range": Range1d(start=min(cdf_val.flatten()),
                                         end=max(cdf_val.flatten()))}
                p.add_layout(LinearAxis(y_range_name="cdf_range"), 'right')
            # Add line renderers with legends and line thickness.
            p.line(y_vals, pdf_val.flatten(), legend="pdf0", alpha=0.5,
                   color="black")
            if self.data_loader.can_compute_ll():
                p.line(y_vals, true_pdf[i].flatten(), legend="true",
                       alpha=0.5, color="green")
            if cdf_val is not None:
                p.line(y_vals, cdf_val.flatten(), legend="cdf0", alpha=0.5,
                       y_range_name="cdf_range", color="blue")
            plots.append(p)
        grid = gridplot(plots, ncols=2)
        show(grid)
    else:
        print("not supported")
def show_weights(self):
    # `vars` shadowed the built-in, so use a more descriptive name.
    model_vars = invoke_in_process_pool(
        1, Callable(retrieve_vars, self.best_model_dir))[0]
    for var in model_vars:
        if len(var[1].shape) <= 1:
            var[1] = var[1].reshape(1, -1)
        p = figure(x_range=(0, var[1].shape[1]), y_range=(0, var[1].shape[0]),
                   title="weights of %s" % var[0],
                   tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
        p.image(image=[var[1]], x=0, y=0, dw=var[1].shape[1],
                dh=var[1].shape[0], palette="Spectral11")
        show(p)
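# The Bokeh image glyph above maps a 2-D array onto a rectangle given in
# data-space extents. A tiny self-contained illustration of that pattern
# (assumes bokeh is installed; names here are purely for the example):
from bokeh.plotting import figure, show
import numpy as np

p = figure(x_range=(0, 4), y_range=(0, 2),
           tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
# One 2x4 array rendered over the rectangle [0, 4] x [0, 2].
p.image(image=[np.arange(8).reshape(2, 4)], x=0, y=0, dw=4, dh=2,
        palette="Spectral11")
show(p)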
def show_samples(self):
    # As above, `and` replaces the original bitwise `&`.
    if (self.data_loader.train_x.shape[1] == 1
            and self.data_loader.train_y.shape[1] == 1):
        x_grid = np.linspace(self.data_loader.min_x, self.data_loader.max_x,
                             1000).reshape(-1, 1)
        try:
            samples_res = invoke_in_process_pool(
                1, Callable(predict_estimator, self.best_model,
                            x_grid, None))[0]
        except NotImplementedError:
            print("not supported")
            return
        plt.figure(figsize=(16, 8))
        plt.plot(self.data_loader.train_x, self.data_loader.train_y, 'r.',
                 alpha=0.3, label="train")
        plt.plot(x_grid, samples_res["samples0"], 'g.', alpha=0.3,
                 label="predicted")
        plt.legend()
        plt.show()
    else:
        print("not supported")
def show_pdf_heatmap_model(self, paper=False, x_fixed=None, y_fixed=None,
                           x_lim=None, y_lim=None,
                           pdf_percentile_cut_off=None):
    # pdf(x, y) = exp(log_likelihood) from the best model, evaluated in a
    # worker process.
    def compute_pdf(x, y):
        return np.exp(
            invoke_in_process_pool(
                "show_pdf_heatmap_model", 1,
                Callable(
                    self.loaded_exp.train_eval_model_factory.predict_estimator,
                    self.best_model, x, y))[0]['log_likelihood'])

    return self.show_pdf_heatmap_compute_pdf_fun(
        compute_pdf, "", paper=paper, x_fixed=x_fixed, y_fixed=y_fixed,
        x_lim=x_lim, y_lim=y_lim,
        pdf_percentile_cut_off=pdf_percentile_cut_off)
def sample_best_model(self, size, z, **kwargs):
    self.log.info("sample_best_model, size: %d, z.shape: %s", size, z.shape)
    return invoke_in_process_pool(
        "sample_best_model", 0,
        Callable(self._sample_best_model, size, z, kwargs))[0]
def train_eval_best_model(self, data_sample_random_seeds=None):
    if len(self.best_model_train_eval) == self.num_samples_best_eval:
        print("Loaded best model eval for model: {model}, data set: {data_set}"
              .format(model=FLAGS.model, data_set=FLAGS.data_set))
        return
    print("Running best model eval for model: {model}, data set: {data_set}"
          .format(model=FLAGS.model, data_set=FLAGS.data_set))
    if FLAGS.plot:
        progress = FloatProgress(min=0, max=1)
        display(progress)
    else:
        printProgressBar(0, self.num_samples_best_eval,
                         prefix='Progress best model eval {model}/{data_set}:'
                         .format(model=FLAGS.model, data_set=FLAGS.data_set),
                         suffix='Complete', length=50)
    futures = []
    done = 0.0
    with (SameProcessExecutor() if self.num_workers <= 0 else
          concurrent.futures.ProcessPoolExecutor(self.num_workers)) as executor:
        for i in range(self.num_samples_best_eval):
            # Submit one evaluation per sample, waiting until a worker slot
            # frees up; finished futures are reaped along the way.
            inserted = False
            while not inserted:
                if len(futures) < self.num_workers:
                    objective_fun = self.train_eval_model.create_train_eval(i)
                    params = self.best_model_params.copy()
                    params["tensorboard_folder"] = "tensorboard_best"
                    params["sample_cross_validation"] = True
                    params["data_sample_random_seed"] = (
                        generate_seed() if data_sample_random_seeds is None
                        else data_sample_random_seeds[i])
                    params["tf_random_seed"] = generate_seed()
                    futures.append(WorkItem(
                        i, None, params,
                        executor.submit(objective_fun, args=None, **params)))
                    inserted = True
                for wi in list(futures):
                    try:
                        model_dir, train_eval, validation_eval, test_eval = \
                            wi.future.result(0)
                        self.best_task_eval_finished(
                            futures, wi, model_dir, train_eval,
                            validation_eval, test_eval)
                        done += 1
                        if FLAGS.plot:
                            progress.value = done / self.num_samples_best_eval
                        else:
                            printProgressBar(
                                done, self.num_samples_best_eval,
                                prefix='Progress best model eval '
                                       '{model}/{data_set}:'.format(
                                           model=FLAGS.model,
                                           data_set=FLAGS.data_set),
                                suffix='Complete', length=50)
                    except concurrent.futures.TimeoutError:
                        pass
                if len(futures) == self.num_workers:
                    time.sleep(5)
        # Drain the remaining futures, blocking until each one completes.
        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = \
                wi.future.result()
            self.best_task_eval_finished(futures, wi, model_dir, train_eval,
                                         validation_eval, test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples_best_eval
            else:
                printProgressBar(done, done,
                                 prefix='Progress best model eval '
                                        '{model}/{data_set}:'.format(
                                            model=FLAGS.model,
                                            data_set=FLAGS.data_set),
                                 suffix='Complete', length=50)
    predict_train, predict_valid, predict_test = invoke_in_process_pool(
        self.num_workers,
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.train_x,
                 self.train_eval_model.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.validation_x,
                 self.train_eval_model.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.test_x,
                 self.train_eval_model.data_loader.test_y))
    self.best_model_train_ll = predict_train["log_likelihood"]
    self.best_model_valid_ll = predict_valid["log_likelihood"]
    self.best_model_test_ll = predict_test["log_likelihood"]
    self.save()
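# WorkItem above is assumed to be a small record tying together the sample
# index, the raw optimizer point, the named parameters, and the pending
# future. A minimal sketch (hypothetical; the real definition may differ):
from collections import namedtuple

WorkItem = namedtuple("WorkItem", ["index", "x", "params", "future"])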
def create_compare_notebooks():
    invoke_in_process_pool(
        4, *[Callable(create_compare_notebook, data_set)
             for data_set in FLAGS.data_sets.split(",")])
def create_vis_notebooks(models, data_sets):
    invoke_in_process_pool(
        "create_vis_notebooks", 3,
        *[Callable(create_vis_notebook, model, data_set)
          for model in models.split(",")
          for data_set in data_sets.split(",")])
def create_vis_notebooks():
    invoke_in_process_pool(
        3, *[Callable(create_vis_notebook, model, data_set)
             for model in FLAGS.models.split(",")
             for data_set in FLAGS.data_sets.split(",")])
def create_compare_notebooks(data_sets):
    invoke_in_process_pool(
        "create_compare_notebooks", 4,
        *[Callable(create_compare_notebook, data_set)
          for data_set in data_sets.split(",")])
def eval_best_model(self, data_set):
    return invoke_in_process_pool(
        "eval_best_model", 1, Callable(self._eval_best_model, data_set))[0]
def generate_data(self):
    y_data = invoke_in_process_pool(
        1, Callable(generate_in_tensorflow, self.op_factory, self.x_data))[0]
    return np.c_[self.x_data, y_data]
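# np.c_ above column-stacks the conditioning inputs with the generated
# targets; a small self-contained illustration:
import numpy as np

x = np.arange(3.0).reshape(-1, 1)   # shape (3, 1)
y = x ** 2                          # shape (3, 1)
assert np.c_[x, y].shape == (3, 2)  # one row per sample: [x, y]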
# python -m run_exp -data_set inv_sin -model rnade_laplace 2>&1 | tee run_exp.log
import os

from asynch import invoke_in_process_pool, Callable
from experiment.factory import create_load_experiment
from flags import FLAGS


def run():
    target_dir = os.path.join(FLAGS.dir, FLAGS.data_set)
    os.makedirs(target_dir, exist_ok=True)
    os.chdir(target_dir)
    exp = create_load_experiment()
    exp.eval_best_model_and_save()


if __name__ == '__main__':
    invoke_in_process_pool(1, Callable(run))
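# Wrapping `run` in invoke_in_process_pool(1, ...) rather than calling it
# directly presumably keeps TensorFlow graph/session state confined to a
# child process that is torn down on exit; this reading of the intent is an
# assumption, not documented in the source.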
def predict_best_model(self, data, collector, params=None, num_workers=None,
                       op_names=None):
    # Avoid mutable default arguments; fall back to empty containers.
    if params is None:
        params = {}
    if op_names is None:
        op_names = []
    if num_workers is None:
        num_workers = 1
    return invoke_in_process_pool(
        "predict_best_model", num_workers,
        Callable(self._predict_best_model, self.data_loader, data, collector,
                 params, op_names))[0]
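# Hypothetical usage (the exact types of `data` and `collector` are
# assumptions; op names are illustrative only):
#
#   out = exp.predict_best_model(data="test", collector=collector,
#                                num_workers=1,
#                                op_names=["log_likelihood"])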
if FLAGS.data_sample_random_seeds != "":
    print("data seeds from command line")
    data_sample_random_seeds = [
        int(seed) for seed in FLAGS.data_sample_random_seeds.split(",")]
elif loaded_data_seeds is not None:
    print("loaded data seeds")
    data_sample_random_seeds = loaded_data_seeds
else:
    print("generating data seeds")
    data_sample_random_seeds = [
        generate_seed() for _ in range(FLAGS.num_samples_best_eval)]
    store_data_seeds(data_sample_random_seeds)
print("Data sample seeds: %s" % data_sample_random_seeds)

for data_set in [data_set.strip() for data_set in FLAGS.data_sets.split(",")]:
    funcs.append(Callable(run_true_metrics, data_set=data_set))
    for model in [model.strip() for model in FLAGS.models.split(",")]:
        funcs.append(Callable(run, model=model, data_set=data_set,
                              data_sample_random_seeds=data_sample_random_seeds))

# First prefetch all data
for data_set in FLAGS.data_sets.split(","):
    FLAGS.data_set = data_set
    target_dir = os.path.join(FLAGS.dir, data_set)
    os.makedirs(target_dir, exist_ok=True)
    os.chdir(target_dir)
    data_loader = create_data_loader()

done = 0.0
futures = []
res = [None] * len(funcs)
with concurrent.futures.ProcessPoolExecutor(FLAGS.num_parallel_experiments) as executor:
def compute_mi(self, z, **kwargs):
    return invoke_in_process_pool(
        "compute_mi", 0, Callable(self._compute_mi, z, kwargs))[0]
def ll(self, x_data, y_data):
    return invoke_in_process_pool(
        1, Callable(compute_ll, self.op_factory, x_data, y_data))[0]
def run(self):
    futures = []
    if FLAGS.plot:
        progress = FloatProgress(min=0, max=1)
        display(progress)
    else:
        printProgressBar(0, self.num_samples,
                         prefix='Progress experiment {model}/{data_set}:'
                         .format(model=FLAGS.model, data_set=FLAGS.data_set),
                         suffix='Complete', length=50)
    done = 0.0
    with (SameProcessExecutor() if self.num_workers <= 0 else
          concurrent.futures.ProcessPoolExecutor(self.num_workers)) as executor:
        for i in range(self.num_samples):
            inserted = False
            while not inserted:
                if len(futures) < self.num_workers or self.num_workers <= 0:
                    # x is a list of n_points points proposed by the optimizer.
                    x = self.optimizer.ask()
                    objective_fun = self.train_eval_model.create_train_eval(i)
                    args_named = self.to_named_params(x)
                    futures.append(WorkItem(
                        i, x, args_named,
                        executor.submit(objective_fun, args=None,
                                        **args_named)))
                    inserted = True
                for wi in list(futures):
                    try:
                        model_dir, train_eval, validation_eval, test_eval = \
                            wi.future.result(0)
                        self.train_eval_task_finished(
                            futures, wi, model_dir, train_eval,
                            validation_eval, test_eval)
                        done += 1
                        if FLAGS.plot:
                            progress.value = done / self.num_samples
                        else:
                            printProgressBar(
                                done, self.num_samples,
                                prefix='Progress experiment '
                                       '{model}/{data_set}:'.format(
                                           model=FLAGS.model,
                                           data_set=FLAGS.data_set),
                                suffix='Complete', length=50)
                    except concurrent.futures.TimeoutError:
                        pass
                if len(futures) != 0 and len(futures) == self.num_workers:
                    time.sleep(1)
        # Drain the remaining futures, blocking until each one completes.
        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = \
                wi.future.result()
            self.train_eval_task_finished(futures, wi, model_dir, train_eval,
                                          validation_eval, test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples
            else:
                printProgressBar(done, self.num_samples,
                                 prefix='Progress experiment '
                                        '{model}/{data_set}:'.format(
                                            model=FLAGS.model,
                                            data_set=FLAGS.data_set),
                                 suffix='Complete', length=50)
    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    predict_train, predict_valid, predict_test = invoke_in_process_pool(
        self.num_workers,
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.train_x,
                 self.train_eval_model.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.validation_x,
                 self.train_eval_model.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.test_x,
                 self.train_eval_model.data_loader.test_y))
    self.best_model_train_ll = predict_train["log_likelihood"]
    self.best_model_valid_ll = predict_valid["log_likelihood"]
    self.best_model_test_ll = predict_test["log_likelihood"]
    self.save()
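# `self.optimizer.ask()` above suggests a scikit-optimize-style ask/tell
# loop: ask() proposes the next hyperparameter point, and (presumably inside
# train_eval_task_finished) tell(x, loss) reports the observed objective.
# A stand-alone sketch of that pattern with a toy objective (assumes
# scikit-optimize is installed; the dimension and objective are illustrative):
from skopt import Optimizer

opt = Optimizer(dimensions=[(1e-4, 1e-1, "log-uniform")])
for _ in range(5):
    x = opt.ask()                # next candidate point, e.g. a learning rate
    loss = (x[0] - 0.01) ** 2    # toy objective standing in for training
    opt.tell(x, loss)
print(min(opt.yi))               # best observed objective value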