Example #1
    def prob_data(self):
        return invoke_in_process_pool(
            3,
            Callable(predict_estimator, self.best_model,
                     self.data_loader.train_x,
                     self.data_loader.train_y),
            Callable(predict_estimator, self.best_model,
                     self.data_loader.validation_x,
                     self.data_loader.validation_y),
            Callable(predict_estimator, self.best_model,
                     self.data_loader.test_x,
                     self.data_loader.test_y))
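All of these snippets lean on two helpers from the project's asynch module (see Example #19 for the import), whose implementation is not shown here. The sketch below is an inferred stand-in, reconstructed only from the call sites in these examples, not the library's actual code: Callable packages a function with its arguments so the pair can be pickled, and invoke_in_process_pool fans the packaged calls out to a process pool and returns the results in submission order. Several examples also pass a descriptive string label as an extra first argument (e.g. invoke_in_process_pool("prob_data", 3, ...)), apparently a variant of the same helper that takes a name for logging; the sketch covers only the unlabeled form.

import concurrent.futures


class Callable:
    """Inferred stand-in: bundles a function and its arguments for a worker."""

    def __init__(self, fn, *args, **kwargs):
        self.fn = fn
        self.args = args
        self.kwargs = kwargs

    def __call__(self):
        return self.fn(*self.args, **self.kwargs)


def invoke_in_process_pool(num_workers, *callables):
    """Run each Callable and return the results in submission order.

    Examples #11 and #22 pass num_workers=0, which suggests a fallback
    that runs the calls sequentially in the current process.
    """
    if num_workers <= 0:
        return [c() for c in callables]
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(c) for c in callables]
        return [f.result() for f in futures]

Under this sketch, invoke_in_process_pool(3, Callable(f, a), Callable(g, b)) evaluates f(a) and g(b) in separate worker processes and returns their results as [f(a), g(b)].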
Example #2
    def show_samples(self):
        # 'and', not '&': bitwise '&' binds tighter than '==', so the original
        # chained comparison could wrongly accept e.g. train_y.shape[1] == 3
        if self.data_loader.train_x.shape[1] == 1 and self.data_loader.train_y.shape[1] == 1:
            x_grid = np.linspace(self.data_loader.min_x,
                                 self.data_loader.max_x, 1000).reshape(-1, 1)

            try:
                samples_res = invoke_in_process_pool(
                    "show_samples", 1,
                    Callable(
                        self.loaded_exp.train_eval_model_factory.predict_estimator,
                        self.best_model, x_grid, None))[0]
            except NotImplementedError:
                print("not supported")
                return

            plt.figure(figsize=(16, 8))
            plt.plot(self.data_loader.train_x,
                     self.data_loader.train_y,
                     'r.',
                     alpha=0.3,
                     label="train")
            plt.plot(x_grid,
                     samples_res["samples"],
                     'g.',
                     alpha=0.3,
                     label="predicted")
            plt.legend()
            plt.show()
        elif self.data_loader.train_x.shape[1] == 0:
            samples_res = invoke_in_process_pool(
                "show_samples", 1,
                Callable(
                    self.loaded_exp.train_eval_model_factory.predict_estimator,
                    self.best_model, None, None))[0]
            data = samples_res["samples"][:, :]
            df = pd.DataFrame(data)
            fig, ax = plt.subplots(1, 1, figsize=(16, 8))
            axes = pd.plotting.scatter_matrix(df,
                                              alpha=0.2,
                                              ax=ax,
                                              color="black",
                                              label="train",
                                              diagonal='kde',
                                              density_kwds={'color': 'black'})

            plt.tight_layout()
            # assuming a module-level ROOT path; as originally written (no f
            # prefix) this saved into a literal '{ROOT}' directory
            plt.savefig(f'{ROOT}/samples.png')
            # if show:
            #     plt.show();
            return axes
        else:
            print("not supported")
Example #3
    def eval_best_model(self, data=("train", "validation", "test")):
        # tuple default: an immutable default avoids the shared mutable-default pitfall
        callables = {"train": Callable(eval_estimator, self.best_model,
                                       self.data_loader.train_x,
                                       self.data_loader.train_y),
                     "validation": Callable(eval_estimator, self.best_model,
                                            self.data_loader.validation_x,
                                            self.data_loader.validation_y),
                     "test": Callable(eval_estimator, self.best_model,
                                      self.data_loader.test_x,
                                      self.data_loader.test_y)}
        selected = [callables[name] for name in data]

        return OrderedDict(zip(data, invoke_in_process_pool(len(data), *selected)))
Example #4
    def prob_data(self):
        return invoke_in_process_pool(
            "prob_data", 3,
            Callable(
                self.loaded_exp.train_eval_model_factory.predict_estimator,
                self.best_model, self.data_loader.train_x,
                self.data_loader.train_y),
            Callable(
                self.loaded_exp.train_eval_model_factory.predict_estimator,
                self.best_model, self.data_loader.validation_x,
                self.data_loader.validation_y),
            Callable(
                self.loaded_exp.train_eval_model_factory.predict_estimator,
                self.best_model, self.data_loader.test_x,
                self.data_loader.test_y))
Example #5
    def show_prob_data(self):
        res = invoke_in_process_pool(3, Callable(predict_estimator, self.best_model,
                                                 self.data_loader.train_x,
                                                 self.data_loader.train_y),
                                     Callable(predict_estimator, self.best_model,
                                              self.data_loader.validation_x,
                                              self.data_loader.validation_y),
                                     Callable(predict_estimator, self.best_model,
                                              self.data_loader.test_x,
                                              self.data_loader.test_y)
                                     )

        for i in range(self.data_loader.test_y.shape[1]):
            pdf_name = 'pdf%d' % i
            if pdf_name in res[0]:
                fig, axes = plt.subplots(3, 1, figsize=(12, 8))
                axes[0].plot(res[0][pdf_name], alpha=0.3)
                axes[0].set_title("%s - train data" % pdf_name)

                axes[1].plot(res[1][pdf_name], alpha=0.3)
                axes[1].set_title("%s - validation data" % pdf_name)

                axes[2].plot(res[2][pdf_name], alpha=0.3)
                axes[2].set_title("%s - test data" % pdf_name)
                plt.tight_layout()

        fig, axes = plt.subplots(3, 1, figsize=(12, 8))

        axes[0].plot(res[0]['log_likelihood'], label="model", alpha=0.3)
        if self.data_loader.can_compute_ll():
            axes[0].plot(self.data_loader.ll(self.data_loader.train_x, self.data_loader.train_y), label="True", alpha=0.3)
        axes[0].set_title("LL - train data")
        axes[0].legend()

        axes[1].plot(res[1]['log_likelihood'], label="model", alpha=0.3)
        if self.data_loader.can_compute_ll():
            axes[1].plot(self.data_loader.ll(self.data_loader.validation_x, self.data_loader.validation_y), label="True", alpha=0.3)
        axes[1].set_title("LL - validation data")
        axes[1].legend()

        axes[2].plot(res[2]['log_likelihood'], label="model", alpha=0.3)
        if self.data_loader.can_compute_ll():
            axes[2].plot(self.data_loader.ll(self.data_loader.test_x, self.data_loader.test_y), label="True", alpha=0.3)
        axes[2].set_title("LL - test data")
        axes[2].legend()
        plt.tight_layout()

        plt.show()
Example #6
    def eval_best_model_and_save(self):
        predict_train, predict_valid, predict_test = invoke_in_process_pool(
            self.num_workers,
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.train_x,
                     self.train_eval_model.data_loader.train_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.validation_x,
                     self.train_eval_model.data_loader.validation_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.test_x,
                     self.train_eval_model.data_loader.test_y))

        self.best_model_train_ll = predict_train["log_likelihood"]
        self.best_model_valid_ll = predict_valid["log_likelihood"]
        self.best_model_test_ll = predict_test["log_likelihood"]

        self.save()
Example #7
    def show_distribution_slices(self, x_vals=None):
        if self.data_loader.train_x.shape[1] == 1 and self.data_loader.train_y.shape[1] == 1:
            y_data = self.data_loader.train_y
            x_data = self.data_loader.train_x
            y_vals = np.linspace(np.min(y_data), np.max(y_data), 100)
            if x_vals is None:
                x_vals = np.linspace(np.min(x_data), np.max(x_data), 10)
            plots = []

            res = invoke_in_process_pool(
                4, *[Callable(predict_estimator, self.best_model,
                              np.ones((len(y_vals), 1)) * x_val,
                              y_vals.reshape(-1, 1)) for x_val in x_vals])

            if self.data_loader.can_compute_ll():
                true_pdf = invoke_in_process_pool(
                    8, *[Callable(self.compute_true_pdf,
                                  np.ones((len(y_vals), 1)) * x_val,
                                  y_vals.reshape(-1, 1)) for x_val in x_vals])

            for i, x_val in enumerate(x_vals):
                cdf_val = res[i]['cdf0'] if 'cdf0' in res[i] else None
                pdf_val = np.exp(res[i]['log_likelihood'])

                p = figure(title="pdf(x=%f)" % x_val, x_axis_label='y', y_axis_label='pdf0')
                if cdf_val is not None:
                    p.extra_y_ranges = {"cdf_range": Range1d(start=min(cdf_val.flatten()), end=max(cdf_val.flatten()))}
                    p.add_layout(LinearAxis(y_range_name="cdf_range"), 'right')

                # add a line renderer with legend and line thickness
                p.line(y_vals, pdf_val.flatten(), legend="pdf0", alpha=0.5, color="black")
                if self.data_loader.can_compute_ll():
                    p.line(y_vals, true_pdf[i].flatten(), legend="true", alpha=0.5, color="green")
                if cdf_val is not None:
                    p.line(y_vals, cdf_val.flatten(), legend="cdf0", alpha=0.5, y_range_name="cdf_range", color="blue")
                plots.append(p)

            grid = gridplot(plots, ncols=2)
            show(grid)
        else:
            print("not supported")
Example #8
    def show_weights(self):
        model_vars = invoke_in_process_pool(1, Callable(retrieve_vars, self.best_model_dir))[0]

        for var in model_vars:
            if len(var[1].shape) <= 1:
                var[1] = var[1].reshape(1, -1)

            p = figure(x_range=(0, var[1].shape[1]), y_range=(0, var[1].shape[0]),
                       title="weights of %s" % var[0],
                       tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])

            p.image(image=[var[1]], x=0, y=0, dw=var[1].shape[1], dh=var[1].shape[0],
                    palette="Spectral11")
            show(p)
Example #9
    def show_samples(self):
        if self.data_loader.train_x.shape[1] == 1 and self.data_loader.train_y.shape[1] == 1:
            x_grid = np.linspace(self.data_loader.min_x, self.data_loader.max_x,
                                 1000).reshape(-1, 1)

            try:
                samples_res = invoke_in_process_pool(1, Callable(predict_estimator, self.best_model, x_grid, None))[0]
            except NotImplementedError:
                print("not supported")
                return

            plt.figure(figsize=(16, 8))
            plt.plot(self.data_loader.train_x, self.data_loader.train_y, 'r.',
                     alpha=0.3,
                     label="train")
            plt.plot(x_grid, samples_res["samples0"], 'g.', alpha=0.3, label="predicted")
            plt.legend()
            plt.show()
        else:
            print("not supported")
Example #10
    def show_pdf_heatmap_model(self,
                               paper=False,
                               x_fixed=None,
                               y_fixed=None,
                               x_lim=None,
                               y_lim=None,
                               pdf_percentile_cut_off=None):
        return self.show_pdf_heatmap_compute_pdf_fun(
            lambda x, y: np.exp(
                invoke_in_process_pool(
                    "show_pdf_heatmap_model", 1,
                    Callable(
                        self.loaded_exp.train_eval_model_factory.predict_estimator,
                        self.best_model, x, y))[0]['log_likelihood']),
            "",
            paper=paper,
            x_fixed=x_fixed,
            y_fixed=y_fixed,
            x_lim=x_lim,
            y_lim=y_lim,
            pdf_percentile_cut_off=pdf_percentile_cut_off)
Example #11
    def sample_best_model(self, size, z, **kwargs):
        self.log.info("sample_best_model, size: %d, z.shape: %s", size, z.shape)
        # 0 workers presumably means "run in the current process" (cf. SameProcessExecutor)
        return invoke_in_process_pool("sample_best_model", 0,
                                      Callable(self._sample_best_model, size, z, kwargs))[0]
Example #12
    def train_eval_best_model(self, data_sample_random_seeds=None):
        if len(self.best_model_train_eval) == self.num_samples_best_eval:
            print("Loaded best model eval for model: {model}, data set: {data_set}".format(model=FLAGS.model, data_set=FLAGS.data_set))
            return

        print("Running best model eval for model: {model}, data set: {data_set}".format(model=FLAGS.model,
                                                                                       data_set=FLAGS.data_set))

        if FLAGS.plot:
            progress = FloatProgress(min=0, max=1)
            display(progress)
        else:
            printProgressBar(0, self.num_samples_best_eval, prefix='Progress best model eval {model}/{data_set}:'.
                             format(model=FLAGS.model, data_set=FLAGS.data_set), suffix='Complete', length=50)
        futures = []
        done = 0.0

        with (SameProcessExecutor() if self.num_workers <= 0 else
              concurrent.futures.ProcessPoolExecutor(self.num_workers)) as executor:
            for i in range(self.num_samples_best_eval):
                inserted = False
                while not inserted:
                    # mirror Example #24: without the second clause this loop
                    # never inserts anything when running in-process (num_workers <= 0)
                    if len(futures) < self.num_workers or self.num_workers <= 0:
                        objective_fun = self.train_eval_model.create_train_eval(i)
                        params = self.best_model_params.copy()
                        params["tensorboard_folder"] = "tensorboard_best"
                        params["sample_cross_validation"] = True
                        params["data_sample_random_seed"] = generate_seed() if data_sample_random_seeds is None else data_sample_random_seeds[i]
                        params["tf_random_seed"] = generate_seed()
                        futures.append(
                            WorkItem(i, None, params, executor.submit(objective_fun, args=None, **params)))
                        inserted = True

                    for wi in list(futures):
                        try:
                            model_dir, train_eval, validation_eval, test_eval = wi.future.result(0)
                            self.best_task_eval_finished(futures, wi, model_dir, train_eval, validation_eval,
                                                         test_eval)
                            done += 1
                            if FLAGS.plot:
                                progress.value = done / self.num_samples_best_eval
                            else:
                                printProgressBar(done, self.num_samples_best_eval,
                                                 prefix='Progress best model eval {model}/{data_set}:'.
                                                 format(model=FLAGS.model, data_set=FLAGS.data_set), suffix='Complete',
                                                 length=50)
                        except concurrent.futures.TimeoutError:
                            pass

                    if len(futures) == self.num_workers:
                        time.sleep(5)

        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = wi.future.result()
            self.best_task_eval_finished(futures, wi, model_dir, train_eval, validation_eval,
                                         test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples_best_eval
            else:
                printProgressBar(done, done,
                                 prefix='Progress best model eval {model}/{data_set}:'.
                                 format(model=FLAGS.model, data_set=FLAGS.data_set), suffix='Complete',
                                 length=50)

        predict_train, predict_valid, predict_test = invoke_in_process_pool(
            self.num_workers,
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.train_x,
                     self.train_eval_model.data_loader.train_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.validation_x,
                     self.train_eval_model.data_loader.validation_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.test_x,
                     self.train_eval_model.data_loader.test_y))

        self.best_model_train_ll = predict_train["log_likelihood"]
        self.best_model_valid_ll = predict_valid["log_likelihood"]
        self.best_model_test_ll = predict_test["log_likelihood"]

        self.save()
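Examples #12 and #24 additionally rely on SameProcessExecutor and WorkItem, which are also not shown. Judging purely from their use above (submit(...) yields an object with a .result(timeout) method, and work items are built as WorkItem(i, x, params, future)), a plausible minimal stand-in would look like the following; the field names and the synchronous-Future behavior are assumptions, not the project's actual definitions.

import collections
import concurrent.futures

# Assumed fields: sample index, raw optimizer point, named params, future.
WorkItem = collections.namedtuple("WorkItem", ["i", "x", "params", "future"])


class SameProcessExecutor:
    """Executor look-alike that runs each submission immediately, in-process,
    while keeping the submit()/result() protocol of concurrent.futures."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        return False

    def submit(self, fn, *args, **kwargs):
        future = concurrent.futures.Future()
        try:
            future.set_result(fn(*args, **kwargs))
        except Exception as exc:  # surface errors through the future, like a real pool
            future.set_exception(exc)
        return future

This explains why the loops above poll wi.future.result(0): with a real process pool the zero-timeout call raises concurrent.futures.TimeoutError until the worker finishes, while with the in-process executor every future is already resolved by the time submit() returns.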
Example #13
def create_compare_notebooks():
    invoke_in_process_pool(4, *[Callable(create_compare_notebook, data_set)
                                for data_set in FLAGS.data_sets.split(",")])
Example #14
def create_vis_notebooks(models, data_sets):
    invoke_in_process_pool(
        "create_vis_notebooks", 3, *[
            Callable(create_vis_notebook, model, data_set)
            for model in models.split(",") for data_set in data_sets.split(",")
        ])
Example #15
def create_vis_notebooks():
    invoke_in_process_pool(3, *[Callable(create_vis_notebook, model, data_set)
                                for model in FLAGS.models.split(",")
                                for data_set in FLAGS.data_sets.split(",")])
Example #16
def create_compare_notebooks(data_sets):
    invoke_in_process_pool(
        "create_compare_notebooks", 4, *[
            Callable(create_compare_notebook, data_set)
            for data_set in data_sets.split(",")
        ])
Example #17
    def eval_best_model(self, data_set):
        return invoke_in_process_pool("eval_best_model", 1,
                                      Callable(self._eval_best_model, data_set))[0]
Example #18
    def generate_data(self):
        y_data = invoke_in_process_pool(
            1, Callable(generate_in_tensorflow, self.op_factory,
                        self.x_data))[0]

        return np.c_[self.x_data, y_data]
Example #19
import os

# python -m run_exp -data_set inv_sin -model rnade_laplace  2>&1  | tee run_exp.log
from asynch import invoke_in_process_pool, Callable
from experiment.factory import create_load_experiment
from flags import FLAGS


def run():
    target_dir = os.path.join(FLAGS.dir, FLAGS.data_set)
    os.makedirs(target_dir, exist_ok=True)
    os.chdir(target_dir)
    exp = create_load_experiment()
    exp.eval_best_model_and_save()


if __name__ == '__main__':
    invoke_in_process_pool(1, Callable(run))
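Note that even this script's single top-level call is routed through invoke_in_process_pool(1, Callable(run)); presumably this runs the experiment in a child process so that TensorFlow's global state is torn down cleanly when it finishes, though the examples do not state the reason.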
Example #20
    def predict_best_model(self, data, collector, params=None, num_workers=None, op_names=()):
        # None/() defaults avoid the shared mutable-default pitfall of params={} and op_names=[]
        if params is None:
            params = {}
        if num_workers is None:
            num_workers = 1

        return invoke_in_process_pool("predict_best_model", num_workers,
                                      Callable(self._predict_best_model, self.data_loader,
                                               data, collector, params, op_names))[0]
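Example #21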
    if FLAGS.data_sample_random_seeds != "":
        print("data seeds from command line")
        data_sample_random_seeds = [int(seed) for seed in FLAGS.data_sample_random_seeds.split(",")]
    elif loaded_data_seeds is not None:
        print("loaded data seeds")
        data_sample_random_seeds = loaded_data_seeds
    else:
        print("generating data seeds")
        data_sample_random_seeds = [generate_seed() for _ in range(FLAGS.num_samples_best_eval)]

    store_data_seeds(data_sample_random_seeds)
    print("Data sample seeds: %s" % data_sample_random_seeds)

    for data_set in [data_set.strip() for data_set in FLAGS.data_sets.split(",")]:
        funcs.append(Callable(run_true_metrics, data_set=data_set))
        for model in [model.strip() for model in FLAGS.models.split(",")]:
            funcs.append(Callable(run, model=model, data_set=data_set,
                                  data_sample_random_seeds=data_sample_random_seeds))

    # First prefetch all data
    for data_set in FLAGS.data_sets.split(","):
        FLAGS.data_set = data_set
        target_dir = os.path.join(FLAGS.dir, data_set)
        os.makedirs(target_dir, exist_ok=True)
        os.chdir(target_dir)
        data_loader = create_data_loader()

    done = 0.0
    futures = []
    res = [None] * len(funcs)
    with concurrent.futures.ProcessPoolExecutor(FLAGS.num_parallel_experiments) as executor:
Example #22
    def compute_mi(self, z, **kwargs):
        return invoke_in_process_pool("compute_mi", 0,
                                      Callable(self._compute_mi, z, kwargs))[0]
Example #23
    def ll(self, x_data, y_data):
        return invoke_in_process_pool(
            1, Callable(compute_ll, self.op_factory, x_data, y_data))[0]
Example #24
    def run(self):
        futures = []
        if FLAGS.plot:
            progress = FloatProgress(min=0, max=1)
            display(progress)
        else:
            printProgressBar(0, self.num_samples, prefix='Progress experiment {model}/{data_set}:'.
                             format(model=FLAGS.model, data_set=FLAGS.data_set), suffix='Complete', length=50)

        done = 0.0
        with (SameProcessExecutor() if self.num_workers <= 0 else concurrent.futures.ProcessPoolExecutor(
                self.num_workers)) as executor:
            for i in range(self.num_samples):
                inserted = False
                while not inserted:
                    if len(futures) < self.num_workers or self.num_workers <= 0:
                        x = self.optimizer.ask()  # x is a list of n_points points
                        objective_fun = self.train_eval_model.create_train_eval(i)
                        args_named = self.to_named_params(x)
                        futures.append(
                            WorkItem(i, x, args_named, executor.submit(objective_fun, args=None, **args_named)))
                        inserted = True

                    for wi in list(futures):
                        try:
                            model_dir, train_eval, validation_eval, test_eval = wi.future.result(0)
                            self.train_eval_task_finished(futures, wi, model_dir, train_eval, validation_eval,
                                                          test_eval)
                            done += 1
                            if FLAGS.plot:
                                progress.value = done / self.num_samples
                            else:
                                printProgressBar(done, self.num_samples,
                                                 prefix='Progress experiment {model}/{data_set}:'.
                                                 format(model=FLAGS.model, data_set=FLAGS.data_set),
                                                 suffix='Complete', length=50)
                        except concurrent.futures.TimeoutError:
                            pass

                    if len(futures) != 0 and len(futures) == self.num_workers:
                        time.sleep(1)

        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = wi.future.result()
            self.train_eval_task_finished(futures, wi, model_dir, train_eval, validation_eval, test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples
            else:
            printProgressBar(done, self.num_samples,
                             prefix='Progress experiment {model}/{data_set}:'.
                             format(model=FLAGS.model, data_set=FLAGS.data_set),
                             suffix='Complete', length=50)

        self.best_model = load_model(self.best_model_dir, self.best_model_params)

        predict_train, predict_valid, predict_test = invoke_in_process_pool(
            self.num_workers,
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.train_x,
                     self.train_eval_model.data_loader.train_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.validation_x,
                     self.train_eval_model.data_loader.validation_y),
            Callable(predict_estimator, self.best_model,
                     self.train_eval_model.data_loader.test_x,
                     self.train_eval_model.data_loader.test_y))

        self.best_model_train_ll = predict_train["log_likelihood"]
        self.best_model_valid_ll = predict_valid["log_likelihood"]
        self.best_model_test_ll = predict_test["log_likelihood"]

        self.save()