def __init__(self,
                 exp_prefix,
                 est_params,
                 sim_params,
                 observations,
                 keys_of_interest,
                 n_mc_samples=10**7,
                 n_x_cond=5,
                 n_seeds=5,
                 use_gpu=True,
                 tail_measures=True):

        assert est_params and exp_prefix and sim_params and keys_of_interest
        assert observations.all()

        # every simulator configuration will be run multiple times with different randomness seeds
        sim_params = _add_seeds_to_sim_params(n_seeds, sim_params)

        self.observations = observations
        self.n_mc_samples = n_mc_samples
        self.n_x_cond = n_x_cond
        self.keys_of_interest = keys_of_interest
        self.exp_prefix = exp_prefix
        self.use_gpu = use_gpu
        self.tail_measures = tail_measures

        logger.configure(log_directory=config.DATA_DIR,
                         prefix=exp_prefix,
                         color='green')
        ''' ---------- Either load or generate the configs ----------'''
        config_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                       EXP_CONFIG_FILE)

        if os.path.isfile(config_pkl_path):
            logger.log("{:<70s} {:<30s}".format(
                "Loading experiment previous configs from file: ",
                config_pkl_path))
            self.configs = logger.load_pkl(EXP_CONFIG_FILE)
        else:
            logger.log("{:<70s} {:<30s}".format(
                "Generating and storing experiment configs under: ",
                config_pkl_path))
            self.configs = self._generate_configuration_variants(
                est_params, sim_params)
            logger.dump_pkl(data=self.configs, path=EXP_CONFIG_FILE)
        ''' ---------- Either load already existing results or start a new result collection ---------- '''
        results_pkl_path = os.path.join(logger.log_directory, logger.prefix,
                                        RESULTS_FILE)
        if os.path.isfile(results_pkl_path):
            logger.log_line("{:<70s} {:<30s}".format("Continue with: ",
                                                     results_pkl_path))
            self.gof_single_res_collection = dict(
                logger.load_pkl_log(RESULTS_FILE))

        else:  # start from scratch
            self.gof_single_res_collection = {}

        self.gof_results = GoodnessOfFitResults(self.gof_single_res_collection)
def load_dumped_estimator(dict_entry):
    assert len(dict_entry) == 1
    if type(dict_entry) == dict and len(dict_entry) == 1:
        dict_entry = list(dict_entry.values())[0]

    with tf.Session(graph=tf.Graph()) as sess:
        dict_entry.estimator = logger.load_pkl("model_dumps/" +
                                               dict_entry.task_name + ".pkl")
        print("loaded estimator for entry " + dict_entry.task_name)

    return dict_entry
示例#3
0
  def test_store_load_configrunner_pipeline(self):

    logger.configure(log_directory=config.DATA_DIR, prefix=EXP_PREFIX)
    test_dir = os.path.join(logger.log_directory, logger.prefix)
    if os.path.exists(test_dir):
      shutil.rmtree(test_dir)


    keys_of_interest = ['task_name', 'estimator', 'simulator', 'n_observations', 'center_sampling_method', 'x_noise_std', 'y_noise_std',
                        'ndim_x', 'ndim_y', 'n_centers', "n_mc_samples", "n_x_cond", 'mean_est', 'cov_est', 'mean_sim', 'cov_sim',
                        'kl_divergence', 'hellinger_distance', 'js_divergence', 'x_cond', 'random_seed', "mean_sim", "cov_sim",
                        "mean_abs_diff", "cov_abs_diff", "VaR_sim", "VaR_est", "VaR_abs_diff", "CVaR_sim", "CVaR_est", "CVaR_abs_diff",
                        "time_to_fit"]


    conf_est, conf_sim, observations = question1()
    conf_runner = ConfigRunner(EXP_PREFIX, conf_est, conf_sim, observations=observations, keys_of_interest=keys_of_interest,
                               n_mc_samples=1 * 10 ** 2, n_x_cond=5, n_seeds=5)

    conf_runner.configs = random.sample(conf_runner.configs, NUM_CONFIGS_TO_TEST)

    conf_runner.run_configurations(dump_models=True, multiprocessing=False)
    results_from_pkl_file = dict({logger.load_pkl(RESULTS_FILE)})

    """ check if model dumps have all been created """
    dump_dir = os.path.join(logger.log_directory, logger.prefix, 'model_dumps')
    model_dumps_list = os.listdir(dump_dir) # get list of all model files
    model_dumps_list_no_suffix = [os.path.splitext(entry)[0] for entry in model_dumps_list] # remove suffix

    for conf in conf_runner.configs:
      self.assertTrue(conf['task_name'] in model_dumps_list_no_suffix)


    """ check if model dumps can be used successfully"""
    for model_dump_i in model_dumps_list:
      #tf.reset_default_graph()
      with tf.Session(graph=tf.Graph()):
        model = logger.load_pkl("model_dumps/"+model_dump_i)
        self.assertTrue(model)
        if model.ndim_x == 1 and model.ndim_y == 1:
          self.assertTrue(model.plot3d(show=False))
示例#4
0
def test_log_data(setup):
    import numpy
    d1 = numpy.random.randn(20, 10)
    logger.log_data(d1, 'test_file.pkl')
    sleep(1.0)
    d2 = numpy.random.randn(20, 10)
    logger.log_data(d2, 'test_file.pkl', overwrite=True)
    sleep(1.0)

    data = logger.load_pkl('test_file.pkl')
    assert len(data) == 1, "data should contain only one array because we overwrote it."
    assert numpy.array_equal(data[0], d2), "first should be the same as d2"
示例#5
0
def test_save_pkl_abs_path(setup):
    import numpy

    d1 = numpy.random.randn(20, 10)
    logger.save_pkl(d1, "/tmp/ml-logger-test/test_file_1.pkl")
    sleep(0.1)

    data = logger.load_pkl("/tmp/ml-logger-test/test_file_1.pkl")
    assert len(
        data
    ) == 1, "data should contain only one array because we overwrote it."
    assert numpy.array_equal(data[0], d1), "first should be the same as d2"
示例#6
0
def test_load_pkl(setup):
    import numpy
    d1 = numpy.random.randn(20, 10)
    logger.log_data(d1, 'test_file.pkl')
    sleep(1.0)
    d2 = numpy.random.randn(20, 10)
    logger.log_data(d2, 'test_file.pkl')
    sleep(1.0)

    data = logger.load_pkl('test_file.pkl')
    assert len(data) == 2, "data should contain two arrays"
    assert numpy.array_equal(data[0], d1), "first should be the same as d1"
    assert numpy.array_equal(data[1], d2), "first should be the same as d2"
示例#7
0
def test_read_lambda():
    """veryfy that the object is correct"""
    from ml_logger import logger
    data, = logger.load_pkl("./test_data.pkl")
    print(*[f"{k}: {type(v)} - {str(v)}" for k, v in data.items()], sep="\n")
示例#8
0
def test_load_module(setup, test_module):
    result, = logger.load_pkl(f"modules/{0:04d}_Test.pkl")
    import numpy as np
    assert (result['var_1'] == np.ones(
        [100, 2])).all(), "should be the same as test data"