Python SampleStorageHDF示例，mlmc.sample_storage_hdf.SampleStorageHDF Python示例

示例#1

0

显示文件

文件： test_quantity_concept.py 项目： GeoMop/MLMC

 def dev_memory_usage_test(self):
     """
     Developer-only check: open an existing HDF5 sample file from a hard-coded
     local directory, build the root quantity and estimate its mean.

     The name suggests it is meant for watching memory consumption -- TODO confirm.
     :return: None
     """
     hdf_path = os.path.join("/home/martin/Documents/MLMC_quantity",
                             "mlmc_quantity_2.hdf5")
     storage = SampleStorageHDF(file_path=hdf_path)
     storage.chunk_size = 1e6
     # The result format is read from the storage itself
     root = make_root_quantity(storage, storage.load_result_format())
     mean_root_quantity = estimate_mean(root)

示例#2

0

显示文件

文件： test_sampling_pools.py 项目： GeoMop/MLMC

def test_sampling_pools(sampling_pool, simulation_factory):
    """
    Run a small MLMC sampling through the given pool and check estimated moments.

    Uses names defined outside this function: ``step_range``, ``distr``,
    ``ref_means`` and ``ref_vars``.
    :param sampling_pool: sampling pool instance handed to the Sampler; if its
        ``_debug`` flag is set, an 'output' directory is expected in the work dir
    :param simulation_factory: simulation factory handed to the Sampler; if its
        ``need_workspace`` flag is set, the YAML config is copied to the work dir
    :return: None
    """
    n_moments = 5
    np.random.seed(123)
    t.sleep(5)

    test_dir = os.path.dirname(os.path.realpath(__file__))
    work_dir = os.path.join(test_dir, '_test_tmp')
    # Always start from an empty working directory
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir)

    if simulation_factory.need_workspace:
        # The config file is addressed relative to the test directory
        os.chdir(test_dir)
        shutil.copyfile('synth_sim_config_test.yaml',
                        os.path.join(work_dir, 'synth_sim_config.yaml'))

    sample_storage = SampleStorageHDF(file_path=os.path.join(
        work_dir, "mlmc_{}.hdf5".format(len(step_range))))
    # Plan and compute samples
    sampler = Sampler(sample_storage=sample_storage,
                      sampling_pool=sampling_pool,
                      sim_factory=simulation_factory,
                      level_parameters=step_range)

    true_domain = distr.ppf([0.0001, 0.9999])
    moments_fn = Legendre(n_moments, true_domain)

    sampler.set_initial_n_samples([10, 10, 10])
    sampler.schedule_samples()
    sampler.ask_sampling_pool_for_samples()

    # Select a single value: quantity 'length' -> index 1 -> location '10' -> item 0
    quantity = mlmc.quantity.make_root_quantity(
        storage=sample_storage, q_specs=sample_storage.load_result_format())
    length = quantity['length']
    time_quantity = length[1]
    location = time_quantity['10']
    value_quantity = location[0]

    estimator = Estimate(quantity=value_quantity,
                         sample_storage=sample_storage,
                         moments_fn=moments_fn)
    means, variances = estimator.estimate_moments(moments_fn)

    assert means[0] == 1
    assert variances[0] == 0
    assert np.allclose(np.array(ref_means), np.array(means), atol=1e-5)
    # The reference variances must be checked against the estimated ones;
    # comparing ref_vars with itself would always pass.
    assert np.allclose(np.array(ref_vars), np.array(variances), atol=1e-5)

    if sampling_pool._debug:
        # next(os.walk(...))[1] lists the immediate subdirectories of work_dir
        assert 'output' in next(os.walk(work_dir))[1]

示例#3

0

显示文件

def hdf_storage_factory():
    """
    Create an HDF sample storage in a freshly emptied '_test_tmp' directory
    located next to this file. The current working directory is switched to
    this file's directory as a side effect.
    :return: SampleStorageHDF instance backed by '_test_tmp/mlmc_test.hdf5'
    """
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           '_test_tmp')
    # Remove leftovers of a previous run, then recreate the directory
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    hdf_path = os.path.join(tmp_dir, "mlmc_test.hdf5")
    return SampleStorageHDF(file_path=hdf_path)

示例#4

0

显示文件

    def process(self):
        """
        Post-process stored samples: open the HDF storage in ``self.work_dir``,
        estimate moments of one selected 'conductivity' value, check the mean of
        the selected moment and finally construct the density.

        Reads ``self.work_dir``, ``self.n_levels`` and ``self.n_moments``.
        :return: None
        """
        hdf_path = os.path.join(self.work_dir,
                                "mlmc_{}.hdf5".format(self.n_levels))
        storage = SampleStorageHDF(file_path=hdf_path)
        storage.chunk_size = 1e8
        root_quantity = make_root_quantity(storage,
                                           storage.load_result_format())

        # quantity 'conductivity' -> times: [1] -> locations: ['0'] -> item [0, 0]
        q_value = root_quantity['conductivity'][1]['0'][0, 0]

        # @TODO: How to estimate true_domain?
        true_domain = mlmc.estimator.Estimate.estimate_domain(q_value,
                                                              storage,
                                                              quantile=0.001)
        moments_fn = Legendre(self.n_moments, true_domain)

        estimator = mlmc.estimator.Estimate(quantity=q_value,
                                            sample_storage=storage,
                                            moments_fn=moments_fn)
        # The results are not used below; the call itself is kept
        means, variances = estimator.estimate_moments(moments_fn)

        moments_quantity = moments(root_quantity,
                                   moments_fn=moments_fn,
                                   mom_at_bottom=True)
        moments_mean = estimate_mean(moments_quantity)
        # Same selection as above, followed by [0] (result shape: (1,)) and [0]
        value_mean = moments_mean['conductivity'][1]['0'][0][0]
        assert value_mean.mean == 1

        # true_domain = [-10, 10]  # keep all values on the original domain
        # central_moments = Monomial(self.n_moments, true_domain, ref_domain=true_domain, mean=means())
        # central_moments_quantity = moments(root_quantity, moments_fn=central_moments, mom_at_bottom=True)
        # central_moments_mean = estimate_mean(central_moments_quantity)

        #estimator.sub_subselect(sample_vector=[10000])

        #self.process_target_var(estimator)
        self.construct_density(estimator, tol=1e-8)

示例#5

0

显示文件

文件： test_quantity_concept.py 项目： GeoMop/MLMC

    def _create_sampler(self, step_range, clean=False, memory=False):
        """
        Build a Sampler over a synthetic test simulation running in one process.

        :param step_range: passed to the Sampler as ``level_parameters``
        :param clean: bool, if True the '_test_tmp' work dir is emptied and recreated
        :param memory: bool, if True use Memory() storage, otherwise an HDF file
            'mlmc_test.hdf5' in the work dir
        :return: tuple (sampler, simulation_factory)
        """
        # Set work dir
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        work_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                '_test_tmp')

        def _recreate_work_dir():
            # Drop any previous content and start with an empty directory
            if os.path.exists(work_dir):
                shutil.rmtree(work_dir)
            os.makedirs(work_dir)

        if clean:
            _recreate_work_dir()

        # Create simulations
        simulation_factory = SynthSimulationForTests({
            'distr': stats.norm(),
            'complexity': 2,
            'nan_fraction': 0.1,  # fraction of failed samples
            'sim_method': '_sample_fn',
        })

        # shutil.copyfile('synth_sim_config.yaml', os.path.join(work_dir, 'synth_sim_config.yaml'))
        # simulation_config = {"config_yaml": os.path.join(work_dir, 'synth_sim_config.yaml')}
        # simulation_workspace = SynthSimulationWorkspace(simulation_config)

        # Create sample storages
        sample_storage = Memory() if memory else SampleStorageHDF(
            file_path=os.path.join(work_dir, "mlmc_test.hdf5"))

        # Create sampling pools
        sampling_pool = OneProcessPool()
        # sampling_pool_dir = OneProcessPool(work_dir=work_dir)

        # NOTE(review): the work dir is wiped again here, after the storage was
        # created -- confirm this is intended when the pool has an output dir.
        if clean and sampling_pool._output_dir is not None:
            _recreate_work_dir()
        if clean and simulation_factory.need_workspace:
            os.chdir(os.path.dirname(os.path.realpath(__file__)))
            shutil.copyfile('synth_sim_config.yaml',
                            os.path.join(work_dir, 'synth_sim_config.yaml'))

        sampler = Sampler(sample_storage=sample_storage,
                          sampling_pool=sampling_pool,
                          sim_factory=simulation_factory,
                          level_parameters=step_range)
        return sampler, simulation_factory

示例#6

0

显示文件

文件： test_storage.py 项目： GeoMop/MLMC

def test_storage(storage, n_levels):
    """
    Fill a sample storage through ``add_samples`` and verify what it reports back:
    scheduled samples, sample pairs, n_ops, result format and finished counts.

    :param storage: 'memory' selects Memory(), 'hdf' selects an HDF file in a
        fresh '_test_tmp' directory next to this file
    :param n_levels: number of levels passed to ``add_samples`` and expected in
        every per-level result
    :return: None
    """
    if storage == 'memory':
        storage = Memory()
    elif storage == 'hdf':
        tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               '_test_tmp')
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.makedirs(tmp_dir)
        storage = SampleStorageHDF(file_path=os.path.join(tmp_dir, "mlmc.hdf5"))

    n_successful, n_failed, res_length = 5, 4, 3
    n_total = n_successful + n_failed
    # 'res_lenght' is the keyword spelling used in this call to add_samples
    format_quant = add_samples(storage,
                               n_levels,
                               n_successful=n_successful,
                               n_failed=n_failed,
                               res_lenght=res_length)

    # Every level has all samples scheduled, successful and failed alike
    scheduled = storage.load_scheduled_samples()
    assert len(scheduled) == n_levels
    for _, level_scheduled in scheduled.items():
        assert len(level_scheduled) == n_total

    # Only successful samples appear among the collected results
    results = storage.sample_pairs()
    assert len(results) == n_levels
    for level_res in results:
        assert level_res.shape[1] == n_successful
        assert level_res.shape[0] == res_length
        assert np.allclose(level_res[:, :, 0], 1)

    assert len(storage.get_n_ops()) == n_levels

    # The stored result format must match the one that was written
    loaded_format = storage.load_result_format()
    assert len(format_quant) == len(loaded_format)
    for written, loaded in zip(format_quant, loaded_format):
        assert written.name == loaded.name
        assert written.unit == loaded.unit

    n_finished = storage.n_finished()
    assert len(n_finished) == n_levels
    assert np.allclose(n_finished, n_total)

示例#7

0

显示文件

文件： test_storage.py 项目： GeoMop/MLMC

def test_hdf_append():
    """
    Write samples into an HDF storage, open a second storage object on the same
    file and check that the data read through it match what was written.
    :return: None
    """
    tmp_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           '_test_tmp')
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
    os.makedirs(tmp_dir)

    hdf_path = os.path.join(tmp_dir, "mlmc.hdf5")
    n_levels = 4

    storage = SampleStorageHDF(file_path=hdf_path)
    format_quant = add_samples(storage, n_levels)
    results = storage.sample_pairs()

    # Second storage object over the very same file
    storage = SampleStorageHDF(file_path=hdf_path)
    loaded_results = storage.sample_pairs()

    assert len(results) == n_levels
    for l_id, (level_res, level_res_loaded) in enumerate(
            zip(results, loaded_results)):
        written = np.array(level_res)
        assert np.allclose(written[:, :, 0], 1)
        reloaded = np.array(level_res_loaded)
        assert np.allclose(written[:, :, 0], reloaded[:, :, 0])
        # The second slice along the last axis is checked from level 1 onwards
        if l_id > 0:
            assert np.allclose(written[:, :, 1], 0)
            assert np.allclose(written[:, :, 1], reloaded[:, :, 1])

    assert len(storage.get_n_ops()) == n_levels

    loaded_format = storage.load_result_format()
    assert len(format_quant) == len(loaded_format)
    for written_spec, loaded_spec in zip(format_quant, loaded_format):
        assert written_spec.name == loaded_spec.name
        assert written_spec.unit == loaded_spec.unit

    assert len(storage.n_finished()) == n_levels

示例#8

0

显示文件

    def setup_config(self, clean):
        """
        Simulation dependent configuration: creates the sampling pool, the
        FlowSim simulation factory, the HDF sample storage and the Sampler.

        Reads ``self.work_dir``, ``self.flow123d``, ``self.gmsh``,
        ``self.n_levels`` and ``self.level_parameters``.
        :param clean: bool, forwarded to FlowSim (originally documented as
            "If True remove existing files")
        :return: mlmc.sampler instance
        """
        # Set pbs config, flow123d, gmsh, ..., random fields are set in simulation class
        self.set_environment_variables()

        # Create Pbs sampling pool
        sampling_pool = self.create_sampling_pool()

        work_dir = self.work_dir
        simulation_config = dict(
            work_dir=work_dir,
            # The Environment.
            env={'flow123d': self.flow123d, 'gmsh': self.gmsh, 'gmsh_version': 1},
            yaml_file=os.path.join(work_dir, '01_conductivity.yaml'),
            geo_file=os.path.join(work_dir, 'square_1x1.geo'),
            fields_params={'model': 'exp', 'sigma': 4, 'corr_length': 0.1},
            field_template="!FieldElementwise {mesh_data_file: \"$INPUT_DIR$/%s\", field_name: %s}",
        )

        # Create simulation factory
        simulation_factory = FlowSim(config=simulation_config, clean=clean)

        # Create HDF sample storage (the 'append' argument is intentionally not passed here)
        hdf_path = os.path.join(self.work_dir,
                                "mlmc_{}.hdf5".format(self.n_levels))
        sample_storage = SampleStorageHDF(file_path=hdf_path)

        # Create sampler, it manages sample scheduling and so on
        return Sampler(sample_storage=sample_storage,
                       sampling_pool=sampling_pool,
                       sim_factory=simulation_factory,
                       level_parameters=self.level_parameters)

示例#9

0

显示文件

文件： process.py 项目： GeoMop/MLMC

    def setup_config(self, step_range, clean):
        """
        Simulation dependent configuration: creates the PBS sampling pool, the
        FlowSim simulation factory, the HDF sample storage and the Sampler.

        Reads ``self.work_dir``, ``self.flow123d``, ``self.gmsh`` and ``self.append``.
        :param step_range: Simulation's step range, its length is the number of levels
        :param clean: bool, forwarded to FlowSim (originally documented as
            "If True remove existing files")
        :return: mlmc.sampler instance
        """
        # Set pbs config, flow123d, gmsh, ..., random fields are set in simulation class
        self.set_environment_variables()

        # Create Pbs sampling pool
        sampling_pool = self.create_pbs_sampling_pool()

        #sampling_pool = OneProcessPool(work_dir=self.work_dir)  # Everything runs in one process
        #sampling_pool = ProcessPool(n_processes=4, work_dir=self.work_dir)  # Simulations run in different processes

        work_dir = self.work_dir
        simulation_config = dict(
            work_dir=work_dir,
            # The Environment.
            env={'flow123d': self.flow123d, 'gmsh': self.gmsh, 'gmsh_version': 1},
            # The template with a mesh and field placeholders
            yaml_file=os.path.join(work_dir, '01_conductivity.yaml'),
            # Range of MLMC simulation parameter. Here the mesh step.
            sim_param_range=step_range,
            # The file with simulation geometry (independent of the step)
            geo_file=os.path.join(work_dir, 'square_1x1.geo'),
            # 'field_template': "!FieldElementwise {mesh_data_file: \"${INPUT}/%s\", field_name: %s}"
            field_template="!FieldElementwise {mesh_data_file: \"$INPUT_DIR$/%s\", field_name: %s}",
        )

        print()
        # Create simulation factory
        simulation_factory = FlowSim(config=simulation_config, clean=clean)

        # Create HDF sample storage; the file name carries the number of levels
        hdf_path = os.path.join(self.work_dir,
                                "mlmc_{}.hdf5".format(len(step_range)))
        sample_storage = SampleStorageHDF(file_path=hdf_path,
                                          append=self.append)

        # Create sampler, it manages sample scheduling and so on
        return Sampler(sample_storage=sample_storage,
                       sampling_pool=sampling_pool,
                       sim_factory=simulation_factory,
                       level_parameters=step_range)

示例#10

0

显示文件

    def setup_config(self, n_levels, clean):
        """
        Simulation dependent configuration.

        # TODO: specify, what should be done here.
        - creation of Simulation
        - creation of Sampler
        - hdf file ?
        - why step_range must be here ?

        Reads ``self.config_dict``, ``self.work_dir``, ``self.clean`` and ``self.append``.
        :param n_levels: not used by this method; the Sampler always gets
            ``level_parameters=[1]``
        :param clean: bool, forwarded to the Flow123d_WGC2020 simulation factory
        :return: mlmc.sampler instance
        """
        self.set_environment_variables()
        pool = self.create_sampling_pool()

        # Create simulation factory
        sim_factory = Flow123d_WGC2020(config=self.config_dict, clean=clean)

        # Create HDF sample storage, possibly remove old one
        hdf_file = os.path.join(self.work_dir, "wgc2020_mlmc.hdf5")
        # NOTE(review): removal is driven by self.clean, not by the 'clean'
        # argument -- confirm the two are meant to differ.
        if self.clean and os.path.exists(hdf_file):
            # Remove HDF5 file
            os.remove(hdf_file)
        storage = SampleStorageHDF(file_path=hdf_file, append=self.append)

        # Create sampler, it manages sample scheduling and so on
        # the length of level_parameters must correspond to number of MLMC levels, at least 1 !!!
        return Sampler(sample_storage=storage,
                       sampling_pool=pool,
                       sim_factory=sim_factory,
                       level_parameters=[1])

示例#11

0

显示文件

文件： process_debug.py 项目： GeoMop/MLMC

    def run(self, renew=False):
        """
        Run a two-level synthetic MLMC sampling in a single process and check
        the estimated moments.

        Reads ``self.clean``, ``self.work_dir`` and ``self.append``.
        :param renew: bool, if True collect samples, renew the failed ones and
            collect again; otherwise schedule the initial samples [12, 6] and
            collect them
        :return: None
        """
        np.random.seed(3)
        n_moments = 5
        failed_fraction = 0

        # work_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '_test_tmp')
        # if os.path.exists(work_dir):
        #     shutil.rmtree(work_dir)
        # os.makedirs(work_dir)

        distr = stats.norm()
        step_range = [0.1, 0.001]

        # User configure and create simulation instance
        simulation_config = dict(distr=distr,
                                 complexity=2,
                                 nan_fraction=failed_fraction,
                                 sim_method='_sample_fn')
        #simulation_config = {"config_yaml": 'synth_sim_config.yaml'}
        simulation_factory = SynthSimulation(simulation_config)

        hdf_path = os.path.join(self.work_dir,
                                "mlmc_{}.hdf5".format(len(step_range)))
        # Only remove the file when it is really there; a clean run on a fresh
        # work dir must not fail with FileNotFoundError.
        if self.clean and os.path.exists(hdf_path):
            os.remove(hdf_path)

        sample_storage = SampleStorageHDF(file_path=hdf_path,
                                          append=self.append)
        sampling_pool = OneProcessPool()

        # Plan and compute samples
        sampler = Sampler(sample_storage=sample_storage,
                          sampling_pool=sampling_pool,
                          sim_factory=simulation_factory,
                          step_range=step_range)

        true_domain = distr.ppf([0.0001, 0.9999])
        moments_fn = Legendre(n_moments, true_domain)
        # moments_fn = Monomial(n_moments, true_domain)

        if renew:
            sampler.ask_sampling_pool_for_samples()
            sampler.renew_failed_samples()
            sampler.ask_sampling_pool_for_samples()
        else:
            sampler.set_initial_n_samples([12, 6])
            # sampler.set_initial_n_samples([1000])
            sampler.schedule_samples()
            sampler.ask_sampling_pool_for_samples()

        q_estimator = QuantityEstimate(sample_storage=sample_storage,
                                       moments_fn=moments_fn,
                                       sim_steps=step_range)
        # Settings for the (currently disabled) target-variance loop below
        target_var = 1e-4
        sleep = 0
        add_coef = 0.1

        # # @TODO: test
        # # New estimation according to already finished samples
        # variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
        # n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
        #                                                                    n_levels=sampler.n_levels)
        #
        # # Loop until number of estimated samples is greater than the number of scheduled samples
        # while not sampler.process_adding_samples(n_estimated, sleep, add_coef):
        #     # New estimation according to already finished samples
        #     variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
        #     n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
        #                                                                        n_levels=sampler.n_levels)

        print("collected samples ", sampler._n_scheduled_samples)
        means, moment_vars = q_estimator.estimate_moments(moments_fn)

        print("means ", means)
        print("vars ", moment_vars)
        assert means[0] == 1
        assert np.isclose(means[1], 0, atol=1e-2)
        assert moment_vars[0] == 0

示例#12

0

显示文件

文件： process_debug.py 项目： GeoMop/MLMC

    def run(self, renew=False):
        """
        Run a two-level synthetic MLMC sampling through a PBS sampling pool and
        print the estimated moments.

        Reads ``self.work_dir``, ``self.clean`` and ``self.append``. The current
        working directory is switched to this file's directory.
        :param renew: bool, if True collect samples, renew the failed ones and
            collect again; otherwise schedule the initial samples [12, 6] and
            collect them
        :return: None
        """
        np.random.seed(3)
        n_moments = 5
        distr = stats.norm(loc=1, scale=2)
        step_range = [0.01, 0.001]

        # Set work dir
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        config_yaml = os.path.join(self.work_dir, 'synth_sim_config.yaml')
        shutil.copyfile('synth_sim_config.yaml', config_yaml)

        simulation_factory = SynthSimulationWorkspace(
            {"config_yaml": config_yaml})

        # The storage file name carries the number of levels
        hdf_path = os.path.join(self.work_dir,
                                "mlmc_{}.hdf5".format(len(step_range)))
        if self.clean and os.path.exists(hdf_path):
            os.remove(hdf_path)

        sample_storage = SampleStorageHDF(file_path=hdf_path,
                                          append=self.append)
        sampling_pool = SamplingPoolPBS(job_weight=20000000,
                                        work_dir=self.work_dir,
                                        clean=self.clean)

        # Cluster specific PBS settings
        pbs_config = {
            'n_cores': 1,
            'n_nodes': 1,
            'select_flags': ['cgroups=cpuacct'],
            'mem': '128mb',
            'queue': 'charon_2h',
            'home_dir': '/storage/liberec3-tul/home/martin_spetlik/',
            'pbs_process_file_dir':
            '/auto/liberec3-tul/home/martin_spetlik/MLMC_new_design/src/mlmc',
            'python': 'python3',
            'env_setting': [
                'cd {work_dir}',
                'module load python36-modules-gcc',
                'source env/bin/activate',
                'pip3 install /storage/liberec3-tul/home/martin_spetlik/MLMC_new_design',
                'module use /storage/praha1/home/jan-hybs/modules',
                'module load python36-modules-gcc',
                'module list',
            ],
        }
        sampling_pool.pbs_common_setting(flow_3=True, **pbs_config)

        # Plan and compute samples
        sampler = Sampler(sample_storage=sample_storage,
                          sampling_pool=sampling_pool,
                          sim_factory=simulation_factory,
                          step_range=step_range)

        true_domain = distr.ppf([0.0001, 0.9999])
        moments_fn = Legendre(n_moments, true_domain)

        if not renew:
            sampler.set_initial_n_samples([12, 6])
            # sampler.set_initial_n_samples([1000])
            sampler.schedule_samples()
            sampler.ask_sampling_pool_for_samples()
        else:
            sampler.ask_sampling_pool_for_samples()
            sampler.renew_failed_samples()
            sampler.ask_sampling_pool_for_samples()

        q_estimator = QuantityEstimate(sample_storage=sample_storage,
                                       moments_fn=moments_fn,
                                       sim_steps=step_range)

        # target_var = 1e-3
        # sleep = 0
        # add_coef = 0.1
        #
        # # @TODO: test
        # # New estimation according to already finished samples
        # variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
        # n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
        #                                                                    n_levels=sampler.n_levels)
        # # Loop until number of estimated samples is greater than the number of scheduled samples
        # while not sampler.process_adding_samples(n_estimated, sleep, add_coef):
        #     # New estimation according to already finished samples
        #     variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
        #     n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
        #                                                                        n_levels=sampler.n_levels)

        # print("collected samples ", sampler._n_created_samples)
        means, moment_vars = q_estimator.estimate_moments(moments_fn)

        print("means ", means)
        print("vars ", moment_vars)

示例#13

0

显示文件

def test_sampler_pbs(work_dir, clean=False, debug=False):
    """
    Schedule a five-level synthetic MLMC sampling through a PBS sampling pool
    and estimate moments of one selected value.

    :param work_dir: working directory; created if missing, holds the YAML
        config and the HDF storage file
    :param clean: bool, if True an existing HDF file and 'output' directory in
        work_dir are removed; also forwarded to SamplingPoolPBS
    :param debug: not used by this function
    :return: None
    """
    np.random.seed(3)
    n_moments = 5
    distr = stats.norm(loc=1, scale=2)
    step_range = [0.5, 0.01]
    n_levels = 5

    # if clean:
    #     if os.path.isdir(work_dir):
    #         shutil.rmtree(work_dir, ignore_errors=True)
    os.makedirs(work_dir, mode=0o775, exist_ok=True)

    assert step_range[0] > step_range[1]

    def _level_step(i_level):
        # Blend of both step bounds: first bound at exponent 0, second at exponent 1
        exponent = 1 if n_levels == 1 else i_level / (n_levels - 1)
        return step_range[0]**(1 - exponent) * step_range[1]**exponent

    level_parameters = [[_level_step(i_level)] for i_level in range(n_levels)]

    # The simulation is configured through a YAML file written to work_dir
    config_yaml = os.path.join(work_dir, 'synth_sim_config.yaml')
    with open(config_yaml, "w") as file:
        yaml.dump({'distr': 'norm',
                   'complexity': 2,
                   'nan_fraction': 0,
                   'sim_method': '_sample_fn'},
                  file,
                  default_flow_style=False)

    simulation_factory = SynthSimulationWorkspace({"config_yaml": config_yaml})

    # NOTE(review): the file name uses len(step_range) (2), not n_levels (5)
    hdf_path = os.path.join(work_dir,
                            "mlmc_{}.hdf5".format(len(step_range)))
    if clean and os.path.exists(hdf_path):
        os.remove(hdf_path)

    output_dir = os.path.join(work_dir, "output")
    if clean and os.path.exists(output_dir):
        shutil.rmtree(output_dir, ignore_errors=True)

    sample_storage = SampleStorageHDF(file_path=hdf_path)
    sampling_pool = SamplingPoolPBS(work_dir=work_dir, clean=clean)
    #sampling_pool = OneProcessPool()

    # The pool's output directory gets its own copy of the config
    shutil.copyfile(
        config_yaml,
        os.path.join(sampling_pool._output_dir, 'synth_sim_config.yaml'))

    # Cluster specific PBS settings
    pbs_config = {
        'n_cores': 1,
        'n_nodes': 1,
        'select_flags': ['cgroups=cpuacct'],
        'mem': '2Gb',
        'queue': 'charon',
        'pbs_name': 'flow123d',
        'walltime': '72:00:00',
        'optional_pbs_requests': [],  # e.g. ['#PBS -m ae', ...]
        'home_dir': '/auto/liberec3-tul/home/martin_spetlik/',
        'python': 'python3',
        'env_setting': [
            'cd $MLMC_WORKDIR',
            'module load python36-modules-gcc',
            'source env/bin/activate',
            # 'pip3 install /storage/liberec3-tul/home/martin_spetlik/MLMC_new_design',
            'module use /storage/praha1/home/jan-hybs/modules',
            'module load python36-modules-gcc',
            'module load flow123d',
            'module list',
        ],
    }
    sampling_pool.pbs_common_setting(flow_3=True, **pbs_config)

    # Plan and compute samples
    sampler = Sampler(sample_storage=sample_storage,
                      sampling_pool=sampling_pool,
                      sim_factory=simulation_factory,
                      level_parameters=level_parameters)

    true_domain = distr.ppf([0.0001, 0.9999])
    moments_fn = Legendre(n_moments, true_domain)

    sampler.set_initial_n_samples([1e7, 5e6, 1e6, 5e5, 1e4])
    #sampler.set_initial_n_samples([1e1, 1e1, 1e1, 1e1, 1e1])
    #sampler.set_initial_n_samples([4, 4, 4, 4, 4])
    sampler.schedule_samples()
    n_running = sampler.ask_sampling_pool_for_samples()

    # Select a single value: quantity 'length' -> index 1 -> location '10' -> item 0
    quantity = mlmc.quantity.make_root_quantity(
        storage=sample_storage, q_specs=sample_storage.load_result_format())
    value_quantity = quantity['length'][1]['10'][0]

    estimator = Estimate(quantity=value_quantity,
                         sample_storage=sample_storage,
                         moments_fn=moments_fn)

    # target_var = 1e-3
    # sleep = 0
    # add_coef = 0.1
    #
    # # @TODO: test
    # # New estimation according to already finished samples
    # variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
    # n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
    #                                                                    n_levels=sampler.n_levels)
    # # Loop until number of estimated samples is greater than the number of scheduled samples
    # while not sampler.process_adding_samples(n_estimated, sleep, add_coef):
    #     # New estimation according to already finished samples
    #     variances, n_ops = q_estimator.estimate_diff_vars_regression(sampler._n_scheduled_samples)
    #     n_estimated = new_estimator.estimate_n_samples_for_target_variance(target_var, variances, n_ops,
    #                                                                        n_levels=sampler.n_levels)

    #print("collected samples ", sampler._n_created_samples)
    means, moment_vars = estimator.estimate_moments(moments_fn)

示例#14

0

显示文件

文件： test_quantity_concept.py 项目： GeoMop/MLMC

    def test_basics(self):
        """
        Test basic quantity properties, especially indexing
        """
        work_dir = _prepare_work_dir()
        sample_storage = SampleStorageHDF(
            file_path=os.path.join(work_dir, "mlmc.hdf5"))
        result_format, sizes = self.fill_sample_storage(sample_storage)
        root_quantity = make_root_quantity(sample_storage, result_format)

        means = estimate_mean(root_quantity)
        self.assertEqual(len(means.mean), np.sum(sizes))

        quantity_add = root_quantity + root_quantity
        means_add = estimate_mean(quantity_add)
        assert np.allclose((means.mean + means.mean), means_add.mean)

        length = root_quantity['length']
        means_length = estimate_mean(length)
        assert np.allclose((means.mean[sizes[0]:sizes[0] + sizes[1]]).tolist(),
                           means_length.mean.tolist())

        length_add = quantity_add['length']
        means_length_add = estimate_mean(length_add)
        assert np.allclose(means_length_add.mean, means_length.mean * 2)

        depth = root_quantity['depth']
        means_depth = estimate_mean(depth)
        assert np.allclose((means.mean[:sizes[0]]), means_depth.mean)

        # Interpolation in time
        locations = length.time_interpolation(2.5)
        mean_interp_value = estimate_mean(locations)

        # Select position
        position = locations['10']
        mean_position_1 = estimate_mean(position)
        assert np.allclose(
            mean_interp_value.mean[:len(mean_interp_value.mean) // 2],
            mean_position_1.mean.flatten())

        # Array indexing tests
        values = position
        values_mean = estimate_mean(values)
        assert values_mean[1:2].mean.shape == (1, 3)

        values = position
        values_mean = estimate_mean(values)
        assert values_mean[1].mean.shape == (3, )

        values = position[:, 2]
        values_mean = estimate_mean(values)
        assert len(values_mean.mean) == 2

        y = position[1, 2]
        y_mean = estimate_mean(y)
        assert len(y_mean.mean) == 1

        y = position[:, :]
        y_mean = estimate_mean(y)
        assert np.allclose(y_mean.mean, mean_position_1.mean)

        y = position[:1, 1:2]
        y_mean = estimate_mean(y)
        assert len(y_mean.mean) == 1

        y = position[:2, ...]
        y_mean = estimate_mean(y)
        assert len(y_mean.mean.flatten()) == 6

        value = values[1]
        value_mean = estimate_mean(value)
        assert values_mean.mean[1] == value_mean.mean

        value = values[0]
        value_mean = estimate_mean(value)
        assert values_mean.mean[0] == value_mean.mean

        position = locations['20']
        mean_position_2 = estimate_mean(position)
        assert np.allclose(
            mean_interp_value.mean[len(mean_interp_value.mean) // 2:],
            mean_position_2.mean.flatten())

        width = root_quantity['width']
        width_locations = width.time_interpolation(1.2)
        mean_width_interp_value = estimate_mean(width_locations)

        # Select position
        position = width_locations['30']
        mean_position_1 = estimate_mean(position)
        assert np.allclose(
            mean_width_interp_value.mean[:len(mean_width_interp_value.mean) //
                                         2], mean_position_1.mean.flatten())

        position = width_locations['40']
        mean_position_2 = estimate_mean(position)
        assert np.allclose(
            mean_width_interp_value.mean[len(mean_width_interp_value.mean) //
                                         2:], mean_position_2.mean.flatten())

        quantity_add = root_quantity + root_quantity
        means_add = estimate_mean(quantity_add)
        assert np.allclose((means.mean + means.mean), means_add.mean)

        length = quantity_add['length']
        means_length = estimate_mean(length)
        assert np.allclose(
            (means_add.mean[sizes[0]:sizes[0] + sizes[1]]).tolist(),
            means_length.mean.tolist())

        width = quantity_add['width']
        means_width = estimate_mean(width)
        assert np.allclose((means_add.mean[sizes[0] + sizes[1]:sizes[0] +
                                           sizes[1] + sizes[2]]).tolist(),
                           means_width.mean.tolist())

        # Concatenate quantities
        quantity_dict = Quantity.QDict([("depth", depth), ("length", length)])
        quantity_dict_mean = estimate_mean(quantity_dict)
        assert np.allclose(
            quantity_dict_mean.mean,
            np.concatenate((means_depth.mean, means_length.mean)))

        length_concat = quantity_dict['length']
        means_length_concat = estimate_mean(length_concat)
        assert np.allclose(means_length_concat.mean, means_length.mean)
        locations = length_concat.time_interpolation(2.5)
        mean_interp_value = estimate_mean(locations)
        position = locations['10']
        mean_position_1 = estimate_mean(position)
        assert np.allclose(
            mean_interp_value.mean[:len(mean_interp_value.mean) // 2],
            mean_position_1.mean.flatten())
        values = position[:, 2]
        values_mean = estimate_mean(values)
        assert len(values_mean.mean) == 2
        y = position[1, 2]
        y_mean = estimate_mean(y)
        assert len(y_mean.mean) == 1
        y_add = np.add(5, y)
        y_add_mean = estimate_mean(y_add)
        assert np.allclose(y_add_mean.mean, y_mean.mean + 5)
        depth = quantity_dict['depth']
        means_depth_concat = estimate_mean(depth)
        assert np.allclose((means.mean[:sizes[0]]), means_depth_concat.mean)

        quantity_array = Quantity.QArray([[length, length], [length, length]])
        quantity_array_mean = estimate_mean(quantity_array)
        assert np.allclose(
            quantity_array_mean.mean.flatten(),
            np.concatenate((means_length.mean, means_length.mean,
                            means_length.mean, means_length.mean)))

        quantity_timeseries = Quantity.QTimeSeries([(0, locations),
                                                    (1, locations)])
        quantity_timeseries_mean = estimate_mean(quantity_timeseries)
        assert np.allclose(
            quantity_timeseries_mean.mean,
            np.concatenate((mean_interp_value.mean, mean_interp_value.mean)))

        quantity_field = Quantity.QField([("f1", length), ("f2", length)])
        quantity_field_mean = estimate_mean(quantity_field)
        assert np.allclose(
            quantity_field_mean.mean,
            np.concatenate((means_length.mean, means_length.mean)))

示例#15

0

显示文件

文件： test_quantity_concept.py 项目： GeoMop/MLMC

    def test_binary_operations(self):
        """
        Test quantity binary operations.

        Exercises ``+``, ``-``, ``*``, ``/`` and ``%`` on the root quantity with
        another quantity or with a scalar constant, in both operand orders.
        For the linear operations (add, subtract, multiply or divide by a
        constant) the estimated mean is compared with the same operation
        applied to the mean of the unmodified root quantity.  For the
        non-linear ones (``%``, ``const / quantity``, ``quantity * quantity``)
        only the length of the resulting mean vector is checked.
        """
        work_dir = _prepare_work_dir()
        sample_storage = SampleStorageHDF(
            file_path=os.path.join(work_dir, "mlmc.hdf5"))
        result_format, sizes = self.fill_sample_storage(sample_storage)
        root_quantity = make_root_quantity(sample_storage, result_format)
        const = 5

        means = estimate_mean(root_quantity)
        self.assertEqual(len(means.mean), np.sum(sizes))

        # Offsets of the 'length' and 'width' sub-quantities inside the flat
        # mean vector; they follow the first entry of ``sizes``.
        length_start = sizes[0]
        length_end = length_start + sizes[1]
        width_end = length_end + sizes[2]

        # Addition
        quantity_add = root_quantity + root_quantity
        means_add = estimate_mean(quantity_add)
        assert np.allclose(means.mean + means.mean, means_add.mean)

        quantity_add_const = root_quantity + const
        means_add_const = estimate_mean(quantity_add_const)
        # Previously a bare ``means_add_const.mean`` expression that verified
        # nothing; adding a constant must shift the mean by that constant.
        assert np.allclose(means.mean + const, means_add_const.mean)

        quantity_add = root_quantity + root_quantity + root_quantity
        means_add = estimate_mean(quantity_add)
        assert np.allclose(means.mean + means.mean + means.mean,
                           means_add.mean)

        # Subtraction
        quantity_sub_const = root_quantity - const
        means_sub_const = estimate_mean(quantity_sub_const)
        # Previously a bare expression statement; see the addition case above.
        assert np.allclose(means.mean - const, means_sub_const.mean)

        # Multiplication
        const_mult_quantity = root_quantity * const
        const_mult_mean = estimate_mean(const_mult_quantity)
        assert np.allclose(const * means.mean, const_mult_mean.mean)

        # True division
        const_div_quantity = root_quantity / const
        const_div_mean = estimate_mean(const_div_quantity)
        assert np.allclose(means.mean / const, const_div_mean.mean)

        # Mod
        const_mod_quantity = root_quantity % const
        const_mod_mean = estimate_mean(const_mod_quantity)
        # Mod is not linear, so only the shape of the result can be checked.
        assert len(const_mod_mean.mean) == len(means.mean)

        # Further tests: selecting a sub-quantity of a sum must give the
        # matching slice of the mean of the whole sum.
        length = quantity_add['length']
        means_length = estimate_mean(length)
        assert np.allclose(means_add.mean[length_start:length_end],
                           means_length.mean)

        width = quantity_add['width']
        means_width = estimate_mean(width)
        assert np.allclose(means_add.mean[length_end:width_end],
                           means_width.mean)

        quantity_add = root_quantity + root_quantity * const
        means_add = estimate_mean(quantity_add)
        assert np.allclose(means.mean + means.mean * const, means_add.mean)

        quantity_add_mult = root_quantity + root_quantity * root_quantity
        means_add_mult = estimate_mean(quantity_add_mult)
        # The mean of a product is not the product of means, so only the
        # shape is verified here.
        assert len(means_add_mult.mean) == len(means.mean)

        #### right operators ####
        # Addition
        const_add_quantity = const + root_quantity
        const_add_means = estimate_mean(const_add_quantity)
        assert np.allclose(means_add_const.mean, const_add_means.mean)

        # Subtraction: (const - q) is the negation of (q - const).
        const_sub_quantity = const - root_quantity
        const_sub_means = estimate_mean(const_sub_quantity)
        assert np.allclose(means_sub_const.mean, -const_sub_means.mean)

        # Multiplication
        const_mult_quantity = const * root_quantity
        const_mult_mean = estimate_mean(const_mult_quantity)
        assert np.allclose(const * means.mean, const_mult_mean.mean)

        # True division
        const_div_quantity = const / root_quantity
        const_div_mean = estimate_mean(const_div_quantity)
        assert len(const_div_mean.mean) == len(means.mean)

        # Mod
        const_mod_quantity = const % root_quantity
        const_mod_mean = estimate_mean(const_mod_quantity)
        assert len(const_mod_mean.mean) == len(means.mean)

示例#16

0

显示文件

文件： development_tests.py 项目： GeoMop/MLMC

def oneprocess_test():
    """
    Development test: run the sampler with a single-process pool and check
    the estimated moments.

    Steps performed by the visible code:
      1. Seed NumPy's RNG and recreate an empty ``_test_tmp`` directory next
         to this file, copying ``synth_sim_config.yaml`` into it.
      2. Build a ``SynthSimulationWorkspace`` factory, an HDF sample storage
         and a ``OneProcessPool``, and wire them into a ``Sampler`` with
         three level parameters.
      3. Schedule the initial samples, then keep adding samples until
         ``sampler.process_adding_samples`` returns a truthy value, each time
         re-estimating the required sample counts for ``target_var``.
      4. Estimate moments and assert on the first two means and the first
         variance.

    NOTE(review): the function changes the process working directory and
    deletes ``_test_tmp`` if it already exists.
    """
    np.random.seed(3)
    n_moments = 5

    # Reference distribution; its quantiles define the moments domain below.
    distr = stats.norm(loc=1, scale=2)
    # One single-element parameter list per level (three levels).
    step_range = [[0.01], [0.001], [0.0001]]

    # Set work dir
    # chdir first so the relative 'synth_sim_config.yaml' in copyfile below
    # resolves against this file's directory.
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    work_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            '_test_tmp')
    # Start from an empty directory on every run.
    if os.path.exists(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir)
    shutil.copyfile('synth_sim_config.yaml',
                    os.path.join(work_dir, 'synth_sim_config.yaml'))

    simulation_config = {
        "config_yaml": os.path.join(work_dir, 'synth_sim_config.yaml')
    }
    simulation_factory = SynthSimulationWorkspace(simulation_config)

    # The storage file name embeds the number of levels (len(step_range)).
    sample_storage = SampleStorageHDF(file_path=os.path.join(
        work_dir, "mlmc_{}.hdf5".format(len(step_range))))
    sampling_pool = OneProcessPool(work_dir=work_dir, debug=True)

    # Plan and compute samples
    sampler = Sampler(sample_storage=sample_storage,
                      sampling_pool=sampling_pool,
                      sim_factory=simulation_factory,
                      level_parameters=step_range)

    # Moments are defined on the [0.0001, 0.9999] quantile range of distr.
    true_domain = distr.ppf([0.0001, 0.9999])
    moments_fn = Legendre(n_moments, true_domain)

    # Called without arguments, so the sampler's own default counts are used.
    sampler.set_initial_n_samples()
    #sampler.set_initial_n_samples([1000])
    sampler.schedule_samples()
    sampler.ask_sampling_pool_for_samples()

    q_estimator = QuantityEstimate(sample_storage=sample_storage,
                                   moments_fn=moments_fn,
                                   sim_steps=step_range)

    # Arguments forwarded to the estimator / process_adding_samples below.
    target_var = 1e-3
    sleep = 0
    add_coef = 0.1

    # @TODO: test
    # New estimation according to already finished samples
    # NOTE(review): `new_estimator` is not defined inside this function;
    # presumably a module-level import alias - confirm it is in scope.
    # NOTE(review): reads the sampler's private `_n_scheduled_samples`.
    variances, n_ops = q_estimator.estimate_diff_vars_regression(
        sampler._n_scheduled_samples)
    n_estimated = new_estimator.estimate_n_samples_for_target_variance(
        target_var, variances, n_ops, n_levels=sampler.n_levels)
    # Loop until number of estimated samples is greater than the number of scheduled samples
    while not sampler.process_adding_samples(n_estimated, sleep, add_coef):
        # New estimation according to already finished samples
        variances, n_ops = q_estimator.estimate_diff_vars_regression(
            sampler._n_scheduled_samples)
        n_estimated = new_estimator.estimate_n_samples_for_target_variance(
            target_var, variances, n_ops, n_levels=sampler.n_levels)

    print("collected samples ", sampler._n_scheduled_samples)
    # NOTE(review): the local name `vars` shadows the builtin of the same name.
    means, vars = q_estimator.estimate_moments(moments_fn)

    print("means ", means)
    print("vars ", vars)
    # Exact equality is used for the first moment and its variance; only the
    # second mean is compared with a tolerance.
    assert means[0] == 1
    assert np.isclose(means[1], 0, atol=5e-2)
    assert vars[0] == 0
    # Run one more scheduling round after the assertions.
    sampler.schedule_samples()
    sampler.ask_sampling_pool_for_samples()

    # `results` is not used within the visible part of this function.
    storage = sampler.sample_storage
    results = storage.sample_pairs()