Пример #1
0
def sample_pmc(analysis_file, posterior, base_directory='./', step_N=500, steps=10, final_N=5000,
               perplexity_threshold=1.0, weight_threshold=1e-10, sigma_test_stat=None, initial_proposal='clusters'):
    """
    Samples from a named posterior using the Population Monte Carlo (PMC) methods.

    The results of the find-cluster command are expected in EOS_BASE_DIRECTORY/POSTERIOR/clusters.
    The output file will be stored in EOS_BASE_DIRECTORY/POSTERIOR/pmc.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior.
    :type posterior: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param step_N: The number of samples to be used in each adaptation step. These samples will be discarded. Defaults to 500.
    :type step_N: int > 0, optional
    :param steps: The number of adaptation steps, which are used to adapt the PMC proposal to the posterior. Defaults to 10.
    :type steps: int > 0, optional
    :param final_N: The number of samples to be stored in the output file. Defaults to 5000,
    :type final_N: int > 0, optional
    :param perplexity_threshold: The threshold for the perplexity in the last step after which further adaptation steps are to be skipped. Defaults to 1.0.
    :type perplexity_threshold: 0.0 < float <= 1.0, optional
    :param weight_threshold: Mixture components with a weight smaller than this threshold are pruned.
    :type weight_threshold: 0.0 < float <= 1.0, optional.
    :param sigma_test_stat: If provided, the inverse CDF of -2*log(PDF) will be evaluated, using the provided values as the respective significance.
    :type sigma_test_stat: list or iterable
    :param initial_proposal: Specify where the initial proposal should be taken from; 'clusters' (default): use the proposal obtained using `find-clusters`;
    'product': use the proposal obtained from `mixture_product`; 'pmc': continue sampling from the previous `sample-pmc` results.
    :type initial_proposal: str, optional
     """

    output_path = os.path.join(base_directory, posterior, 'pmc')
    _set_log_file(output_path, 'log', mode='a' if (initial_proposal == 'pmc') else 'w')
    if type(analysis_file) is not eos.AnalysisFile:
        _analysis_file = eos.AnalysisFile(analysis_file)
    else:
        _analysis_file = analysis_file
    analysis = _analysis_file.analysis(posterior)
    rng = _np.random.mtrand.RandomState(1701)
    if initial_proposal == 'clusters':
        initial_density = eos.data.MixtureDensity(os.path.join(base_directory, posterior, 'clusters')).density()
    elif initial_proposal == 'pmc':
        previous_sampler = eos.data.PMCSampler(os.path.join(base_directory, posterior, 'pmc'))
        initial_density = previous_sampler.density()
    elif initial_proposal == 'product':
        initial_density = eos.data.MixtureDensity(os.path.join(base_directory, posterior, 'product')).density()
    else:
        eos.error("Could not initialize proposal in sample_pmc: argument {} is not supported.".format(initial_proposal))

    samples, weights, proposal = analysis.sample_pmc(initial_density, step_N=step_N, steps=steps, final_N=final_N,
                                                     rng=rng, final_perplexity_threshold=perplexity_threshold, weight_threshold=weight_threshold)

    if initial_proposal == 'pmc':
        samples = _np.concatenate((previous_sampler.samples, samples), axis=0)
        weights = _np.concatenate((previous_sampler.weights, weights), axis=0)

    eos.data.PMCSampler.create(output_path, analysis.varied_parameters, samples, weights, proposal, sigma_test_stat=sigma_test_stat)
Пример #2
0
def predict_observables(analysis_file, posterior, prediction, base_directory='./', begin=0, end=-1):
    '''
    Predicts a set of observables based on previously obtained PMC samples.

    The input files are expected in EOS_BASE_DIRECTORY/POSTERIOR/pmc.
    The output files will be stored in EOS_BASE_DIRECTORY/POSTERIOR/pred-PREDICTION.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior.
    :type posterior: str
    :param prediction: The name of the set of observables to predict.
    :type prediction: str
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param begin: The index of the first sample to use for the predictions. Defaults to 0.
    :type begin: int
    :param end: The index beyond the last sample to use for the predictions. Defaults to -1.
    :type begin: int
    '''
    _parameters = eos.Parameters()
    if type(analysis_file) is not eos.AnalysisFile:
        _analysis_file = eos.AnalysisFile(analysis_file)
    else:
        _analysis_file = analysis_file
    observables = _analysis_file.observables(prediction, _parameters)

    data = eos.data.PMCSampler(os.path.join(base_directory, posterior, 'pmc'))

    try:
        from tqdm import tqdm
        progressbar = tqdm
    except ImportError:
        progressbar = lambda x: x

    parameters = [_parameters[p['name']] for p in data.varied_parameters]
    observable_samples = []
    for i, sample in enumerate(progressbar(data.samples[begin:end])):
        for p, v in zip(parameters, sample):
            p.set(v)
        try:
            observable_samples.append([o.evaluate() for o in observables])
        except RuntimeError as e:
            eos.error('skipping prediction for sample {i} due to runtime error ({e}): {s}'.format(i=i, e=e, s=sample))
            observable_samples.append([_np.nan for o in observables])
    observable_samples = _np.array(observable_samples)

    output_path = os.path.join(base_directory, posterior, 'pred-{}'.format(prediction))
    eos.data.Prediction.create(output_path, observables, observable_samples, data.weights[begin:end])
Пример #3
0
def run_steps(analysis_file, base_directory='./'):
    """
    Runs a list of predefined steps recorded in the analysis file.

    Each step corresponds to a call to one of the following common tasks:
     - sample-mcmc
     - find-cluster
     - sample-pmc
     - predict-observables

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    """
    if type(analysis_file) is not eos.AnalysisFile:
        _analysis_file = eos.AnalysisFile(analysis_file)
    else:
        _analysis_file = analysis_file
    _analysis_file.run()
Пример #4
0
def sample_mcmc(analysis_file, posterior, chain, base_directory='./', pre_N=150, preruns=3, N=1000, stride=5, cov_scale=0.1, start_point=None):
    """
    Samples from a named posterior PDF using Markov Chain Monte Carlo (MCMC) methods.

    The output file will be stored in EOS_BASE_DIRECTORY/POSTERIOR/mcmc-CHAIN.

    :param analysis_file: The name of the analysis file that describes the named posterior, or an object of class `eos.AnalysisFile`.
    :type analysis_file: str or `eos.AnalysisFile`
    :param posterior: The name of the posterior PDF from which to draw the samples.
    :type posterior: str
    :param chain: The index assigned to the Markov chain. This value is used to seed the RNG for a reproducible analysis.
    :type chain: int >= 0
    :param base_directory: The base directory for the storage of data files. Can also be set via the EOS_BASE_DIRECTORY environment variable.
    :type base_directory: str, optional
    :param pre_N: The number of samples to be used for an adaptation in each prerun steps. These samples will be discarded.
    :type pre_N: int, optional
    :param preruns: The number of prerun steps, which are used to adapt the MCMC proposal to the posterior.
    :type preruns: int, optional
    :param N: The number of samples to be stored in the output file. Defaults to 1000.
    :type N: int, optional
    :param stride: The ratio of samples drawn over samples stored. For every S samples, S - 1 will be discarded. Defaults to 5.
    :type stride: int, optional
    :param cov_scale: Scale factor for the initial guess of the covariance matrix.
    :type cov_scale: float, optional
    :param start_point: Optional starting point for the chain
    :type start_point: list-like, optional
    """

    output_path = os.path.join(base_directory, posterior, 'mcmc-{:04}'.format(int(chain)))
    _set_log_file(output_path, 'log')
    if type(analysis_file) is not eos.AnalysisFile:
        _analysis_file = eos.AnalysisFile(analysis_file)
    else:
        _analysis_file = analysis_file
    analysis = _analysis_file.analysis(posterior)
    rng = _np.random.mtrand.RandomState(int(chain) + 1701)
    try:
        samples, weights = analysis.sample(N=N, stride=stride, pre_N=pre_N, preruns=preruns, rng=rng, cov_scale=cov_scale, start_point=start_point)
        eos.data.MarkovChain.create(output_path, analysis.varied_parameters, samples, weights)
    except RuntimeError as e:
        eos.error('encountered run time error ({e}) in parameter point:'.format(e=e))
        for p in analysis.varied_parameters:
            eos.error(' - {n}: {v}'.format(n=p.name(), v=p.evaluate()))