def test_lnprob_calculates_multi_phase_probability_for_success(datasets_db):
    """predict() returns a real, parameter-sensitive probability for ZPF data.

    This variant builds the phase models and callables explicitly and hands
    them to the optimizer through ``zpf_kwargs``.

    NOTE(review): another function with exactly this name is defined later in
    this file. Within one module the later definition rebinds the name, so
    this variant is presumably never collected -- confirm which is intended.
    """
    expected_lnprob = -31.309645520830344
    fit_symbol = 'VV0001'
    components = ['CU', 'MG', 'VA']
    phase_names = ['LIQUID', 'FCC_A1', 'HCP_A3', 'LAVES_C15', 'CUMG2']

    database = Database.from_string(CU_MG_TDB, fmt='tdb')
    datasets_db.insert(CU_MG_DATASET_ZPF_WORKING)

    # Seed the models with the value currently stored in the database.
    start_value = database.symbols[fit_symbol].args[0].expr
    phase_models = instantiate_models(
        database, components, phase_names,
        parameters={fit_symbol: start_value},
    )
    callables = build_callables(
        database, components, phase_names, phase_models,
        parameter_symbols=[fit_symbol],
        output='GM',
        build_gradients=True,
        build_hessians=False,
        additional_statevars={v.N, v.P, v.T},
    )
    zpf_data = get_zpf_data(components, phase_names, datasets_db)
    zpf_kwargs = {
        'dbf': database,
        'phases': phase_names,
        'zpf_data': zpf_data,
        'phase_models': phase_models,
        'callables': callables,
        'data_weight': 1.0,
    }

    optimizer = EmceeOptimizer(database)

    def predict_at(value):
        # Evaluate the probability with the fitted symbol set to ``value``.
        return optimizer.predict(
            [value],
            prior_rvs=[rv_zero()],
            symbols_to_fit=[fit_symbol],
            zpf_kwargs=zpf_kwargs,
        )

    baseline = predict_at(10)
    assert np.isreal(baseline)
    assert np.isclose(baseline, expected_lnprob, rtol=1e-6)

    # A wildly different parameter value must change the probability.
    perturbed = predict_at(10000000)
    assert not np.isclose(perturbed, expected_lnprob, rtol=1e-6)
def test_lnprob_calculates_multi_phase_probability_for_success(datasets_db):
    """predict() returns a real, parameter-sensitive probability for ZPF data.

    The ZPF data is prepared up front from the database and the initial
    parameter value, so ``zpf_kwargs`` carries only the data and its weight.
    """
    expected_lnprob = -31.309645520830344
    fit_symbol = 'VV0001'
    components = ['CU', 'MG', 'VA']
    phase_names = ['LIQUID', 'FCC_A1', 'HCP_A3', 'LAVES_C15', 'CUMG2']

    database = Database.from_string(CU_MG_TDB, fmt='tdb')
    datasets_db.insert(CU_MG_DATASET_ZPF_WORKING)

    # Start from the value currently stored in the database.
    start_value = database.symbols[fit_symbol].args[0].expr
    starting_parameters = {fit_symbol: start_value}
    zpf_data = get_zpf_data(
        database, components, phase_names, datasets_db, starting_parameters
    )
    zpf_kwargs = {
        'zpf_data': zpf_data,
        'data_weight': 1.0,
    }

    optimizer = EmceeOptimizer(database)

    def predict_at(value):
        # Evaluate the probability with the fitted symbol set to ``value``.
        return optimizer.predict(
            [value],
            prior_rvs=[rv_zero()],
            symbols_to_fit=[fit_symbol],
            zpf_kwargs=zpf_kwargs,
        )

    baseline = predict_at(10)
    assert np.isreal(baseline)
    assert np.isclose(baseline, expected_lnprob, rtol=1e-4)

    # A wildly different parameter value must change the probability.
    perturbed = predict_at(10000000)
    assert not np.isclose(perturbed, expected_lnprob, rtol=1e-6)
def test_lnprob_does_not_raise_on_LinAlgError(datasets_db):
    """lnprob() should catch LinAlgError raised by equilibrium and return -np.inf"""
    dbf = Database.from_string(CU_MG_TDB, fmt='tdb')
    comps = ['CU', 'MG', 'VA']
    phases = ['LIQUID', 'FCC_A1', 'HCP_A3', 'LAVES_C15', 'CUMG2']
    datasets_db.insert(CU_MG_DATASET_ZPF_WORKING)
    # NOTE(review): other call sites in this file pass ``dbf`` as the first
    # argument to get_zpf_data -- confirm which signature is current.
    zpf_kwargs = {
        'dbf': dbf,
        'phases': phases,
        'zpf_data': get_zpf_data(comps, phases, datasets_db),
        'data_weight': 1.0,
    }
    # The optimizer was previously referenced without being created, which
    # made the test fail with NameError before reaching the assertion.
    opt = EmceeOptimizer(dbf)
    res = opt.predict(
        [10],
        prior_rvs=[rv_zero()],
        symbols_to_fit=['VV0001'],
        zpf_kwargs=zpf_kwargs,
    )
    assert np.isneginf(res)
def test_zpf_error_zero(datasets_db):
    """An exactly reproduced tieline must give the zero-residual ZPF probability."""
    datasets_db.insert(CU_MG_DATASET_ZPF_ZERO_ERROR)

    database = Database(CU_MG_TDB)
    components = ['CU', 'MG', 'VA']
    phase_names = list(database.phases.keys())

    # With a ZPF weight of 1 kJ (scale=1000.0), each of the two tieline points
    # contributes the log-density of a zero residual.
    per_point_logpdf = scipy.stats.norm(loc=0, scale=1000.0).logpdf(0.0)
    expected_prob = 2 * per_point_logpdf

    prepared_zpf = get_zpf_data(components, phase_names, datasets_db)
    computed = calculate_zpf_error(database, phase_names, prepared_zpf)

    assert np.isclose(computed, expected_prob, rtol=1e-6)
def test_lnprob_calculates_associate_tdb(datasets_db):
    """predict() gives a real, finite probability for the associate-model TDB."""
    expected_lnprob = -31.309645520830344
    fit_symbol = 'VV0001'
    components = ['CU', 'MG', 'VA']
    phase_names = ['LIQUID', 'FCC_A1', 'HCP_A3', 'LAVES_C15', 'CUMG2']

    database = Database.from_string(CU_MG_TDB_ASSOC, fmt='tdb')
    datasets_db.insert(CU_MG_DATASET_ZPF_WORKING)

    start_value = database.symbols[fit_symbol].args[0]
    starting_parameters = {fit_symbol: start_value}
    zpf_data = get_zpf_data(
        database, components, phase_names, datasets_db, starting_parameters
    )
    zpf_kwargs = {
        'zpf_data': zpf_data,
        'data_weight': 1.0,
    }

    optimizer = EmceeOptimizer(database)

    def predict_at(value):
        # Evaluate the probability with the fitted symbol set to ``value``.
        return optimizer.predict(
            [value],
            prior_rvs=[rv_zero()],
            symbols_to_fit=[fit_symbol],
            zpf_kwargs=zpf_kwargs,
        )

    baseline = predict_at(10)
    assert np.isreal(baseline)
    assert not np.isinf(baseline)
    assert np.isclose(baseline, expected_lnprob, rtol=1e-6)

    # Second evaluation: the driving forces (and hence the probability) must
    # differ from the VV0001 = 10 case.
    perturbed = predict_at(-10000000)
    assert np.isreal(perturbed)
    assert not np.isinf(perturbed)
    # A loose rtol is fine here because the two results should be _very_ different.
    assert not np.isclose(perturbed, expected_lnprob, rtol=1e-2)
def setup_context(dbf, datasets, symbols_to_fit=None, data_weights=None, phase_models=None, make_callables=True):
    """
    Set up a context dictionary for calculating error.

    Parameters
    ----------
    dbf : Database
        A pycalphad Database that will be fit
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    symbols_to_fit : list of str
        List of symbols in the Database that will be fit. If None (default) are
        passed, then all parameters prefixed with `VV` followed by a number,
        e.g. VV0001 will be fit.
    data_weights : dict, optional
        Weights keyed by data type. The ``'ZPF'`` and ``'ACR'`` entries are
        read here (each defaulting to 1.0); the whole mapping is also forwarded
        to the thermochemical data preparation. ``None`` (default) is treated
        as an empty dict.
    phase_models : dict, optional
        If given, its ``'components'`` entry determines the components and it
        is passed to ``get_model_dict`` to obtain the per-phase model mapping.
        If None (default), the components are the Database elements and no
        custom models are used.
    make_callables : bool
        If True (default), build the equilibrium callables with
        ``build_callables``; otherwise the ``'callables'`` entry is None.

    Returns
    -------
    dict
        Keyword arguments for the error functions, with keys
        ``'symbols_to_fit'``, ``'zpf_kwargs'``,
        ``'equilibrium_thermochemical_kwargs'``, ``'thermochemical_kwargs'``
        and ``'activity_kwargs'``.

    Raises
    ------
    ValueError
        If there are no symbols to fit.

    Notes
    -----
    A copy of the Database is made and used in the context. To commit changes
    back to the original database, the dbf.symbols.update method should be used.

    """
    import time

    dbf = copy.deepcopy(dbf)
    if phase_models is not None:
        comps = sorted(phase_models['components'])
    else:
        comps = sorted(dbf.elements)
    if symbols_to_fit is None:
        symbols_to_fit = database_symbols_to_fit(dbf)
    else:
        symbols_to_fit = sorted(symbols_to_fit)
    data_weights = data_weights if data_weights is not None else {}

    if len(symbols_to_fit) == 0:
        raise ValueError(
            'No degrees of freedom. Database must contain symbols starting with \'V\' or \'VV\', followed by a number.'
        )
    _log.info('Fitting %s degrees of freedom.', len(symbols_to_fit))

    # Unwrap Piecewise-defined symbols to their first argument.
    for x in symbols_to_fit:
        if isinstance(dbf.symbols[x], symengine.Piecewise):
            _log.debug('Replacing %s in database', x)
            dbf.symbols[x] = dbf.symbols[x].args[0]

    # construct the models for each phase, substituting in the SymEngine symbol to fit.
    if phase_models is not None:
        model_dict = get_model_dict(phase_models)
    else:
        model_dict = {}

    # perf_counter is monotonic, so the logged durations cannot be skewed by
    # system clock adjustments.
    _log.trace('Building phase models (this may take some time)')
    t1 = time.perf_counter()
    phases = sorted(
        filter_phases(dbf, unpack_components(dbf, comps), dbf.phases.keys()))
    # Every fitted symbol starts at zero; real values are supplied later.
    parameters = dict.fromkeys(symbols_to_fit, 0)
    models = instantiate_models(dbf, comps, phases, model=model_dict,
                                parameters=parameters)
    if make_callables:
        eq_callables = build_callables(dbf, comps, phases, models,
                                       parameter_symbols=symbols_to_fit,
                                       output='GM',
                                       build_gradients=True,
                                       build_hessians=True,
                                       additional_statevars={v.N, v.P, v.T})
    else:
        eq_callables = None
    t2 = time.perf_counter()
    _log.trace('Finished building phase models (%0.2fs)', t2 - t1)

    _log.trace(
        'Getting non-equilibrium thermochemical data (this may take some time)'
    )
    t1 = time.perf_counter()
    thermochemical_data = get_thermochemical_data(
        dbf, comps, phases, datasets,
        model=model_dict,
        weight_dict=data_weights,
        symbols_to_fit=symbols_to_fit)
    t2 = time.perf_counter()
    _log.trace('Finished getting non-equilibrium thermochemical data (%0.2fs)',
               t2 - t1)

    _log.trace(
        'Getting equilibrium thermochemical data (this may take some time)')
    t1 = time.perf_counter()
    eq_thermochemical_data = get_equilibrium_thermochemical_data(
        dbf, comps, phases, datasets,
        model=model_dict,
        parameters=parameters,
        data_weight_dict=data_weights)
    t2 = time.perf_counter()
    _log.trace('Finished getting equilibrium thermochemical data (%0.2fs)',
               t2 - t1)

    _log.trace('Getting ZPF data (this may take some time)')
    t1 = time.perf_counter()
    zpf_data = get_zpf_data(dbf, comps, phases, datasets,
                            model=model_dict, parameters=parameters)
    t2 = time.perf_counter()
    _log.trace('Finished getting ZPF data (%0.2fs)', t2 - t1)

    # context for the log probability function
    # for all cases, parameters argument addressed in MCMC loop
    error_context = {
        'symbols_to_fit': symbols_to_fit,
        'zpf_kwargs': {
            'zpf_data': zpf_data,
            'data_weight': data_weights.get('ZPF', 1.0),
        },
        'equilibrium_thermochemical_kwargs': {
            'eq_thermochemical_data': eq_thermochemical_data,
        },
        'thermochemical_kwargs': {
            'thermochemical_data': thermochemical_data,
        },
        'activity_kwargs': {
            'dbf': dbf,
            'comps': comps,
            'phases': phases,
            'datasets': datasets,
            'phase_models': models,
            'callables': eq_callables,
            'data_weight': data_weights.get('ACR', 1.0),
        },
    }
    return error_context