def test_thermochemical_error_for_mixing_entropy_error_is_excess_only(datasets_db):
    """Check that the error for mixing entropy data covers the excess part only.

    The SM_MIX dataset lists "idmix" under its excluded model contributions,
    so a failure here suggests the ideal mixing contribution is not being
    removed.
    """
    # Both phases share the same single-sublattice (AL, B) description; the
    # comprehension builds a separate dict for each phase.
    phase_models = {
        "components": ["AL", "B"],
        "phases": {
            phase_name: {
                "sublattice_model": [["AL", "B"]],
                "sublattice_site_ratios": [1],
            }
            for phase_name in ("LIQUID", "FCC_A1")
        },
    }
    solver_settings = {
        "sublattice_site_ratios": [1],
        "sublattice_occupancies": [[[0.5, 0.5]]],
        "sublattice_configurations": [[["AL", "B"]]],
        "mode": "manual",
    }
    sm_mix_dataset = {
        "components": ["AL", "B"],
        "phases": ["FCC_A1"],
        "solver": solver_settings,
        "conditions": {"P": 101325, "T": 298.15},
        "output": "SM_MIX",
        "values": [[[10]]],
        "excluded_model_contributions": ["idmix"],
    }
    datasets_db.insert(sm_mix_dataset)

    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    assert dbf.elements == {'AL', 'B'}
    assert set(dbf.phases.keys()) == {'LIQUID', 'FCC_A1'}
    interaction_parameters = dbf._parameters.search(where('parameter_type') == 'L')
    assert len(interaction_parameters) == 1

    component_names = list(dbf.elements)
    phase_names = list(dbf.phases.keys())

    # Only one (excess-only) point was fit, so the residual is expected to be
    # zero and the result should match the normal log-density evaluated at 0.
    sm_distribution = scipy.stats.norm(loc=0, scale=0.2)  # SM weight = 0.2
    expected_log_probability = sm_distribution.logpdf(0.0)

    # An empty symbols_to_fit list is passed so that nothing is treated as a
    # fitting parameter.
    data = get_thermochemical_data(
        dbf, component_names, phase_names, datasets_db, symbols_to_fit=[]
    )
    total_error = calculate_thermochemical_error(dbf, component_names, data)
    assert np.isclose(total_error, expected_log_probability, atol=1e-6)
def test_mixing_energies_are_fit(datasets_db):
    """Check that an excess parameter is fit when mixing enthalpy data is given."""
    # Both phases share the same single-sublattice (AL, B) description; the
    # comprehension builds a separate dict for each phase.
    phase_models = {
        "components": ["AL", "B"],
        "phases": {
            phase_name: {
                "sublattice_model": [["AL", "B"]],
                "sublattice_site_ratios": [1],
            }
            for phase_name in ("LIQUID", "FCC_A1")
        },
    }
    solver_settings = {
        "sublattice_site_ratios": [1],
        "sublattice_occupancies": [[[0.5, 0.5]]],
        "sublattice_configurations": [[["AL", "B"]]],
        "mode": "manual",
    }
    hm_mix_dataset = {
        "components": ["AL", "B"],
        "phases": ["FCC_A1"],
        "solver": solver_settings,
        "conditions": {"P": 101325, "T": 298.15},
        "output": "HM_MIX",
        "values": [[[-10000]]],
    }
    datasets_db.insert(hm_mix_dataset)

    dbf = generate_parameters(phase_models, datasets_db, 'SGTE91', 'linear')
    assert dbf.elements == {'AL', 'B'}
    assert set(dbf.phases.keys()) == {'LIQUID', 'FCC_A1'}
    fitted_interactions = dbf._parameters.search(where('parameter_type') == 'L')
    assert len(fitted_interactions) == 1
    assert dbf.symbols['VV0000'] == -40000

    # Round-trip the database through the TDB format and re-check its contents.
    tdb_text = dbf.to_string(fmt='tdb')
    read_dbf = dbf.from_string(tdb_text, fmt='tdb')
    assert read_dbf.elements == {'AL', 'B'}
    assert set(read_dbf.phases.keys()) == {'LIQUID', 'FCC_A1'}
    reread_interactions = read_dbf._parameters.search(where('parameter_type') == 'L')
    assert len(reread_interactions) == 1

    from espei.error_functions import (
        calculate_non_equilibrium_thermochemical_probability,
        get_thermochemical_data,
    )

    # Only one point was fit, so the residual is expected to be zero and the
    # result should match the normal log-density evaluated at 0.
    hm_distribution = scipy.stats.norm(loc=0, scale=500.0)  # HM weight = 500
    expected_log_probability = hm_distribution.logpdf(0.0)

    # An empty symbols_to_fit list is passed so that nothing is treated as a
    # fitting parameter. Components and phases come from the re-read database,
    # while the originally generated database is the one evaluated.
    component_names = sorted(read_dbf.elements)
    phase_names = list(read_dbf.phases.keys())
    data = get_thermochemical_data(
        dbf, component_names, phase_names, datasets_db, symbols_to_fit=[]
    )
    total_error = calculate_non_equilibrium_thermochemical_probability(data)
    assert np.isclose(total_error, expected_log_probability, atol=1e-6)
def test_thermochemical_error_with_multiple_T_X_points(datasets_db):
    """A mixing-phase dataset with several temperature and composition points should evaluate successfully."""
    datasets_db.insert(CU_MG_SM_MIX_T_X_FCC_A1)

    database = Database(CU_MG_TDB)
    component_names = ['CU', 'MG', 'VA']
    phase_names = list(database.phases.keys())

    data = get_thermochemical_data(database, component_names, phase_names, datasets_db)
    total_error = calculate_thermochemical_error(database, component_names, data)

    # Reference value pinned by the test; compared with a relative tolerance.
    expected_error = -3282497.2380024833
    assert np.isclose(float(total_error), expected_error, rtol=1e-6)
def test_thermochemical_error_with_multiple_T_points(datasets_db):
    """A stoichiometric compound dataset with several temperature points should evaluate successfully."""
    datasets_db.insert(CU_MG_HM_MIX_T_CUMG2)

    database = Database(CU_MG_TDB)
    component_names = ['CU', 'MG', 'VA']
    phase_names = list(database.phases.keys())

    data = get_thermochemical_data(database, component_names, phase_names, datasets_db)
    total_error = calculate_thermochemical_error(database, component_names, data)

    # Reference value pinned by the test; compared with a relative tolerance.
    expected_error = -14.287293263253728
    assert np.isclose(total_error, expected_error, rtol=1e-6)
def test_thermochemical_error_with_multiple_X_points(datasets_db):
    """A mixing-phase dataset with several composition points should evaluate successfully."""
    datasets_db.insert(CU_MG_CPM_MIX_X_HCP_A3)

    database = Database(CU_MG_TDB)
    component_names = ['CU', 'MG', 'VA']
    phase_names = list(database.phases.keys())

    data = get_thermochemical_data(database, component_names, phase_names, datasets_db)
    total_error = calculate_thermochemical_error(database, component_names, data)

    # Reference value pinned by the test; compared with a relative tolerance.
    expected_error = -4061.119001241541
    assert np.isclose(total_error, expected_error, rtol=1e-6)
def test_lnprob_calculates_single_phase_probability_for_success(datasets_db):
    """The optimizer's predict() successfully calculates a probability from single phase data."""
    dbf = Database.from_string(CU_MG_TDB_FCC_ONLY, fmt='tdb')
    datasets_db.insert(CU_MG_HM_MIX_SINGLE_FCC_A1)

    comps = ['CU', 'MG', 'VA']
    phases = ['FCC_A1']
    param = 'VV0003'
    orig_val = -14.0865

    opt = EmceeOptimizer(dbf)
    thermochemical_data = get_thermochemical_data(
        dbf, comps, phases, datasets_db, symbols_to_fit=[param]
    )
    thermochemical_kwargs = {
        'dbf': dbf,
        'comps': comps,
        'thermochemical_data': thermochemical_data,
    }

    # Each pair is (trial value for the fitted symbol, reference result).
    # The cases run in order: the original value, a small value, a huge value.
    cases = (
        (orig_val, -9.119484935312146),
        (10, -9.143559131626864),
        (1e5, -1359.1335466316268),
    )
    for trial_value, expected_result in cases:
        result = opt.predict(
            [trial_value],
            prior_rvs=[rv_zero()],
            symbols_to_fit=[param],
            thermochemical_kwargs=thermochemical_kwargs,
        )
        assert np.isreal(result)
        assert np.isclose(result, expected_result, rtol=1e-6)
def setup_context(dbf, datasets, symbols_to_fit=None, data_weights=None, phase_models=None, make_callables=True):
    """
    Set up a context dictionary for calculating error.

    Parameters
    ----------
    dbf : Database
        A pycalphad Database that will be fit
    datasets : PickleableTinyDB
        A database of single- and multi-phase data to fit
    symbols_to_fit : list of str
        List of symbols in the Database that will be fit. If None (default),
        the symbols returned by ``database_symbols_to_fit(dbf)`` are used.
        A list that is passed explicitly is sorted before use.
    data_weights : dict, optional
        Weights forwarded to the data-gathering functions. The ``'ZPF'`` and
        ``'ACR'`` entries are also read directly, each defaulting to 1.0.
        If None (default), an empty dict is used.
    phase_models : dict, optional
        If given, the components are taken from ``phase_models['components']``
        and the model dictionary from ``get_model_dict(phase_models)``.
        If None (default), the components are the elements of the Database
        and an empty model dictionary is used.
    make_callables : bool
        If True (default), callables are built with ``build_callables`` and
        stored in the activity keyword arguments; otherwise ``None`` is
        stored there instead.

    Returns
    -------
    dict
        Context with the keys ``'symbols_to_fit'``, ``'zpf_kwargs'``,
        ``'equilibrium_thermochemical_kwargs'``, ``'thermochemical_kwargs'``
        and ``'activity_kwargs'``.

    Raises
    ------
    ValueError
        If there are no symbols to fit.

    Notes
    -----
    A copy of the Database is made and used in the context. To commit changes
    back to the original database, the dbf.symbols.update method should be used.

    """
    import time

    # Work on a private copy so the caller's Database is left untouched.
    dbf = copy.deepcopy(dbf)
    if phase_models is not None:
        comps = sorted(phase_models['components'])
    else:
        comps = sorted(dbf.elements)
    if symbols_to_fit is None:
        symbols_to_fit = database_symbols_to_fit(dbf)
    else:
        symbols_to_fit = sorted(symbols_to_fit)
    if data_weights is None:
        data_weights = {}

    if len(symbols_to_fit) == 0:
        raise ValueError(
            "No degrees of freedom. Database must contain symbols starting with 'V' or 'VV', followed by a number."
        )
    _log.info('Fitting %s degrees of freedom.', len(symbols_to_fit))

    # A symbol stored as a Piecewise is replaced by that Piecewise's first
    # argument.
    for x in symbols_to_fit:
        if isinstance(dbf.symbols[x], symengine.Piecewise):
            _log.debug('Replacing %s in database', x)
            dbf.symbols[x] = dbf.symbols[x].args[0]

    # construct the models for each phase, substituting in the SymEngine symbol to fit.
    if phase_models is not None:
        model_dict = get_model_dict(phase_models)
    else:
        model_dict = {}

    # Durations are measured with time.perf_counter(), a monotonic clock meant
    # for interval timing, so system clock adjustments cannot skew them.
    _log.trace('Building phase models (this may take some time)')
    start = time.perf_counter()
    phases = sorted(filter_phases(dbf, unpack_components(dbf, comps), dbf.phases.keys()))
    # Every symbol to fit starts with a parameter value of 0.
    parameters = dict.fromkeys(symbols_to_fit, 0)
    models = instantiate_models(dbf, comps, phases, model=model_dict, parameters=parameters)
    if make_callables:
        eq_callables = build_callables(
            dbf, comps, phases, models,
            parameter_symbols=symbols_to_fit,
            output='GM',
            build_gradients=True,
            build_hessians=True,
            additional_statevars={v.N, v.P, v.T},
        )
    else:
        eq_callables = None
    _log.trace('Finished building phase models (%0.2fs)', time.perf_counter() - start)

    _log.trace('Getting non-equilibrium thermochemical data (this may take some time)')
    start = time.perf_counter()
    thermochemical_data = get_thermochemical_data(
        dbf, comps, phases, datasets,
        model=model_dict,
        weight_dict=data_weights,
        symbols_to_fit=symbols_to_fit,
    )
    _log.trace('Finished getting non-equilibrium thermochemical data (%0.2fs)', time.perf_counter() - start)

    _log.trace('Getting equilibrium thermochemical data (this may take some time)')
    start = time.perf_counter()
    eq_thermochemical_data = get_equilibrium_thermochemical_data(
        dbf, comps, phases, datasets,
        model=model_dict,
        parameters=parameters,
        data_weight_dict=data_weights,
    )
    _log.trace('Finished getting equilibrium thermochemical data (%0.2fs)', time.perf_counter() - start)

    _log.trace('Getting ZPF data (this may take some time)')
    start = time.perf_counter()
    zpf_data = get_zpf_data(dbf, comps, phases, datasets, model=model_dict, parameters=parameters)
    _log.trace('Finished getting ZPF data (%0.2fs)', time.perf_counter() - start)

    # context for the log probability function
    # for all cases, parameters argument addressed in MCMC loop
    error_context = {
        'symbols_to_fit': symbols_to_fit,
        'zpf_kwargs': {
            'zpf_data': zpf_data,
            'data_weight': data_weights.get('ZPF', 1.0),
        },
        'equilibrium_thermochemical_kwargs': {
            'eq_thermochemical_data': eq_thermochemical_data,
        },
        'thermochemical_kwargs': {
            'thermochemical_data': thermochemical_data,
        },
        'activity_kwargs': {
            'dbf': dbf,
            'comps': comps,
            'phases': phases,
            'datasets': datasets,
            'phase_models': models,
            'callables': eq_callables,
            'data_weight': data_weights.get('ACR', 1.0),
        },
    }
    return error_context