def _process_species(db, sp_name, sp_comp, charge=0, *args):
    """Add a species to the Database. If charge not specified, the Species will be neutral."""
    # Process the species composition list of [element1, ratio1, element2, ratio2, ..., elementN, ratioN]
    constituents = {sp_comp[i]: sp_comp[i + 1] for i in range(0, len(sp_comp), 2)}
    db.species.add(Species(sp_name, constituents, charge=charge))
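# Usage sketch for _process_species (illustrative only; the species name and
# composition list below are hypothetical, and `dbf` is assumed to be an
# existing pycalphad Database):
#
#     dbf = Database()
#     # ZN+2 with a +2 charge, built from the flat [element, ratio, ...] list
#     _process_species(dbf, 'ZN+2', ['ZN', 1.0], charge=2)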
def generate_parameters(phase_models, datasets, ref_state, excess_model):
    """Generate parameters from the given phase models and datasets.

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit.
    datasets : PickleableTinyDB
        Database of single- and multi-phase data to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'.
    excess_model : str
        String of the type of excess model to fit to, e.g. 'linear'.

    Returns
    -------
    pycalphad.Database
    """
    logging.info('Generating parameters.')
    dbf = Database()
    dbf.elements = set(phase_models['components'])
    for el in dbf.elements:
        if Species is not None:  # TODO: drop this on release of pycalphad 0.7
            dbf.species.add(Species(el, {el: 1}, 0))
    # Write reference state to Database
    refdata = getattr(espei.refdata, ref_state)
    stabledata = getattr(espei.refdata, ref_state + 'Stable')
    for key, element in refdata.items():
        if isinstance(element, sympy.Piecewise):
            newargs = element.args + ((0, True),)
            refdata[key] = sympy.Piecewise(*newargs)
    for key, element in stabledata.items():
        if isinstance(element, sympy.Piecewise):
            newargs = element.args + ((0, True),)
            stabledata[key] = sympy.Piecewise(*newargs)
    comp_refs = {c.upper(): stabledata[c.upper()] for c in dbf.elements if c.upper() != 'VA'}
    comp_refs['VA'] = 0
    # Note that `(c.upper()*2)[:2]` returns 'AL' for c.upper() == 'AL' and 'VV' for c.upper() == 'V'
    dbf.symbols.update({'GHSER' + (c.upper() * 2)[:2]: data for c, data in comp_refs.items()})
    for phase_name, phase_obj in sorted(phase_models['phases'].items(), key=operator.itemgetter(0)):
        # Perform parameter selection and single-phase fitting based on input
        # TODO: Need to pass particular models to include: magnetic, order-disorder, etc.
        symmetry = phase_obj.get('equivalent_sublattices', None)
        aliases = phase_obj.get('aliases', None)
        # TODO: More advanced phase data searching
        site_ratios = phase_obj['sublattice_site_ratios']
        subl_model = phase_obj['sublattice_model']
        dbf.add_phase(phase_name, dict(), site_ratios)
        dbf.add_phase_constituents(phase_name, subl_model)
        dbf.add_structure_entry(phase_name, phase_name)
        phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios, datasets, refdata, aliases=aliases)
    logging.info('Finished generating parameters.')
    return dbf
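# Usage sketch for generate_parameters (a minimal example; the input file names
# and the helpers used to load them are assumptions based on the ESPEI docs,
# not part of this module):
#
#     import json
#     from espei.datasets import load_datasets, recursive_glob
#
#     with open('Cu-Mg-input.json') as fp:
#         phase_models = json.load(fp)
#     datasets = load_datasets(sorted(recursive_glob('input-data', '*.json')))
#     dbf = generate_parameters(phase_models, datasets, 'SGTE91', 'linear')
#     dbf.to_file('cu-mg_generated.tdb')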
def fit_formation_energy(dbf, comps, phase_name, configuration, symmetry, datasets, features=None):
    """
    Find suitable linear model parameters for the given phase.
    We do this by successively fitting heat capacities, entropies, and enthalpies
    of formation, and selecting against criteria to prevent overfitting. The
    "best" set of parameters minimizes the error without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients, e.g.,
        {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)}
        (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}
    """
    if features is None:
        features = [("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1, v.T**3)),
                    ("SM_FORM", (v.T,)),
                    ("HM_FORM", (sympy.S.One,))]
        features = OrderedDict(features)
    if any([isinstance(conf, (list, tuple)) for conf in configuration]):
        # TODO: assumes binary interaction here
        fitting_steps = (["CPM_FORM", "CPM_MIX"], ["SM_FORM", "SM_MIX"], ["HM_FORM", "HM_MIX"])
        # Product of all nonzero site fractions in all sublattices
        YS = sympy.Symbol('YS')
        # Product of all binary interaction terms
        Z = sympy.Symbol('Z')
        redlich_kister_features = (YS, YS * Z, YS * (Z**2), YS * (Z**3))
        for feature in features.keys():
            all_features = list(itertools.product(redlich_kister_features, features[feature]))
            features[feature] = [i[0] * i[1] for i in all_features]
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(endmembers_from_interaction(configuration)))
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    parameters = {}
    for feature in features.values():
        for coef in feature:
            parameters[coef] = 0

    # This is our previously fit partial model.
    # Subtract out all of these contributions (zero out the reference state because these are formation properties).
    fixed_model = Model(dbf, comps, phase_name,
                        parameters={'GHSER' + (c.upper() * 2)[:2]: 0 for c in comps})
    fixed_model.models['idmix'] = 0
    fixed_portions = [0]

    moles_per_formula_unit = sympy.S(0)
    subl_idx = 0
    for num_sites, const in zip(dbf.phases[phase_name].sublattices, dbf.phases[phase_name].constituents):
        if Species('VA') in const:
            moles_per_formula_unit += num_sites * (1 - v.SiteFraction(phase_name, subl_idx, Species('VA')))
        else:
            moles_per_formula_unit += num_sites
        subl_idx += 1

    for desired_props in fitting_steps:
        desired_data = get_data(comps, phase_name, configuration, symmetry, datasets, desired_props)
        logging.debug('{}: datasets found: {}'.format(desired_props, len(desired_data)))
        if len(desired_data) > 0:
            # We assume all properties in the same fitting step have the same features (but different ref states)
            feature_matrix = _build_feature_matrix(desired_props[0], features[desired_props[0]], desired_data)
            all_samples = get_samples(desired_data)
            data_quantities = np.concatenate(
                _shift_reference_state(desired_data, feature_transforms[desired_props[0]], fixed_model),
                axis=-1)
            site_fractions = [
                build_sitefractions(
                    phase_name,
                    ds['solver']['sublattice_configurations'],
                    ds['solver'].get('sublattice_occupancies',
                                     np.ones((len(ds['solver']['sublattice_configurations']),
                                              len(ds['solver']['sublattice_configurations'][0])),
                                             dtype=float)))
                for ds in desired_data for _ in ds['conditions']['T']
            ]
            # Flatten list
            site_fractions = list(itertools.chain(*site_fractions))
            # Remove existing partial model contributions from the data
            data_quantities = data_quantities - feature_transforms[desired_props[0]](fixed_model.ast)
            # Subtract out high-order (in T) parameters we've already fit
            data_quantities = data_quantities - \
                feature_transforms[desired_props[0]](sum(fixed_portions)) / moles_per_formula_unit
            for sf, i in zip(site_fractions, data_quantities):
                missing_variables = sympy.S(i * moles_per_formula_unit).atoms(v.SiteFraction) - set(sf.keys())
                sf.update({x: 0. for x in missing_variables})
            # The moles_per_formula_unit factor is here because our data is stored per atom,
            # but all of our fits are per formula unit.
            data_quantities = [
                sympy.S(i * moles_per_formula_unit).xreplace(sf).xreplace({v.T: ixx[0]}).evalf()
                for i, sf, ixx in zip(data_quantities, site_fractions, all_samples)
            ]
            data_quantities = np.asarray(data_quantities, dtype=float)
            parameters.update(_fit_parameters(feature_matrix, data_quantities, features[desired_props[0]]))
            # Add these parameters to be fixed for the next fitting step
            fixed_portion = np.array(features[desired_props[0]], dtype=object)
            fixed_portion = np.dot(fixed_portion, [parameters[feature] for feature in features[desired_props[0]]])
            fixed_portions.append(fixed_portion)
    return parameters
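# Usage sketch for fit_formation_energy (illustrative; the phase name,
# configuration, and component list below are hypothetical and assume `dbf` and
# `datasets` were prepared as in generate_parameters above):
#
#     # Fit the ('CU', 'MG') endmember of a hypothetical two-sublattice LAVES_C15 phase
#     params = fit_formation_energy(dbf, ['CU', 'MG', 'VA'], 'LAVES_C15',
#                                   ('CU', 'MG'), None, datasets)
#     # `params` maps each selected feature (e.g. v.T*sympy.log(v.T)) to its fitted coefficient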
def generate_parameters(phase_models, datasets, ref_state, excess_model,
                        ridge_alpha=None, aicc_penalty_factor=None, dbf=None):
    """Generate parameters from the given phase models and datasets.

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit.
    datasets : PickleableTinyDB
        Database of single- and multi-phase data to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'.
    excess_model : str
        String of the type of excess model to fit to, e.g. 'linear'.
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. Defaults to
        None, which falls back to ordinary least squares regression.
        For now, the parameter is applied to all features.
    aicc_penalty_factor : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    dbf : Database
        Initial pycalphad Database that can have parameters that would not be fit by ESPEI.

    Returns
    -------
    pycalphad.Database
    """
    logging.info('Generating parameters.')
    logging.log(TRACE, f'Found the following user reference states: {espei.refdata.INSERTED_USER_REFERENCE_STATES}')
    phases = sorted(map(lambda x: x.upper(), phase_models['phases'].keys()))
    dbf = dbf or Database()
    dbf.elements.update(set(phase_models['components']))
    for el in dbf.elements:
        dbf.species.add(Species(el, {el: 1}, 0))
        # Add the SER reference data
        dbf.refstates[el] = espei.refdata.ser_dict[el]
        # Update the refdata for this element with the reference phase
        if el not in espei.refdata.pure_element_phases.keys():
            # Probably VA, /- or something else
            continue
        refdata_phase = espei.refdata.pure_element_phases[el]
        if refdata_phase in phases:
            dbf.refstates[el]['phase'] = refdata_phase
        else:
            # Check all the aliases and set the one that matches
            for phase_name, phase_obj in phase_models['phases'].items():
                for alias in phase_obj.get('aliases', []):
                    if alias == refdata_phase:
                        dbf.refstates[el]['phase'] = phase_name
    # Write reference state to Database
    refdata = getattr(espei.refdata, ref_state)
    stabledata = getattr(espei.refdata, ref_state + 'Stable')
    for key, element in refdata.items():
        if isinstance(element, sympy.Piecewise):
            newargs = element.args + ((0, True),)
            refdata[key] = sympy.Piecewise(*newargs)
    for key, element in stabledata.items():
        if isinstance(element, sympy.Piecewise):
            newargs = element.args + ((0, True),)
            stabledata[key] = sympy.Piecewise(*newargs)
    comp_refs = {c.upper(): stabledata[c.upper()] for c in dbf.elements if c.upper() != 'VA'}
    comp_refs['VA'] = 0
    # Note that `(c.upper()*2)[:2]` returns 'AL' for c.upper() == 'AL' and 'VV' for c.upper() == 'V'
    dbf.symbols.update({'GHSER' + (c.upper() * 2)[:2]: data for c, data in comp_refs.items()})
    for phase_name, phase_obj in sorted(phase_models['phases'].items(), key=operator.itemgetter(0)):
        # Perform parameter selection and single-phase fitting based on input
        # TODO: Need to pass particular models to include: magnetic, order-disorder, etc.
        symmetry = phase_obj.get('equivalent_sublattices', None)
        aliases = phase_obj.get('aliases', None)
        # TODO: More advanced phase data searching
        site_ratios = phase_obj['sublattice_site_ratios']
        subl_model = phase_obj['sublattice_model']
        if phase_name not in dbf.phases.keys():
            dbf.add_phase(phase_name, dict(), site_ratios)
            dbf.add_phase_constituents(phase_name, subl_model)
            dbf.add_structure_entry(phase_name, phase_name)
        phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios, datasets, refdata,
                  ridge_alpha, aicc_penalty=aicc_penalty_factor, aliases=aliases)
    logging.info('Finished generating parameters.')
    return dbf
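# Usage sketch for the extended signature (hypothetical values; `phase_models`
# and `datasets` are assumed to be loaded as in the earlier example, the seed
# TDB file name is made up, and the aicc_penalty_factor entries are illustrative):
#
#     seed_dbf = Database('manual-parameters.tdb')
#     dbf = generate_parameters(phase_models, datasets, 'SGTE91', 'linear',
#                               ridge_alpha=1e-10,
#                               aicc_penalty_factor={'LIQUID': {'HM': 5.0}},
#                               dbf=seed_dbf)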