def phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios, datasets, refdata, aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Endmember ``G`` parameters are produced first, then binary interaction
    ``L`` parameters. A pure-component endmember reuses a lattice stability
    from ``refdata`` when one is found under the phase name or one of its
    aliases; every other endmember and every binary interaction is fit with
    ``fit_formation_energy``. Nonzero fitted coefficients are stored in
    ``dbf.symbols`` under generated ``VVnnnn`` names and referenced
    symbolically from the parameters added to the database.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    subl_model : [[str]]
        Sublattice model for the phase of interest.
    site_ratios : [float]
        Number of sites in each sublattice, normalized to one atom.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    aliases : [str]
        Alternative phase names. Useful for matching against
        reference data or other datasets. (Default value = None)

    Returns
    -------
    None
        Modifies the dbf.

    Notes
    -----
    ``dbf.varcounter`` is created when missing, used to generate unique
    ``VV`` symbol names, and deleted again before a normal return.
    Ternary interactions are not fit by this function.
    """
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0

    def _to_tuple(x):
        """Return lists/tuples as a tuple and wrap any other value in a 1-tuple."""
        if isinstance(x, (list, tuple)):
            return tuple(x)
        return (x,)

    def _next_symbol_name():
        """Return the first unused 'VVnnnn' name, advancing dbf.varcounter past used ones."""
        name = 'VV' + str(dbf.varcounter).zfill(4)
        while dbf.symbols.get(name, None) is not None:
            dbf.varcounter += 1
            name = 'VV' + str(dbf.varcounter).zfill(4)
        return name

    # First fit endmembers
    every_endmember = list(itertools.product(*subl_model))
    all_em_count = len(every_endmember)
    endmembers = sorted(set(canonicalize(i, symmetry) for i in every_endmember))
    # Number of significant figures in parameters
    numdigits = 6
    # The phase name itself always takes part in the reference-data lookup.
    aliases = [] if aliases is None else aliases
    aliases = sorted(set(list(aliases) + [phase_name]))
    logging.info('FITTING: {}'.format(phase_name))
    logging.debug('{0} endmembers ({1} distinct by symmetry)'.format(all_em_count, len(endmembers)))

    all_endmembers = []
    for endmember in endmembers:
        logging.debug('ENDMEMBER: {}'.format(endmember))
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or \
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G' + name[:3].upper() + em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True), )
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            # When no alias matched, sym_name is the name built for the last
            # alias; a symbol of that name already present in the database is
            # still used.
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum(sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA')
                fit_eq = num_moles * sympy.Symbol(sym_name)
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry, datasets)
            # Iterate in a deterministic (string-sorted) order so VV symbol
            # numbering is reproducible; zero coefficients get no symbol.
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = _next_symbol_name()
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = sympy.Symbol(symbol_name)
            fit_eq = sympy.Add(*[value * key for key, value in parameters.items()])
            # Add the site-ratio-weighted GHSER reference of every non-vacancy
            # occupant. The element tag is padded/truncated to two characters.
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                subl = (subl.upper() * 2)[:2]
                ref = ref + ratio * sympy.Symbol('GHSER' + subl)
            fit_eq += ref
        symmetric_endmembers = _generate_symmetric_group(endmember, symmetry)
        logging.debug('SYMMETRIC_ENDMEMBERS: {}'.format(symmetric_endmembers))
        all_endmembers.extend(symmetric_endmembers)
        for em in symmetric_endmembers:
            dbf.add_parameter('G', phase_name, tuple(map(_to_tuple, em)), 0, fit_eq)

    # Now fit all binary interactions
    # Need to use 'all_endmembers' instead of 'endmembers' because you need to generate combinations
    # of ALL endmembers, not just symmetry equivalent ones
    transformed_bin_interactions = []
    for first_endmember, second_endmember in itertools.combinations(all_endmembers, 2):
        interaction = []
        for first_occupant, second_occupant in zip(first_endmember, second_endmember):
            if first_occupant == second_occupant:
                interaction.append(first_occupant)
            else:
                interaction.append(tuple(sorted([first_occupant, second_occupant])))
        transformed_bin_interactions.append(interaction)

    def bin_int_sort_key(x):
        """Sort key that leads with the count of sublattices holding a two-occupant pair."""
        interacting_sublattices = sum((isinstance(n, (list, tuple)) and len(n) == 2) for n in x)
        return canonical_sort_key((interacting_sublattices, ) + x)

    bin_interactions = sorted(set(canonicalize(i, symmetry) for i in transformed_bin_interactions),
                              key=bin_int_sort_key)
    logging.debug('{0} distinct binary interactions'.format(len(bin_interactions)))
    for interaction in bin_interactions:
        ixx = tuple(tuple(i) if isinstance(i, (tuple, list)) else i for i in interaction)
        logging.debug('INTERACTION: {}'.format(ixx))
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry, datasets)
        # Organize parameters by polynomial degree
        # Builtin `object` is used for the dtype: the `np.object` alias was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        degree_polys = np.zeros(10, dtype=object)
        # Highest degree first: at degree 0 the pattern reduces to YS alone,
        # which would otherwise also match the higher-degree features.
        for degree in reversed(range(10)):
            check_symbol = sympy.Symbol('YS') * sympy.Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if key.has(check_symbol):
                    if value != 0:
                        symbol_name = _next_symbol_name()
                        dbf.symbols[symbol_name] = sigfigs(parameters[key], numdigits)
                        parameters[key] = sympy.Symbol(symbol_name)
                    coef = parameters[key] * (key / check_symbol)
                    try:
                        coef = float(coef)
                    except TypeError:
                        # Symbolic coefficients cannot be converted; keep them as-is.
                        pass
                    degree_polys[degree] += coef
                    keys_to_remove.append(key)
            # Consumed features must not be matched again at a lower degree.
            for key in keys_to_remove:
                parameters.pop(key)
        logging.debug('Polynomial coefs: {}'.format(degree_polys))
        # Insert into database
        symmetric_interactions = _generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(_to_tuple, syminter)), degree,
                                      degree_polys[degree])
    # TODO: fit ternary interactions
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter
def phase_fit(dbf, phase_name, symmetry, datasets, refdata, ridge_alpha, aicc_penalty=None, aliases=None):
    """Generate an initial CALPHAD model for a phase already defined in the Database.

    The sublattice model and site ratios are read from ``dbf.phases[phase_name]``.
    Endmember ``G`` parameters are handled first, then binary ``L`` interaction
    parameters, and finally ternary interactions are delegated to
    ``fit_ternary_interactions``. Endmembers and binary interactions for which
    ``_param_present_in_database`` reports an existing parameter are skipped.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the alpha hyperparameter used in ridge regression. It is
        forwarded unchanged to ``fit_formation_energy``; the signature
        provides no default, so callers must supply it.
    aicc_penalty : dict
        Map of phase name to feature to a multiplication factor for the
        AICc's parameter penalty. Only the entry for ``phase_name`` is used.
    aliases : Dict[str, str]
        Mapping of possible aliases to the Database phase names. Only aliases
        that map to ``phase_name`` are used; with ``None`` the phase name
        itself is the single alias.

    Returns
    -------
    None
        Modifies the dbf.
    """
    phase_penalty = ({} if aicc_penalty is None else aicc_penalty).get(phase_name, {})
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0

    phase = dbf.phases[phase_name]
    # TODO: assumed pure elements - add proper support for Species objects
    subl_model = [sorted([species.name for species in sublattice]) for sublattice in phase.constituents]
    site_ratios = phase.sublattices

    # First fit endmembers
    total_endmember_count = len(generate_endmembers(subl_model))  # number of total endmembers
    distinct_endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    fitted_endmembers = {}

    # TODO: use the global aliases dictionary passed in as-is instead of converting it to a phase-local dict
    # TODO: use the aliases dictionary in dataset queries to find relevant data
    if aliases is not None:
        phase_aliases = sorted([alias for alias, target in aliases.items() if target == phase_name])
    else:
        phase_aliases = [phase_name]

    _log.info('FITTING: %s', phase_name)
    _log.trace('%s endmembers (%s distinct by symmetry)', total_endmember_count, len(distinct_endmembers))

    all_endmembers = []
    for endmember in distinct_endmembers:
        # The symmetric group is recorded even for skipped endmembers, because
        # the interaction generation below needs every endmember.
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        all_endmembers.extend(symmetric_endmembers)
        if _param_present_in_database(dbf, phase_name, endmember, 'G'):
            _log.trace('ENDMEMBER: %s already in Database. Skipping.', endmember)
            continue
        _log.trace('ENDMEMBER: %s', endmember)

        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        unique_comps = list(set(endmember))
        energy = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        single_component = (len(unique_comps) == 1) and (unique_comps[0] != 'VA')
        vacancy_terminated = (
            (len(unique_comps) == 2)
            and (endmember[-1] == 'VA')
            and (len(set(endmember[:-1])) == 1)
        )
        if single_component or vacancy_terminated:
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(unique_comps) - {'VA'})[0]
            sym_name = None
            for name in phase_aliases:
                sym_name = 'G' + name[:3].upper() + em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is None:
                    continue
                if isinstance(stability, sympy.Piecewise) and (0, True) not in stability.args:
                    # Default zero required for the compiled backend
                    stability = sympy.Piecewise(*(stability.args + ((0, True),)))
                dbf.symbols[sym_name] = stability
                break
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum(sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA')
                energy = num_moles * Symbol(sym_name)
                _log.trace("Found lattice stability: %s", sym_name)
                _log.debug("%s = %s", sym_name, dbf.symbols[sym_name])

        if energy is None:
            # No reference lattice stability data -- we have to fit it
            fitted = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry,
                                          datasets, ridge_alpha, aicc_phase_penalty=phase_penalty)
            # String-sorted order keeps the generated symbol numbering deterministic.
            for feature, coefficient in sorted(fitted.items(), key=str):
                if coefficient == 0:
                    continue
                new_symbol = get_next_symbol(dbf)
                dbf.symbols[new_symbol] = sigfigs(coefficient, numdigits)
                fitted[feature] = Symbol(new_symbol)
            energy = sympy.Add(*[coefficient * feature for feature, coefficient in fitted.items()])
            # Site-ratio-weighted GHSER reference of every non-vacancy occupant.
            reference = 0
            for occupant, ratio in zip(endmember, site_ratios):
                if occupant == 'VA':
                    continue
                element_tag = (occupant.upper() * 2)[:2]
                reference = reference + ratio * Symbol('GHSER' + element_tag)
            energy += reference

        _log.trace('SYMMETRIC_ENDMEMBERS: %s', symmetric_endmembers)
        for equivalent in symmetric_endmembers:
            fitted_endmembers[equivalent] = energy
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, equivalent)), 0, energy)

    _log.trace('FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers, order=2, symmetry=symmetry)
    _log.trace('%s distinct binary interactions', len(bin_interactions))
    max_degrees = 10
    for interaction in bin_interactions:
        ixx = tuple(tuple(site) if isinstance(site, (tuple, list)) else site for site in interaction)
        config = tuple(map(tuplify, ixx))
        if _param_present_in_database(dbf, phase_name, config, 'L'):
            _log.trace('INTERACTION: %s already in Database', ixx)
            continue
        _log.trace('INTERACTION: %s', ixx)

        fitted = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry,
                                      datasets, ridge_alpha, aicc_phase_penalty=phase_penalty)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(max_degrees, dtype=np.object_)
        # Highest degree first: at degree 0 the pattern reduces to YS alone,
        # which would otherwise also match the higher-degree features.
        for degree in range(max_degrees - 1, -1, -1):
            check_symbol = Symbol('YS') * Symbol('Z')**degree
            consumed = []
            for feature, coefficient in sorted(fitted.items(), key=str):
                if not feature.has(check_symbol):
                    continue
                if coefficient != 0:
                    new_symbol = get_next_symbol(dbf)
                    dbf.symbols[new_symbol] = sigfigs(fitted[feature], numdigits)
                    fitted[feature] = Symbol(new_symbol)
                coef = fitted[feature] * (feature / check_symbol)
                try:
                    coef = float(coef)
                except TypeError:
                    # Symbolic coefficients cannot be converted; keep them as-is.
                    pass
                degree_polys[degree] += coef
                consumed.append(feature)
            # Consumed features must not be matched again at a lower degree.
            for feature in consumed:
                fitted.pop(feature)
        _log.trace('Polynomial coefs: %s', degree_polys.tolist())

        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] == 0:
                continue
            for syminter in symmetric_interactions:
                dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree, degree_polys[degree])

    _log.trace('FITTING TERNARY INTERACTIONS')
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers, datasets, aicc_phase_penalty=phase_penalty)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter
def phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios, datasets, refdata, ridge_alpha, aicc_penalty=None, aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Endmember ``G`` parameters are produced first, then binary ``L``
    interaction parameters, and finally ternary interactions are delegated to
    ``fit_ternary_interactions``. A pure-component endmember reuses a lattice
    stability from ``refdata`` when one is found under the phase name or one
    of its aliases; everything else is fit with ``fit_formation_energy``.
    Nonzero fitted coefficients are stored in ``dbf.symbols`` under names
    obtained from ``get_next_symbol``.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    subl_model : [[str]]
        Sublattice model for the phase of interest.
    site_ratios : [float]
        Number of sites in each sublattice, normalized to one atom.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the alpha hyperparameter used in ridge regression. It is
        forwarded unchanged to ``fit_formation_energy``; the signature
        provides no default, so callers must supply it.
    aicc_penalty : dict
        Map of phase name to feature to a multiplication factor for the
        AICc's parameter penalty. Only the entry for ``phase_name`` is used.
    aliases : [str]
        Alternative phase names. Useful for matching against
        reference data or other datasets. (Default value = None)

    Returns
    -------
    None
        Modifies the dbf.

    Notes
    -----
    ``dbf.varcounter`` is created when missing and deleted again before a
    normal return.
    """
    aicc_penalty = aicc_penalty if aicc_penalty is not None else {}
    aicc_phase_penalty = aicc_penalty.get(phase_name, {})
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    # First fit endmembers
    all_em_count = len(generate_endmembers(subl_model))  # number of total endmembers
    endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    # The phase name itself always takes part in the reference-data lookup.
    aliases = [] if aliases is None else aliases
    aliases = sorted(set(list(aliases) + [phase_name]))
    logging.info('FITTING: {}'.format(phase_name))
    logging.log(TRACE, '{0} endmembers ({1} distinct by symmetry)'.format(all_em_count, len(endmembers)))

    all_endmembers = []
    for endmember in endmembers:
        logging.log(TRACE, 'ENDMEMBER: {}'.format(endmember))
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or \
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G' + name[:3].upper() + em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True),)
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            # When no alias matched, sym_name is the name built for the last
            # alias; a symbol of that name already present in the database is
            # still used.
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum(sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA')
                fit_eq = num_moles * sympy.Symbol(sym_name)
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry,
                                              datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
            # Iterate in a deterministic (string-sorted) order so symbol
            # numbering is reproducible; zero coefficients get no symbol.
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = get_next_symbol(dbf)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = sympy.Symbol(symbol_name)
            fit_eq = sympy.Add(*[value * key for key, value in parameters.items()])
            # Add the site-ratio-weighted GHSER reference of every non-vacancy
            # occupant. The element tag is padded/truncated to two characters.
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                subl = (subl.upper() * 2)[:2]
                ref = ref + ratio * sympy.Symbol('GHSER' + subl)
            fit_eq += ref
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        logging.log(TRACE, 'SYMMETRIC_ENDMEMBERS: {}'.format(symmetric_endmembers))
        all_endmembers.extend(symmetric_endmembers)
        for em in symmetric_endmembers:
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, em)), 0, fit_eq)

    logging.log(TRACE, 'FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers, order=2, symmetry=symmetry)
    logging.log(TRACE, '{0} distinct binary interactions'.format(len(bin_interactions)))
    for interaction in bin_interactions:
        ixx = tuple(tuple(i) if isinstance(i, (tuple, list)) else i for i in interaction)
        logging.log(TRACE, 'INTERACTION: {}'.format(ixx))
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry,
                                          datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
        # Organize parameters by polynomial degree
        # Builtin `object` is used for the dtype: the `np.object` alias was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        degree_polys = np.zeros(10, dtype=object)
        # Highest degree first: at degree 0 the pattern reduces to YS alone,
        # which would otherwise also match the higher-degree features.
        for degree in reversed(range(10)):
            check_symbol = sympy.Symbol('YS') * sympy.Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if key.has(check_symbol):
                    if value != 0:
                        symbol_name = get_next_symbol(dbf)
                        dbf.symbols[symbol_name] = sigfigs(parameters[key], numdigits)
                        parameters[key] = sympy.Symbol(symbol_name)
                    coef = parameters[key] * (key / check_symbol)
                    try:
                        coef = float(coef)
                    except TypeError:
                        # Symbolic coefficients cannot be converted; keep them as-is.
                        pass
                    degree_polys[degree] += coef
                    keys_to_remove.append(key)
            # Consumed features must not be matched again at a lower degree.
            for key in keys_to_remove:
                parameters.pop(key)
        logging.log(TRACE, 'Polynomial coefs: {}'.format(degree_polys))
        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree,
                                      degree_polys[degree])

    logging.log(TRACE, 'FITTING TERNARY INTERACTIONS')
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers, datasets,
                             aicc_phase_penalty=aicc_phase_penalty)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter