Пример #1
0
def _process_species(db, sp_name, sp_comp, charge=0, *args):
    """Register a new species in the Database.

    ``sp_comp`` is a flat list that alternates element and ratio:
    ``[element1, ratio1, element2, ratio2, ..., elementN, ratioN]``.
    When ``charge`` is not given, the species is created with charge 0.
    Any extra positional arguments are accepted and ignored.
    """
    constituents = {}
    # Step through the flat list two entries at a time: the even index holds
    # the element, the following index holds its ratio.
    for idx in range(0, len(sp_comp), 2):
        constituents[sp_comp[idx]] = sp_comp[idx + 1]
    new_species = Species(sp_name, constituents, charge=charge)
    db.species.add(new_species)
Пример #2
0
def generate_parameters(phase_models, datasets, ref_state, excess_model):
    """Build a pycalphad Database and fit parameters for every phase model.

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit. The ``'components'`` and
        ``'phases'`` entries are read here.
    datasets : PickleableTinyDB
        Database of single- and multi-phase data to fit. Handed to
        ``phase_fit`` unchanged.
    ref_state : str
        Name of the reference data to use, e.g. 'SGTE91' or 'SR2016'. The
        attributes ``ref_state`` and ``ref_state + 'Stable'`` are looked up
        on ``espei.refdata``.
    excess_model : str
        Type of excess model to fit to, e.g. 'linear'. Not referenced in the
        body of this function.

    Returns
    -------
    pycalphad.Database

    """
    def _append_zero_branch(table):
        # Give every Piecewise entry a trailing (0, True) branch. The table
        # is updated in place.
        for key, expr in table.items():
            if isinstance(expr, sympy.Piecewise):
                table[key] = sympy.Piecewise(*(expr.args + ((0, True), )))

    logging.info('Generating parameters.')
    dbf = Database()
    dbf.elements = set(phase_models['components'])
    for el in dbf.elements:
        if Species is None:  # TODO: drop this on release of pycalphad 0.7
            continue
        dbf.species.add(Species(el, {el: 1}, 0))

    # Write reference state to Database
    refdata = getattr(espei.refdata, ref_state)
    stabledata = getattr(espei.refdata, ref_state + 'Stable')
    _append_zero_branch(refdata)
    _append_zero_branch(stabledata)

    # Stable reference expression per component; VA is pinned to zero.
    comp_refs = {}
    for c in dbf.elements:
        comp_name = c.upper()
        if comp_name != 'VA':
            comp_refs[comp_name] = stabledata[comp_name]
    comp_refs['VA'] = 0

    # note that `(c.upper()*2)[:2]` returns 'AL' for c.upper()=='AL' and 'VV' for c.upper()=='V'
    ghser_symbols = {}
    for c, data in comp_refs.items():
        ghser_symbols['GHSER' + (c.upper() * 2)[:2]] = data
    dbf.symbols.update(ghser_symbols)

    # Phases are handled in order of their name.
    ordered_phases = sorted(phase_models['phases'].items(),
                            key=operator.itemgetter(0))
    for phase_name, phase_obj in ordered_phases:
        # Perform parameter selection and single-phase fitting based on input
        # TODO: Need to pass particular models to include: magnetic, order-disorder, etc.
        symmetry = phase_obj.get('equivalent_sublattices')
        aliases = phase_obj.get('aliases')
        # TODO: More advanced phase data searching
        site_ratios = phase_obj['sublattice_site_ratios']
        subl_model = phase_obj['sublattice_model']
        dbf.add_phase(phase_name, dict(), site_ratios)
        dbf.add_phase_constituents(phase_name, subl_model)
        dbf.add_structure_entry(phase_name, phase_name)
        phase_fit(dbf, phase_name, symmetry, subl_model, site_ratios,
                  datasets, refdata, aliases=aliases)
    logging.info('Finished generating parameters.')
    return dbf
Пример #3
0
def fit_formation_energy(dbf,
                         comps,
                         phase_name,
                         configuration,
                         symmetry,
                         datasets,
                         features=None):
    """
    Find suitable linear model parameters for the given phase.
    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure. If any
        entry is a list or tuple, the configuration is treated as an
        interaction and mixing properties are fit as well.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)
        The mapping passed in is not modified.

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    if features is None:
        features = OrderedDict([
            ("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1, v.T**3)),
            ("SM_FORM", (v.T, )),
            ("HM_FORM", (sympy.S.One, )),
        ])
    if any(isinstance(conf, (list, tuple)) for conf in configuration):
        # TODO: assumes binary interaction here
        fitting_steps = (["CPM_FORM", "CPM_MIX"],
                         ["SM_FORM", "SM_MIX"],
                         ["HM_FORM", "HM_MIX"])
        # Product of all nonzero site fractions in all sublattices
        YS = sympy.Symbol('YS')
        # Product of all binary interaction terms
        Z = sympy.Symbol('Z')
        redlich_kister_features = (YS, YS * Z, YS * (Z**2), YS * (Z**3))
        # Multiply every feature by every Redlich-Kister term. A new mapping
        # is built so that a `features` dict supplied by the caller is left
        # untouched.
        features = OrderedDict(
            (prop, [
                rk_term * feat for rk_term, feat in itertools.product(
                    redlich_kister_features, prop_features)
            ]) for prop, prop_features in features.items())
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(
            endmembers_from_interaction(configuration)))
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # Every coefficient starts at zero; steps with data overwrite their own.
    parameters = {
        coef: 0
        for prop_features in features.values() for coef in prop_features
    }

    # This is our previously fit partial model
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(
        dbf,
        comps,
        phase_name,
        parameters={'GHSER' + (c.upper() * 2)[:2]: 0
                    for c in comps})
    fixed_model.models['idmix'] = 0
    fixed_portions = [0]

    # Sublattices containing VA contribute only their non-vacancy fraction.
    moles_per_formula_unit = sympy.S(0)
    sublattices = zip(dbf.phases[phase_name].sublattices,
                      dbf.phases[phase_name].constituents)
    for subl_idx, (num_sites, const) in enumerate(sublattices):
        if Species('VA') in const:
            moles_per_formula_unit += num_sites * (
                1 - v.SiteFraction(phase_name, subl_idx, Species('VA')))
        else:
            moles_per_formula_unit += num_sites

    for desired_props in fitting_steps:
        desired_data = get_data(comps, phase_name, configuration, symmetry,
                                datasets, desired_props)
        logging.debug('{}: datasets found: {}'.format(desired_props,
                                                      len(desired_data)))
        if len(desired_data) > 0:
            # We assume all properties in the same fitting step have the same features (but different ref states)
            step_prop = desired_props[0]
            step_features = features[step_prop]
            transform = feature_transforms[step_prop]
            feature_matrix = _build_feature_matrix(step_prop, step_features,
                                                   desired_data)
            all_samples = get_samples(desired_data)
            data_quantities = np.concatenate(_shift_reference_state(
                desired_data, transform, fixed_model),
                                             axis=-1)
            # `float` replaces the `np.float` alias, which NumPy removed in
            # 1.24; it is the same dtype.
            site_fractions = [
                build_sitefractions(
                    phase_name, ds['solver']['sublattice_configurations'],
                    ds['solver'].get(
                        'sublattice_occupancies',
                        np.ones((
                            len(ds['solver']['sublattice_configurations']),
                            len(ds['solver']['sublattice_configurations'][0])),
                                dtype=float))) for ds in desired_data
                for _ in ds['conditions']['T']
            ]
            # Flatten list
            site_fractions = list(itertools.chain(*site_fractions))
            # Remove existing partial model contributions from the data
            data_quantities = data_quantities - transform(fixed_model.ast)
            # Subtract out high-order (in T) parameters we've already fit
            data_quantities = data_quantities - \
                transform(sum(fixed_portions)) / moles_per_formula_unit
            # Any site fraction appearing in the expression but absent from
            # the sample's site fractions is set to zero.
            for sf, i in zip(site_fractions, data_quantities):
                missing_variables = sympy.S(i * moles_per_formula_unit).atoms(
                    v.SiteFraction) - set(sf.keys())
                sf.update({x: 0. for x in missing_variables})
            # moles_per_formula_unit factor is here because our data is stored per-atom
            # but all of our fits are per-formula-unit
            data_quantities = [
                sympy.S(i * moles_per_formula_unit).xreplace(sf).xreplace({
                    v.T:
                    ixx[0]
                }).evalf() for i, sf, ixx in zip(data_quantities,
                                                 site_fractions, all_samples)
            ]
            data_quantities = np.asarray(data_quantities, dtype=float)
            parameters.update(
                _fit_parameters(feature_matrix, data_quantities,
                                step_features))
            # Add these parameters to be fixed for the next fitting step
            # (`object` replaces the removed `np.object` alias).
            fixed_portion = np.array(step_features, dtype=object)
            fixed_portion = np.dot(
                fixed_portion,
                [parameters[feature] for feature in step_features])
            fixed_portions.append(fixed_portion)
    return parameters
Пример #4
0
def generate_parameters(phase_models,
                        datasets,
                        ref_state,
                        excess_model,
                        ridge_alpha=None,
                        aicc_penalty_factor=None,
                        dbf=None):
    """Generate parameters from given phase models and datasets

    Parameters
    ----------
    phase_models : dict
        Dictionary of components and phases to fit. The ``'components'`` and
        ``'phases'`` entries are read here.
    datasets : PickleableTinyDB
        database of single- and multi-phase to fit.
    ref_state : str
        String of the reference data to use, e.g. 'SGTE91' or 'SR2016'
    excess_model : str
        String of the type of excess model to fit to, e.g. 'linear'.
        Not referenced in the body of this function.
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. Defaults
        to None, which falls back to ordinary least squares regression.
        For now, the parameter is applied to all features.
    aicc_penalty_factor : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    dbf : Database
        Initial pycalphad Database that can have parameters that would not be fit by ESPEI.
        A new Database is created when this is None. A Database passed in
        is modified and returned.

    Returns
    -------
    pycalphad.Database

    """
    logging.info('Generating parameters.')
    logging.log(
        TRACE,
        f'Found the following user reference states: {espei.refdata.INSERTED_USER_REFERENCE_STATES}'
    )
    phases = sorted(map(lambda x: x.upper(), phase_models['phases'].keys()))
    if dbf is None:
        dbf = Database()
    dbf.elements.update(set(phase_models['components']))
    for el in dbf.elements:
        dbf.species.add(Species(el, {el: 1}, 0))
        # Add the SER reference data. Store a shallow copy: the 'phase' key
        # is assigned below, and writing into the shared
        # espei.refdata.ser_dict entry would carry that phase over into
        # later calls.
        dbf.refstates[el] = dict(espei.refdata.ser_dict[el])
        # update the refdata for this element with the reference phase
        if el not in espei.refdata.pure_element_phases:
            # Probably VA, /- or something else
            continue
        refdata_phase = espei.refdata.pure_element_phases[el]
        if refdata_phase in phases:
            dbf.refstates[el]['phase'] = refdata_phase
        else:
            # Check all the aliases and set the one that matches. There is
            # no early exit: if several phases list the alias, the last one
            # iterated is kept.
            for phase_name, phase_obj in phase_models['phases'].items():
                for alias in phase_obj.get('aliases', []):
                    if alias == refdata_phase:
                        dbf.refstates[el]['phase'] = phase_name
    # Write reference state to Database
    refdata = getattr(espei.refdata, ref_state)
    stabledata = getattr(espei.refdata, ref_state + 'Stable')
    # Give every Piecewise entry a trailing (0, True) branch. Both tables
    # fetched from espei.refdata are updated in place.
    for table in (refdata, stabledata):
        for key, element in table.items():
            if isinstance(element, sympy.Piecewise):
                newargs = element.args + ((0, True), )
                table[key] = sympy.Piecewise(*newargs)
    comp_refs = {
        c.upper(): stabledata[c.upper()]
        for c in dbf.elements if c.upper() != 'VA'
    }
    comp_refs['VA'] = 0
    # note that `(c.upper()*2)[:2]` returns 'AL' for c.upper()=='AL' and 'VV' for c.upper()=='V'
    dbf.symbols.update(
        {'GHSER' + (c.upper() * 2)[:2]: data
         for c, data in comp_refs.items()})
    # Phases are handled in order of their name.
    for phase_name, phase_obj in sorted(phase_models['phases'].items(),
                                        key=operator.itemgetter(0)):
        # Perform parameter selection and single-phase fitting based on input
        # TODO: Need to pass particular models to include: magnetic, order-disorder, etc.
        symmetry = phase_obj.get('equivalent_sublattices', None)
        aliases = phase_obj.get('aliases', None)
        # TODO: More advanced phase data searching
        site_ratios = phase_obj['sublattice_site_ratios']
        subl_model = phase_obj['sublattice_model']
        # A phase already present in the supplied Database is not re-added.
        if phase_name not in dbf.phases:
            dbf.add_phase(phase_name, dict(), site_ratios)
            dbf.add_phase_constituents(phase_name, subl_model)
            dbf.add_structure_entry(phase_name, phase_name)
        phase_fit(dbf,
                  phase_name,
                  symmetry,
                  subl_model,
                  site_ratios,
                  datasets,
                  refdata,
                  ridge_alpha,
                  aicc_penalty=aicc_penalty_factor,
                  aliases=aliases)
    logging.info('Finished generating parameters.')
    return dbf