Python get_data示例

编程语言: Python

命名空间/包名称: espei.core_utils

方法/功能: get_data

hotexamples.com的示例: 6

Python get_data - 共找到6个示例。以下是从开源项目中提取的、评价最高的 espei.core_utils.get_data 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

def test_get_data_for_a_minimal_example():
    """get_data should return the single stored dataset, narrowed to the requested configuration."""
    sample_dataset = {
        "components": ["CU", "MG", "VA"],
        "phases": ["LAVES_C15"],
        "solver": {
            "mode": "manual",
            "sublattice_site_ratios": [2, 1],
            "sublattice_configurations": [
                ["CU", "MG"],
                ["MG", "CU"],
                ["MG", "MG"],
                ["CU", "CU"],
            ],
        },
        "conditions": {"P": 101325, "T": 298.15},
        "output": "HM_FORM",
        "values": [[[-15720, 34720, 7000, 15500]]],
    }
    datasets = PickleableTinyDB(storage=MemoryStorage)
    datasets.insert(sample_dataset)

    comps = ['CU', 'MG', 'VA']
    phase_name = 'LAVES_C15'

    # Ask for the (MG)(CU) configuration only, with no sublattice symmetry.
    matches = get_data(comps, phase_name, ('MG', 'CU'), None, datasets,
                       ['HM_FORM'])

    # Exactly one dataset is expected to match.
    assert len(matches) == 1
    found = matches[0]

    assert found['components'] == comps
    assert found['phases'][0] == phase_name

    # Only the requested configuration (second entry of the fixture) survives.
    assert found['solver']['sublattice_site_ratios'] == [2, 1]
    assert found['solver']['sublattice_configurations'] == (('MG', 'CU'), )

    assert found['conditions']['P'] == 101325
    assert found['conditions']['T'] == 298.15
    assert found['output'] == 'HM_FORM'

    # The value paired with the (MG)(CU) configuration in the fixture.
    assert found['values'] == np.array([[[34720.0]]])

示例#2

显示文件

def plot_parameters(dbf,
                    comps,
                    phase_name,
                    configuration,
                    symmetry,
                    datasets=None,
                    fig=None,
                    require_data=True):
    """
    Plot model parameters against data, one subplot per property, in a single figure

    Parameters
    ----------
    dbf : Database
        pycalphad thermodynamic database containing the relevant parameters.
    comps : list
        Names of components to consider in the calculation.
    phase_name : str
        Name of the considered phase
    configuration : tuple
        Sublattice configuration to plot, such as ('CU', 'CU') or (('CU', 'MG'), 'CU')
    symmetry : list
        List of lists containing indices of symmetric sublattices e.g. [[0, 1], [2, 3]]
    datasets : PickleableTinyDB
        ESPEI datasets to compare against. Defaults to None.
    fig : matplotlib.Figure
        Figure to add the subplots to. A new figure is created when omitted.
    require_data : bool
        If True (the default), only properties that have matching data are
        plotted. If False, every property for the configuration is plotted;
        this is only accepted for interaction (mixing) configurations.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``require_data`` is True and no datasets were passed, or if
        ``require_data`` is False for an endmember configuration.

    Examples
    --------
    # plot the LAVES_C15 (Cu)(Mg) endmember
    >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', ('CU', 'MG'), symmetry=None, datasets=datasets)
    # plot the mixing interaction in the first sublattice
    >>> plot_parameters(dbf, ['CU', 'MG'], 'LAVES_C15', (('CU', 'MG'), 'MG'), symmetry=None, datasets=datasets)

    """
    endmember_plots = [('T', 'CPM'), ('T', 'CPM_FORM'), ('T', 'SM'),
                       ('T', 'SM_FORM'), ('T', 'HM'), ('T', 'HM_FORM')]
    interaction_plots = [('Z', 'HM_FORM'), ('Z', 'HM_MIX'), ('Z', 'SM_MIX')]

    comps = sorted(comps)
    mod = Model(dbf, comps, phase_name)
    # Second model with the GHSER* symbols set to zero, used for *_FORM properties
    zeroed_refs = {'GHSER' + (c.upper() * 2)[:2]: 0 for c in comps}
    mod_norefstate = Model(dbf, comps, phase_name, parameters=zeroed_refs)

    # A nested list/tuple in the configuration marks an interaction parameter
    is_interaction = any(
        isinstance(conf, (list, tuple)) for conf in configuration)
    plots = interaction_plots if is_interaction else endmember_plots

    def _props_for(y_val):
        # Mixing properties are looked up together with their formation counterpart
        if y_val.endswith('_MIX'):
            return [y_val.split('_')[0] + '_FORM', y_val]
        return [y_val]

    if require_data and datasets is None:
        raise ValueError(
            'Plots require datasets, but no datasets were passed.')
    if not require_data and not is_interaction:
        # How we treat temperature dependence is ambiguous when there is no data, so we raise an error
        raise ValueError(
            'The "require_data=False" option is not supported for non-mixing configurations.'
        )

    if datasets is None:
        # Nothing to compare against: every property is plotted with empty data
        filtered_plots = [(x_val, y_val, []) for x_val, y_val in plots]
    else:
        filtered_plots = []
        for x_val, y_val in plots:
            data = get_data(comps, phase_name, configuration, symmetry,
                            datasets, _props_for(y_val))
            # With require_data, properties without any data are dropped
            if len(data) > 0 or not require_data:
                filtered_plots.append((x_val, y_val, data))

    num_plots = len(filtered_plots)
    if not filtered_plots:
        return
    fig = fig or plt.figure(figsize=plt.figaspect(num_plots))

    # One stacked subplot per remaining property
    for position, (x_val, y_val, data) in enumerate(filtered_plots, start=1):
        model = mod_norefstate if y_val.endswith('_FORM') else mod
        ax = fig.add_subplot(num_plots, 1, position)
        _compare_data_to_parameters(dbf,
                                    comps,
                                    phase_name,
                                    data,
                                    model,
                                    configuration,
                                    x_val,
                                    y_val,
                                    ax=ax)

示例#3

显示文件

def plot_property(dbf,
                  comps,
                  phaseL,
                  params,
                  T,
                  prop,
                  config=None,
                  datasets=None,
                  xlim=None,
                  xlabel=None,
                  ylabel=None,
                  yscale=None,
                  phase_label_dict=None,
                  unit='kJ/mol.',
                  cdict=None,
                  figsize=None):
    """
    Plot a property of interest with uncertainty bounds for all phases
    of interest

    Parameters
    ----------
    dbf : Database
        Thermodynamic database containing the relevant parameters
    comps : list
        Names of components to consider in the calculation
    phaseL : list
        Names of phases to plot properties for
    params : numpy array
        Array where the rows contain the parameter sets
        for the pycalphad equilibrium calculation
    T : list, array or x-array object
        Temperature values at which to plot the selected property.
        When T has a length it is also used as the x-axis; otherwise
        the x-axis is the composition of ``comps[0]``.
    prop : str
        property (or attribute in pycalphad terminology) to sample,
        e.g. GM for molar gibbs energy or H_MIX for the enthalpy of
        mixing
    config : tuple, optional
        Sublattice configuration as a tuple, e.g. ("CU", ("CU", "MG"))
    datasets : espei.utils.PickleableTinyDB, optional
        Database of datasets to search for data
    xlim : list or tuple of float, optional
        List or tuple with two floats corresponding to the
        minimum and maximum of the x-axis
    xlabel : str, optional
        plot x label
    ylabel : str, optional
        plot y label
    yscale : int or float, optional
        scaling factor to apply to property (e.g. to plot kJ/mol.
        instead of J/mol. choose yscale to be 0.001)
    phase_label_dict : dict, optional
        Dictionary with keys given by phase names and corresponding
        strings to use in plotting (e.g. to enable LaTeX labels)
    unit : str, optional
        Unit to plot on the y-axis for the property of interest
    cdict : dict, optional
        Dictionary with phase names and corresponding
        colors
    figsize : tuple or list of int or float, optional
        Plot dimensions in inches

    Returns
    -------
    None

    Examples
    --------
    >>> import numpy as np
    >>> import pduq.uq_plot as uq
    >>> from pycalphad import Database
    >>> dbf = Database('CU-MG_param_gen.tdb')
    >>> comps = ['MG', 'CU', 'VA']
    >>> phaseL = ['CUMG2', 'LIQUID']
    >>> params = np.loadtxt('params.npy')[: -1, :]
    >>> T = 650
    >>> prop = 'GM'
    >>> # Plot the molar gibbs energy of all phases in phaseL
    >>> # versus molar fraction of MG at 650K. This will have
    >>> # uncertainty intervals generated by the parameter sets
    >>> # in params
    >>> uq.plot_property(dbf, comps, phaseL, params, T, prop)
    """

    symbols_to_fit = database_symbols_to_fit(dbf)

    # Width (in percent) of the plotted uncertainty band
    CI = 95
    nph = len(phaseL)
    colorL = sns.color_palette("cubehelix", nph)
    markerL = 10 * [
        'o', 'D', '^', 'x', 'h', 's', 'v', '*', 'P', 'p', '>', 'd', '<'
    ]

    plt.figure(figsize=figsize)

    # Stays None when phaseL is empty so the default x-limits are skipped
    # instead of failing on an unbound name.
    Xvals_ = None

    # compute uncertainty in property for each phase in list
    for ii, phase in enumerate(phaseL):
        print('starting', prop, 'evaluations for the', phase, 'phase')

        # for each parameter sample calculate the property
        # for each possible site occupancy ratios
        compL = []
        for index in range(params.shape[0]):
            param_dict = dict(zip(symbols_to_fit, params[index, :]))
            parameters = OrderedDict(sorted(param_dict.items(), key=str))
            comp = calculate(dbf,
                             comps,
                             phase,
                             P=101325,
                             T=T,
                             output=prop,
                             parameters=parameters)
            compL.append(comp)

        # concatenate the calculate results in an xarray along
        # an axis named 'sample'
        compC = xr.concat(compL, 'sample')
        compC.coords['sample'] = np.arange(params.shape[0])

        # The composition vector is the same for all samples
        if hasattr(T, "__len__"):
            # Convert so that plain lists support .min()/.max() and
            # boolean-mask indexing below.
            Xvals = np.asarray(T)
        else:
            Xvals = comp.X.sel(component=comps[0]).values.squeeze()
        Pvals = compC[prop].where(compC.Phase == phase).values.squeeze()

        if np.array(Xvals).size == 1:
            print('phase is a line compound')
            # Give the single composition a small width so it is visible
            Xvals_ = np.array([Xvals - 0.002, Xvals + 0.002])
            Pvals_ = np.vstack([Pvals, Pvals]).T
        else:
            # find the lower hull of the property by finding
            # the configuration with the lowest value within
            # each interval. In each interval record the composition
            # and property
            indxL = np.array([])
            Xbnds = np.linspace(Xvals.min(), Xvals.max(), 100)
            for lb, ub in zip(Xbnds[:-1], Xbnds[1:]):
                boolA = (lb <= Xvals) * (Xvals < ub)
                if boolA.sum() == 0:
                    continue
                indxA = np.arange(boolA.size)[boolA]
                # The hull is located using the first parameter sample only
                P_ = Pvals[0, boolA]
                indxL = np.append(indxL, indxA[P_.argmin()])
            indxL = indxL.astype('int32')

            if indxL.size == 1:
                print('only one point found')
                # ndarray.item() replaces np.asscalar, which newer NumPy
                # releases no longer provide.
                only_index = indxL.item()
                Xvals_ = Xvals[only_index]
                Pvals_ = Pvals[:, only_index]
            else:
                Xvals_ = Xvals[indxL]
                Pvals_ = Pvals[:, indxL]

        if yscale is not None:
            # Out-of-place so a view into Pvals is never rescaled by accident
            Pvals_ = Pvals_ * yscale

        # Lower bound, median and upper bound of the CI% band across samples
        low, mid, high = np.percentile(
            Pvals_, [0.5 * (100 - CI), 50, 100 - 0.5 * (100 - CI)], axis=0)

        if cdict is not None:
            color = cdict[phase]
        else:
            color = colorL[ii]

        if phase_label_dict is not None:
            label = phase_label_dict[phase]
        else:
            label = phase

        plt.plot(Xvals_, mid, linestyle='-', color=color, label=label)
        plt.fill_between(np.atleast_1d(Xvals_),
                         low,
                         high,
                         alpha=0.3,
                         facecolor=color)

        # collect and plot experimental data
        if config is not None and datasets is not None:
            symmetry = None
            # NOTE(review): `prop` is a single string here; confirm that
            # get_data accepts that as well as a list of property names.
            data = get_data(comps, phase, config, symmetry, datasets, prop)
            print(data)
            for data_s, marker in zip(data, markerL):
                occupancies = data_s['solver']['sublattice_occupancies']
                # at the moment this needs to be changed manually
                X_vec = [row[0][0] for row in occupancies]
                values = np.squeeze(data_s['values'])

                if yscale is not None:
                    # Out-of-place: does not write back into the dataset and
                    # does not fail when integer values meet a float scale.
                    values = values * yscale

                plt.plot(X_vec,
                         values,
                         linestyle='',
                         marker=marker,
                         markerfacecolor='none',
                         markeredgecolor=color,
                         markersize=6,
                         alpha=0.9,
                         label=data_s['reference'])

    if xlim is not None:
        plt.xlim(xlim)
    elif Xvals_ is not None:
        # Default limits come from the last phase that was plotted
        plt.xlim([Xvals_.min(), Xvals_.max()])

    if xlabel is not None:
        plt.xlabel(xlabel)
    else:
        plt.xlabel(r'$X_{%s}$' % comps[0])

    if ylabel is not None:
        plt.ylabel(ylabel)
    else:
        plt.ylabel(prop + ' (' + unit + ')')

    plt.legend()
    plt.tight_layout()

示例#4

显示文件

def fit_formation_energy(dbf,
                         comps,
                         phase_name,
                         configuration,
                         symmetry,
                         datasets,
                         features=None):
    """
    Find suitable linear model parameters for the given phase.
    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)
        The mapping passed in is copied and is not modified.

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    if features is None:
        features = [("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1,
                                  v.T**3)), ("SM_FORM", (v.T, )),
                    ("HM_FORM", (sympy.S.One, ))]
    # Always work on a private ordered copy: the interaction branch below
    # rewrites the feature lists, and doing that on the caller's mapping would
    # compound the Redlich-Kister factors on every subsequent call.
    features = OrderedDict(features)
    if any(isinstance(conf, (list, tuple)) for conf in configuration):
        # TODO: assumes binary interaction here
        fitting_steps = (["CPM_FORM",
                          "CPM_MIX"], ["SM_FORM",
                                       "SM_MIX"], ["HM_FORM", "HM_MIX"])
        # Product of all nonzero site fractions in all sublattices
        YS = sympy.Symbol('YS')
        # Product of all binary interaction terms
        Z = sympy.Symbol('Z')
        redlich_kister_features = (YS, YS * Z, YS * (Z**2), YS * (Z**3))
        # Expand every base feature with each Redlich-Kister term
        for prop_name, base_features in list(features.items()):
            features[prop_name] = [
                rk * base for rk, base in itertools.product(
                    redlich_kister_features, base_features)
            ]
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(
            endmembers_from_interaction(configuration)))
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # Every candidate coefficient starts at zero and is overwritten once fit
    parameters = {
        coef: 0
        for feature in features.values() for coef in feature
    }

    # This is our previously fit partial model
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(
        dbf,
        comps,
        phase_name,
        parameters={'GHSER' + (c.upper() * 2)[:2]: 0
                    for c in comps})
    # Zero the 'idmix' entry of the model's contributions
    fixed_model.models['idmix'] = 0
    fixed_portions = [0]

    # Sublattices that contain VA only count their non-vacancy site fraction
    moles_per_formula_unit = sympy.S(0)
    phase_record = dbf.phases[phase_name]
    for subl_idx, (num_sites, const) in enumerate(
            zip(phase_record.sublattices, phase_record.constituents)):
        if Species('VA') in const:
            moles_per_formula_unit += num_sites * (
                1 - v.SiteFraction(phase_name, subl_idx, Species('VA')))
        else:
            moles_per_formula_unit += num_sites

    for desired_props in fitting_steps:
        desired_data = get_data(comps, phase_name, configuration, symmetry,
                                datasets, desired_props)
        logging.debug('{}: datasets found: {}'.format(desired_props,
                                                      len(desired_data)))
        if len(desired_data) > 0:
            # We assume all properties in the same fitting step have the same features (but different ref states)
            step_prop = desired_props[0]
            step_features = features[step_prop]
            feature_matrix = _build_feature_matrix(step_prop, step_features,
                                                   desired_data)
            all_samples = get_samples(desired_data)
            transform = feature_transforms[step_prop]
            data_quantities = np.concatenate(_shift_reference_state(
                desired_data, transform, fixed_model),
                                             axis=-1)
            # Builtin float replaces the np.float alias, which newer NumPy
            # releases no longer provide.
            site_fractions = [
                build_sitefractions(
                    phase_name, ds['solver']['sublattice_configurations'],
                    ds['solver'].get(
                        'sublattice_occupancies',
                        np.ones((
                            len(ds['solver']['sublattice_configurations']),
                            len(ds['solver']['sublattice_configurations'][0])),
                                dtype=float))) for ds in desired_data
                for _ in ds['conditions']['T']
            ]
            # Flatten list
            site_fractions = list(itertools.chain(*site_fractions))
            # Remove existing partial model contributions from the data
            data_quantities = data_quantities - transform(fixed_model.ast)
            # Subtract out high-order (in T) parameters we've already fit
            data_quantities = data_quantities - \
                transform(sum(fixed_portions)) / moles_per_formula_unit
            # Site fractions that appear in the expression but not in the
            # dataset are substituted with zero.
            for sf, i in zip(site_fractions, data_quantities):
                missing_variables = sympy.S(i * moles_per_formula_unit).atoms(
                    v.SiteFraction) - set(sf.keys())
                sf.update({x: 0. for x in missing_variables})
            # moles_per_formula_unit factor is here because our data is stored per-atom
            # but all of our fits are per-formula-unit
            data_quantities = [
                sympy.S(i * moles_per_formula_unit).xreplace(sf).xreplace({
                    v.T:
                    ixx[0]
                }).evalf() for i, sf, ixx in zip(data_quantities,
                                                 site_fractions, all_samples)
            ]
            data_quantities = np.asarray(data_quantities, dtype=float)
            parameters.update(
                _fit_parameters(feature_matrix, data_quantities,
                                step_features))
            # Add these parameters to be fixed for the next fitting step
            fixed_portion = np.array(step_features, dtype=object)
            fixed_portion = np.dot(
                fixed_portion,
                [parameters[feature] for feature in step_features])
            fixed_portions.append(fixed_portion)
    return parameters

示例#5

显示文件

def fit_formation_energy(dbf,
                         comps,
                         phase_name,
                         configuration,
                         symmetry,
                         datasets,
                         ridge_alpha=None,
                         aicc_phase_penalty=None,
                         features=None):
    """
    Find suitable linear model parameters for the given phase.
    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. It is
        passed to ``select_model`` unchanged and is applied to all features.
        Defaults to None. NOTE(review): how ``select_model`` treats None is
        not visible from this function - confirm.
    aicc_phase_penalty : dict
        Map of feature type (the part of the property name before the first
        underscore, e.g. "HM" for "HM_FORM") to a multiplication factor for
        the AICc's parameter penalty. Feature types that are not present use
        a factor of 1.0. Defaults to None, which means no custom factors.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}

    """
    aicc_feature_factors = aicc_phase_penalty if aicc_phase_penalty is not None else {}
    if interaction_test(configuration):
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(
            endmembers_from_interaction(configuration)))
        fitting_steps = (["CPM_FORM",
                          "CPM_MIX"], ["SM_FORM",
                                       "SM_MIX"], ["HM_FORM", "HM_MIX"])

    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # create the candidate models and fitting steps
    if features is None:
        features = OrderedDict([
            ("CPM_FORM", (v.T * sympy.log(v.T), v.T**2, v.T**-1, v.T**3)),
            ("SM_FORM", (v.T, )),
            ("HM_FORM", (sympy.S.One, )),
        ])
    # dict of {feature, [candidate_models]}
    candidate_models_features = build_candidate_models(configuration, features)

    # All possible parameter values that could be taken on. This is some legacy
    # code from before there were many candidate models built. For very large
    # sets of candidate models, this could be quite slow.
    # TODO: we might be able to remove this initialization for clarity, depends on fixed portions
    parameters = {}
    for candidate_models in candidate_models_features.values():
        for model in candidate_models:
            for coef in model:
                parameters[coef] = 0

    # This is our previously fit partial model from previous steps
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(
        dbf,
        comps,
        phase_name,
        parameters={'GHSER' + (c.upper() * 2)[:2]: 0
                    for c in comps})
    fixed_portions = [0]

    for desired_props in fitting_steps:
        feature_type = desired_props[0].split('_')[0]  # HM_FORM -> HM
        aicc_factor = aicc_feature_factors.get(feature_type, 1.0)
        desired_data = get_data(comps, phase_name, configuration, symmetry,
                                datasets, desired_props)
        logging.log(
            TRACE, '{}: datasets found: {}'.format(desired_props,
                                                   len(desired_data)))
        if len(desired_data) > 0:
            # Ravelled weights for all data
            weights = get_weights(desired_data)

            # We assume all properties in the same fitting step have the same
            # features (all CPM, all HM, etc., but different ref states).
            # data quantities are the same for each candidate model and can be computed up front
            data_qtys = get_data_quantities(feature_type, fixed_model,
                                            fixed_portions, desired_data)

            candidate_models = candidate_models_features[desired_props[0]]
            # The configuration does not change between candidates, so the
            # matrix builder is chosen once per fitting step.
            if interaction_test(configuration, 3):
                build_matrix = build_ternary_feature_matrix
            else:
                build_matrix = _build_feature_matrix

            # build the candidate model transformation matrix and response vector (A, b in Ax=b)
            feature_matrices = [
                build_matrix(desired_props[0], candidate_model, desired_data)
                for candidate_model in candidate_models
            ]
            data_quantities = [data_qtys for _ in candidate_models]

            # provide candidate models and get back a selected model.
            selected_model = select_model(zip(candidate_models,
                                              feature_matrices,
                                              data_quantities),
                                          ridge_alpha,
                                          weights=weights,
                                          aicc_factor=aicc_factor)
            selected_features, selected_values = selected_model
            parameters.update(zip(selected_features, selected_values))
            # Add these parameters to be fixed for the next fitting step.
            # Builtin object replaces the np.object alias, which newer NumPy
            # releases no longer provide.
            fixed_portion = np.array(selected_features, dtype=object)
            fixed_portion = np.dot(fixed_portion, selected_values)
            fixed_portions.append(fixed_portion)
    return parameters

示例#6

显示文件

文件： paramselect.py 项目： anilkunwar/ESPEI

def fit_formation_energy(dbf,
                         comps,
                         phase_name,
                         configuration,
                         symmetry,
                         datasets,
                         ridge_alpha=1.0e-100,
                         features=None):
    """
    Find suitable linear model parameters for the given phase.

    We do this by successively fitting heat capacities, entropies and
    enthalpies of formation, and selecting against criteria to prevent
    overfitting. The "best" set of parameters minimizes the error
    without overfitting.

    Each fitting step (CPM, then SM, then HM) collects the matching datasets,
    builds one feature matrix and one response vector per candidate model,
    asks ``select_model`` to pick a model, and then treats the selected
    contribution as fixed for the following steps.

    Parameters
    ----------
    dbf : Database
        pycalphad Database. Partially complete, so we know what degrees of freedom to fix.
    comps : [str]
        Names of the relevant components.
    phase_name : str
        Name of the desired phase for which the parameters will be found.
    configuration : ndarray
        Configuration of the sublattices for the fitting procedure.
    symmetry : [[int]]
        Symmetry of the sublattice configuration.
    datasets : PickleableTinyDB
        All the datasets desired to fit to.
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. Defaults to 1.0e-100, which should be degenerate
        with ordinary least squares regression. For now, the parameter is applied to all features.
    features : dict
        Maps "property" to a list of features for the linear model.
        These will be transformed from "GM" coefficients
        e.g., {"CPM_FORM": (v.T*sympy.log(v.T), v.T**2, v.T**-1, v.T**3)} (Default value = None)

    Returns
    -------
    dict
        {feature: estimated_value}. Every coefficient of every candidate model
        is present; coefficients that were never selected keep the value 0.

    """
    if interaction_test(configuration):
        logging.debug('ENDMEMBERS FROM INTERACTION: {}'.format(
            endmembers_from_interaction(configuration)))
        fitting_steps = (["CPM_FORM", "CPM_MIX"],
                         ["SM_FORM", "SM_MIX"],
                         ["HM_FORM", "HM_MIX"])
    else:
        # We are only fitting an endmember; no mixing data needed
        fitting_steps = (["CPM_FORM"], ["SM_FORM"], ["HM_FORM"])

    # create the candidate models and fitting steps
    if features is None:
        features = OrderedDict([("CPM_FORM", (v.T * sympy.log(v.T), v.T**2,
                                              v.T**-1, v.T**3)),
                                ("SM_FORM", (v.T, )),
                                ("HM_FORM", (sympy.S.One, ))])
    # dict of {feature, [candidate_models]}
    candidate_models_features = build_candidate_models(configuration, features)

    # All possible parameter values that could be taken on. This is some legacy
    # code from before there were many candidate models built. For very large
    # sets of candidate models, this could be quite slow.
    # TODO: we might be able to remove this initialization for clarity, depends on fixed portions
    parameters = {}
    for candidate_models in candidate_models_features.values():
        for model in candidate_models:
            for coef in model:
                parameters[coef] = 0

    # This is our previously fit partial model from previous steps
    # Subtract out all of these contributions (zero out reference state because these are formation properties)
    fixed_model = Model(
        dbf,
        comps,
        phase_name,
        parameters={'GHSER' + (c.upper() * 2)[:2]: 0
                    for c in comps})
    fixed_model.models['idmix'] = 0
    # Symbolic contributions selected in earlier fitting steps; seeded with 0
    # so that sum(fixed_portions) is well defined on the first step.
    fixed_portions = [0]

    # Sublattices that contain vacancies contribute only their non-vacant
    # site fraction to the moles of atoms per formula unit.
    moles_per_formula_unit = sympy.S(0)
    YS = sympy.Symbol('YS')  # site fraction symbol that we will reuse
    Z = sympy.Symbol('Z')  # interaction product symbol that we will reuse
    phase = dbf.phases[phase_name]
    for subl_idx, (num_sites, const) in enumerate(
            zip(phase.sublattices, phase.constituents)):
        if v.Species('VA') in const:
            moles_per_formula_unit += num_sites * (
                1 - v.SiteFraction(phase_name, subl_idx, v.Species('VA')))
        else:
            moles_per_formula_unit += num_sites

    for desired_props in fitting_steps:
        desired_data = get_data(comps, phase_name, configuration, symmetry,
                                datasets, desired_props)
        logging.debug('{}: datasets found: {}'.format(desired_props,
                                                      len(desired_data)))
        if len(desired_data) > 0:
            # We assume all properties in the same fitting step have the same
            # features (all CPM, all HM, etc.) (but different ref states), so
            # the first property of the step selects the features/transforms.
            prop = desired_props[0]
            all_samples = get_samples(desired_data)
            # One entry per dataset per temperature condition. When a dataset
            # has no 'sublattice_occupancies', an all-ones array shaped like
            # its configurations is used. The builtin ``float`` replaces the
            # ``np.float`` alias, which was removed in NumPy 1.24.
            site_fractions = [
                build_sitefractions(
                    phase_name, ds['solver']['sublattice_configurations'],
                    ds['solver'].get(
                        'sublattice_occupancies',
                        np.ones((
                            len(ds['solver']['sublattice_configurations']),
                            len(ds['solver']['sublattice_configurations'][0])),
                                dtype=float))) for ds in desired_data
                for _ in ds['conditions']['T']
            ]
            # Flatten list
            site_fractions = list(
                itertools.chain.from_iterable(site_fractions))

            # build the candidate model transformation matrix and response vector (A, b in Ax=b)
            feature_matrices = []
            data_quantities = []
            for candidate_model in candidate_models_features[prop]:
                if interaction_test(configuration, 3):
                    feature_matrices.append(
                        build_ternary_feature_matrix(prop, candidate_model,
                                                     desired_data))
                else:
                    feature_matrices.append(
                        _build_feature_matrix(prop, candidate_model,
                                              desired_data))

                transform = feature_transforms[prop]
                data_qtys = np.concatenate(shift_reference_state(
                    desired_data, transform, fixed_model),
                                           axis=-1)

                # Remove existing partial model contributions from the data
                data_qtys = data_qtys - transform(fixed_model.ast)
                # Subtract out high-order (in T) parameters we've already fit
                data_qtys = data_qtys - transform(
                    sum(fixed_portions)) / moles_per_formula_unit

                # if any site fractions show up in our data_qtys that aren't in this datasets site fractions, set them to zero.
                for sf, i, (_, (sf_product,
                                inter_product)) in zip(site_fractions,
                                                       data_qtys, all_samples):
                    missing_variables = sympy.S(
                        i * moles_per_formula_unit).atoms(
                            v.SiteFraction) - set(sf.keys())
                    sf.update({x: 0. for x in missing_variables})
                    # The equations we have just have the site fractions as YS
                    # and interaction products as Z, so take the product of all
                    # the site fractions that we see in our data qtys
                    sf.update({YS: sf_product, Z: inter_product})

                # moles_per_formula_unit factor is here because our data is stored per-atom
                # but all of our fits are per-formula-unit
                data_qtys = [
                    sympy.S(i * moles_per_formula_unit).xreplace(sf).xreplace({
                        v.T:
                        ixx[0]
                    }).evalf() for i, sf, ixx in zip(data_qtys, site_fractions,
                                                     all_samples)
                ]
                # Builtin ``float`` instead of the removed ``np.float`` alias.
                data_qtys = np.asarray(data_qtys, dtype=float)
                data_quantities.append(data_qtys)

            # provide candidate models and get back a selected model.
            selected_model = select_model(
                zip(candidate_models_features[prop], feature_matrices,
                    data_quantities), ridge_alpha)
            selected_features, selected_values = selected_model
            parameters.update(zip(selected_features, selected_values))
            # Add these parameters to be fixed for the next fitting step.
            # dtype=object keeps the symbolic features intact inside the
            # array (builtin ``object`` replaces the removed ``np.object``).
            fixed_portion = np.array(selected_features, dtype=object)
            fixed_portion = np.dot(fixed_portion, selected_values)
            fixed_portions.append(fixed_portion)
    return parameters