Exemplo n.º 1
0
def fit_ternary_interactions(dbf, phase_name, symmetry, endmembers, datasets, ridge_alpha=None, aicc_phase_penalty=None):
    """
    Fit ternary interactions for a database in place

    For every distinct ternary interaction generated from ``endmembers`` that
    does not already have an ``L`` parameter in ``dbf``, the formation energy is
    fitted, the non-zero coefficients are stored as new symbols in
    ``dbf.symbols`` and the resulting ``L`` parameters are added for all
    symmetrically equivalent configurations.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to
    phase_name : str
        Name of the phase to fit
    symmetry : list
        List of symmetric sublattices, e.g. [[0, 1, 2], [3, 4]]
    endmembers : list
        List of endmember tuples, e.g. [('CU', 'MG')]
    datasets : PickleableTinyDB
        TinyDB database of datasets
    ridge_alpha : float, optional
        Value of the :math:`\\alpha` hyperparameter used in ridge regression.
        Passed through unchanged to ``fit_formation_energy``. The default
        here is ``None``.
    aicc_phase_penalty : optional
        Passed through unchanged to ``fit_formation_energy`` as its
        ``aicc_phase_penalty`` keyword. The default here is ``None``.

    Returns
    -------
    None
        Modified the Database in place
    """
    numdigits = 6  # number of significant figures, might cause rounding errors
    # These symbols are loop invariant, so build them once.
    YS = Symbol('YS')
    V_I, V_J, V_K = Symbol('V_I'), Symbol('V_J'), Symbol('V_K')
    interactions = generate_interactions(endmembers, order=3, symmetry=symmetry)
    _log.trace('%s distinct ternary interactions', len(interactions))
    for interaction in interactions:
        config = tuple(map(tuplify, interaction))
        if _param_present_in_database(dbf, phase_name, config, 'L'):
            _log.warning('INTERACTION: %s already in Database. Skipping.', interaction)
            continue
        _log.trace('INTERACTION: %s', interaction)
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, interaction, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(3, dtype=np.object_)
        # asymmetric parameters should have Muggianu V_I/V_J/V_K, while symmetric just has YS
        is_asymmetric = any(k.has(V_I) and v != 0 for k, v in parameters.items())
        # (excess parameter degree, symbol) tuples
        if is_asymmetric:
            params = [(2, YS*V_K), (1, YS*V_J), (0, YS*V_I)]
        else:
            params = [(0, YS)]
        for degree, check_symbol in params:
            keys_to_remove = []
            # Sort by string form so symbol names are assigned in a stable order.
            for key, value in sorted(parameters.items(), key=str):
                if not key.has(check_symbol):
                    continue
                if value != 0:
                    # Store the rounded coefficient under a fresh symbol and
                    # reference that symbol in the polynomial instead.
                    symbol_name = get_next_symbol(dbf)
                    dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                    parameters[key] = Symbol(symbol_name)
                coef = parameters[key] * (key / check_symbol)
                try:
                    coef = float(coef)
                except TypeError:
                    # Symbolic coefficients cannot be converted; keep them as-is.
                    pass
                degree_polys[degree] += coef
                keys_to_remove.append(key)
            # Consumed features must not be matched again by a later check symbol.
            for key in keys_to_remove:
                parameters.pop(key)
        _log.trace('Polynomial coefs: %s', degree_polys)
        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree, degree_polys[degree])
Exemplo n.º 2
0
def phase_fit(dbf, phase_name, symmetry, datasets, refdata, ridge_alpha, aicc_penalty=None, aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Endmembers are handled first (taken from ``refdata`` when a lattice
    stability is found, fitted otherwise), then binary interactions, then
    ternary interactions. All results are written into ``dbf``.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the :math:`\\alpha` hyperparameter used in ridge regression. A value of 1.0e-100 should be degenerate
        with ordinary least squares regression. For now, the parameter is applied to all features. It is forwarded to
        the endmember, binary and ternary fitting steps.
    aicc_penalty : dict
        Map of phase name to feature to a multiplication factor for the AICc's parameter penalty.
    aliases : Dict[str, str]
        Mapping of possible aliases to the Database phase names.

    Returns
    -------
    None
        Modifies the dbf.

    """
    aicc_penalty = aicc_penalty if aicc_penalty is not None else {}
    aicc_phase_penalty = aicc_penalty.get(phase_name, {})
    # Temporary counter attribute on dbf; it is removed again at the end of this function.
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    phase_obj = dbf.phases[phase_name]
    # TODO: assumed pure elements - add proper support for Species objects
    subl_model = [sorted([sp.name for sp in subl]) for subl in phase_obj.constituents]
    site_ratios = phase_obj.sublattices
    # First fit endmembers
    all_em_count = len(generate_endmembers(subl_model))  # number of total endmembers
    endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    # TODO: use the global aliases dictionary passed in as-is instead of converting it to a phase-local dict
    # TODO: use the aliases dictionary in dataset queries to find relevant data
    if aliases is None:
        aliases = [phase_name]
    else:
        aliases = sorted([alias for alias, database_phase in aliases.items() if database_phase == phase_name])
    _log.info('FITTING: %s', phase_name)
    _log.trace('%s endmembers (%s distinct by symmetry)', all_em_count, len(endmembers))

    all_endmembers = []
    for endmember in endmembers:
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        # Collected even for skipped endmembers: interactions are generated from the full list.
        all_endmembers.extend(symmetric_endmembers)
        if _param_present_in_database(dbf, phase_name, endmember, 'G'):
            _log.trace('ENDMEMBER: %s already in Database. Skipping.', endmember)
            continue
        _log.trace('ENDMEMBER: %s', endmember)
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or\
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G'+name[:3].upper()+em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True),)
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            # Also true when the symbol was already present in the database before this call.
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum([sites for elem, sites in zip(endmember, site_ratios) if elem != 'VA'])
                fit_eq = num_moles * Symbol(sym_name)
                _log.trace("Found lattice stability: %s", sym_name)
                _log.debug("%s = %s", sym_name, dbf.symbols[sym_name])
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, endmember, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
            # Sort by string form so symbol names are assigned in a stable order.
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = get_next_symbol(dbf)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = Symbol(symbol_name)
            fit_eq = sympy.Add(*[value * key for key, value in parameters.items()])
            # Add the site-ratio weighted GHSER reference of every non-vacancy constituent.
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                # Two-character element code: single-letter names are doubled (e.g. 'B' -> 'BB').
                subl = (subl.upper()*2)[:2]
                ref = ref + ratio * Symbol('GHSER'+subl)
            fit_eq += ref
        _log.trace('SYMMETRIC_ENDMEMBERS: %s', symmetric_endmembers)
        for em in symmetric_endmembers:
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, em)), 0, fit_eq)

    _log.trace('FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers, order=2, symmetry=symmetry)
    _log.trace('%s distinct binary interactions', len(bin_interactions))
    for interaction in bin_interactions:
        # Normalise nested lists to tuples; plain entries are kept as they are.
        ixx = tuple(tuple(i) if isinstance(i, (tuple, list)) else i for i in interaction)
        config = tuple(map(tuplify, ixx))
        if _param_present_in_database(dbf, phase_name, config, 'L'):
            _log.trace('INTERACTION: %s already in Database', ixx)
            continue
        _log.trace('INTERACTION: %s', ixx)
        parameters = fit_formation_energy(dbf, sorted(dbf.elements), phase_name, ixx, symmetry, datasets, ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
        # Organize parameters by polynomial degree
        degree_polys = np.zeros(10, dtype=np.object_)
        # Highest degree first: consumed keys are removed, so YS*Z**n is claimed
        # before lower-degree check symbols are tried.
        for degree in reversed(range(10)):
            check_symbol = Symbol('YS') * Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if not key.has(check_symbol):
                    continue
                if value != 0:
                    symbol_name = get_next_symbol(dbf)
                    dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                    parameters[key] = Symbol(symbol_name)
                coef = parameters[key] * (key / check_symbol)
                try:
                    coef = float(coef)
                except TypeError:
                    # Symbolic coefficients cannot be converted; keep them as-is.
                    pass
                degree_polys[degree] += coef
                keys_to_remove.append(key)
            for key in keys_to_remove:
                parameters.pop(key)
        _log.trace('Polynomial coefs: %s', degree_polys.tolist())
        # Insert into database
        symmetric_interactions = generate_symmetric_group(interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name, tuple(map(tuplify, syminter)), degree, degree_polys[degree])

    _log.trace('FITTING TERNARY INTERACTIONS')
    # Forward ridge_alpha so ternary fits use the same regularisation as the
    # endmember and binary fits above.
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers, datasets, ridge_alpha=ridge_alpha, aicc_phase_penalty=aicc_phase_penalty)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter
Exemplo n.º 3
0
def phase_fit(dbf,
              phase_name,
              symmetry,
              subl_model,
              site_ratios,
              datasets,
              refdata,
              ridge_alpha,
              aliases=None):
    """Generate an initial CALPHAD model for a given phase and sublattice model.

    Endmembers are handled first (taken from ``refdata`` when a lattice
    stability is found, fitted otherwise), then binary interactions, then
    ternary interactions. All results are written into ``dbf``.

    Parameters
    ----------
    dbf : Database
        pycalphad Database to add parameters to.
    phase_name : str
        Name of the phase.
    symmetry : [[int]]
        Sublattice model symmetry.
    subl_model : [[str]]
        Sublattice model for the phase of interest.
    site_ratios : [float]
        Number of sites in each sublattice, normalized to one atom.
    datasets : PickleableTinyDB
        All datasets to consider for the calculation.
    refdata : dict
        Maps tuple(element, phase_name) -> SymPy object defining
        energy relative to SER
    ridge_alpha : float
        Value of the $alpha$ hyperparameter used in ridge regression. A value of 1.0e-100 should be degenerate
        with ordinary least squares regression. For now, the parameter is applied to all features. It is forwarded
        to the endmember, binary and ternary fitting steps.
    aliases : [str]
        Alternative phase names. Useful for matching against
        reference data or other datasets. (Default value = None)

    Returns
    -------
    None
        Modifies the dbf.

    """
    # Temporary counter attribute on dbf; it is removed again at the end of this function.
    if not hasattr(dbf, 'varcounter'):
        dbf.varcounter = 0
    # First fit endmembers
    all_em_count = len(
        generate_endmembers(subl_model))  # number of total endmembers
    endmembers = generate_endmembers(subl_model, symmetry)
    # Number of significant figures in parameters, might cause rounding errors
    numdigits = 6
    # The phase name itself is always tried; a set union accepts any iterable of aliases.
    aliases = sorted(set(aliases or ()) | {phase_name})
    logging.info('FITTING: %s', phase_name)
    logging.debug('%s endmembers (%s distinct by symmetry)', all_em_count,
                  len(endmembers))

    all_endmembers = []
    for endmember in endmembers:
        logging.debug('ENDMEMBER: %s', endmember)
        # Some endmembers are fixed by our choice of standard lattice stabilities, e.g., SGTE91
        # If a (phase, pure component endmember) tuple is fixed, we should use that value instead of fitting
        endmember_comps = list(set(endmember))
        fit_eq = None
        # only one non-VA component, or two components but the other is VA and its only the last sublattice
        if ((len(endmember_comps) == 1) and (endmember_comps[0] != 'VA')) or\
                ((len(endmember_comps) == 2) and (endmember[-1] == 'VA') and (len(set(endmember[:-1])) == 1)):
            # this is a "pure component endmember"
            # try all phase name aliases until we get run out or get a hit
            em_comp = list(set(endmember_comps) - {'VA'})[0]
            sym_name = None
            for name in aliases:
                sym_name = 'G' + name[:3].upper() + em_comp.upper()
                stability = refdata.get((em_comp.upper(), name.upper()), None)
                if stability is not None:
                    if isinstance(stability, sympy.Piecewise):
                        # Default zero required for the compiled backend
                        if (0, True) not in stability.args:
                            new_args = stability.args + ((0, True), )
                            stability = sympy.Piecewise(*new_args)
                    dbf.symbols[sym_name] = stability
                    break
            # Also true when the symbol was already present in the database before this call.
            if dbf.symbols.get(sym_name, None) is not None:
                num_moles = sum([
                    sites for elem, sites in zip(endmember, site_ratios)
                    if elem != 'VA'
                ])
                fit_eq = num_moles * sympy.Symbol(sym_name)
        if fit_eq is None:
            # No reference lattice stability data -- we have to fit it
            parameters = fit_formation_energy(dbf, sorted(dbf.elements),
                                              phase_name, endmember, symmetry,
                                              datasets, ridge_alpha)
            # Sort by string form so symbol names are assigned in a stable order.
            for key, value in sorted(parameters.items(), key=str):
                if value == 0:
                    continue
                symbol_name = get_next_symbol(dbf)
                dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                parameters[key] = sympy.Symbol(symbol_name)
            fit_eq = sympy.Add(
                *[value * key for key, value in parameters.items()])
            # Add the site-ratio weighted GHSER reference of every non-vacancy constituent.
            ref = 0
            for subl, ratio in zip(endmember, site_ratios):
                if subl == 'VA':
                    continue
                # Two-character element code: single-letter names are doubled (e.g. 'B' -> 'BB').
                subl = (subl.upper() * 2)[:2]
                ref = ref + ratio * sympy.Symbol('GHSER' + subl)
            fit_eq += ref
        symmetric_endmembers = generate_symmetric_group(endmember, symmetry)
        logging.debug('SYMMETRIC_ENDMEMBERS: %s', symmetric_endmembers)
        all_endmembers.extend(symmetric_endmembers)
        for em in symmetric_endmembers:
            dbf.add_parameter('G', phase_name, tuple(map(tuplify, em)), 0,
                              fit_eq)

    logging.debug('FITTING BINARY INTERACTIONS')
    bin_interactions = generate_interactions(all_endmembers,
                                             order=2,
                                             symmetry=symmetry)
    logging.debug('%s distinct binary interactions', len(bin_interactions))
    for interaction in bin_interactions:
        # Normalise nested lists to tuples; plain entries are kept as they are.
        ixx = tuple(
            tuple(i) if isinstance(i, (tuple, list)) else i
            for i in interaction)
        logging.debug('INTERACTION: %s', ixx)
        parameters = fit_formation_energy(dbf, sorted(dbf.elements),
                                          phase_name, ixx, symmetry, datasets,
                                          ridge_alpha)
        # Organize parameters by polynomial degree
        # np.object_ rather than the np.object alias, which NumPy 1.24 removed.
        degree_polys = np.zeros(10, dtype=np.object_)
        # Highest degree first: consumed keys are removed, so YS*Z**n is claimed
        # before lower-degree check symbols are tried.
        for degree in reversed(range(10)):
            check_symbol = sympy.Symbol('YS') * sympy.Symbol('Z')**degree
            keys_to_remove = []
            for key, value in sorted(parameters.items(), key=str):
                if not key.has(check_symbol):
                    continue
                if value != 0:
                    symbol_name = get_next_symbol(dbf)
                    dbf.symbols[symbol_name] = sigfigs(value, numdigits)
                    parameters[key] = sympy.Symbol(symbol_name)
                coef = parameters[key] * (key / check_symbol)
                try:
                    coef = float(coef)
                except TypeError:
                    # Symbolic coefficients cannot be converted; keep them as-is.
                    pass
                degree_polys[degree] += coef
                keys_to_remove.append(key)
            for key in keys_to_remove:
                parameters.pop(key)
        logging.debug('Polynomial coefs: %s', degree_polys)
        # Insert into database
        symmetric_interactions = generate_symmetric_group(
            interaction, symmetry)
        for degree in np.arange(degree_polys.shape[0]):
            if degree_polys[degree] != 0:
                for syminter in symmetric_interactions:
                    dbf.add_parameter('L', phase_name,
                                      tuple(map(tuplify, syminter)), degree,
                                      degree_polys[degree])

    logging.debug('FITTING TERNARY INTERACTIONS')
    # Forward ridge_alpha so ternary fits use the same regularisation as the
    # endmember and binary fits above.
    fit_ternary_interactions(dbf, phase_name, symmetry, all_endmembers,
                             datasets, ridge_alpha=ridge_alpha)
    if hasattr(dbf, 'varcounter'):
        del dbf.varcounter