def build_eqpropdata(data: tinydb.database.Document,
                     dbf: Database,
                     parameters: Optional[Dict[str, float]] = None,
                     data_weight_dict: Optional[Dict[str, float]] = None
                     ) -> EqPropData:
    """
    Assemble the EqPropData describing the calculations for one dataset.

    Parameters
    ----------
    data : tinydb.database.Document
        A single dataset. The keys read here are ``components``, ``phases``,
        ``output``, ``values`` and ``conditions``, plus the optional keys
        ``reference``, ``reference_states`` and ``weight``.
    dbf : Database
        Database used to construct the models and phase records.
    parameters : Optional[Dict[str, float]]
        Mapping of parameter symbols to values. ``None`` is treated as ``{}``.
    data_weight_dict : Optional[Dict[str, float]]
        Mapping of a data type (e.g. ``HM`` or ``SM``) to a weight.
        ``None`` is treated as ``{}``.

    Returns
    -------
    EqPropData

    Notes
    -----
    ``data['conditions']`` is modified in place: an ``'N'`` entry of ``1.0``
    is inserted when the dataset does not provide one.
    """
    if parameters is None:
        parameters = {}
    if data_weight_dict is None:
        data_weight_dict = {}

    # Standard deviation assigned to each supported property; properties
    # that are not listed fall back to 1.0 when the weights are computed.
    property_std_deviation = {
        'HM': 500.0,  # J/mol
        'SM': 0.2,  # J/K-mol
        'CPM': 0.2,  # J/K-mol
    }

    params_keys, _ = extract_parameters(parameters)

    # 'VA' is always added to the components named by the dataset.
    data_comps = list(set(data['components']) | {'VA'})
    species = sorted(unpack_components(dbf, data_comps), key=str)
    data_phases = filter_phases(dbf, species, candidate_phases=data['phases'])
    models = instantiate_models(dbf, species, data_phases, parameters=parameters)

    output = data['output']
    # Property name without any suffix such as _FORM or _MIX.
    property_output = output.split('_')[0]
    samples = np.array(data['values']).flatten()
    reference = data.get('reference', '')

    # The models are modified in response to the reference states of the data.
    if 'reference_states' in data:
        # Unreferenced model property (trailing 'R' dropped) so that
        # shift_reference_state knows what to build.
        property_output = output[:-1] if output.endswith('R') else output
        reference_states = [
            ReferenceState(v.Species(el), vals['phase'],
                           fixed_statevars=vals.get('fixed_state_variables'))
            for el, vals in data['reference_states'].items()
        ]
        for mod in models.values():
            mod.shift_reference_state(reference_states, dbf, output=(property_output,))

    conditions = data['conditions']
    # Add a default for N. Nothing else is supported in pycalphad anyway.
    conditions.setdefault('N', 1.0)
    condition_keys = sorted(conditions.keys())

    # Keys starting with 'X_' are composition conditions; all other keys are
    # looked up as attributes of the variables module.
    pot_conds = OrderedDict()
    for key in condition_keys:
        if not key.startswith('X_'):
            pot_conds[getattr(v, key)] = unpack_condition(conditions[key])
    comp_conds = OrderedDict()
    for key in condition_keys:
        if key.startswith('X_'):
            comp_conds[v.X(key[2:])] = unpack_condition(conditions[key])

    phase_records = build_phase_records(dbf, species, data_phases,
                                        {**pot_conds, **comp_conds}, models,
                                        parameters=parameters,
                                        build_gradients=True,
                                        build_hessians=True)

    # Unravel the composition conditions
    # (from Dict[v.X, Sequence[float]] to Sequence[Dict[v.X, float]]), since
    # the composition conditions are only broadcast against the potentials,
    # not each other. Each individual composition needs to be computed
    # independently, since broadcasting over composition cannot be turned off
    # in pycalphad.
    comp_keys = comp_conds.keys()
    rav_comp_conds = [
        OrderedDict(zip(comp_keys, pt_comps))
        for pt_comps in zip(*comp_conds.values())
    ]

    # Build weights, should be the same size as the values.
    num_potential_points = np.prod([len(vals) for vals in pot_conds.values()])
    total_num_calculations = len(rav_comp_conds) * num_potential_points
    dataset_weights = np.array(data.get('weight', 1.0)) * np.ones(total_num_calculations)
    std_deviation = property_std_deviation.get(property_output, 1.0)
    data_type_weight = data_weight_dict.get(property_output, 1.0)
    weights = (std_deviation / data_type_weight / dataset_weights).flatten()

    return EqPropData(dbf, species, data_phases, pot_conds, rav_comp_conds,
                      models, params_keys, phase_records, output, samples,
                      weights, reference)
def _compute_phase_values(components, statevar_dict,
                          points, phase_record, output, maximum_internal_dof,
                          broadcast=True, parameters=None, fake_points=False,
                          largest_energy=None):
    """
    Calculate output values for a particular phase.

    Parameters
    ----------
    components : list
        Components to consider in the calculation. Each entry must expose a
        ``constituents`` mapping whose keys are the pure element names.
    statevar_dict : OrderedDict {str -> float or sequence}
        Mapping of state variables to desired values. This will broadcast if necessary.
    points : ndarray
        Inputs to 'func', except state variables. Columns should be in 'variables' order.
    phase_record : PhaseRecord
        Contains callable for energy and phase metadata.
    output : string
        Desired name of the output result in the Dataset.
    maximum_internal_dof : int
        Largest number of internal degrees of freedom of any phase. This is used
        to guarantee different phase's Datasets can be concatenated.
    broadcast : bool
        If True, broadcast state variables against each other to create a grid.
        If False, assume state variables are given as equal-length lists (or single-valued).
    parameters : OrderedDict {str -> float or sequence}, optional
        Maps SymPy symbols to a scalar or 1-D array. The arrays must be equal length.
        The corresponding PhaseRecord must have been initialized with the same parameters.
    fake_points : bool, optional (Default: False)
        If True, the first few points of the output surface will be fictitious
        points used to define an equilibrium hyperplane guaranteed to be above
        all the other points. This is used for convex hull computations.
    largest_energy : float, optional
        Value broadcast into the output entries of the fictitious points.
        Only read when `fake_points` is True.

    Returns
    -------
    LightDataset
        Holds the data variables 'X' (phase compositions), 'Phase' (phase
        names), 'Y' (points padded with NaN up to `maximum_internal_dof`
        columns) and `output`. When ``broadcast`` is False the state
        variables are added as data variables instead of coordinates. When
        more than one parameter set is given, 'param_values' is added too.

    Raises
    ------
    ValueError
        If ``broadcast`` is False and a state variable has more than one
        value but not the same number of values as there are points.
    """
    if broadcast:
        # Broadcast compositions and state variables along orthogonal axes
        # This lets us eliminate an expensive Python loop
        statevars = np.meshgrid(*itertools.chain(statevar_dict.values(),
                                                 [np.empty(points.shape[-2])]),
                                sparse=True, indexing='ij')[:-1]
        statevar_lengths = tuple(len(np.atleast_1d(x)) for x in statevar_dict.values())
        points = broadcast_to(points, statevar_lengths + points.shape[-2:])
    else:
        num_points = len(points)
        statevars = []
        for statevar in (np.atleast_1d(x) for x in statevar_dict.values()):
            if len(statevar) != num_points:
                if len(statevar) != 1:
                    raise ValueError('Length of state variable list and number of given points must be equal when '
                                     'broadcast=False.')
                # Single-valued state variables are repeated for every point.
                statevar = np.repeat(statevar, num_points)
            statevars.append(statevar)

    pure_elements = sorted({el.upper()
                            for comp in components
                            for el in comp.constituents.keys()})
    pure_elements = [x for x in pure_elements if x != 'VA']
    num_pure_elements = len(pure_elements)

    # func may only have support for vectorization along a single axis (no broadcasting)
    # we need to force broadcasting and flatten the result before calling
    bc_statevars = np.ascontiguousarray([broadcast_to(x, points.shape[:-1]).reshape(-1) for x in statevars])
    pts = points.reshape(-1, points.shape[-1])
    dof = np.ascontiguousarray(np.concatenate((bc_statevars.T, pts), axis=1))
    phase_compositions = np.zeros((dof.shape[0], num_pure_elements), order='F')

    param_symbols, parameter_array = extract_parameters(parameters)
    parameter_array_length = parameter_array.shape[0]
    if parameter_array_length == 0:
        # No parameters specified
        phase_output = np.zeros(dof.shape[0], order='C')
        phase_record.obj_2d(phase_output, dof)
    else:
        # Vectorized parameter arrays
        phase_output = np.zeros((dof.shape[0], parameter_array_length), order='C')
        phase_record.obj_parameters_2d(phase_output, dof, parameter_array)

    # The phase record writes each element's values into its own column.
    for el_idx in range(num_pure_elements):
        phase_record.mass_obj_2d(phase_compositions[:, el_idx], dof, el_idx)

    max_tieline_vertices = num_pure_elements
    # The builtin ``float`` is the dtype formerly spelled ``np.float``; that
    # alias no longer exists in current NumPy releases.
    phase_output = np.asarray(phase_output, dtype=float)
    if parameter_array_length <= 1:
        phase_output.shape = points.shape[:-1]
    else:
        phase_output.shape = points.shape[:-1] + (parameter_array_length,)
    phase_compositions = np.asarray(phase_compositions, dtype=float)
    phase_compositions.shape = points.shape[:-1] + (num_pure_elements,)

    phase_name_dtype = 'U' + str(len(phase_record.phase_name))
    phase_names = np.full(points.shape[:-1], phase_record.phase_name, dtype=phase_name_dtype)
    if fake_points:
        # Prepend one fictitious point per pure element to the output, the
        # phase names and the compositions.
        fake_shape = points.shape[:-2] + (max_tieline_vertices,)
        output_shape = fake_shape
        if parameter_array_length > 1:
            output_shape = output_shape + (parameter_array_length,)
            concat_axis = -2
        else:
            concat_axis = -1
        phase_output = np.concatenate((broadcast_to(largest_energy, output_shape), phase_output),
                                      axis=concat_axis)
        phase_names = np.concatenate((broadcast_to('_FAKE_', fake_shape), phase_names), axis=-1)
        fake_compositions = np.broadcast_to(np.eye(num_pure_elements),
                                            fake_shape + (num_pure_elements,))
        phase_compositions = np.concatenate((fake_compositions, phase_compositions), axis=-2)

    coordinate_dict = {'component': pure_elements}
    # Resize 'points' so it has the same number of columns as the maximum
    # number of internal degrees of freedom of any phase in the calculation.
    # We do this so that everything is aligned for concat.
    # Waste of memory? Yes, but the alternatives are unclear.
    # In each case, first check if we need to do this...
    # It can be expensive for many points (~14s for 500M points)
    if fake_points:
        desired_shape = points.shape[:-2] + (max_tieline_vertices + points.shape[-2], maximum_internal_dof)
        expanded_points = np.full(desired_shape, np.nan)
        expanded_points[..., num_pure_elements:, :points.shape[-1]] = points
    else:
        desired_shape = points.shape[:-1] + (maximum_internal_dof,)
        if points.shape == desired_shape:
            expanded_points = points
        else:
            # TODO: most optimal solution would be to take pre-extended arrays as an argument and remove this
            # This still copies the array, but is more efficient than filling
            # an array with np.nan, then copying the existing points
            append_nans = np.full(desired_shape[:-1] + (desired_shape[-1] - points.shape[-1],), np.nan)
            expanded_points = np.append(points, append_nans, axis=-1)

    if broadcast:
        coordinate_dict.update({key: np.atleast_1d(value) for key, value in statevar_dict.items()})
        output_columns = [str(x) for x in statevar_dict.keys()] + ['points']
    else:
        output_columns = ['points']

    if parameter_array_length > 1:
        parameter_column = ['samples']
        coordinate_dict['param_symbols'] = [str(x) for x in param_symbols]
    else:
        parameter_column = []

    # Any leading axes of the output that are not accounted for by the known
    # columns get generated 'dim_<i>' names.
    num_extra_dims = len(phase_output.shape) - (len(output_columns) + len(parameter_column))
    output_dims = ['dim_' + str(i) for i in range(num_extra_dims)] + output_columns + parameter_column

    data_arrays = {'X': (output_columns + ['component'], phase_compositions),
                   'Phase': (output_columns, phase_names),
                   'Y': (output_columns + ['internal_dof'], expanded_points),
                   output: (output_dims, phase_output)}
    if not broadcast:
        # Add state variables as data variables rather than as coordinates
        for sym, vals in zip(statevar_dict.keys(), statevars):
            data_arrays[sym] = (output_columns, vals)
    if parameter_array_length > 1:
        data_arrays['param_values'] = (['samples', 'param_symbols'], parameter_array)
    return LightDataset(data_arrays, coords=coordinate_dict)
def build_phase_records(dbf, comps, phases, conds, models, output='GM',
                        callables=None, parameters=None, verbose=False,
                        build_gradients=False, build_hessians=False):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : list
        List of component names
    phases : list
        List of phase names
    conds : dict or None
        Conditions for calculation
    models : dict
        Dictionary of {'phase_name': Model()}
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables. They are built here when ``callables`` has no
        entry for `output`.
        Maps {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}
    parameters : dict, optional
        Maps SymPy Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase after its record is built
    build_gradients : bool
        Whether or not to build gradient functions. Defaults to False. Only
        takes effect if callables are not passed.
    build_hessians : bool
        Whether or not to build Hessian functions. Defaults to False. Only
        takes effect if callables are not passed.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    NOTE(review): further definitions named ``build_phase_records`` follow
    this one in the file, so the last one bound to the name wins at import
    time -- confirm which variant is intended.
    """
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(get_state_variables(models=models, conds=conds), key=str)
    param_symbols, param_values = extract_parameters(parameters)

    if callables.get(output) is None:
        callables = build_callables(dbf, comps, phases, models,
                                    parameter_symbols=parameters.keys(),
                                    output=output,
                                    additional_statevars=state_variables,
                                    build_gradients=build_gradients,
                                    build_hessians=build_hessians)

    phase_records = {}
    for name in phases:
        mod = models[name]
        site_fracs = mod.site_fractions

        # Constraint functions for this phase
        cfuncs = build_constraints(mod, state_variables + site_fracs, conds,
                                   parameters=param_symbols)
        internal_cons_func = cfuncs.internal_cons_func
        internal_cons_jac = cfuncs.internal_cons_jac
        internal_cons_hess = cfuncs.internal_cons_hess
        multiphase_cons_func = cfuncs.multiphase_cons_func
        multiphase_cons_jac = cfuncs.multiphase_cons_jac
        multiphase_cons_hess = cfuncs.multiphase_cons_hess
        num_internal_cons = cfuncs.num_internal_cons
        num_multiphase_cons = cfuncs.num_multiphase_cons

        output_callables = callables[output]
        phase_records[name.upper()] = PhaseRecord(
            comps, state_variables, site_fracs, param_values,
            output_callables['callables'][name],
            output_callables['grad_callables'][name],
            output_callables['hess_callables'][name],
            output_callables['massfuncs'][name],
            output_callables['massgradfuncs'][name],
            output_callables['masshessfuncs'][name],
            internal_cons_func,
            internal_cons_jac,
            internal_cons_hess,
            multiphase_cons_func,
            multiphase_cons_jac,
            multiphase_cons_hess,
            num_internal_cons,
            num_multiphase_cons,
        )

        if verbose:
            print(name + ' ')
    return phase_records
def build_phase_records(dbf, comps, phases, conds, models, output='GM',
                        callables=None, parameters=None, verbose=False,
                        build_gradients=False, build_hessians=False
                        ):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : list
        List of component names
    phases : list
        List of phase names
    conds : dict or None
        Conditions for calculation
    models : dict
        Dictionary of {'phase_name': Model()}
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables. They are built here when ``callables`` has no
        entry for `output`.
        Maps {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}
    parameters : dict, optional
        Maps SymPy Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase after its record is built
    build_gradients : bool
        Whether or not to build gradient functions. Defaults to False. Only
        takes effect if callables are not passed.
    build_hessians : bool
        Whether or not to build Hessian functions. Defaults to False. Only
        takes effect if callables are not passed.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    NOTE(review): another definition named ``build_phase_records`` follows
    this one in the file, so the last one bound to the name wins at import
    time -- confirm which variant is intended.
    """
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(get_state_variables(models=models, conds=conds), key=str)
    param_symbols, param_values = extract_parameters(parameters)

    if callables.get(output) is None:
        callables = build_callables(dbf, comps, phases, models,
                                    parameter_symbols=parameters.keys(),
                                    output=output,
                                    additional_statevars=state_variables,
                                    build_gradients=build_gradients,
                                    build_hessians=build_hessians)

    phase_records = {}
    for name in phases:
        mod = models[name]
        site_fracs = mod.site_fractions

        # Constraint functions for this phase
        cfuncs = build_constraints(mod, state_variables + site_fracs, conds,
                                   parameters=param_symbols)
        internal_cons = cfuncs.internal_cons
        internal_jac = cfuncs.internal_jac
        internal_cons_hess = cfuncs.internal_cons_hess
        multiphase_cons = cfuncs.multiphase_cons
        multiphase_jac = cfuncs.multiphase_jac
        num_internal_cons = cfuncs.num_internal_cons
        num_multiphase_cons = cfuncs.num_multiphase_cons

        output_callables = callables[output]
        phase_records[name.upper()] = PhaseRecord(
            comps, state_variables, site_fracs, param_values,
            output_callables['callables'][name],
            output_callables['grad_callables'][name],
            output_callables['hess_callables'][name],
            output_callables['massfuncs'][name],
            output_callables['massgradfuncs'][name],
            output_callables['masshessfuncs'][name],
            internal_cons,
            internal_jac,
            internal_cons_hess,
            multiphase_cons,
            multiphase_jac,
            num_internal_cons,
            num_multiphase_cons,
        )

        if verbose:
            print(name + ' ')
    return phase_records
def build_phase_records(dbf, comps, phases, state_variables, models, output='GM',
                        callables=None, parameters=None, verbose=False,
                        build_gradients=True, build_hessians=True):
    """
    Create one PhaseRecord per phase from compiled callables and constraints.

    Parameters
    ----------
    dbf : Database
        A Database object
    comps : List[Union[str, v.Species]]
        List of active pure elements or species.
    phases : list
        List of phase names
    state_variables : Iterable[v.StateVariable]
        State variables used to produce the generated functions.
    models : Mapping[str, Model]
        Mapping of phase names to model instances
    output : str
        Output property of the particular Model to sample
    callables : dict, optional
        Pre-computed callables for `output`. They are built here when
        ``callables`` has no entry for `output`.
        Maps {'output' -> {'function' -> {'phase_name' -> AutowrapFunction()}}
    parameters : dict, optional
        Maps SymEngine Symbol to numbers, for overriding the values of parameters in the Database.
    verbose : bool, optional
        Print the name of each phase after its record is built
    build_gradients : bool
        Whether or not to build gradient functions for the 'G' callables.
        Defaults to True.
    build_hessians : bool
        Whether or not to build Hessian functions for the 'G' callables.
        Defaults to True.

    Returns
    -------
    dict
        Dictionary mapping upper-cased phase names to PhaseRecord instances.

    Notes
    -----
    Callables that are passed in are used as-is and are not checked for
    compatibility. Users of build_callables are responsible for ensuring that
    the state variables, parameters and models used to construct the
    callables are compatible with the ones used to build the constraints and
    phase records.

    The callables for `output` are always built without gradients or
    Hessians; `build_gradients` and `build_hessians` are forwarded only to
    the separate set of callables built for the 'G' output.
    """
    comps = sorted(unpack_components(dbf, comps))
    if parameters is None:
        parameters = {}
    if callables is None:
        callables = {}

    state_variables = sorted(get_state_variables(models=models, conds=state_variables), key=str)
    param_symbols, param_values = extract_parameters(parameters)

    if callables.get(output) is None:
        callables = build_callables(dbf, comps, phases, models,
                                    parameter_symbols=parameters.keys(),
                                    output=output,
                                    additional_statevars=state_variables,
                                    build_gradients=False,
                                    build_hessians=False)

    # Temporary solution. PhaseRecord needs rework: https://github.com/pycalphad/pycalphad/pull/329#discussion_r634579356
    formulacallables = build_callables(dbf, comps, phases, models,
                                       parameter_symbols=parameters.keys(),
                                       output='G',
                                       additional_statevars=state_variables,
                                       build_gradients=build_gradients,
                                       build_hessians=build_hessians)

    # If a vector of parameters is specified, only pass the first row to the PhaseRecord
    # Future callers of PhaseRecord.obj_parameters_2d() can pass the full param_values array as an argument
    if len(param_values.shape) > 1:
        param_values = param_values[0]

    phase_records = {}
    for name in phases:
        mod = models[name]
        site_fracs = mod.site_fractions

        # Constraint functions for this phase
        cfuncs = build_constraints(mod, state_variables + site_fracs,
                                   parameters=param_symbols)
        internal_cons_func = cfuncs.internal_cons_func
        internal_cons_jac = cfuncs.internal_cons_jac
        internal_cons_hess = cfuncs.internal_cons_hess
        num_internal_cons = cfuncs.num_internal_cons

        formula = formulacallables['G']
        phase_records[name.upper()] = PhaseRecord(
            comps, state_variables, site_fracs, param_values,
            callables[output]['callables'][name],
            formula['callables'][name],
            formula['grad_callables'][name],
            formula['hess_callables'][name],
            callables[output]['massfuncs'][name],
            formula['formulamolefuncs'][name],
            formula['formulamolegradfuncs'][name],
            formula['formulamolehessfuncs'][name],
            internal_cons_func,
            internal_cons_jac,
            internal_cons_hess,
            num_internal_cons,
        )

        if verbose:
            print(name + ' ')
    return phase_records