def test(self,
         species: List[ReferenceSpecies] = None,
         dataset: BACDataset = None,
         db_names: Union[str, List[str]] = None) -> BACDataset:
    """
    Evaluate the stored BACs on a set of data.

    Exactly one data source is expected; passing more than one of
    `species`, `dataset`, and `db_names` is rejected.

    Args:
        species: Species to evaluate; each one is wrapped in a BACDatapoint
            at this object's level of theory.
        dataset: An already assembled BACDataset to evaluate.
        db_names: Name(s) of reference database(s) to load and evaluate.

    Returns:
        The evaluated BACDataset, with `bac_data` set to `calc_data` plus
        the per-datapoint corrections.

    Raises:
        BondAdditivityCorrectionError: If several data sources are given, or
            if the resulting dataset is missing or empty.
    """
    provided = [source for source in (species, dataset, db_names) if source is not None]
    if len(provided) > 1:
        raise BondAdditivityCorrectionError('Cannot specify several data sources')

    if species is not None:
        datapoints = []
        for spc in species:
            datapoints.append(BACDatapoint(spc, level_of_theory=self.level_of_theory))
        dataset = BACDataset(datapoints)
    elif db_names is not None:
        key = self.load_database(names=db_names)
        dataset = extract_dataset(self.ref_databases[key], self.level_of_theory)

    if dataset is None or len(dataset) == 0:
        raise BondAdditivityCorrectionError('No data available for evaluation')

    # Each correction's SI value is divided by 4184 before being added
    # (presumably J/mol -> kcal/mol, matching the units of `calc_data`).
    per_point = [self.get_correction(datapoint=point).value_si / 4184 for point in dataset]
    dataset.bac_data = dataset.calc_data + np.array(per_point)
    return dataset
def write_to_database(self, overwrite: bool = False, alternate_path: str = None):
    """
    Write the BACs stored in `self.bacs` into the quantum corrections file.

    The file at `data.quantum_corrections_path` is read line by line, the
    output of `self.format_bacs(indent=True)` is spliced in after the first
    line containing the keyword `mbac` or `pbac` (chosen by `self.bac_type`),
    and all lines are written back out. Existing lines are otherwise left
    untouched so the file keeps its formatting.

    Args:
        overwrite: Replace the existing entry for `self.level_of_theory`
            instead of raising when one is already present.
        alternate_path: Write the modified lines to this path instead of
            back to `data.quantum_corrections_path`. When given, the `data`
            module is not reloaded.

    Raises:
        BondAdditivityCorrectionError: If `self.bacs` is None.
        IOError: If an entry for `self.level_of_theory` already exists and
            `overwrite` is False.
    """
    if self.bacs is None:
        raise BondAdditivityCorrectionError('No BACs available for writing')

    # The source file is always read from the default location, even when
    # the result is written to `alternate_path`.
    data_path = data.quantum_corrections_path
    with open(data_path) as f:
        lines = f.readlines()

    bacs_formatted = self.format_bacs(indent=True)

    # Select the in-memory dictionary and the file keyword for this BAC type
    bac_dict = data.mbac if self.bac_type == 'm' else data.pbac
    keyword = 'mbac' if self.bac_type == 'm' else 'pbac'
    has_entries = bool(data.mbac) if self.bac_type == 'm' else bool(data.pbac)

    # Add new BACs to file without changing existing formatting
    for i, line in enumerate(lines):
        # Only the first line containing the keyword is processed (see the
        # unconditional `break` at the end of this branch).
        if keyword in line:
            if has_entries:
                if self.level_of_theory in bac_dict:
                    if overwrite:
                        # Does not overwrite comments
                        del_idx_start = del_idx_end = None
                        for j, line2 in enumerate(lines[i:]):
                            if repr(self.level_of_theory) in line2:
                                # Start of the entry to replace; forget any
                                # closing brace seen for an earlier entry.
                                del_idx_start = i + j
                                del_idx_end = None
                            # NOTE(review): this comparison is exact, including the
                            # leading whitespace of the literal -- confirm it matches
                            # the indentation used in the data file.
                            elif line2.rstrip() == ' },':  # Can't have comment after final brace
                                # Exclusive end index: one past the closing brace line
                                del_idx_end = i + j + 1
                            if del_idx_start is not None and del_idx_end is not None:
                                # NOTE(review): `del_idx_end` is already one past the
                                # closing brace, so `del_idx_end + 1` inspects the second
                                # line after the entry and may raise IndexError near the
                                # end of the file -- confirm this is intended.
                                if (lines[del_idx_start - 1].lstrip().startswith('#')
                                        or lines[del_idx_end + 1].lstrip().startswith('#')):
                                    logging.warning('There may be left over comments from previous BACs')
                                # Replace the old entry in place and stop scanning
                                lines[del_idx_start:del_idx_end] = bacs_formatted
                                break
                    else:
                        raise IOError(
                            f'{self.level_of_theory} already exists. Set `overwrite` to True.'
                        )
                else:
                    # New level of theory: insert right after the keyword line
                    lines[(i+1):(i+1)] = ['\n'] + bacs_formatted
            else:
                # No entries yet: rewrite the keyword line as the opening of a
                # dictionary and add the new entry followed by a closing brace.
                lines[i] = f'{keyword} = {{\n'
                lines[(i+1):(i+1)] = ['\n'] + bacs_formatted + ['\n}\n']
            break

    with open(data_path if alternate_path is None else alternate_path, 'w') as f:
        f.writelines(lines)

    # Reload data to update BAC dictionaries
    if alternate_path is None:
        importlib.reload(data)
def save_correlation_mat(self, path: str, labels: List[str] = None):
    """
    Save a visual representation of the parameter correlation matrix.

    The matrix in `self.correlation` is drawn with `matshow`, each cell is
    annotated with its value, and the figure is written to `path`. The
    figure is closed afterwards so repeated calls do not accumulate open
    pyplot figures.

    If matplotlib cannot be imported, a warning is logged and nothing is
    saved.

    Args:
        path: Path to save figure to.
        labels: Parameter labels. If omitted, they are derived from
            `self.bacs` according to `self.bac_type`.

    Raises:
        BondAdditivityCorrectionError: If `self.correlation` is None.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # Plotting is optional: skip rather than fail, but leave a trace
        logging.warning('Correlation matrix was not saved because matplotlib could not be imported')
        return

    if self.correlation is None:
        raise BondAdditivityCorrectionError('Fit BACs before saving correlation matrix!')

    if labels is None:
        if self.bac_type == 'm':
            param_types = list(self.bacs.keys())
            # Atom symbols are taken from the first parameter group
            atom_symbols = list(self.bacs[param_types[0]])
            labels = [r'$\alpha_{' + s + r'}$' for s in atom_symbols]  # atom_corr is alpha
            labels.extend(r'$\beta_{' + s + r'}$' for s in atom_symbols)  # bond_corr_length is beta
            labels.extend(r'$\gamma_{' + s + r'}$' for s in atom_symbols)  # bond_corr_neighbor is gamma
            if len(self.correlation) == 3 * len(atom_symbols) + 1:
                labels.append('K')  # mol_corr is K
        elif self.bac_type == 'p':
            labels = list(self.bacs.keys())

    n_params = len(self.correlation)
    tick_positions = list(range(n_params))

    fig, ax = plt.subplots(figsize=(11, 11) if self.bac_type == 'm' else (18, 18))
    try:
        ax.matshow(self.correlation, cmap=plt.cm.PiYG)

        # Superimpose values as text
        for i in range(n_params):
            for j in range(n_params):
                c = self.correlation[j, i]
                ax.text(i, j, f'{c: .2f}', va='center', ha='center', fontsize=8)

        # Save lims because they get changed when modifying labels
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        ax.set_xticks(tick_positions)
        ax.set_yticks(tick_positions)
        ax.set_xticklabels(labels, fontsize=14, rotation='vertical' if self.bac_type == 'p' else None)
        ax.set_yticklabels(labels, fontsize=14)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.tick_params(bottom=False, top=False, left=False, right=False)

        fig.savefig(path, dpi=600, bbox_inches='tight', pad_inches=0)
    finally:
        # pyplot keeps figures alive until they are closed explicitly
        plt.close(fig)
def get_correction(self,
                   bonds: Dict[str, int] = None,
                   coords: np.ndarray = None,
                   nums: Iterable[int] = None,
                   datapoint: BACDatapoint = None,
                   spc: ReferenceSpecies = None,
                   multiplicity: int = None) -> ScalarQuantity:
    """
    Return the bond additivity correction for a molecule.

    The kind of correction is selected by `self.bac_type`: 'p' dispatches to
    the Petersson-type correction, which works from `bonds`, and 'm'
    dispatches to the Melius-type correction, which works from `coords`,
    `nums`, and `multiplicity`. Either kind can alternatively be given a
    `datapoint` (or a `spc`, which is wrapped in a BACDatapoint when no
    datapoint was passed).

    Args:
        bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
        coords: A Numpy array of Cartesian molecular coordinates.
        nums: A sequence of atomic numbers.
        datapoint: BACDatapoint to use instead of bonds, coords, and nums.
        spc: ReferenceSpecies to use instead of a datapoint.
        multiplicity: The spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy.

    Raises:
        BondAdditivityCorrectionError: If no BAC parameters are available.
    """
    if self.bacs is None:
        if self.bac_type == 'm':
            bac_type_str = 'Melius'
        else:
            bac_type_str = 'Petersson'
        raise BondAdditivityCorrectionError(
            f'Missing {bac_type_str}-type BAC parameters for model chemistry {self.model_chemistry}'
        )

    # An explicitly passed datapoint takes precedence over a species
    if spc is not None and datapoint is None:
        datapoint = BACDatapoint(spc, model_chemistry=self.model_chemistry)

    if self.bac_type == 'm':
        return self._get_melius_correction(
            coords=coords,
            nums=nums,
            datapoint=datapoint,
            multiplicity=multiplicity,
        )
    if self.bac_type == 'p':
        return self._get_petersson_correction(bonds=bonds, datapoint=datapoint)
def fit(self,
        weighted: bool = False,
        db_names: Union[str, List[str]] = 'main',
        **kwargs):
    """
    Fit bond additivity corrections to reference data.

    The training set is extracted from the loaded reference database(s) for
    this object's model chemistry, the fit matching `self.bac_type` is run,
    and error statistics before and after fitting are logged in kcal/mol.

    Args:
        weighted: Compute dataset weights before fitting (weighted least squares).
        db_names: Database name(s) to train on (defaults to 'main').
        kwargs: Keyword arguments forwarded to `self._fit_melius`.

    Raises:
        BondAdditivityCorrectionError: If the extracted dataset is empty.
    """
    self._reset_memoization()

    self.database_key = self.load_database(names=db_names)
    reference_db = self.ref_databases[self.database_key]
    self.dataset = extract_dataset(reference_db, self.model_chemistry)
    if len(self.dataset) == 0:
        raise BondAdditivityCorrectionError(
            f'No species available for {self.model_chemistry} model chemistry'
        )

    if weighted:
        self.dataset.compute_weights()

    if self.bac_type == 'm':
        logging.info(f'Fitting Melius-type BACs for {self.model_chemistry}...')
        self._fit_melius(**kwargs)
    elif self.bac_type == 'p':
        logging.info(f'Fitting Petersson-type BACs for {self.model_chemistry}...')
        self._fit_petersson()

    # Report how much the fit improved agreement with the reference data
    stats_before = self.dataset.calculate_stats()
    stats_after = self.dataset.calculate_stats(for_bac_data=True)
    logging.info(
        f'RMSE/MAE before fitting: {stats_before.rmse:.2f}/{stats_before.mae:.2f} kcal/mol'
    )
    logging.info(
        f'RMSE/MAE after fitting: {stats_after.rmse:.2f}/{stats_after.mae:.2f} kcal/mol'
    )
def fit(self,
        weighted: bool = False,
        db_names: Union[str, List[str]] = 'main',
        exclude_elements: Union[Sequence[str], Set[str], str] = None,
        charge: Union[Sequence[Union[str, int]], Set[Union[str, int]], str, int] = 'all',
        multiplicity: Union[Sequence[int], Set[int], int, str] = 'all',
        **kwargs):
    """
    Fit bond additivity corrections to reference data.

    The training set is extracted from the loaded reference database(s) for
    this object's level of theory, filtered by the given element, charge,
    and multiplicity criteria. The fit matching `self.bac_type` is then run
    and error statistics before and after fitting are logged in kcal/mol.

    Args:
        weighted: Compute dataset weights before fitting (weighted least squares).
        db_names: Database name(s) to train on (defaults to 'main').
        exclude_elements: Molecules containing any of these elements are
            excluded from the training data.
        charge: Allowable charges for molecules in the training data.
        multiplicity: Allowable multiplicities for molecules in the training data.
        kwargs: Keyword arguments forwarded to `self._fit_melius`.

    Raises:
        BondAdditivityCorrectionError: If the extracted dataset is empty.
    """
    self._reset_memoization()

    self.database_key = self.load_database(names=db_names)
    reference_db = self.ref_databases[self.database_key]
    self.dataset = extract_dataset(
        reference_db,
        self.level_of_theory,
        exclude_elements=exclude_elements,
        charge=charge,
        multiplicity=multiplicity,
    )
    if len(self.dataset) == 0:
        raise BondAdditivityCorrectionError(f'No species available for {self.level_of_theory}')

    if weighted:
        self.dataset.compute_weights()

    if self.bac_type == 'm':
        logging.info(f'Fitting Melius-type BACs for {self.level_of_theory}...')
        self._fit_melius(**kwargs)
    elif self.bac_type == 'p':
        logging.info(f'Fitting Petersson-type BACs for {self.level_of_theory}...')
        self._fit_petersson()

    # Report how much the fit improved agreement with the reference data
    stats_before = self.dataset.calculate_stats()
    stats_after = self.dataset.calculate_stats(for_bac_data=True)
    logging.info(f'RMSE/MAE before fitting: {stats_before.rmse:.2f}/{stats_before.mae:.2f} kcal/mol')
    logging.info(f'RMSE/MAE after fitting: {stats_after.rmse:.2f}/{stats_after.mae:.2f} kcal/mol')
def get_bac(model_chemistry, bonds, coords, nums, bac_type='p', multiplicity=1):
    """
    Return the bond additivity correction in J/mol.

    Two kinds of correction are supported, selected case-insensitively by
    `bac_type`. 'p' selects the Petersson-type correction, computed from
    `bonds`. 'm' selects the Melius-type correction, computed from `coords`,
    `nums`, and `multiplicity`; its sign is flipped before it is returned.

    Args:
        model_chemistry: The model chemistry, typically specified as
            method/basis. It is lower-cased before lookup.
        bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
        coords: A Numpy array of Cartesian molecular coordinates.
        nums: A sequence of atomic numbers.
        bac_type: The type of bond additivity correction to use ('p' or 'm').
        multiplicity: The spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy in J/mol.

    Raises:
        BondAdditivityCorrectionError: If `bac_type` is neither 'p' nor 'm'.
    """
    model_chemistry = model_chemistry.lower()
    kind = bac_type.lower()

    if kind == 'p':  # Petersson-type BACs
        return pbac.get_bac(model_chemistry, bonds)

    if kind == 'm':  # Melius-type BACs
        melius_bac = mbac.get_bac(model_chemistry, coords, nums, multiplicity=multiplicity)
        # Return negative because the correction is subtracted in the Melius paper
        return -melius_bac

    raise BondAdditivityCorrectionError('BAC type {} is not available'.format(bac_type))
def get_bac(model_chemistry, bonds):
    """
    Return the total Petersson-type BAC (to be ADDED to the energy) in J/mol.

    `bonds` maps bond symbols to the number of such bonds in the molecule:

        bonds = {
            'C-H': count1,
            'C-C': count2,
            'C=C': count3,
            ...
        }

    A symbol missing from the parameters is retried with its parts reversed
    (e.g. 'O=C' for 'C=O'); if that is missing too, the bond type is skipped
    with a warning.

    Raises:
        BondAdditivityCorrectionError: If there are no parameters for
            `model_chemistry`.
    """
    # Get BAC parameters
    try:
        params = data.pbac[model_chemistry]
    except KeyError:
        raise BondAdditivityCorrectionError(
            'Missing Petersson-type BAC parameters for model chemistry {}'.format(model_chemistry))

    # Sum corrections
    total = 0.0
    for symbol, count in bonds.items():
        key = symbol
        if key not in params:
            # Split into alternating letter / non-letter runs and reverse them
            pieces = re.findall('[a-zA-Z]+|[^a-zA-Z]+', symbol)
            pieces.reverse()
            key = ''.join(pieces)
            if key not in params:
                logging.warning('Ignored unknown bond type {}.'.format(symbol))
                continue
        total += count * params[key]

    return total * 4184.0  # Convert kcal/mol to J/mol
def get_bac(model_chemistry, coords, nums, multiplicity=1, mol_corr=0.0):
    """
    Return the total Melius-type BAC (to be SUBTRACTED from the energy) in J/mol.

    The correction is the sum of a molecular term, a per-atom term, and a
    per-bond term; the bond term combines a bond-length contribution with
    contributions from the neighbors of both bonded atoms.

    Note that a molecular correction term other than 0 destroys the size
    consistency of the quantum chemistry method. That term also requires
    the multiplicity of the molecule.

    Raises:
        BondAdditivityCorrectionError: If there are no parameters for
            `model_chemistry`.
    """
    alpha = 3.0  # Angstrom^-1

    # Get BAC parameters
    try:
        params = data.mbac[model_chemistry]
    except KeyError:
        raise BondAdditivityCorrectionError(
            'Missing Melius-type BAC parameters for model chemistry {}'.format(model_chemistry))
    atom_corr = params['atom_corr']
    bond_corr_length = params['bond_corr_length']
    bond_corr_neighbor = params['bond_corr_neighbor']

    # Get single-bonded RMG molecule
    mol = geo_to_mol(coords, nums)

    # Molecular correction
    spin = 0.5 * (multiplicity - 1)
    total_atom_spin = sum(atom_spins[atom.element.symbol] for atom in mol.atoms)
    bac_mol = mol_corr * (spin - total_atom_spin)

    # Atomic correction
    bac_atom = sum(atom_corr[atom.element.symbol] for atom in mol.atoms)

    # Bond correction
    bac_bond = 0.0
    for bond in mol.getAllEdges():
        atom1, atom2 = bond.atom1, bond.atom2
        symbol1 = atom1.element.symbol
        symbol2 = atom2.element.symbol

        # Bond length correction
        length_corr = (bond_corr_length[symbol1] * bond_corr_length[symbol2])**0.5
        length = np.linalg.norm(atom1.coords - atom2.coords)
        bac_bond += length_corr * np.exp(-alpha * length)

        # Neighbor correction: atoms adjacent to atom1 first, then to atom2,
        # skipping the bond currently being processed
        for end_atom, end_symbol in ((atom1, symbol1), (atom2, symbol2)):
            for other_atom, other_bond in mol.getBonds(end_atom).iteritems():
                if other_bond is bond:
                    continue
                other_symbol = other_atom.element.symbol
                bac_bond += bond_corr_neighbor[end_symbol] + bond_corr_neighbor[other_symbol]

    return (bac_mol + bac_atom + bac_bond) * 4184.0  # Convert kcal/mol to J/mol
def _get_melius_correction(self,
                           coords: np.ndarray = None,
                           nums: Iterable[int] = None,
                           datapoint: BACDatapoint = None,
                           multiplicity: int = None,
                           params: Dict[str, Union[float, Dict[str, float]]] = None) -> ScalarQuantity:
    """
    Compute the total Melius-type bond additivity correction.

    Notes:
        A molecular correction term other than 0 destroys the size
        consistency of the quantum chemistry method. That term also
        requires the multiplicity of the molecule.

        The negative of the total correction described in
        Anantharaman and Melius (JPCA 2005) is returned so that it can be
        added to the energy.

    Args:
        coords: Numpy array of Cartesian atomic coordinates.
        nums: Sequence of atomic numbers.
        datapoint: BACDatapoint to use instead of coords and nums. It is
            ignored (with a warning) when both coords and nums are given.
        multiplicity: Multiplicity of the molecule (taken from the species
            when the datapoint is used).
        params: Parameters to use instead of those stored in `self.bacs`.

    Returns:
        Melius-type bond additivity correction in kcal/mol.

    Raises:
        BondAdditivityCorrectionError: If the molecular correction is
            nonzero and no multiplicity is available.
    """
    if params is None:
        params = self.bacs
    atom_corr = params['atom_corr']
    bond_corr_length = params['bond_corr_length']
    bond_corr_neighbor = params['bond_corr_neighbor']
    mol_corr = params.get('mol_corr', 0.0)

    # Get single-bonded RMG molecule
    mol = None
    if datapoint is not None:
        if nums is not None and coords is not None:
            logging.warning(
                f'Species {datapoint.spc.label} will not be used because `nums` and `coords` were specified'
            )
        else:
            mol = datapoint.to_mol(from_geo=True)
            multiplicity = datapoint.spc.multiplicity  # Use species multiplicity instead
    if mol is None:
        mol = geo_to_mol(coords, nums=nums)

    # Molecular correction
    if mol_corr != 0 and multiplicity is None:
        raise BondAdditivityCorrectionError(f'Missing multiplicity for {mol}')
    bac_mol = mol_corr * self._get_mol_coeff(mol, multiplicity=multiplicity)

    # Atomic correction
    bac_atom = sum(count * atom_corr[symbol] for symbol, count in self._get_atom_counts(mol).items())

    # Bond correction: tuple keys combine the parameters of two symbols
    # through a geometric mean, plain keys use a single parameter
    bac_length = 0
    for symbol, coeff in self._get_length_coeffs(mol).items():
        if isinstance(symbol, tuple):
            bac_length += coeff * (bond_corr_length[symbol[0]] * bond_corr_length[symbol[1]]) ** 0.5
        else:
            bac_length += coeff * bond_corr_length[symbol]
    bac_neighbor = sum(count * bond_corr_neighbor[symbol]
                       for symbol, count in self._get_neighbor_coeffs(mol).items())
    bac_bond = bac_length + bac_neighbor

    # Note the minus sign
    return ScalarQuantity(-(bac_mol + bac_atom + bac_bond), 'kcal/mol')
def bac_type(self, val: str):
    """
    Set the BAC type and refresh the stored BACs.

    Args:
        val: The new BAC type; must be 'm' or 'p'.

    Raises:
        BondAdditivityCorrectionError: If `val` is not a valid BAC type.
    """
    is_valid = val in {'m', 'p'}
    if not is_valid:
        raise BondAdditivityCorrectionError(f'Invalid BAC type: {val}')

    self._bac_type = val
    # The stored BACs depend on the type, so update them on every change
    self._update_bacs()
def get_bac(level_of_theory: Union[LevelOfTheory, CompositeLevelOfTheory],
            bonds: Dict[str, int],
            coords: np.ndarray,
            nums: Iterable[int],
            bac_type: str = 'p',
            multiplicity: int = 1) -> float:
    """
    Return the bond additivity correction in J/mol.

    Two kinds of correction are supported. Petersson-type corrections are
    selected with `bac_type` 'p' and use `bonds`, a dictionary associating
    bond types with the number of that bond in the molecule. Melius-type
    corrections are selected with 'm' and use the atom coordinates in
    `coords`, the atomic numbers in `nums`, and the multiplicity.

    Parameters are looked up for progressively less specific levels of
    theory: the full level, its simplified form, and, for composite levels,
    the energy level and its simplified form. A warning is logged when
    anything other than the first candidate is used.

    Args:
        level_of_theory: The level of theory.
        bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
        coords: A Numpy array of Cartesian molecular coordinates.
        nums: A sequence of atomic numbers.
        bac_type: The type of bond additivity correction to use.
        multiplicity: The spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy in J/mol.

    Raises:
        BondAdditivityCorrectionError: If no candidate level of theory
            yields a correction.
    """
    def _get_bac(_lot):
        """Helper function to get BACs"""
        bac = BAC(_lot, bac_type=bac_type)
        correction = bac.get_correction(bonds=bonds, coords=coords, nums=nums, multiplicity=multiplicity)
        return correction.value_si

    # Try to match each of these levels of theory, but issue warning if full level of theory cannot be matched
    lots_to_attempt = [
        level_of_theory,  # Full level of theory
        level_of_theory.simple(),  # Only method and basis
    ]
    if isinstance(level_of_theory, CompositeLevelOfTheory):
        lots_to_attempt.append(level_of_theory.energy)  # Full energy level
        lots_to_attempt.append(level_of_theory.energy.simple())  # Energy level with only method and basis

    exact_lot = lots_to_attempt[0]
    final_lot = lots_to_attempt[-1]
    for lot in lots_to_attempt:
        try:
            corr = _get_bac(lot)
        except BondAdditivityCorrectionError as e:
            if lot is not final_lot:
                # Less specific candidates remain
                continue
            if 'BAC parameters' not in str(e):
                raise
            # Report the level of theory that was requested, not the fallback
            bac_type_str = 'Melius' if bac_type == 'm' else 'Petersson'
            raise BondAdditivityCorrectionError(
                f'Missing {bac_type_str}-type BAC parameters for {level_of_theory}'
            )

        if lot is not exact_lot:
            logging.warning(f'No exact BAC match found for {level_of_theory}. Using {lot} instead.')
        return corr
def wrapper(*args, **kwargs):
    """
    Call the wrapped function only if the instance has a level of theory.

    Raises:
        BondAdditivityCorrectionError: If the instance's `level_of_theory`
            is None.
    """
    instance = args[0]  # First positional argument is the instance
    if instance.level_of_theory is None:
        raise BondAdditivityCorrectionError('Level of theory is not defined')
    return func(*args, **kwargs)
def wrapper(*args, **kwargs):
    """
    Call the wrapped function only if the instance has a model chemistry.

    Raises:
        BondAdditivityCorrectionError: If the instance's `model_chemistry`
            is None.
    """
    instance = args[0]  # First positional argument is the instance
    if instance.model_chemistry is None:
        raise BondAdditivityCorrectionError('Model chemistry is not defined')
    return func(*args, **kwargs)