Пример #1
0
    def test(self,
             species: List[ReferenceSpecies] = None,
             dataset: BACDataset = None,
             db_names: Union[str, List[str]] = None) -> BACDataset:
        """
        Evaluate the current BACs on a set of data.

        Note:
            At most one of `species`, `dataset`, or `db_names` may be given.

        Args:
            species: Species to test on; each is wrapped in a BACDatapoint.
            dataset: BACDataset to test on. Its `bac_data` is overwritten.
            db_names: Database names to test on.

        Returns:
            The dataset, with `bac_data` set to `calc_data` plus the corrections.

        Raises:
            BondAdditivityCorrectionError: If several data sources are given
                or if the resulting dataset is missing or empty.
        """
        sources_given = [src for src in (species, dataset, db_names) if src is not None]
        if len(sources_given) > 1:
            raise BondAdditivityCorrectionError('Cannot specify several data sources')

        # Build the dataset from whichever alternative source was supplied
        if species is not None:
            datapoints = [BACDatapoint(spc, level_of_theory=self.level_of_theory) for spc in species]
            dataset = BACDataset(datapoints)
        elif db_names is not None:
            database_key = self.load_database(names=db_names)
            dataset = extract_dataset(self.ref_databases[database_key], self.level_of_theory)

        if dataset is None or not len(dataset):
            raise BondAdditivityCorrectionError('No data available for evaluation')

        # SI value divided by 4184 (presumably J/mol -> kcal/mol; confirm units of calc_data)
        corrections = [self.get_correction(datapoint=d).value_si / 4184 for d in dataset]
        dataset.bac_data = dataset.calc_data + np.array(corrections)
        return dataset
Пример #2
0
    def write_to_database(self, overwrite: bool = False, alternate_path: str = None):
        """
        Write BACs to database.

        The quantum corrections file is read, the formatted BACs are spliced
        into the `mbac` or `pbac` dictionary (selected by `self.bac_type`),
        and the lines are written back out. Only the first line containing
        the dictionary keyword is treated as the start of the dictionary; if
        no line contains it, the file content is written unchanged.

        Args:
            overwrite: Overwrite existing BACs.
            alternate_path: Write BACs to this path instead.

        Raises:
            BondAdditivityCorrectionError: If no BACs are available.
            IOError: If BACs for this level of theory already exist and
                `overwrite` is False.
        """
        if self.bacs is None:
            raise BondAdditivityCorrectionError('No BACs available for writing')

        data_path = data.quantum_corrections_path
        with open(data_path) as f:
            lines = f.readlines()

        bacs_formatted = self.format_bacs(indent=True)

        if self.bac_type == 'm':
            keyword, bac_dict = 'mbac', data.mbac
        else:
            keyword, bac_dict = 'pbac', data.pbac

        # Add new BACs to file without changing existing formatting
        i = next((idx for idx, line in enumerate(lines) if keyword in line), None)
        if i is not None:
            if not bac_dict:
                # Dictionary is empty: rewrite its opening line and add the closing brace
                lines[i] = f'{keyword} = {{\n'
                lines[(i+1):(i+1)] = ['\n'] + bacs_formatted + ['\n}\n']
            elif self.level_of_theory not in bac_dict:
                # New entry: insert right after the dictionary's opening line
                lines[(i+1):(i+1)] = ['\n'] + bacs_formatted
            elif not overwrite:
                raise IOError(
                    f'{self.level_of_theory} already exists. Set `overwrite` to True.'
                )
            else:
                # Does not overwrite comments
                del_idx_start = del_idx_end = None
                # Iterate over a copy (the slice) so `lines` can be modified before breaking
                for idx, line2 in enumerate(lines[i:], start=i):
                    if repr(self.level_of_theory) in line2:
                        del_idx_start = idx
                        del_idx_end = None  # Discard any closing brace seen before the entry
                    elif line2.rstrip() == '    },':  # Can't have comment after final brace
                        del_idx_end = idx + 1
                    if del_idx_start is not None and del_idx_end is not None:
                        after_idx = del_idx_end + 1
                        # Guard the look-ahead: the entry may sit at the very end of the file
                        comment_after = (after_idx < len(lines)
                                         and lines[after_idx].lstrip().startswith('#'))
                        if lines[del_idx_start - 1].lstrip().startswith('#') or comment_after:
                            logging.warning('There may be left over comments from previous BACs')
                        lines[del_idx_start:del_idx_end] = bacs_formatted
                        break

        with open(data_path if alternate_path is None else alternate_path, 'w') as f:
            f.writelines(lines)

        # Reload data to update BAC dictionaries
        if alternate_path is None:
            importlib.reload(data)
Пример #3
0
    def save_correlation_mat(self, path: str, labels: List[str] = None):
        """
        Save a visual representation of the parameter correlation matrix.

        Returns without doing anything if matplotlib cannot be imported.

        Args:
            path: Path to save figure to.
            labels: Parameter labels. If omitted, they are derived from
                `self.bacs` according to `self.bac_type`.

        Raises:
            BondAdditivityCorrectionError: If no correlation matrix is available.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            # Plotting is optional; silently skip when matplotlib is unavailable
            return

        if self.correlation is None:
            raise BondAdditivityCorrectionError('Fit BACs before saving correlation matrix!')

        if labels is None:
            if self.bac_type == 'm':
                param_types = list(self.bacs.keys())
                atom_symbols = list(self.bacs[param_types[0]])
                labels = [r'$\alpha_{' + s + r'}$' for s in atom_symbols]      # atom_corr is alpha
                labels.extend(r'$\beta_{' + s + r'}$' for s in atom_symbols)   # bond_corr_length is beta
                labels.extend(r'$\gamma_{' + s + r'}$' for s in atom_symbols)  # bond_corr_neighbor is gamma
                if len(self.correlation) == 3 * len(atom_symbols) + 1:
                    labels.append('K')  # mol_corr is K
            elif self.bac_type == 'p':
                labels = list(self.bacs.keys())

        n_params = len(self.correlation)
        fig, ax = plt.subplots(figsize=(11, 11) if self.bac_type == 'm' else (18, 18))
        try:
            ax.matshow(self.correlation, cmap=plt.cm.PiYG)

            # Superimpose values as text
            for i in range(n_params):
                for j in range(n_params):
                    c = self.correlation[j, i]
                    ax.text(i, j, f'{c: .2f}', va='center', ha='center', fontsize=8)

            # Save lims because they get changed when modifying labels
            xlim = ax.get_xlim()
            ylim = ax.get_ylim()

            ticks = list(range(n_params))
            ax.set_xticks(ticks)
            ax.set_yticks(ticks)
            ax.set_xticklabels(labels, fontsize=14, rotation='vertical' if self.bac_type == 'p' else None)
            ax.set_yticklabels(labels, fontsize=14)
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            ax.tick_params(bottom=False, top=False, left=False, right=False)

            fig.savefig(path, dpi=600, bbox_inches='tight', pad_inches=0)
        finally:
            # pyplot keeps figures alive until closed; release it even if plotting fails
            plt.close(fig)
Пример #4
0
    def get_correction(self,
                       bonds: Dict[str, int] = None,
                       coords: np.ndarray = None,
                       nums: Iterable[int] = None,
                       datapoint: BACDatapoint = None,
                       spc: ReferenceSpecies = None,
                       multiplicity: int = None) -> ScalarQuantity:
        """
        Compute the bond additivity correction for one structure.

        The calculation is dispatched on `self.bac_type`:

        - 'p' (Petersson-type): uses `bonds`, a dictionary associating
          bond types with the number of that bond in the molecule.
        - 'm' (Melius-type): uses the atom coordinates in `coords`, the
          atomic numbers in `nums`, and the structure's multiplicity.

        In both cases a `datapoint` may be passed instead; if only `spc`
        is given, a BACDatapoint is built from it.

        Args:
            bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
            coords: A Numpy array of Cartesian molecular coordinates.
            nums: A sequence of atomic numbers.
            datapoint: If not using bonds, coords, nums, use BACDatapoint.
            spc: Alternatively, use ReferenceSpecies.
            multiplicity: The spin multiplicity of the molecule.

        Returns:
            The bond correction to the electronic energy.

        Raises:
            BondAdditivityCorrectionError: If no BAC parameters are loaded.
        """
        if self.bacs is None:
            bac_type_str = 'Melius' if self.bac_type == 'm' else 'Petersson'
            raise BondAdditivityCorrectionError(
                f'Missing {bac_type_str}-type BAC parameters for model chemistry {self.model_chemistry}'
            )

        # An explicitly supplied datapoint takes precedence over `spc`
        if spc is not None and datapoint is None:
            datapoint = BACDatapoint(spc, model_chemistry=self.model_chemistry)

        if self.bac_type == 'm':
            return self._get_melius_correction(
                coords=coords, nums=nums, datapoint=datapoint, multiplicity=multiplicity
            )
        if self.bac_type == 'p':
            return self._get_petersson_correction(bonds=bonds, datapoint=datapoint)
Пример #5
0
    def fit(self,
            weighted: bool = False,
            db_names: Union[str, List[str]] = 'main',
            **kwargs):
        """
        Fit bond additivity corrections to the calculated and reference
        data loaded from the requested databases. The fitting routines
        store the resulting BACs (kcal/mol based) on the instance, and
        error statistics before and after fitting are logged.

        Args:
            weighted: Perform weighted least squares by balancing training data.
            db_names: Optionally specify database names to train on (defaults to main).
            kwargs: Keyword arguments for fitting Melius-type BACs (see self._fit_melius).

        Raises:
            BondAdditivityCorrectionError: If the extracted dataset is empty.
        """
        self._reset_memoization()
        self.database_key = self.load_database(names=db_names)

        reference_db = self.ref_databases[self.database_key]
        self.dataset = extract_dataset(reference_db, self.model_chemistry)
        if not len(self.dataset):
            raise BondAdditivityCorrectionError(
                f'No species available for {self.model_chemistry} model chemistry'
            )

        if weighted:
            self.dataset.compute_weights()

        # Dispatch to the fitting routine matching the BAC type
        if self.bac_type == 'm':
            logging.info(f'Fitting Melius-type BACs for {self.model_chemistry}...')
            self._fit_melius(**kwargs)
        elif self.bac_type == 'p':
            logging.info(f'Fitting Petersson-type BACs for {self.model_chemistry}...')
            self._fit_petersson()

        stats_before = self.dataset.calculate_stats()
        stats_after = self.dataset.calculate_stats(for_bac_data=True)
        logging.info(f'RMSE/MAE before fitting: {stats_before.rmse:.2f}/{stats_before.mae:.2f} kcal/mol')
        logging.info(f'RMSE/MAE after fitting: {stats_after.rmse:.2f}/{stats_after.mae:.2f} kcal/mol')
Пример #6
0
    def fit(self,
            weighted: bool = False,
            db_names: Union[str, List[str]] = 'main',
            exclude_elements: Union[Sequence[str], Set[str], str] = None,
            charge: Union[Sequence[Union[str, int]], Set[Union[str, int]], str, int] = 'all',
            multiplicity: Union[Sequence[int], Set[int], int, str] = 'all',
            **kwargs):
        """
        Fit bond additivity corrections to the calculated and reference
        data loaded from the requested databases. The fitting routines
        store the resulting BACs (kcal/mol based) on the instance, and
        error statistics before and after fitting are logged.

        Args:
            weighted: Perform weighted least squares by balancing training data.
            db_names: Optionally specify database names to train on (defaults to main).
            exclude_elements: Molecules with any of the elements in this sequence are excluded from training data.
            charge: Allowable charges for molecules in training data.
            multiplicity: Allowable multiplicities for molecules in training data.
            kwargs: Keyword arguments for fitting Melius-type BACs (see self._fit_melius).

        Raises:
            BondAdditivityCorrectionError: If the extracted dataset is empty.
        """
        self._reset_memoization()
        self.database_key = self.load_database(names=db_names)

        # The filters are forwarded unchanged to the dataset extraction
        reference_db = self.ref_databases[self.database_key]
        self.dataset = extract_dataset(
            reference_db,
            self.level_of_theory,
            exclude_elements=exclude_elements,
            charge=charge,
            multiplicity=multiplicity,
        )
        if not len(self.dataset):
            raise BondAdditivityCorrectionError(f'No species available for {self.level_of_theory}')

        if weighted:
            self.dataset.compute_weights()

        # Dispatch to the fitting routine matching the BAC type
        if self.bac_type == 'm':
            logging.info(f'Fitting Melius-type BACs for {self.level_of_theory}...')
            self._fit_melius(**kwargs)
        elif self.bac_type == 'p':
            logging.info(f'Fitting Petersson-type BACs for {self.level_of_theory}...')
            self._fit_petersson()

        stats_before = self.dataset.calculate_stats()
        stats_after = self.dataset.calculate_stats(for_bac_data=True)
        logging.info(
            f'RMSE/MAE before fitting: {stats_before.rmse:.2f}/{stats_before.mae:.2f} kcal/mol'
        )
        logging.info(
            f'RMSE/MAE after fitting: {stats_after.rmse:.2f}/{stats_after.mae:.2f} kcal/mol'
        )
Пример #7
0
def get_bac(model_chemistry,
            bonds,
            coords,
            nums,
            bac_type='p',
            multiplicity=1):
    """
    Return the bond additivity correction in J/mol.

    Two kinds of correction are supported, selected case-insensitively
    through `bac_type`:

    - 'p' (Petersson-type): computed from `bonds`, a dictionary associating
      bond types with the number of that bond in the molecule.
    - 'm' (Melius-type): computed from the atom xyz coordinates in `coords`,
      the atomic numbers in `nums`, and the structure's multiplicity.

    Args:
        model_chemistry: The model chemistry, typically specified as method/basis.
            It is lower-cased before lookup.
        bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
        coords: A Numpy array of Cartesian molecular coordinates.
        nums: A sequence of atomic numbers.
        bac_type: The type of bond additivity correction to use.
        multiplicity: The spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy in J/mol.

    Raises:
        BondAdditivityCorrectionError: If `bac_type` is neither 'p' nor 'm'.
    """
    model_chemistry = model_chemistry.lower()
    kind = bac_type.lower()

    if kind == 'p':  # Petersson-type BACs
        return pbac.get_bac(model_chemistry, bonds)

    if kind == 'm':  # Melius-type BACs
        # Return negative because the correction is subtracted in the Melius paper
        melius_bac = mbac.get_bac(model_chemistry, coords, nums, multiplicity=multiplicity)
        return -melius_bac

    raise BondAdditivityCorrectionError(
        'BAC type {} is not available'.format(bac_type))
Пример #8
0
def get_bac(model_chemistry, bonds):
    """
    Return the total Petersson-type BAC in J/mol for the given model
    chemistry (should be ADDED to energy).

    `bonds` maps bond-type symbols to the multiplier applied to that bond's
    parameter (the number of such bonds), for example:

    bonds = {
        'C-H': 4,
        'C-C': 1,
        ...
    }

    A symbol missing from the parameters is retried with its atoms in
    reverse order (e.g. 'C-H' as 'H-C'); if that is missing too, the bond
    type is skipped with a warning.

    Raises:
        BondAdditivityCorrectionError: If there are no parameters for `model_chemistry`.
    """

    # Get BAC parameters
    try:
        params = data.pbac[model_chemistry]
    except KeyError:
        raise BondAdditivityCorrectionError(
            'Missing Petersson-type BAC parameters for model chemistry {}'.
            format(model_chemistry))

    # Sum corrections
    total = 0.0
    for symbol, count in bonds.items():
        key = symbol
        if key not in params:
            # Split into runs of letters / non-letters and reverse their order
            pieces = re.findall('[a-zA-Z]+|[^a-zA-Z]+', symbol)
            key = ''.join(pieces[::-1])
            if key not in params:
                logging.warning('Ignored unknown bond type {}.'.format(symbol))
                continue
        total += count * params[key]

    return total * 4184.0  # Convert kcal/mol to J/mol
Пример #9
0
def get_bac(model_chemistry, coords, nums, multiplicity=1, mol_corr=0.0):
    """
    Given the model chemistry, molecular coordinates, and atomic numbers,
    return the total Melius-type BAC in J/mol (should be SUBTRACTED from
    energy). The BAC parameters are looked up in `data.mbac`.

    The total is the sum of a molecular term, an atomic term, and a bond
    term; the bond term combines a bond-length correction with a
    neighbor correction for each bond.

    Note that a molecular correction term other than 0 destroys the size
    consistency of the quantum chemistry method. This correction also
    requires the multiplicity of the molecule.

    Args:
        model_chemistry: Key used to look up the parameters in `data.mbac`.
        coords: Cartesian molecular coordinates passed to `geo_to_mol`.
        nums: Atomic numbers passed to `geo_to_mol`.
        multiplicity: The spin multiplicity of the molecule.
        mol_corr: Coefficient of the molecular correction term.

    Returns:
        The total correction in J/mol.

    Raises:
        BondAdditivityCorrectionError: If there are no parameters for `model_chemistry`.
    """
    alpha = 3.0  # Angstrom^-1

    # Get BAC parameters
    try:
        params = data.mbac[model_chemistry]
    except KeyError:
        raise BondAdditivityCorrectionError(
            'Missing Melius-type BAC parameters for model chemistry {}'.format(
                model_chemistry))
    atom_corr = params['atom_corr']
    bond_corr_length = params['bond_corr_length']
    bond_corr_neighbor = params['bond_corr_neighbor']

    # Get single-bonded RMG molecule
    mol = geo_to_mol(coords, nums)

    # Molecular correction
    spin = 0.5 * (multiplicity - 1)
    bac_mol = mol_corr * (spin - sum(atom_spins[atom.element.symbol]
                                     for atom in mol.atoms))

    # Atomic correction
    bac_atom = sum(atom_corr[atom.element.symbol] for atom in mol.atoms)

    # Bond correction
    bac_bond = 0.0
    for bond in mol.getAllEdges():
        atom1 = bond.atom1
        atom2 = bond.atom2
        symbol1 = atom1.element.symbol
        symbol2 = atom2.element.symbol

        # Bond length correction
        length_corr = (bond_corr_length[symbol1] *
                       bond_corr_length[symbol2])**0.5
        length = np.linalg.norm(atom1.coords - atom2.coords)
        bac_bond += length_corr * np.exp(-alpha * length)

        # Neighbor correction: visit the atoms adjacent to atom1, then to atom2,
        # skipping the bond itself. `.items()` is used instead of the
        # Python 2-only `.iteritems()` so this runs on Python 2 and 3.
        for atom, symbol in ((atom1, symbol1), (atom2, symbol2)):
            for other_atom, other_bond in mol.getBonds(atom).items():
                if other_bond is not bond:
                    other_symbol = other_atom.element.symbol
                    bac_bond += bond_corr_neighbor[symbol] + bond_corr_neighbor[
                        other_symbol]

    return (bac_mol + bac_atom +
            bac_bond) * 4184.0  # Convert kcal/mol to J/mol
Пример #10
0
    def _get_melius_correction(self,
                               coords: np.ndarray = None,
                               nums: Iterable[int] = None,
                               datapoint: BACDatapoint = None,
                               multiplicity: int = None,
                               params: Dict[str, Union[float, Dict[str, float]]] = None) -> ScalarQuantity:
        """
        Compute the Melius-type BAC from a geometry or from a datapoint.

        Notes:
            A molecular correction term other than 0 destroys the size
            consistency of the quantum chemistry method. This correction
            also requires the multiplicity of the molecule.

            The negative of the total correction described in
            Anantharaman and Melius (JPCA 2005) is returned so that it
            can be added to the energy.

            If `datapoint` is given together with both `nums` and
            `coords`, the explicit geometry wins and a warning is logged.

        Args:
            coords: Numpy array of Cartesian atomic coordinates.
            nums: Sequence of atomic numbers.
            datapoint: BACDatapoint instead of molecule.
            multiplicity: Multiplicity of the molecule (not necessary if using datapoint).
            params: Optionally provide parameters other than those stored in self.

        Returns:
            Melius-type bond additivity correction.

        Raises:
            BondAdditivityCorrectionError: If the molecular correction is
                nonzero and no multiplicity is available.
        """
        if params is None:
            params = self.bacs
        atom_corr = params['atom_corr']
        bond_corr_length = params['bond_corr_length']
        bond_corr_neighbor = params['bond_corr_neighbor']
        mol_corr = params.get('mol_corr', 0.0)

        # Get single-bonded RMG molecule
        mol = None
        if datapoint is not None:
            if nums is not None and coords is not None:
                logging.warning(
                    f'Species {datapoint.spc.label} will not be used because `nums` and `coords` were specified'
                )
            else:
                mol = datapoint.to_mol(from_geo=True)
                multiplicity = datapoint.spc.multiplicity  # Use species multiplicity instead
        if mol is None:
            mol = geo_to_mol(coords, nums=nums)

        # Molecular correction
        if mol_corr != 0 and multiplicity is None:
            raise BondAdditivityCorrectionError(f'Missing multiplicity for {mol}')
        bac_mol = mol_corr * self._get_mol_coeff(mol, multiplicity=multiplicity)

        # Atomic correction
        atom_counts = self._get_atom_counts(mol)
        bac_atom = sum(count * atom_corr[symbol] for symbol, count in atom_counts.items())

        # Bond length correction: tuple keys use the geometric mean of the
        # two atoms' parameters, any other key uses its parameter directly
        bac_length = 0
        for symbol, coeff in self._get_length_coeffs(mol).items():
            if isinstance(symbol, tuple):
                term = coeff * (bond_corr_length[symbol[0]] * bond_corr_length[symbol[1]]) ** 0.5
            else:
                term = coeff * bond_corr_length[symbol]
            bac_length = bac_length + term

        # Bond neighbor correction
        neighbor_coeffs = self._get_neighbor_coeffs(mol)
        bac_neighbor = sum(count * bond_corr_neighbor[symbol] for symbol, count in neighbor_coeffs.items())
        bac_bond = bac_length + bac_neighbor

        # Note the minus sign
        return ScalarQuantity(-(bac_mol + bac_atom + bac_bond), 'kcal/mol')
Пример #11
0
 def bac_type(self, val: str):
     """
     Set the BAC type and refresh the BACs.

     Only 'm' and 'p' are accepted; any other value raises
     BondAdditivityCorrectionError and leaves the stored type untouched.
     """
     if val in {'m', 'p'}:
         self._bac_type = val
         # Refresh the BACs after every change of type
         self._update_bacs()
         return
     raise BondAdditivityCorrectionError(f'Invalid BAC type: {val}')
Пример #12
0
def get_bac(level_of_theory: Union[LevelOfTheory, CompositeLevelOfTheory],
            bonds: Dict[str, int],
            coords: np.ndarray,
            nums: Iterable[int],
            bac_type: str = 'p',
            multiplicity: int = 1) -> float:
    """
    Return the bond additivity correction in J/mol.

    Two kinds of correction are supported. Petersson-type corrections
    (`bac_type` 'p') use `bonds`, a dictionary associating bond types with
    the number of that bond in the molecule. Melius-type corrections
    (`bac_type` 'm') use the atom xyz coordinates in `coords`, the atomic
    numbers in `nums`, and the structure's multiplicity.

    Several levels of theory are tried in turn, starting with the one
    given and falling back to simplified variants. A warning is logged
    when a fallback is used.

    Args:
        level_of_theory: The level of theory.
        bonds: A dictionary of bond types (e.g., 'C=O') with their associated counts.
        coords: A Numpy array of Cartesian molecular coordinates.
        nums: A sequence of atomic numbers.
        bac_type: The type of bond additivity correction to use.
        multiplicity: The spin multiplicity of the molecule.

    Returns:
        The bond correction to the electronic energy in J/mol.

    Raises:
        BondAdditivityCorrectionError: If the last candidate level of theory
            also fails.
    """
    # Candidates in order of preference; anything but the first triggers a warning
    lots_to_attempt = [
        level_of_theory,  # Full level of theory
        level_of_theory.simple(),  # Only method and basis
    ]
    if isinstance(level_of_theory, CompositeLevelOfTheory):
        lots_to_attempt.append(level_of_theory.energy)  # Full energy level
        lots_to_attempt.append(level_of_theory.energy.simple())  # Energy level with only method and basis

    preferred_lot = lots_to_attempt[0]
    for lot in lots_to_attempt:
        try:
            corr = BAC(lot, bac_type=bac_type).get_correction(
                bonds=bonds, coords=coords, nums=nums, multiplicity=multiplicity
            ).value_si
        except BondAdditivityCorrectionError as e:
            if lot is not lots_to_attempt[-1]:
                continue  # More candidates left to try
            if 'BAC parameters' not in str(e):
                raise
            # Report the originally requested level of theory, not the fallback
            bac_type_str = 'Melius' if bac_type == 'm' else 'Petersson'
            raise BondAdditivityCorrectionError(
                f'Missing {bac_type_str}-type BAC parameters for {level_of_theory}'
            )
        if lot is not preferred_lot:
            logging.warning(f'No exact BAC match found for {level_of_theory}. Using {lot} instead.')
        return corr
Пример #13
0
 def wrapper(*args, **kwargs):
     # The first positional argument is the instance the wrapped callable runs on
     instance = args[0]
     if instance.level_of_theory is None:
         raise BondAdditivityCorrectionError('Level of theory is not defined')
     # Level of theory is set: forward the call unchanged
     return func(*args, **kwargs)
Пример #14
0
 def wrapper(*args, **kwargs):
     # The first positional argument is the instance the wrapped callable runs on
     instance = args[0]
     if instance.model_chemistry is None:
         raise BondAdditivityCorrectionError(
             'Model chemistry is not defined')
     # Model chemistry is set: forward the call unchanged
     return func(*args, **kwargs)