Exemplo n.º 1
0
def get_qm_positions_energies_and_charges(sigma,
                                          overlapping_precision,
                                          database='qm7',
                                          planar=False):
    """Load a QM dataset and rescale its atomic positions.

    All positions are multiplied by one global factor chosen so that the
    smallest interatomic distance found in any molecule of the dataset
    becomes ``sigma * sqrt(-8 * log(overlapping_precision))``.

    Note that, despite the function name, no energies are returned.

    Parameters
    ----------
    sigma : float
        Length scale entering the target minimum distance (presumably the
        width of the Gaussian used to represent an atom).
    overlapping_precision : float
        Value whose logarithm sets the target minimum distance; it must
        lie in (0, 1) for that distance to be a positive real number.
    database : str
        Either ``'qm7'`` or ``'qm9'``.
    planar : bool
        Forwarded to the dataset loader as ``only_planar``.

    Returns
    -------
    tuple of torch.Tensor
        ``(positions, atomic_numbers, atom_valences, electron_valences)``,
        each wrapping a float32 NumPy array.

    Raises
    ------
    ValueError
        If ``database`` is neither ``'qm7'`` nor ``'qm9'``.
    """
    if database == 'qm7':
        loader = load_qm7
    elif database == 'qm9':
        loader = load_qm9
    else:
        raise ValueError('only qm7 and qm9 databases')
    qm = loader(align=True, only_planar=planar)

    # astype() copies, so the in-place rescaling below never touches the
    # arrays owned by the dataset object.
    positions = qm.R.astype('float32')
    atomic_numbers = qm.Z.astype('float32')
    atom_valences = get_atom_valences(atomic_numbers).astype('float32')
    electron_valences = get_electron_valences(atomic_numbers).astype('float32')

    # Smallest pairwise distance over the whole dataset. For each molecule
    # only the first n_atoms rows are used, n_atoms being the number of
    # non-zero atomic numbers (zero entries are treated as padding).
    min_dist = np.inf
    for index, molecule in enumerate(positions):
        n_atoms = np.sum(atomic_numbers[index] != 0)
        closest_pair = pdist(molecule[:n_atoms, :]).min()
        min_dist = min(min_dist, closest_pair)

    delta = sigma * np.sqrt(-8 * np.log(overlapping_precision))
    print(delta, min_dist)

    # In-place multiplication keeps the float32 dtype of the positions.
    scale = delta / min_dist
    positions *= scale

    arrays = (positions, atomic_numbers, atom_valences, electron_valences)
    return tuple(torch.from_numpy(array) for array in arrays)
Exemplo n.º 2
0
def get_qm_energies(database='qm7', planar=False):
    """Return the transposed ``T`` array of the requested QM dataset.

    Parameters
    ----------
    database : str
        Either ``'qm7'`` or ``'qm9'``.
    planar : bool
        Forwarded to the dataset loader as ``only_planar``.

    Returns
    -------
    The loader's ``T`` attribute after ``transpose()``.

    Raises
    ------
    ValueError
        If ``database`` is neither ``'qm7'`` nor ``'qm9'``.
    """
    # Pick the loader first so the actual load is written only once.
    if database == 'qm7':
        loader = load_qm7
    elif database == 'qm9':
        loader = load_qm9
    else:
        raise ValueError('only qm7 and qm9 databases')

    dataset = loader(align=True, only_planar=planar)
    return dataset.T.transpose()
Exemplo n.º 3
0
def get_qm7_positions_energies_and_charges(M, N, O, J, L, sigma):
    """Load QM7 and return positions, energies, charges and valences.

    Parameters
    ----------
    M, N, O, J, L
        Accepted but not used by this function.
    sigma : float
        Passed to ``renormalize`` together with the positions and charges.

    Returns
    -------
    tuple of torch.Tensor
        ``(positions, energies, charges, valence_charges)`` where
        ``positions`` is the output of ``renormalize`` and ``energies`` is
        the transposed ``T`` array of the dataset.
    """
    dataset = load_qm7(align=True)
    raw_positions = dataset.R
    charges = dataset.Z
    energies = dataset.T.transpose()

    # Valences are derived from the charges before positions are renormalized.
    valence_charges = get_valence(charges)
    scaled_positions = renormalize(raw_positions, charges, sigma)

    arrays = (scaled_positions, energies, charges, valence_charges)
    return tuple(torch.from_numpy(array) for array in arrays)
0
def get_qm_energies_and_folds(random_folds=False, database='qm7'):
    """Return the dataset energies and train/test cross-validation folds.

    The number of folds is always the number of rows of the dataset's
    predefined split array (``P`` for QM7, the transposed
    ``P_stratified_Ua`` for QM9). Each row of that array is used as the
    test indices of one fold, unless ``random_folds`` is set.

    Parameters
    ----------
    random_folds : bool
        If True, ignore the predefined test indices and split a random
        permutation of all sample indices into the same number of folds.
        The folds have equal size when the sample count is divisible by
        the number of folds and differ by at most one element otherwise.
    database : str
        Either ``'qm7'`` or ``'qm9'``.

    Returns
    -------
    energies
        The transposed ``T`` array of the dataset.
    cross_val_folds : list of tuple
        One ``(train_indices, test_indices)`` pair per fold, where
        ``train_indices`` is the concatenation of all other folds.

    Raises
    ------
    ValueError
        If ``database`` is neither ``'qm7'`` nor ``'qm9'``.
    """
    if database == 'qm7':
        qm = load_qm7(align=True)
        P = qm.P
    elif database == 'qm9':
        qm = load_qm9(align=True)
        P = qm.P_stratified_Ua.transpose()
    else:
        raise ValueError('only qm7 and qm9 databases')
    n_folds = P.shape[0]
    energies = qm.T.transpose()

    if random_folds:
        # array_split (unlike reshape) also works when the number of
        # samples is not a multiple of n_folds; in the divisible case it
        # yields exactly the rows that reshape((n_folds, -1)) would.
        shuffled = np.random.permutation(energies.shape[0])
        test_folds = np.array_split(shuffled, n_folds)
    else:
        test_folds = [P[i_fold] for i_fold in range(n_folds)]

    cross_val_folds = []
    for i_fold, test_indices in enumerate(test_folds):
        # Training set: every fold except the one held out for testing.
        train_indices = np.concatenate(
            [fold for j, fold in enumerate(test_folds) if j != i_fold],
            axis=0)
        cross_val_folds.append((train_indices, test_indices))
    return energies, cross_val_folds
Exemplo n.º 5
0
def get_qm7_positions_and_charges(sigma, overlapping_precision=1e-1):
    """
    Load the QM7 molecules and return rescaled positions with charges.

    QM7 is a dataset of 7165 organic molecules with up to 7 non-hydrogen
    atoms, whose energies were computed with Density Functional Theory.
    It has been made available to train machine learning models that
    predict these energies.

    The positions are multiplied by one global factor so that the smallest
    interatomic distance in the whole dataset becomes
    ``sigma * sqrt(-8 * log(overlapping_precision))``.

    Parameters
    ----------
    sigma : float
        width parameter of the Gaussian that represents a particle
    overlapping_precision : float
        value whose logarithm sets the target minimum distance; it must
        lie in (0, 1) for that distance to be a positive real number

    Returns
    -------
    positions, charges, valence_charges: torch tensors
        rescaled positions, charges and valence charges of the QM7
        database molecules
    """
    # Imported here, at call time, rather than at module level.
    from cheml.datasets import load_qm7
    dataset = load_qm7(align=True)
    positions = dataset.R
    charges = dataset.Z
    valence_charges = get_valence(charges)

    # Closest pair of atoms in each molecule. Only the first n_atoms rows
    # are used, n_atoms being the number of non-zero charges (zero entries
    # are treated as padding).
    closest_pairs = [
        pdist(positions[i, :np.sum(charges[i] != 0), :]).min()
        for i in range(positions.shape[0])
    ]
    # np.inf comes first so an empty dataset yields inf, as a running
    # minimum initialised with inf would.
    min_dist = min([np.inf] + closest_pairs)

    # Rescale so that the global minimum distance equals delta.
    delta = sigma * np.sqrt(-8 * np.log(overlapping_precision))
    positions = positions * delta / min_dist

    arrays = (positions, charges, valence_charges)
    return tuple(torch.from_numpy(array) for array in arrays)