Exemplo n.º 1
0
 def test_properties(self):
     """Check that reassigning ``species`` through the property takes effect.

     A dense descriptor is built for species 1 and 8, then its species are
     replaced with C, H and O. Both the reported number of features and the
     width of the created output must differ between the two setups.
     """
     descriptor = ACSF(rcut=6.0, species=[1, 8], g2_params=[[1, 2]], sparse=False)

     # State with the species given to the constructor.
     features_before = descriptor.get_number_of_features()
     output_before = descriptor.create(H2O)

     # State after swapping the species through the property setter.
     descriptor.species = ["C", "H", "O"]
     features_after = descriptor.get_number_of_features()
     output_after = descriptor.create(molecule("CH3OH"))

     self.assertTrue(features_before != features_after)
     self.assertTrue(output_before.shape[1] != output_after.shape[1])
Exemplo n.º 2
0
def ascf_Definition(species=None):
    """Build a dense ACSF descriptor with a fixed G2/G4 parameter grid.

    Args:
        species: Species handed to ``ACSF``. Any falsy value (``None``, an
            empty list, ...) selects ``["H", "C", "N", "O", "F", "S"]``.

    Returns:
        The ``ACSF`` instance, created with ``rcut=10`` and ``sparse=False``.
        G3 and G5 parameters are passed as ``None``.
    """
    # G2 (eta, Rs) pairs: all three eta values for Rs = 2, then 4, then 6.
    radial_params = [[eta, r_s] for r_s in (2, 4, 6) for eta in (1, 0.1, 0.01)]
    # G4 (eta, zeta, lambda) triplets: zeta fixed at 4, all three eta values
    # for lambda = 1 first, then for lambda = -1.
    angular_params = [[eta, 4, lmbd] for lmbd in (1, -1)
                      for eta in (1, 0.1, 0.01)]

    return ACSF(
        species=species or ["H", "C", "N", "O", "F", "S"],
        rcut=10,
        g2_params=radial_params,
        g3_params=None,
        g4_params=angular_params,
        g5_params=None,
        sparse=False,
    )
 def __init__(self,
              molecule_map,
              r_cut,
              g2_params=None,
              g3_params=None,
              g4_params=None,
              g5_params=None,
              n_jobs=1):
     """Store the ACSF settings and create the DScribe descriptor.

     Args:
         molecule_map: Forwarded unchanged to the parent initializer.
         r_cut: Cutoff radius; stored and passed to ``ACSF`` as ``rcut``.
         g2_params: G2 parameters, stored and forwarded to ``ACSF``.
         g3_params: G3 parameters, stored and forwarded to ``ACSF``.
         g4_params: G4 parameters, stored and forwarded to ``ACSF``.
         g5_params: G5 parameters, stored and forwarded to ``ACSF``.
         n_jobs: Forwarded unchanged to the parent initializer.
     """
     super().__init__(molecule_map, n_jobs)
     self.r_cut = r_cut

     symmetry_params = {
         "g2_params": g2_params,
         "g3_params": g3_params,
         "g4_params": g4_params,
         "g5_params": g5_params,
     }
     # Keep every parameter set available as an attribute of the same name.
     for attribute, value in symmetry_params.items():
         setattr(self, attribute, value)

     # self.species is not assigned in this method; presumably the parent
     # initializer provides it -- TODO confirm.
     self.dscribe_func = ACSF(species=self.species,
                              rcut=r_cut,
                              **symmetry_params)
Exemplo n.º 4
0
    def test_species(self):
        """Check how species are stored, from the constructor and the property.

        ``species`` must return the value exactly as it was given, while
        ``_atomic_numbers`` must hold the matching atomic numbers in
        ascending order.
        """
        # Each case: (constructor species, expected atomic numbers,
        #             species set via the property, expected atomic numbers)
        cases = [
            # Atomic numbers
            ([5, 1], [1, 5], [10, 2], [2, 10]),
            # Chemical symbols
            (["O", "H"], [1, 8], ["N", "Pb"], [7, 82]),
        ]
        for initial, initial_numbers, updated, updated_numbers in cases:
            # Hand over copies so the comparisons below are made against
            # lists the descriptor never had access to.
            desc = ACSF(rcut=6.0, species=list(initial))
            self.assertEqual(desc.species, initial)
            self.assertTrue(
                np.array_equal(desc._atomic_numbers, initial_numbers))

            desc.species = list(updated)
            self.assertEqual(desc.species, updated)
            self.assertTrue(
                np.array_equal(desc._atomic_numbers, updated_numbers))
Exemplo n.º 5
0
    def test_parallel_sparse(self):
        """Check sparse output for several systems against per-atom output.

        The stacked output of a multi-system ``create`` call, run serially
        and with two jobs, must equal the rows obtained by creating the
        descriptor for each requested atom separately.
        """
        co, n2o = molecule("CO"), molecule("N2O")
        samples = [co, n2o]
        desc = ACSF(rcut=6.0,
                    species=[6, 7, 8],
                    g2_params=[[1, 2], [4, 5]],
                    g3_params=[1, 2, 3, 4],
                    g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                    g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                    sparse=True)
        n_features = desc.get_number_of_features()

        def reference(atoms_and_indices):
            """Stack single-atom outputs, one row per (system, index) pair."""
            rows = np.empty((len(atoms_and_indices), n_features))
            for row, (system, index) in enumerate(atoms_and_indices):
                rows[row, :] = desc.create(system, [index]).toarray()
            return rows

        # Explicit positions: first as a serial job, then split over two jobs.
        selected = [(co, 0), (n2o, 0), (n2o, 1)]
        for n_jobs in (1, 2):
            output = desc.create(
                system=samples,
                positions=[[0], [0, 1]],
                n_jobs=n_jobs,
            ).toarray()
            self.assertTrue(np.allclose(output, reference(selected)))

        # No positions specified: every atom of every system is expected.
        output = desc.create(
            system=samples,
            positions=[None, None],
            n_jobs=2,
        ).toarray()
        every_atom = [(co, 0), (co, 1), (n2o, 0), (n2o, 1), (n2o, 2)]
        self.assertTrue(np.allclose(output, reference(every_atom)))
Exemplo n.º 6
0
    def test_number_of_features(self):
        """Check ``get_number_of_features`` for several parameter combinations.

        The expected counts are written in terms of the number of species.
        The ``4 * 3`` term used with the G4 parameters is taken from the
        original expectations (presumably four parameter sets for each of
        three species pairs -- TODO confirm).
        """
        species = [1, 8]
        n_elem = len(species)

        # (extra constructor keywords, expected number of features)
        cases = [
            ({}, n_elem),
            ({"g2_params": [[1, 2], [4, 5]]}, n_elem * (2 + 1)),
            ({"g3_params": [1, 2, 3, 4]}, n_elem * (4 + 1)),
            ({"g4_params": [[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]]},
             n_elem + 4 * 3),
            ({"g2_params": [[1, 2], [4, 5]],
              "g3_params": [1, 2, 3, 4],
              "g4_params": [[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]]},
             n_elem * (1 + 2 + 4) + 4 * 3),
        ]
        for extra_params, expected in cases:
            desc = ACSF(rcut=6.0, species=list(species), **extra_params)
            self.assertEqual(desc.get_number_of_features(), expected)
Exemplo n.º 7
0
               ]],
    symbols=["H", "O", "H"],
)

# A single hydrogen atom placed at the origin of a 15 x 15 x 15 cubic cell.
H = Atoms(
    cell=[[15.0, 0.0, 0.0], [0.0, 15.0, 0.0], [0.0, 0.0, 15.0]],
    positions=[
        [0, 0, 0],
    ],
    symbols=["H"],
)

# Module-level descriptor for species 1 and 8 with all four optional
# symmetry-function families (G2-G5) configured.
default_desc = ACSF(
    rcut=6.0,
    species=[1, 8],
    g2_params=[[1, 2], [4, 5]],
    g3_params=[1, 2, 3, 4],
    g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
    g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
)


class ACSFTests(TestBaseClass, unittest.TestCase):
    def test_constructor(self):
        """Tests different valid and invalid constructor values.
        """
        # Invalid species
        with self.assertRaises(ValueError):
            ACSF(rcut=6.0, species=None)

        # Invalid bond_params
        with self.assertRaises(ValueError):
Exemplo n.º 8
0
from dscribe.descriptors import ACSF
from ase.build import molecule
import numpy as np
from ase import Atoms

# Setting up the ACSF descriptor for hydrogen and oxygen
acsf = ACSF(
    species=["H", "O"],
    rcut=6.0,
    g2_params=[[1, 1], [1, 2], [1, 3]],
    g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
)

# Discarded experiment: passing an Atoms object instead of a species list.
# a = 4.081
# b = a / 2
# fcc_atom = Atoms('Au',
#                  cell=[(0, b, b),
#                        (b, 0, b),
#                        (b, b, 0)],
#                  pbc=True)

# acsf = ACSF(fcc_atom) -> "Please provide the species as an iterable, e.g. a list."

# Creating an atomic system as an ase.Atoms object
water = molecule("H2O")
print(water)

# Create the ACSF output for the single atom at index 1
# (presumably a hydrogen in ASE's H2O ordering -- TODO confirm)
acsf_water = acsf.create(water, positions=[1])

# Show the descriptor values and the shape of the output
print(acsf_water)
print(acsf_water.shape)
Exemplo n.º 9
0
    def __init__(self, desc_spec):
        """Create a DScribe ACSF descriptor from a specification mapping.

        See https://singroup.github.io/dscribe/tutorials/acsf.html for the
        meaning of the symmetry-function parameters. DScribe currently only
        supports ACSF for finite systems.

        Args:
            desc_spec: Mapping describing the descriptor. It must contain
                ``"type": "ACSF"``, ``"species"`` and ``"cutoff"``. Optional
                keys are ``"periodic"`` and ``"g2_params"`` to
                ``"g5_params"``; list values of the latter are converted to
                NumPy arrays. All other keys are ignored.

        Raises:
            ValueError: If the type is missing or is not ``"ACSF"``, if a
                periodic system is requested, or if ``"species"`` or
                ``"cutoff"`` is missing.
        """

        if "type" not in desc_spec or desc_spec["type"] != "ACSF":
            raise ValueError(
                "Type is not ACSF or cannot find the type of the descriptor")

        if 'periodic' in desc_spec:
            self.periodic = bool(desc_spec['periodic'])
        elif not hasattr(self, 'periodic'):
            # Without this default the check below failed with an
            # AttributeError whenever the spec carried no 'periodic' key.
            self.periodic = False
        if self.periodic:
            raise ValueError(
                "Warning: currently DScribe only supports ACSF for finite systems"
            )

        # Imported here so that DScribe is only needed when the descriptor
        # is actually instantiated.
        from dscribe.descriptors import ACSF

        self.acsf_dict = {
            'g2_params': None,
            'g3_params': None,
            'g4_params': None,
            'g5_params': None
        }

        # required
        try:
            self.species = desc_spec['species']
            self.cutoff = desc_spec['cutoff']
        except KeyError as err:
            raise ValueError(
                "Not enough information to intialize the `Atomic_Descriptor_ACF` object"
            ) from err

        # fill in the values that were supplied; the key set is fixed, only
        # the stored values are replaced
        for key in self.acsf_dict:
            if key in desc_spec:
                value = desc_spec[key]
                if isinstance(value, list):
                    value = np.asarray(value)
                self.acsf_dict[key] = value

        self.acsf = ACSF(species=self.species,
                         rcut=self.cutoff,
                         **self.acsf_dict,
                         sparse=False)

        print("Using ACSF Descriptors ...")

        # make an acronym: cutoff followed by the number of parameter sets of
        # every symmetry-function family that is in use
        self.acronym = "ACSF-c" + str(self.cutoff)
        for family in ('g2', 'g3', 'g4', 'g5'):
            params = self.acsf_dict[family + '_params']
            if params is not None:
                self.acronym += "-" + family + "-" + str(len(params))
Exemplo n.º 10
0
    Chem.rdchem.BondType.TRIPLE,
    Chem.rdchem.BondType.AROMATIC,
]

# Hybridization types that are taken into account; the S, SP3D and SP3D2
# entries are deliberately left commented out.
HYBRIDIZATIONS=[
    #Chem.rdchem.HybridizationType.S,
    Chem.rdchem.HybridizationType.SP,
    Chem.rdchem.HybridizationType.SP2,
    Chem.rdchem.HybridizationType.SP3,
    #Chem.rdchem.HybridizationType.SP3D,
    #Chem.rdchem.HybridizationType.SP3D2,
]

# Shared ACSF descriptor for the species in SYMBOLS, using three G2 and
# four G4 parameter sets.
ACSF_GENERATOR = ACSF(
    species = SYMBOLS,
    rcut = 6.0,
    g2_params=[[1, 1], [1, 2], [1, 3]],
    g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
)



def one_hot_encoding(x, set):
    """Encode ``x`` as a list of 0/1 flags over the entries of ``set``.

    Args:
        x: Value to encode.
        set: Iterable of candidate values. The name shadows the builtin but
            is kept because it is part of the call signature.

    Returns:
        A list with one integer per candidate: 1 where the candidate equals
        ``x``, 0 elsewhere.
    """
    flags = []
    for candidate in set:
        flags.append(int(x == candidate))
    return flags

def one_hot_numpy(x, width):
    """One-hot encode an array of class indices into a float matrix.

    Args:
        x: Array of integer indices; row ``i`` of the result gets a 1 in
            column ``x[i]``.
        width: Number of columns of the result.

    Returns:
        Array of shape ``(x.shape[0], width)`` filled with zeros except for
        the selected entries, which are 1.
    """
    n_rows = x.shape[0]
    encoded = np.zeros((n_rows, width))
    row_index = np.arange(n_rows)
    encoded[row_index, x] = 1
    return encoded

Exemplo n.º 11
0
    cell=[
        [15.0, 0.0, 0.0],
        [0.0, 15.0, 0.0],
        [0.0, 0.0, 15.0]
    ],
    positions=[
        [0, 0, 0],

    ],
    symbols=["H"],
)

# Module-level descriptor for atomic numbers 1 and 8 with a single G2
# parameter pair. The richer G2-G5 configuration below is commented out.
# NOTE(review): this snippet passes `atomic_numbers` and no `rcut`, unlike the
# `species`/`rcut` keywords used elsewhere -- confirm which ACSF signature it
# targets.
default_desc = ACSF(
    atomic_numbers=[1, 8],
    g2_params=[[1, 2]],
    # g2_params=[[1, 2], [4, 5]],
    # g3_params=[1, 2, 3, 4],
    # g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
    # g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
)


class ACSFTests(TestBaseClass, unittest.TestCase):

    def test_constructor(self):
        """Tests different valid and invalid constructor values.
        """
        # Invalid atomic_numbers
        with self.assertRaises(ValueError):
            ACSF(atomic_numbers=None)

        # Invalid bond_params
def main(fxyz, dictxyz, prefix, output, per_atom, r_cut, facsf_param, periodic, stride):
    """
    Generate the ACSF representation of every frame and write it to disk.

    Currently only implemented for finite systems in DScribe.

    Parameters
    ----------
    fxyz: string giving the location of the xyz file, or 'none' to skip it
    dictxyz: string giving the location of an xyz file that is used as a
        dictionary, or 'none' to skip it
    prefix: string giving the filename prefix of the output
    output: 'xyz' appends the representations to an extended xyz file;
        'matrix' writes them as standalone text matrices
    per_atom: if true, the per-atom descriptors are written as well
    r_cut: cutoff radius, passed to ACSF as ``rcut``
    facsf_param: path of a json file specifying the Gn parameters (see
        https://singroup.github.io/dscribe/tutorials/acsf.html for details),
        'smart' (placeholder, no Gn parameters are selected yet), or any
        other value to use a basic built-in G2/G4 selection
    periodic: whether the system is periodic (converted with ``bool``)
    stride: read every ``stride``-th frame of ``fxyz``

    Raises
    ------
    IOError: if the json parameter file exists but cannot be loaded
    ValueError: if ``output`` is neither 'matrix' nor 'xyz'
    """

    # NOTE(review): bool() of any non-empty string is True, including
    # "False" -- confirm the caller passes real booleans here.
    periodic = bool(periodic)
    per_atom = bool(per_atom)
    fframes = []
    dictframes = []

    # read frames
    if fxyz != 'none':
        fframes = read(fxyz, slice(0, None, stride))
        nfframes = len(fframes)
        print("read xyz file:", fxyz, ", a total of", nfframes, "frames")
    # read frames in the dictionary
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        ndictframes = len(dictframes)
        print("read xyz file used for a dictionary:", dictxyz, ", a total of",
              ndictframes, "frames")

    frames = dictframes + fframes
    nframes = len(frames)
    # collect every atomic number that occurs in any frame
    global_species = []
    for frame in frames:
        global_species.extend(frame.get_atomic_numbers())
        if not periodic:
            frame.set_pbc([False, False, False])
    global_species = np.unique(global_species)
    print("a total of", nframes, "frames, with elements: ", global_species)

    if periodic:
        print("Warning: currently DScribe only supports ACSF for finite systems")

    # template for an ACSF descriptor
    acsf_dict = {'species': global_species,
                 'rcut': r_cut,
                 'g2_params': None,
                 'g3_params': None,
                 'g4_params': None,
                 'g5_params': None}  # ,
    # 'periodic': periodic, 'sparse': False}
    # currently DScribe only supports ACSF for finite systems!

    # Setting up the ACSF descriptor
    if os.path.isfile(facsf_param):
        # load file
        try:
            with open(facsf_param, 'r') as facsffile:
                acsf_param = json.load(facsffile)
        except Exception as err:
            raise IOError('Cannot load the json file for ACSF parameters') from err
        # fill in the values; only keys of the template are accepted
        for k, v in acsf_param.items():
            if k in acsf_dict:
                if isinstance(v, list):
                    acsf_dict[k] = np.asarray(v)
                else:
                    acsf_dict[k] = v
            else:
                print("Warning: unknown key ", k)
    elif facsf_param in ('smart', 'SMART', 'Smart'):
        # TODO: add default selection
        pass
    else:
        print("use very basic selections for ACSF")
        acsf_dict['g2_params'] = [[1, 1], [1, 2], [1, 3]]
        # This line used ':' instead of '=', which made it an annotation and
        # left the G4 parameters unset.
        acsf_dict['g4_params'] = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]]

    # set it up
    rep_atomic = ACSF(**acsf_dict)

    if facsf_param != 'none':
        foutput = prefix + "-rcut" + str(r_cut) + '-' + facsf_param
        desc_name = "ACSF" + "-rcut" + str(r_cut) + '-' + facsf_param
    else:
        foutput = prefix + "-rcut" + str(r_cut)
        desc_name = "ACSF" + "-rcut" + str(r_cut)

    # prepare for the output: move results of a previous run out of the way,
    # because everything below is written in append mode
    if os.path.isfile(foutput + ".xyz"):
        os.rename(foutput + ".xyz", "bck." + foutput + ".xyz")
    if os.path.isfile(foutput + ".desc"):
        os.rename(foutput + ".desc", "bck." + foutput + ".desc")

    for frame in frames:
        fnow = rep_atomic.create(frame, n_jobs=8)

        # per-frame descriptor: average over the rows of the per-atom output
        frame.info[desc_name] = fnow.mean(axis=0)

        # save
        if output == 'matrix':
            with open(foutput + ".desc", "ab") as f:
                np.savetxt(f, frame.info[desc_name][None])
            if per_atom or nframes == 1:
                with open(foutput + ".atomic-desc", "ab") as f:
                    np.savetxt(f, fnow)
        elif output == 'xyz':
            # output per-atom info
            if per_atom:
                frame.new_array(desc_name, fnow)
            # write the extended xyz file
            write(foutput + ".xyz", frame, append=True)
        else:
            raise ValueError('Cannot find the output format')
#     g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
# )

# Node_Dim : 58
# ACSF_GENERATOR = ACSF(
#     species = SYMBOL,
#     rcut = 6.0,
#     g2_params=[[1, 2], [1, 6]],
#     g4_params=[[1, 4, 1], [1, 4, -1]],
# )

# Boris params
# Node_Dim : 88
# ACSF descriptor for the species in SYMBOL, with a cutoff of 10.0, two G2
# parameter pairs and four G4 parameter triplets.
ACSF_GENERATOR = ACSF(species=SYMBOL,
                      rcut=10.0,
                      g2_params=[[1, 2], [1, 6]],
                      g4_params=[[1, 4, 1], [0.1, 4, 1], [1, 4, -1],
                                 [0.1, 4, -1]])

# Feature/target dimensions; the trailing numbers are earlier values kept
# by the author. NOTE(review): NODE_DIM presumably has to match the ACSF
# feature count plus other node features -- confirm where it is consumed.
EDGE_DIM = 6
NODE_DIM = 88  ##  93  13
NUM_TARGET = 8

# HIDDEN_DIM = 64 # initial 128
HIDDEN_DIM = 128  #

# EDGE_DIM   =  80
# NODE_DIM   = 16 ##  93  13
# NUM_TARGET =  8

#---------------------------------------------------------------------------------
Exemplo n.º 14
0
import pandas as pd
import numpy as np
from tqdm import tqdm
from dscribe.descriptors import ACSF
from dscribe.core.system import System
from multiprocessing import Pool
import compe_data

# G2 and G4 symmetry-function parameters for the ACSF descriptor
g2_params = [[1, 1], [1, 2], [1, 3]]
g4_params = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]]

# Descriptor generator for H, C, N, O and F
gen = ACSF(
    species=["H", "C", "N", "O", "F"],
    rcut=6.0,
    g2_params=g2_params,
    g4_params=g4_params,
)
# Load the structures and group them by molecule name
st_df = compe_data.read_structures()
molecules = st_df["molecule_name"].unique()
st_gr = st_df.groupby("molecule_name")
# Build st_dict (molecule name -> its rows of st_df) up front; per the
# original note this is meant for the parallel processing.
st_dict = {}
for molecule in tqdm(molecules):
    st_dict[molecule] = st_gr.get_group(molecule)


def func_acsf(params):
    i, molecule = params
    #if i%1000 == 0:
    #    print(f"{i}th finish")
    st = st_dict[molecule]
Exemplo n.º 15
0
#
## Setup descriptors
#cm_desc = CoulombMatrix(n_atoms_max=3, permutation="sorted_l2")
#soap_desc = SOAP(species=["C", "H", "O", "N"], rcut=5, nmax=8, lmax=6, crossover=True)
#
## Create descriptors as numpy arrays or scipy sparse matrices
#water = samples[0]
#coulomb_matrix = cm_desc.create(water)
#soap = soap_desc.create(water, positions=[0])
#
## Easy to use also on multiple systems, can be parallelized across processes
#coulomb_matrices = cm_desc.create(samples)
#coulomb_matrices = cm_desc.create(samples, n_jobs=3)
#oxygen_indices = [np.where(x.get_atomic_numbers() == 8)[0] for x in samples]
#oxygen_soap = soap_desc.create(samples, oxygen_indices, n_jobs=3)
#
#

from dscribe.descriptors import ACSF

# Setting up the ACSF descriptor for carbon and oxygen
acsf = ACSF(
    species=["C", "O"],
    rcut=6.0,
    g2_params=[[1, 1], [1, 2], [1, 3]],
    g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
)

# A C-O pair separated by d along the z axis
d = 1.1
co = Atoms(['C', 'O'], positions=[(0, 0, 0), (0, 0, d)])
# No positions are given, so no subset of atoms is selected. Despite its
# name, `acsf_water` holds the output for the CO system above.
acsf_water = acsf.create(co)
Exemplo n.º 16
0
    def test_features(self):
        """Tests that the correct features are present in the descriptor.

        For every symmetry-function family (G1-G5) a descriptor is created
        for the H2O test system and selected entries of the output are
        compared, to six decimal places, against values computed here from
        the interatomic distances and angles.
        """
        # Irrational parameter values, so that no term of the reference
        # formulas trivially reduces to 0 or 1.
        rs = math.sqrt(2)
        kappa = math.sqrt(3)
        eta = math.sqrt(5)
        lmbd = 1
        zeta = math.sqrt(7)

        # Test against assumed values
        # The distance names imply index 1 is the oxygen and indices 0 and 2
        # are the hydrogens.
        dist_oh = H2O.get_distance(0, 1)
        dist_hh = H2O.get_distance(0, 2)
        # Angles are converted from degrees to radians. The sign of ang_ohh
        # has no effect below, because only its cosine is used.
        ang_hoh = H2O.get_angle(0, 1, 2) * np.pi / 180.0
        ang_hho = H2O.get_angle(1, 0, 2) * np.pi / 180.0
        ang_ohh = -H2O.get_angle(2, 0, 1) * np.pi / 180.0
        rc = 6.0

        # G1: the plain cosine cutoff function of the distance
        desc = ACSF(rcut=6.0, species=[1, 8])
        acsfg1 = desc.create(H2O)
        g1_ho = 0.5 * (np.cos(np.pi * dist_oh / rc) + 1)
        g1_hh = 0.5 * (np.cos(np.pi * dist_hh / rc) + 1)
        # The oxygen has two hydrogen neighbours at the same distance
        g1_oh = 2 * 0.5 * (np.cos(np.pi * dist_oh / rc) + 1)
        self.assertAlmostEqual(acsfg1[0, 0], g1_hh, places=6)
        self.assertAlmostEqual(acsfg1[0, 1], g1_ho, places=6)
        self.assertAlmostEqual(acsfg1[1, 0], g1_oh, places=6)

        # G2: Gaussian of the distance shifted by rs, times the cutoff
        desc = ACSF(rcut=6.0, species=[1, 8], g2_params=[[eta, rs]])
        acsfg2 = desc.create(H2O)
        g2_hh = np.exp(-eta * np.power((dist_hh - rs), 2)) * g1_hh
        g2_ho = np.exp(-eta * np.power((dist_oh - rs), 2)) * g1_ho
        g2_oh = np.exp(-eta * np.power((dist_oh - rs), 2)) * g1_oh
        self.assertAlmostEqual(acsfg2[0, 1], g2_hh, places=6)
        self.assertAlmostEqual(acsfg2[0, 3], g2_ho, places=6)
        self.assertAlmostEqual(acsfg2[1, 1], g2_oh, places=6)

        # G3: cosine of the scaled distance, times the cutoff
        desc = ACSF(rcut=6.0, species=[1, 8], g3_params=[kappa])
        acsfg3 = desc.create(H2O)
        g3_hh = np.cos(dist_hh * kappa) * g1_hh
        g3_ho = np.cos(dist_oh * kappa) * g1_ho
        g3_oh = np.cos(dist_oh * kappa) * g1_oh
        self.assertAlmostEqual(acsfg3[0, 1], g3_hh, places=6)
        self.assertAlmostEqual(acsfg3[0, 3], g3_ho, places=6)
        self.assertAlmostEqual(acsfg3[1, 1], g3_oh, places=6)

        # G4: angular term; the Gaussian and the cutoffs cover all three
        # distances of the triangle (O-H twice, H-H once)
        desc = ACSF(rcut=6.0, species=[1, 8], g4_params=[[eta, zeta, lmbd]])
        acsfg4 = desc.create(H2O)
        gauss = np.exp(-eta * (2 * dist_oh * dist_oh +
                               dist_hh * dist_hh)) * g1_ho * g1_hh * g1_ho
        g4_h_ho = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_hho)), zeta) * gauss
        g4_h_oh = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_ohh)), zeta) * gauss
        g4_o_hh = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_hoh)), zeta) * gauss
        self.assertAlmostEqual(acsfg4[0, 3], g4_h_ho, places=6)
        self.assertAlmostEqual(acsfg4[2, 3], g4_h_oh, places=6)
        self.assertAlmostEqual(acsfg4[1, 2], g4_o_hh, places=6)

        # G5: angular term; the Gaussian and the cutoffs cover only the two
        # distances that start at the central atom
        desc = ACSF(rcut=6.0, species=[1, 8], g5_params=[[eta, zeta, lmbd]])
        acsfg5 = desc.create(H2O)
        gauss = np.exp(-eta *
                       (dist_oh * dist_oh + dist_hh * dist_hh)) * g1_ho * g1_hh
        g5_h_ho = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_hho)), zeta) * gauss
        g5_h_oh = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_ohh)), zeta) * gauss
        # With the oxygen as the centre both distances are O-H
        g5_o_hh = np.power(2, 1 - zeta) * np.power(
            (1 + lmbd * np.cos(ang_hoh)), zeta) * np.exp(
                -eta * (2 * dist_oh * dist_oh)) * g1_ho * g1_ho
        self.assertAlmostEqual(acsfg5[0, 3], g5_h_ho, places=6)
        self.assertAlmostEqual(acsfg5[2, 3], g5_h_oh, places=6)
        self.assertAlmostEqual(acsfg5[1, 2], g5_o_hh, places=6)
Exemplo n.º 17
0
def _transform_particle_groups(descriptors, transformers, n_out, split=2):
    """Transform the first ``split`` particles and the remaining ones separately.

    ``transformers[0].transform`` is applied to particles ``[:split]`` and
    ``transformers[1].transform`` to particles ``[split:]``. Each group is
    flattened to ``(n_frames * group_size, n_features)`` for the call and
    reshaped back afterwards.

    Returns a new array of shape ``(n_frames, n_particles, n_out)``.
    """
    n_frames, n_particles, n_in = descriptors.shape
    transformed = np.empty((n_frames, n_particles, n_out))
    groups = (
        (slice(None, split), transformers[0]),
        (slice(split, None), transformers[1]),
    )
    for particle_slice, transformer in groups:
        group = descriptors[:, particle_slice, :]
        group_size = group.shape[1]
        flat = group.reshape(n_frames * group_size, n_in)
        transformed[:, particle_slice, :] = transformer.transform(
            flat).reshape(n_frames, group_size, n_out)
    return transformed


def create_data_ACSF(data, metadata):
    """Compute ACSF descriptors for every row of ``data`` and attach them.

    The descriptor is a fixed H/O ACSF with ``rcut=9.0``; ``metadata['rcut']``
    is not used. Optionally the features are reduced with PCA and scaled.
    For both steps the first two particles and the remaining particles are
    handled by separate transformers.

    Args:
        data: DataFrame with a ``'molec'`` column holding the systems, indexed
            so that ``0 .. data.index.max()`` are valid labels. An
            ``'is_train'`` column is added in place when it is missing.
        metadata: Mapping that must contain ``'particles'``, ``'scaler'`` and
            ``'N_PCA'``.

            * ``'N_PCA'``: if truthy, the number of PCA components to keep.
              Fitted PCAs are read from ``metadata['PCAs']`` or, when absent,
              obtained from ``select_best_params`` on the training rows and
              stored there, together with ``'old_N_feature'``.
            * ``'scaler'``: ``False`` disables scaling, ``None`` fits new
              scalers through ``scale_descriptors``, and any other value is
              used as a pair of already fitted scalers.

    Returns:
        Tuple ``(data joined with a 'descriptor' column, metadata)``; the
        scaler that was used is stored in ``metadata['scaler']``.
    """
    # Only these entries are used; the other values that used to be unpacked
    # here were never read.
    particles = metadata['particles']
    scaler = metadata['scaler']
    N_PCA = metadata['N_PCA']

    # NOTE(review): [0.01, 0] appears twice in g2_params. It looks like a
    # typo, but removing it would change the number of features.
    acsf = ACSF(species=["H", "O"],
                rcut=9.0,
                g2_params=[[1, 0], [0.1, 0], [0.01, 0], [0.01, 0], [0.001, 0]],
                g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1],
                           [0.1, 1, 1], [0.1, 2, 1], [0.1, 1, -1],
                           [0.1, 2, -1], [0.01, 1, 1], [0.01, 2, 1],
                           [0.01, 1, -1], [0.01, 2, -1]])

    nb_features = acsf.get_number_of_features()
    n_frames = data.index.max() + 1

    # np.empty replaces pd.np.empty: the pandas.np alias no longer exists in
    # current pandas releases.
    descriptors = np.empty((n_frames, len(particles), nb_features))
    particle_indices = np.arange(len(particles))
    for i_time in tqdm.tqdm(range(n_frames)):
        descriptors[i_time] = acsf.create(data['molec'][i_time],
                                          positions=particle_indices)

    # create training set
    if 'is_train' not in data:
        data['is_train'] = create_is_train(n_frames)

    # selecting best params
    if N_PCA:
        needs_fit = 'PCAs' not in metadata
        if needs_fit:
            PCAs = select_best_params(descriptors[data['is_train'].values],
                                      nb_features, N_PCA)
        else:
            PCAs = metadata['PCAs']
        descriptors = _transform_particle_groups(descriptors, PCAs, N_PCA)
        if needs_fit:
            # Record the fit only after the transform has succeeded.
            metadata['old_N_feature'] = nb_features
            metadata['PCAs'] = PCAs
        nb_features = N_PCA

    # scaling
    if scaler == False:  # noqa: E712 -- keeps accepting any value equal to False
        pass
    elif scaler is None:
        descriptors, scaler = scale_descriptors(data, descriptors)
    else:
        descriptors = _transform_particle_groups(descriptors, scaler,
                                                 nb_features)

    metadata['scaler'] = scaler
    return data.join(pd.DataFrame({'descriptor': list(descriptors)})), metadata
Exemplo n.º 18
0
def main(fxyz, dictxyz, prefix, output, per_atom, r_cut, config_path, periodic):
    """
    Generate the ACSF representation of every frame and write it to disk.

    Parameters
    ----------
    fxyz: string giving the location of the xyz file, or 'none' to skip it
    dictxyz: string giving the location of an xyz file that is used as a
        dictionary, or 'none' to skip it
    prefix: string giving the filename prefix of the output
    output: 'xyz' appends the representations to an extended xyz file;
        'matrix' writes them as standalone text matrices
    per_atom: if true, the per-atom descriptors are written as well
    r_cut: cutoff radius, passed to ACSF as ``rcut``
    config_path: path of a json file whose entries are passed to ACSF as
        keyword arguments (see
        https://singroup.github.io/dscribe/tutorials/acsf.html for details);
        a falsy value uses ACSF without extra parameters
    periodic: whether the system is periodic (converted with ``bool``)

    Raises
    ------
    IOError: if the json configuration cannot be loaded
    ValueError: if ``output`` is neither 'matrix' nor 'xyz'
    """
    periodic, per_atom = bool(periodic), bool(per_atom)

    # frames to featurize
    fframes = []
    if fxyz != 'none':
        fframes = read(fxyz, ':')
        print("read xyz file:", fxyz, ", a total of", len(fframes), "frames")

    # frames of the dictionary
    dictframes = []
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        print("read xyz file used for a dictionary:", dictxyz, ", a total of",
              len(dictframes), "frames")

    frames = dictframes + fframes
    nframes = len(frames)

    # gather every atomic number that occurs, switching off the periodic
    # boundary conditions on the way when the system is not periodic
    atomic_numbers = []
    for atoms in frames:
        atomic_numbers.extend(atoms.get_atomic_numbers())
        if not periodic:
            atoms.set_pbc([False, False, False])
    global_species = np.unique(atomic_numbers)
    print("a total of", nframes, "frames, with elements: ", global_species)

    # descriptor and the names derived from its settings
    tag = "-rcut" + str(r_cut)
    if config_path:
        try:
            with open(config_path, 'r') as config_file:
                config = json.load(config_file)
                # lists are handed to ACSF as numpy arrays
                for key, value in config.items():
                    if isinstance(value, list):
                        config[key] = np.asarray(value)
        except Exception:
            raise IOError('Cannot load the json file for parameters')
        rep_atomic = ACSF(rcut=r_cut, species=global_species, **config)
        tag = tag + '-' + config_path
    else:
        rep_atomic = ACSF(rcut=r_cut, species=global_species)
    foutput = prefix + tag
    desc_name = "ACSF" + tag

    # move results of a previous run out of the way, because everything
    # below is written in append mode
    for previous in (foutput + ".xyz", foutput + ".desc"):
        if os.path.isfile(previous):
            os.rename(previous, "bck." + previous)

    for frame in frames:
        per_atom_desc = rep_atomic.create(frame, n_jobs=8)

        # per-frame descriptor: average over the rows of the per-atom output
        frame.info[desc_name] = per_atom_desc.mean(axis=0)

        if output not in ('matrix', 'xyz'):
            raise ValueError('Cannot find the output format')

        if output == 'matrix':
            with open(foutput + ".desc", "ab") as f:
                np.savetxt(f, frame.info[desc_name][None])
            if per_atom or nframes == 1:
                with open(foutput + ".atomic-desc", "ab") as f:
                    np.savetxt(f, per_atom_desc)
        else:
            # attach the per-atom values before the frame is written
            if per_atom:
                frame.new_array(desc_name, per_atom_desc)
            write(foutput + ".xyz", frame, append=True)
Exemplo n.º 19
0
# Name of the local descriptor to use: "SOAP" or "ACSF"
descriptor = "SOAP"

# Compute local descriptors
all_atomtypes = [1, 6]
#all_atomtypes = []
if descriptor == "SOAP":
    desc = SOAP(all_atomtypes, 8.0, 2, 0, periodic=False, crossover=True)
    print(desc.get_number_of_features())
elif descriptor == "ACSF":
    # NOTE(review): these keyword names (n_atoms_max, types, bond_params,
    # ang4_params, ...) differ from the species/rcut/g2_params keywords used
    # elsewhere -- confirm which ACSF signature this snippet targets.
    desc = ACSF(n_atoms_max=15,
                types=[1, 6, 7, 8],
                bond_params=[[
                    1,
                    2,
                ], [
                    4,
                    5,
                ]],
                bond_cos_params=[1, 2, 3, 4],
                ang4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                ang5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                flatten=False)
else:
    # Unknown descriptor name: print a hint and stop the script.
    # NOTE(review): the exit status is 0 even though nothing was computed.
    print("Add your local descriptor here")
    exit(0)

# Kernel object and the accumulators that the rest of the script fills
ave = AverageKernel()
desc_list = []
atomic_numbers_list = []
ase_atoms_list = []
all_atomtypes = [
Exemplo n.º 20
0
from dscribe.descriptors import ACSF

# Setting up the ACSF descriptor for atomic numbers 1 and 8
acsf = ACSF(
    atomic_numbers=[1, 8],
    rcut=6.0,
    g2_params=[[1, 1], [1, 2], [1, 3]],
    g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
)

# Creating an atomic system as an ase.Atoms-object
from ase.build import molecule

water = molecule("H2O")

# Create the ACSF output for the single atom at index 1
# (presumably a hydrogen in ASE's H2O ordering -- TODO confirm)
acsf_water = acsf.create(water, positions=[1])

# Show the descriptor values and the shape of the output
print(acsf_water)
print(acsf_water.shape)
Exemplo n.º 21
0
from data import *

from dscribe.descriptors import ACSF
from dscribe.core.system import System

#ACSF_GENERATOR = ACSF(
#    species=SYMBOL,
#    rcut=6.0,
#    g2_params = [[1, 1], [1, 2], [1, 3]],
#    g4_params = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
#)

# ACSF descriptor for the species in SYMBOL, with a cutoff of 10.0, six G2
# parameter pairs and six G4 parameter triplets.
ACSF_GENERATOR = ACSF(
    species=SYMBOL,
    rcut=10.0,
    g2_params=[[15, 0.5], [1.5, 0.5], [0.15, 0.5], [15, 2], [1.5, 2],
               [0.15, 2]],
    g4_params=[[1, 5, 1], [0.1, 5, 1], [0.01, 5, 1], [1, 5, -1], [0.1, 5, -1],
               [0.01, 5, -1]],
)

# Feature/target dimensions; the trailing numbers are earlier values kept
# by the author. NOTE(review): NODE_DIM presumably has to match the ACSF
# feature count plus other node features -- confirm where it is consumed.
EDGE_DIM = 14  #  7 8 9 6 11 38
NODE_DIM = 165  # 120 13 93 123
NUM_TARGET = 8


class ChampsDataset(Dataset):
    def __init__(self,
                 split,
                 csv,
                 mode,
                 augment=None,
Exemplo n.º 22
0
    Chem.rdchem.HybridizationType.SP3,
]


def gaussian_rbf(x, min_x, max_x, center_num):
    """Expand a scalar into Gaussian radial-basis-function features.

    ``center_num`` centres are placed evenly on ``[min_x, max_x]`` and the
    response of a unit-width Gaussian at each centre is returned.

    Args:
        x: Value to expand (scalar, or anything broadcastable against the
            centre vector).
        min_x: Position of the first centre.
        max_x: Position of the last centre.
        center_num: Number of centres.

    Returns:
        numpy.ndarray with ``exp(-(c_i - x)**2)`` for every centre ``c_i``;
        each entry lies in ``(0, 1]`` and equals 1 where ``x`` hits a centre.
    """
    center_point = np.linspace(min_x, max_x, center_num)
    # The exponent must be negative: exp(+d**2) is not a Gaussian and grows
    # without bound as x moves away from a centre.
    x_vec = np.exp(-np.square(center_point - x))
    return x_vec


# Distance bounds.
# NOTE(review): presumably the smallest/largest interatomic distance seen in
# the data set -- confirm against where these are consumed.
dist_min = 0.95860666
dist_max = 12.040386

# Shared ACSF descriptor for H/C/N/O/F systems. G2 entries are [eta, R_s]
# pairs; G4 entries are [eta, zeta, lambda] triplets.
ACSF_GENERATOR = ACSF(
    species=['H', 'C', 'N', 'O', 'F'],
    rcut=6.0,
    g2_params=[[1, r_s] for r_s in (1, 2, 3)],
    g4_params=[[1, zeta, lambd] for lambd in (1, -1) for zeta in (1, 2)],
)

# Open Babel converter configured to read "xyz" and write "mol2".
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("xyz", "mol2")

# Per-element lookup tables for H, C, N, O and F.
# NOTE(review): radii are presumably covalent radii in angstrom and the
# electronegativities presumably Pauling values -- confirm the sources.
fudge_factor = 0.05

# Radii are listed without the fudge factor; it is added while building the
# final table.
atomic_radius = {
    element: bare_radius + fudge_factor
    for element, bare_radius in (
        ('H', 0.38),
        ('C', 0.77),
        ('N', 0.75),
        ('O', 0.73),
        ('F', 0.71),
    )
}

electronegativity = dict(H=2.2, C=2.55, N=3.04, O=3.44, F=3.98)

# Squared electronegativity, derived from the table above.
electronegativity_square = {
    element: chi * chi for element, chi in electronegativity.items()
}


def normal_dict(dict_input):
    min_value = min(dict_input.values())
def compute_acsf_descriptors(prefix, rcutoffs):
    """Compute ACSF descriptors for all structure files and save them as HDF5.

    Every ``*.xyz`` file matching ``data/structures/*.xyz`` is read, the ACSF
    descriptor is evaluated for the whole collection, and the result is
    written to ``data/descriptors/{prefix}.h5`` under the key ``acsf``
    (overwriting an existing file).

    Args:
        prefix: Base name (without extension) of the output HDF5 file.
        rcutoffs: Indexable collection of cutoff radii. Only ``rcutoffs[0]``
            is used.
            NOTE(review): the remaining entries are ignored -- confirm this
            is intended.
    """
    species = ['H', 'C', 'N', 'O', 'F']

    # Active G2 pairs. Other combinations of first value in {1, 4} and second
    # value in 0..4 were tried and are currently disabled.
    g2_params = [[1, 0], [1, 2], [4, 2]]

    # Active G4 triplets. Variants with the middle value in {4, 16, 32, 64}
    # are currently disabled, as is a G5 parameter set.
    g4_params = [[1, 1, 1], [1, 8, 1], [1, 1, -1], [1, 8, -1]]

    # Column labels per species: one 'g1', one per G2 pair and three per G4
    # triplet.
    # NOTE(review): labels are grouped per species here; verify that this
    # matches the feature ordering actually produced by the descriptor.
    g2_labels = [f'g2_{i:d}' for i in range(len(g2_params))]
    g4_labels = [f'g4_{i:d}' for i in range(len(g4_params) * 3)]
    featnames = ['g1', *g2_labels, *g4_labels]
    col_names = [f'{s}_{fn}' for s in species for fn in featnames]

    # Descriptor set-up
    acsf = ACSF(
        g2_params=g2_params,
        g4_params=g4_params,
        species=species,
        rcut=rcutoffs[0],
    )

    # Load every structure, showing a progress bar
    xyz_files = glob.glob('data/structures/*.xyz')
    mols = [
        read(xyz_file, format='xyz')
        for xyz_file in tqdm.tqdm(xyz_files, total=len(xyz_files))
    ]

    # Evaluate the descriptor for all structures
    acsf_mol = acsf.create(mols, positions=None, n_jobs=4)

    # Persist the descriptors with the generated column labels
    frame = pd.DataFrame(data=acsf_mol, columns=col_names)
    frame.to_hdf(f'data/descriptors/{prefix}.h5', key='acsf', mode='w')
Exemplo n.º 24
0
    def test_periodicity(self):
        """Test that periodic copies are correctly repeated and included in the
        output.

        Three cases are compared against hand-computed values: a single
        hydrogen atom in a periodic cubic cell, rocksalt NaCl (angular G4
        terms) and a primitive FCC hydrogen lattice.
        """
        # The system itself must be flagged periodic: with ``pbc=False`` there
        # are no periodic images for the ``periodic=True`` descriptor to
        # include, and the periodic assertions below could never hold.
        system = Atoms(symbols=["H"],
                       positions=[[0, 0, 0]],
                       cell=[2, 2, 2],
                       pbc=True)
        rcut = 2.5

        # Non-periodic descriptor: the lone atom has no neighbours
        desc = ACSF(rcut=rcut, species=[1], periodic=False)
        feat = desc.create(system)
        self.assertEqual(feat.sum(), 0)

        # Periodic cubic: 6 neighbours at distance 2 Å
        desc = ACSF(rcut=rcut, species=[1], periodic=True)
        feat = desc.create(system)
        self.assertNotEqual(feat.sum(), 0)
        self.assertAlmostEqual(feat[0, 0], 6 * cutoff(2, rcut), places=6)

        # Periodic rocksalt NaCl with a single G4 function
        rcut = 3
        system_nacl = bulk("NaCl", "rocksalt", a=4)
        eta, zeta, lambd = 0.01, 0.1, 1
        desc = ACSF(rcut=rcut,
                    g4_params=[(eta, zeta, lambd)],
                    species=["Na", "Cl"],
                    periodic=True)
        feat = desc.create(system_nacl)

        # Cl-Cl: 12 triplets with 90 degree angle at 2 angstrom distance
        R_ij = 2
        R_ik = 2
        R_jk = np.sqrt(2) * 2
        theta = np.pi / 2
        g4_cl_cl = 2**(1 - zeta) * 12 * (
            1 + lambd * np.cos(theta))**zeta * np.e**(
                -eta * (R_ij**2 + R_ik**2 + R_jk**2)) * cutoff(
                    R_ij, rcut) * cutoff(R_ik, rcut) * cutoff(R_jk, rcut)
        self.assertTrue(np.allclose(feat[0, 4], g4_cl_cl, rtol=1e-6, atol=0))

        # Na-Cl: 24 triplets with 45 degree angle at sqrt(2)*2 angstrom distance
        R_ij = np.sqrt(2) * 2
        R_ik = 2
        R_jk = 2
        theta = np.pi / 4
        g4_na_cl = 2**(1 - zeta) * 24 * (
            1 + lambd * np.cos(theta))**zeta * np.e**(
                -eta * (R_ij**2 + R_ik**2 + R_jk**2)) * cutoff(
                    R_ij, rcut) * cutoff(R_ik, rcut) * cutoff(R_jk, rcut)
        self.assertTrue(np.allclose(feat[0, 3], g4_na_cl, rtol=1e-6, atol=0))

        # Periodic primitive FCC: 12 neighbours at distance sqrt(2)/2*5
        rcut = 4
        system_fcc = bulk("H", "fcc", a=5)
        desc = ACSF(rcut=rcut, species=[1], periodic=True)
        feat = desc.create(system_fcc)
        self.assertNotEqual(feat.sum(), 0)
        # Expected value written out as 12 * 0.5 * (cos(pi * r / rcut) + 1)
        self.assertAlmostEqual(feat[0, 0],
                               12 * 0.5 *
                               (np.cos(np.pi * np.sqrt(2) / 2 * 5 / rcut) + 1),
                               places=6)