def test_properties(self):
    """Check that reassigning ``species`` through the property takes effect.

    The descriptor is built for two species and then switched to three; both
    the reported feature count and the width of the created output must
    differ between the two configurations.
    """
    descriptor = ACSF(
        rcut=6.0,
        species=[1, 8],
        g2_params=[[1, 2]],
        sparse=False,
    )

    # Baseline with the species handed to the constructor.
    n_before = descriptor.get_number_of_features()
    out_before = descriptor.create(H2O)

    # Swap the species through the property and measure again.
    descriptor.species = ["C", "H", "O"]
    n_after = descriptor.get_number_of_features()
    out_after = descriptor.create(molecule("CH3OH"))

    self.assertTrue(n_before != n_after)
    self.assertTrue(out_before.shape[1] != out_after.shape[1])
def ascf_Definition(species=None):
    """Build the dense ACSF descriptor with a fixed G2/G4 parameter grid.

    Parameters
    ----------
    species : list, optional
        Elements the descriptor should cover. Any falsy value (``None`` or
        an empty list) selects the default H, C, N, O, F, S set.

    Returns
    -------
    ACSF
        Descriptor with ``rcut=10``, nine G2 (eta, Rs) pairs and six G4
        (eta, ksi, lambda) triplets; the G3 and G5 parameters are passed as
        ``None``.
    """
    etas = (1, 0.1, 0.01)

    # G2 - eta/Rs couples: every eta at each of the shifts 2, 4 and 6.
    radial = [[eta, shift] for shift in (2, 4, 6) for eta in etas]

    # G4 - eta/ksi/lambda triplets: ksi fixed at 4, lambda = +1 first, then -1.
    angular = [[eta, 4, lam] for lam in (1, -1) for eta in etas]

    return ACSF(
        species=species if species else ["H", "C", "N", "O", "F", "S"],
        rcut=10,
        g2_params=radial,
        g3_params=None,
        g4_params=angular,
        g5_params=None,
        sparse=False,
    )
def __init__(self, molecule_map, r_cut, g2_params=None, g3_params=None, g4_params=None, g5_params=None, n_jobs=1):
    """Store the ACSF settings and build the matching DScribe descriptor.

    ``molecule_map`` and ``n_jobs`` are forwarded to the parent initializer.
    ``self.species`` is read right after that call, so it is presumably set
    by the parent class (verify against the base class).

    Parameters
    ----------
    molecule_map : passed through to the parent initializer.
    r_cut : cutoff handed to ``ACSF`` as ``rcut``.
    g2_params, g3_params, g4_params, g5_params : optional symmetry-function
        parameters, stored on the instance and forwarded to ``ACSF``.
    n_jobs : passed through to the parent initializer.
    """
    super().__init__(molecule_map, n_jobs)

    self.r_cut = r_cut

    # Keep each parameter set as an attribute of the same name and reuse the
    # mapping as keyword arguments for the descriptor.
    symmetry_params = {
        "g2_params": g2_params,
        "g3_params": g3_params,
        "g4_params": g4_params,
        "g5_params": g5_params,
    }
    for attr_name, value in symmetry_params.items():
        setattr(self, attr_name, value)

    self.dscribe_func = ACSF(species=self.species, rcut=r_cut, **symmetry_params)
def test_species(self):
    """Verify how species are stored when given as numbers or as symbols.

    In every case ``species`` must return the value exactly as supplied,
    while ``_atomic_numbers`` must hold the atomic numbers in ascending
    order.
    """
    def check(descriptor, supplied, sorted_numbers):
        # The public property echoes the input; the private array is ordered.
        self.assertEqual(descriptor.species, supplied)
        self.assertTrue(
            np.array_equal(descriptor._atomic_numbers, sorted_numbers))

    # Atomic numbers: constructor first, then the property setter.
    by_number = ACSF(rcut=6.0, species=[5, 1])
    check(by_number, [5, 1], [1, 5])
    by_number.species = [10, 2]
    check(by_number, [10, 2], [2, 10])

    # Chemical symbols: constructor first, then the property setter.
    by_symbol = ACSF(rcut=6.0, species=["O", "H"])
    check(by_symbol, ["O", "H"], [1, 8])
    by_symbol.species = ["N", "Pb"]
    check(by_symbol, ["N", "Pb"], [7, 82])
def test_parallel_sparse(self):
    """Compare batched sparse output against one-position-at-a-time output.

    The batched call is exercised with one job and explicit indices, with
    two jobs and explicit indices, and with two jobs and no positions.
    """
    samples = [molecule("CO"), molecule("N2O")]
    desc = ACSF(
        rcut=6.0,
        species=[6, 7, 8],
        g2_params=[[1, 2], [4, 5]],
        g3_params=[1, 2, 3, 4],
        g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
        g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
        sparse=True,
    )
    n_features = desc.get_number_of_features()

    def reference(pairs):
        # One row per (system index, atom index), each computed on its own.
        rows = np.empty((len(pairs), n_features))
        for row, (i_system, i_atom) in enumerate(pairs):
            rows[row, :] = desc.create(samples[i_system], [i_atom]).toarray()
        return rows

    indexed_pairs = [(0, 0), (1, 0), (1, 1)]

    # Multiple systems, positions given as indices, serial job
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=1,
    ).toarray()
    self.assertTrue(np.allclose(output, reference(indexed_pairs)))

    # Same request split over two jobs
    output = desc.create(
        system=samples,
        positions=[[0], [0, 1]],
        n_jobs=2,
    ).toarray()
    self.assertTrue(np.allclose(output, reference(indexed_pairs)))

    # No positions specified: every atom of both systems is expected
    output = desc.create(
        system=samples,
        positions=[None, None],
        n_jobs=2,
    ).toarray()
    every_atom = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)]
    self.assertTrue(np.allclose(output, reference(every_atom)))
def test_number_of_features(self):
    """Check the reported feature count for several parameter combinations.

    Each case pairs the extra constructor keywords with the count expected
    for the two-species setup.
    """
    species = [1, 8]
    n_elem = len(species)
    g2 = [[1, 2], [4, 5]]
    g3 = [1, 2, 3, 4]
    g4 = [[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]]

    cases = [
        # No extra parameters
        ({}, n_elem),
        # Two G2 parameter pairs
        ({"g2_params": g2}, n_elem * (2 + 1)),
        # Four G3 parameters
        ({"g3_params": g3}, n_elem * (4 + 1)),
        # Four G4 parameter triplets
        ({"g4_params": g4}, n_elem + 4 * 3),
        # G2, G3 and G4 combined
        ({"g2_params": g2, "g3_params": g3, "g4_params": g4},
         n_elem * (1 + 2 + 4) + 4 * 3),
    ]
    for extra, expected in cases:
        desc = ACSF(rcut=6.0, species=list(species), **extra)
        self.assertEqual(desc.get_number_of_features(), expected)
]], symbols=["H", "O", "H"], ) H = Atoms( cell=[[15.0, 0.0, 0.0], [0.0, 15.0, 0.0], [0.0, 0.0, 15.0]], positions=[ [0, 0, 0], ], symbols=["H"], ) default_desc = ACSF( rcut=6.0, species=[1, 8], g2_params=[[1, 2], [4, 5]], g3_params=[1, 2, 3, 4], g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], ) class ACSFTests(TestBaseClass, unittest.TestCase): def test_constructor(self): """Tests different valid and invalid constructor values. """ # Invalid species with self.assertRaises(ValueError): ACSF(rcut=6.0, species=None) # Invalid bond_params with self.assertRaises(ValueError):
from dscribe.descriptors import ACSF
from ase.build import molecule
import numpy as np
from ase import Atoms

# Setting up the ACSF descriptor for H and O with rcut=6.0, three G2
# parameter pairs and four G4 parameter triplets.
acsf = ACSF(
    species=["H", "O"],
    rcut=6.0,
    g2_params=[[1, 1], [1, 2], [1, 3]],
    g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]],
)

# Abandoned experiment kept for reference: handing an Atoms object straight
# to ACSF is rejected with the message quoted on the last line.
# a = 4.081
# b = a / 2
# fcc_atom = Atoms('Au',
#                  cell=[(0, b, b),
#                        (b, 0, b),
#                        (b, b, 0)],
#                  pbc=True)
# acsf = ACSF(fcc_atom) -> "Please provide the species as an iterable, e.g. a list."

water = molecule("H2O")
print(water)

# Create ACSF output (not MBTR, as an earlier note said) for the atom at
# index 1 only -- a hydrogen according to the original note; confirm against
# the structure printed above.
acsf_water = acsf.create(water, positions=[1])

print(acsf_water)
print(acsf_water.shape)
def __init__(self, desc_spec):
    """Make a DScribe ACSF object from a specification dictionary.

    See https://singroup.github.io/dscribe/tutorials/acsf.html for the
    meaning of the G2-G5 parameters.

    Parameters
    ----------
    desc_spec : dict
        Must contain ``"type": "ACSF"``, ``"species"`` and ``"cutoff"``.
        Optional keys are ``"periodic"`` (has to be falsy) and any of
        ``"g2_params"``, ``"g3_params"``, ``"g4_params"``, ``"g5_params"``;
        list values for those are converted to numpy arrays.

    Raises
    ------
    ValueError
        If the type is missing or not ``"ACSF"``, if a periodic descriptor
        is requested, or if ``"species"`` / ``"cutoff"`` is missing (the
        original ``KeyError`` is attached as the cause).
    """
    if desc_spec.get("type") != "ACSF":
        raise ValueError(
            "Type is not ACSF or cannot find the type of the descriptor")

    # Always define the attribute so later reads never hit AttributeError
    # when the specification simply omits the key.
    self.periodic = bool(desc_spec.get('periodic', False))
    if self.periodic:
        raise ValueError(
            "Warning: currently DScribe only supports ACSF for finite systems"
        )

    self.acsf_dict = {
        'g2_params': None,
        'g3_params': None,
        'g4_params': None,
        'g5_params': None
    }

    # required
    try:
        self.species = desc_spec['species']
        self.cutoff = desc_spec['cutoff']
    except KeyError as err:
        # Only a missing key means "not enough information"; anything else
        # should surface unchanged instead of being masked.
        raise ValueError(
            "Not enough information to intialize the `Atomic_Descriptor_ACF` object"
        ) from err

    # fill in the values
    for key in self.acsf_dict:
        if key in desc_spec:
            value = desc_spec[key]
            self.acsf_dict[key] = (
                np.asarray(value) if isinstance(value, list) else value)

    # Imported only once the specification is known to be valid, so bad
    # input is reported even where dscribe is not installed.
    from dscribe.descriptors import ACSF

    self.acsf = ACSF(species=self.species,
                     rcut=self.cutoff,
                     **self.acsf_dict,
                     sparse=False)

    print("Using ACSF Descriptors ...")

    # make an acronym: cutoff followed by the size of every parameter set used
    self.acronym = "ACSF-c" + str(self.cutoff)
    for key in ('g2_params', 'g3_params', 'g4_params', 'g5_params'):
        if self.acsf_dict[key] is not None:
            self.acronym += "-" + key[:2] + "-" + str(len(self.acsf_dict[key]))
Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC, ] HYBRIDIZATIONS=[ #Chem.rdchem.HybridizationType.S, Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2, Chem.rdchem.HybridizationType.SP3, #Chem.rdchem.HybridizationType.SP3D, #Chem.rdchem.HybridizationType.SP3D2, ] ACSF_GENERATOR = ACSF( species = SYMBOLS, rcut = 6.0, g2_params=[[1, 1], [1, 2], [1, 3]], g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], ) def one_hot_encoding(x, set): one_hot = [int(x == s) for s in set] return one_hot def one_hot_numpy(x, width): b = np.zeros((x.shape[0], width)) b[np.arange(x.shape[0]), x] = 1 return b
cell=[ [15.0, 0.0, 0.0], [0.0, 15.0, 0.0], [0.0, 0.0, 15.0] ], positions=[ [0, 0, 0], ], symbols=["H"], ) default_desc = ACSF( atomic_numbers=[1, 8], g2_params=[[1, 2]], # g2_params=[[1, 2], [4, 5]], # g3_params=[1, 2, 3, 4], # g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], # g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], ) class ACSFTests(TestBaseClass, unittest.TestCase): def test_constructor(self): """Tests different valid and invalid constructor values. """ # Invalid atomic_numbers with self.assertRaises(ValueError): ACSF(atomic_numbers=None) # Invalid bond_params
def main(fxyz, dictxyz, prefix, output, per_atom, r_cut, facsf_param, periodic, stride):
    """
    Generate the ACSF Representation.
    Currently only implemented for finite system in DSCRIBE.

    Parameters
    ----------
    fxyz: string giving location of xyz file ('none' to skip)
    dictxyz: string giving location of xyz file that is used as a dictionary
        ('none' to skip); its frames are placed before those of fxyz
    prefix: string giving the filename prefix
    output: 'xyz' appends the representations to an extended xyz file;
        'matrix' writes them as a standalone matrix (.desc, plus
        .atomic-desc when per_atom is set or there is a single frame)
    per_atom: truthy to also output the per-atom descriptors
    r_cut: float giving the cutoff radius
    facsf_param: path of a json file specifying the Gn parameters
        (see https://singroup.github.io/dscribe/tutorials/acsf.html for
        details). 'smart' is reserved and currently adds no Gn parameters;
        any other value that is not an existing file selects a basic
        built-in G2/G4 set.
    periodic: whether the system is periodic. The value goes through
        bool(), so any non-empty string -- including 'False' -- counts as
        True; NOTE(review): confirm the caller passes a real boolean.
    stride: compute descriptor each X frames (applies to fxyz only)

    Raises
    ------
    IOError: the json parameter file exists but cannot be read or parsed
    ValueError: output is neither 'matrix' nor 'xyz'
    """
    periodic = bool(periodic)
    per_atom = bool(per_atom)
    fframes = []
    dictframes = []

    # read frames
    if fxyz != 'none':
        fframes = read(fxyz, slice(0, None, stride))
        nfframes = len(fframes)
        print("read xyz file:", fxyz, ", a total of", nfframes, "frames")
    # read frames in the dictionary
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        ndictframes = len(dictframes)
        print("read xyz file used for a dictionary:", dictxyz, ", a total of",
              ndictframes, "frames")

    frames = dictframes + fframes
    nframes = len(frames)

    # Collect every element present; the descriptor must know all of them.
    global_species = []
    for frame in frames:
        global_species.extend(frame.get_atomic_numbers())
        if not periodic:
            frame.set_pbc([False, False, False])
    global_species = np.unique(global_species)
    print("a total of", nframes, "frames, with elements: ", global_species)

    if periodic:
        print("Warning: currently DScribe only supports ACSF for finite systems")

    # template for an ACSF descriptor; no 'periodic' entry is passed on
    # because only finite systems are supported here
    acsf_dict = {'species': global_species, 'rcut': r_cut,
                 'g2_params': None, 'g3_params': None,
                 'g4_params': None, 'g5_params': None}

    # Setting up the ACSF descriptor
    if os.path.isfile(facsf_param):
        # load file
        try:
            with open(facsf_param, 'r') as facsffile:
                acsf_param = json.load(facsffile)
        except (OSError, ValueError) as err:
            # json.JSONDecodeError is a ValueError; keep the cause attached
            raise IOError('Cannot load the json file for ACSF parameters') from err
        # fill in the values
        for k, v in acsf_param.items():
            if k in acsf_dict:
                acsf_dict[k] = np.asarray(v) if isinstance(v, list) else v
            else:
                print("Warning: unknown key ", k)
    elif facsf_param in ('smart', 'SMART', 'Smart'):
        # TODO: add default selection
        pass
    else:
        print("use very basic selections for ACSF")
        acsf_dict['g2_params'] = [[1, 1], [1, 2], [1, 3]]
        # This used to be written with ':' (an annotation), which left
        # g4_params at None; it has to be a real assignment.
        acsf_dict['g4_params'] = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]]

    # set it up
    rep_atomic = ACSF(**acsf_dict)

    if facsf_param != 'none':
        foutput = prefix + "-rcut" + str(r_cut) + '-' + facsf_param
        desc_name = "ACSF" + "-rcut" + str(r_cut) + '-' + facsf_param
    else:
        foutput = prefix + "-rcut" + str(r_cut)
        desc_name = "ACSF" + "-rcut" + str(r_cut)

    # prepare for the output: move earlier results aside, because the
    # writes below append to the files
    if os.path.isfile(foutput + ".xyz"):
        os.rename(foutput + ".xyz", "bck." + foutput + ".xyz")
    if os.path.isfile(foutput + ".desc"):
        os.rename(foutput + ".desc", "bck." + foutput + ".desc")

    for frame in frames:
        fnow = rep_atomic.create(frame, n_jobs=8)

        # the per-frame descriptor is the mean over the first axis of fnow
        frame.info[desc_name] = fnow.mean(axis=0)

        # save
        if output == 'matrix':
            with open(foutput + ".desc", "ab") as f:
                np.savetxt(f, frame.info[desc_name][None])
            if per_atom or nframes == 1:
                with open(foutput + ".atomic-desc", "ab") as f:
                    np.savetxt(f, fnow)
        elif output == 'xyz':
            # output per-atom info
            if per_atom:
                frame.new_array(desc_name, fnow)
            # write xyze
            write(foutput + ".xyz", frame, append=True)
        else:
            raise ValueError('Cannot find the output format')
# g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], # ) # Node_Dim : 58 # ACSF_GENERATOR = ACSF( # species = SYMBOL, # rcut = 6.0, # g2_params=[[1, 2], [1, 6]], # g4_params=[[1, 4, 1], [1, 4, -1]], # ) # Boris params # Node_Dim : 88 ACSF_GENERATOR = ACSF(species=SYMBOL, rcut=10.0, g2_params=[[1, 2], [1, 6]], g4_params=[[1, 4, 1], [0.1, 4, 1], [1, 4, -1], [0.1, 4, -1]]) EDGE_DIM = 6 NODE_DIM = 88 ## 93 13 NUM_TARGET = 8 # HIDDEN_DIM = 64 # initial 128 HIDDEN_DIM = 128 # # EDGE_DIM = 80 # NODE_DIM = 16 ## 93 13 # NUM_TARGET = 8 #---------------------------------------------------------------------------------
import pandas as pd import numpy as np from tqdm import tqdm from dscribe.descriptors import ACSF from dscribe.core.system import System from multiprocessing import Pool import compe_data g2_params = [[1, 1], [1, 2], [1, 3]] g4_params = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]] gen = ACSF( species=["H", "C", "N", "O", "F"], rcut=6.0, g2_params=g2_params, g4_params=g4_params, ) st_df = compe_data.read_structures() molecules = st_df["molecule_name"].unique() st_gr = st_df.groupby("molecule_name") # make st_dict for pararells st_dict = {} for molecule in tqdm(molecules): st_dict[molecule] = st_gr.get_group(molecule) def func_acsf(params): i, molecule = params #if i%1000 == 0: # print(f"{i}th finish") st = st_dict[molecule]
# ## Setup descriptors #cm_desc = CoulombMatrix(n_atoms_max=3, permutation="sorted_l2") #soap_desc = SOAP(species=["C", "H", "O", "N"], rcut=5, nmax=8, lmax=6, crossover=True) # ## Create descriptors as numpy arrays or scipy sparse matrices #water = samples[0] #coulomb_matrix = cm_desc.create(water) #soap = soap_desc.create(water, positions=[0]) # ## Easy to use also on multiple systems, can be parallelized across processes #coulomb_matrices = cm_desc.create(samples) #coulomb_matrices = cm_desc.create(samples, n_jobs=3) #oxygen_indices = [np.where(x.get_atomic_numbers() == 8)[0] for x in samples] #oxygen_soap = soap_desc.create(samples, oxygen_indices, n_jobs=3) # # from dscribe.descriptors import ACSF # Setting up the ACSF descriptor acsf = ACSF( species=["C", "O"], rcut=6.0, g2_params=[[1, 1], [1, 2], [1, 3]], g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], ) d = 1.1 co = Atoms(['C', 'O'], positions=[(0, 0, 0), (0, 0, d)]) acsf_water = acsf.create(co)
def test_features(self):
    """Tests that the correct features are present in the descriptor.

    Reference values for G1-G5 are computed by hand from the distances and
    angles of the module-level ``H2O`` system and compared, to six decimal
    places, against selected entries of the ACSF output.
    """
    # Irrational parameter values, so an accidental agreement is unlikely.
    rs = math.sqrt(2)
    kappa = math.sqrt(3)
    eta = math.sqrt(5)
    lmbd = 1
    zeta = math.sqrt(7)

    # Test against assumed values
    # NOTE(review): the names below assume atom order H, O, H (indices
    # 0, 1, 2) in the H2O fixture -- confirm against its definition.
    dist_oh = H2O.get_distance(0, 1)
    dist_hh = H2O.get_distance(0, 2)
    # get_angle results are multiplied by pi / 180 before use.
    ang_hoh = H2O.get_angle(0, 1, 2) * np.pi / 180.0
    ang_hho = H2O.get_angle(1, 0, 2) * np.pi / 180.0
    # The sign has no effect below: this angle is only ever passed to np.cos.
    ang_ohh = -H2O.get_angle(2, 0, 1) * np.pi / 180.0
    rc = 6.0

    # G1: cosine cutoff value for each neighbour distance
    desc = ACSF(rcut=6.0, species=[1, 8])
    acsfg1 = desc.create(H2O)
    g1_ho = 0.5 * (np.cos(np.pi * dist_oh / rc) + 1)
    g1_hh = 0.5 * (np.cos(np.pi * dist_hh / rc) + 1)
    # Factor 2: presumably both hydrogens contribute equally to the oxygen
    # row -- verify against the fixture geometry.
    g1_oh = 2 * 0.5 * (np.cos(np.pi * dist_oh / rc) + 1)
    self.assertAlmostEqual(acsfg1[0, 0], g1_hh, places=6)
    self.assertAlmostEqual(acsfg1[0, 1], g1_ho, places=6)
    self.assertAlmostEqual(acsfg1[1, 0], g1_oh, places=6)

    # G2: Gaussian in (distance - rs), weighted by the G1 value
    desc = ACSF(rcut=6.0, species=[1, 8], g2_params=[[eta, rs]])
    acsfg2 = desc.create(H2O)
    g2_hh = np.exp(-eta * np.power((dist_hh - rs), 2)) * g1_hh
    g2_ho = np.exp(-eta * np.power((dist_oh - rs), 2)) * g1_ho
    g2_oh = np.exp(-eta * np.power((dist_oh - rs), 2)) * g1_oh
    self.assertAlmostEqual(acsfg2[0, 1], g2_hh, places=6)
    self.assertAlmostEqual(acsfg2[0, 3], g2_ho, places=6)
    self.assertAlmostEqual(acsfg2[1, 1], g2_oh, places=6)

    # G3: cosine of (distance * kappa), weighted by the G1 value
    desc = ACSF(rcut=6.0, species=[1, 8], g3_params=[kappa])
    acsfg3 = desc.create(H2O)
    g3_hh = np.cos(dist_hh * kappa) * g1_hh
    g3_ho = np.cos(dist_oh * kappa) * g1_ho
    g3_oh = np.cos(dist_oh * kappa) * g1_oh
    self.assertAlmostEqual(acsfg3[0, 1], g3_hh, places=6)
    self.assertAlmostEqual(acsfg3[0, 3], g3_ho, places=6)
    self.assertAlmostEqual(acsfg3[1, 2 - 1], g3_oh, places=6) if False else self.assertAlmostEqual(acsfg3[1, 1], g3_oh, places=6)

    # G4: the Gaussian and cutoff product covers all three pair distances
    desc = ACSF(rcut=6.0, species=[1, 8], g4_params=[[eta, zeta, lmbd]])
    acsfg4 = desc.create(H2O)
    gauss = np.exp(-eta * (2 * dist_oh * dist_oh + dist_hh * dist_hh)) * g1_ho * g1_hh * g1_ho
    g4_h_ho = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_hho)), zeta) * gauss
    g4_h_oh = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_ohh)), zeta) * gauss
    g4_o_hh = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_hoh)), zeta) * gauss
    self.assertAlmostEqual(acsfg4[0, 3], g4_h_ho, places=6)
    self.assertAlmostEqual(acsfg4[2, 3], g4_h_oh, places=6)
    self.assertAlmostEqual(acsfg4[1, 2], g4_o_hh, places=6)

    # G5: only the two distances from the central atom enter
    desc = ACSF(rcut=6.0, species=[1, 8], g5_params=[[eta, zeta, lmbd]])
    acsfg5 = desc.create(H2O)
    gauss = np.exp(-eta * (dist_oh * dist_oh + dist_hh * dist_hh)) * g1_ho * g1_hh
    g5_h_ho = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_hho)), zeta) * gauss
    g5_h_oh = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_ohh)), zeta) * gauss
    # Centred on oxygen both legs are O-H, hence the separate Gaussian and
    # cutoff product instead of the shared ``gauss``.
    g5_o_hh = np.power(2, 1 - zeta) * np.power(
        (1 + lmbd * np.cos(ang_hoh)), zeta) * np.exp(
            -eta * (2 * dist_oh * dist_oh)) * g1_ho * g1_ho
    self.assertAlmostEqual(acsfg5[0, 3], g5_h_ho, places=6)
    self.assertAlmostEqual(acsfg5[2, 3], g5_h_oh, places=6)
    self.assertAlmostEqual(acsfg5[1, 2], g5_o_hh, places=6)
def _transform_by_group(transformers, descriptors, n_in, n_out, n_first=2):
    """Transform the first ``n_first`` particles and the rest separately.

    ``transformers[0]`` is applied to particles ``[:n_first]`` and
    ``transformers[1]`` to particles ``[n_first:]``. Each group is flattened
    to ``(frames * group_size, n_in)`` for ``transform`` and folded back to
    ``(frames, group_size, n_out)``.
    """
    n_frames, n_particles = descriptors.shape[:2]
    n_rest = n_particles - n_first
    result = np.empty((n_frames, n_particles, n_out))
    result[:, :n_first, :] = transformers[0].transform(
        descriptors[:, :n_first, :].reshape(n_frames * n_first, n_in)
    ).reshape(n_frames, n_first, n_out)
    result[:, n_first:, :] = transformers[1].transform(
        descriptors[:, n_first:, :].reshape(n_frames * n_rest, n_in)
    ).reshape(n_frames, n_rest, n_out)
    return result


def create_data_ACSF(data, metadata):
    """Compute ACSF descriptors for every frame and post-process them.

    Parameters
    ----------
    data : table-like object (``index``, column access and ``join`` are
        used, so presumably a pandas DataFrame -- confirm with the caller).
        ``data['molec'][i]`` is handed to ``ACSF.create`` for every ``i`` in
        ``0 .. data.index.max()``. An ``'is_train'`` column is added when
        missing.
    metadata : dict
        Reads ``'particles'`` (only its length is used), ``'scaler'`` and
        ``'N_PCA'``; reads ``'PCAs'`` when present. ``'scaler'`` is always
        written back; ``'PCAs'`` and ``'old_N_feature'`` are written when
        the PCAs are fitted here.

    Returns
    -------
    tuple
        ``data`` joined with a new ``'descriptor'`` column (one array per
        frame) and the updated ``metadata``.

    Notes
    -----
    The PCA and scaler objects come in pairs: element 0 handles the first
    two particles and element 1 all remaining ones.
    """
    particles = metadata['particles']
    scaler = metadata['scaler']
    N_PCA = metadata['N_PCA']

    acsf = ACSF(species=["H", "O"],
                rcut=9.0,
                g2_params=[[1, 0], [0.1, 0], [0.01, 0], [0.01, 0], [0.001, 0]],
                g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1],
                           [0.1, 1, 1], [0.1, 2, 1], [0.1, 1, -1],
                           [0.1, 2, -1], [0.01, 1, 1], [0.01, 2, 1],
                           [0.01, 1, -1], [0.01, 2, -1]])
    nb_features = acsf.get_number_of_features()

    n_frames = data.index.max() + 1
    n_particles = len(particles)

    # ``pd.np`` no longer exists in current pandas; use numpy directly.
    descriptors = np.empty((n_frames, n_particles, nb_features))
    for i_time in tqdm.tqdm(range(n_frames)):
        descriptors[i_time] = acsf.create(data['molec'][i_time],
                                          positions=np.arange(n_particles))

    # create training set
    try:
        data['is_train']
    except KeyError:
        data['is_train'] = create_is_train(n_frames)

    # selecting best params
    if N_PCA:
        fit_here = 'PCAs' not in metadata
        if fit_here:
            # Fit on the training frames only.
            PCAs = select_best_params(descriptors[data['is_train'].values],
                                      nb_features, N_PCA)
        else:
            PCAs = metadata['PCAs']
        descriptors = _transform_by_group(PCAs, descriptors, nb_features, N_PCA)
        if fit_here:
            metadata['old_N_feature'] = nb_features
            metadata['PCAs'] = PCAs
        nb_features = N_PCA

    # scaling: None -> fit new scalers, False -> leave the data unscaled,
    # anything else -> reuse the given pair of scalers
    if scaler is None:
        descriptors, scaler = scale_descriptors(data, descriptors)
    elif scaler == False:  # noqa: E712 -- equality kept on purpose (0 also disables)
        pass
    else:
        descriptors = _transform_by_group(scaler, descriptors, nb_features,
                                          nb_features)
    metadata['scaler'] = scaler

    return data.join(pd.DataFrame({'descriptor': list(descriptors)})), metadata
def main(fxyz, dictxyz, prefix, output, per_atom, r_cut , config_path , periodic):
    """
    Generate the ACSF representation of the frames in one or two xyz files.

    Parameters
    ----------
    fxyz: location of the xyz file ('none' to skip)
    dictxyz: location of an xyz file used as a dictionary ('none' to skip);
        its frames come first
    prefix: filename prefix of the output
    output: 'xyz' appends the representations to an extended xyz file;
        'matrix' writes them as a standalone matrix
    per_atom: truthy to also output the per-atom descriptors
    r_cut: cutoff radius passed to ACSF
    config_path: json file whose entries are passed to ACSF as keyword
        arguments (lists become numpy arrays); falsy for none
        (see https://singroup.github.io/dscribe/tutorials/acsf.html)
    periodic: converted with bool(); when false, pbc is switched off on
        every frame
    """
    periodic = bool(periodic)
    per_atom = bool(per_atom)

    # frames to describe
    fframes = []
    if fxyz != 'none':
        fframes = read(fxyz, ':')
        n_main = len(fframes)
        print("read xyz file:", fxyz, ", a total of", n_main, "frames")

    # frames of the dictionary
    dictframes = []
    if dictxyz != 'none':
        dictframes = read(dictxyz, ':')
        n_dict = len(dictframes)
        print("read xyz file used for a dictionary:", dictxyz, ", a total of",
              n_dict, "frames")

    frames = dictframes + fframes
    nframes = len(frames)

    # every atomic number that occurs anywhere, de-duplicated and sorted
    global_species = np.unique(
        [number for frame in frames for number in frame.get_atomic_numbers()])
    if not periodic:
        for frame in frames:
            frame.set_pbc([False, False, False])
    print("a total of", nframes, "frames, with elements: ", global_species)

    # descriptor and output names depend on whether a config file is given
    tag = "-rcut" + str(r_cut)
    if config_path:
        try:
            with open(config_path, 'r') as config_file:
                config = json.load(config_file)
            config = {
                key: np.asarray(value) if isinstance(value, list) else value
                for key, value in config.items()
            }
        except Exception:
            raise IOError('Cannot load the json file for parameters')
        rep_atomic = ACSF(rcut=r_cut, species=global_species, **config)
        tag = tag + '-' + config_path
    else:
        rep_atomic = ACSF(rcut=r_cut, species=global_species)
    foutput = prefix + tag
    desc_name = "ACSF" + tag

    # move earlier results aside, since the writes below append
    for extension in (".xyz", ".desc"):
        if os.path.isfile(foutput + extension):
            os.rename(foutput + extension, "bck." + foutput + extension)

    for frame in frames:
        fnow = rep_atomic.create(frame, n_jobs=8)
        # per-frame descriptor: mean over the first axis of the output
        frame.info[desc_name] = fnow.mean(axis=0)

        if output == 'matrix':
            with open(foutput + ".desc", "ab") as f:
                np.savetxt(f, frame.info[desc_name][None])
            if per_atom or nframes == 1:
                with open(foutput + ".atomic-desc", "ab") as f:
                    np.savetxt(f, fnow)
        elif output == 'xyz':
            if per_atom:
                # attach the per-atom values to the frame before writing
                frame.new_array(desc_name, fnow)
            write(foutput + ".xyz", frame, append=True)
        else:
            raise ValueError('Cannot find the output format')
descriptor = "SOAP" # Compute local descriptors all_atomtypes = [1, 6] #all_atomtypes = [] if descriptor == "SOAP": desc = SOAP(all_atomtypes, 8.0, 2, 0, periodic=False, crossover=True) print(desc.get_number_of_features()) elif descriptor == "ACSF": desc = ACSF(n_atoms_max=15, types=[1, 6, 7, 8], bond_params=[[ 1, 2, ], [ 4, 5, ]], bond_cos_params=[1, 2, 3, 4], ang4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], ang5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]], flatten=False) else: print("Add your local descriptor here") exit(0) ave = AverageKernel() desc_list = [] atomic_numbers_list = [] ase_atoms_list = [] all_atomtypes = [
from dscribe.descriptors import ACSF # Setting up the ACSF descriptor acsf = ACSF( atomic_numbers=[1, 8], rcut=6.0, g2_params=[[1, 1], [1, 2], [1, 3]], g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], ) # Creating an atomic system as an ase.Atoms-object from ase.build import molecule water = molecule("H2O") # Create MBTR output for the hydrogen atom at index 1 acsf_water = acsf.create(water, positions=[1]) print(acsf_water) print(acsf_water.shape)
from data import * from dscribe.descriptors import ACSF from dscribe.core.system import System #ACSF_GENERATOR = ACSF( # species=SYMBOL, # rcut=6.0, # g2_params = [[1, 1], [1, 2], [1, 3]], # g4_params = [[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], #) ACSF_GENERATOR = ACSF( species=SYMBOL, rcut=10.0, g2_params=[[15, 0.5], [1.5, 0.5], [0.15, 0.5], [15, 2], [1.5, 2], [0.15, 2]], g4_params=[[1, 5, 1], [0.1, 5, 1], [0.01, 5, 1], [1, 5, -1], [0.1, 5, -1], [0.01, 5, -1]], ) EDGE_DIM = 14 # 7 8 9 6 11 38 NODE_DIM = 165 # 120 13 93 123 NUM_TARGET = 8 class ChampsDataset(Dataset): def __init__(self, split, csv, mode, augment=None,
Chem.rdchem.HybridizationType.SP3, ] def gaussian_rbf(x, min_x, max_x, center_num): center_point = np.linspace(min_x, max_x, center_num) x_vec = np.exp(np.square(center_point - x)) return x_vec dist_min = 0.95860666 dist_max = 12.040386 ACSF_GENERATOR = ACSF( species=['H', 'C', 'N', 'O', 'F'], rcut=6.0, g2_params=[[1, 1], [1, 2], [1, 3]], g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1]], ) obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("xyz", "mol2") atomic_radius = {'H': 0.38, 'C': 0.77, 'N': 0.75, 'O': 0.73, 'F': 0.71} # Without fudge factor fudge_factor = 0.05 atomic_radius = {k: v + fudge_factor for k, v in atomic_radius.items()} electronegativity = {'H': 2.2, 'C': 2.55, 'N': 3.04, 'O': 3.44, 'F': 3.98} electronegativity_square = {'H': 2.2 * 2.2, 'C': 2.55 * 2.55, 'N': 3.04 * 3.04, 'O': 3.44 * 3.44, 'F': 3.98 * 3.98} def normal_dict(dict_input): min_value = min(dict_input.values())
def compute_acsf_descriptors(prefix, rcutoffs):
    """Compute ACSF descriptors for all structure files and store them as HDF5.

    Every ``data/structures/*.xyz`` file is read, described with G2 and G4
    symmetry functions, and the result is written to
    ``data/descriptors/{prefix}.h5`` under the key ``'acsf'``.

    Parameters
    ----------
    prefix : str
        Stem of the output file name.
    rcutoffs : sequence
        Only the first entry is used, as the ACSF cutoff.
    """
    species = ['H', 'C', 'N', 'O', 'F']

    # Active G2 pairs and G4 triplets; G5 is not used.
    g2_params = [[1, 0], [1, 2], [4, 2]]
    g4_params = [[1, 1, 1], [1, 8, 1], [1, 1, -1], [1, 8, -1]]

    # Column labels: per species, one g1, one per G2 pair, three per G4 triplet.
    featnames = ['g1']
    featnames += [f'g2_{i:d}' for i in range(len(g2_params))]
    featnames += [f'g4_{i:d}' for i in range(len(g4_params) * 3)]
    col_names = [f'{s}_{fn}' for s in species for fn in featnames]

    # Set up ACSF descriptor
    acsf = ACSF(
        g2_params=g2_params,
        g4_params=g4_params,
        species=species,
        rcut=rcutoffs[0],
    )

    # Read mol info
    xyz_files = glob.glob('data/structures/*.xyz')
    mols = [
        read(xyz_file, format='xyz')
        for xyz_file in tqdm.tqdm(xyz_files, total=len(xyz_files))
    ]

    # Create ACSF output for all mols
    acsf_mol = acsf.create(mols, positions=None, n_jobs=4)

    # Save ACSF descriptors
    frame = pd.DataFrame(data=acsf_mol, columns=col_names)
    frame.to_hdf(f'data/descriptors/{prefix}.h5', key='acsf', mode='w')
def test_periodicity(self):
    """Check that periodic images contribute to the descriptor output.

    Covers a single atom in a cubic cell (with and without periodicity),
    G4 values in rocksalt NaCl, and G1 in a primitive FCC cell.
    """
    def expected_g4(n_triplets, r_ij, r_ik, r_jk, theta, r_cut):
        # Hand-written G4 reference for ``n_triplets`` identical triplets.
        return 2**(1 - zeta) * n_triplets * (
            1 + lambd * np.cos(theta))**zeta * np.e**(
                -eta * (r_ij**2 + r_ik**2 + r_jk**2)) * cutoff(
                    r_ij, r_cut) * cutoff(r_ik, r_cut) * cutoff(r_jk, r_cut)

    single_h = Atoms(symbols=["H"], positions=[[0, 0, 0]], cell=[2, 2, 2], pbc=False)
    rcut = 2.5

    # Non-periodic: a lone atom has no neighbours
    isolated = ACSF(rcut=rcut, species=[1], periodic=False).create(single_h)
    self.assertTrue(isolated.sum() == 0)

    # Periodic cubic: 6 neighbours at distance 2 Å
    repeated = ACSF(rcut=rcut, species=[1], periodic=True).create(single_h)
    self.assertTrue(repeated.sum() != 0)
    self.assertAlmostEqual(repeated[0, 0], 6 * cutoff(2, rcut), places=6)

    # Periodic rocksalt NaCl with a = 4, checked through G4
    rcut = 3
    nacl = bulk("NaCl", "rocksalt", a=4)
    eta, zeta, lambd = 0.01, 0.1, 1
    nacl_desc = ACSF(rcut=rcut, g4_params=[(eta, zeta, lambd)], species=["Na", "Cl"], periodic=True)
    nacl_feat = nacl_desc.create(nacl)

    # Cl-Cl: 12 triplets with 90 degree angle at 2 angstrom distance
    g4_cl_cl = expected_g4(12, 2, 2, np.sqrt(2) * 2, np.pi / 2, rcut)
    self.assertTrue(np.allclose(nacl_feat[0, 4], g4_cl_cl, rtol=1e-6, atol=0))

    # Na-Cl: 24 triplets with 45 degree angle at sqrt(2)*2 angstrom distance
    g4_na_cl = expected_g4(24, np.sqrt(2) * 2, 2, 2, np.pi / 4, rcut)
    self.assertTrue(np.allclose(nacl_feat[0, 3], g4_na_cl, rtol=1e-6, atol=0))

    # Periodic primitive FCC: 12 neighbours at distance sqrt(2)/2*5
    rcut = 4
    fcc = bulk("H", "fcc", a=5)
    fcc_feat = ACSF(rcut=rcut, species=[1], periodic=True).create(fcc)
    self.assertTrue(fcc_feat.sum() != 0)
    self.assertAlmostEqual(fcc_feat[0, 0], 12 * 0.5 * (np.cos(np.pi * np.sqrt(2) / 2 * 5 / rcut) + 1), places=6)