class SmoothOverlapAtomicPosition(BaseDescriber):
    r"""
    Smooth overlap of atomic positions (SOAP) to describe the local
    environment of each atom.

    The descriptor is not computed in Python: ``transform_one`` writes the
    structure to disk, runs the external ``quip`` executable with a ``soap``
    descriptor string and parses the ``DESC`` lines of its output.

    Reference:
    @article{bartok2013representing,
             title={On representing chemical environments},
             author={Bart{\'o}k, Albert P and Kondor, Risi and Cs{\'a}nyi, G{\'a}bor},
             journal={Physical Review B},
             volume={87},
             number={18},
             pages={184115},
             year={2013},
             publisher={APS}}
    """

    def __init__(self,
                 cutoff: float,
                 l_max: int = 8,
                 n_max: int = 8,
                 atom_sigma: float = 0.5,
                 feature_batch: str = 'pandas_concat',
                 **kwargs):
        """
        Args:
            cutoff (float): Cutoff radius.
            l_max (int): The band limit of spherical harmonics basis function.
                Default to 8.
            n_max (int): The number of radial basis function. Default to 8.
            atom_sigma (float): The width of gaussian atomic density.
                Default to 0.5.
            feature_batch (str): way to batch together a list of features
            **kwargs: keyword args to specify memory, verbose, and n_jobs;
                forwarded unchanged to the base class constructor.
        """
        # Imported at call time, as in the original code (presumably to avoid
        # a circular import between the describer and pes packages -- TODO
        # confirm).
        from maml.apps.pes import GAPotential

        # The GAPotential instance is only used to write the structure file
        # that quip reads (see transform_one).
        self.operator = GAPotential()
        self.cutoff = cutoff
        self.l_max = l_max
        self.n_max = n_max
        self.atom_sigma = atom_sigma
        super().__init__(feature_batch=feature_batch, **kwargs)

    def _quip_command(self, structure, atoms_filename):
        """
        Build the argument list used to invoke ``quip`` for one structure.

        Args:
            structure (Structure): Structure whose species define the
                ``Z`` / ``species_Z`` lists of the soap descriptor.
            atoms_filename (str): Name of the file quip reads atoms from.

        Returns:
            list of str: ``['quip', 'atoms_filename=...',
            'descriptor_str={soap ...}']``.
        """
        atomic_numbers = [
            str(element.number)
            for element in sorted(np.unique(structure.species))
        ]
        n_species = len(atomic_numbers)
        # quip expects list-valued options wrapped in braces, e.g. "{3 8}".
        species = '{' + ' '.join(atomic_numbers) + '}'

        descriptor_args = [
            'soap',
            'cutoff={}'.format(self.cutoff),
            'l_max={}'.format(self.l_max),
            'n_max={}'.format(self.n_max),
            'atom_sigma={}'.format(self.atom_sigma),
            # The same species list is passed under both option pairs,
            # exactly as the command was assembled before.
            'n_Z={}'.format(n_species),
            'Z=' + species,
            'n_species={}'.format(n_species),
            'species_Z=' + species,
        ]
        return [
            'quip',
            'atoms_filename={}'.format(atoms_filename),
            'descriptor_str={' + ' '.join(descriptor_args) + '}',
        ]

    def transform_one(self, structure: Structure) -> pd.DataFrame:
        """
        Compute the SOAP descriptors of one structure by running ``quip``.

        Args:
            structure (Structure): Pymatgen Structure object.

        Returns:
            pd.DataFrame: one row per ``DESC`` line printed by quip
            (presumably one row per atom -- confirm against quip's output
            format); columns are the whitespace-separated values of that
            line converted to float.

        Raises:
            RuntimeError: if the ``quip`` executable is not on the PATH, or
                if quip exits with a non-zero return code.
        """
        if not which('quip'):
            # Adjacent literals are concatenated into ONE message; the
            # previous comma-separated form produced a tuple of three args.
            raise RuntimeError(
                "quip has not been found.\n"
                "Please refer to https://github.com/libAtoms/QUIP for "
                "further detail.")

        atoms_filename = 'structure.xyz'
        descriptor_output = 'output'
        exe_command = self._quip_command(structure, atoms_filename)

        # All files are created inside a scratch directory, so they must be
        # read back before the ``with`` block exits.
        with ScratchDir('.'):
            self.operator.write_cfgs(filename=atoms_filename,
                                     cfg_pool=pool_from([structure]))

            # Redirect quip's stdout to a file and make sure the handle is
            # closed once the process has finished. stderr is not redirected.
            with open(descriptor_output, 'w') as out_file:
                p = subprocess.Popen(exe_command, stdout=out_file)
                p.communicate()
            rc = p.returncode

            with zopen(descriptor_output, 'rt') as f:
                lines = f.read()

            if rc != 0:
                # stdout went to the file, so communicate() returns None for
                # it; the diagnostic text has to come from the file content.
                error_msg = 'quip/soap exited with return code %d' % rc
                msg = lines.splitlines()
                error_line = next(
                    (i for i, m in enumerate(msg) if m.startswith('ERROR')),
                    None)
                if error_line is not None:
                    error_msg += ': ' + ', '.join(msg[error_line:])
                elif msg:
                    error_msg += ': ' + msg[-1]
                raise RuntimeError(error_msg)

            descriptor_pattern = re.compile('DESC(.*?)\n', re.S)
            # Builtin float replaces the removed ``np.float`` alias; both
            # mean 64-bit floating point.
            descriptors = pd.DataFrame([
                np.array(c.split(), dtype=float)
                for c in descriptor_pattern.findall(lines)
            ])

        return descriptors
class GAPotentialTest(unittest.TestCase):
    """
    Tests for ``GAPotential``: configuration file round trip, training,
    evaluation and prediction.

    All tests run inside a temporary working directory that is created once
    in ``setUpClass`` and removed in ``tearDownClass``. Tests that need
    external executables are skipped when those executables are not found.
    """

    @classmethod
    def setUpClass(cls):
        """Create a temporary directory and make it the working directory."""
        cls.this_dir = os.path.dirname(os.path.abspath(__file__))
        cls.test_dir = tempfile.mkdtemp()
        os.chdir(cls.test_dir)

    @classmethod
    def tearDownClass(cls):
        """Return to the module-level CWD, then delete the temp directory."""
        # Leave the directory before removing it.
        os.chdir(CWD)
        shutil.rmtree(cls.test_dir)

    def setUp(self):
        """Create a fresh potential and unpack the shared test data pool."""
        self.potential = GAPotential(name="test")
        self.test_pool = test_datapool
        self.test_structures = []
        self.test_energies = []
        self.test_forces = []
        self.test_stresses = []
        for d in self.test_pool:
            self.test_structures.append(d["structure"])
            self.test_energies.append(d["outputs"]["energy"])
            self.test_forces.append(d["outputs"]["forces"])
            self.test_stresses.append(d["outputs"]["virial_stress"])
        # Single structure used by test_predict: the last entry of the pool.
        self.test_struct = self.test_structures[-1]

    def _train_potential(self):
        """Train ``self.potential`` on the full test data pool."""
        self.potential.train(
            train_structures=self.test_structures,
            train_energies=self.test_energies,
            train_forces=self.test_forces,
            train_stresses=self.test_stresses,
        )

    def test_write_read_cfgs(self):
        """Data written with write_cfgs must be recovered by read_cfgs."""
        self.potential.write_cfgs("test.xyz", cfg_pool=self.test_pool)
        datapool, df = self.potential.read_cfgs("test.xyz")
        self.assertEqual(len(self.test_pool), len(datapool))
        for data1, data2 in zip(self.test_pool, datapool):
            struct1 = data1["structure"]
            # read_cfgs returns the structure in dict form.
            struct2 = Structure.from_dict(data2["structure"])
            self.assertEqual(struct1, struct2)
            energy1 = data1["outputs"]["energy"]
            energy2 = data2["outputs"]["energy"]
            self.assertAlmostEqual(energy1, energy2)
            forces1 = np.array(data1["outputs"]["forces"])
            forces2 = data2["outputs"]["forces"]
            np.testing.assert_array_almost_equal(forces1, forces2)
            stress1 = np.array(data1["outputs"]["virial_stress"])
            stress2 = data2["outputs"]["virial_stress"]
            np.testing.assert_array_almost_equal(stress1, stress2)

    @unittest.skipIf(not which("gap_fit"), "No QUIP cmd found.")
    def test_train(self):
        """Training must leave a non-empty ``param`` on the potential."""
        self._train_potential()
        self.assertTrue(self.potential.param)

    # This test trains before evaluating, so it is guarded by ``gap_fit``
    # like the other training tests, in addition to ``quip``.
    @unittest.skipIf(not which("gap_fit"), "No QUIP cmd found.")
    @unittest.skipIf(not which("quip"), "No QUIP cmd found.")
    def test_evaluate(self):
        """Evaluation must return reference/predicted frames of equal length."""
        self._train_potential()
        df_orig, df_tar = self.potential.evaluate(
            test_structures=self.test_structures,
            test_energies=self.test_energies,
            test_forces=self.test_forces,
            test_stresses=self.test_stresses,
        )
        self.assertEqual(df_orig.shape[0], df_tar.shape[0])

    @unittest.skipIf(not which("gap_fit"), "No QUIP cmd found.")
    @unittest.skipIf(not which("lmp_serial"), "No LAMMPS cmd found.")
    def test_predict(self):
        """predict_efs must give one force per site and six stress values."""
        self._train_potential()
        energy, forces, stress = self.potential.predict_efs(self.test_struct)
        self.assertEqual(len(forces), len(self.test_struct))
        self.assertEqual(len(stress), 6)