def test_to_istructure(self):
    # StructureToIStructure must add an "istructure" column holding an
    # IStructure that compares equal to the source Structure.
    cubic_cell = Lattice([[4.209, 0, 0], [0, 4.209, 0], [0, 0, 4.209]])
    cscl = Structure(cubic_cell, ["Cl", "Cs"],
                     [[0.45, 0.5, 0.5], [0, 0, 0]])
    frame = DataFrame({"structure": [cscl]})

    # Run the conversion on the "structure" column
    frame = StructureToIStructure().featurize_dataframe(frame, 'structure')

    # The converted entry is an IStructure and still equals the original
    converted = frame["istructure"][0]
    self.assertIsInstance(converted, IStructure)
    self.assertEqual(converted, frame["structure"][0])
def setUpClass(cls):
    """Load the POSCAR fixture and build the shared lattice and input sets."""
    # Point at the test directory only when no pseudopotential directory is
    # already configured in the environment.
    os.environ.setdefault("PMG_VASP_PSP_DIR", test_dir)

    poscar = Poscar.from_file(os.path.join(test_dir, 'POSCAR'))
    cls.structure = poscar.structure
    cls.coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    cls.lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])

    # Input sets under test, all built from the same POSCAR structure.
    cls.mitset = MITRelaxSet(cls.structure)
    cls.mitset_unsorted = MITRelaxSet(cls.structure, sort_structure=False)
    cls.mpset = MPRelaxSet(cls.structure)
def read_cfgs(self, filename="output.data"):
    """
    Parse every ``begin ... end`` block of a configuration file.

    Lattice vectors and atomic positions are multiplied by
    ``self.bohr_to_angstrom``; energies are divided by ``self.eV_to_Ha``
    and forces by ``self.eV_to_Ha`` and ``self.bohr_to_angstrom``.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
        dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
        and ``df`` is the second item returned by ``convert_docs`` for
        that list.
    """
    with zopen(filename, "rt") as f:
        text = f.read()

    block_re = re.compile("begin\n(.*?)end", re.S)
    lattice_re = re.compile("lattice(.*?)\n")
    atom_re = re.compile("atom(.*?)\n")
    energy_re = re.compile("energy(.*?)\n")

    # Column layout of the whitespace-separated fields on each "atom" line.
    atom_columns = ("x", "y", "z", "specie", "charge", "atomic_energy",
                    "fx", "fy", "fz")

    data_pool = []
    for block in block_re.findall(text):
        cell_rows = [row.split() for row in lattice_re.findall(block)]
        lattice = Lattice(
            np.array(cell_rows, dtype=np.float64) * self.bohr_to_angstrom)

        atoms = pd.DataFrame([row.split() for row in atom_re.findall(block)])
        atoms.columns = list(atom_columns)

        coords = np.array(atoms.loc[:, ["x", "y", "z"]], dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(atoms["specie"])

        forces = np.array(atoms.loc[:, ["fx", "fy", "fz"]], dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of a block is used.
        first_energy = energy_re.findall(block)[0]
        energy = float(first_energy.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        data_pool.append({
            "outputs": {"energy": energy, "forces": forces},
            "structure": struct.as_dict(),
            "num_atoms": len(struct),
        })

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def test_interstice_distribution_of_glass(self):
    # Regression values of the IntersticeDistribution features for site 0 of
    # a 14-atom Cu/Zr cluster placed in a 25 x 25 x 25 cell.
    species = [
        "Cu", "Cu", "Cu", "Cu", "Cu", "Zr", "Cu",
        "Zr", "Cu", "Zr", "Cu", "Zr", "Cu", "Cu",
    ]
    cart_coords = [
        [11.81159679, 16.49480537, 21.69139442],
        [11.16777208, 17.87850033, 18.57877144],
        [12.22394796, 15.83218325, 19.37763412],
        [13.07053548, 14.34025424, 21.77557646],
        [10.78147725, 19.61647494, 20.77595531],
        [10.87541011, 14.65986432, 23.61517624],
        [12.76631002, 18.41479521, 20.46717947],
        [14.63911675, 16.47487037, 20.52671362],
        [14.2470256, 18.44215167, 22.56257566],
        [9.38050168, 16.87974592, 20.51885879],
        [10.66332986, 14.43900833, 20.545186],
        [11.57096832, 18.79848982, 23.26073408],
        [13.27048138, 16.38613795, 23.59697472],
        [9.55774984, 17.09220537, 23.1856528],
    ]
    cuzr_glass = Structure(Lattice([[25, 0, 0], [0, 25, 0], [0, 0, 25]]),
                           species, cart_coords,
                           coords_are_cartesian=True)
    df_glass = pd.DataFrame({'struct': [cuzr_glass], 'site': [0]})

    intersticefp = IntersticeDistribution().featurize_dataframe(
        df_glass, ['struct', 'site'])

    # (feature column, reference value), compared to 5 decimal places in
    # this order.
    reference_values = (
        ('Interstice_vol_mean', 0.28905),
        ('Interstice_vol_std_dev', 0.04037),
        ('Interstice_vol_minimum', 0.21672),
        ('Interstice_vol_maximum', 0.39084),
        ('Interstice_area_mean', 0.16070),
        ('Interstice_area_std_dev', 0.05245),
        ('Interstice_area_minimum', 0.07132),
        ('Interstice_area_maximum', 0.26953),
        ('Interstice_dist_mean', 0.08154),
        ('Interstice_dist_std_dev', 0.14778),
        ('Interstice_dist_minimum', -0.04668),
        ('Interstice_dist_maximum', 0.37565),
    )
    for column, reference in reference_values:
        self.assertAlmostEqual(intersticefp[column][0], reference, 5)
def __update_c(self, new_c: float) -> None:
    """
    Replace the third lattice vector with ``[0, 0, new_c]`` while keeping
    every site at its current cartesian position.

    Be careful: a c-length that cannot accommodate all the sites will mess
    up the interface.

    Args:
        new_c: New c-length; must be greater than 0.

    Raises:
        ValueError: If ``new_c`` is not greater than 0.
    """
    if new_c <= 0:
        raise ValueError("New c-length must be greater than 0")

    # Keep the first two lattice vectors and swap in the new c vector.
    in_plane_vectors = self.lattice.matrix[:2].tolist()
    resized_lattice = Lattice(in_plane_vectors + [[0, 0, new_c]])
    self._lattice = resized_lattice

    # The cartesian coordinates are read once, when the zip is created,
    # i.e. before any site below is modified.
    for site, cartesian in zip(self, self.cart_coords):
        site._lattice = resized_lattice  # Update the lattice
        site.coords = cartesian  # Put back into original cartesian space
def lattice_constant(self, structure, potential, supercell=(1, 1, 1),
                     etol=1e-6, ftol=1e-6, nsearch=2000, neval=10000):
    """
    Relax a supercell of the conventional structure and return its lattice.

    Args:
        structure: Structure passed to ``self.conventional_structure``.
        potential: Potential handed to the calculator.
        supercell: Three repeat counts, one per lattice vector, used to
            build the supercell that is submitted.
        etol: Energy tolerance of the ``minimize`` command.
        ftol: Force tolerance of the ``minimize`` command.
        nsearch: Third (integer) argument of the ``minimize`` command.
        neval: Fourth (integer) argument of the ``minimize`` command.

    Returns:
        tuple: ``(initial_lattice, relaxed_lattice)`` -- the lattice of the
        conventional structure as built, and the lattice reported by the
        calculator with each lattice vector divided by its repeat count.

    Raises:
        ValueError: If ``self.calculator_type`` is neither ``'lammps'`` nor
            ``'lammps_cython'``.
    """
    conventional_structure = self.conventional_structure(structure)

    # %g keeps tolerances below 1e-6 intact; %f would print them as 0.000000.
    minimize_args = '%g %g %d %d' % (etol, ftol, nsearch, neval)

    if self.calculator_type == 'lammps':
        relax_lammps_script = load_lammps_set('relax')
        relax_lammps_script['minimize'] = minimize_args
        kwargs = {'lammps_set': relax_lammps_script}
    elif self.calculator_type == 'lammps_cython':
        kwargs = {
            'lammps_additional_commands': [
                'fix 1 all box/relax iso 0.0 vmax 0.001',
                'min_style cg',
                'minimize ' + minimize_args,
            ]
        }
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``kwargs``.
        raise ValueError(
            'lattice_constant does not support calculator type %r'
            % (self.calculator_type,))

    async def calculate():
        future = await self.calculator.submit(
            conventional_structure * supercell, potential,
            properties={'lattice'}, **kwargs)
        await future
        return future.result()

    result = self._run_async_func(calculate())
    lattice = Lattice(result['results']['lattice'])

    # Lattice vectors are the rows of the matrix and ``structure * supercell``
    # scales row i by supercell[i], so the repeat counts must be divided out
    # row-wise (a column vector), not column-wise.
    repeats = np.asarray(supercell, dtype=float).reshape(3, 1)
    return conventional_structure.lattice, Lattice(lattice.matrix / repeats)
def test_potcar_symbols(self):
    # POTCAR symbols reported by the MIT set (default sorting) and by the
    # MP set with sort_structure=False, for a P/Fe/O structure.
    coords = [
        [0, 0, 0],
        [0.75, 0.5, 0.75],
        [0.75, 0.25, 0.75],
    ]
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    structure = Structure(lattice, ["P", "Fe", "O"], coords)

    mit_symbols = MITRelaxSet(structure).potcar_symbols
    self.assertEqual(mit_symbols, ['Fe', 'P', 'O'])

    mp_symbols = MPRelaxSet(structure, sort_structure=False).potcar_symbols
    self.assertEqual(mp_symbols, ['P', 'Fe_pv', 'O'])
def _lattice_crossover(self):
    """
    Mix the parents' lattices using the two weights in ``self.w_lat``.

    The weighted mean of the two lattice matrices supplies the vector
    directions; each row is then rescaled to the weighted mean of the
    parents' ``abc`` lengths. The result is stored in ``self.lattice``.
    """
    weight_a = self.w_lat[0]
    weight_b = self.w_lat[1]
    lattice_a = self.parent_A.lattice
    lattice_b = self.parent_B.lattice

    # ---------- weighted mean of the lattice matrices
    mixed = ((weight_a * lattice_a.matrix + weight_b * lattice_b.matrix)
             / self.w_lat.sum())
    mixed_len = np.sqrt((mixed**2).sum(axis=1))

    # ---------- weighted mean of the vector lengths
    target_len = ((np.array(lattice_a.abc) * weight_a
                   + np.array(lattice_b.abc) * weight_b)
                  / self.w_lat.sum())

    # ---------- rescale every row to its target length
    rescaled = (mixed * target_len[:, np.newaxis]
                / mixed_len[:, np.newaxis])

    # ---------- Lattice for pymatgen
    self.lattice = Lattice(rescaled)
def test_structure_to_composition(self):
    # StructureToComposition: full composition by default, reduced
    # composition when reduce=True.
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    struct = Structure(lattice, ["Si"] * 2, [[0, 0, 0], [0.75, 0.5, 0.75]])
    frame = DataFrame(data={'structure': [struct]})

    frame = StructureToComposition().featurize_dataframe(frame, 'structure')
    self.assertEqual(frame["composition"].tolist()[0], Composition("Si2"))

    reducer = StructureToComposition(reduce=True,
                                     target_col_id='composition_red')
    frame = reducer.featurize_dataframe(frame, 'structure')
    self.assertEqual(frame["composition_red"].tolist()[0], Composition("Si"))
def read_cfgs(self, filename='output.data'):
    """
    Read the configuration file.

    Each ``begin ... end`` block becomes one entry. Lattice vectors and
    atomic positions are multiplied by ``self.bohr_to_angstrom``; energies
    are divided by ``self.eV_to_Ha`` and forces by ``self.eV_to_Ha`` and
    ``self.bohr_to_angstrom``.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
        dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
        and ``df`` is the second item returned by ``convert_docs`` for
        that list.
    """
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('begin\n(.*?)end', re.S)
    lattice_pattern = re.compile('lattice(.*?)\n')
    position_pattern = re.compile('atom(.*?)\n')
    energy_pattern = re.compile('energy(.*?)\n')

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        # np.float64 replaces the ``np.float`` alias, which NumPy 1.24
        # removed.
        lattice_str = lattice_pattern.findall(block)
        lattice = Lattice(
            np.array([latt.split() for latt in lattice_str],
                     dtype=np.float64) * self.bohr_to_angstrom)

        position_str = position_pattern.findall(block)
        positions = pd.DataFrame([pos.split() for pos in position_str])
        positions.columns = [
            'x', 'y', 'z', 'specie', 'charge', 'atomic_energy',
            'fx', 'fy', 'fz'
        ]

        coords = np.array(positions.loc[:, ['x', 'y', 'z']],
                          dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(positions['specie'])

        forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']],
                          dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of a block is used.
        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        d['outputs']['forces'] = forces
        d['num_atoms'] = len(struct)
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def test_structure_to_oxidstructure(self):
    # StructureToOxidStructure: default decoration, overridden oxidation
    # states, in-place overwrite, and both error-handling modes.
    cubic_cell = Lattice([[4.209, 0, 0], [0, 4.209, 0], [0, 0, 4.209]])
    cscl = Structure(cubic_cell, ["Cl", "Cs"],
                     [[0.45, 0.5, 0.5], [0, 0, 0]])
    df = DataFrame(data={'structure': [cscl]})

    df = StructureToOxidStructure().featurize_dataframe(df, 'structure')
    decorated = df["structure_oxid"].tolist()[0]
    self.assertEqual(decorated[0].specie.oxi_state, -1)
    self.assertEqual(decorated[1].specie.oxi_state, +1)

    override = StructureToOxidStructure(
        target_col_id='structure_oxid2',
        oxi_states_override={"Cl": [-2], "Cs": [+2]})
    df = override.featurize_dataframe(df, 'structure')
    overridden = df["structure_oxid2"].tolist()[0]
    self.assertEqual(overridden[0].specie.oxi_state, -2)
    self.assertEqual(overridden[1].specie.oxi_state, +2)

    # original is preserved
    self.assertEqual(df["structure"].tolist()[0][0].specie, Element("Cl"))

    # test in-place
    in_place = StructureToOxidStructure(target_col_id=None,
                                        overwrite_data=True)
    df = in_place.featurize_dataframe(df, 'structure')
    self.assertEqual(df["structure"].tolist()[0][0].specie.oxi_state, -1)

    # test error handling
    test_struct = Structure([5, 0, 0, 0, 5, 0, 0, 0, 5],
                            ['Sb', 'F', 'O'],
                            [[0, 0, 0], [0.2, 0.2, 0.2], [0.5, 0.5, 0.5]])
    df = DataFrame(data={'structure': [test_struct]})
    strict = StructureToOxidStructure(return_original_on_error=False,
                                      max_sites=2)
    self.assertRaises(ValueError, strict.featurize_dataframe, df,
                      'structure')

    # check non oxi state structure returned correctly
    lenient = StructureToOxidStructure(return_original_on_error=True,
                                       max_sites=2)
    df = lenient.featurize_dataframe(df, 'structure')
    self.assertEqual(df["structure_oxid"].tolist()[0][0].specie,
                     Element("Sb"))
def test_check_structures(self):
    # check_structures_forces_stresses on a two-atom Mo cell with a skewed
    # lattice: reference forces/stresses, then both return_none modes when
    # only structures are passed.
    s = Structure(
        Lattice(
            np.array([[3.16, 0.1, 0.2], [0.1, 3.17, 0.3], [0.1, 0.2, 3]])),
        ["Mo", "Mo"],
        [[0, 0, 0], [0.13, 0.4, 0.2]],
    )

    forces = np.array([[0.04844841, 0.08648062, 0.07070806],
                       [-0.04844841, -0.08648062, -0.07070806]])
    stress = np.array([
        -0.22279327, -1.2809575, -0.44279698,
        -0.23345818, -0.37798718, -0.17676364
    ])

    checked_force = np.array([[0.05552151, 0.09063424, 0.05940176],
                              [-0.05552151, -0.09063424, -0.05940176]])
    checked_stress = np.array([
        -0.26319715, -1.3219795, -0.3613719,
        -0.30627516, -0.27276486, -0.17306383
    ])

    new_structures, new_forces, new_stresses = \
        check_structures_forces_stresses([s], [forces], [stress])

    # assertLess reports the offending norm on failure, which the former
    # assertTrue(... < 1e-4) plus debug print did not.
    self.assertLess(np.linalg.norm(checked_force - new_forces[0]), 1e-4)
    self.assertLess(np.linalg.norm(checked_stress - new_stresses[0]), 1e-4)

    # Without forces/stresses and return_none=False only structures return.
    new_structures = check_structures_forces_stresses(structures=[s],
                                                      return_none=False)
    self.assertEqual(len(new_structures), 1)
    self.assertIsInstance(new_structures[0], Structure)

    # With return_none=True the missing inputs come back as lists of None.
    new_structures, new_forces, new_stresses = \
        check_structures_forces_stresses(structures=[s, s],
                                         return_none=True)
    self.assertEqual(len(new_forces), 2)
    self.assertIsNone(new_forces[0])
    self.assertEqual(len(new_stresses), 2)
    self.assertIsNone(new_stresses[0])
def generate_json_files():
    """
    Write a one-row DataFrame holding a diamond structure to ``test_dir``
    as plain, gzip-compressed and bz2-compressed JSON files.
    """
    diamond_cell = Lattice([[2.189, 0, 1.264],
                            [0.73, 2.064, 1.264],
                            [0, 0, 2.528]])
    diamond = Structure(diamond_cell,
                        ["C0+", "C0+"],
                        [[2.554, 1.806, 4.423], [0.365, 0.258, 0.632]],
                        validate_proximity=False,
                        to_unit_cell=False,
                        coords_are_cartesian=True,
                        site_properties=None)
    df = pd.DataFrame(data={'structure': [diamond]})

    # (file name, extra keyword arguments for store_dataframe_as_json)
    targets = (
        ("dataframe.json", {}),
        ("dataframe.json.gz", {'compression': 'gz'}),
        ("dataframe.json.bz2", {'compression': 'bz2'}),
    )
    for file_name, options in targets:
        store_dataframe_as_json(df, os.path.join(test_dir, file_name),
                                **options)
def setUpClass(cls):
    """Create the Si test structure and load the reference VASP inputs."""
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    cls.struct_si = IStructure(lattice, ["Si"] * 2,
                               [[0, 0, 0], [0.75, 0.5, 0.75]])

    def reference_file(name):
        # All reference inputs live in ../test_files/setup_test.
        return os.path.join(module_dir, "..", "test_files", "setup_test",
                            name)

    cls.ref_incar = Incar.from_file(reference_file("INCAR"))
    cls.ref_poscar = Poscar.from_file(reference_file("POSCAR"))
    cls.ref_potcar = Potcar.from_file(reference_file("POTCAR"))
    cls.ref_kpoints = Kpoints.from_file(reference_file("KPOINTS"))
def test_conversion_multiindex_dynamic(self):
    # test dynamic target_col_id setting with multiindex
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    struct = Structure(lattice, ["Si"] * 2, [[0, 0, 0], [0.75, 0.5, 0.75]])

    # Two identical rows, then wrap the columns under a "custom" level.
    df_2lvl = DataFrame(
        data={'structure_dict': [struct.as_dict(), struct.as_dict()]})
    df_2lvl.columns = MultiIndex.from_product(
        (["custom"], df_2lvl.columns.values))

    df_2lvl = DictToObject().featurize_dataframe(
        df_2lvl, ('custom', 'structure_dict'), multiindex=True)

    converted = df_2lvl[('DictToObject', 'structure_dict_object')]
    self.assertEqual(converted.tolist()[0], struct)
    self.assertEqual(converted.tolist()[1], struct)
def lattice(self):
    """
    Build the lattice from the ``=.in`` structure information.

    The lattice matrix has the basis vectors as rows. For trigonal and
    hexagonal space groups the matrix is multiplied by a 30 degree rotation
    about z; for systems other than cubic, tetragonal or orthorhombic a
    warning is logged because the orientation is untested.
    """
    structure_info = self["=.in"].structure_information

    # todo: convert non-angstrom units
    # NOTE(review): type 2 presumably means angstrom -- confirm.
    assert structure_info.lengthunit.type == 2

    cell_parameters = (*structure_info.lattice_constants,
                       *structure_info.axis_angles)
    lattice = Lattice.from_parameters(*cell_parameters)

    crystal_system = self.spacegroup.crystal_system

    # translate to FPLO convention
    # see also: https://www.listserv.dfn.de/sympa/arc/fplo-users/2020-01/msg00002.html
    if crystal_system in ('trigonal', 'hexagonal'):
        z_rotation = Rotation.from_rotvec([0, 0, 30],
                                          degrees=True).as_matrix()
        return Lattice(lattice.matrix @ z_rotation)

    if crystal_system not in ('cubic', 'tetragonal', 'orthorhombic'):
        log.warning('untested lattice, crystal orientation may not be correct')

    return lattice
def test_properties(self):
    # Basic StructureGraph metadata and connectivity queries on the MoS2
    # and square-lattice fixtures, plus a duplicate-edge regression check.
    graph = self.mos2_sg
    self.assertEqual(graph.name, "bonds")
    self.assertEqual(graph.edge_weight_name, "bond_length")
    self.assertEqual(graph.edge_weight_unit, "Å")
    self.assertEqual(graph.get_coordination_of_site(0), 6)

    neighbours = graph.get_connected_sites(0)
    self.assertEqual(len(neighbours), 6)
    self.assertTrue(isinstance(neighbours[0].site, PeriodicSite))
    self.assertEqual(str(neighbours[0].site.specie), "S")

    shifted = graph.get_connected_sites(0, jimage=(0, 0, 100))
    self.assertAlmostEqual(shifted[0].site.frac_coords[2], 100.303027)

    # these two graphs should be equivalent
    for index in range(len(self.bc_square_sg)):
        self.assertEqual(
            self.bc_square_sg.get_coordination_of_site(index),
            self.bc_square_sg_r.get_coordination_of_site(index),
        )

    # test we're not getting duplicate connected sites
    # thanks to Jack D. Sundberg for reporting this bug

    # known example where this bug occurred due to edge weights not being
    # bit-for-bit identical in otherwise identical edges
    nacl_lattice = Lattice([
        [3.48543625, 0.0, 2.01231756],
        [1.16181208, 3.28610081, 2.01231756],
        [0.0, 0.0, 4.02463512],
    ])
    nacl = Structure(nacl_lattice, ["Na", "Cl"],
                     [[0, 0, 0], [0.5, 0.5, 0.5]])
    nacl_graph = StructureGraph.with_local_env_strategy(
        nacl, CutOffDictNN({("Cl", "Cl"): 5.0}))

    self.assertEqual(len(nacl_graph.get_connected_sites(1)), 12)
    self.assertEqual(len(nacl_graph.graph.get_edge_data(1, 1)), 6)
def test_structure_to_oxidstructure(self):
    # structure_to_oxidstructure: default decoration, overridden oxidation
    # states, and in-place modification of the input column.
    cubic_cell = Lattice([[4.209, 0, 0], [0, 4.209, 0], [0, 0, 4.209]])
    cscl = Structure(cubic_cell, ["Cl", "Cs"],
                     [[0.45, 0.5, 0.5], [0, 0, 0]])
    df = DataFrame(data={'structure': [cscl]})

    df["struct_oxid"] = structure_to_oxidstructure(df["structure"])
    decorated = df["struct_oxid"].tolist()[0]
    self.assertEqual(decorated[0].specie.oxi_state, -1)
    self.assertEqual(decorated[1].specie.oxi_state, +1)

    df["struct_oxid2"] = structure_to_oxidstructure(
        df["structure"], oxi_states_override={"Cl": [-2], "Cs": [+2]})
    overridden = df["struct_oxid2"].tolist()[0]
    self.assertEqual(overridden[0].specie.oxi_state, -2)
    self.assertEqual(overridden[1].specie.oxi_state, +2)

    # original is preserved
    self.assertEqual(df["structure"].tolist()[0][0].specie, Element("Cl"))

    # test in-place
    structure_to_oxidstructure(df["structure"], inplace=True)
    self.assertEqual(df["structure"].tolist()[0][0].specie.oxi_state, -1)
def test_json_to_object(self):
    # JsonToObject with an explicit target column and with the default
    # "<column>_object" name.
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    struct = Structure(lattice, ["Si"] * 2, [[0, 0, 0], [0.75, 0.5, 0.75]])
    records = {'structure_json': [json.dumps(struct, cls=MontyEncoder)]}

    df = DataFrame(data=records)
    df = JsonToObject(target_col_id='structure').featurize_dataframe(
        df, 'structure_json')
    self.assertEqual(df["structure"].tolist()[0], struct)

    # test dynamic target_col_id setting
    df = DataFrame(data=records)
    df = JsonToObject().featurize_dataframe(df, 'structure_json')
    self.assertEqual(df["structure_json_object"].tolist()[0], struct)
def test_dict_to_object(self):
    # DictToObject with an explicit target column and with the default
    # "<column>_object" name; both rows must round-trip.
    lattice = Lattice([
        [3.8401979337, 0.00, 0.00],
        [1.9200989668, 3.3257101909, 0.00],
        [0.00, -2.2171384943, 3.1355090603],
    ])
    struct = Structure(lattice, ["Si"] * 2, [[0, 0, 0], [0.75, 0.5, 0.75]])
    records = {'structure_dict': [struct.as_dict(), struct.as_dict()]}

    df = DataFrame(data=records)
    df = DictToObject(target_col_id='structure').featurize_dataframe(
        df, 'structure_dict')
    self.assertEqual(df["structure"].tolist()[0], struct)
    self.assertEqual(df["structure"].tolist()[1], struct)

    # test dynamic target_col_id setting
    df = DataFrame(data=records)
    df = DictToObject().featurize_dataframe(df, 'structure_dict')
    self.assertEqual(df["structure_dict_object"].tolist()[0], struct)
    self.assertEqual(df["structure_dict_object"].tolist()[1], struct)
def setUp(self):
    """Create the diamond, NaCl, CsCl, Ni3Al and simple-cubic fixtures."""

    def cartesian_structure(matrix, species, cart_coords):
        # Structures whose site coordinates are given in cartesian space.
        return Structure(Lattice(matrix), species, cart_coords,
                         validate_proximity=False,
                         to_unit_cell=False,
                         coords_are_cartesian=True,
                         site_properties=None)

    diamond_matrix = [[2.189, 0, 1.264], [0.73, 2.064, 1.264], [0, 0, 2.528]]
    diamond_coords = [[2.554, 1.806, 4.423], [0.365, 0.258, 0.632]]
    self.diamond = cartesian_structure(
        diamond_matrix, ["C0+", "C0+"], diamond_coords)
    self.diamond_no_oxi = cartesian_structure(
        diamond_matrix, ["C", "C"], diamond_coords)

    self.nacl = cartesian_structure(
        [[3.485, 0, 2.012], [1.162, 3.286, 2.012], [0, 0, 4.025]],
        ["Na1+", "Cl1-"],
        [[0, 0, 0], [2.324, 1.643, 4.025]])
    self.cscl = cartesian_structure(
        [[4.209, 0, 0], [0, 4.209, 0], [0, 0, 4.209]],
        ["Cl1-", "Cs1+"],
        [[2.105, 2.1045, 2.1045], [0, 0, 0]])

    # The two cubic cells below use fractional coordinates.
    cubic_matrix = [[3.52, 0, 0], [0, 3.52, 0], [0, 0, 3.52]]
    self.ni3al = Structure(
        Lattice(cubic_matrix),
        ["Al"] + ["Ni"] * 3,
        [[0, 0, 0], [0.5, 0.5, 0], [0.5, 0, 0.5], [0, 0.5, 0.5]],
        validate_proximity=False,
        to_unit_cell=False,
        coords_are_cartesian=False,
        site_properties=None)
    self.sc = Structure(
        Lattice(cubic_matrix),
        ["Al"],
        [[0, 0, 0]],
        validate_proximity=False,
        to_unit_cell=False,
        coords_are_cartesian=False)

    self.bond_angles = range(5, 180, 5)
def get_sqrt2_1_struct(self):
    """
    Set up a sqrt 2 x sqrt 2 x 1 structure.

    The cell is ``self.lattconst`` scaled by sqrt(2) along a and b and then
    multiplied by a 45 degree rotation matrix about z. It holds two
    ``eltA``, two ``eltB`` and six ``eltC`` sites at fixed fractional
    coordinates.

    Returns:
        A copy of the constructed Structure.
    """
    in_plane = self.lattconst * np.sqrt(2)
    scaled_cell = np.array([[in_plane, 0., 0.],
                            [0., in_plane, 0.],
                            [0., 0., self.lattconst]])

    angle = 45. * np.pi / 180.
    cos_a = np.cos(angle)
    sin_a = np.sin(angle)
    rotation = np.array([[cos_a, -sin_a, 0],
                         [sin_a, cos_a, 0],
                         [0., 0., 1.]])

    lattice = Lattice(np.dot(scaled_cell, rotation))

    species = [self.eltA] * 2 + [self.eltB] * 2 + [self.eltC] * 6
    frac_coords = [
        [0.5, 0., 0.5], [0., 0.5, 0.5],          # eltA
        [0., 0., 0.], [0.5, 0.5, 0.],            # eltB
        [0.25, 0.25, 0.], [0.75, 0.75, 0.],      # eltC
        [0.25, 0.75, 0.], [0.75, 0.25, 0.],
        [0., 0., 0.5], [0.5, 0.5, 0.5],
    ]
    struct = Structure(lattice, species, frac_coords,
                       coords_are_cartesian=False)
    return struct.copy()
def __mul__(self, scaling_matrix):
    """
    Replicates the graph, creating a supercell,
    intelligently joining together
    edges that lie on periodic boundaries.
    In principle, any operations on the expanded
    graph could also be done on the original
    graph, but a larger graph can be easier to
    visualize and reason about.

    The structure is replicated at every lattice point of the supercell,
    one relabelled copy of the graph is made per replica, and edges whose
    ``to_jimage`` is not ``(0, 0, 0)`` are then re-targeted either to a
    site inside the supercell or to a periodic image of the supercell.

    :param scaling_matrix: same as Structure.__mul__; anything whose array
        form has shape (3, 3) is rejected
    :return: a new StructureGraph built with ``StructureGraph.from_dict``
        from the supercell structure and the merged graph
    :raises NotImplementedError: if ``scaling_matrix`` has shape (3, 3)
    """

    # Developer note: a different approach was also trialed, using
    # a simple Graph (instead of MultiDiGraph), with node indices
    # representing both site index and periodic image. Here, the
    # number of nodes != number of sites in the Structure. This
    # approach has many benefits, but made it more difficult to
    # keep the graph in sync with its corresponding Structure.

    # Broadly, it would be easier to multiply the Structure
    # *before* generating the StructureGraph, but this isn't
    # possible when generating the graph using critic2 from
    # charge density.

    # Multiplication works by looking for the expected position
    # of an image node, and seeing if that node exists in the
    # supercell. If it does, the edge is updated. This is more
    # computationally expensive than just keeping track of the
    # which new lattice images present, but should hopefully be
    # easier to extend to a general 3x3 scaling matrix.

    # code adapted from Structure.__mul__
    scale_matrix = np.array(scaling_matrix, np.int16)
    if scale_matrix.shape != (3, 3):
        # expand a scalar or 3-vector into a diagonal matrix
        scale_matrix = np.array(scale_matrix * np.eye(3), np.int16)
    else:
        # TODO: test __mul__ with full 3x3 scaling matrices
        raise NotImplementedError('Not tested with 3x3 scaling matrices yet.')
    new_lattice = Lattice(np.dot(scale_matrix, self.structure.lattice.matrix))

    # cartesian offsets of every replica of the original cell
    f_lat = lattice_points_in_supercell(scale_matrix)
    c_lat = new_lattice.get_cartesian_coords(f_lat)

    new_sites = []
    new_graphs = []

    for v in c_lat:

        # create a map of nodes from original graph to its image
        mapping = {n: n + len(new_sites) for n in range(len(self.structure))}

        for idx, site in enumerate(self.structure):

            s = PeriodicSite(site.species_and_occu, site.coords + v,
                             new_lattice, properties=site.properties,
                             coords_are_cartesian=True, to_unit_cell=False)

            new_sites.append(s)

        new_graphs.append(nx.relabel_nodes(self.graph, mapping, copy=True))

    new_structure = Structure.from_sites(new_sites)

    # merge all graphs into one big graph
    new_g = nx.MultiDiGraph()
    for new_graph in new_graphs:
        new_g = nx.union(new_g, new_graph)

    edges_to_remove = []  # tuple of (u, v, k)
    edges_to_add = []  # tuple of (u, v, attr_dict)

    # list of new edges inside supercell
    # for duplicate checking
    edges_inside_supercell = [{u, v} for u, v, d in new_g.edges(data=True)
                              if d['to_jimage'] == (0, 0, 0)]
    new_periodic_images = []

    orig_lattice = self.structure.lattice

    # use k-d tree to match given position to an
    # existing Site in Structure
    kd_tree = KDTree(new_structure.cart_coords)

    # tolerance in Å for sites to be considered equal
    # this could probably be a lot smaller
    tol = 0.05

    for u, v, k, d in new_g.edges(keys=True, data=True):

        to_jimage = d['to_jimage']  # for node v

        # reduce unnecessary checking
        if to_jimage != (0, 0, 0):

            # get index in original site
            n_u = u % len(self.structure)
            n_v = v % len(self.structure)

            # get fractional co-ordinates of where atoms defined
            # by edge are expected to be, relative to original
            # lattice (keeping original lattice has
            # significant benefits)
            v_image_frac = np.add(self.structure[n_v].frac_coords, to_jimage)
            u_frac = self.structure[n_u].frac_coords

            # using the position of node u as a reference,
            # get relative Cartesian co-ordinates of where
            # atoms defined by edge are expected to be
            v_image_cart = orig_lattice.get_cartesian_coords(v_image_frac)
            u_cart = orig_lattice.get_cartesian_coords(u_frac)
            v_rel = np.subtract(v_image_cart, u_cart)

            # now retrieve position of node v in
            # new supercell, and get absolute Cartesian
            # co-ordinates of where atoms defined by edge
            # are expected to be
            v_expec = new_structure[u].coords + v_rel

            # now search in new structure for these atoms
            # query returns (distance, index)
            v_present = kd_tree.query(v_expec)
            v_present = v_present[1] if v_present[0] <= tol else None

            # check if image sites now present in supercell
            # and if so, delete old edge that went through
            # periodic boundary
            if v_present is not None:

                new_u = u
                new_v = v_present
                new_d = d.copy()

                # node now inside supercell
                new_d['to_jimage'] = (0, 0, 0)

                edges_to_remove.append((u, v, k))

                # make sure we don't try to add duplicate edges
                # will remove two edges for everyone one we add
                if {new_u, new_v} not in edges_inside_supercell:

                    # normalize direction
                    if new_v < new_u:
                        new_u, new_v = new_v, new_u

                    edges_inside_supercell.append({new_u, new_v})
                    edges_to_add.append((new_u, new_v, new_d))

            else:

                # want to find new_v such that we have
                # full periodic boundary conditions
                # so that nodes on one side of supercell
                # are connected to nodes on opposite side

                v_expec_frac = new_structure.lattice.get_fractional_coords(v_expec)

                # find new to_jimage
                # use np.around to fix issues with finite precision leading to incorrect image
                v_expec_image = np.around(v_expec_frac, decimals=3)
                v_expec_image = v_expec_image - v_expec_image%1

                # fold the expected position back into the supercell and
                # look for a site there
                v_expec_frac = np.subtract(v_expec_frac, v_expec_image)
                v_expec = new_structure.lattice.get_cartesian_coords(v_expec_frac)
                v_present = kd_tree.query(v_expec)
                v_present = v_present[1] if v_present[0] <= tol else None

                if v_present is not None:

                    new_u = u
                    new_v = v_present
                    new_d = d.copy()
                    new_to_jimage = tuple(map(int, v_expec_image))

                    # normalize direction
                    # NOTE(review): the reversed image is derived from the
                    # edge's original to_jimage, not from v_expec_image --
                    # confirm this is intended.
                    if new_v < new_u:
                        new_u, new_v = new_v, new_u
                        new_to_jimage = tuple(np.multiply(-1, d['to_jimage']).astype(int))

                    new_d['to_jimage'] = new_to_jimage

                    edges_to_remove.append((u, v, k))

                    if (new_u, new_v, new_to_jimage) not in new_periodic_images:
                        edges_to_add.append((new_u, new_v, new_d))
                        new_periodic_images.append((new_u, new_v, new_to_jimage))

    logger.debug("Removing {} edges, adding {} new edges.".format(len(edges_to_remove),
                                                                  len(edges_to_add)))

    # add/delete marked edges
    # NOTE(review): the loop variable below rebinds the name of the list it
    # iterates over; iteration still works, but the list is no longer
    # reachable by that name afterwards.
    for edges_to_remove in edges_to_remove:
        new_g.remove_edge(*edges_to_remove)
    for (u, v, d) in edges_to_add:
        new_g.add_edge(u, v, **d)

    # return new instance of StructureGraph with supercell
    d = {"@module": self.__class__.__module__,
         "@class": self.__class__.__name__,
         "structure": new_structure.as_dict(),
         "graphs": json_graph.adjacency_data(new_g)}

    sg = StructureGraph.from_dict(d)

    return sg
def reshape_gridded_data(self, data='padded_symm',
                         missing_coords_strategy='backfold'):
    """Tries to detect if the band data coordinates form a regular,
    rectangular grid, and returns the band data `indexes` reshaped to that
    grid.

    Args:
        data: Which data set to use. ``'padded_symm'`` or ``None`` selects
            ``self.padded_symm_data``, ``'symm'`` selects
            ``self.symm_data``, and ``'raw'`` or ``'data'`` builds an array
            from ``self.data``.
        missing_coords_strategy: How grid points absent from the data are
            filled when the sampling is sparse. ``'nan'`` marks them with
            index ``-1``; ``'backfold'`` assigns them the index of an
            existing k-point with the same integer coordinates in the
            detected grid basis, after folding into the cell of the
            primitive reciprocal lattice.

    Returns:
        ``None`` if no regular grid is found, or if the grid is
        non-orthogonal or rotated. Otherwise a tuple ``(axes, idx)`` where
        ``axes`` is the result of ``detect_grid`` and ``idx`` is the
        ``'idx'`` field reshaped to ``(len(xs), len(ys), len(zs))``.

    Raises:
        RuntimeError: If the number of missing grid coordinates does not
            match the difference between grid size and data size.
    """
    if data in ('padded_symm', None):
        data = self.padded_symm_data
    elif data == 'symm':
        data = self.symm_data
    elif data in ('raw', 'data'):
        data = self._gen_band_data_array(len(self.data),
                                         k_coords=True,
                                         index=True)
        data['k'] = self.data['k']
        data['idx'] = np.arange(len(self.data))

    data = remove_duplicates(data)  # remove duplicate k values

    basis = find_basis(data['k'])
    if basis is None:
        log.warning('No regular k grid detected')
        return None

    lattice = Lattice(basis).get_niggli_reduced_lattice()
    if not lattice.is_orthogonal:
        log.warning(
            'Non-orthogonal grid detected, reshape_gridded_data will return `None`'
        )
        return None

    # an axis-aligned orthogonal basis has at most 3 non-zero entries
    if np.logical_not(np.isclose(lattice.matrix, 0)).sum() > 3:
        log.debug(lattice)
        log.warning('Rotated orthogonal grid not implemented')
        return None

    xs, ys, zs = axes = detect_grid(data['k'])

    with writeable(data):
        # required to prevent float inaccuracy errors below
        data['k'] = snap_to_grid(data['k'], *axes)

    regular_grid_coords = cartesian_product(*axes)
    shape = len(xs), len(ys), len(zs)

    k = data['k']
    k_set = set(map(tuple, k.round(decimals=4)))  # todo use ksamp lattice ijk
    rgc_set = set(map(tuple, regular_grid_coords.round(
        decimals=4)))  # todo use ksamp lattice ijk

    if k_set == rgc_set:
        log.debug('detected regular k-sample grid of shape {}', shape)
        sort_idx = np.lexsort((k[:, 2], k[:, 1], k[:, 0]))
        return axes, data[sort_idx].reshape(*shape)['idx']
    else:
        log.debug('detected sparse k-sample grid')
        # skipping check that sorted_data['k'] is a subset, in that case
        # (irregular k-points), detect_grid should throw an AssertionError

        # np.rec is the public name of the module formerly reached through
        # the private path np.core.records.
        sd_coords = np.rec.fromarrays(k.T, formats="f4, f4, f4")
        rgc_coords = np.rec.fromarrays(regular_grid_coords.T,
                                       formats="f4, f4, f4")

        missing_coords = np.setdiff1d(rgc_coords,
                                      sd_coords,
                                      assume_unique=True)

        # due to float inaccuracy errors, sorted_data may not be a strict
        # subset of regular_grid_coords
        if len(missing_coords) != len(rgc_coords) - len(sd_coords):
            log.error("FIXME float inaccuracy errors")
            log.debug(
                f"{len(missing_coords)} {len(rgc_coords)} {len(sd_coords)}"
            )
            # A stray breakpoint() used to sit here; library code must not
            # drop into the debugger. RuntimeError is still an Exception.
            raise RuntimeError(
                "k-points are not a subset of the detected regular grid: "
                f"{len(missing_coords)} missing coordinates found, "
                f"{len(rgc_coords) - len(sd_coords)} expected")

        new_data = self._gen_band_data_array(len(regular_grid_coords),
                                             k_coords=True,
                                             index=True)

        # add existing data to the beginning
        new_data[:len(data)]['k'] = data['k']
        new_data[:len(data)]['idx'] = data['idx']
        # add missing coordinates after
        new_data[len(data):]['k'] = missing_coords.view('3f4')

        if missing_coords_strategy == 'nan':
            new_data[len(data):]['idx'] = -1

        if missing_coords_strategy == 'backfold':
            # find exact matches
            all_k = new_data['k'].copy()
            all_k = (
                all_k @ self.run.primitive_lattice.reciprocal_lattice.
                inv_matrix + 1e-4
            ) % 1 - 1e-4  # k to reciprocal lattice vectors parallelepiped
            all_k = all_k @ self.run.primitive_lattice.reciprocal_lattice.matrix

            lattice_ijk = all_k @ lattice.inv_matrix
            lattice_ijk_int = np.rint(lattice_ijk).astype(int)
            assert np.abs(lattice_ijk_int - lattice_ijk).max(
            ) < 1e-4, 'detect_grid should have caught this'

            k_u, idx_u, inv_u = np.unique(lattice_ijk_int,
                                          axis=0,
                                          return_index=True,
                                          return_inverse=True)
            # k_u: unique values of all_k
            # idx_u: indices of unique values of all_k
            # inv_u: indices of origin values on k_u (k_u[inv_u] == all_k)

            # assert that all missing coordinates have an exact match in
            # data.
            existing_data_indices = np.arange(len(data))
            missing_data_indices = np.setdiff1d(idx_u, existing_data_indices)
            log.debug('mdi {}', missing_data_indices)
            missing_idx = idx_u >= len(data)
            # make sure all unique k are contained in available data
            assert not any(missing_idx), f"{sum(missing_idx)} k not found"

            new_data['idx'] = new_data['idx'][idx_u][inv_u]

        new_k = new_data['k']
        nsd_idx = np.lexsort((new_k[:, 2], new_k[:, 1], new_k[:, 0]))
        new_sorted_data = new_data[nsd_idx]

        assert np.array_equal(new_sorted_data['k'], regular_grid_coords)

        return axes, new_sorted_data.reshape(*shape)['idx']
def check_structures_forces_stresses(structures: List[Structure],
                                     forces: Optional[List] = None,
                                     stresses: Optional[List] = None,
                                     stress_format: str = 'VASP',
                                     return_none: bool = True):
    """
    Bring structures, forces and stresses into the lammps lattice orientation.

    Forces and stresses depend on the orientation of the lattice. Every
    structure whose lattice is not orthogonal is rotated, together with its
    forces and stress, so that the lattice row vectors take the lammps form
    [[ax, 0, 0], [bx, by, 0], [cx, cy, cz]]. Structures with an orthogonal
    lattice are passed through unchanged.

    Args:
        structures (list): list of structures
        forces (list): list of force matrices (m, 3), or None
        stresses (list): list of stress vectors, or None
        stress_format (str): stress format, choose from
            "VASP", "LAMMPS", "SNAP"
        return_none (bool): whether to return lists of None for forces
            and stresses that were not supplied

    Returns:
        If return_none is True, the tuple (structures, forces, stresses).
        Otherwise only what was supplied: the bare list of structures when
        neither forces nor stresses were given, else the list
        [structures, [forces], [stresses]].
    """
    has_forces = forces is not None
    has_stresses = stresses is not None
    n_structures = len(structures)
    # Placeholders keep the per-structure indexing below uniform.
    force_items = forces if has_forces else [None] * n_structures
    stress_items = stresses if has_stresses else [None] * n_structures

    rotated_structures = []
    rotated_forces = []
    rotated_stresses = []

    for idx, struct in enumerate(structures):
        if struct.lattice.is_orthogonal:
            # orthogonal structures do not need to rotate
            rotated_structures.append(struct)
            rotated_forces.append(force_items[idx])  # type: ignore
            rotated_stresses.append(stress_items[idx])  # type: ignore
            continue

        logger.info("Structure index %d is rotated." % idx)
        lammps_matrix, symmop, rot_matrix = \
            get_lammps_lattice_and_rotation(struct, (0, 0, 0))
        rotated_cart = symmop.operate_multi(struct.cart_coords)
        rotated_structures.append(
            Structure(Lattice(lammps_matrix), struct.species, rotated_cart,
                      site_properties=struct.site_properties,
                      coords_are_cartesian=True))

        rotated_forces.append(
            symmop.operate_multi(force_items[idx]) if has_forces else None)

        if has_stresses:
            sigma = stress_list_to_matrix(stress_items[idx], stress_format)
            # R \sigma R^T stress rotation
            sigma = rot_matrix.dot(sigma).dot(rot_matrix.T)
            rotated_stresses.append(
                stress_matrix_to_list(sigma, stress_format))
        else:
            rotated_stresses.append(None)

    if return_none:
        return rotated_structures, rotated_forces, rotated_stresses

    result = [rotated_structures]
    if has_forces:
        result.append(rotated_forces)
    if has_stresses:
        result.append(rotated_stresses)
    return result[0] if len(result) == 1 else result
def sto_perovskite() -> Structure:
    """Return a five-site cubic SrTiO3 perovskite cell (lattice parameter 3.945).

    Sr is placed at the cell origin, Ti at the body centre and the three O
    atoms at the face centres, all given in fractional coordinates.
    """
    # (species, fractional position) for each site, in insertion order
    site_table = (
        ("Sr", [0, 0, 0]),
        ("Ti", [0.5, 0.5, 0.5]),
        ("O", [0.0, 0.5, 0.5]),
        ("O", [0.5, 0.0, 0.5]),
        ("O", [0.5, 0.5, 0.0]),
    )
    elements = [symbol for symbol, _ in site_table]
    positions = np.array([xyz for _, xyz in site_table])
    cubic_cell = Lattice(3.945 * np.eye(3))
    return Structure(cubic_cell, elements, positions)
def from_slabs(
    cls,
    substrate_slab: Slab,
    film_slab: Slab,
    in_plane_offset: Tuple[float, float] = (0, 0),
    gap: float = 1.6,
    vacuum_over_film: float = 0.0,
    interface_properties: Optional[Dict] = None,
    center_slab: bool = True,
) -> "Interface":
    """
    Makes an interface structure by merging a substrate and a film slab.

    The film a- and b-vectors will be forced to be the substrate slab's a-
    and b-vectors: the new cell reuses the substrate's a, b and angles, and
    the film's fractional a/b coordinates are carried over unchanged. For
    now, it's suggested to use a factory method that will ensure the
    appropriate interface structure is already met.

    Args:
        substrate_slab: slab for the substrate
        film_slab: slab for the film
        in_plane_offset: fractional shift in plane for the film with
            respect to the substrate. It is only forwarded to the
            constructor; this method does not shift any coordinates by it.
        gap: gap between substrate and film in Angstroms
        vacuum_over_film: vacuum space above the film in Angstroms
        interface_properties: dictionary of misc properties for this
            structure (an empty dict is used when None)
        center_slab: if True, shift all sites along c so that their mean
            fractional c coordinate is 0.5

    Returns:
        The object built by ``cls(...)``, after calling ``sort()`` on it.

    Raises:
        AssertionError: if alpha or beta of either slab fails the
            closeness-to-90 check below.
    """
    interface_properties = interface_properties or {}

    # Ensure c-axis is orthogonal to a/b plane
    if isinstance(substrate_slab, Slab):
        substrate_slab = substrate_slab.get_orthogonal_c_slab()
    if isinstance(film_slab, Slab):
        film_slab = film_slab.get_orthogonal_c_slab()
    # NOTE(review): the third positional argument of np.allclose is rtol, so
    # 0.1 is a 10% relative tolerance (about +/- 9 degrees around 90), not
    # an absolute 0.1 degree -- confirm this is intended.
    assert np.allclose(film_slab.lattice.alpha, 90, 0.1)
    assert np.allclose(film_slab.lattice.beta, 90, 0.1)
    assert np.allclose(substrate_slab.lattice.alpha, 90, 0.1)
    assert np.allclose(substrate_slab.lattice.beta, 90, 0.1)

    # Ensure sub is right-handed
    # IE sub has surface facing "up"
    sub_vecs = substrate_slab.lattice.matrix.copy()
    # (a x b) . c < 0 means a left-handed cell; flip c to fix it
    if np.dot(np.cross(*sub_vecs[:2]), sub_vecs[2]) < 0:
        sub_vecs[2] *= -1.0
        substrate_slab.lattice = Lattice(sub_vecs)

    # Find the limits of C-coords
    sub_coords = substrate_slab.frac_coords
    film_coords = film_slab.frac_coords
    # fractional c extents converted to lengths along c
    sub_min_c = np.min(sub_coords[:, 2]) * substrate_slab.lattice.c
    sub_max_c = np.max(sub_coords[:, 2]) * substrate_slab.lattice.c
    film_min_c = np.min(film_coords[:, 2]) * film_slab.lattice.c
    film_max_c = np.max(film_coords[:, 2]) * film_slab.lattice.c
    # combined thickness of both slabs, before adding gap and vacuum
    min_height = np.abs(film_max_c - film_min_c) + np.abs(sub_max_c - sub_min_c)

    # construct new lattice
    # keep the substrate's a, b and angles; c holds both slabs + gap + vacuum
    abc = substrate_slab.lattice.abc[:2] + (min_height + gap + vacuum_over_film, )
    angles = substrate_slab.lattice.angles
    lattice = Lattice.from_parameters(*abc, *angles)

    # Get the species
    species = substrate_slab.species + film_slab.species

    # Get the coords
    # Shift substrate to bottom in new lattice
    sub_coords = np.subtract(sub_coords, [0, 0, np.min(sub_coords[:, 2])])
    # rescale fractional c from the old substrate cell to the new cell
    sub_coords[:, 2] *= substrate_slab.lattice.c / lattice.c

    # Flip the film over
    film_coords[:, 2] *= -1.0
    # rescale fractional c from the old film cell to the new cell
    film_coords[:, 2] *= film_slab.lattice.c / lattice.c

    # Shift the film coords to right over the substrate + gap
    film_coords = np.subtract(film_coords, [0, 0, np.min(film_coords[:, 2])])
    film_coords = np.add(film_coords, [0, 0, gap / lattice.c + np.max(sub_coords[:, 2])])

    # Build coords
    # substrate sites first, then film sites (same order as `species`)
    coords = np.concatenate([sub_coords, film_coords])

    # Shift coords to center
    if center_slab:
        coords = np.add(coords, [0, 0, 0.5 - np.average(coords[:, 2])])

    # Only merge site properties in both slabs
    site_properties = {}
    site_props_in_both = set(substrate_slab.site_properties.keys()) & set(film_slab.site_properties.keys())

    for key in site_props_in_both:
        site_properties[key] = [
            *substrate_slab.site_properties[key],
            *film_slab.site_properties[key],
        ]

    # tag every site with the slab it came from
    site_properties["interface_label"] = ["substrate"] * len(substrate_slab) + ["film"] * len(film_slab)

    iface = cls(
        lattice=lattice,
        species=species,
        coords=coords,
        to_unit_cell=False,
        coords_are_cartesian=False,
        site_properties=site_properties,
        validate_proximity=False,
        in_plane_offset=in_plane_offset,
        gap=gap,
        vacuum_over_film=vacuum_over_film,
        interface_properties=interface_properties,
    )

    iface.sort()
    return iface
def test_kumagai(self):
    """Check KumagaiCorrection pieces and the full correction against reference values."""
    gamma = 0.19357221
    prec = 28
    lattice = Lattice(
        [[4.692882, -8.12831, 0.0], [4.692882, 8.12831, 0.0], [0.0, 0.0, 10.03391]]
    )

    # note that real/recip vector generation is not dependent on epsilon
    g_vecs, _, r_vecs, _ = generate_R_and_G_vecs(
        gamma, prec, lattice, 80.0 * np.identity(3)
    )

    # test real space summation (bigger for large epsilon)
    kc_high_diel = KumagaiCorrection(80.0 * np.identity(3), gamma=gamma)
    real_sum = kc_high_diel.get_real_summation(gamma, r_vecs[0])
    self.assertAlmostEqual(real_sum, 0.00843104)

    # test recip space summation (bigger for small epsilon)
    kc_low_diel = KumagaiCorrection(0.1 * np.identity(3), gamma=gamma)
    recip_sum = kc_low_diel.get_recip_summation(gamma, g_vecs[0], lattice.volume)
    self.assertAlmostEqual(recip_sum, 0.31117099)

    # test self interaction
    si_corr = kc_low_diel.get_self_interaction(gamma)
    self.assertAlmostEqual(si_corr, -0.54965249)

    # test potential shift interaction correction
    ps_corr = kc_low_diel.get_potential_shift(gamma, lattice.volume)
    self.assertAlmostEqual(ps_corr, -0.00871593)

    # Test Defect Entry approach to correction
    defect_dir = os.path.join(PymatgenTest.TEST_FILES_DIR, "defect")
    bulk_struc = Poscar.from_file(os.path.join(defect_dir, "CONTCAR_bulk")).structure
    bulk_out = Outcar(os.path.join(defect_dir, "OUTCAR_bulk.gz"))
    defect_out = Outcar(os.path.join(defect_dir, "OUTCAR_vac_Ga_-3.gz"))
    epsilon = 18.118 * np.identity(3)
    vac = Vacancy(bulk_struc, bulk_struc.sites[0], charge=-3)
    defect_structure = vac.generate_defect_structure()
    defect_frac_coords = [0.0, 0.0, 0.0]

    parameters = {
        "bulk_atomic_site_averages": bulk_out.electrostatic_potential,
        "defect_atomic_site_averages": defect_out.electrostatic_potential,
        "site_matching_indices": [[ind, ind - 1] for ind in range(len(bulk_struc))],
        "initial_defect_structure": defect_structure,
        "defect_frac_sc_coords": defect_frac_coords,
    }
    dentry = DefectEntry(vac, 0.0, parameters=parameters)
    kc = KumagaiCorrection(epsilon)
    # get_correction must run before the "kumagai_meta" entries are read
    # from dentry.parameters further down.
    kcorr = kc.get_correction(dentry)
    self.assertAlmostEqual(kcorr["kumagai_electrostatic"], 0.88236299)
    self.assertAlmostEqual(kcorr["kumagai_potential_alignment"], 2.09704862)

    # test ES correction
    high_diel_es_corr = kc_high_diel.perform_es_corr(gamma, prec, lattice, -3.0)
    self.assertAlmostEqual(high_diel_es_corr, 0.25176240)
    low_diel_es_corr = kc_low_diel.perform_es_corr(gamma, prec, lattice, -3.0)
    self.assertAlmostEqual(low_diel_es_corr, 201.28810966)

    # test pot correction
    site_list = []
    for bs_ind, ds_ind in dentry.parameters["site_matching_indices"]:
        Vqb = -(
            defect_out.electrostatic_potential[ds_ind]
            - bulk_out.electrostatic_potential[bs_ind]
        )
        site_list.append([defect_structure[ds_ind], Vqb])

    sampling_radius = dentry.parameters["kumagai_meta"]["sampling_radius"]
    gamma = dentry.parameters["kumagai_meta"]["gamma"]
    q = -3
    g_vecs, _, r_vecs, _ = generate_R_and_G_vecs(
        gamma, prec, defect_structure.lattice, np.identity(3)
    )
    high_diel_pot_corr = kc_high_diel.perform_pot_corr(
        defect_structure,
        defect_frac_coords,
        site_list,
        sampling_radius,
        q,
        r_vecs[0],
        g_vecs[0],
        gamma,
    )
    self.assertAlmostEqual(high_diel_pot_corr, 2.35840716)
    low_diel_pot_corr = kc_low_diel.perform_pot_corr(
        defect_structure,
        defect_frac_coords,
        site_list,
        sampling_radius,
        q,
        r_vecs[0],
        g_vecs[0],
        gamma,
    )
    self.assertAlmostEqual(low_diel_pot_corr, -58.83598095)

    # test the kumagai plotter
    kcp = kc.plot()
    self.assertTrue(kcp)

    # check that uncertainty metadata exists
    # assertEqual, not assertAlmostEqual: on a mismatch the latter would try
    # abs(set - set) and die with TypeError instead of reporting a failure.
    self.assertEqual(
        set(kc.metadata["pot_corr_uncertainty_md"].keys()),
        {"number_sampled", "stats"},
    )
def get_ph_bs_symm_line_from_dict(bands_dict, has_nac=False, labels_dict=None):
    r"""
    Creates a pymatgen PhononBandStructure object from the dictionary
    extracted by the band.yaml file produced by phonopy. The labels
    will be extracted from the dictionary, if present. If the 'eigenvector'
    key is found the eigendisplacements will be calculated according to the
    formula::

        exp(2*pi*i*(frac_coords \dot q)) / sqrt(mass) * v

    and added to the object.

    Args:
        bands_dict: the dictionary extracted from the band.yaml file
        has_nac: True if the data have been obtained with the option
            --nac option. Default False.
        labels_dict: dict that links a qpoint in frac coords to a label.
            Its value will replace the data contained in the band.yaml.

    Returns:
        The PhononBandStructureSymmLine built from the q-points,
        frequencies, reciprocal lattice, labels, structure and (possibly
        empty) eigendisplacements read from ``bands_dict``.
    """
    structure = get_structure_from_dict(bands_dict)

    qpts = []
    frequencies = []
    eigendisplacements = []
    phonopy_labels_dict = {}
    for p in bands_dict["phonon"]:
        q = p["q-position"]
        qpts.append(q)
        bands = []
        eig_q = []
        for b in p["band"]:
            bands.append(b["frequency"])
            if "eigenvector" in b:
                eig_b = []
                for i, eig_a in enumerate(b["eigenvector"]):
                    # Each component is stored as a (real, imag) pair.
                    # Builtin complex replaces np.complex, which was
                    # removed in NumPy 1.24.
                    v = np.array(
                        [eig_a[x][0] + eig_a[x][1] * 1j for x in range(3)],
                        dtype=complex,
                    )
                    eig_b.append(
                        eigvec_to_eigdispl(
                            v,
                            q,
                            structure[i].frac_coords,
                            structure.site_properties["phonopy_masses"][i],
                        ))
                eig_q.append(eig_b)
        frequencies.append(bands)
        if "label" in p:
            phonopy_labels_dict[p["label"]] = p["q-position"]
        if eig_q:
            eigendisplacements.append(eig_q)

    qpts = np.array(qpts)
    # transpose to match the convention in PhononBandStructure
    frequencies = np.transpose(frequencies)
    if eigendisplacements:
        eigendisplacements = np.transpose(eigendisplacements, (1, 0, 2, 3))

    rec_latt = Lattice(bands_dict["reciprocal_lattice"])

    # Caller-supplied labels take precedence over those found in the file.
    labels_dict = labels_dict or phonopy_labels_dict

    return PhononBandStructureSymmLine(
        qpts,
        frequencies,
        rec_latt,
        has_nac=has_nac,
        labels_dict=labels_dict,
        structure=structure,
        eigendisplacements=eigendisplacements,
    )
def read_cfgs(filename, predict=False):
    """
    Read the configuration file.

    The file is split into blocks, each introduced by a line holding only
    the number of atoms. From every block the ``Lattice="..."`` entry, the
    last matching ``energy=`` entry, the ``dft_virial=`` entry and the
    ``properties=`` column description are extracted; the per-atom lines
    are then cut into columns according to that description.

    Args:
        filename (str): The configuration file to be read.
        predict (bool): If True, forces are taken from the ``force``
            column, otherwise from the ``dft_force`` column.

    Returns:
        (data_pool, df): ``data_pool`` is a list with one dict per block,
        holding ``structure`` (structure as dict), ``num_atoms`` and
        ``outputs`` (``energy``, ``forces``, ``virial_stress``). ``df`` is
        the second value returned by ``convert_docs(docs=data_pool)``.

    Raises:
        AssertionError: if the atom count heading a block differs from the
            number of sites parsed from it.
    """
    # Builtin int/str replace np.int/np.str, which were removed in NumPy 1.24
    # (they were plain aliases of the builtins).
    type_convert = {'R': np.float32, 'I': int, 'S': str}
    data_pool = []

    with zopen(filename, 'rt') as f:
        lines = f.read()
    lines = re.compile('AT ').sub('', string=lines)

    block_pattern = re.compile(
        r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
    lattice_pattern = re.compile(r'Lattice="(.+)"')
    energy_pattern = re.compile(
        r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
    stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
    properties_pattern = re.compile(r'properties=(\S+)', re.I)
    position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

    for (size, block) in block_pattern.findall(lines):
        d = {'outputs': {}}
        size = int(size)

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice([float(s) for s in lattice_str.split()])

        energy_str = energy_pattern.findall(block)[-1]
        energy = float(energy_str)

        stress_str = stress_pattern.findall(block)[0][1]
        virial_stress = np.array([float(s) for s in stress_str.split()])
        # keep six entries of the flattened 3x3 virial, in this order
        virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

        # properties is a flat "name:type:count:name:type:count:..." list
        properties = properties_pattern.findall(block)[0].split(":")
        labels_columns = OrderedDict()
        for i in range(0, len(properties), 3):
            labels_columns[properties[i]] = [
                int(properties[i + 2]), properties[i + 1]
            ]

        position_str = position_pattern.findall(block)[0].split('\n')
        position = np.array([p.split() for p in position_str])

        # slice the per-atom table into named, typed column groups
        labels = {}
        column_index = 0
        for key, (num_columns, dtype) in labels_columns.items():
            labels[key] = position[
                :, column_index:column_index + num_columns
            ].astype(type_convert[dtype])
            column_index += num_columns

        struct = Structure(lattice=lattice,
                           species=labels['species'].ravel(),
                           coords=labels['pos'],
                           coords_are_cartesian=True)
        forces = labels['force'] if predict else labels['dft_force']

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df