def test_dict_to_object(self):
    """Check that ``dict_to_object`` turns a column of structure dicts back into Structures."""
    cell = Lattice([[3.8401979337, 0.00, 0.00],
                    [1.9200989668, 3.3257101909, 0.00],
                    [0.00, -2.2171384943, 3.1355090603]])
    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    struct = Structure(cell, ["Si"] * 2, site_coords)

    # Two identical rows, each holding the dict form of the same structure.
    data = {'structure_dict': [struct.as_dict(), struct.as_dict()]}
    df = DataFrame(data=data)
    df["structure"] = dict_to_object(df["structure_dict"])

    # Every converted entry must compare equal to the original object.
    for row in (0, 1):
        self.assertEqual(df["structure"].tolist()[row], struct)
def from_entries(
    cls,
    grouped_entries: List[ComputedEntry],
    working_ion_entry: ComputedEntry,
    battery_id: str,
    host_structure: Structure,
) -> Union["InsertionElectrodeDoc", None]:
    """
    Build a document from a group of entries and a working-ion entry.

    Args:
        grouped_entries: Entries passed to ``InsertionElectrode.from_entries``.
        working_ion_entry: Entry for the working ion.
        battery_id: Identifier stored on the resulting document.
        host_structure: Host structure; stored as a dict and used to derive
            ``elements``, ``nelements`` and ``chemsys``.

    Returns:
        An instance of ``cls``, or ``None`` when building the
        ``InsertionElectrode`` raises ``IndexError``.
    """
    try:
        electrode = InsertionElectrode.from_entries(
            entries=grouped_entries,
            working_ion_entry=working_ion_entry,
            strip_structures=True,
        )
    except IndexError:
        return None

    summary = electrode.get_summary_dict()
    stable_ids = summary["stable_material_ids"]
    unstable_ids = summary["unstable_material_ids"]
    summary["material_ids"] = stable_ids + unstable_ids
    # The summary calls this field "nsteps"; the document expects "num_steps".
    summary["num_steps"] = summary.pop("nsteps", None)
    summary["last_updated"] = datetime.utcnow()

    elements = sorted(host_structure.composition.elements)
    chemsys = "-".join(sorted(str(element) for element in elements))
    framework = Composition(summary["framework_formula"])

    return cls(
        battery_id=battery_id,
        host_structure=host_structure.as_dict(),
        framework=framework,
        electrode_object=electrode.as_dict(),
        elements=elements,
        nelements=len(elements),
        chemsys=chemsys,
        formula_anonymous=framework.anonymized_formula,
        **summary,
    )
def test_conversion_multiindex_dynamic(self):
    """Check DictToObject's automatically chosen output column on a two-level column index."""
    # test dynamic target_col_id setting with multiindex
    cell = Lattice([[3.8401979337, 0.00, 0.00],
                    [1.9200989668, 3.3257101909, 0.00],
                    [0.00, -2.2171384943, 3.1355090603]])
    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    struct = Structure(cell, ["Si"] * 2, site_coords)

    data = {'structure_dict': [struct.as_dict(), struct.as_dict()]}
    df_2lvl = DataFrame(data=data)
    # Put the single flat column under a top-level "custom" label.
    df_2lvl.columns = MultiIndex.from_product(
        (["custom"], df_2lvl.columns.values))

    featurizer = DictToObject()
    df_2lvl = featurizer.featurize_dataframe(
        df_2lvl, ('custom', 'structure_dict'), multiindex=True)

    new_col_id = ('DictToObject', 'structure_dict_object')
    for row in (0, 1):
        self.assertEqual(df_2lvl[new_col_id].tolist()[row], struct)
def test_dict_to_object(self):
    """Check DictToObject with an explicit and with an automatically chosen target column."""
    cell = Lattice([[3.8401979337, 0.00, 0.00],
                    [1.9200989668, 3.3257101909, 0.00],
                    [0.00, -2.2171384943, 3.1355090603]])
    site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
    struct = Structure(cell, ["Si"] * 2, site_coords)
    data = {'structure_dict': [struct.as_dict(), struct.as_dict()]}

    # Explicit target column name.
    df = DataFrame(data=data)
    featurizer = DictToObject(target_col_id='structure')
    df = featurizer.featurize_dataframe(df, 'structure_dict')
    for row in (0, 1):
        self.assertEqual(df["structure"].tolist()[row], struct)

    # test dynamic target_col_id setting
    df = DataFrame(data=data)
    featurizer = DictToObject()
    df = featurizer.featurize_dataframe(df, 'structure_dict')
    for row in (0, 1):
        self.assertEqual(df["structure_dict_object"].tolist()[row], struct)
def read_cfgs(self, filename="output.data"):
    """
    Parse a configuration file into per-block records.

    Every ``begin`` ... ``end`` block yields one dict holding the structure
    (as a dict), the energy, the forces and the number of atoms.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is the list of
        per-block dicts and ``df`` is the second value returned by
        ``convert_docs(docs=data_pool)``.
    """
    with zopen(filename, "rt") as f:
        content = f.read()

    block_re = re.compile("begin\n(.*?)end", re.S)
    lattice_re = re.compile("lattice(.*?)\n")
    atom_re = re.compile("atom(.*?)\n")
    energy_re = re.compile("energy(.*?)\n")
    atom_columns = [
        "x", "y", "z", "specie", "charge", "atomic_energy", "fx", "fy", "fz"
    ]

    data_pool = []
    for block in block_re.findall(content):
        # Lattice rows are scaled by self.bohr_to_angstrom.
        lattice_rows = [row.split() for row in lattice_re.findall(block)]
        lattice = Lattice(
            np.array(lattice_rows, dtype=np.float64) * self.bohr_to_angstrom)

        atom_rows = [row.split() for row in atom_re.findall(block)]
        atoms = pd.DataFrame(atom_rows)
        atoms.columns = atom_columns

        coords = np.array(atoms.loc[:, ["x", "y", "z"]], dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(atoms["specie"])
        forces = np.array(atoms.loc[:, ["fx", "fy", "fz"]], dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of the block is used.
        energy = float(energy_re.findall(block)[0].lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        record = {"outputs": {}}
        record["structure"] = struct.as_dict()
        record["outputs"]["energy"] = energy
        record["outputs"]["forces"] = forces
        record["num_atoms"] = len(struct)
        data_pool.append(record)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename='output.data'):
    """
    Read the configuration file.

    Every ``begin`` ... ``end`` block of the file is parsed into one dict
    holding the structure (as a dict), the energy, the forces and the number
    of atoms.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is the list of
        per-block dicts and ``df`` is the second value returned by
        ``convert_docs(docs=data_pool)``.
    """
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('begin\n(.*?)end', re.S)
    lattice_pattern = re.compile('lattice(.*?)\n')
    position_pattern = re.compile('atom(.*?)\n')
    energy_pattern = re.compile('energy(.*?)\n')

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        # np.float64 replaces the np.float alias, which was removed in
        # NumPy 1.24; the alias pointed to the builtin float, so the
        # resulting dtype is the same.
        lattice_str = lattice_pattern.findall(block)
        lattice = Lattice(
            np.array([latt.split() for latt in lattice_str],
                     dtype=np.float64) * self.bohr_to_angstrom)

        position_str = position_pattern.findall(block)
        positions = pd.DataFrame([pos.split() for pos in position_str])
        positions.columns = ['x', 'y', 'z', 'specie', 'charge',
                             'atomic_energy', 'fx', 'fy', 'fz']

        coords = np.array(positions.loc[:, ['x', 'y', 'z']],
                          dtype=np.float64)
        coords = coords * self.bohr_to_angstrom
        species = np.array(positions['specie'])
        forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']],
                          dtype=np.float64)
        forces = forces / self.eV_to_Ha / self.bohr_to_angstrom

        # Only the first "energy" line of the block is used.
        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip()) / self.eV_to_Ha

        struct = Structure(lattice=lattice, species=species, coords=coords,
                           coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        d['outputs']['forces'] = forces
        d['num_atoms'] = len(struct)
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def find_inequivalent_coords(structure: Structure,
                             df: DataFrame) -> List[CoordInfo]:
    """
    Group the coordinates listed in ``df`` by symmetry equivalence.

    Each row of ``df`` (columns ``a``, ``b``, ``c``) is appended to a copy of
    ``structure`` as an ``Element.Og`` site. The appended sites are then
    grouped by their ``equivalent_atoms`` entry in the spglib symmetry data,
    and one ``CoordInfo`` is produced per group.

    Args:
        structure: Host structure; must not already contain ``Element.Og``.
        df: Table with ``a``, ``b``, ``c`` columns and either an
            ``ave_value`` or a ``value`` column.

    Returns:
        One ``CoordInfo`` per group of equivalent appended sites.
    """
    initial_sg = StructureSymmetrizer(structure).sg_number
    added_structure = Structure.from_dict(structure.as_dict())
    start_index = len(structure)

    for _, row in df.iterrows():
        new_coords = [row.a, row.b, row.c]
        # Og is the placeholder species, so it must not clash with the host.
        assert Element.Og not in structure.composition.elements
        added_structure.append(Element.Og, new_coords)
    end_index = len(added_structure)

    symmetrizer = StructureSymmetrizer(added_structure)
    if initial_sg != symmetrizer.sg_number:
        logger.warning("The symmetry has changed, meaning all the symmetry "
                       "equivalent sites do not exist.")

    sym_data = symmetrizer.spglib_sym_data
    added_indices = list(range(start_index, end_index))
    repr_atom_pairs = zip(sym_data["equivalent_atoms"][added_indices],
                          added_indices)

    def representative(pair):
        return pair[0]

    # Column holding the quantity reported for each appended site.
    value_col = "ave_value" if "ave_value" in df else "value"

    result = []
    ordered_pairs = sorted(repr_atom_pairs, key=representative)
    for _, equiv_sites in groupby(ordered_pairs, key=representative):
        group_coords, group_values = [], []
        for repr_idx, atom_idx in equiv_sites:
            fcoord = added_structure[atom_idx].frac_coords
            group_coords.append(tuple(fcoord))
            group_values.append(df[value_col][atom_idx - start_index])
            if repr_idx == atom_idx:
                # Site symmetry and coordination are taken from the
                # representative site of the group.
                site_sym = sym_data["site_symmetry_symbols"][atom_idx]
                distances = Distances(structure, fcoord)
                coordination = distances.coordination()

        result.append(CoordInfo(site_symmetry=remove_dot(site_sym),
                                coordination=coordination,
                                frac_coords=group_coords,
                                quantities=group_values))
    return result
def read_cfgs(filename, predict=False):
    """
    Read the configuration file.

    The file is split into blocks, each introduced by a line holding only
    the number of atoms. Every block yields one dict with the structure (as
    a dict), energy, forces, virial stress and number of atoms.

    Args:
        filename (str): The configuration file to be read.
        predict (bool): If True, forces are taken from the ``force``
            property column; otherwise from ``dft_force``.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is the list of
        per-block dicts and ``df`` is the second value returned by
        ``convert_docs(docs=data_pool)``.
    """
    # Builtin int/str replace np.int/np.str, which were removed in
    # NumPy 1.24; those aliases pointed to exactly these builtins.
    type_convert = {'R': np.float32, 'I': int, 'S': str}
    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    repl = re.compile('AT ')
    lines = repl.sub('', string=lines)

    block_pattern = re.compile(
        r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
    lattice_pattern = re.compile(r'Lattice="(.+)"')
    # NOTE(review): the '.' between the digit groups is unescaped, so it
    # matches any character, not only a decimal point.
    energy_pattern = re.compile(
        r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
    stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
    properties_pattern = re.compile(r'properties=(\S+)', re.I)
    position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

    for (size, block) in block_pattern.findall(lines):
        d = {'outputs': {}}
        size = int(size)

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice([float(s) for s in lattice_str.split()])

        # The last energy match in the block is the one that is kept.
        energy_str = energy_pattern.findall(block)[-1]
        energy = float(energy_str)

        # Group 1 of the stress match holds the numbers between the
        # optional braces.
        stress_str = stress_pattern.findall(block)[0][1]
        virial_stress = np.array([float(s) for s in stress_str.split()])
        virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]

        # "properties" is a colon-separated list of
        # (name, type code, column count) triples.
        properties = properties_pattern.findall(block)[0].split(":")
        labels_columns = OrderedDict()
        for i in range(0, len(properties), 3):
            labels_columns[properties[i]] = [
                int(properties[i + 2]), properties[i + 1]
            ]

        position_str = position_pattern.findall(block)[0].split('\n')
        position = np.array([p.split() for p in position_str])

        # Slice the per-atom table into one typed array per property.
        labels = {}
        column_index = 0
        for key in labels_columns:
            num_columns, dtype = labels_columns[key]
            labels[key] = position[
                :, column_index:column_index + num_columns
            ].astype(type_convert[dtype])
            column_index += num_columns

        struct = Structure(lattice=lattice,
                           species=labels['species'].ravel(),
                           coords=labels['pos'],
                           coords_are_cartesian=True)
        if predict:
            forces = labels['force']
        else:
            forces = labels['dft_force']

        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def read_cfgs(self, filename):
    """
    Read the configuration file.

    Every ``BEGIN_CFG`` ... ``END_CFG`` block yields one dict with the
    structure (as a dict), energy, forces, virial stress and number of atoms.

    Args:
        filename (str): The configuration file to be read.

    Returns:
        tuple: ``(data_pool, df)`` where ``data_pool`` is the list of
        per-block dicts and ``df`` is the second value returned by
        ``convert_docs(docs=data_pool)``.

    Raises:
        ValueError: If ``self.elements`` is empty.
    """

    def formatify(string):
        return [float(s) for s in string.split()]

    if not self.elements:
        raise ValueError("No species given.")

    data_pool = []
    with zopen(filename, 'rt') as f:
        lines = f.read()

    block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
    size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
    lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData', re.S | re.I)
    position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
    energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)', re.S)
    stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)

    for block in block_pattern.findall(lines):
        d = {'outputs': {}}

        size_str = size_pattern.findall(block)[0]
        size = int(size_str.lstrip())

        lattice_str = lattice_pattern.findall(block)[0]
        lattice = Lattice(
            np.array(list(map(formatify, lattice_str.split('\n')))))

        position_str = position_pattern.findall(block)[0]
        position = np.array(list(map(formatify, position_str.split('\n'))))
        # Column 1 indexes into self.elements. Builtin int replaces np.int,
        # which was removed in NumPy 1.24 and pointed to this builtin.
        species = np.array(self.elements)[position[:, 1].astype(int)]
        forces = position[:, 5:8].tolist()
        position = position[:, 2:5]

        energy_str = energy_pattern.findall(block)[0]
        energy = float(energy_str.lstrip())

        stress_str = stress_pattern.findall(block)[0]
        virial_stress = np.array(
            list(map(formatify, stress_str.split()))).reshape(6, ).tolist()
        # Reorder the components from self.mtp_stress_order to
        # self.vasp_stress_order.
        virial_stress = [
            virial_stress[self.mtp_stress_order.index(n)]
            for n in self.vasp_stress_order
        ]

        struct = Structure(lattice=lattice, species=species,
                           coords=position, coords_are_cartesian=True)
        d['structure'] = struct.as_dict()
        d['outputs']['energy'] = energy
        assert size == struct.num_sites
        d['num_atoms'] = size
        d['outputs']['forces'] = forces
        d['outputs']['virial_stress'] = virial_stress
        data_pool.append(d)

    _, df = convert_docs(docs=data_pool)
    return data_pool, df
def to_istructure(structure: Structure) -> IStructure:
    """Build an ``IStructure`` from the dict representation of ``structure``."""
    structure_dict = structure.as_dict()
    return IStructure.from_dict(structure_dict)