def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing #common representations for elements/water in cif files special_symbols = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O" } elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp( special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol) if \ symbol in special_symbols else symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) if coord not in coord_to_species: coord_to_species[coord] = {el: occu} else: coord_to_species[coord][el] = occu coord_to_species = { k: Composition(v) for k, v in coord_to_species.items() } allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) #rescale occupancies if necessary for species in allspecies: totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: for key, value in six.iteritems(species): species[key] = value / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing # common representations for elements/water in cif files special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"} elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) elif sym in ['OH', 'OH2']: warnings.warn("Symbol '{}' not recognized".format(sym)) return "" else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" def get_matching_coord(coord): for op in self.symmetry_operations: c = op.operate(coord) for k in coord_to_species.keys(): if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance): return tuple(k) return False ############################################################ """ This part of the code deals with handling formats of data as found in CIF files extracted from the Springer Materials/Pauling File databases, and that are different from standard ICSD formats. """ # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do not contain this key. if "_atom_site_type_symbol" in data.data.keys(): # Keep a track of which data row needs to be removed. # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 'rhombic dodecahedron, Nb<sub>14</sub>' # Without this code, the above row in a structure would be parsed as an ordered site with only Nb (since # CifParser would try to parse the first two characters of the label "Nb,Zr") and occupancy=1. # However, this site is meant to be a disordered site with 0.8 of Nb and 0.2 of Zr. idxs_to_remove = [] for idx, el_row in enumerate(data["_atom_site_label"]): # CIF files from the Springer Materials/Pauling File have switched the label and symbol. Thus, in the # above shown example row, '0.8Nb + 0.2Zr' is the symbol. Below, we split the strings on ' + ' to # check if the length (or number of elements) in the label and symbol are equal. if len(data["_atom_site_type_symbol"][idx].split(' + ')) > \ len(data["_atom_site_label"][idx].split(' + ')): # Dictionary to hold extracted elements and occupancies els_occu = {} # parse symbol to get element names and occupancy and store in "els_occu" symbol_str = data["_atom_site_type_symbol"][idx] symbol_str_lst = symbol_str.split(' + ') for elocc_idx in range(len(symbol_str_lst)): # Remove any bracketed items in the string symbol_str_lst[elocc_idx] = re.sub('\([0-9]*\)', '', symbol_str_lst[elocc_idx].strip()) # Extract element name and its occupancy from the string, and store it as a # key-value pair in "els_occ". els_occu[str(re.findall('\D+', symbol_str_lst[elocc_idx].strip())[1]).replace('<sup>', '')] = \ float('0' + re.findall('\.?\d+', symbol_str_lst[elocc_idx].strip())[1]) x = str2float(data["_atom_site_fract_x"][idx]) y = str2float(data["_atom_site_fract_y"][idx]) z = str2float(data["_atom_site_fract_z"][idx]) coord = (x, y, z) # Add each partially occupied element on the site coordinate for et in els_occu: match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({parse_symbol(et): els_occu[parse_symbol(et)]}) else: coord_to_species[match] += {parse_symbol(et): els_occu[parse_symbol(et)]} idxs_to_remove.append(idx) # Remove the original row by iterating over all keys in the CIF data looking for lists, which indicates # multiple data items, one for each row, and remove items from the list that corresponds to the removed row, # so that it's not processed by the rest of this function (which would result in an error). for cif_key in data.data: if type(data.data[cif_key]) == list: for id in sorted(idxs_to_remove, reverse=True): del data.data[cif_key][id] ############################################################ for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp(special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol, symbol)) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} if any([sum(c.values()) > 1 for c in coord_to_species.values()]): warnings.warn("Some occupancies sum to > 1! If they are within " "the tolerance, they will be rescaled.") allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def parse_symbol(sym): # Common representations for elements/water in cif files # TODO: fix inconsistent handling of water special = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O", "OH": "", "OH2": "" } m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": if sym in special: v = special[sym] else: v = special.get(m[0], m[0]) if len(m) > 1 or (m[0] in special): warnings.warn("{} parsed as {}".format(sym, v)) return v lattice = self.get_lattice(data) # if magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: raise NotImplementedError( "Incommensurate structures not currently supported.") elif self.feature_flags["magcif"]: self.symmetry_operations = self.get_magsymops(data) magmoms = self.parse_magmoms(data, lattice=lattice) else: self.symmetry_operations = self.get_symops(data) magmoms = {} oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() coord_to_magmoms = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. symbol = parse_symbol(data["_atom_site_type_symbol"][i]) except KeyError: symbol = parse_symbol(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in data.data.keys(): oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) magmom = magmoms.get(data["_atom_site_label"][i], Magmom(0)) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) coord_to_magmoms[coord] = magmom else: coord_to_species[match] += {el: occu} coord_to_magmoms[ match] = None # disordered magnetic not currently supported sum_occu = [sum(c.values()) for c in coord_to_species.values()] if any([o > 1 for o in sum_occu]): warnings.warn( "Some occupancies (%s) sum to > 1! If they are within " "the tolerance, they will be rescaled." % str(sum_occu)) allspecies = [] allcoords = [] allmagmoms = [] # check to see if magCIF file is disordered if self.feature_flags["magcif"]: for k, v in coord_to_magmoms.items(): if v is None: # Proposed solution to this is to instead store magnetic moments # as Specie 'spin' property, instead of site property, but this # introduces ambiguities for end user (such as unintended use of # `spin` and Specie will have fictious oxidation state). raise NotImplementedError( 'Disordered magnetic structures not currently supported.' ) if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] tmp_magmom = [ coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords ] if self.feature_flags["magcif"]: coords, magmoms = self._unique_coords( tmp_coords, tmp_magmom) else: coords, magmoms = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) allmagmoms.extend(magmoms) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords) and len( allspecies) == len(allmagmoms): if self.feature_flags["magcif"]: struct = Structure(lattice, allspecies, allcoords, site_properties={"magmom": allmagmoms}) else: struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing #common representations for elements/water in cif files special_symbols = {"D":"D", "Hw":"H", "Ow":"O", "Wat":"O", "wat": "O"} elements = map(str, ptable.all_elements) lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except KeyError: # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp(special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol) if \ symbol in special_symbols else symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) if coord not in coord_to_species: coord_to_species[coord] = {el: occu} else: coord_to_species[coord][el] = occu coord_to_species = {k: Composition(v) for k, v in coord_to_species.items()} allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) #rescale occupancies if necessary for species in allspecies: totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: for key, value in six.iteritems(species): species[key] = value / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def parse_symbol(sym): # Common representations for elements/water in cif files # TODO: fix inconsistent handling of water special = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O", "OH": "", "OH2": ""} m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": if sym in special: v = special[sym] else: v = special.get(m[0], m[0]) if len(m) > 1 or (m[0] in special): warnings.warn("{} parsed as {}".format(sym, v)) return v lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False ############################################################ """ This part of the code deals with handling formats of data as found in CIF files extracted from the Springer Materials/Pauling File databases, and that are different from standard ICSD formats. """ # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do # not contain this key. if "_atom_site_type_symbol" in data.data.keys(): # Keep a track of which data row needs to be removed. # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 # 'rhombic dodecahedron, Nb<sub>14</sub>' # Without this code, the above row in a structure would be parsed # as an ordered site with only Nb (since # CifParser would try to parse the first two characters of the # label "Nb,Zr") and occupancy=1. # However, this site is meant to be a disordered site with 0.8 of # Nb and 0.2 of Zr. idxs_to_remove = [] for idx, el_row in enumerate(data["_atom_site_label"]): # CIF files from the Springer Materials/Pauling File have # switched the label and symbol. Thus, in the # above shown example row, '0.8Nb + 0.2Zr' is the symbol. # Below, we split the strings on ' + ' to # check if the length (or number of elements) in the label and # symbol are equal. if len(data["_atom_site_type_symbol"][idx].split(' + ')) > \ len(data["_atom_site_label"][idx].split(' + ')): # Dictionary to hold extracted elements and occupancies els_occu = {} # parse symbol to get element names and occupancy and store # in "els_occu" symbol_str = data["_atom_site_type_symbol"][idx] symbol_str_lst = symbol_str.split(' + ') for elocc_idx in range(len(symbol_str_lst)): # Remove any bracketed items in the string symbol_str_lst[elocc_idx] = re.sub(r'\([0-9]*\)', '', symbol_str_lst[elocc_idx].strip()) # Extract element name and its occupancy from the # string, and store it as a # key-value pair in "els_occ". els_occu[str(re.findall(r'\D+', symbol_str_lst[ elocc_idx].strip())[1]).replace('<sup>', '')] = \ float('0' + re.findall(r'\.?\d+', symbol_str_lst[ elocc_idx].strip())[1]) x = str2float(data["_atom_site_fract_x"][idx]) y = str2float(data["_atom_site_fract_y"][idx]) z = str2float(data["_atom_site_fract_z"][idx]) coord = (x, y, z) # Add each partially occupied element on the site coordinate for et in els_occu: match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition( {parse_symbol(et): els_occu[parse_symbol(et)]}) else: coord_to_species[match] += { parse_symbol(et): els_occu[parse_symbol(et)]} idxs_to_remove.append(idx) # Remove the original row by iterating over all keys in the CIF # data looking for lists, which indicates # multiple data items, one for each row, and remove items from the # list that corresponds to the removed row, # so that it's not processed by the rest of this function (which # would result in an error). for cif_key in data.data: if type(data.data[cif_key]) == list: for id in sorted(idxs_to_remove, reverse=True): del data.data[cif_key][id] ############################################################ for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. symbol = parse_symbol(data["_atom_site_type_symbol"][i]) except KeyError: symbol = parse_symbol(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in data.data.keys(): oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} sum_occu = [sum(c.values()) for c in coord_to_species.values()] if any([o > 1 for o in sum_occu]): warnings.warn("Some occupancies (%s) sum to > 1! If they are within " "the tolerance, they will be rescaled." % str(sum_occu)) allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing # common representations for elements/water in cif files special_symbols = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O" } elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" def get_matching_coord(coord): for op in self.symmetry_operations: c = op.operate(coord) for k in coord_to_species.keys(): if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance): return tuple(k) return False for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp( special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol, symbol)) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} if any([sum(c.values()) > 1 for c in coord_to_species.values()]): warnings.warn("Some occupancies sum to > 1! If they are within " "the tolerance, they will be rescaled.") allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing # common representations for elements/water in cif files special_symbols = { "D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O" } elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) elif sym in ['OH', 'OH2']: warnings.warn("Symbol '{}' not recognized".format(sym)) return "" else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" def get_matching_coord(coord): for op in self.symmetry_operations: c = op.operate(coord) for k in coord_to_species.keys(): if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance): return tuple(k) return False ############################################################ """ This part of the code deals with handling formats of data as found in CIF files extracted from the Springer Materials/Pauling File databases, and that are different from standard ICSD formats. """ # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do not contain this key. if "_atom_site_type_symbol" in data.data.keys(): # Keep a track of which data row needs to be removed. # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 'rhombic dodecahedron, Nb<sub>14</sub>' # Without this code, the above row in a structure would be parsed as an ordered site with only Nb (since # CifParser would try to parse the first two characters of the label "Nb,Zr") and occupancy=1. # However, this site is meant to be a disordered site with 0.8 of Nb and 0.2 of Zr. idxs_to_remove = [] for idx, el_row in enumerate(data["_atom_site_label"]): # CIF files from the Springer Materials/Pauling File have switched the label and symbol. Thus, in the # above shown example row, '0.8Nb + 0.2Zr' is the symbol. Below, we split the strings on ' + ' to # check if the length (or number of elements) in the label and symbol are equal. if len(data["_atom_site_type_symbol"][idx].split(' + ')) > \ len(data["_atom_site_label"][idx].split(' + ')): # Dictionary to hold extracted elements and occupancies els_occu = {} # parse symbol to get element names and occupancy and store in "els_occu" symbol_str = data["_atom_site_type_symbol"][idx] symbol_str_lst = symbol_str.split(' + ') for elocc_idx in range(len(symbol_str_lst)): # Remove any bracketed items in the string symbol_str_lst[elocc_idx] = re.sub( '\([0-9]*\)', '', symbol_str_lst[elocc_idx].strip()) # Extract element name and its occupancy from the string, and store it as a # key-value pair in "els_occ". els_occu[str(re.findall('\D+', symbol_str_lst[elocc_idx].strip())[1]).replace('<sup>', '')] = \ float('0' + re.findall('\.?\d+', symbol_str_lst[elocc_idx].strip())[1]) x = str2float(data["_atom_site_fract_x"][idx]) y = str2float(data["_atom_site_fract_y"][idx]) z = str2float(data["_atom_site_fract_z"][idx]) coord = (x, y, z) # Add each partially occupied element on the site coordinate for et in els_occu: match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition( {parse_symbol(et): els_occu[parse_symbol(et)]}) else: coord_to_species[match] += { parse_symbol(et): els_occu[parse_symbol(et)] } idxs_to_remove.append(idx) # Remove the original row by iterating over all keys in the CIF data looking for lists, which indicates # multiple data items, one for each row, and remove items from the list that corresponds to the removed row, # so that it's not processed by the rest of this function (which would result in an error). for cif_key in data.data: if type(data.data[cif_key]) == list: for id in sorted(idxs_to_remove, reverse=True): del data.data[cif_key][id] ############################################################ for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp( special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol, symbol)) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} if any([sum(c.values()) > 1 for c in coord_to_species.values()]): warnings.warn("Some occupancies sum to > 1! If they are within " "the tolerance, they will be rescaled.") allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby(sorted(list( coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def get_num_implicit_hydrogens(sym): num_h = {"Wat": 2, "wat": 2, "O-H": 1} return num_h.get(sym[:3], 0) lattice = self.get_lattice(data) # if magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: raise NotImplementedError( "Incommensurate structures not currently supported.") elif self.feature_flags["magcif"]: self.symmetry_operations = self.get_magsymops(data) magmoms = self.parse_magmoms(data, lattice=lattice) else: self.symmetry_operations = self.get_symops(data) magmoms = {} oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() coord_to_magmoms = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False label_el_dict = {} for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. symbol = self._parse_symbol(data["_atom_site_type_symbol"][i]) label = data["_atom_site_label"][i] num_h = get_num_implicit_hydrogens( data["_atom_site_type_symbol"][i]) except KeyError: symbol = self._parse_symbol(data["_atom_site_label"][i]) label = data["_atom_site_label"][i] num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in data.data.keys(): oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) magmom = magmoms.get(data["_atom_site_label"][i], np.array([0, 0, 0])) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) comp_d = {el: occu} if num_h > 0: comp_d["H"] = num_h comp = Composition(comp_d) if not match: coord_to_species[coord] = comp coord_to_magmoms[coord] = magmom else: coord_to_species[match] += comp # disordered magnetic not currently supported coord_to_magmoms[match] = None label_el_dict[coord] = label sum_occu = [ sum(c.values()) for c in coord_to_species.values() if not set(c.elements) == {Element("O"), Element("H")} ] if any([o > 1 for o in sum_occu]): msg = "Some occupancies (%s) sum to > 1! If they are within " \ "the tolerance, they will be rescaled." % str(sum_occu) warnings.warn(msg) self.errors.append(msg) allspecies = [] allcoords = [] allmagmoms = [] allhydrogens = [] alllabels = [] # check to see if magCIF file is disordered if self.feature_flags["magcif"]: for k, v in coord_to_magmoms.items(): if v is None: # Proposed solution to this is to instead store magnetic # moments as Specie 'spin' property, instead of site # property, but this introduces ambiguities for end user # (such as unintended use of `spin` and Specie will have # fictious oxidation state). raise NotImplementedError( 'Disordered magnetic structures not currently supported.' ) if coord_to_species.items(): for comp, group in groupby(sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] #print(tmp_coords) labels = [] for i in tmp_coords: labels.append(label_el_dict[i]) #print(labels) tmp_magmom = [ coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords ] if self.feature_flags["magcif"]: coords, magmoms, coords_num = self._unique_coords( tmp_coords, magmoms_in=tmp_magmom, lattice=lattice) else: coords, magmoms, coords_num = self._unique_coords( tmp_coords) if set(comp.elements) == {Element("O"), Element("H")}: # O with implicit hydrogens im_h = comp["H"] species = Composition({"O": comp["O"]}) else: im_h = 0 species = comp allhydrogens.extend(len(coords) * [im_h]) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) allmagmoms.extend(magmoms) for i in range(len(coords_num)): alllabels.extend(coords_num[i] * [labels[i]]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords) \ and len(allspecies) == len(allmagmoms): site_properties = dict() if any(allhydrogens): assert len(allhydrogens) == len(allcoords) site_properties["implicit_hydrogens"] = allhydrogens if self.feature_flags["magcif"]: site_properties["magmom"] = allmagmoms if len(site_properties) == 0: site_properties = None struct = Structure(lattice, allspecies, allcoords, site_properties=site_properties) #struct = struct.get_sorted_structure() if primitive and self.feature_flags['magcif']: struct = struct.get_primitive_structure(use_site_props=True) elif primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() struct.add_site_property("_atom_site_label", alllabels) return struct
def _get_structure(self, data, primitive, substitution_dictionary=None): """ Generate structure from part of the cif. """ # Symbols often representing # common representations for elements/water in cif files special_symbols = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O"} elements = [el.symbol for el in Element] lattice = self.get_lattice(data) self.symmetry_operations = self.get_symops(data) oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() def parse_symbol(sym): if substitution_dictionary: return substitution_dictionary.get(sym) else: m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": return m[0] return "" def get_matching_coord(coord): for op in self.symmetry_operations: c = op.operate(coord) for k in coord_to_species.keys(): if np.allclose(pbc_diff(c, k), (0, 0, 0), atol=self._site_tolerance): return tuple(k) return False for i in range(len(data["_atom_site_label"])): symbol = parse_symbol(data["_atom_site_label"][i]) if symbol: if symbol not in elements and symbol not in special_symbols: symbol = symbol[:2] else: continue # make sure symbol was properly parsed from _atom_site_label # otherwise get it from _atom_site_type_symbol try: if symbol in special_symbols: get_el_sp(special_symbols.get(symbol)) else: Element(symbol) except (KeyError, ValueError): # sometimes the site doesn't have the type_symbol. # we then hope the type_symbol can be parsed from the label if "_atom_site_type_symbol" in data.data.keys(): symbol = data["_atom_site_type_symbol"][i] if oxi_states is not None: if symbol in special_symbols: el = get_el_sp(special_symbols.get(symbol) + str(oxi_states[symbol])) else: el = Specie(symbol, oxi_states.get(symbol, 0)) else: el = get_el_sp(special_symbols.get(symbol, symbol)) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) if not match: coord_to_species[coord] = Composition({el: occu}) else: coord_to_species[match] += {el: occu} if any([sum(c.values()) > 1 for c in coord_to_species.values()]): warnings.warn("Some occupancies sum to > 1! If they are within " "the tolerance, they will be rescaled.") allspecies = [] allcoords = [] if coord_to_species.items(): for species, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] coords = self._unique_coords(tmp_coords) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords): struct = Structure(lattice, allspecies, allcoords) struct = struct.get_sorted_structure() if primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct