def stoichiometry(self):
    """ Stoichiometry of the structure in matador format.

    The value is a list of two-member lists, each holding an element
    symbol and the number of atoms of that element per formula unit,
    sorted alphabetically by element symbol. It is computed with
    `get_stoich` from `self.atom_types` on first access and then
    cached under the `'stoichiometry'` key of `self._data`.

    """
    cache = self._data
    if 'stoichiometry' in cache:
        return cache['stoichiometry']

    # imported lazily, only when the value actually has to be computed
    from matador.utils.chem_utils import get_stoich
    cache['stoichiometry'] = get_stoich(self.atom_types)
    return cache['stoichiometry']
def test_atoms_to_stoich(self):
    """ Check that `get_stoich` reduces lists of atom labels to the
    expected stoichiometry, whatever the order of the input labels.
    """
    # (input atom list, expected stoichiometry)
    cases = [
        (5 * ["Li"] + 5 * ["P"], [["Li", 1], ["P", 1]]),
        (99 * ["Li"] + 1 * ["P"], [["Li", 99], ["P", 1]]),
        (4 * ["Li"] + 36 * ["P"], [["Li", 1], ["P", 9]]),
        (3 * ["Li"] + 2 * ["P"], [["Li", 3], ["P", 2]]),
        (9 * ["Li"] + 6 * ["P"], [["Li", 3], ["P", 2]]),
        # same composition as the third case, but with P listed first
        (36 * ["P"] + 4 * ["Li"], [["Li", 1], ["P", 9]]),
    ]
    for atoms, expected in cases:
        self.assertEqual(expected, get_stoich(atoms))
def atomic_swaps(self, source_doc):
    """ Apply every applicable swap in `self.swap_dict_list` to a structure.

    Parameters:
        source_doc (dict): matador doc to swap from; it is not modified.

    Returns:
        (list, int): the swapped documents (one per swap that touches at
            least one species present in `source_doc`, and that passes the
            `self.maintain_num_species` filter), and how many there are.

    """
    working_doc = deepcopy(source_doc)
    original_types = source_doc['atom_types']
    original_num_species = len(set(original_types))

    results = []
    for swap in self.swap_dict_list:
        # skip swaps that do not mention any species in this structure
        if not any(species in original_types for species in swap):
            continue

        swapped_types = [swap.get(atom, atom) for atom in original_types]
        working_doc['atom_types'] = swapped_types
        # keep a record of the composition before the swap
        working_doc['_swapped_stoichiometry'] = get_stoich(original_types)
        working_doc['stoichiometry'] = get_stoich(working_doc['atom_types'])
        working_doc['elems'] = set(working_doc['atom_types'])
        working_doc['num_species'] = len(working_doc['elems'])

        species_count_kept = working_doc['num_species'] == original_num_species
        if species_count_kept or not self.maintain_num_species:
            # the working copy is reused by later swaps, so store a snapshot
            results.append(deepcopy(working_doc))

    return results, len(results)
def ase2dict(atoms, as_model=False) -> Union[dict, Crystal]:
    """ Convert an `ase.Atoms` object into a matador document.

    Atoms (and their fractional positions) are reordered so that the
    chemical symbols are sorted; atoms with the same symbol keep their
    relative order.

    Parameters:
        atoms (ase.Atoms): input structure.

    Keyword arguments:
        as_model (bool): if `True`, wrap the result in a :obj:`Crystal`
            instead of returning the plain dictionary.

    Returns:
        Union[dict, Crystal]: matador output.

    """
    from matador.utils.cell_utils import cart2abc

    symbols = atoms.get_chemical_symbols()
    # sorted() is stable, so this is an argsort that preserves ties
    order = sorted(range(len(symbols)), key=symbols.__getitem__)
    frac_positions = atoms.get_scaled_positions().tolist()

    doc = {}
    doc['atom_types'] = [symbols[idx] for idx in order]
    doc['positions_frac'] = [frac_positions[idx] for idx in order]

    cell = atoms.get_cell()
    try:
        doc['lattice_cart'] = cell.tolist()
    except AttributeError:
        # fall back to the underlying array when the cell object itself
        # has no `tolist`
        doc['lattice_cart'] = atoms.get_cell().array.tolist()

    doc['lattice_abc'] = cart2abc(doc['lattice_cart'])
    doc['num_atoms'] = len(doc['atom_types'])
    doc['stoichiometry'] = get_stoich(doc['atom_types'])
    doc['cell_volume'] = atoms.get_volume()
    doc['elems'] = set(doc['atom_types'])

    atoms_per_fu = int(sum(entry[1] for entry in doc['stoichiometry']))
    doc['num_fu'] = doc['num_atoms'] / atoms_per_fu
    doc['space_group'] = get_spacegroup_spg(doc, symprec=0.001)

    if atoms.info:
        doc["ase_info"] = copy.deepcopy(atoms.info)

    return Crystal(doc) if as_model else doc
def vacancy(mutant, debug=False):
    """ Remove a random atom from the structure.

    Every atom in the cell is equally likely to be removed. The
    `atom_types`, `positions_frac`, `positions_abs` (if present),
    `num_atoms` and `stoichiometry` fields are updated.

    Parameters:
        mutant (dict): structure to mutate in-place.

    Keyword arguments:
        debug (bool): if True, print which atom is being removed.

    Raises:
        RuntimeError: if the cell contains fewer than two atoms.

    """
    if mutant["num_atoms"] < 2:
        raise RuntimeError("Cannot apply vacancy to cell with 1 atom.")

    # np.random.randint excludes its upper bound, so the bound must be
    # num_atoms itself for the last atom to be eligible for removal
    vacancy_idx = np.random.randint(0, mutant["num_atoms"])
    if debug:
        print("Removing atom {} of type {} from cell.".format(
            vacancy_idx, mutant["atom_types"][vacancy_idx]))

    del mutant["atom_types"][vacancy_idx]
    del mutant["positions_frac"][vacancy_idx]
    if "positions_abs" in mutant:
        del mutant["positions_abs"][vacancy_idx]

    mutant["num_atoms"] = len(mutant["atom_types"])
    # calculate stoichiometry
    mutant["stoichiometry"] = get_stoich(mutant["atom_types"])
def query2files(cursor, dirname=None, max_files=10000, top=None, prefix=None,
                cell=None, param=None, res=None, pdb=None, json=None, xsf=None,
                markdown=True, latex=False, subcmd=None, argstr=None, **kwargs):
    """ Many-to-many convenience function for many structures being
    written to many file types.

    A fresh output directory is created, each structure is written in
    every requested format under a name built from its source and
    formula, and a markdown summary (plus an optional LaTeX summary) of
    the whole cursor is written alongside.

    Parameters:
        cursor (:obj:`list` of :obj:`dict`/:class:`AtomicSwapper`): list
            of matador dictionaries to write out. For an `AtomicSwapper`,
            its `.cursor` attribute is used and `subcmd` is forced to
            "swaps".

    Keyword arguments:
        dirname (str): the folder to save the results into. Will be
            created if non-existent. Will have integer appended to it if
            already existing. If None, a name is obtained from
            `generate_relevant_path`.
        max_files (int): if the number of files to be written exceeds
            this number, then raise RuntimeError.
        top (int): if given and smaller than the cursor length, only the
            first `top` structures are written.
        prefix (str): if given, prepended (followed by '-') to every
            structure file name.
        cell, param, res, pdb, json, xsf: truthy values enable writing of
            the corresponding file type for each structure.
        markdown (bool): NOTE(review): accepted but never read in this
            function; the markdown summary is written unconditionally.
        latex (bool): if True, also write a LaTeX summary.
        subcmd (str): name of the calling sub-command; 'polish' and
            'swaps' disable the `info` flag passed to `doc2res`, 'swaps'
            changes file naming, 'hull' and 'voltage' set the `hull` flag
            passed to `display_results`.
        argstr (str): forwarded to `display_results`.
        **kwargs (dict): forwarded to `generate_relevant_path` and
            `display_results`.

    Raises:
        RuntimeError: if more than `max_files` structure files would be
            written.

    """
    # json is not part of this tuple, so requesting only json does not
    # count as writing "multiple files"
    multiple_files = any((cell, param, res, pdb, xsf))
    prefix = prefix + '-' if prefix is not None else ''

    if isinstance(cursor, AtomicSwapper):
        cursor = cursor.cursor
        subcmd = "swaps"

    # hash_dupe is False in both branches; only `info` differs
    if subcmd in ['polish', 'swaps']:
        info = False
        hash_dupe = False
    else:
        info = True
        hash_dupe = False

    # lists are measured with len(); anything else is expected to
    # provide a .count() method
    if isinstance(cursor, list):
        num = len(cursor)
    else:
        num = cursor.count()

    if top is not None:
        if top < num:
            num = top

    # as above, json files are not included in this count
    num_files = num * sum(1 for ext in [cell, param, res, pdb, xsf] if ext)

    if multiple_files:
        print('Intending to write', num, 'structures to file...')
        if num_files > max_files:
            raise RuntimeError(
                "Not writing {} files as it exceeds argument `max_files` limit of {}"
                .format(num_files, max_files))

    if dirname is None:
        dirname = generate_relevant_path(subcmd=subcmd, **kwargs)

    _dir = False
    dir_counter = 0
    # postfix integer on end of directory name if it exists
    while not _dir:
        if dir_counter != 0:
            directory = dirname + str(dir_counter)
        else:
            directory = dirname
        if not os.path.isdir(directory):
            os.makedirs(directory)
            _dir = True
        else:
            dir_counter += 1

    for _, doc in enumerate(cursor[:num]):
        # generate an appropriate filename for the structure
        root_source = get_root_source(doc)

        # for swapped structures, name the file after the pre-swap formula
        if '_swapped_stoichiometry' in doc:
            formula = get_formula_from_stoich(doc['_swapped_stoichiometry'])
        else:
            formula = get_formula_from_stoich(doc['stoichiometry'])

        if subcmd == 'swaps':
            root_source = root_source.replace('-swap-', '-')

        name = root_source

        # sources containing 'OQMD ' or 'mp-' get a formula-prefixed name
        if 'OQMD ' in root_source:
            name = '{formula}-OQMD_{src}'.format(
                formula=formula, src=root_source.split(' ')[-1])
        elif 'mp-' in root_source:
            name = '{formula}-MP_{src}'.format(formula=formula,
                                               src=root_source.split('-')[-1])
        if 'icsd' in doc and 'CollCode' not in name:
            name += '-CollCode{}'.format(doc['icsd'])
        else:
            # look for a 'pf-' id among the sources first, falling back to
            # the first entry of doc['pf_ids'] when no source matched
            pf_id = None
            for source in doc['source']:
                if 'pf-' in source:
                    pf_id = source.split('-')[-1]
                    break
            else:
                if 'pf_ids' in doc:
                    pf_id = doc['pf_ids'][0]
            if pf_id is not None:
                name += '-PF-{}'.format(pf_id)

        # if swaps, prepend new composition
        if subcmd == 'swaps':
            new_formula = get_formula_from_stoich(get_stoich(
                doc['atom_types']))
            name = '{}-swap-{}'.format(new_formula, name)

        path = "{directory}/{prefix}{name}".format(directory=directory,
                                                   prefix=prefix,
                                                   name=name)

        if param:
            doc2param(doc, path, hash_dupe=hash_dupe)
        if cell:
            doc2cell(doc, path, hash_dupe=hash_dupe)
        if res:
            doc2res(doc, path, info=info, hash_dupe=hash_dupe)
        if json:
            doc2json(doc, path, hash_dupe=hash_dupe)
        if pdb:
            doc2pdb(doc, path, hash_dupe=hash_dupe)
        if xsf:
            doc2xsf(doc, path)

    hull = subcmd in ['hull', 'voltage']
    # database cursors must be rewound before being iterated again
    if isinstance(cursor, pm.cursor.Cursor):
        cursor.rewind()
    # the summary file is named after the directory itself
    # NOTE(review): this presumably expects `directory` to be a bare
    # folder name without path separators -- confirm against callers
    md_path = "{directory}/{directory}.md".format(directory=directory)
    md_kwargs = {}
    md_kwargs.update(kwargs)
    md_kwargs.update({
        'markdown': True,
        'latex': False,
        'argstr': argstr,
        'hull': hull
    })
    md_string = display_results(cursor, **md_kwargs)
    with open(md_path, 'w') as f:
        f.write(md_string)

    if latex:
        if isinstance(cursor, pm.cursor.Cursor):
            cursor.rewind()
        tex_path = "{directory}/{directory}.tex".format(directory=directory)
        print('Writing LaTeX file', tex_path + '...')
        tex_kwargs = {}
        tex_kwargs.update(kwargs)
        tex_kwargs.update({
            'latex': True,
            'markdown': False,
            'argstr': argstr,
            'hull': hull
        })
        tex_string = display_results(cursor, **tex_kwargs)
        with open(tex_path, 'w') as f:
            f.write(tex_string)

    print('Done!')
def random_slice(parent_seeds, standardize=True, supercell=True, shift=True, debug=False):
    """ Simple cut-and-splice crossover of two parents.

    The child lattice is a weighted sum of the two parent lattices, with
    weights `cut_val` and `child_size - cut_val`, where `child_size` is
    drawn between 0.5 and 2.0. Both parent structures are cut and spliced
    along the same, randomly chosen, crystallographic axis. The child
    cell lengths are finally rescaled so that its number density matches
    the mean number density of the parents.

    Parameters:
        parent_seeds (list(dict)) : the two parent structures to
            crossover; they are deep-copied and not modified,
        standardize (bool) : pass each parent through
            `standardize_doc_cell` pre-crossover,
        supercell (bool) : make a supercell of one parent so that the
            parent cell volumes are closer to each other,
        shift (bool) : randomly shift atoms in parents to unbias,
        debug (bool) : print diagnostic information.

    Returns:
        dict: newborn structure from parents.

    """
    parents = deepcopy(parent_seeds)
    child = dict()

    # child_size is a number between 0.5 and 2
    child_size = 0.5 + 1.5 * np.random.rand()
    # cut_val is a number between 0.25*child_size and 0.75*child_size
    # the slice position of one parent in fractional coordinates
    # (the other is (child_size-cut_val))
    cut_val = child_size * (0.25 + (np.random.rand() / 2.0))

    # record the parents' number densities before any standardisation or
    # supercell construction; their mean is the target for the child
    parent_densities = []
    for ind, parent in enumerate(parents):
        if "cell_volume" not in parent:
            parents[ind]["cell_volume"] = cart2volume(parent["lattice_cart"])
        parent_densities.append(parent["num_atoms"] / parent["cell_volume"])
    target_density = sum(parent_densities) / len(parent_densities)

    if standardize:
        parents = [standardize_doc_cell(parent) for parent in parents]

    if supercell:
        # compare the cell volumes of the parents and grow the smaller one
        parent_extent_ratio = parents[0]["cell_volume"] / parents[1][
            "cell_volume"]
        if debug:
            print(
                parent_extent_ratio,
                parents[0]["cell_volume"],
                "vs",
                parents[1]["cell_volume"],
            )

        if parent_extent_ratio < 1:
            supercell_factor = int(round(1 / parent_extent_ratio))
            supercell_target = 0
        elif parent_extent_ratio >= 1:
            supercell_factor = int(round(parent_extent_ratio))
            supercell_target = 1
        if debug:
            print(supercell_target, supercell_factor)

        supercell_vector = [1, 1, 1]
        if supercell_factor > 1:
            for ind in range(supercell_factor):
                # find the shortest lattice vector of the target parent
                # (by squared length) and add one repeat along it
                min_lat_vec_abs = 1e10
                min_lat_vec_ind = -1
                for i in range(3):
                    lat_vec_abs = np.sum(
                        np.asarray(
                            parents[supercell_target]["lattice_cart"][i])**2)
                    if lat_vec_abs < min_lat_vec_abs:
                        min_lat_vec_abs = lat_vec_abs
                        min_lat_vec_ind = i
                supercell_vector[min_lat_vec_ind] += 1
        if debug:
            print("Making supercell of {} with {}".format(
                parents[supercell_target]["source"][0], supercell_vector))
        if supercell_vector != [1, 1, 1]:
            parents[supercell_target] = create_simple_supercell(
                parents[supercell_target], supercell_vector, standardize=False)

    child["positions_frac"] = []
    child["atom_types"] = []
    # child lattice is a weighted sum of the two parent lattices
    child["lattice_cart"] = cut_val * np.asarray(
        parents[0]["lattice_cart"]) + (child_size - cut_val) * np.asarray(
            parents[1]["lattice_cart"])
    child["lattice_cart"] = child["lattice_cart"].tolist()

    # choose slice axis
    axis = np.random.randint(low=0, high=3)
    for ind, parent in enumerate(parents):
        if shift:
            # apply same random shift to all atoms in parents
            shift_vec = np.random.rand(3)
            for idx, _ in enumerate(parent["positions_frac"]):
                for k in range(3):
                    parent["positions_frac"][idx][k] += shift_vec[k]
                    # wrap the shifted coordinate back into [0, 1)
                    if parent["positions_frac"][idx][k] >= 1:
                        parent["positions_frac"][idx][k] -= 1
                    elif parent["positions_frac"][idx][k] < 0:
                        parent["positions_frac"][idx][k] += 1
        # slice parent: the first parent (ind == 0) contributes atoms
        # with pos[axis] > cut_val, the second those with
        # pos[axis] <= cut_val
        # NOTE(review): cut_val can exceed 1 (child_size goes up to 2), in
        # which case wrapped fractional positions all fall below it --
        # confirm this is intended
        for atom, pos in zip(parent["atom_types"], parent["positions_frac"]):
            if ind == (pos[axis] <= cut_val):
                child["positions_frac"].append(pos)
                child["atom_types"].append(atom)

    # fill in the derived fields of the child
    child["mutations"] = ["crossover"]
    child["stoichiometry"] = get_stoich(child["atom_types"])
    child["num_atoms"] = len(child["atom_types"])
    if "cell_volume" not in child:
        child["cell_volume"] = cart2volume(child["lattice_cart"])
    number_density = child["num_atoms"] / child["cell_volume"]

    # rescale cell based on number density of parents
    new_scale = np.cbrt(number_density / target_density)
    child["lattice_abc"] = np.asarray(cart2abc(child["lattice_cart"]))
    # only the first row of lattice_abc is scaled
    # (presumably the cell lengths, with the angles in the second row)
    child["lattice_abc"][0] *= new_scale
    child["lattice_abc"] = child["lattice_abc"].tolist()
    child["lattice_cart"] = abc2cart(child["lattice_abc"])
    child["cell_volume"] = cart2volume(child["lattice_cart"])
    child["positions_abs"] = frac2cart(child["lattice_cart"],
                                       child["positions_frac"])

    return child
def _magres_block_lines(flines, start, closing_tags):
    """ Yield the lines that follow `flines[start]`, stopping at the
    first line that matches one of `closing_tags`.

    Parameters:
        flines (list): all lines of the file.
        start (int): index of the line holding the opening tag.
        closing_tags (tuple): lower-case closing tags that end the block.

    Raises:
        RuntimeError: if the file ends before a closing tag is found.

    """
    for line_no in range(start + 1, len(flines)):
        if flines[line_no].strip().lower() in closing_tags:
            return
        yield flines[line_no]
    raise RuntimeError("Something went wrong in reader loop")


def magres2dict(fname, **kwargs):
    """ Extract available information from .magres file. Assumes units of
    Angstrom and ppm for relevant quantities.

    Three kinds of block are read: the first `atoms` block (lattice,
    atom types and positions), every `magres` block (susceptibility,
    magnetic shielding and electric field gradient tensors, plus derived
    quantities) and every `calculation` block (calculator name and
    version). Both `<tag>` and `[tag]` delimiters are accepted.

    Parameters:
        fname (str): name of the magres file, passed to
            `get_flines_extension_agnostic` with extension "magres".

    Returns:
        (dict, bool): the scraped data, and True.

    Raises:
        RuntimeError: if a block is opened but never closed.

    """
    magres = defaultdict(list)
    flines, fname = get_flines_extension_agnostic(fname, "magres")
    magres['source'] = [fname]

    # grab file owner username; best-effort only, as `pwd` is not
    # available on every platform
    try:
        from pwd import getpwuid
        magres['user'] = getpwuid(stat(fname).st_uid).pw_name
    except Exception:
        magres['user'] = '******'

    magres['magres_units'] = dict()

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<atoms>', '[atoms]']:
            for entry in _magres_block_lines(flines, line_no, ('</atoms>', '[/atoms]')):
                split_line = entry.split()
                if not split_line:
                    continue
                if split_line[0] == 'units':
                    magres['magres_units'][split_line[1]] = split_line[2]
                elif 'lattice' in split_line:
                    # nine numbers, read as three rows of three
                    lattice = split_line[1:]
                    for j in range(3):
                        magres['lattice_cart'].append([
                            float(elem) for elem in lattice[j * 3:(j + 1) * 3]
                        ])
                    magres['lattice_abc'] = cart2abc(magres['lattice_cart'])
                elif 'atom' in split_line:
                    atom = split_line
                    magres['atom_types'].append(atom[1])
                    magres['positions_abs'].append(
                        [float(elem) for elem in atom[-3:]])
            # only the first atoms block is read
            break

    # membership test does not create the key on the defaultdict
    if "atom_types" in magres:
        magres['num_atoms'] = len(magres['atom_types'])
        magres['positions_frac'] = cart2frac(magres['lattice_cart'],
                                             magres['positions_abs'])
        magres['stoichiometry'] = get_stoich(magres['atom_types'])

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<magres>', '[magres]']:
            for entry in _magres_block_lines(flines, line_no, ('</magres>', '[/magres]')):
                split_line = entry.split()
                if not split_line:
                    continue
                if split_line[0] == 'units':
                    magres['magres_units'][split_line[1]] = split_line[2]
                elif 'sus' in split_line:
                    magres["susceptibility_tensor"] = np.array(
                        [float(val) for val in split_line[1:]]).reshape(3, 3)
                elif 'ms' in split_line:
                    ms = np.array([float(val)
                                   for val in split_line[3:]]).reshape(3, 3)
                    s_iso = np.trace(ms) / 3

                    # find eigenvalues of symmetric part of shielding and
                    # order them to calc anisotropy eta
                    symmetric_shielding = _symmetrise_tensor(ms)
                    s_yy, s_xx, s_zz = _get_haeberlen_eigs(symmetric_shielding)
                    s_aniso = s_zz - (s_xx + s_yy) / 2.0
                    asymm = (s_yy - s_xx) / (s_zz - s_iso)

                    # convert from reduced anistropy to CSA
                    magres["magnetic_shielding_tensors"].append(ms)
                    magres["chemical_shielding_isos"].append(s_iso)
                    magres["chemical_shift_anisos"].append(s_aniso)
                    magres["chemical_shift_asymmetries"].append(asymm)
                elif "efg" in split_line:
                    efg = np.array([float(val)
                                    for val in split_line[3:]]).reshape(3, 3)
                    species = split_line[1]
                    eigs = _get_haeberlen_eigs(efg)
                    v_zz, eta = eigs[2], (eigs[0] - eigs[1]) / eigs[2]

                    # calculate C_Q in MHz; species without a tabulated
                    # quadrupole moment fall back to 1.0
                    quadrupole_moment = ELECTRIC_QUADRUPOLE_MOMENTS.get(
                        species, 1.0)
                    C_Q = ((ELECTRON_CHARGE * v_zz * quadrupole_moment *
                            EFG_AU_TO_SI * BARN_TO_M2) /
                           (PLANCK_CONSTANT * 1e6))
                    magres["electric_field_gradient"].append(efg)
                    magres["quadrupolar_couplings"].append(C_Q)
                    magres["quadrupolar_asymmetries"].append(eta)

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<calculation>', '[calculation]']:
            for entry in _magres_block_lines(
                    flines, line_no, ('</calculation>', '[/calculation]')):
                # space important as it excludes other calc_code_x variables
                if 'calc_code ' in entry:
                    magres['calculator'] = entry.split()[1]
                if 'calc_code_version' in entry:
                    magres['calculator_version'] = entry.split()[1]

    return dict(magres), True
def _magres_block_lines(flines, start, closing_tags):
    """ Yield the lines that follow `flines[start]`, stopping at the
    first line that matches one of `closing_tags`.

    Parameters:
        flines (list): all lines of the file.
        start (int): index of the line holding the opening tag.
        closing_tags (tuple): lower-case closing tags that end the block.

    Raises:
        RuntimeError: if the file ends before a closing tag is found.

    """
    for line_no in range(start + 1, len(flines)):
        if flines[line_no].strip().lower() in closing_tags:
            return
        yield flines[line_no]
    raise RuntimeError("Something went wrong in reader loop")


def magres2dict(fname, **kwargs):
    """ Extract available information from .magres file. Assumes units of
    Angstrom and ppm for relevant quantities.

    Three kinds of block are read: the first `atoms` block (lattice,
    atom types and positions), every `magres` block (susceptibility and
    magnetic shielding tensors, plus derived shielding quantities) and
    every `calculation` block (calculator name and version). Both
    `<tag>` and `[tag]` delimiters are accepted.

    Parameters:
        fname (str): name of the magres file, passed to
            `get_flines_extension_agnostic` with extension "magres".

    Returns:
        (defaultdict, bool): the scraped data, and True.

    Raises:
        RuntimeError: if a block is opened but never closed.

    """
    magres = defaultdict(list)
    flines, fname = get_flines_extension_agnostic(fname, "magres")
    magres['source'] = [fname]

    # grab file owner username; best-effort only
    try:
        magres['user'] = getpwuid(stat(fname).st_uid).pw_name
    except Exception:
        magres['user'] = '******'

    magres['magres_units'] = dict()

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<atoms>', '[atoms]']:
            for entry in _magres_block_lines(flines, line_no, ('</atoms>', '[/atoms]')):
                split_line = entry.split()
                if not split_line:
                    continue
                if split_line[0] == 'units':
                    magres['magres_units'][split_line[1]] = split_line[2]
                elif 'lattice' in entry:
                    # nine numbers, read as three rows of three
                    lattice = entry.split()[1:]
                    for j in range(3):
                        magres['lattice_cart'].append([
                            float(elem) for elem in lattice[j * 3:(j + 1) * 3]
                        ])
                    magres['lattice_abc'] = cart2abc(magres['lattice_cart'])
                elif 'atom' in entry:
                    atom = entry.split()
                    magres['atom_types'].append(atom[1])
                    magres['positions_abs'].append(
                        [float(elem) for elem in atom[-3:]])
            # only the first atoms block is read
            break

    magres['num_atoms'] = len(magres['atom_types'])
    magres['positions_frac'] = cart2frac(magres['lattice_cart'],
                                         magres['positions_abs'])
    magres['stoichiometry'] = get_stoich(magres['atom_types'])

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<magres>', '[magres]']:
            for entry in _magres_block_lines(flines, line_no, ('</magres>', '[/magres]')):
                split_line = entry.split()
                if not split_line:
                    continue
                if split_line[0] == 'units':
                    magres['magres_units'][split_line[1]] = split_line[2]
                elif 'sus' in entry:
                    sus = entry.split()[1:]
                    for j in range(3):
                        magres['susceptibility_tensor'].append(
                            [float(val) for val in sus[3 * j:3 * (j + 1)]])
                elif 'ms' in entry:
                    ms = entry.split()[3:]
                    tensor = [[float(val) for val in ms[3 * j:3 * (j + 1)]]
                              for j in range(3)]
                    magres['magnetic_shielding_tensors'].append(tensor)
                    magres['chemical_shielding_isos'].append(0)
                    magres['chemical_shift_anisos'].append(0)
                    magres['chemical_shift_asymmetries'].append(0)

                    # isotropic shielding is a third of the trace
                    for j in range(3):
                        magres['chemical_shielding_isos'][-1] += tensor[j][j] / 3
                    s_iso = magres['chemical_shielding_isos'][-1]

                    # find eigenvalues of symmetric part of shielding and
                    # order them to calc anisotropy eta
                    tensor_arr = np.asarray(tensor)
                    symmetric_shielding = 0.5 * (tensor_arr + tensor_arr.T)
                    eig_vals = np.linalg.eig(symmetric_shielding)[0]
                    # Haeberlen convention:
                    # |s_zz - s_iso| >= |s_xx - s_iso| >= |s_yy - s_iso|
                    # only the eigenvalues are needed, so they are sorted
                    # on their own by distance from s_iso
                    s_yy, s_xx, s_zz = sorted(
                        eig_vals, key=lambda eig_val: abs(eig_val - s_iso))

                    # convert from reduced anistropy to CSA
                    magres['chemical_shift_anisos'][-1] = s_zz - (s_xx + s_yy) / 2.0
                    magres['chemical_shift_asymmetries'][-1] = (
                        s_yy - s_xx) / (s_zz - s_iso)

    for line_no, line in enumerate(flines):
        line = line.lower().strip()
        if line in ['<calculation>', '[calculation]']:
            for entry in _magres_block_lines(
                    flines, line_no, ('</calculation>', '[/calculation]')):
                # space important as it excludes other calc_code_x variables
                if 'calc_code ' in entry:
                    magres['calculator'] = entry.split()[1]
                if 'calc_code_version' in entry:
                    magres['calculator_version'] = entry.split()[1]

    return magres, True
def __init__(self, doc, lazy=False, **kwargs):
    """ Store the calculation settings and structure data, then compute
    the PDF (unless lazy=True).

    Parameters:
        doc (dict) : matador document to calculate PDF of; it is
            deep-copied and not modified.

    Keyword Arguments:
        dr (float) : bin width for PDF (Angstrom) (DEFAULT: 0.01)
        gaussian_width (float) : width of Gaussian smearing (Angstrom) (DEFAULT: 0.1)
        num_images (int/str) : number of unit cell images include in PDF calculation (DEFAULT: 'auto')
        max_num_images (int) : cutoff number of unit cells before crashing (DEFAULT: 50)
        rmax (float) : maximum distance cutoff for PDF (Angstrom) (DEFAULT: 15)
        projected (bool) : optionally calculate the element-projected PDF (DEFAULT: True)
        standardize (bool) : standardize cell before calculating PDF (DEFAULT: True)
        lazy (bool) : if True, calculator is not called when initializing PDF object
        timing (bool) : if True, print the total time taken to calculate the PDF (DEFAULT: False)
        label (str) : label for the PDF; if not given, the structure's
            `text_id` entries joined by spaces are used when available.

    Keyword arguments passed as None are ignored, leaving the default
    in place.

    """
    settings = {
        'dr': 0.01,
        'gaussian_width': 0.1,
        'rmax': 15,
        'num_images': 'auto',
        'style': 'smear',
        'debug': False,
        'timing': False,
        'low_mem': False,
        'projected': True,
        'max_num_images': 50,
        'standardize': True
    }
    # user-supplied values override the defaults, unless they are None
    for key, value in kwargs.items():
        if value is not None:
            settings[key] = value
    self.kwargs = settings

    # useful data for labelling
    self.spg = None
    structure = copy.deepcopy(doc)
    if self.kwargs.get('standardize'):
        structure = standardize_doc_cell(structure)
        self.spg = structure['space_group']
    self.stoichiometry = structure.get('stoichiometry',
                                       get_stoich(structure['atom_types']))

    # private variables
    self._num_images = self.kwargs.get('num_images')
    self._lattice = np.asarray(structure['lattice_cart'])
    cartesian = frac2cart(structure['lattice_cart'], structure['positions_frac'])
    self._poscart = np.asarray(cartesian).reshape(-1, 3)
    self._types = structure['atom_types']
    self._num_atoms = len(self._poscart)
    self._volume = cart2volume(self._lattice)
    self._image_vec = None

    # public variables
    self.rmax = self.kwargs.get('rmax')
    self.number_density = self._num_atoms / self._volume
    self.dr = self.kwargs.get('dr')
    self.r_space = None
    self.gr = None
    self.elem_gr = None

    # an explicit label wins over one derived from the structure
    self.label = None
    if self.kwargs.get('label'):
        self.label = self.kwargs["label"]
    elif 'text_id' in structure:
        self.label = ' '.join(structure['text_id'])

    if lazy:
        return

    if self.kwargs.get('timing'):
        start = time.time()
    self.calc_pdf()
    if self.kwargs.get('timing'):
        end = time.time()
        print('PDF calculated in {:.3f} s'.format(end - start))
def voronoi_shuffle(mutant, element_to_remove=None, preserve_stoich=False, debug=False, testing=False):
    """ Remove all atoms of type element, then perform Voronoi analysis
    on the remaining sublattice. Cluster the nodes with KMeans, then
    repopulate the clustered Voronoi nodes with atoms of the removed
    element.

    The `atom_types`, `positions_frac`, `num_atoms`, `voronoi_nodes` and
    `stoichiometry` fields of `mutant` are updated.

    Parameters:
        mutant (dict): structure to mutate in-place.

    Keyword Arguments:
        element_to_remove (str) : symbol of element to remove; chosen at
            random from the species present if None,
        preserve_stoich (bool) : whether to always reinsert the same
            number of atoms,
        debug (bool) : print progress information and tracebacks,
        testing (bool): write a cell at each step, with H atoms
            indicating Voronoi nodes.

    Raises:
        RuntimeError: if unable to perform Voronoi shuffle.

    """
    if testing:
        from matador.export import doc2res
        doc2res(mutant, "initial_cell")

    if element_to_remove is None:
        element_to_remove = np.random.choice(list(set(mutant["atom_types"])))

    try:
        mutant["atom_types"], mutant["positions_frac"] = zip(
            *[(atom, pos) for (atom, pos) in zip(mutant["atom_types"],
                                                 mutant["positions_frac"])
              if atom != element_to_remove])
    except ValueError as exc:
        # nothing is left to unpack when every atom has been removed
        raise RuntimeError("Unable to Voronize atoms {}".format(
            mutant["atom_types"])) from exc

    num_removed = mutant["num_atoms"] - len(mutant["atom_types"])
    if debug:
        print("Removed {} atoms of type {}".format(num_removed,
                                                   element_to_remove))

    mutant["num_atoms"] = len(mutant["atom_types"])
    # zip() produced tuples; convert back to lists so they can be appended to
    mutant["atom_types"], mutant["positions_frac"] = (
        list(mutant["atom_types"]),
        list(mutant["positions_frac"]),
    )
    if testing:
        doc2res(mutant, "post_removal_cell")

    try:
        mutant["voronoi_nodes"] = get_voronoi_points(mutant)
        if not mutant["voronoi_nodes"]:
            raise RuntimeError
        if testing:
            voro_mutant = deepcopy(mutant)
            for node in mutant["voronoi_nodes"]:
                voro_mutant["atom_types"].append("H")
                voro_mutant["positions_frac"].append(node)
                voro_mutant["num_atoms"] += 1
            doc2res(voro_mutant, "voronoi_cell")
    except Exception as exc:
        if debug:
            print_exc()
        raise RuntimeError("Voronoi code failed") from exc

    if debug:
        print("Computed {} Voronoi nodes".format(len(mutant["voronoi_nodes"])))

    if preserve_stoich:
        num_to_put_back = num_removed
    else:
        std_dev = int(np.sqrt(num_removed))
        try:
            num_to_put_back = np.random.randint(
                low=max(num_removed - std_dev, 1),
                high=min(num_removed + std_dev, len(mutant["voronoi_nodes"])),
            )
        except Exception:
            # e.g. an empty range; fall back to using every node
            num_to_put_back = len(mutant["voronoi_nodes"])

    if debug:
        print("Going to insert {} atoms of type {}".format(
            num_to_put_back, element_to_remove))

    # `precompute_distances` is no longer accepted by recent scikit-learn
    # releases; it only affected speed/memory, never the clustering
    k_means = KMeans(n_clusters=num_to_put_back)
    k_means.fit(mutant["voronoi_nodes"])
    mutant["voronoi_nodes"] = k_means.cluster_centers_.tolist()

    if testing:
        voro_mutant = deepcopy(mutant)
        for node in mutant["voronoi_nodes"]:
            voro_mutant["atom_types"].append("H")
            voro_mutant["positions_frac"].append(node)
            voro_mutant["num_atoms"] += 1
        doc2res(voro_mutant, "clustered_voronoi_cell")

    # put atoms of the removed element back, on the cluster centres
    for node in mutant["voronoi_nodes"]:
        mutant["atom_types"].append(element_to_remove)
        mutant["positions_frac"].append(node)

    if debug:
        print("Previously {} atoms in cell".format(mutant["num_atoms"]))

    mutant["num_atoms"] = len(mutant["atom_types"])
    mutant["stoichiometry"] = get_stoich(mutant["atom_types"])

    if testing:
        doc2res(mutant, "final_cell")

    if debug:
        print("Now {} atoms in cell".format(mutant["num_atoms"]))