def readStructs(datadir, shallow=True, name="output"):
    """
    Read final adsorption geometries and energy data from a single-layer
    output directory tree.

    Args:
        datadir: path (with trailing separator) to a directory containing:
            - convergence: line i has convergence status of run i (optional)
            - energies: line i has total energy and ads energy of run i (optional)
            - {name}{indices}.gen: final geometry for each run index
        shallow: if True, run indices are single integers ("output3.gen");
            otherwise compound "i-j" strings ("output3-1.gen")
        name: filename prefix of the geometry files

    Returns:
        pd.DataFrame indexed by run index with:
            - geom: final geometry of each run
            - E, E_ads (only when both "energies" and "convergence" files
              are present): energies, restricted to converged runs
    """
    # Escape the literal dot before "gen" so names like "output1agen"
    # cannot accidentally match (previously "." matched any character).
    if shallow:
        pattern = r"{}(\d+)\.gen".format(name)
    else:
        pattern = r"{}(\d+-\d+)\.gen".format(name)

    files = os.listdir(datadir)

    # Single geometry-collection pass shared by both branches below
    # (previously duplicated verbatim in each branch).
    geometries = {}
    for fname in files:
        match = re.search(pattern, fname)
        if match:
            key = int(match.group(1)) if shallow else match.group(1)
            geometries[key] = gen.read_gen(datadir + fname)

    if "energies" in files and "convergence" in files:
        convergence = pd.read_csv(datadir + "convergence", header=None)
        energies = pd.read_csv(datadir + "energies", header=None)
        output = pd.concat([energies, convergence], axis=1)
        output.columns = ["E", "E_ads", "conv"]
        output['geom'] = pd.Series(geometries)
        # keep only converged runs, then drop the bookkeeping column
        output = output[output['conv'] == "Geometry converged"]
        output = output.drop("conv", axis=1)
    else:
        output = pd.DataFrame(pd.Series(geometries))
        output.columns = ['geom']
    return output
def getslabs(data, directory, useInputs=False):
    """
    Utility for getting and writing slab files from the readData (utils.py)
    function.

    Args:
        data: df from readData, or any df with (struct, in) and
            (struct, out) columns; modified in place — gains a
            (struct, inslab) or (struct, outslab) column
        directory: path (with trailing separator) where slab{key}.gen
            files are written and then read back
        useInputs: if True operate on input structures, else on outputs
    """
    if useInputs:
        slabSource = data['struct']['in']
    else:
        slabSource = data['struct']['out']
    dataDir = directory
    slabs = {}
    # generate slabs
    # BUGFIX: Series.iteritems() was removed in pandas 2.0; .items() is
    # the drop-in replacement (available since pandas 0.21).
    for key, value in slabSource.items():
        slabs[key] = getslab(value)
    for key, value in slabs.items():
        gen.write_gen(dataDir + "slab{}.gen".format(key), value)
    # read slabs back (round-trip through the .gen files on disk)
    for key in data.index:
        slabs[key] = gen.read_gen(dataDir + "slab{}.gen".format(key))
    if useInputs:
        data.loc[:, ('struct', 'inslab')] = pd.Series(slabs)
    else:
        data.loc[:, ('struct', 'outslab')] = pd.Series(slabs)
def viewStructs(name, directory, kind='gen'):
    """
    View a collection of structures as a "trajectory".

    Args:
        name (str): substring unique to the structures (.gen, POSCAR, slab, etc)
        directory (str): directory where the structures live
            (expects a trailing separator)
        kind (str): kind of output, from (vasp, gen)

    Raises:
        ValueError: if kind is not one of (vasp, gen)

    Opens viewer with loaded trajectory (if remote, need X server).
    """
    geometries = []
    files = os.listdir(directory)
    if kind == 'gen':
        # literal dot escaped so e.g. "foo_gen" files cannot match
        pattern = r"{}.*\.gen".format(name)
    elif kind == 'vasp':
        pattern = r"{}".format(name)
    else:
        raise ValueError("file kind must be from (vasp, gen)")
    for fname in files:
        if re.search(pattern, fname):
            # kind already validated above, so a plain else suffices here
            # (the former in-loop raise was unreachable dead code)
            if kind == 'gen':
                geometries += [gen.read_gen(directory + fname)]
            else:
                geometries += [vasp.read_vasp(directory + fname)]
    view(geometries)
def main(basename, visualize=False):
    """
    Perform ML-based isotherm seeding.

    Args:
        basename: path of the base slab .gen file
        visualize: if True, open an ASE viewer on the slab decorated with
            one He probe atom per grid point (initial charges carry the
            predicted adsorption energies)
    """
    # load z prediction and E_ads prediction models (pickled KRR models)
    with open('models/zmodel.pkl', 'rb') as f:
        zmodel = pickle.load(f)
    with open('models/Emodel.pkl', 'rb') as f:
        Emodel = pickle.load(f)
    # load base slab, remove extraneous atoms, and wrap
    base = gen.read_gen(basename)
    del base[[atom.index for atom in base if atom.symbol in ['He', 'Ar']]]
    base.wrap()
    # generate regular grid based on cell parameters of slab
    a, b, c = base.cell
    a, b, c = np.linalg.norm(a), np.linalg.norm(b), np.linalg.norm(c)
    npoints = 20
    apoints = np.linspace(0, a, npoints)  # regular spacing
    bpoints = np.linspace(0, b, npoints)  # regular spacing
    # place He atoms in grid points
    gridpoints = []
    for apoint in apoints:
        for bpoint in bpoints:
            newstruct = base.copy()
            zhat = predictz(newstruct, apoint, bpoint)
            newstruct.append(Atom('He', position=(apoint, bpoint, zhat)))
            gridpoints += [newstruct]
    # generate pd df with data
    gridpoints = pd.Series(gridpoints)
    gridpoints = pd.DataFrame({'geom': gridpoints})
    gridpoints = pd.concat([gridpoints, getSOAPs(gridpoints['geom'])], axis=1)
    # data matrix for ML
    X = pd.DataFrame(gridpoints['SOAP'].to_list(), index=gridpoints.index)
    gridpoints['predE'] = Emodel.predict(X)
    # BUGFIX: build a dedicated visualization struct. Previously a charge
    # array of len(base) + n_gridpoints was assigned to `base` *before*
    # the He atoms were appended (length mismatch), and `visbase` /
    # `visualize` were undefined names. This mirrors the working pattern
    # used elsewhere in this file.
    visbase = base.copy()
    for geom in gridpoints['geom']:
        visbase.append(Atom("He", position=geom[-1].position))
    charges = np.append(np.zeros(len(base)), gridpoints['predE'])
    visbase.set_initial_charges(charges)
    # TODO adaptive sampling portion
    if visualize:
        view(visbase)
    print(
        "pearson r:",
        pearsonr([geom[-1].position[2] for geom in gridpoints['geom']],
                 gridpoints['predE']))
def read_results(self):
    """Read calculator results from the results.tag file.

    The file is removed after it has been read, to avoid reading it once
    again after some later runtime error.
    """
    # with-block guarantees the handle is closed even if parsing fails
    # (previously open/readlines/close leaked the handle on error)
    with open(os.path.join(self.directory, 'results.tag'), 'r') as myfile:
        self.lines = myfile.readlines()

    self.atoms = self.atoms_input
    charges, energy = self.read_charges_and_energy()
    if charges is not None:
        self.results['charges'] = charges
    self.results['energy'] = energy
    forces = self.read_forces()
    self.results['forces'] = forces
    self.mmpositions = None

    # Get positions and pbc's if there
    if self.parameters['Driver_OutputPrefix']:
        genname = self.parameters['Driver_OutputPrefix'] + '.gen'
    else:
        genname = 'geo_end.gen'
    output = read_gen(os.path.join(self.directory, genname))
    self.results['positions'] = output.get_positions()
    # NOTE(review): Atoms.get_pbc() returns an array, never None, so this
    # branch always runs; kept as-is for backward compatibility.
    if output.get_pbc() is not None:
        self.results['pbc'] = output.get_pbc()

    # stress: collect the 3x3 block that follows the 'stress' tag
    sstring = 'stress'
    have_stress = False
    stress = list()
    for iline, line in enumerate(self.lines):
        if sstring in line:
            have_stress = True
            start = iline + 1
            end = start + 3
            for i in range(start, end):
                cell = [float(x) for x in self.lines[i].split()]
                stress.append(cell)
    if have_stress:
        # convert from Hartree/Bohr^3, flatten to Voigt order (xx yy zz yz xz xy)
        stress = -np.array(stress) * Hartree / Bohr**3
        self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]]

    # eigenvalues and fermi levels
    fermi_levels = self.read_fermi_levels()
    if fermi_levels is not None:
        self.results['fermi_levels'] = fermi_levels
    eigenvalues = self.read_eigenvalues()
    if eigenvalues is not None:
        self.results['eigenvalues'] = eigenvalues

    # calculation was carried out with atoms written in write_input
    os.remove(os.path.join(self.directory, 'results.tag'))
def generate_band_structure_inputs(scc_to_band_directory: dict):
    """
    Generate band structure inputs for DFTB+ TB Lite.

    Maps each directory holding a converged SCC calculation to a target
    directory, seeding the target with the converged charges and geometry
    plus a freshly generated band-structure input file.
    """
    for converged_dir, band_dir in scc_to_band_directory.items():
        # Ensure the target directory exists
        Path(band_dir).mkdir(parents=True, exist_ok=True)

        # Carry over the converged charges and the structure
        for required in ('charges.bin', 'geometry.gen'):
            shutil.copyfile(os.path.join(converged_dir, required),
                            os.path.join(band_dir, required))

        # Build and write the band-structure input from the copied geometry
        atoms: Atoms = read_gen(os.path.join(band_dir, 'geometry.gen'))
        hsd_text = generate_band_structure_input(atoms.get_cell(), 'GFN1-xTB')
        with open(os.path.join(band_dir, 'dftb_in.hsd'), 'w') as fid:
            fid.write(hsd_text)
Includes: - mef - cf4 - amorphous - xtl_n - xtl_si - xtl2x2 - xtl2x2_sifterm - heavy_bomb - bulk222 - annealed """ import os from ase.io import gen, vasp from inspect import getsourcefile ############## # structures # ############## path = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0))) mef = vasp.read_vasp(path + "/reference_files/CONTCAR_mef") cf4 = vasp.read_vasp(path + "/reference_files/CONTCAR_cf4") amorphous = gen.read_gen(path + "/reference_files/amorphous_base.gen") xtl_n = vasp.read_vasp(path + "/reference_files/CONTCAR_nrich") xtl_si = vasp.read_vasp(path + "/reference_files/CONTCAR_sirich") xtl2x2 = gen.read_gen(path + "/reference_files/2x2xtl.gen") xtl2x2_sifterm = gen.read_gen(path + "/reference_files/2x2xtl_sifterm.gen") heavy_bomb = vasp.read_vasp(path + "/reference_files/CONTCAR_heavy_bombard") bulk222 = vasp.read_vasp(path + "/reference_files/CONTCAR_222bulk") annealed = vasp.read_vasp(path + "/reference_files/CONTCAR_annealed_unitcell")
def main(
    datadir = "temp/",            # data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
    outputdir = "temp.new/",      # destination directory for trimmed outputs
    hbondrange = 6,               # offset above the slab top counted as H-bond range
    zmincutoff = 0.1,             # somewhat arbitrary value to get rid of atoms that have gone into bulk
    output_geom_name = "output",  # prefix for output geometry files
    output_velos_name = "velos"   # prefix for output velocity files
):
    """Trim bombardment outputs: drop detached fragments and sunken atoms.

    Reads geometry (.gen) and velocity files from *datadir*, removes atom
    fragments that are either fully above the slab's H-bond range or fully
    below *zmincutoff*, and writes trimmed geometries/velocities plus a
    per-run tally of removed species.
    """
    ##############################
    ### Read in geometry files ###
    ##############################
    # CLI-friendly: arguments may arrive as strings
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)
    geometries = {}
    for i in os.listdir(datadir):
        if output_geom_name in i:
            key = re.search(r"\d+", i)  # first run of digits is the run key
            if key:
                key = key.group(0)
                geometries[key] = gen.read_gen(datadir + i)
    ##########################
    ### Read in velocities ###
    ##########################
    velos = dict()
    for i in os.listdir(datadir):
        if output_velos_name in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                velos[key] = pd.read_csv(datadir + i, header = None,
                                         dtype = float, sep = "\s+")
    ################
    ### trimming ###
    ################
    trimmedgeoms = dict()
    trimmedvelos = dict()
    removedspecies = dict()
    for key, geom in geometries.items():
        # per-run tally of removed atoms, by element
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}
        # construct bond graph from the adjacency matrix
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()
        # identify slab (the component containing atom 0) and its max height
        maingraph = np.array([i for i in cc if 0 in i][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange
        # isolate fragments and identify which to remove: a fragment goes if
        # it lies entirely above zcutoff or entirely below zmincutoff
        fragGraphs = [i for i in cc if 0 not in i]
        fragZs = [[geom[i].position[2] for i in frag] for frag in fragGraphs]
        removeFrag = [np.all(np.array(i) > zcutoff) or
                      np.all(np.array(i) < zmincutoff) for i in fragZs]
        atomsToRemove = [i for g, r in zip(fragGraphs, removeFrag) if r
                         for i in g]
        # account for any atoms that have wrapped around through the top of
        # the cell (lookin at you, H)
        atomsToRemove += [a.index for a in geom if a.z > geom.cell[2, 2]]
        for idx in atomsToRemove:
            removedatoms[geom[idx].symbol] += 1  # tally removed atoms by species
        geomcopy = geom.copy()
        del geomcopy[[atom.index for atom in geomcopy
                      if atom.index in atomsToRemove]]
        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy
        # velocities are row-aligned with atoms, so filter by the same indices
        trimmedvelos[key] = velos[key][[i not in atomsToRemove
                                        for i in np.arange(len(velos[key]))]]
    # collect all removed species series into a df and write as csv
    # NOTE(review): written to the CWD, not outputdir — confirm intended
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")
    # write trimmed geometries and velocities
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%s%s%s.in" % (outputdir, output_velos_name, key),
                 sep = " ", index = False, header = False)
def main(cutoff, datapath, zmodelPath, EmodelPath, smallset, adsorbate='mef',
         npoints=20, outputpath='input.gen'):
    """ML-guided adsorbate placement on a slab.

    Predicts probe heights (zmodel) and adsorption energies (Emodel) on a
    regular (npoints x npoints) surface grid, then greedily places adsorbates
    at the lowest-energy grid points subject to a minimum in-plane *cutoff*
    distance (with periodic images considered), and writes the decorated
    slab to *outputpath*. Prints timing checkpoints throughout.
    """
    last = time.time()  # timing checkpoint
    # CLI-friendly: arguments may arrive as strings
    npoints = int(npoints)
    cutoff = float(cutoff)
    smallset = bool(int(smallset))
    adsorbate_types = {'mef': mef, 'cf4': cf4}
    ads = adsorbate_types[adsorbate]
    # load pickled surrogate models
    with open(zmodelPath, 'rb') as f:
        zmodel = pickle.load(f)
    with open(EmodelPath, 'rb') as f:
        Emodel = pickle.load(f)
    # read in calculated structure (file name decides the reader)
    if "gen" in datapath:
        data = gen.read_gen(datapath)
    elif "CAR" in datapath:
        data = vasp.read_vasp(datapath)
    print('maxz: ', max([i.position[2] for i in data]))
    print('data read')
    now = time.time()
    print(now - last)
    last = now
    # obtain base slab
    base = getslab(data)
    # assume any adsorption influence enters via config, independent of Ar
    del base[[atom.index for atom in base if atom.symbol in ['He', 'Ar']]]
    base.wrap()
    print('base obtained')
    now = time.time()
    print(now - last)
    last = now
    # set up gridpoints with predicted z heights
    a, b, c = base.cell
    a, b, c = np.linalg.norm(a), np.linalg.norm(b), np.linalg.norm(c)
    apoints = np.linspace(0, a, npoints)
    bpoints = np.linspace(0, b, npoints)
    # species list must match what the models were trained on
    if smallset:
        species = ['Si', 'N', 'H', 'He']
    else:
        species = ["Si", "N", "H", "C", "F", "Ar", "He"]
    print(smallset, species)
    gridpoints = []
    for apoint in apoints:
        for bpoint in bpoints:
            newstruct = base.copy()
            print(newstruct)
            zhat = predictz(newstruct, apoint, bpoint, zmodel, species)
            # He marks the probe site at the predicted height
            newstruct.append(Atom('He', position=(apoint, bpoint, zhat)))
            gridpoints += [newstruct]
    print('gridpoints done')
    now = time.time()
    print(now - last)
    last = now
    gridpoints = pd.Series(gridpoints)
    gridpoints = pd.DataFrame({'geom': gridpoints})
    # add SOAP representation for gridpoint structs
    gridpoints = pd.concat(
        [gridpoints, getSOAPs(gridpoints['geom'], species=species)], axis=1)
    # create prediction matrix
    X = pd.DataFrame(gridpoints['SOAP'].to_list(), index=gridpoints.index)
    # predict energies, append to original df
    gridpoints['predE'] = Emodel.predict(X)
    # create 'visbase': struct with all He points included in one struct;
    # initial charges carry the predicted energies for visualization
    charges = np.append(np.zeros(len(base)), gridpoints['predE'])
    visbase = base.copy()
    for geom in gridpoints['geom']:
        visbase.append(Atom("He", position=geom[-1].position))
    visbase.set_initial_charges(charges)
    view(visbase)
    print('energy prediction done')
    now = time.time()
    print(now - last)
    last = now
    # assess gridpoints and place adsorbates, best (lowest) energy first
    gridpoints = gridpoints.sort_values(by='predE')
    gridpoints['xpos'] = [geom[-1].position[0] for geom in gridpoints['geom']]
    gridpoints['ypos'] = [geom[-1].position[1] for geom in gridpoints['geom']]
    gridpoints['zpos'] = [geom[-1].position[2] for geom in gridpoints['geom']]
    adsorbatePoints = []
    a = visbase.cell[0]  # NOTE: a, b rebound to full cell vectors here
    b = visbase.cell[1]
    for _, row in gridpoints.iterrows():
        isclose = False
        point1 = np.array([row['xpos'], row['ypos']])
        # reject the candidate if any accepted point (or its periodic image
        # displaced by +/- a, +/- b) lies within cutoff in-plane
        for x, y, z in adsorbatePoints:
            for dispx in [-a, a * 0, a]:
                for dispy in [-b, b * 0, b]:
                    point2 = np.array([x, y])
                    point2 = point2 + dispx[:2] + dispy[:2]
                    if np.linalg.norm(point1 - point2) < cutoff:
                        isclose = True
        if not isclose:
            adsorbatePoints.append(np.append(point1, row['zpos']))
    print('placement done')
    now = time.time()
    print(now - last)
    last = now
    # decorate a fresh copy of the slab with the chosen adsorbates
    adsvisbase = base.copy()
    maxz = np.max([atom.position[2] for atom in adsvisbase])
    for point in adsorbatePoints:
        print(point[2])
        add_adsorbate(adsvisbase, ads, height=point[2] - maxz + 1,
                      position=(point[0], point[1]))
    gen.write_gen(outputpath, adsvisbase)
    view([data, base, adsvisbase])
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the file-format
        will be guessed by the *filetype* function.

    Known formats:

    ========================= ===========
    format                    short name
    ========================= ===========
    GPAW restart-file         gpw
    Dacapo netCDF output file dacapo
    Old ASE netCDF trajectory nc
    Virtual Nano Lab file     vnl
    ASE pickle trajectory     traj
    ASE bundle trajectory     bundle
    GPAW text output          gpaw-text
    CUBE file                 cube
    XCrySDen Structure File   xsf
    Dacapo text output        dacapo-text
    XYZ-file                  xyz
    VASP POSCAR/CONTCAR file  vasp
    VASP OUTCAR file          vasp_out
    SIESTA STRUCT file        struct_out
    ABINIT input file         abinit
    V_Sim ascii file          v_sim
    Protein Data Bank         pdb
    CIF-file                  cif
    FHI-aims geometry file    aims
    FHI-aims output file      aims_out
    VTK XML Image Data        vti
    VTK XML Structured Grid   vts
    VTK XML Unstructured Grid vtu
    TURBOMOLE coord file      tmol
    TURBOMOLE gradient file   tmol-gradient
    exciting input            exi
    AtomEye configuration     cfg
    WIEN2k structure file     struct
    DftbPlus input file       dftb
    CASTEP geom file          cell
    CASTEP output file        castep
    CASTEP trajectory file    geom
    ETSF format               etsf.nc
    DFTBPlus GEN format       gen
    CMR db/cmr-file           db
    CMR db/cmr-file           cmr
    LAMMPS dump file          lammps
    ========================= ===========
    """
    # "filename@index" shorthand: a trailing "@..." chunk that parses as an
    # index overrides the *index* argument.
    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:p]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        # guess the format from the file itself
        format = filetype(filename)

    if format.startswith('gpw'):
        # GPAW restart files are unpacked by hand into an Atoms object with
        # a single-point calculator attached.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)

        return atoms

    # Every remaining format dispatches to a lazily-imported reader so that
    # optional dependencies are only pulled in when actually needed.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format == 'xyz':
        from ase.io.xyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'db':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammps import read_lammps_dump
        return read_lammps_dump(filename, index)
    raise RuntimeError('File format descriptor ' + format + ' not recognized!')
def read(filename, index=-1, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the file-format
        will be guessed by the *filetype* function.

    Known formats:

    ========================= ===========
    format                    short name
    ========================= ===========
    GPAW restart-file         gpw
    Dacapo netCDF output file dacapo
    Old ASE netCDF trajectory nc
    Virtual Nano Lab file     vnl
    ASE pickle trajectory     traj
    ASE bundle trajectory     bundle
    GPAW text output          gpaw-text
    CUBE file                 cube
    XCrySDen Structure File   xsf
    Dacapo text output        dacapo-text
    XYZ-file                  xyz
    VASP POSCAR/CONTCAR file  vasp
    VASP OUTCAR file          vasp_out
    SIESTA STRUCT file        struct_out
    ABINIT input file         abinit
    V_Sim ascii file          v_sim
    Protein Data Bank         pdb
    CIF-file                  cif
    FHI-aims geometry file    aims
    FHI-aims output file      aims_out
    VTK XML Image Data        vti
    VTK XML Structured Grid   vts
    VTK XML Unstructured Grid vtu
    TURBOMOLE coord file      tmol
    TURBOMOLE gradient file   tmol-gradient
    exciting input            exi
    AtomEye configuration     cfg
    WIEN2k structure file     struct
    DftbPlus input file       dftb
    CASTEP geom file          cell
    CASTEP output file        castep
    CASTEP trajectory file    geom
    ETSF format               etsf.nc
    DFTBPlus GEN format       gen
    CMR db/cmr-file           db
    CMR db/cmr-file           cmr
    LAMMPS dump file          lammps
    ========================= ===========
    """
    # "filename@index" shorthand: a trailing "@..." chunk that parses as an
    # index overrides the *index* argument.
    if isinstance(filename, str):
        p = filename.rfind('@')
        if p != -1:
            try:
                index = string2index(filename[p + 1:])
            except ValueError:
                pass
            else:
                filename = filename[:p]

    if isinstance(index, str):
        index = string2index(index)

    if format is None:
        # guess the format from the file itself
        format = filetype(filename)

    if format.startswith('gpw'):
        # GPAW restart files are unpacked by hand into an Atoms object with
        # a single-point calculator attached.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree

        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None

        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)

        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None

        atoms.calc = SinglePointCalculator(energy, forces, None, magmoms,
                                           atoms)

        return atoms

    # Every remaining format dispatches to a lazily-imported reader so that
    # optional dependencies are only pulled in when actually needed.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format == 'xyz':
        from ase.io.xyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'db':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammps import read_lammps_dump
        return read_lammps_dump(filename, index)
    raise RuntimeError('File format descriptor '+format+' not recognized!')
def stichTrajectories(bombID, bombardments=1, prefix="geom.out"):
    """Stitch bomb/quench/eq trajectory segments into one atom-by-frame table.

    bombID (string): a string in the format of "{batch}-{sample}", eg "5-3"
    bombardments (arraylike): result of range() or np.arange() specifying
        which bombardment events to use; otherwise an int, and the range
        (0, bombardments) will be used
    prefix (string): name of the .xyz trajectory file in each replicate

    Returns a pd.DataFrame of shape (nMaxAtoms, nFrames) whose cells hold
    per-atom data, with rows re-indexed so each atom keeps one row across
    step changes.
    """
    startTime = time.time()
    # normalize an int count into an index array
    bombardments = bombardments if hasattr(
        bombardments, '__iter__') else np.arange(bombardments)
    batch, sample = bombID.split("-")
    # Create an empty array to populate with the trajectory
    print("bombardments:", bombardments)
    # NOTE(review): this format string has 4 slots but 5 args (prefix is
    # silently ignored and "geom.out.gen" hardcoded) — confirm intended.
    nMaxAtoms = np.max([
        len(
            gen.read_gen("{}/{}/{}/{}/geom.out.gen".format(
                _b, step, batch, sample, prefix)))
        for _b in bombardments for step in ['bomb', 'quench', 'eq']
    ])
    # nMaxAtoms is guaranteed to correspond to some frame with an Ar in it;
    # then generate nBombardments slots at the bottom of the df for the Ar
    # atoms introduced
    nMaxAtoms = nMaxAtoms - 1 + len(bombardments)
    print("nMaxAtoms: ", nMaxAtoms)
    # create the final matrix that will actually represent the stitched traj
    trajFrame = np.zeros((nMaxAtoms, 0), dtype=object)
    frameIdx = 0  # initialize global frame count
    for _b in bombardments:
        with open("{}/bomb/{}/{}/{}.xyz".format(_b, batch, sample,
                                                prefix)) as f1:
            with open("{}/quench/{}/{}/{}.xyz".format(_b, batch, sample,
                                                      prefix)) as f2:
                with open("{}/eq/{}/{}/{}.xyz".format(_b, batch, sample,
                                                      prefix)) as f3:
                    _btemp = list(extxyz.read_extxyz(f1, index=slice(0, None)))
                    _qtemp = list(extxyz.read_extxyz(f2, index=slice(0, None)))
                    _etemp = list(extxyz.read_extxyz(f3, index=slice(0, None)))
                    trajList = _btemp + _qtemp + _etemp  # list form of this thing
                    if _b == bombardments[0]:
                        # initialize lastLen / identity mapping in the very
                        # first frame only
                        lastLen = len(_btemp[0])
                        newIndices = np.arange(lastLen)
                    for frame in trajList:
                        # velocities stored in arrays['vel']; momenta = m * v
                        # NOTE(review): raises KeyError if a frame lacks
                        # velocity data (an earlier try/except was removed)
                        frame.set_momenta(frame.get_masses().reshape(-1, 1) *
                                          frame.arrays['vel'])
                        # grow the table by one frame column
                        trajFrame = np.hstack(
                            (trajFrame, np.zeros((nMaxAtoms, 1),
                                                 dtype=object)))
                        if len(frame) != lastLen:
                            # change in nAtoms present in frame is indicative
                            # of a step change: remap atom rows
                            newIndices = reindexTrajBreak(
                                prevFrame, frame, newIndices)
                            # a fresh Ar projectile gets one of the reserved
                            # bottom rows
                            ArAdded = (frame[-1].symbol == "Ar" and np.all(
                                frame[-1].position != prevFrame[-1].position))
                            if ArAdded:
                                newIndices[-1] = nMaxAtoms - (
                                    len(bombardments) - _b)
                            lastLen = len(frame)
                        trajFrame[newIndices, frameIdx] = frame
                        prevFrame = frame
                        frameIdx += 1
    endTime = time.time()
    print("execution time (s) = {}".format((endTime - startTime)))
    return pd.DataFrame(trajFrame)
def main(file):
    """Open an ASE viewer on the structure stored in the given .gen file."""
    structure = gen.read_gen(file)
    view(structure)
def main(file, output):
    """Convert a DFTB+ .gen geometry into a sorted VASP 5 POSCAR file."""
    structure = gen.read_gen(file)
    vasp.write_vasp(output, structure, sort=True, vasp5=True)
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the file-format
        will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    VASP XDATCAR file          vasp_xdatcar
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    Materials Studio file      xsd
    =========================  =============

    Many formats allow an open file-like object to be passed instead
    of ``filename``.  In this case the format cannot be auto-detected,
    so the ``format`` argument should be explicitly given.
    """
    # A trailing '@...' on the filename may carry a row selection (for
    # database files) or a frame index/slice; strip it off here and let it
    # override the *index* argument.
    if isinstance(filename, str) and (
            '.json@' in filename or
            '.db@' in filename or
            filename.startswith('pg://') and '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                # Only strip the '@suffix' when it parses as an index; a
                # non-index suffix (ValueError) leaves the filename intact.
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]
    if isinstance(index, str):
        index = string2index(index)
    if format is None:
        # Guess the format from the file contents/extension.
        format = filetype(filename)
    if format.startswith('gpw'):
        # GPAW restart file: rebuild Atoms plus a single-point calculator
        # holding energy/forces/magmoms/k-points from the stored arrays.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree
        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None
        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None
        atoms.calc = SinglePointDFTCalculator(atoms,
                                              energy=energy,
                                              forces=forces,
                                              magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(SinglePointKPoint(w, kpt[0], kpt[1],
                                              eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts
        return atoms
    if format in ['json', 'db', 'postgresql']:
        # Database-backed storage: select rows and convert each to Atoms.
        if index == slice(None, None):
            index = None
        from ase.db.core import connect
        images = [row.toatoms()
                  for row in connect(filename, format).select(index)]
        if len(images) == 1:
            return images[0]
        else:
            return images
    # Default: the last configuration in the file.
    if index is None:
        index = -1
    # Dispatch to the per-format reader.  Imports are deliberately local so
    # that only the needed backend is loaded.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'trj':
        from ase.io.pickletrajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'vasp_xdatcar':
        from ase.io.vasp import read_vasp_xdatcar
        return read_vasp_xdatcar(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)
    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)
    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)
    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)
    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)
    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)
    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)
    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)
    if format == 'xsd':
        from ase.io.xsd import read_xsd
        return read_xsd(filename)
    # No reader matched the (possibly auto-detected) format string.
    raise RuntimeError('File format descriptor ' + format +
                       ' not recognized!')
def read(filename, index=None, format=None):
    """Read Atoms object(s) from file.

    NOTE(review): this appears to be a second, slightly older copy of the
    ``read`` dispatcher in this file (fewer formats; uses ``dict2atoms``).
    If both live in the same module, the later definition shadows the
    earlier one at import time — confirm which copy is intended to win.

    filename: str
        Name of the file to read from.
    index: int or slice
        If the file contains several configurations, the last configuration
        will be returned by default.  Use index=n to get configuration
        number n (counting from zero).
    format: str
        Used to specify the file-format.  If not given, the file-format
        will be guessed by the *filetype* function.

    Known formats:

    =========================  =============
    format                     short name
    =========================  =============
    GPAW restart-file          gpw
    Dacapo netCDF output file  dacapo
    Old ASE netCDF trajectory  nc
    Virtual Nano Lab file      vnl
    ASE pickle trajectory      traj
    ASE bundle trajectory      bundle
    GPAW text output           gpaw-text
    CUBE file                  cube
    XCrySDen Structure File    xsf
    Dacapo text output         dacapo-text
    XYZ-file                   xyz
    VASP POSCAR/CONTCAR file   vasp
    VASP OUTCAR file           vasp_out
    SIESTA STRUCT file         struct_out
    ABINIT input file          abinit
    V_Sim ascii file           v_sim
    Protein Data Bank          pdb
    CIF-file                   cif
    FHI-aims geometry file     aims
    FHI-aims output file       aims_out
    VTK XML Image Data         vti
    VTK XML Structured Grid    vts
    VTK XML Unstructured Grid  vtu
    TURBOMOLE coord file       tmol
    TURBOMOLE gradient file    tmol-gradient
    exciting input             exi
    AtomEye configuration      cfg
    WIEN2k structure file      struct
    DftbPlus input file        dftb
    CASTEP geom file           cell
    CASTEP output file         castep
    CASTEP trajectory file     geom
    ETSF format                etsf.nc
    DFTBPlus GEN format        gen
    CMR db/cmr-file            db
    CMR db/cmr-file            cmr
    LAMMPS dump file           lammps
    EON reactant.con file      eon
    Gromacs coordinates        gro
    Gaussian com (input) file  gaussian
    Gaussian output file       gaussian_out
    Quantum espresso in file   esp_in
    Quantum espresso out file  esp_out
    Extended XYZ file          extxyz
    NWChem input file          nw
    =========================  =============
    """
    # A trailing '@...' on the filename may carry a row selection (for
    # database files) or a frame index/slice; strip it off here and let it
    # override the *index* argument.
    if isinstance(filename, str) and ('.json@' in filename or
                                      '.db@' in filename or
                                      filename.startswith('pg://') and
                                      '@' in filename):
        filename, index = filename.rsplit('@', 1)
        if index.isdigit():
            index = int(index)
    else:
        if isinstance(filename, str):
            p = filename.rfind('@')
            if p != -1:
                # Only strip the '@suffix' when it parses as an index; a
                # non-index suffix (ValueError) leaves the filename intact.
                try:
                    index = string2index(filename[p + 1:])
                except ValueError:
                    pass
                else:
                    filename = filename[:p]
    if isinstance(index, str):
        index = string2index(index)
    if format is None:
        # Guess the format from the file contents/extension.
        format = filetype(filename)
    if format.startswith('gpw'):
        # GPAW restart file: rebuild Atoms plus a single-point calculator
        # holding energy/forces/magmoms/k-points from the stored arrays.
        import gpaw
        r = gpaw.io.open(filename, 'r')
        positions = r.get('CartesianPositions') * Bohr
        numbers = r.get('AtomicNumbers')
        cell = r.get('UnitCell') * Bohr
        pbc = r.get('BoundaryConditions')
        tags = r.get('Tags')
        magmoms = r.get('MagneticMoments')
        energy = r.get('PotentialEnergy') * Hartree
        if r.has_array('CartesianForces'):
            forces = r.get('CartesianForces') * Hartree / Bohr
        else:
            forces = None
        atoms = Atoms(positions=positions,
                      numbers=numbers,
                      cell=cell,
                      pbc=pbc)
        if tags.any():
            atoms.set_tags(tags)
        if magmoms.any():
            atoms.set_initial_magnetic_moments(magmoms)
        else:
            magmoms = None
        atoms.calc = SinglePointDFTCalculator(atoms,
                                              energy=energy,
                                              forces=forces,
                                              magmoms=magmoms)
        kpts = []
        if r.has_array('IBZKPoints'):
            for w, kpt, eps_n, f_n in zip(r.get('IBZKPointWeights'),
                                          r.get('IBZKPoints'),
                                          r.get('Eigenvalues'),
                                          r.get('OccupationNumbers')):
                kpts.append(
                    SinglePointKPoint(w, kpt[0], kpt[1], eps_n[0], f_n[0]))
        atoms.calc.kpts = kpts
        return atoms
    if format in ['json', 'db', 'postgresql']:
        # Database-backed storage: select rows and convert each to Atoms.
        from ase.db.core import connect, dict2atoms
        if index == slice(None, None):
            index = None
        images = [
            dict2atoms(d) for d in connect(filename, format).select(index)
        ]
        if len(images) == 1:
            return images[0]
        else:
            return images
    # Default: the last configuration in the file.
    if index is None:
        index = -1
    # Dispatch to the per-format reader.  Imports are deliberately local so
    # that only the needed backend is loaded.
    if format == 'castep':
        from ase.io.castep import read_castep
        return read_castep(filename, index)
    if format == 'castep_cell':
        import ase.io.castep
        return ase.io.castep.read_cell(filename, index)
    if format == 'castep_geom':
        import ase.io.castep
        return ase.io.castep.read_geom(filename, index)
    if format == 'exi':
        from ase.io.exciting import read_exciting
        return read_exciting(filename, index)
    if format in ['xyz', 'extxyz']:
        from ase.io.extxyz import read_xyz
        return read_xyz(filename, index)
    if format == 'traj':
        from ase.io.trajectory import read_trajectory
        return read_trajectory(filename, index)
    if format == 'bundle':
        from ase.io.bundletrajectory import read_bundletrajectory
        return read_bundletrajectory(filename, index)
    if format == 'cube':
        from ase.io.cube import read_cube
        return read_cube(filename, index)
    if format == 'nc':
        from ase.io.netcdf import read_netcdf
        return read_netcdf(filename, index)
    if format == 'gpaw-text':
        from ase.io.gpawtext import read_gpaw_text
        return read_gpaw_text(filename, index)
    if format == 'dacapo-text':
        from ase.io.dacapo import read_dacapo_text
        return read_dacapo_text(filename)
    if format == 'dacapo':
        from ase.io.dacapo import read_dacapo
        return read_dacapo(filename)
    if format == 'xsf':
        from ase.io.xsf import read_xsf
        return read_xsf(filename, index)
    if format == 'vasp':
        from ase.io.vasp import read_vasp
        return read_vasp(filename)
    if format == 'vasp_out':
        from ase.io.vasp import read_vasp_out
        return read_vasp_out(filename, index)
    if format == 'abinit':
        from ase.io.abinit import read_abinit
        return read_abinit(filename)
    if format == 'v_sim':
        from ase.io.v_sim import read_v_sim
        return read_v_sim(filename)
    if format == 'mol':
        from ase.io.mol import read_mol
        return read_mol(filename)
    if format == 'pdb':
        from ase.io.pdb import read_pdb
        return read_pdb(filename, index)
    if format == 'cif':
        from ase.io.cif import read_cif
        return read_cif(filename, index)
    if format == 'struct':
        from ase.io.wien2k import read_struct
        return read_struct(filename)
    if format == 'struct_out':
        from ase.io.siesta import read_struct
        return read_struct(filename)
    if format == 'vti':
        from ase.io.vtkxml import read_vti
        return read_vti(filename)
    if format == 'vts':
        from ase.io.vtkxml import read_vts
        return read_vts(filename)
    if format == 'vtu':
        from ase.io.vtkxml import read_vtu
        return read_vtu(filename)
    if format == 'aims':
        from ase.io.aims import read_aims
        return read_aims(filename)
    if format == 'aims_out':
        from ase.io.aims import read_aims_output
        return read_aims_output(filename, index)
    if format == 'iwm':
        from ase.io.iwm import read_iwm
        return read_iwm(filename)
    if format == 'Cmdft':
        from ase.io.cmdft import read_I_info
        return read_I_info(filename)
    if format == 'tmol':
        from ase.io.turbomole import read_turbomole
        return read_turbomole(filename)
    if format == 'tmol-gradient':
        from ase.io.turbomole import read_turbomole_gradient
        return read_turbomole_gradient(filename)
    if format == 'cfg':
        from ase.io.cfg import read_cfg
        return read_cfg(filename)
    if format == 'dftb':
        from ase.io.dftb import read_dftb
        return read_dftb(filename)
    if format == 'sdf':
        from ase.io.sdf import read_sdf
        return read_sdf(filename)
    if format == 'etsf':
        from ase.io.etsf import ETSFReader
        return ETSFReader(filename).read_atoms()
    if format == 'gen':
        from ase.io.gen import read_gen
        return read_gen(filename)
    if format == 'cmr':
        from ase.io.cmr_io import read_db
        return read_db(filename, index)
    if format == 'lammps':
        from ase.io.lammpsrun import read_lammps_dump
        return read_lammps_dump(filename, index)
    if format == 'eon':
        from ase.io.eon import read_reactant_con
        return read_reactant_con(filename)
    if format == 'gromacs':
        from ase.io.gromacs import read_gromacs
        return read_gromacs(filename)
    if format == 'gaussian':
        from ase.io.gaussian import read_gaussian
        return read_gaussian(filename)
    if format == 'gaussian_out':
        from ase.io.gaussian import read_gaussian_out
        return read_gaussian_out(filename, index)
    if format == 'esp_in':
        from ase.io.espresso import read_espresso_in
        return read_espresso_in(filename)
    if format == 'esp_out':
        from ase.io.espresso import read_espresso_out
        return read_espresso_out(filename, index)
    if format == 'nw':
        from ase.io.nwchem import read_nwchem_input
        return read_nwchem_input(filename)
    # No reader matched the (possibly auto-detected) format string.
    raise RuntimeError('File format descriptor ' + format +
                       ' not recognized!')
def main(
        numsofar,  # use the run number you're seeding for
        batch,  # current batch number
        velo,  # velocity of incident Ar in Å/ps
        datadir="temp/",  # data files, structured as datadir/output$i-$j.gen and datadir/velos$i-$j
        outputdir="temp.new/",  # files for output
        hbondrange=6,  # height (Å, presumably) above the slab top beyond which fragments are dropped — TODO confirm units
        zmincutoff=0.1,  # somewhat arbitrary value to get rid of atoms that have gone into bulk
        numperbatch=17,
        numbatches=10):
    """Trim desorbed/escaped fragments from each final geometry, add a fresh
    Ar projectile at a reproducible random (x, y) position, and write the
    resulting input geometries and velocity files for the next bombardment
    run into *outputdir* (plus a removedspecies.csv tally in the CWD).
    """
    # Arguments may arrive as strings (e.g. from sys.argv); coerce types.
    numsofar = int(numsofar)
    batch = int(batch)
    velo = float(velo)
    hbondrange = int(hbondrange)
    zmincutoff = float(zmincutoff)
    numperbatch = int(numperbatch)
    numbatches = int(numbatches)
    ##############################
    ### Read in geometry files ###
    ##############################
    geometries = {}
    for i in os.listdir(datadir):
        if "output" in i:
            # NOTE(review): r"\d+" keeps only the first digit run, so
            # "output5-3.gen" yields key "5" — confirm file names carry a
            # single index as the datadir comment's $i-$j pattern suggests.
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                geometries[key] = gen.read_gen(datadir + i)
    ##########################
    ### Read in velocities ###
    ##########################
    velos = dict()
    for i in os.listdir(datadir):
        if "velos" in i:
            key = re.search(r"\d+", i)
            if key:
                key = key.group(0)
                velos[key] = pd.read_csv(datadir + i,
                                         header=None,
                                         dtype=float,
                                         sep="\s+")
    # to account for seed behavior from first numssofar sets of runs
    # numssofar can also be interpreted as = current run seeding for
    np.random.seed(429)
    # Burn the RNG draws that earlier runs/batches already consumed so the
    # positions drawn below continue the same reproducible random stream.
    # The drawn values are intentionally discarded here.
    for b in range(batch + numsofar * numbatches):
        for i in range(numperbatch):
            x_rand, y_rand, z_rand = np.append(np.random.random(size=2), 0)
    ################
    ### trimming ###
    ################
    trimmedgeoms = dict()
    trimmedvelos = dict()
    removedspecies = dict()
    for key, geom in geometries.items():
        # Per-species tally of deleted atoms.
        # NOTE(review): a species outside this fixed set would raise
        # KeyError in the tally loop below — confirm the system only ever
        # contains Si/N/H/Ar/F/C.
        removedatoms = {'Si': 0, 'N': 0, 'H': 0, 'Ar': 0, 'F': 0, 'C': 0}
        # construct graph (bond connectivity from ASE's Analysis adjacency
        # matrix, turned into an undirected graph of atom indices)
        adjmat = Analysis(geom).adjacency_matrix[0]
        numnodes = adjmat.shape[0]
        g = Graph(numnodes)
        for i in range(numnodes):
            for j in range(numnodes):
                if adjmat[i, j]:
                    g.addEdge(i, j)
        cc = g.connectedComponents()
        # identify slab, and max height of slab
        # (the connected component containing atom 0 is taken to be the slab)
        maingraph = np.array([i for i in cc if 0 in i][0])
        slab = geom[[atom.index for atom in geom if atom.index in maingraph]]
        gen.write_gen(outputdir + "slab{}.gen".format(key), slab)
        # Fragments entirely above zcutoff are considered desorbed.
        zcutoff = np.max([atom.position[2] for atom in slab]) + hbondrange
        # isolate fragments and identify which to remove: everything fully
        # above zcutoff (flew away) or fully below zmincutoff (sank into bulk)
        fragGraphs = [i for i in cc if 0 not in i]
        fragZs = [[geom[i].position[2] for i in frag] for frag in fragGraphs]
        removeFrag = [
            np.all(np.array(i) > zcutoff) or np.all(np.array(i) < zmincutoff)
            for i in fragZs
        ]
        atomsToRemove = [
            i for g, r in zip(fragGraphs, removeFrag) if r for i in g
        ]
        # account for any atoms that have wrapped around through the top of
        # the cell (lookin at you, H)
        atomsToRemove += [a.index for a in geom if a.z > geom.cell[2, 2]]
        for idx in atomsToRemove:
            removedatoms[
                geom[idx].symbol] += 1  # tally removed atoms by species
        geomcopy = geom.copy()
        del geomcopy[[
            atom.index for atom in geomcopy if atom.index in atomsToRemove
        ]]
        # Draw a fractional (x, y) and convert to Cartesian via the cell;
        # z component is fixed to 0 (height is set by add_adsorbate).
        x_rand, y_rand, z_rand = geomcopy.cell.cartesian_positions(
            np.append(np.random.random(size=2), 0))
        add_adsorbate(geomcopy,
                      adsorbate='Ar',
                      height=7,
                      position=(x_rand, y_rand))
        removedspecies[key] = pd.Series(removedatoms)
        trimmedgeoms[key] = geomcopy
        # Drop velocity rows of removed atoms (rows are assumed to align
        # with original atom indices), then append the incident Ar velocity
        # (0, 0, -velo).
        # NOTE(review): DataFrame.append is deprecated/removed in modern
        # pandas — this requires an older pandas version.
        trimmedvelos[key] = velos[key][[
            i not in atomsToRemove for i in np.arange(len(velos[key]))
        ]]
        trimmedvelos[key] = trimmedvelos[key].append(pd.Series([0, 0, -velo]),
                                                    ignore_index=True)
    # collect all removed species series into a df and write as csv
    pd.DataFrame(removedspecies).to_csv("removedspecies.csv")
    # write
    for key, geom in trimmedgeoms.items():
        gen.write_gen("%sinput%s.gen" % (outputdir, key), geom)
    for key, v in trimmedvelos.items():
        v.to_csv("%svelos%s.in" % (outputdir, key),
                 sep=" ",
                 index=False,
                 header=False)