def _get_trajectories(self):
    """
    Internal method to make a Trajectory object.

    Allocates ``self.trajectory`` for ``self.nsw`` steps and resets
    ``self.step_no``, then streams every 'calculation' element of
    ``self.filename`` through ``self._calculation_tag_found``. If the XML
    is malformed, parsing stops at the error, the parser's error log is
    printed, and the steps read so far are kept. Finally the trajectory is
    truncated to the number of steps actually found.
    """
    atoms = self.get_atoms()
    self.trajectory = Trajectory(num_steps=self.nsw,
                                 timestep=self.potim,
                                 atoms=atoms)
    self.step_no = 0

    size_mb = os.path.getsize(self.filename) / 1024.**2
    status_text = "Parsing %s (%.2f MB)... " % (self.filename, size_mb)
    show_progress = imported['progressbar']
    if show_progress:
        self.pbar = ProgressBar(widgets=[status_text, Percentage()],
                                maxval=self.nsw + 1).start()

    context = etree.iterparse(self.filename, tag='calculation')
    try:
        self._fast_iter(context, self._calculation_tag_found)
    except etree.XMLSyntaxError as err:
        print("XML parsing halted: %s" % (err,))
        # The error log lives on the iterparse object itself; a separately
        # created XMLParser is never used by iterparse and stays empty.
        for entry in context.error_log:
            print("Warning: " + entry.message)

    if show_progress:
        self.pbar.finish()
    print("Found %d out of %d steps" % (self.step_no, self.nsw))
    self.trajectory.update_length(self.step_no)
    print_memory_usage()
def read_trajectory(dir='', unwrap_pbcs=True, xml_file='vasprun.xml',
                    orig_file='traj_orig.h5',
                    unwrapped_file='traj_unwrapped.h5',
                    poscar_file='POSCAR'):
    """
    Convenience function for reading trajectory data from vasprun.xml.

    - The first time a trajectory is read, the trajectory data is read from
      vasprun.xml using lxml, and saved in a HDF5 binary file using PyTables.
    - The next time the trajectory is read, the HDF5 file is read directly.
      This is much faster.

    If `unwrap_pbcs' is set to True, periodic boundary conditions (PBCs) will
    be unwrapped, using initial positions from a specified POSCAR file.
    Initial positions are taken from a POSCAR file instead of the initial
    positions in the vasprun.xml file, because the latter are wrapped into
    the cell, making visual comparison with the original structure difficult
    if the POSCAR file contained coordinates out of the cell, such as may
    result from a cell where the atoms were relaxed from initial positions at
    the edge of the cell.

    Parameters
    ----------
    dir : str
        Directory to read files from
        Default is current directory
    unwrap_pbcs : bool
        Set to True to unwrap the periodic boundary conditions (PBCs) or
        False to leave them.
        Default is True
    xml_file : str
        Name of the vasprun.xml file, default is "vasprun.xml"
    orig_file : str
        Name of the HDF5 file
    unwrapped_file : str
        Name of the HDF5 file containing unwrapped PBCs.
        Only used if unwrap_pbcs is set to True
    poscar_file : str
        Name of the POSCAR file providing the initial positions.
        Only used if unwrap_pbcs is set to True

    Returns
    -------
    Trajectory
        The original trajectory if `unwrap_pbcs' is False, otherwise the
        unwrapped one.
    """
    # os.path.join keeps the old results for dir='' and dir='some/dir/',
    # and also works when the directory has no trailing separator.
    orig_path = os.path.join(dir, orig_file)
    unwrapped_path = os.path.join(dir, unwrapped_file)

    # Freshly parsed trajectory, or None if the HDF5 cache already exists.
    traj = None
    if not os.path.isfile(orig_path):
        parser = IterativeVasprunParser(os.path.join(dir, xml_file))
        traj = parser.get_trajectory()
        traj.save(orig_path)

    if not unwrap_pbcs:
        if traj is None:
            traj = Trajectory(filename=orig_path)
        return traj

    if os.path.isfile(unwrapped_path):
        return Trajectory(filename=unwrapped_path)

    poscar = PoscarParser(os.path.join(dir, poscar_file))
    pos = poscar.get_positions(coords='direct')
    if traj is None:
        traj = Trajectory(filename=orig_path)
    print("Unwrapping using initial pos from POSCAR")
    traj.unwrap_pbc(init_pos=pos)
    traj.save(unwrapped_path)
    return traj
def _get_trajectories(self):
    """
    Internal method to make a Trajectory object.

    Allocates ``self.trajectory`` for ``self.nsw`` steps and resets
    ``self.step_no``, then streams every 'calculation' element of
    ``self.filename`` through ``self._calculation_tag_found``. If the XML
    is malformed, parsing stops at the error, the parser's error log is
    printed, and the steps read so far are kept. Finally the trajectory is
    truncated to the number of steps actually found.
    """
    atoms = self.get_atoms()
    self.trajectory = Trajectory(num_steps=self.nsw,
                                 timestep=self.potim,
                                 atoms=atoms)
    self.step_no = 0

    size_mb = os.path.getsize(self.filename) / 1024.**2
    status_text = "Parsing %s (%.2f MB)... " % (self.filename, size_mb)
    show_progress = imported['progressbar']
    if show_progress:
        self.pbar = ProgressBar(widgets=[status_text, Percentage()],
                                maxval=self.nsw + 1).start()

    context = etree.iterparse(self.filename, tag='calculation')
    try:
        self._fast_iter(context, self._calculation_tag_found)
    except etree.XMLSyntaxError as err:
        print("XML parsing halted: %s" % (err,))
        # The error log lives on the iterparse object itself; a separately
        # created XMLParser is never used by iterparse and stays empty.
        for entry in context.error_log:
            print("Warning: " + entry.message)

    if show_progress:
        self.pbar.finish()
    print("Found %d out of %d steps" % (self.step_no, self.nsw))
    self.trajectory.update_length(self.step_no)
    print_memory_usage()
def read_trajectory(dir='', unwrap_pbcs=True, xml_file='vasprun.xml',
                    orig_file='traj_orig.h5',
                    unwrapped_file='traj_unwrapped.h5',
                    poscar_file='POSCAR'):
    """
    Convenience function for reading trajectory data from vasprun.xml.

    - The first time a trajectory is read, the trajectory data is read from
      vasprun.xml using lxml, and saved in a HDF5 binary file using PyTables.
    - The next time the trajectory is read, the HDF5 file is read directly.
      This is much faster.

    If `unwrap_pbcs' is set to True, periodic boundary conditions (PBCs) will
    be unwrapped, using initial positions from a specified POSCAR file.
    Initial positions are taken from a POSCAR file instead of the initial
    positions in the vasprun.xml file, because the latter are wrapped into
    the cell, making visual comparison with the original structure difficult
    if the POSCAR file contained coordinates out of the cell, such as may
    result from a cell where the atoms were relaxed from initial positions at
    the edge of the cell.

    Parameters
    ----------
    dir : str
        Directory to read files from
        Default is current directory
    unwrap_pbcs : bool
        Set to True to unwrap the periodic boundary conditions (PBCs) or
        False to leave them.
        Default is True
    xml_file : str
        Name of the vasprun.xml file, default is "vasprun.xml"
    orig_file : str
        Name of the HDF5 file
    unwrapped_file : str
        Name of the HDF5 file containing unwrapped PBCs.
        Only used if unwrap_pbcs is set to True
    poscar_file : str
        Name of the POSCAR file providing the initial positions.
        Only used if unwrap_pbcs is set to True

    Returns
    -------
    Trajectory
        The original trajectory if `unwrap_pbcs' is False, otherwise the
        unwrapped one.
    """
    # os.path.join keeps the old results for dir='' and dir='some/dir/',
    # and also works when the directory has no trailing separator.
    orig_path = os.path.join(dir, orig_file)
    unwrapped_path = os.path.join(dir, unwrapped_file)

    # Freshly parsed trajectory, or None if the HDF5 cache already exists.
    traj = None
    if not os.path.isfile(orig_path):
        parser = IterativeVasprunParser(os.path.join(dir, xml_file))
        traj = parser.get_trajectory()
        traj.save(orig_path)

    if not unwrap_pbcs:
        if traj is None:
            traj = Trajectory(filename=orig_path)
        return traj

    if os.path.isfile(unwrapped_path):
        return Trajectory(filename=unwrapped_path)

    poscar = PoscarParser(os.path.join(dir, poscar_file))
    pos = poscar.get_positions(coords='direct')
    if traj is None:
        traj = Trajectory(filename=orig_path)
    print("Unwrapping using initial pos from POSCAR")
    traj.unwrap_pbc(init_pos=pos)
    traj.save(unwrapped_path)
    return traj
class IterativeVasprunParser(object):
    """
    Parser for very large vasprun.xml files, based on iterative xml parsing.
    The functionality of this parser is limited compared to VasprunParser.
    """

    def __str__(self):
        return "iterative vasprun parser"

    def __init__(self, filename='vasprun.xml', verbose=False):
        """
        Open `filename' and read the run parameters and the atom list.

        Prints a message and exits the process (``sys.exit(1)``) if lxml is
        unavailable, if the file does not exist, or if NSW cannot be found.

        Parameters
        ----------
        filename : str
            Path to the vasprun.xml file, default is "vasprun.xml"
        verbose : bool
            Stored as ``self.verbose``
        """
        if not imported['lxml']:
            print("Error: The module 'lxml' is needed!")
            sys.exit(1)

        self.filename = filename
        self.verbose = verbose

        if not os.path.isfile(self.filename):
            print("Fatal error: The file '%s' was not found or is not a file."
                  % (self.filename))
            sys.exit(1)

        # read beginning of file to find number of ionic steps (NSW) and
        # timestep (POTIM)
        self.params = self._find_first_instance('parameters',
                                                self._params_tag_found)
        try:
            nsw_text = self.params.xpath(
                "separator[@name='ionic']/i[@name='NSW']")[0].text
        except (AttributeError, IndexError):
            # No 'parameters' element (params is None) or no NSW entry.
            print("Could not find incar:NSW in vasprun.xml")
            sys.exit(1)
        self.nsw = int(nsw_text)

        if self.nsw == 0:
            print("Note: This file contains no ionic motion (NSW=0).")
            self.nsw = 1  # to read the static structure

        self.potim = float(self.params.xpath(
            "separator[@name='ionic']/i[@name='POTIM']")[0].text)

        self.atoms = self._find_first_instance('atominfo', self._get_atoms)
        self.natoms = len(self.atoms)

    def _params_tag_found(self, elem):
        # Copy so the data outlives elem.clear() in _find_first_instance.
        return copy(elem)

    def _get_atoms(self, elem):
        """ Returns an int array of atomic numbers read from 'atominfo' """
        # Strip the symbol text, as ionic_steps() does.
        return np.array(
            [get_atomic_number_from_symbol(rc[0].text.strip())
             for rc in elem.xpath("array[@name='atoms']/set/rc")],
            dtype=int)

    def _release_element(self, elem):
        """ Frees a processed element and its already-visited siblings """
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]

    def _fast_iter(self, context, func):
        """ Calls func(elem) for each element, freeing memory as it goes """
        for event, elem in context:
            func(elem)
            self._release_element(elem)

    def _find_first_instance(self, tag, func):
        """
        Returns func(elem) for the first element named `tag', or None if
        there is none. Exits the process on an XML syntax error.
        """
        context = etree.iterparse(self.filename, tag=tag)
        ret = None
        try:
            for event, elem in context:
                ret = func(elem)
                self._release_element(elem)
                break
        except etree.XMLSyntaxError as err:
            print("XML parsing failed:")
            print(err)
            # The error log lives on the iterparse object; a stand-alone
            # XMLParser is never used by iterparse and stays empty.
            for entry in context.error_log:
                print("XML Error: " + entry.message)
            words = str(err).split()
            if words and words[0] == 'Char':
                ZapControlCharacters(self.filename)
                print("")
                print("You may now try to re-run the script.")
                print("")
            sys.exit(1)
        return ret

    def get_num_ionic_steps(self):
        """ Returns the number of ionic steps """
        return self.nsw

    def get_num_atoms(self):
        """ Returns the number of atoms """
        return self.natoms

    def get_atoms(self):
        """ Returns an array with the types of the atoms """
        return self.atoms

    def _get_initial_structure(self, elem):
        def rows(path):
            # One list of floats per <v> element matched by `path'.
            return [[float(x) for x in v.text.split()]
                    for v in elem.xpath(path)]

        return {
            'basis': rows("crystal/varray[@name='basis']/v"),
            'positions': rows("varray[@name='positions']/v"),
            'velocities': rows("varray[@name='velocities']/v"),
        }

    def get_initial_structure(self):
        """
        Returns a dictionary containing 'basis', 'positions' and 'velocities'
        """
        return self._find_first_instance('structure',
                                         self._get_initial_structure)

    def _calculation_tag_found(self, elem):
        """ Stores one 'calculation' element as step `self.step_no' """
        bas = elem.xpath("structure/crystal/varray[@name='basis']/v")
        self.trajectory.set_basis(
            self.step_no,
            np.array([[float(x) for x in b.text.split()] for b in bas]))

        if self.trajectory.num_atoms == 1 and self.atom_no >= 0:
            # Select one atom; XPath positions are 1-based.
            selector = "/v[%d]" % (self.atom_no + 1)
        else:
            # atom_no == -1 means all atoms. The old single-atom branch
            # built v[0] here, which never matches anything.
            selector = "/v"
        pos = elem.xpath("structure/varray[@name='positions']" + selector)
        # Same forces path as the multi-atom case (not under 'structure').
        forces = elem.xpath("varray[@name='forces']" + selector)

        pos = [[float(x) for x in ap.text.split()] for ap in pos]
        forces = [[float(x) for x in ap.text.split()] for ap in forces]
        self.trajectory.set_positions(self.step_no, pos)
        self.trajectory.set_forces(self.step_no, forces)

        e_kin = elem.xpath("energy/i[@name='kinetic']")
        if e_kin:
            self.trajectory.set_e_kinetic(self.step_no,
                                          float(e_kin[0].text))

        e_pot = elem.xpath("energy/i[@name='e_fr_energy']")
        self.trajectory.set_e_total(self.step_no, float(e_pot[0].text))

        self.step_no += 1
        if imported['progressbar']:
            self.pbar.update(self.step_no)

    def _get_trajectories(self):
        """
        Internal method to make a Trajectory object.

        On an XML syntax error the parser's error log is printed and the
        steps read so far are kept.
        """
        atoms = self.get_atoms()
        self.trajectory = Trajectory(num_steps=self.nsw,
                                     timestep=self.potim,
                                     atoms=atoms)
        self.step_no = 0

        size_mb = os.path.getsize(self.filename) / 1024.**2
        status_text = "Parsing %s (%.2f MB)... " % (self.filename, size_mb)
        show_progress = imported['progressbar']
        if show_progress:
            self.pbar = ProgressBar(widgets=[status_text, Percentage()],
                                    maxval=self.nsw + 1).start()

        context = etree.iterparse(self.filename, tag='calculation')
        try:
            self._fast_iter(context, self._calculation_tag_found)
        except etree.XMLSyntaxError as err:
            print("XML parsing halted: %s" % (err,))
            for entry in context.error_log:
                print("Warning: " + entry.message)

        if show_progress:
            self.pbar.finish()
        print("Found %d out of %d steps" % (self.step_no, self.nsw))
        self.trajectory.update_length(self.step_no)
        print_memory_usage()

    def get_trajectory(self):
        """ Returns a oppvasp.trajectory.Trajectory object """
        self.atom_no = -1  # -1: read all atoms
        self._get_trajectories()
        return self.trajectory

    def get_all_trajectories(self):
        print("DEPRECATED: get_all_trajectories is deprectated! Use get_trajectory instead")
        return self.get_trajectory()

    def ionic_steps(self, step=0):
        """
        Generator yielding one IonicStep per 'calculation' element.

        `step' is unused; it is kept so existing callers still work.
        """
        # Stays empty if the file has no 'atominfo' element.
        atominfo = []
        for event, elem in etree.iterparse(self.filename, tag='atominfo'):
            atominfo = [get_atomic_number_from_symbol(rc[0].text.strip())
                        for rc in elem.xpath("array[@name='atoms']/set/rc")]
            break  # avoid scanning the whole file!

        context = etree.iterparse(self.filename, tag='calculation')
        for event, elem in context:
            ionic_step = IonicStep(elem, atominfo)
            self._release_element(elem)
            yield ionic_step
class IterativeVasprunParser(object):
    """
    Parser for very large vasprun.xml files, based on iterative xml parsing.
    The functionality of this parser is limited compared to VasprunParser.
    """

    def __str__(self):
        return "iterative vasprun parser"

    def __init__(self, filename='vasprun.xml', verbose=False):
        """
        Open `filename' and read the run parameters and the atom list.

        Prints a message and exits the process (``sys.exit(1)``) if lxml is
        unavailable, if the file does not exist, or if NSW cannot be found.

        Parameters
        ----------
        filename : str
            Path to the vasprun.xml file, default is "vasprun.xml"
        verbose : bool
            Stored as ``self.verbose``
        """
        if not imported['lxml']:
            print("Error: The module 'lxml' is needed!")
            sys.exit(1)

        self.filename = filename
        self.verbose = verbose

        if not os.path.isfile(self.filename):
            print("Fatal error: The file '%s' was not found or is not a file."
                  % (self.filename))
            sys.exit(1)

        # read beginning of file to find number of ionic steps (NSW) and
        # timestep (POTIM)
        self.params = self._find_first_instance('parameters',
                                                self._params_tag_found)
        try:
            nsw_text = self.params.xpath(
                "separator[@name='ionic']/i[@name='NSW']")[0].text
        except (AttributeError, IndexError):
            # No 'parameters' element (params is None) or no NSW entry.
            print("Could not find incar:NSW in vasprun.xml")
            sys.exit(1)
        self.nsw = int(nsw_text)

        if self.nsw == 0:
            print("Note: This file contains no ionic motion (NSW=0).")
            self.nsw = 1  # to read the static structure

        self.potim = float(self.params.xpath(
            "separator[@name='ionic']/i[@name='POTIM']")[0].text)

        self.atoms = self._find_first_instance('atominfo', self._get_atoms)
        self.natoms = len(self.atoms)

    def _params_tag_found(self, elem):
        # Copy so the data outlives elem.clear() in _find_first_instance.
        return copy(elem)

    def _get_atoms(self, elem):
        """ Returns an int array of atomic numbers read from 'atominfo' """
        # Strip the symbol text, as ionic_steps() does.
        return np.array(
            [get_atomic_number_from_symbol(rc[0].text.strip())
             for rc in elem.xpath("array[@name='atoms']/set/rc")],
            dtype=int)

    def _release_element(self, elem):
        """ Frees a processed element and its already-visited siblings """
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]

    def _fast_iter(self, context, func):
        """ Calls func(elem) for each element, freeing memory as it goes """
        for event, elem in context:
            func(elem)
            self._release_element(elem)

    def _find_first_instance(self, tag, func):
        """
        Returns func(elem) for the first element named `tag', or None if
        there is none. Exits the process on an XML syntax error.
        """
        context = etree.iterparse(self.filename, tag=tag)
        ret = None
        try:
            for event, elem in context:
                ret = func(elem)
                self._release_element(elem)
                break
        except etree.XMLSyntaxError as err:
            print("XML parsing failed:")
            print(err)
            # The error log lives on the iterparse object; a stand-alone
            # XMLParser is never used by iterparse and stays empty.
            for entry in context.error_log:
                print("XML Error: " + entry.message)
            words = str(err).split()
            if words and words[0] == 'Char':
                ZapControlCharacters(self.filename)
                print("")
                print("You may now try to re-run the script.")
                print("")
            sys.exit(1)
        return ret

    def get_num_ionic_steps(self):
        """ Returns the number of ionic steps """
        return self.nsw

    def get_num_atoms(self):
        """ Returns the number of atoms """
        return self.natoms

    def get_atoms(self):
        """ Returns an array with the types of the atoms """
        return self.atoms

    def _get_initial_structure(self, elem):
        def rows(path):
            # One list of floats per <v> element matched by `path'.
            return [[float(x) for x in v.text.split()]
                    for v in elem.xpath(path)]

        return {
            'basis': rows("crystal/varray[@name='basis']/v"),
            'positions': rows("varray[@name='positions']/v"),
            'velocities': rows("varray[@name='velocities']/v"),
        }

    def get_initial_structure(self):
        """
        Returns a dictionary containing 'basis', 'positions' and 'velocities'
        """
        return self._find_first_instance('structure',
                                         self._get_initial_structure)

    def _calculation_tag_found(self, elem):
        """ Stores one 'calculation' element as step `self.step_no' """
        bas = elem.xpath("structure/crystal/varray[@name='basis']/v")
        self.trajectory.set_basis(
            self.step_no,
            np.array([[float(x) for x in b.text.split()] for b in bas]))

        if self.trajectory.num_atoms == 1 and self.atom_no >= 0:
            # Select one atom; XPath positions are 1-based.
            selector = "/v[%d]" % (self.atom_no + 1)
        else:
            # atom_no == -1 means all atoms. The old single-atom branch
            # built v[0] here, which never matches anything.
            selector = "/v"
        pos = elem.xpath("structure/varray[@name='positions']" + selector)
        # Same forces path as the multi-atom case (not under 'structure').
        forces = elem.xpath("varray[@name='forces']" + selector)

        pos = [[float(x) for x in ap.text.split()] for ap in pos]
        forces = [[float(x) for x in ap.text.split()] for ap in forces]
        self.trajectory.set_positions(self.step_no, pos)
        self.trajectory.set_forces(self.step_no, forces)

        e_kin = elem.xpath("energy/i[@name='kinetic']")
        if e_kin:
            self.trajectory.set_e_kinetic(self.step_no,
                                          float(e_kin[0].text))

        e_pot = elem.xpath("energy/i[@name='e_fr_energy']")
        self.trajectory.set_e_total(self.step_no, float(e_pot[0].text))

        self.step_no += 1
        if imported['progressbar']:
            self.pbar.update(self.step_no)

    def _get_trajectories(self):
        """
        Internal method to make a Trajectory object.

        On an XML syntax error the parser's error log is printed and the
        steps read so far are kept.
        """
        atoms = self.get_atoms()
        self.trajectory = Trajectory(num_steps=self.nsw,
                                     timestep=self.potim,
                                     atoms=atoms)
        self.step_no = 0

        size_mb = os.path.getsize(self.filename) / 1024.**2
        status_text = "Parsing %s (%.2f MB)... " % (self.filename, size_mb)
        show_progress = imported['progressbar']
        if show_progress:
            self.pbar = ProgressBar(widgets=[status_text, Percentage()],
                                    maxval=self.nsw + 1).start()

        context = etree.iterparse(self.filename, tag='calculation')
        try:
            self._fast_iter(context, self._calculation_tag_found)
        except etree.XMLSyntaxError as err:
            print("XML parsing halted: %s" % (err,))
            for entry in context.error_log:
                print("Warning: " + entry.message)

        if show_progress:
            self.pbar.finish()
        print("Found %d out of %d steps" % (self.step_no, self.nsw))
        self.trajectory.update_length(self.step_no)
        print_memory_usage()

    def get_trajectory(self):
        """ Returns a oppvasp.trajectory.Trajectory object """
        self.atom_no = -1  # -1: read all atoms
        self._get_trajectories()
        return self.trajectory

    def get_all_trajectories(self):
        print("DEPRECATED: get_all_trajectories is deprectated! Use get_trajectory instead")
        return self.get_trajectory()

    def ionic_steps(self, step=0):
        """
        Generator yielding one IonicStep per 'calculation' element.

        `step' is unused; it is kept so existing callers still work.
        """
        # Stays empty if the file has no 'atominfo' element.
        atominfo = []
        for event, elem in etree.iterparse(self.filename, tag='atominfo'):
            atominfo = [get_atomic_number_from_symbol(rc[0].text.strip())
                        for rc in elem.xpath("array[@name='atoms']/set/rc")]
            break  # avoid scanning the whole file!

        context = etree.iterparse(self.filename, tag='calculation')
        for event, elem in context:
            ionic_step = IonicStep(elem, atominfo)
            self._release_element(elem)
            yield ionic_step