def calc_DOS(self, recalc=False):
    """Calculates the *total* density of states.

    Writes a `dos.conf` settings file into `self.phonodir` and runs
    `phonopy -p dos.conf -s` in that folder. The run is considered
    successful when `mesh.yaml` exists in `self.phonodir` afterwards.

    Args:
        recalc (bool): when True, recalculate the DOS, even if the
          file already exists.
    """
    dosfile = path.join(self.phonodir, "mesh.yaml")
    if not recalc and path.isfile(dosfile):
        return

    #Make sure we have calculated the force sets already.
    self.calc_forcesets(recalc)

    settings = {
        "ATOM_NAME": ' '.join(self.database.parent.species),
        "DIM": ' '.join(map(str, self.supercell)),
        "MP": ' '.join(map(str, self.dosmesh))
    }
    with open(path.join(self.phonodir, "dos.conf"), 'w') as f:
        f.writelines("{} = {}\n".format(k, v) for k, v in settings.items())

    sargs = ["phonopy", "-p", "dos.conf", "-s"]
    xres = execute(sargs, self.phonodir, venv=True)

    #Make sure that phonopy actually produced files; otherwise show the
    #output. phonopy doesn't write to stderr, only stdout, so the useful
    #diagnostics are in the captured *output*, not in the error list.
    if not path.isfile(dosfile): #pragma: no cover
        msg.std(''.join(xres["output"]))
        msg.err("could not calculate the DOS; see errors.")
def ready(self):
    """Reports whether every calculation of this group has completed.

    For a group with a non-empty sequence, every child in the sequence must
    itself be ready. Otherwise the group needs at least one configuration
    folder, and each folder must contain an `atoms.h5` file.

    Returns:
        bool: True when the group (or all of its children) is ready.
    """
    self._expand_sequence()

    if len(self.sequence) > 0:
        #Stop at (and report) the first child that is not finished.
        for child in self.sequence.values():
            if not child.ready():
                msg.std("{} is not ready. Exiting.".format(child.root), 2)
                return False
        return True

    done = len(self.configs) >= 1 and all(
        path.isfile(path.join(folder, "atoms.h5"))
        for folder in self.configs.values())
    if not done:
        msg.std("{} is not ready. Exiting.".format(self.root), 2)
    return done
def extract(self, cleanup="default"):
    """Parses the XDATCAR files to create a list of configurations that can
    be run using high-accuracy DFT.

    Args:
        cleanup (str): the level of cleanup to perform after extraction
          (not used by this method's body).

    Returns:
        bool: True if at least one sub-sample was found; this means that
        any other databases that rely on its outputs can be run.
    """
    #The MD has to be finished before it can be sub-sampled.
    if not self._xdatcar_ok():
        msg.std("XDATCAR incomplete; can't extract the MD.", 2)
        return False

    subsamples = []
    for folder in self.configs.values():
        subsamples += self._parse_md(folder)

    #Write the list of sub-sample file paths to disk.
    #NOTE(review): `writelines` adds no separators, so this presumably
    #relies on `_parse_md` returning newline-terminated entries; confirm.
    with open(self.subsamples, 'w') as handle:
        handle.writelines(subsamples)

    return bool(subsamples)
def ready(self):
    """Reports whether every calculation of this group has completed.

    Returns:
        bool: True when the group (or all of the children in its sequence)
        is ready.
    """
    self._expand_sequence()

    if len(self.sequence) != 0:
        #Stop at (and report) the first child that is not finished.
        for child in self.sequence.values():
            if not child.ready():
                msg.std("{} is not ready. Exiting.".format(child.root), 2)
                return False
        return True

    if not self.extractable and self.is_setup():
        return True

    #If there are no seeds, pretend it's ready and don't bother to setup.
    if self._seed is None and self.seeded:
        return True

    #A zero-length sequence can mean we have a set of seeds that were
    #specified, *or* that we have a single seed that is itself an atoms
    #object (instead of a list of atoms objects).
    nfits = len(self.fitting_configs)
    if nfits == len(self._seed):
        return True
    return nfits == 1 and isinstance(self._seed, Atoms)
def __init__(self, name=None, root=None, controller=None, splits=None,
             folder=None, pattern=None, config_type=None, energy="dft_energy",
             force="dft_force", virial="dft_virial", limit=None):
    """Sets up the legacy database folder and its combined database files.

    Args:
        name (str): name of the database; also the name of the sub-folder
          created under `root`.
        root (str): directory in which the database folder is created.
        controller: owning controller; when given, its `ran_seed` is copied.
        splits (dict): stored as-is; defaults to an empty dictionary.
        folder (str): folder containing the source database files.
        pattern (str): passed to :meth:`_create_dbfull` together with
          `folder` to locate the source files.
        config_type (str): configuration type label for the database.
        energy (str): passed through to :meth:`_create_dbfull`.
        force (str): passed through to :meth:`_create_dbfull`.
        virial (str): passed through to :meth:`_create_dbfull`.
        limit (int): when not None, number of randomly selected
          configurations written to the limited database file.
    """
    self.name = name
    self.root = path.join(root, self.name)
    if not path.isdir(self.root):
        from os import mkdir
        mkdir(self.root)

    self.controller = controller
    self.splits = splits if splits is not None else {}
    self.folder = folder
    self.ran_seed = (0 if self.controller is None
                     else self.controller.ran_seed)

    #str: path to the combined legacy database, with limits included.
    self._dbfile = path.join(self.root, "legacy-{}.h5".format(limit))
    #str: path to the combined legacy database, *without* limits.
    self._dbfull = path.join(self.root, "legacy.h5")
    self.dbfiles = []
    self.config_type = config_type

    from matdb.database.utility import dbconfig
    config = dbconfig(self._dbfull)
    already_built = path.isfile(self._dbfile) and len(config) > 0

    if already_built:
        #Reuse what a previous run recorded in the configuration.
        self.dbfiles = [source[0] for source in config["sources"]]
        self.config_type = config["config_type"]
        self.folder = folder
    else:
        from matdb.utility import dbcat
        if not path.isfile(self._dbfull):
            self._create_dbfull(folder, pattern, energy, force, virial,
                                config_type)

        if limit is None:
            from matdb.utility import symlink
            symlink(self._dbfile, self._dbfull)
        else:
            msg.std("Slicing limit subset of full {} db.".format(self.name))
            everything = AtomsList(self._dbfull)
            #Shuffle all indices and keep the first `limit` of them.
            order = np.arange(len(everything))
            np.random.shuffle(order)
            ids = order[0:limit]
            subset = everything[ids]
            subset.write(self._dbfile)
            dbcat([self._dbfull], self._dbfile, docat=False, limit=limit,
                  ids=ids)

    #The rest of matdb expects each database to have an atoms object that is
    #representative. Just take the first config in the combined database.
    self.atoms = Atoms(self._dbfile)
def _make_train_cfg(self, iteration):
    """Creates the 'train.cfg' file needed to train the potential from the
    databases used.

    On the first iteration every atoms object in the result sets of the
    configured databases is appended to `train.cfg`. On later iterations
    only the configurations added by the most recent active-learning
    iteration are appended.

    Args:
        iteration (int): the number of iterations MTP has been through.

    Raises:
        IOError: if the list of most recently added structures is empty and
          the file it should be loaded from does not exist.
    """
    from matdb.database.legacy import LegacyDatabase

    train_cfg = path.join(self.root, "train.cfg")

    def _append_all(rset):
        #Append each atoms object of `rset`; the `with` block guarantees
        #that the progress bar is closed again.
        with tqdm(total=len(rset)) as pbar:
            for atm in rset:
                self._create_train_cfg(atm, train_cfg)
                pbar.update(1)

    if iteration == 1:
        for db in self.dbs:
            if not isinstance(db, LegacyDatabase):
                for step in db.steps.values():
                    _append_all(step.rset)
            else: # pragma: no cover (Don't use LegacyDatabase for M1)
                _append_all(db.rset)
        return

    if (self.active.last_iteration is None
            or len(self.active.last_iteration) < 1):
        if not path.isfile(self.active.iter_file):
            raise IOError("File {0} containing most recently added "
                          "structures is missing.".format(
                              self.active.iter_file))
        self.active._load_last_iter()

    msg.info("Extracting from {0} folders".format(
        len(self.active.last_iteration)))
    self.active.extract()

    with tqdm(total=len(self.active.last_iteration)) as pbar:
        if self.active.last_config_atoms is not None:
            for atm in self.active.last_config_atoms.values():
                if not atm.calc.can_extract(atm.calc.folder):
                    #Skipped folders do not advance the progress bar.
                    msg.std(
                        "Folder {} can not be extracted.".format(
                            atm.calc.folder), 2)
                    continue
                self._create_train_cfg(atm, train_cfg)
                pbar.update(1)
def execute(self, dryrun=False):
    """Submits the job script for the currently configured potential
    training.

    Args:
        dryrun (bool): when True, simulate the submission without actually
          submitting.

    Returns:
        bool: True if the submission generated a job id or the command ran
        without writing to stderr (considered successful); False if the job
        file or training file is missing or the command reported errors.
        `None` is returned when the trainer is already done and nothing was
        submitted.
    """
    if self.ready():
        msg.info(
            "Trainer {} is already done; ".format(self.root) +
            "skipping execute step.", 2)
        return

    if not path.isfile(self._jobfile):
        return False

    if not path.isfile(self._trainfile):
        msg.std("train.h5 missing in {}; can't execute.".format(self.root))
        return False

    # We must have what we need to execute. Compile the command and submit.
    shell_command = self.controller.db.shell_command
    # We support 'bash' and 'sbatch' shell commands; if it's neither one of
    # them, default to 'bash'.
    if shell_command not in ['bash', 'sbatch']:
        shell_command = 'bash'
    cargs = [shell_command, self._jobfile]

    if dryrun:
        msg.okay("Executed {} in {}".format(' '.join(cargs), self.root))
        return True

    xres = execute(cargs, self.root)

    # A supercomputer scheduler will report "Submitted ...".
    if len(xres["output"]) > 0 and "Submitted" in xres["output"][0]:
        msg.okay("{}: {}".format(self.root, xres["output"][0].strip()))
        return True

    # Local computer: success means nothing was written to stderr.
    return len(xres["error"]) == 0
def run(args):
    """Runs the supercell search for the seed files and prints a table of
    the results.

    Args:
        args (dict): uses the key 'seeds' (list of `[format:]glob-pattern`
          strings resolved inside the `seed` folder; the format defaults to
          `vasp`) and the key 'sizes' (passed to the supercell search).

    Returns:
        dict: keys are seed file names; values are whatever the supercell
        search returned for that file. `None` if `args` is None.
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    targets = {}
    with chdir("seed"):
        for pattern in args["seeds"]:
            #Handle the default file type, which is vasp. Only the first
            #colon separates the format, so the glob itself may have colons.
            if ':' in pattern:
                fmt, pat = pattern.split(':', 1)
            else:
                fmt, pat = "vasp", pattern
            for filename in glob(pat):
                targets[filename] = Atoms(filename, format=fmt)

    result = {}
    for filename, at in tqdm(list(targets.items())):
        result[filename] = _get_supers(at, args["sizes"])

    #Column definitions: (title, width, color name).
    items = [("Filename", 20, "cokay"), ("Supercell", 40, "cstds"),
             ("Req.", 6, "cinfo"), ("Act.", 6, "cgens"), ("rmin", 8, "cerrs"),
             ("pg", 6, "cwarn")]
    colors = [msg.cenum[color] for _, _, color in items]

    msg.blank(2)
    heading = '|'.join(
        "{0: ^{1}}".format(title, width) for title, width, _ in items)
    msg.arb(heading, colors, '|')
    msg.std('-' * (len(heading) + 1))

    for filename, hs in result.items():
        for size, hnf in hs.items():
            names = (filename, hnf.hnf.flatten().tolist(), size, hnf.size,
                     hnf.rmin, hnf.pg)
            #Convert to `str` first: lists (and other objects without their
            #own `__format__`) raise TypeError for an alignment spec.
            text = '|'.join(
                "{0: <{1}}".format(str(value), width)
                for value, (_, width, _) in zip(names, items))
            msg.arb(text, colors, '|')
    msg.blank(2)

    return result
def calc_fc(self, recalc=False):
    """Extracts the force constants from a DFPT Hessian matrix.

    The extraction is delegated to the `extract_force_constants` function
    of the calculator module selected by `self.calcargs`; it is expected to
    create `FORCE_CONSTANTS` in `self.phonodir`.

    Args:
        recalc (bool): when True, redo the extraction even if the
          `FORCE_CONSTANTS` file already exists.
    """
    fcfile = path.join(self.phonodir, "FORCE_CONSTANTS")
    if not recalc and path.isfile(fcfile):
        return

    from matdb.calculators import get_calculator_module
    mod = get_calculator_module(self.calcargs)
    xres = mod.extract_force_constants(self.configs, self.phonodir)

    #Make sure that phonopy actually produced files; otherwise show the
    #output. phonopy doesn't write to stderr, only stdout, so the useful
    #diagnostics are in the captured *output*, not in the error list.
    if not path.isfile(fcfile): #pragma: no cover
        msg.std(''.join(xres["output"]))
        msg.err("could not calculate the force constants from DFPT.")
def ready(self):
    """Reports whether this database has finished its computations and is
    ready to be used.

    Without a sequence, the number of atoms paths has to match `nconfigs`;
    with a sequence, every child in it has to be ready.

    Returns:
        bool: True when the database is ready.
    """
    self._expand_sequence()

    if len(self.sequence) > 0:
        #Stop at (and report) the first child that is not finished.
        for child in self.sequence.values():
            if not child.ready():
                msg.std("{} is not ready. Exiting.".format(child.root), 2)
                return False
        return True

    complete = len(self.atoms_paths()) == self.nconfigs
    if not complete:
        msg.std("{} is not ready. Exiting.".format(self.root), 2)
    return complete
def ready(self):
    """Reports whether all the phonon calculations have been completed, the
    force sets have been created, and the DOS has been calculated.

    Returns:
        bool: True when the DOS file exists (no sequence), or when every
        child in the sequence is ready.
    """
    self._expand_sequence()

    if len(self.sequence) == 0:
        #If the DOS has been calculated, then all the other steps must have
        #completed correctly.
        has_dos = path.isfile(self.dos_file)
        if not has_dos:
            msg.std("{} is not ready. Exiting.".format(self.root), 2)
        return has_dos

    #Find the first child that is not finished; later ones are not queried.
    blocker = next(
        (p for p in self.sequence.values() if not p.ready()), None)
    if blocker is not None:
        msg.std("{} is not ready. Exiting.".format(blocker.root), 2)
    return blocker is None
def can_extract(self):
    """Checks whether results can be extracted from this group.

    Without a sequence, the group can be extracted as soon as the
    calculator of at least one configuration reports that its folder can be
    extracted; every configuration is checked (and reported when it is not
    ready). With a sequence, all the children have to be extractable.

    Returns:
        bool: True if extraction is possible.
    """
    self._expand_sequence()

    if len(self.sequence) != 0: #pragma: no cover, enumerated database shouldn't take seeds
        return all(group.can_extract() for group in self.sequence.values())

    #We need to have at least one folder for each config; otherwise we
    #aren't ready to go.
    if self.nconfigs is not None and len(self.configs) != self.nconfigs:
        return False

    extractable = False
    pairs = zip(self.configs.values(), self.config_atoms.values())
    for folder, atoms in pairs:
        if atoms.calc.can_extract(folder):
            extractable = True
        else:
            #Keep going: a single extractable folder is enough.
            msg.std("Config {} not ready for extraction.".format(folder), 2)
    return extractable
def calc_forcesets(self, recalc=False):
    """Extracts the force sets from the displacement calculations.

    The work is delegated to the `extract_force_sets` function of the
    calculator module selected by `self.calcargs`; it is expected to create
    `FORCE_SETS` in `self.phonodir`.

    Args:
        recalc (bool): when True, recalculate the force sets, even if the
          file already exists.
    """
    target = path.join(self.phonodir, "FORCE_SETS")
    if not recalc and path.isfile(target):
        return

    from matdb.calculators import get_calculator_module
    calculator = get_calculator_module(self.calcargs)
    xres = calculator.extract_force_sets(self.configs, self.phonodir)

    #Make sure that phonopy actually produced files; otherwise show the
    #output (phonopy doesn't write to stderr, only stdout).
    if not path.isfile(target): #pragma: no cover
        msg.std(''.join(xres["output"]))
        msg.err("Couldn't create the FORCE_SETS in {}.".format(
            self.phonodir))
def _calc_bands(atoms, hessian, supercell=(1, 1, 1), outfile=None, grid=None):
    """Calculates the band structure for the given Hessian matrix.

    The calculation runs `phonopy band.conf` inside a temporary directory,
    which is always removed again, even when the calculation fails.

    Args:
        atoms (matdb.atoms.Atoms): atoms object corresponding to the
          *primitive* cell. The specified supercell matrix should result in
          a number of atoms that matches the dimensionality of the Hessian.
        hessian (numpy.ndarray): with shape `(natoms*3, natoms*3)`.
        supercell (tuple): tuple of `int` supercell matrix components; can
          have either 3 or 9 components.
        outfile (str): path to the output `band.yaml` file that should be
          created by this function.
        grid (list): list of `int` specifying the number of divisions in
          k-space along each reciprocal unit vector; defaults to
          `[13, 13, 13]`.

    Returns:
        If `outfile` is None, then this method returns a dictionary that has
        the same format as :func:`from_yaml`; otherwise `None`.
    """
    if grid is None:
        grid = [13, 13, 13]
    if isinstance(supercell, np.ndarray):
        supercell = supercell.flatten()

    #Create a temporary directory in which to work.
    target = mkdtemp()
    try:
        bandfile = path.join(target, "band.yaml")

        #First, roll up the Hessian and write it as a FORCE_CONSTANTS file.
        with chdir(target):
            HR = roll(hessian)
            write_FORCE_CONSTANTS(HR)
            atoms.write("POSCAR", format="vasp")

            #We need to create the band.conf file and write the special
            #paths in k-space at which the phonons should be calculated.
            atom_types = _ordered_unique(atoms.get_chemical_symbols())
            settings = [("FORCE_CONSTANTS", "READ"),
                        ("ATOM_NAME", ' '.join(atom_types)),
                        ("DIM", ' '.join(map(str, supercell))),
                        ("MP", ' '.join(map(str, grid)))]

            labels, bands = parsed_kpath(atoms)
            bandfmt = "{0:.3f} {1:.3f} {2:.3f}"
            sband = [bandfmt.format(*Q) for Q in bands]
            settings.append(("BAND", "  ".join(sband)))
            settings.append(("BAND_LABELS", ' '.join(labels)))

            with open("band.conf", 'w') as f:
                for k, v in settings:
                    f.write("{} = {}\n".format(k, v))

        sargs = ["phonopy", "band.conf"]
        xres = execute(sargs, target, venv=True)

        if not path.isfile(bandfile): #pragma: no cover
            msg.err("could not calculate phonon bands; see errors.")
            msg.std(''.join(xres["output"]))

        if outfile is not None:
            #Move the band.yaml file to the new target location.
            from shutil import move
            move(bandfile, outfile)
            return None
        return from_yaml(bandfile)
    finally:
        #Remove the temporary directory that we created, whether or not the
        #calculation succeeded.
        rmtree(target)
def execute(args, folder, wait=True, nlines=100, venv=None, printerr=True,
            env_vars=None, errignore=None, **kwargs):
    """Executes the specified tuple that should include the command as first
    item and additional arguments afterward. See the documentation for
    :class:`subprocess.Popen` for details.

    Args:
        args (list): list of `str`; first item should be the command to
          execute; additional arguments following. The list is not modified.
        folder (str): directory to switch to before executing the command.
        wait (bool): when True, block the current thread until execution
          completes. When False the process is not waited for explicitly,
          but piped output is still read from it.
        nlines (int): by default, `stdout` and `stderr` are redirected to
          :data:`subprocess.PIPE`. This is the maximum number of lines that
          will be returned for large outputs.
        venv (str): when not `None`, the name of a virtualenv to activate
          before running the command; `True` uses the folder of the running
          python executable.
        printerr (bool): when True, if `stderr` is not empty, print the
          lines automatically.
        env_vars (dict): dictionary of environment variables to set before
          calling the execution. The variables will be reverted to their
          original value after execution.
        errignore (str): if produced errors include this pattern, they will
          **not** be printed to `stdout`.
        kwargs (dict): additional arguments that are passed directly to the
          :class:`subprocess.Popen` constructor.

    Returns:
        dict: dictionary with keys ['process', 'output', 'error'], where
        'process' is the instance of the subprocess that was created;
        'output' and 'error' are lists of text lines read from `stdout` and
        `stderr`. They are empty when the corresponding stream was not set
        to :data:`subprocess.PIPE`.

    .. note:: If the output from 'stdout' and 'stderr' are too large, only
      the first 100 lines will be returned. Use parameter `nlines` to
      control output size.
    """
    import io
    from os import environ

    def _text(line):
        #Pipes produce `bytes` unless the caller requested text mode. Bytes
        #that are not ASCII are replaced instead of raising.
        return line if isinstance(line, str) else line.decode("ascii",
                                                              "replace")

    def _lines(stream, captured):
        #Iterate the captured data line by line (split on newlines, exactly
        #like iterating the pipe); fall back to the pipe when nothing was
        #captured because we did not wait for the process.
        if captured is None:
            return stream
        if isinstance(captured, bytes):
            return io.BytesIO(captured)
        return io.StringIO(captured)

    def _collect(lines, keep):
        #Keep at most `nlines` of the lines for which `keep` is True.
        kept = []
        for line in lines:
            line = _text(line)
            if keep(line):
                kept.append(line)
            if len(kept) >= nlines:
                break
        return kept

    def _keep_output(line):
        #Filter non fatal messages such as future warnings and import
        #chatter, as well as terminal reset sequences.
        return not ("FutureWarning" in line or "import" in line
                    or "\x1b[0m" in line)

    def _keep_error(line):
        return errignore is None or errignore not in line

    kwargs.setdefault("stdout", PIPE)
    kwargs.setdefault("stderr", PIPE)
    kwargs["cwd"] = folder

    #Work on a copy so that the caller's list is never modified.
    args = list(args)
    if venv is not None: # pragma: no cover No guarantee that virtual
                         # envs exist on testing machine.
        prefix = None
        if isinstance(venv, string_types):
            vargs = ["virtualenvwrapper_derive_workon_home"]
            vres = execute(vargs, path.abspath("."))
            prefix = path.join(vres["output"][0].strip(), venv, "bin")
        elif venv == True:
            import sys
            prefix = path.dirname(sys.executable)
        if prefix is not None:
            args[0] = path.join(prefix, args[0])

    oldvars = {}
    captured_out = captured_err = None
    try:
        if env_vars is not None:
            for name, val in env_vars.items():
                oldvars[name] = environ.get(name)
                environ[name] = val

        msg.std("Executing `{}` in {}.".format(' '.join(args), folder), 2)
        #NOTE(review): the command is a shell string built by joining the
        #arguments, so arguments are not quoted; only pass trusted values.
        pexec = Popen(' '.join(args), shell=True, executable="/bin/bash",
                      **kwargs)
        if wait:
            #`communicate` drains both pipes while waiting; waiting first
            #and reading afterwards blocks forever once a pipe buffer fills.
            captured_out, captured_err = pexec.communicate()
    finally:
        #Set the environment variables back to what they used to be.
        for name, val in oldvars.items():
            if val is None:
                environ.pop(name, None)
            else:
                environ[name] = val

    #Redirect the output and errors so that we don't pollute stdout.
    output = []
    if kwargs["stdout"] is PIPE:
        output = _collect(_lines(pexec.stdout, captured_out), _keep_output)
        pexec.stdout.close()

    error = []
    if kwargs["stderr"] is PIPE:
        error = _collect(_lines(pexec.stderr, captured_err), _keep_error)
        pexec.stderr.close()
        if printerr and len(error) > 0:
            msg.err(''.join(error))

    return {
        "process": pexec,
        "output": output,
        "error": error
    }
def _create_dbfull(self, folder, pattern, energy, force, virial, config_type):
    """Creates the full combined database.

    Source files matching `pattern` inside `folder` are checked with
    `_atoms_conform`; files that need renamed parameters or a renamed force
    property are rewritten as `.h5` files under `self.root`. All files are
    then combined into the single file `self._dbfull`.

    Args:
        folder (str): directory containing the source database files.
        pattern (str): glob pattern, evaluated inside `folder`.
        energy (str): passed to `_atoms_conform`.
        force (str): passed to `_atoms_conform`; also the name of the
          property that is moved to `ref_force` when needed.
        virial (str): passed to `_atoms_conform`.
        config_type (str): when not None, value assigned to the
          `config_type` parameter of rewritten configurations.
    """
    from matdb.utility import chdir, dbcat
    from glob import glob
    from tqdm import tqdm
    from os import path

    #NB! There is a subtle bug here: if you try and open a matdb.atoms.Atoms
    #within the context manager of `chdir`, something messes up with the
    #memory sharing in fortran and it dies. This has to be separate.
    with chdir(folder):
        self.dbfiles = glob(pattern)

    #Names of the source files that had to be rewritten under `self.root`.
    rewrites = []

    for dbfile in self.dbfiles:
        #Look at the first configuration in the atoms list to
        #determine if it matches the energy, force, virial and
        #config type parameter names.
        dbpath = path.join(folder, dbfile)
        params, doforce = _atoms_conform(dbpath, energy, force, virial)
        if len(params) > 0 or doforce:
            msg.std("Conforming database file {}.".format(dbpath))
            al = AtomsList(dbpath)
            outpath = path.join(self.root, dbfile.replace(".xyz",".h5"))
            for ai in tqdm(al):
                #`params` maps the target parameter name to the name the
                #value currently has in the source file.
                for target, source in params.items():
                    if (target == "config_type" and config_type is not None):
                        ai.params[target] = config_type
                    else:
                        ai.add_param(target,ai.params[source])
                        del ai.params[source]
                        if source in ai.info: #pragma: no cover
                                              #(if things were
                                              #done correctly by
                                              #the atoms object
                                              #this should never
                                              #be used. It exists
                                              #mainly as a
                                              #safeguard.
                            msg.warn("The atoms object didn't properly "
                                     "update the parameters of the legacy "
                                     "atoms object.")
                            del ai.info[source]

                if doforce:
                    #Move the force property to the name `ref_force`.
                    ai.add_property("ref_force",ai.properties[force])
                    del ai.properties[force]

            al.write(outpath)

            #Mark this db as non-conforming so that we know a new
            #version of it was created.
            rewrites.append(dbfile)
            dbcat([dbpath], outpath, docat=False, renames=params,
                  doforce=doforce)

    # We want a single file to hold all of the data for all the atoms in the database.
    all_atoms = AtomsList()
    for dbfile in self.dbfiles:
        if dbfile in rewrites:
            #Use the conformed copy that was written under `self.root`.
            infile = dbfile.replace(".xyz",".h5")
            all_atoms.extend(AtomsList(path.join(self.root, infile)))
        else:
            dbpath = path.join(folder, dbfile)
            all_atoms.extend(AtomsList(dbpath))

    all_atoms.write(self._dbfull)

    #Finally, create the config file.
    from matdb.utility import dbcat
    with chdir(folder):
        dbcat(self.dbfiles, self._dbfull, config_type=self.config_type, docat=False)