示例#1
0
    def calc_DOS(self, recalc=False):
        """Calculates the *total* density of states.

        A `dos.conf` settings file is written to the phonon directory and
        `phonopy -p dos.conf -s` is executed there. The calculation is
        considered successful when `mesh.yaml` exists afterwards.

        Args:
            recalc (bool): when True, recalculate the DOS, even if the
              file already exists.
        """
        dosfile = path.join(self.phonodir, "mesh.yaml")
        if not recalc and path.isfile(dosfile):
            return

        #Make sure we have calculated the force sets already.
        self.calc_forcesets(recalc)
        settings = {
            "ATOM_NAME": ' '.join(self.database.parent.species),
            "DIM": ' '.join(map(str, self.supercell)),
            "MP": ' '.join(map(str, self.dosmesh))
        }
        with open(path.join(self.phonodir, "dos.conf"), 'w') as f:
            for k, v in settings.items():
                f.write("{} = {}\n".format(k, v))

        sargs = ["phonopy", "-p", "dos.conf", "-s"]
        xres = execute(sargs, self.phonodir, venv=True)
        #Make sure that phonopy actually produced files; otherwise show the
        #output. phonopy doesn't write to stderr, only stdout, so the
        #diagnostics are in the captured *output* lines (same as in
        #`calc_forcesets`).
        #NOTE(review): with `recalc=True` a stale `mesh.yaml` from an earlier
        #run also satisfies this check.
        if not path.isfile(dosfile):  #pragma: no cover
            msg.std(''.join(xres["output"]))
            msg.err("could not calculate the DOS; see errors.")
示例#2
0
 def ready(self):
     """Tells whether every calculation of this group has finished.

     With a non-empty sequence, each member of the sequence must report that
     it is ready. Otherwise there must be at least one configuration folder
     and each of them must contain an `atoms.h5` file.

     Returns:
         bool: True when all calculations are complete.
     """
     self._expand_sequence()
     if len(self.sequence) != 0:
         #Stop at the first member that is still running.
         for member in self.sequence.values():
             if not member.ready():
                 msg.std("{} is not ready. Exiting.".format(member.root), 2)
                 return False
         return True

     finished = len(self.configs) >= 1 and all(
         path.isfile(path.join(folder, "atoms.h5"))
         for folder in self.configs.values())
     if not finished:
         msg.std("{} is not ready. Exiting.".format(self.root), 2)
     return finished
示例#3
0
    def extract(self, cleanup="default"):
        """Sub-samples the finished MD runs into a list of configurations
        that can be run using high-accuracy DFT.

        The entries returned by `_parse_md` for every configuration folder
        are gathered and written, unmodified, to the file at
        `self.subsamples`.

        Args:
            cleanup (str): the level of cleanup to perform after extraction;
              not used by this implementation.

        Returns:
           bool: True if the database is ready; this means that any other
           databases that rely on its outputs can be run. False when the
           XDATCAR check fails or when no sub-samples were produced.
        """
        #The MD has to be complete before it can be sub-sampled.
        if not self._xdatcar_ok():
            msg.std("XDATCAR incomplete; can't extract the MD.", 2)
            return False

        collected = [entry
                     for folder in self.configs.values()
                     for entry in self._parse_md(folder)]

        #Persist the sub-sample entries exactly as they were returned.
        with open(self.subsamples, 'w') as handle:
            handle.writelines(collected)

        return bool(collected)
示例#4
0
    def ready(self):
        """Tells whether all the calculations have been completed.

        Returns:
            bool: for a non-empty sequence, True only when every member is
            ready. Otherwise True when the group is not extractable and is
            set up, when it is seeded but has no seed, or when the number of
            fitting configurations matches the seed(s).
        """
        self._expand_sequence()
        if len(self.sequence) != 0:
            for member in self.sequence.values():
                if not member.ready():
                    msg.std("{} is not ready. Exiting.".format(member.root), 2)
                    return False
            return True

        if not self.extractable and self.is_setup():
            return True

        #Without any seeds there is nothing to set up; treat it as ready.
        if self._seed is None and self.seeded:
            return True

        #A zero-length sequence can mean we have a set of seeds that were
        #specified, *or* that we have a single seed that is itself an atoms
        #object (instead of a list of atoms objects).
        nfits = len(self.fitting_configs)
        return (nfits == len(self._seed)
                or (nfits == 1 and isinstance(self._seed, Atoms)))
示例#5
0
    def __init__(self, name=None, root=None, controller=None, splits=None,
                 folder=None, pattern=None, config_type=None, energy="dft_energy",
                 force="dft_force", virial="dft_virial", limit=None):
        """Prepares the database folder `root/name` and makes sure that the
        combined database files inside it exist, creating them if needed.

        Args:
            name (str): name of the database; used as the sub-folder of
              `root` and in log messages.
            root (str): parent directory; `root/name` is created when it does
              not exist yet.
            controller: stored on the instance; when not None its `ran_seed`
              attribute is copied, otherwise the seed is set to 0.
            splits (dict): stored on the instance; an empty dict is used when
              None.
            folder (str): handed to `_create_dbfull`, which searches it for
              the source database files.
            pattern (str): handed to `_create_dbfull`, which uses it as the
              glob pattern inside `folder`.
            config_type (str): stored on the instance and handed to
              `_create_dbfull`; replaced by the recorded value when an
              existing database configuration is reused.
            energy (str): handed to `_create_dbfull`; presumably the name of
              the energy parameter in the source files -- confirm there.
            force (str): handed to `_create_dbfull`; presumably the name of
              the force property in the source files -- confirm there.
            virial (str): handed to `_create_dbfull`; presumably the name of
              the virial parameter in the source files -- confirm there.
            limit (int): when not None, the number of randomly chosen
              configurations written to the limited database file; it is also
              part of that file's name.

        NOTE(review): `name` and `root` default to None but are passed
        straight to `path.join`, so both have to be supplied in practice.
        """
        self.name = name
        self.root = path.join(root, self.name)
        if not path.isdir(self.root):
            # Only the last path component is created; `root` must exist.
            from os import mkdir
            mkdir(self.root)

        self.controller = controller
        self.splits = {} if splits is None else splits
        self.folder = folder

        if self.controller is None:
            self.ran_seed = 0
        else:
            self.ran_seed = self.controller.ran_seed

        self._dbfile = path.join(self.root, "legacy-{}.h5".format(limit))
        """str: path to the combined legacy database, with limits included.
        """
        self._dbfull = path.join(self.root, "legacy.h5")
        """str: path to the combined legacy database, *without* limits.
        """
        self.dbfiles = []
        self.config_type = config_type

        from matdb.database.utility import dbconfig
        config = dbconfig(self._dbfull)
        if path.isfile(self._dbfile) and len(config) > 0:
            # The limited database and its configuration already exist, so
            # reuse the recorded sources and config type instead of rebuilding.
            self.dbfiles = [db[0] for db in config["sources"]]
            self.config_type = config["config_type"]
            # Same value as assigned above; kept as-is.
            self.folder = folder
        else:
            from matdb.utility import dbcat
            if not path.isfile(self._dbfull):
                self._create_dbfull(folder, pattern, energy, force, virial, config_type)

            if limit is not None:
                # Draw `limit` configurations at random from the full database
                # and write them to the limited file.
                # NOTE(review): no seeding of `np.random` is visible in this
                # method, so `ran_seed` does not obviously affect the draw.
                msg.std("Slicing limit subset of full {} db.".format(self.name))
                full = AtomsList(self._dbfull)
                N = np.arange(len(full))
                np.random.shuffle(N)
                ids = N[0:limit]
                part = full[ids]
                part.write(self._dbfile)
                dbcat([self._dbfull], self._dbfile, docat=False, limit=limit,
                      ids=ids)
            else:
                # No limit requested: link the "limited" path to the full file.
                from matdb.utility import symlink
                symlink(self._dbfile, self._dbfull)

        #The rest of matdb expects each database to have an atoms object that is
        #representative. Just take the first config in the combined database.
        self.atoms = Atoms(self._dbfile)
示例#6
0
    def _make_train_cfg(self, iteration):
        """Creates the 'train.cfg' file needed to train the potential from the
        databases used.

        On the first iteration every configuration of every database is
        written. On later iterations only the configurations added by the
        most recent active-learning iteration are written, skipping those
        whose calculation folder cannot be extracted yet.

        Args:
            iteration (int): the number of iterations MTP has been through.

        Raises:
            IOError: if the structures of the last iteration are not loaded
              and the file listing them does not exist.
        """
        from matdb.database.legacy import LegacyDatabase
        trainfile = path.join(self.root, "train.cfg")

        if iteration == 1:
            for db in self.dbs:
                if not isinstance(db, LegacyDatabase):
                    for step in db.steps.values():
                        rset = step.rset
                        #The context manager closes the progress bar even if
                        #writing one of the configurations fails.
                        with tqdm(total=len(rset)) as pbar:
                            for atm in rset:
                                self._create_train_cfg(atm, trainfile)
                                pbar.update(1)
                else:  # pragma: no cover (Don't use LegacyDatabase for M1)
                    rset = db.rset
                    with tqdm(total=len(rset)) as pbar:
                        for atm in rset:
                            self._create_train_cfg(atm, trainfile)
                            pbar.update(1)
            return

        active = self.active
        if active.last_iteration is None or len(active.last_iteration) < 1:
            if path.isfile(active.iter_file):
                active._load_last_iter()
            else:
                raise IOError("File {0} containing most recently added "
                              "structures is missing.".format(
                                  active.iter_file))

        msg.info("Extracting from {0} folders".format(
            len(active.last_iteration)))
        active.extract()
        with tqdm(total=len(active.last_iteration)) as pbar:
            if active.last_config_atoms is not None:
                for atm in active.last_config_atoms.values():
                    if not atm.calc.can_extract(atm.calc.folder):
                        msg.std(
                            "Folder {} can not be extracted.".format(
                                atm.calc.folder), 2)
                        #Skipped folders do not advance the progress bar.
                        continue
                    self._create_train_cfg(atm, trainfile)
                    pbar.update(1)
示例#7
0
    def execute(self, dryrun=False):
        """Submits the job script for the currently configured potential training.

        Args:
            dryrun (bool): when True, simulate the submission without actually
              submitting.

        Returns:
            bool: True if the submission generated a job id or, when run
            locally, produced no error output (considered successful). False
            if nothing was submitted: the trainer is already done, the job
            file or training file is missing, or the submission failed.
        """
        if self.ready():
            msg.info(
                "Trainer {} is already done; ".format(self.root) +
                "skipping execute step.", 2)
            #Nothing was submitted; keep the result falsy but make it the
            #documented `bool`.
            return False

        if not path.isfile(self._jobfile):
            return False

        if not path.isfile(self._trainfile):
            msg.std("train.h5 missing in {}; can't execute.".format(self.root))
            return False

        # We must have what we need to execute. Compile the command and submit.

        shell_command = self.controller.db.shell_command
        # We support 'bash' and 'sbatch' shell commands; if it's neither one
        # of them, default to 'bash'.
        if shell_command not in ['bash', 'sbatch']:
            shell_command = 'bash'
        cargs = [shell_command, self._jobfile]

        if dryrun:
            msg.okay("Executed {} in {}".format(' '.join(cargs), self.root))
            return True

        xres = execute(cargs, self.root)

        # A supercomputer scheduler will return "Submitted".
        if len(xres["output"]) > 0 and "Submitted" in xres["output"][0]:
            msg.okay("{}: {}".format(self.root, xres["output"][0].strip()))
            return True

        # Local computer: success means nothing was reported as an error.
        return len(xres["error"]) == 0
示例#8
0
def run(args):
    """Finds supercells for the seed configurations and prints them as a table.

    Args:
        args (dict): script arguments. Key "seeds" holds a list of file
          patterns relative to the `seed` folder, each optionally prefixed by
          `format:` (the default format is `vasp`); key "sizes" is handed to
          `_get_supers` together with each seed.

    Returns:
        dict: keys are seed file names; values are whatever `_get_supers`
        returned for that seed. None when `args` is None.
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    targets = {}
    with chdir("seed"):
        for pattern in args["seeds"]:
            #Handle the default file type, which is vasp. Only the first
            #colon separates the format, so the pattern may contain colons.
            if ':' in pattern:
                fmt, pat = pattern.split(':', 1)
            else:
                fmt, pat = "vasp", pattern
            for filename in glob(pat):
                targets[filename] = Atoms(filename, format=fmt)

    result = {}
    for filename, at in tqdm(list(targets.items())):
        result[filename] = _get_supers(at, args["sizes"])

    #Each column: (title, width, name of the color used to print it).
    items = [("Filename", 20, "cokay"), ("Supercell", 40, "cstds"),
             ("Req.", 6, "cinfo"), ("Act.", 6, "cgens"), ("rmin", 8, "cerrs"),
             ("pg", 6, "cwarn")]
    colors = [msg.cenum[color] for _, _, color in items]

    msg.blank(2)
    heading = '|'.join(
        "{0: ^{1}}".format(title, width) for title, width, _ in items)
    msg.arb(heading, colors, '|')
    msg.std('-' * (len(heading) + 1))
    for filename, hs in result.items():
        for size, hnf in hs.items():
            values = (filename, hnf.hnf.flatten().tolist(), size, hnf.size,
                      hnf.rmin, hnf.pg)
            #Convert to text before padding: applying a format spec directly
            #to a list (the flattened HNF) raises TypeError on Python 3.
            text = '|'.join(
                str(value).ljust(width)
                for value, (_, width, _) in zip(values, items))
            msg.arb(text, colors, '|')
        msg.blank(2)

    return result
示例#9
0
    def calc_fc(self, recalc=False):
        """Extracts the force constants from a DFPT Hessian matrix.

        The work is delegated to the `extract_force_constants` function of
        the calculator module selected by `self.calcargs`.

        Args:
            recalc (bool): when True, extract the force constants again even
              if the `FORCE_CONSTANTS` file already exists.
        """
        target = path.join(self.phonodir, "FORCE_CONSTANTS")
        already_done = not recalc and path.isfile(target)
        if already_done:
            return

        from matdb.calculators import get_calculator_module
        extractor = get_calculator_module(self.calcargs).extract_force_constants
        xres = extractor(self.configs, self.phonodir)

        #If the expected file is still missing, show what the extraction
        #reported before flagging the error.
        if not path.isfile(target):  #pragma: no cover
            msg.std(''.join(xres["error"]))
            msg.err("could not calculate the force constants from DFPT.")
示例#10
0
 def ready(self):
     """Returns True if this database has finished its computations
     and is ready to be used.

     Without a sequence, the number of atoms paths must equal `nconfigs`.
     With a sequence, *every* member must be ready; a single member that is
     not ready makes the whole group not ready, no matter where it sits in
     the sequence.

     Returns:
         bool: True when all computations are complete.
     """
     self._expand_sequence()
     if len(self.sequence) == 0:
         result = len(self.atoms_paths()) == self.nconfigs
         if not result:
             msg.std("{} is not ready. Exiting.".format(self.root), 2)
         return result

     for p in self.sequence.values():
         if not p.ready():
             msg.std("{} is not ready. Exiting.".format(p.root), 2)
             #Earlier members being ready must not mask this one.
             return False
     return True
示例#11
0
 def ready(self):
     """Tells whether the phonon calculations are complete, the force sets
     have been created, and the DOS has been calculated.

     Returns:
         bool: for a non-empty sequence, True only when every member is
         ready; otherwise True when the DOS file exists.
     """
     self._expand_sequence()
     if len(self.sequence) != 0:
         for member in self.sequence.values():
             if not member.ready():
                 msg.std("{} is not ready. Exiting.".format(member.root), 2)
                 return False
         return True

     #The DOS is the last step, so its file implies that all the earlier
     #steps completed correctly.
     done = path.isfile(self.dos_file)
     if not done:
         msg.std("{} is not ready. Exiting.".format(self.root), 2)
     return done
示例#12
0
    def can_extract(self):
        """Checks whether results of the calculations can be extracted.

        Without a sequence, every configuration folder is checked with its
        calculator; folders that are not ready are reported, and the group is
        extractable as soon as at least one folder is. With a sequence, every
        member must be extractable.

        Returns:
            bool: True if extraction can proceed. False if the number of
            configuration folders differs from `nconfigs` (when that is set)
            or if no folder can be extracted.
        """
        self._expand_sequence()
        if len(self.sequence) == 0:
            if (len(self.configs) != self.nconfigs
                    and self.nconfigs is not None):
                #We need to have at least one folder for each config;
                #otherwise we aren't ready to go.
                return False

            result = False
            for f, a in zip(self.configs.values(), self.config_atoms.values()):
                if a.calc.can_extract(f):
                    result = True
                else:
                    #Keep processing the rest so that every folder that is
                    #not ready is reported. If any folder can be extracted,
                    #return True.
                    msg.std("Config {} not ready for extraction.".format(f), 2)
            return result
        else:  #pragma: no cover, enumerated database shouldn't take seeds
            return all(group.can_extract() for group in self.sequence.values())
示例#13
0
    def calc_forcesets(self, recalc=False):
        """Builds the `FORCE_SETS` file from the displacement calculations.

        The work is delegated to the `extract_force_sets` function of the
        calculator module selected by `self.calcargs`.

        Args:
            recalc (bool): when True, recalculate the force sets, even if the
              file already exists.
        """
        target = path.join(self.phonodir, "FORCE_SETS")
        already_done = not recalc and path.isfile(target)
        if already_done:
            return

        from matdb.calculators import get_calculator_module
        extractor = get_calculator_module(self.calcargs).extract_force_sets
        xres = extractor(self.configs, self.phonodir)

        #phonopy doesn't write to stderr, only stdout; when the expected file
        #is missing, the captured output is the only diagnostic available.
        if not path.isfile(target):  #pragma: no cover
            msg.std(''.join(xres["output"]))
            msg.err("Couldn't create the FORCE_SETS in {}.".format(
                self.phonodir))
示例#14
0
def _calc_bands(atoms, hessian, supercell=(1, 1, 1), outfile=None, grid=None):
    """Calculates the band structure for the given Hessian matrix.

    The calculation runs `phonopy band.conf` inside a temporary directory,
    which is removed again before this function returns or raises.

    Args:
        atoms (matdb.atoms.Atoms): atoms object corresponding to the *primitive*
          cell. The specified supercell matrix should result in a number
          of atoms that matches the dimensionality of the Hessian.
        hessian (numpy.ndarray): with shape `(natoms*3, natoms*3)`.
        supercell (tuple): tuple of `int` supercell matrix components; can have either
          3 or 9 components. A :class:`numpy.ndarray` is flattened first.
        outfile (str): path to the output `band.yaml` file that should be
          created by this function.
        grid (list): list of `int` specifying the number of divisions in k-space
          along each reciprocal unit vector; defaults to `[13, 13, 13]`.

    Returns:
        If `outfile` is None, then this method returns a dictionary that has the
        same format as :func:`from_yaml`; otherwise None.
    """
    #Create a temporary directory in which to work.
    target = mkdtemp()
    try:
        bandfile = path.join(target, "band.yaml")

        if grid is None:
            grid = [13, 13, 13]
        if isinstance(supercell, np.ndarray):
            supercell = supercell.flatten()

        #First, roll up the Hessian and write it as a FORCE_CONSTANTS file.
        with chdir(target):
            HR = roll(hessian)
            write_FORCE_CONSTANTS(HR)
            atoms.write("POSCAR", format="vasp")

            #We need to create the band.conf file and write the special
            #paths in k-space at which the phonons should be calculated.
            atom_types = _ordered_unique(atoms.get_chemical_symbols())
            settings = [("FORCE_CONSTANTS", "READ"),
                        ("ATOM_NAME", ' '.join(atom_types)),
                        ("DIM", ' '.join(map(str, supercell))),
                        ("MP", ' '.join(map(str, grid)))]

            labels, bands = parsed_kpath(atoms)
            bandfmt = "{0:.3f} {1:.3f} {2:.3f}"
            sband = [bandfmt.format(*Q) for Q in bands]

            settings.append(("BAND", "  ".join(sband)))
            settings.append(("BAND_LABELS", ' '.join(labels)))

            with open("band.conf", 'w') as f:
                for k, v in settings:
                    f.write("{} = {}\n".format(k, v))

        sargs = ["phonopy", "band.conf"]
        xres = execute(sargs, target, venv=True)

        if not path.isfile(bandfile):  #pragma: no cover
            msg.err("could not calculate phonon bands; see errors.")
            msg.std(''.join(xres["output"]))

        if outfile is not None:
            #Move the band.yaml file to the new target location.
            from shutil import move
            move(bandfile, outfile)
            return None

        return from_yaml(bandfile)
    finally:
        #Remove the temporary directory that we created, also when one of the
        #steps above failed; otherwise every failure leaks a directory.
        rmtree(target)
示例#15
0
def execute(args,
            folder,
            wait=True,
            nlines=100,
            venv=None,
            printerr=True,
            env_vars=None,
            errignore=None,
            **kwargs):
    """Executes the specified tuple that should include the command as
    first item and additional arguments afterward. See the
    documentation for :class:`subprocess.Popen` for details.

    Args:
        args (list): list of `str`; first item should be the command to
          execute; additional arguments following. The list itself is never
          modified.
        folder (str): directory to switch to before executing the
          command.
        wait (bool): when True, block the current thread until
          execution completes. When False the process is not waited for, but
          reading a piped stream still blocks until `nlines` lines were kept
          or the stream is closed.
        nlines (int): by default, `stdout` and `stderr` are redirected to
          :data:`subprocess.PIPE`. This is the maximum number of lines that will
          be returned for large outputs.
        venv (str or bool): when a `str`, the name of a virtualenv whose `bin`
          folder contains the command; when True, the command is taken from
          the folder of the running python executable.
        printerr (bool): when True, if `stderr` is not empty, print
          the lines automatically.
        env_vars (dict): dictionary of environment variables to set before calling the
          execution. The variables will be reverted to their original value after
          execution, also when the execution fails.
        errignore (str): if produced errors include this pattern, they will **not** be
          returned or printed.
        kwargs (dict): additional arguments that are passed directly
          to the :class:`subprocess.Popen` constructor.

    Returns:
        dict: dictionary with keys ['process', 'output', 'error'], where 'process' is the
        instance of the subprocess that was created; 'output' and 'error' are
        lists of text lines from `stdout` and `stderr`. A stream that was not
        set to :data:`subprocess.PIPE` yields an empty list.

    .. note:: If the output from 'stdout' and 'stderr' are too large, only the
      first 100 lines will be returned. Use parameter `nlines` to control output
      size. Lines of `stdout` that contain "FutureWarning", "import" or the
      ANSI reset sequence are dropped.
    """
    from os import environ

    if "stdout" not in kwargs:
        kwargs["stdout"] = PIPE
    if "stderr" not in kwargs:
        kwargs["stderr"] = PIPE
    kwargs["cwd"] = folder

    #Work on a copy so that prefixing the command below can never change the
    #list that the caller handed in.
    args = list(args)

    if venv is not None:  # pragma: no cover No guarantee that virtual
        # envs exist on testing machine.
        prefix = None
        if isinstance(venv, string_types):
            vargs = ["virtualenvwrapper_derive_workon_home"]
            vres = execute(vargs, path.abspath("."))
            prefix = path.join(vres["output"][0].strip(), venv, "bin")
        elif venv == True:
            import sys
            prefix = path.dirname(sys.executable)
        #Any other value (e.g. False) leaves the command untouched.
        if prefix is not None:
            args[0] = path.join(prefix, args[0])

    oldvars = {}
    raw_out = raw_err = None
    try:
        if env_vars is not None:
            for name, val in env_vars.items():
                oldvars[name] = environ.get(name)
                environ[name] = val

        msg.std("Executing `{}` in {}.".format(' '.join(args), folder), 2)
        #NOTE(review): the arguments are joined and interpreted by bash
        #(`shell=True`); they must never contain untrusted input.
        pexec = Popen(' '.join(args), shell=True, executable="/bin/bash",
                      **kwargs)

        if wait:
            #`communicate` drains both pipes while it waits. Waiting first and
            #reading afterwards blocks forever once the child fills a pipe
            #buffer; it also records the exit status on `pexec`.
            raw_out, raw_err = pexec.communicate()
    finally:
        #Set the environment variables back to what they used to be.
        for name, val in oldvars.items():
            if val is None:
                environ.pop(name, None)
            else:
                environ[name] = val

    def keep_stdout(text):
        #Filter non fatal noise such as future warnings and import messages.
        return not ("FutureWarning" in text or "import" in text
                    or "\x1b[0m" in text)

    def keep_stderr(text):
        return errignore is None or errignore not in text

    #Redirect the output and errors so that we don't pollute stdout.
    output = []
    if kwargs["stdout"] is PIPE:
        stream = _execute_line_stream(raw_out) if wait else pexec.stdout
        output = _execute_collect(stream, nlines, keep_stdout)
        stream.close()

    error = []
    if kwargs["stderr"] is PIPE:
        stream = _execute_line_stream(raw_err) if wait else pexec.stderr
        error = _execute_collect(stream, nlines, keep_stderr)
        stream.close()
        if printerr and len(error) > 0:
            msg.err(''.join(error))

    return {"process": pexec, "output": output, "error": error}


def _execute_to_text(raw):
    """Returns `raw` as text; undecodable bytes are replaced, never raised."""
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def _execute_line_stream(blob):
    """Wraps captured process output in a closable object iterating by line."""
    from io import BytesIO, StringIO
    if blob is None:
        blob = b""
    return BytesIO(blob) if isinstance(blob, bytes) else StringIO(blob)


def _execute_collect(stream, nlines, keep):
    """Returns at most `nlines` text lines of `stream` accepted by `keep`."""
    kept = []
    for raw in stream:
        text = _execute_to_text(raw)
        if keep(text):
            kept.append(text)
        if len(kept) >= nlines:
            break
    return kept
示例#16
0
    def _create_dbfull(self, folder, pattern, energy, force, virial, config_type):
        """Creates the full combined database.

        Source files that do not conform to the expected parameter names are
        rewritten into `self.root` first; afterwards all configurations are
        merged into the single file `self._dbfull` and its configuration is
        recorded with `dbcat`.

        Args:
            folder (str): directory that contains the source database files.
            pattern (str): glob pattern, relative to `folder`, that selects
              the source database files.
            energy (str): passed to `_atoms_conform`; presumably the name of
              the energy parameter in the source files -- confirm there.
            force (str): passed to `_atoms_conform`; also the name of the
              property that is re-added as `ref_force` when `_atoms_conform`
              requests it.
            virial (str): passed to `_atoms_conform`; presumably the name of
              the virial parameter in the source files -- confirm there.
            config_type (str): when not None, the value assigned to the
              `config_type` parameter of every rewritten configuration.
        """
        from matdb.utility import chdir, dbcat
        from glob import glob
        from tqdm import tqdm
        from os import path

        #NB! There is a subtle bug here: if you try and open a matdb.atoms.Atoms
        #within the context manager of `chdir`, something messes up with the
        #memory sharing in fortran and it dies. This has to be separate.
        with chdir(folder):
            self.dbfiles = glob(pattern)
        #Names of the source files for which a conformed copy was written.
        rewrites = []

        for dbfile in self.dbfiles:
            #Look at the first configuration in the atoms list to
            #determine if it matches the energy, force, virial and
            #config type parameter names.
            dbpath = path.join(folder, dbfile)
            params, doforce = _atoms_conform(dbpath, energy, force, virial)
            if len(params) > 0 or doforce:
                msg.std("Conforming database file {}.".format(dbpath))
                al = AtomsList(dbpath)
                outpath = path.join(self.root, dbfile.replace(".xyz",".h5"))
                for ai in tqdm(al):
                    #`params` maps the new (target) parameter name to the name
                    #it currently has in the source file.
                    for target, source in params.items():
                        if (target == "config_type" and
                            config_type is not None):
                            ai.params[target] = config_type
                        else:
                            ai.add_param(target,ai.params[source])
                            del ai.params[source]
                            if source in ai.info: #pragma: no cover
                                #If things were done correctly by the atoms
                                #object this should never be used. It exists
                                #mainly as a safeguard.
                                msg.warn("The atoms object didn't properly "
                                         "update the parameters of the legacy "
                                         "atoms object.")
                                del ai.info[source]

                    if doforce:
                        #Store the forces under the name `ref_force`.
                        ai.add_property("ref_force",ai.properties[force])
                        del ai.properties[force]

                al.write(outpath)

                #Mark this db as non-conforming so that we know a new
                #version of it was created.
                rewrites.append(dbfile)

                dbcat([dbpath], outpath, docat=False, renames=params,
                      doforce=doforce)

        # We want a single file to hold all of the data for all the atoms in the database.
        all_atoms = AtomsList()
        for dbfile in self.dbfiles:
            if dbfile in rewrites:
                #Use the conformed copy that was written to `self.root`.
                infile = dbfile.replace(".xyz",".h5")
                all_atoms.extend(AtomsList(path.join(self.root, infile)))
            else:
                dbpath = path.join(folder, dbfile)
                all_atoms.extend(AtomsList(dbpath))

        all_atoms.write(self._dbfull)

        #Finally, create the config file.
        from matdb.utility import dbcat
        with chdir(folder):
            dbcat(self.dbfiles, self._dbfull, config_type=self.config_type, docat=False)