示例#1
0
def test_split(Pd_split):
    """Checks that the controller splits each database into train, holdout
    and super sets whose sizes follow from the split fraction.
    """
    from os import path

    # Run the first array task and then fake the VASP output so that the
    # extraction step has something to work with.
    Pd_split.setup()
    Pd_split.execute(env_vars={"SLURM_ARRAY_TASK_ID": "1"})
    source = path.join(reporoot, "tests", "data", "Pd", "manual_split")
    _mimic_vasp(source, Pd_split.root, "S1.1")

    Pd_split.extract()
    Pd_split.split()

    # The size assertions below are written for a total of 5 configurations.
    total = 5
    for database in Pd_split.collections.values():
        for name, fraction in database.splits.items():
            tfile, hfile, sfile = [
                path.join(getter(name).format(name))
                for getter in (database.train_file, database.holdout_file,
                               database.super_file)
            ]

            training = AtomsList(tfile)
            holdout = AtomsList(hfile)
            remainder = AtomsList(sfile)

            ntrain = len(training)
            nhold = len(holdout)
            assert ntrain == int(np.ceil(total * fraction))
            assert nhold == int(np.ceil((total - ntrain) * fraction))
            assert len(remainder) == total - ntrain - nhold
示例#2
0
def _generate_pkl(pot, dbs=None, **args):
    """Generates a pickle file of plots for a single potential.

    Args:
        pot: potential (trainer) whose `fqn`, `calculator` and `configs` are
          used to name the output folder and to produce the plots.
        dbs (list): optional databases; when given, the configurations from
          each database's `iconfigs` are combined and plotted instead of the
          potential's own configurations.
        args (dict): keyword options. The keys read here are "folder",
          "plots", "base64" and "subset", plus "valkey" when `dbs` is given.
    """
    from matdb.plotting.potentials import generate
    from matdb.atoms import AtomsList
    # `cPickle` only exists on Python 2; fall back to `pickle` elsewhere.
    try:
        from cPickle import dump
    except ImportError:
        from pickle import dump

    outdir = path.join(args["folder"], pot.fqn)
    if not path.isdir(outdir):
        mkdir(outdir)

    if dbs is not None:
        configs = AtomsList()
        for db in dbs:
            configs.extend(list(db.iconfigs))

        pdis = generate(args["plots"],
                        pot.calculator,
                        configs,
                        outdir,
                        args["base64"],
                        valkey=args["valkey"])
    else:
        pdis = generate(args["plots"], pot.calculator,
                        pot.configs(args["subset"]), outdir, args["base64"])

    pklname = "{}-{}-plotgen.pkl".format(args["subset"], args["plots"])
    target = path.join(outdir, pklname)
    # Pickle streams are binary data; a text-mode handle can corrupt them
    # (newline translation) and is rejected outright by Python 3.
    with open(target, 'wb') as f:
        dump(pdis, f)
示例#3
0
def run(args):
    """Collects the configurations matching the given patterns and converts
    them to the requested output format.

    Args:
        args (dict): parsed script arguments, or `None` to only print the
          license banner. The keys read here are "dbspec" (specification
          handed to the controller), "p" (list of search patterns), "format"
          and, for the "xyz" format, "o" (output path) and "overwrite".

    .. note:: Only the "xyz" format is handled; any other value of
      `args["format"]` collects the configurations and then does nothing.
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    #No matter what other options the user has chosen, we will have to create a
    #database controller for the specification they have given us.
    cdb = Controller(args["dbspec"])

    #Gather every configuration from every entry that matches any pattern.
    configs = AtomsList()
    for pattern in args["p"]:
        for entry in cdb.find(pattern):
            for iatoms in entry.iconfigs:
                configs.append(iatoms)

    if args["format"] == "xyz":
        from matdb.conversion import to_xyz
        target = path.abspath(path.expanduser(args["o"]))
        to_xyz(configs, target, args["overwrite"])
示例#4
0
 def _get_distortion(self):
     """Creates distorted copies of `self.atoms`, one per scaling matrix.

     For every matrix returned by `self._get_scaling_matrix()` a copy of
     `self.atoms` is made; its cell is multiplied by the matrix, the atoms
     are optionally rattled (when `self.rattle` is non-zero) and the
     positions are multiplied by the same matrix.

     Returns:
         AtomsList: the distorted copies, in the order of the scaling
         matrices.

     Raises:
         ValueError: if `self.cov_diag` is `None`, since no scaling
           matrices can be produced in that case.
     """
     if self.cov_diag is None:
         # Previously this fell through and failed with an unbound local
         # variable; fail with an explicit message instead.
         raise ValueError("cannot generate distortions: `cov_diag` is None, "
                          "so no scaling matrices are available.")

     scaling_matrix = self._get_scaling_matrix()
     atom_seed = AtomsList()
     for i in scaling_matrix:
         local_atoms = self.atoms.copy()
         local_atoms.set_cell(np.matmul(local_atoms.get_cell(), i))
         if self.rattle != 0.0:
             local_atoms.rattle(stdev=self.rattle)
         #Also distort the positions of the atoms just like the lattice
         #vectors.
         local_atoms.positions = np.matmul(local_atoms.get_positions(), i)
         atom_seed.append(local_atoms)
     return atom_seed
示例#5
0
    def quantities(self,
                   params=None,
                   properties=None,
                   aggregators=None,
                   kind="train",
                   **kwargs):
        """Returns datasets derived from the atoms objects that are present in
        this trainers compiled databases.

        .. note:: If a property is missing from a particular atoms object, it is
          just ignored. That means the arrays returned from this method may not
          all have exactly the same length as the number of entries in the
          database.

        Args:
            params (list): list of `str` parameter names to extract from each atoms
              object.
            properties (list): list of `str` property names to extract from each
              atoms object.
            aggregators (dict): keys are `str` property names; values are `str`
              FQN of importable functions that can be applied to a
              :class:`numpy.ndarray` to produce a single scalar value. These are
              used to reduce an array of property values to a single number for a
              particular configuration. If not specified (or if a property has
              no entry), the raw arrays are returned instead.
            kind (str): one of ['train', 'holdout', 'super', '*']. Specifies which of
              the database sets to use. If '*' is specified, then all of them are
              combined.
            kwargs (dict): additional dummy arguments that aren't needed, but allow the `**` syntax to be used.

        Returns:
            dict: keys are either property or parameter names. Values are
            :class:`numpy.ndarray` for parameters; for properties, since the arrays
            may have different sizes, the value will be a list of
            :class:`numpy.ndarray`.
        """
        assert kind in ["train", "holdout", "super", '*']
        #Without this, requesting properties with the default `aggregators`
        #failed on the membership test below.
        if aggregators is None:
            aggregators = {}

        if kind == '*':
            db = AtomsList()
            for k in ["train", "holdout", "super"]:
                db.extend(self.configs(k))
        else:
            db = self.configs(kind)

        result = {}
        if params is not None:
            for pname in params:
                result[pname] = np.array(getattr(db, pname))
        if properties is not None:
            for pname in properties:
                value = getattr(db, pname)
                if pname in aggregators:
                    #Only the function is needed; the module is discarded.
                    _, aggfun = import_fqdn(aggregators[pname])
                    result[pname] = aggfun(value)
                else:
                    result[pname] = value

        return result
示例#6
0
def cfg_to_atomslist(cfgfile, config_type=None, species=None):
    """Converts the CFG format file to an internal AtomsList object.

    Args:
        cfgfile (str): path to the file to convert.
        config_type (str): name of the config_type to assign to each
          configuration. NOTE(review): this argument is not used anywhere in
          this function.
        species (list): list of element names corresponding to the integer
          species in the CFG dictionary.

    Returns:
        matdb.atoms.AtomsList : An AtomsList object containing all the cells
        in the CFG file.

    """
    from ast import literal_eval

    from matdb.atoms import AtomsList

    configs = []
    cfgd = None
    with open(cfgfile) as f:
        for line in f:
            if line.strip() == '':
                continue

            if "BEGIN_CFG" in line:
                #Start of a new configuration block.
                cfgd = {"features": {}}
            elif isinstance(cfgd, dict) and "END_CFG" not in line:
                if _rxcfg.match(line.strip()):
                    #Section header; headers with a colon also list the
                    #column names of the values that follow.
                    if ':' in line:
                        raw = line.strip().split()
                        label = raw[0].rstrip(':')
                        cols = raw[1:]
                        cfgd[label] = {"cols": cols, "vals": []}
                    else:
                        label = line.strip()
                        cfgd[label] = {"vals": []}

                    if "Feature" in label:
                        #Features carry their values on the header line
                        #itself, so they are stored separately.
                        fvals = label.split()
                        feature = fvals[1]
                        values = fvals[2:]
                        cfgd["features"][feature] = values
                        del cfgd[label]
                else:
                    #Value row for the most recent section. The tokens come
                    #straight from the file, so parse them as literals
                    #rather than evaluating them as arbitrary code.
                    parsed = [literal_eval(v) for v in line.strip().split()]
                    cfgd[label]["vals"].append(parsed)
            elif "END_CFG" in line:
                if cfgd is not None:
                    configs.append(cfgd)
                cfgd = None

    result = AtomsList()
    for cfg in configs:
        atoms = _cfgd_to_atoms(cfg, species)
        result.append(atoms)

    return result
示例#7
0
    def rset(self):
        """Builds the result set by loading one
        :class:`~matdb.atoms.Atoms` object for every entry in
        `self.fitting_configs`.

        Returns:
            matdb.atoms.AtomsList: the loaded atoms objects, in the order in
            which `self.fitting_configs` yields them.
        """
        from matdb.atoms import Atoms, AtomsList

        loaded = AtomsList()
        for source in self.fitting_configs:
            atoms = Atoms(source)
            loaded.append(atoms)
        return loaded
示例#8
0
    def rset(self):
        """Builds the result set for this group.

        Every entry in `self.fitting_configs` is used to construct an
        `Atoms` object.

        Returns:
            AtomsList: the constructed atoms objects, in the order in which
            `self.fitting_configs` yields them.
        """
        #This group sits at the bottom of the stack, so its own
        #configurations are what gets returned.
        collected = AtomsList()
        for source in self.fitting_configs:
            atoms = Atoms(source)
            collected.append(atoms)
        return collected
示例#9
0
    def _get_substitution(self):
        '''Creates one copy of `self.atoms` for every set of chemical symbols
        returned by `self._set_stoichiometry()`.

        Returns:
            AtomsList: copies of `self.atoms` with their chemical symbols
            replaced, in the order of the returned combinations.
        '''
        # Seed NumPy's global generator first so that the stoichiometry
        # selection is reproducible.
        np.random.seed(self.ran_seed)
        symbol_sets = self._set_stoichiometry()

        substituted = AtomsList()
        for symbols in symbol_sets:
            candidate = self.atoms.copy()
            candidate.set_chemical_symbols(symbols)
            substituted.append(candidate)
        return substituted
示例#10
0
    def _get_vacancies(self):
        '''Creates copies of `self.atoms` with atoms removed (vacancies).

        The number of vacancies per cell is `int(natoms * self.vac_per_atom)`.
        When the number of possible vacancy combinations exceeds 1000 the
        indices come from `self._get_random_choice`; otherwise they come from
        `self._get_combinations`.

        Returns:
            tuple: `(atom_seed, select_atoms)` where `atom_seed` is an
            AtomsList holding one copy of `self.atoms` per selection with the
            selected atoms deleted, and `select_atoms` holds the selections
            that were removed.
        '''
        natoms = len(self.atoms.get_positions())
        nvacancies = int(natoms * self.vac_per_atom)

        # Seed NumPy's global generator so the selection is reproducible.
        np.random.seed(self.ran_seed)

        # Pick the selection strategy from the size of the search space.
        if choose(natoms, nvacancies) > 1000:
            selector = self._get_random_choice
        else:
            selector = self._get_combinations
        select_atoms = selector([], natoms, nvacancies)

        atom_seed = AtomsList()
        for indices in select_atoms:
            vacant = self.atoms.copy()
            del vacant[indices]
            atom_seed.append(vacant)
        return atom_seed, select_atoms
示例#11
0
    def rset(self):
        """Returns the atoms objects carrying the Hessian matrix for this
        group and its possible sub-sequences.

        Returns:
            list: list of :class:`~matdb.atoms.Atoms`. At the bottom of the
            stack this is `self.atoms` with `self.H` stored under the `H` key
            of its info dictionary.
        """
        if len(self.sequence) == 0:
            #Bottom of the stack: attach the hessian matrix to the atoms
            #object it belongs to and return it on its own.
            self.atoms.info["H"] = self.H
            single = AtomsList()
            single.append(self.atoms)
            return single

        if isinstance(self.parent, Hessian):
            #The parent is itself a Hessian group, so there are several
            #candidates in the sequence; return only the one that
            #`_best_bands` selects.
            bestkey = self._best_bands()
            return self.sequence[bestkey].rset

        #Otherwise combine the result sets of every member of the sequence.
        combined = AtomsList()
        for member in self.sequence.values():
            combined.extend(member.rset)
        return combined
示例#12
0
 def fitting_configs(self):
     """Returns a :class:`~matdb.atoms.AtomsList` for all configs in this
     group. This list includes a single *duplicated* configuration for each
     of the eigenvalue/eigenvector combinations of the Hessian matrix.

     Returns:
         AtomsList: the values of `self.config_atoms` when this group has no
         sub-sequences and is ready; an empty list when it is not ready;
         otherwise the combined fitting configs of every sub-sequence.
     """
     result = AtomsList()
     if len(self.sequence) == 0:
         if self.ready():
             #Wrap the values so that every branch returns the same type;
             #a bare `dict.values()` does not support the list interface
             #on Python 3.
             for atoms in self.config_atoms.values():
                 result.append(atoms)
     else:
         for g in self.sequence.values():
             result.extend(g.fitting_configs)
     return result
示例#13
0
    def validate(self,
                 datafile=None,
                 tfilter=None,
                 sfilter=None,
                 energy=True,
                 force=True,
                 virial=True):
        """Runs validation for every trainer selected by the filters.

        Each trainer returned by `self.ifiltered(tfilter, sfilter)` has its
        own `validate` called with the same configurations and flags.

        Args:
            datafile (str): path to the data file to read the atoms list
              from. When omitted, `None` is passed on to each trainer.
            tfilter (list): list of `str` patterns to match against *fit*
              names.
            sfilter (list): list of `str` patterns to match against *step*
              names.
            energy (bool): when True, validate the energies of each
              configuration.
            force (bool): when True, validate the force *components* of each
              configuration.
            virial (bool): when True, validate the virial *components* of
              each configuration.
        """
        configs = None if datafile is None else AtomsList(datafile)

        for trainer in self.ifiltered(tfilter, sfilter):
            trainer.validate(configs, energy, force, virial)
示例#14
0
    def configs(self, kind, asatoms=True):
        """Compiles (when needed) and loads the configurations of one kind.

        The combined file for `kind` is only built when it does not exist
        yet. Building it splits every database in `self.dbs`, applies the
        database filters and concatenates the resulting files.

        Args:
            kind (str): possible values are ['train', 'holdout', 'super'].
            asatoms (bool): when True, read the combined file and return it
              as a :class:`~matdb.atoms.AtomsList`; otherwise the file is
              only compiled and `None` is returned.

        Returns:
            matdb.atoms.AtomsList: Atoms list for the specified configuration
            class, or `None` when `asatoms` is False.
        """
        #Name of the per-split file getter on each database, by kind.
        getters = {
            "train": "train_file",
            "holdout": "holdout_file",
            "super": "super_file"
        }
        #Path of the combined file on this trainer, by kind.
        combined = {
            t: getattr(self, "_{}file".format(t))
            for t in ["train", "holdout", "super"]
        }
        cfile = combined[kind]

        if not path.isfile(cfile):
            sources = []
            for seq in self.dbs:
                #Splitting is required to obtain training data. A split that
                #was already done for another training run is not repeated.
                msg.info("Compiling database {} for {}.".format(
                    seq.name, self.fqn))
                seq.split()
                if seq.name in self.cust_splits:
                    splt = self.cust_splits[seq.name]
                else:
                    splt = self.split

                #Collect the files that match the split pattern; filters on
                #individual atoms objects are applied afterwards.
                if splt == '*':
                    candidates = []
                    for dbsplit in seq.splits:
                        candidates.extend([getattr(seq, name)(dbsplit)
                                           for name in getters.values()])
                else:
                    candidates = [getattr(seq, getters[kind])(splt)]

                sources.extend(self._filter_dbs(seq.name, candidates))

            #Training sets also include the extras: files with additional
            #trainer-specific configs.
            if kind == "train":
                sources.extend(self.extras())

            #Save all of the configurations to a single file.
            dbcat(sources, cfile)

        if asatoms:
            return AtomsList(cfile)
示例#15
0
    def __init__(self, name=None, root=None, controller=None, splits=None,
                 folder=None, pattern=None, config_type=None, energy="dft_energy",
                 force="dft_force", virial="dft_virial", limit=None):
        """Sets up the legacy database, creating its combined files if needed.

        Args:
            name (str): name of the database; also the name of its sub-folder
              inside `root`.
            root (str): parent folder; the database lives in `root/name`,
              which is created when missing.
            controller: owning controller, or `None`. Its `ran_seed` is
              copied when available; otherwise the seed is 0.
            splits (dict): split specification; defaults to an empty dict.
            folder, pattern, energy, force, virial, config_type: forwarded to
              `self._create_dbfull` when the full database file does not
              exist yet.
            limit (int): when given, a random subset of this many
              configurations from the full database is written to the
              limited database file.
        """
        self.name = name
        self.root = path.join(root, self.name)
        if not path.isdir(self.root):
            from os import mkdir
            mkdir(self.root)

        self.controller = controller
        self.splits = {} if splits is None else splits
        self.folder = folder

        if self.controller is None:
            self.ran_seed = 0
        else:
            self.ran_seed = self.controller.ran_seed

        #NOTE: with `limit=None` the file name becomes "legacy-None.h5".
        self._dbfile = path.join(self.root, "legacy-{}.h5".format(limit))
        """str: path to the combined legacy database, with limits included.
        """
        self._dbfull = path.join(self.root, "legacy.h5")
        """str: path to the combined legacy database, *without* limits.
        """
        self.dbfiles = []
        self.config_type = config_type

        from matdb.database.utility import dbconfig
        #The configuration is read from the *full* database file, while the
        #existence check below is made on the limited file.
        config = dbconfig(self._dbfull)
        if path.isfile(self._dbfile) and len(config) > 0:
            #Database already exists; restore its settings from the stored
            #configuration.
            self.dbfiles = [db[0] for db in config["sources"]]
            self.config_type = config["config_type"]
            self.folder = folder
        else:
            from matdb.utility import dbcat
            if not path.isfile(self._dbfull):
                self._create_dbfull(folder, pattern, energy, force, virial, config_type)

            if limit is not None:
                msg.std("Slicing limit subset of full {} db.".format(self.name))
                full = AtomsList(self._dbfull)
                #Shuffle all indices and keep the first `limit` of them.
                #NOTE(review): the shuffle uses NumPy's global random state;
                #`self.ran_seed` is not applied anywhere in this method.
                N = np.arange(len(full))
                np.random.shuffle(N)
                ids = N[0:limit]
                part = full[ids]
                part.write(self._dbfile)
                #presumably records the source file, limit and chosen ids
                #alongside the new file without concatenating (docat=False);
                #verify against `dbcat`.
                dbcat([self._dbfull], self._dbfile, docat=False, limit=limit,
                      ids=ids)
            else:
                #No limit requested: presumably makes `_dbfile` a link to the
                #full database; confirm the argument order of `symlink`.
                from matdb.utility import symlink
                symlink(self._dbfile, self._dbfull)

        #The rest of matdb expects each database to have an atoms object that is
        #representative. Just take the first config in the combined database.
        self.atoms = Atoms(self._dbfile)
def test_rename(rendb):
    """Checks that properties are renamed to follow `matdb` conventions and
    that the combined database holds the expected number of entries.
    """
    dbfile = rendb._dbfile

    # A single atoms object read from the database file must expose the
    # renamed keys.
    head = Atoms(dbfile)
    assert "ref_energy" in head.params
    assert "ref_force" in head.properties
    assert "ref_virial" in head.params
    assert head.params["config_type"] == "re"

    everything = AtomsList(dbfile)
    assert len(everything) == 80
def test_merge(phondb):
    """Checks that the source databases are merged into a single DB and that
    a second database built from a different file pattern ends up with the
    same representative atoms object.
    """
    merged = AtomsList(phondb._dbfile)
    assert len(merged) == 150
    assert merged[20].params["config_type"] == "ph"

    parent = path.dirname(phondb.root)
    other = LegacyDatabase("AgPd-50", parent, None, phondb.splits,
                           phondb.folder, "p-50-*.xyz", "ph")
    assert other.atoms == phondb.atoms
示例#18
0
    def cleanup(self):
        """Extracts the calibration information from the configurations to
        determine the maximum allowable amplitude to maintain linear force
        regime.

        The amplitude and force values of every configuration whose forces
        could be read are written to `self.outfile`.

        Returns:
           bool: True if forces were extracted for more than three
           configurations; False if the parent cleanup or the XYZ extraction
           failed, or if too few configurations produced forces.
        """
        if not super(Calibration, self).cleanup():
            msg.warn("cannot cleanup calibration; not all configs ready.")
            return False

        success = self.xyz()
        if not success:
            msg.warn("could not extract the calibration XYZ configurations.")
            return False
        else:
            imsg = "Extracted calibration configs from {0:d} folders."
            msg.okay(imsg.format(len(self.configs)))

        #Read in the XYZ file and extract the forces on each atom in each
        #configuration.
        from matdb.atoms import AtomsList
        forces = {}
        failed = 0
        for cid, folder in self.configs.items():
            #Find the mean, *absolute* force in each of the directions. There
            #will only be one atom in the atoms list. If the calculation didn't
            #finish, then we exclude it. This happens for some of the
            #calibration runs if the atoms are too close together.
            try:
                al = AtomsList(path.join(folder, "output.xyz"))
                forces[cid] = np.mean(np.abs(np.array(al[0].dft_force)),
                                      axis=1)
            except Exception:
                #Best effort: count the failure and move on. Catching
                #`Exception` (not a bare `except`) lets KeyboardInterrupt
                #and SystemExit propagate.
                failed += 1

        if failed > 0:
            msg.warn(
                "couldn't extract forces for {0:d} configs.".format(failed))

        if len(forces) > 0:
            #One line per configuration: amplitude followed by the forces.
            fmt = "{0:.7f}  {1:.7f}  {2:.7f}  {3:.7f}\n"
            with open(self.outfile, 'w') as f:
                for cid in forces:
                    A, F = self.amplitudes[cid], forces[cid]
                    f.write(fmt.format(A, *F))
        else:
            msg.warn("no forces available to write {}.".format(self.outfile))

        return len(forces) > 3
示例#19
0
 def rset(self):
     """Collects the atoms objects of the latest result set.

     Returns:
         AtomsList: when this group has no sub-sequences, one `Atoms` object
         read from the `atoms.h5` file in every folder returned by
         `self.atoms_paths()`; otherwise the combined result sets of all
         sub-sequences.
     """
     collected = AtomsList()
     if len(self.sequence) > 0:
         # Delegate to the members of the sequence.
         for member in self.sequence.values():
             collected.extend(member.rset)
     else:
         # Bottom of the stack: read the configurations of this group.
         for folder in self.atoms_paths():
             atoms_file = path.join(folder, "atoms.h5")
             collected.append(Atoms(atoms_file))
     return collected
示例#20
0
    def split(self, recalc=0):
        """Splits the database once for every `split` setting in the
        database specification.

        Args:
            recalc: forwarded unchanged to the split utility.
        """
        from matdb.database.utility import split as split_database

        # All configurations of this database are the input of the split.
        everything = AtomsList(self._dbfile)

        # Functions that produce the target file for each kind of subset.
        targets = {
            "train": self.train_file,
            "holdout": self.holdout_file,
            "super": self.super_file,
        }

        split_database(everything, self.splits, targets, self.root,
                       self.ran_seed, dbfile=self._dbfile, recalc=recalc)
def test_split(phondb):
    """Checks the splitting of a legacy database: subset sizes, the stored
    ids file, and that a removed subset file is rebuilt identically.
    """
    from os import remove

    # The size assertions below are written for 150 configurations.
    total = 150
    reference = {}

    phondb.split()
    for name, fraction in phondb.splits.items():
        tfile, hfile, sfile = [
            path.join(getter(name).format(name))
            for getter in (phondb.train_file, phondb.holdout_file,
                           phondb.super_file)
        ]

        training = AtomsList(tfile)
        holdout = AtomsList(hfile)
        remainder = AtomsList(sfile)
        reference[name] = remainder

        ntrain = len(training)
        nhold = len(holdout)
        assert ntrain == int(np.ceil(total * fraction))
        assert nhold == int(np.ceil((total - ntrain) * fraction))
        assert len(remainder) == total - ntrain - nhold

        idsfile = path.join(phondb.root, "{}-ids.pkl".format(name))
        assert path.isfile(idsfile)

    #A second call has nothing left to do and should simply return.
    phondb.split()

    #Delete the super files so that the next split has to rebuild them from
    #the existing ids.
    for name in phondb.splits:
        remove(path.join(phondb.super_file(name).format(name)))

    phondb.split()

    for name in phondb.splits:
        rebuilt = AtomsList(path.join(phondb.super_file(name).format(name)))
        assert rebuilt == reference[name]
示例#22
0
def test_AtomsList_empty_io(tmpdir):
    """Checks that an empty AtomsList can be written to disk and read back
    as an empty list.
    """
    from matdb.atoms import Atoms, AtomsList
    from os import path

    target = str(tmpdir.join("empty_AtomsList"))
    globals_setup(target)

    if not path.isdir(target):
        mkdir(target)

    h5file = path.join(target, "temp.h5")

    # Writing must create the file even though there is nothing to store.
    written = AtomsList([])
    written.write(h5file)
    assert len(written) == 0
    assert path.isfile(h5file)

    # Reading the file back must give an empty list again.
    restored = AtomsList()
    restored.read(h5file)
    assert len(restored) == 0
示例#23
0
def h5cat(files, target):
    """Combines several h5 AtomsList files into one AtomsList file.

    Args:
        files (list): list of `string` file paths to combine; they are
          appended in the order given.
        target (str): name/path of the output file that receives the
          combined contents.
    """
    # Imported here rather than at module level to avoid a cyclic import.
    from matdb.atoms import AtomsList

    combined = AtomsList()
    for source in files:
        combined.extend(AtomsList(source))
    combined.write(target)
示例#24
0
def test_cfg(tmpdir):
    """Checks that converting MTP's CFG format to XYZ reproduces the
    positions, forces, stress and energy of the reference XYZ file.
    """
    target = str(tmpdir.join("cfg.xyz"))
    reference = AtomsList(relpath("tests/files/io_convert/atoms.xyz"))
    converted = cfg_to_xyz(relpath("tests/files/io_convert/atoms.cfg"),
                           target,
                           species=[46, 47])

    for expected, actual in zip(reference, converted):
        assert np.allclose(expected.get_positions(), actual.get_positions())

        forces = actual.calc.results["forces"]
        assert np.allclose(expected.get_forces(), forces)

        # The stress is compared with looser tolerances than the defaults.
        stress = actual.calc.results["stress"]
        assert np.allclose(expected.get_stress(), stress,
                           atol=1e-4, rtol=1e-3)

        assert expected.get_total_energy() == actual.calc.results["energy"]
示例#25
0
def test_AtomsList_sort():
    """Checks AtomsList.sort with a key function, with an attribute name,
    and in reverse order; slicing is checked along the way.
    """
    from matdb.atoms import Atoms, AtomsList

    def formulas(atoms_list):
        """Chemical formulas of the first four entries, in list order."""
        return [atoms_list[i].get_chemical_formula() for i in range(4)]

    at1 = Atoms("Si8",
                positions=[[0, 0, 0], [0.25, 0.25, 0.25], [0.5, 0.5, 0],
                           [0.75, 0.75, 0.25], [0.5, 0, 0.5],
                           [0.75, 0.25, 0.75], [0, 0.5, 0.5],
                           [0.25, 0.75, 0.75]],
                cell=[5.43, 5.43, 5.43], info={"rand": 10})
    at2 = Atoms("S6",
                positions=[[0, 0, 0], [0.25, 0.25, 0.25], [0.5, 0.5, 0],
                           [0.75, 0.75, 0.25], [0.5, 0, 0.5],
                           [0.75, 0.25, 0.75]],
                cell=[6.43, 5.43, 4.43], info={"rand": 10})
    at3 = Atoms("CNi", positions=[[0, 0, 0], [0.5, 0.5, 0.5]],
                info={"rand": 8})
    at4 = Atoms("CoV", positions=[[0, 0, 0], [0.25, 0.5, 0.25]],
                info={"rand": 8})

    # Energies increase from at1 to at4.
    at1.add_param("vasp_energy", 25361.504084423999)
    at2.add_param("vasp_energy", 25362.504084423999)
    at3.add_param("vasp_energy", 25363.504084423999)
    at4.add_param("vasp_energy", 25364.504084423999)

    al1 = AtomsList([at4, at2, at1, at3])

    # Slicing goes through __getitem__.
    head = al1[0:2]
    assert len(head) == 2

    # Sort by number of atoms.
    al1.sort(key=len)
    assert formulas(al1)[0] == "CoV"
    assert formulas(al1)[1] == "CNi"
    assert formulas(al1)[2] == "S6"
    assert formulas(al1)[3] == "Si8"

    # Sort by energy, ascending.
    al1.sort(attr="vasp_energy")
    assert formulas(al1)[0] == "Si8"
    assert formulas(al1)[1] == "S6"
    assert formulas(al1)[2] == "CNi"
    assert formulas(al1)[3] == "CoV"

    # Sort by energy, descending.
    al1.sort(attr="vasp_energy", reverse=True)
    assert formulas(al1)[0] == "CoV"
    assert formulas(al1)[1] == "CNi"
    assert formulas(al1)[2] == "S6"
    assert formulas(al1)[3] == "Si8"
示例#26
0
    def fitting_configs(self):
        """Gathers every configuration of this group into a single
        :class:`matdb.atoms.AtomsList`.

        Returns:
            AtomsList: the values of `self.config_atoms` when this group has
            no sub-sequences; otherwise the combined fitting configs of every
            member of `self.sequence`.
        """
        gathered = AtomsList()
        if len(self.sequence) > 0:
            for member in self.sequence.values():
                gathered.extend(member.fitting_configs)
        else:
            for atoms in self.config_atoms.values():
                gathered.append(atoms)

        return gathered
示例#27
0
    def _filter_dbs(self, seqname, dbfiles):
        """Filters each of the database files specified so that they conform to
        any specified filters.

        A filtered copy named `__<folder name>.h5` is written to `self.root`
        for every database file that loses at least one configuration; files
        that lose none are returned unchanged.

        Args:
            seqname (str): name of the sequence that the database files are
              from.
            dbfiles (list): list of `str` paths to database files to filter.

        Returns:
            list: list of `str` paths to include in the database from this sequence.
        """
        if len(self.dbfilter) > 0 and seqname in self._dbfilters:
            filtered = []
            #The filters values have a function and a list of the actual values
            #used in the formula replacement. Extract the parameters; we can't
            #serialize the actual functions.
            filters = self._dbfilters[seqname].items()
            params = {k: v[1] for k, v in filters}

            for dbfile in dbfiles:
                dbname = path.basename(path.dirname(dbfile))
                filtdb = path.join(self.root, "__{}.h5".format(dbname))
                #NOTE(review): a file whose filtered copy already exists is
                #skipped without being added to the result; confirm that
                #leaving it out is intended.
                if path.isfile(filtdb):
                    continue

                al = AtomsList(dbfile)
                nl = AtomsList()
                for a in al:
                    #The 0 index here gets the function out; see comment above
                    #about the filters dictionary. A configuration is kept
                    #only when none of the filter functions returns True.
                    if not any(opf[0](getattr(a, attr))
                               for attr, opf in filters):
                        nl.append(a)

                if len(nl) != len(al):
                    nl.write(filtdb)
                    dN, N = (len(al) - len(nl), len(nl))
                    dbcat([dbfile], filtdb, filters=params, dN=dN, N=N)
                    filtered.append(filtdb)
                else:
                    #Nothing was removed, so the original file is used as is.
                    filtered.append(dbfile)
        else:
            filtered = dbfiles

        return filtered
示例#28
0
    def rset(self):
        """Returns the reusable set to the next database group.

        Returns:
            list: list of :class:`~matdb.atoms.Atoms`. When this group has no
            sub-sequences, one object is read from the `atoms.h5` file in
            each folder of `self.fitting_configs`; otherwise the result sets
            of all sub-sequences are combined.
        """
        if len(self.sequence) == 0:
            #Bottom of the stack: read the atoms of this group from disk.
            own = AtomsList()
            for folder in self.fitting_configs:
                atoms_file = path.join(folder, "atoms.h5")
                own.append(Atoms(atoms_file))
            return own

        #Not at the bottom: gather the atoms objects of every member of the
        #sequence and wrap them in a single list.
        gathered = [atoms
                    for member in self.sequence.values()
                    for atoms in member.rset]
        return AtomsList(gathered)
示例#29
0
def test_reading_multiple_files(tmpdir):
    """Checks that an AtomsList can be populated from more than one file.

    Two Si8 cells, each with a Vasp calculator using a different `encut`, are
    written to `temp.h5` and `temp2.h5`. They are read back first from a list
    of the two paths, and then from `temp3.h5`, a file that stores a dictionary
    of those same two paths.
    """

    from matdb.calculators import Vasp
    from matdb.atoms import Atoms, AtomsList
    from matdb.io import save_dict_to_h5
    import h5py
    from matdb.utility import _set_config_paths

    _set_config_paths("AgPd_Enumerated", str(tmpdir))
    target = str(tmpdir.join("read_atoms2"))
    globals_setup(target)

    if not path.isdir(target):
        mkdir(target)

    def _write_si8(lattice, encut, filename):
        """Builds a cubic Si8 cell with a Vasp calculator, writes it into
        `target` under `filename`, and returns the full path.
        """
        atoms = Atoms("Si8",
                      positions=[[0, 0, 0], [0.25, 0.25, 0.25],
                                 [0.5, 0.5, 0], [0.75, 0.75, 0.25],
                                 [0.5, 0, 0.5], [0.75, 0.25, 0.75],
                                 [0, 0.5, 0.5], [0.25, 0.75, 0.75]],
                      cell=[lattice, lattice, lattice])
        settings = {"encut": encut,
                    "kpoints": {"rmin": 50},
                    "potcars": {"xc": "pbe",
                                "directory": "./tests/vasp",
                                "versions": {"Si": "05Jan2001"}}}
        calculator = Vasp(atoms, target, '.', 0, **settings)
        atoms.set_calculator(calculator)

        destination = path.join(target, filename)
        atoms.write(destination)
        return destination

    def _check_two_distinct(atoms_list):
        """Asserts that exactly two entries came back and that their `encut`
        values differ and are each one of the two values written.
        """
        assert len(atoms_list) == 2
        first = atoms_list[0].calc.kwargs["encut"]
        second = atoms_list[1].calc.kwargs["encut"]
        assert first != second
        assert second in [400, 600]
        assert first in [400, 600]

    temp = _write_si8(5.43, 400, "temp.h5")
    temp2 = _write_si8(6.43, 600, "temp2.h5")

    # Read directly from a list of file paths.
    _check_two_distinct(AtomsList([temp, temp2]))

    # Read from a single file that holds a dictionary of the file paths.
    temp3 = path.join(target, "temp3.h5")
    with h5py.File(temp3, "w") as hf:
        save_dict_to_h5(hf, {"atom_1": temp, "atom_2": temp2}, '/')

    _check_two_distinct(AtomsList(temp3))
示例#30
0
def test_AtomsList_io(tmpdir):
    """Round-trips an AtomsList through `temp.h5` and `temp.xyz`.

    Four atoms objects are written out and read back; the test checks the
    number of entries, that each original's positions appear among the entries
    read from the h5 file, that a second `read` of the xyz file doubles the
    length, and that `Atoms` can be constructed from the h5 path.
    """
    from matdb.atoms import Atoms, AtomsList

    def _positions_present(stored, reference):
        """True when one of the first four entries of `stored` has the same
        length as, and is numerically close to, `reference.positions`.
        """
        return any([np.allclose(stored[i], reference.positions)
                    for i in range(4)
                    if len(stored[i]) == len(reference.positions)])

    at1 = Atoms("Si8",
                positions=[[0, 0, 0], [0.25, 0.25, 0.25], [0.5, 0.5, 0],
                           [0.75, 0.75, 0.25], [0.5, 0, 0.5],
                           [0.75, 0.25, 0.75], [0, 0.5, 0.5],
                           [0.25, 0.75, 0.75]],
                cell=[5.43, 5.43, 5.43],
                info={"rand": 10})

    at2 = Atoms("S6",
                positions=[[0, 0, 0], [0.25, 0.25, 0.25], [0.5, 0.5, 0],
                           [0.75, 0.75, 0.25], [0.5, 0, 0.5],
                           [0.75, 0.25, 0.75]],
                cell=[6.43, 5.43, 4.43],
                info={"rand": 10})

    at3 = Atoms("CNi", positions=[[0, 0, 0], [0.5, 0.5, 0.5]])
    at4 = Atoms()
    at4.copy_from(at3)

    al1 = AtomsList([at1, at2, at3, at4])

    target = str(tmpdir.join("atomList_to_hdf5"))
    if not path.isdir(target):
        mkdir(target)

    h5_file = path.join(target, "temp.h5")
    xyz_file = path.join(target, "temp.xyz")

    # HDF5 round trip.
    al1.write(h5_file)

    aR = AtomsList()
    aR.read(h5_file)

    assert len(aR) == len(al1)

    alpos = aR.positions
    for original in (at1, at2, at3, at4):
        assert _positions_present(alpos, original)

    # XYZ round trip.
    al1.write(xyz_file)

    aR = AtomsList()
    aR.read(xyz_file)

    assert len(aR) == len(al1)

    # A second read into the same list is expected to add the entries again.
    aR.read(xyz_file)
    assert len(aR) == 2*len(al1)

    # Test reading in of a single atoms object.
    aR1 = Atoms(h5_file)
    assert isinstance(aR1, Atoms)
    # NOTE(review): this re-checks `alpos` from the earlier h5 read rather
    # than anything on `aR1` -- confirm whether `aR1.positions` was intended.
    assert _positions_present(alpos, at1)