示例#1
0
文件: map3b.py 项目: kitpeng11/flare
    def __init__(self, **kwargs):
        """
        Build 3-body MGP

        Sets the three-body constants on the instance, forwards all keyword
        arguments to the parent constructor, resets the bounds and derives
        the species-dependent names.
        """

        # Constants describing a three-body map; set before the parent
        # constructor is invoked.
        self.bodies = 3
        self.grid_dim = 3
        self.kernel_name = "threebody"
        self.pred_perm = [[0, 1, 2], [1, 0, 2]]

        super().__init__(**kwargs)

        # initialize bounds
        self.set_bounds(None, None)

        # Element symbols of the first three species, joined by underscores
        triplet = self.species
        symbols = [Z_to_element(triplet[idx]) for idx in range(3)]
        self.species_code = "_".join(symbols)
        self.kv3name = f"kv3_{self.species_code}"
示例#2
0
def test_Z_to_element():
    """Z_to_element returns symbols for valid codes and rejects "a"."""
    # Every atomic number from 1 through 117 must map to a string
    assert all(isinstance(Z_to_element(z), str) for z in range(1, 118))

    # Integer and numeric-string inputs with their expected symbols
    expected = {1: "H", 6: "C", "8": "O", "118": "Og"}
    for code, symbol in expected.items():
        assert Z_to_element(code) == symbol

    # A non-numeric string must be rejected
    with raises(ValueError):
        Z_to_element("a")
示例#3
0
def test_Z_to_element():
    """Exercise Z_to_element on a range of codes, known cases and bad input."""
    for atomic_number in range(1, 118):
        symbol = Z_to_element(atomic_number)
        assert isinstance(symbol, str)

    # (input code, expected symbol); codes may be ints or numeric strings
    known_cases = [(1, 'H'), (6, 'C'), ('8', 'O'), ('118', 'Og')]
    for code, expected_symbol in known_cases:
        assert Z_to_element(code) == expected_symbol

    with raises(ValueError):
        Z_to_element('a')
示例#4
0
    def training_statistics(self) -> dict:
        """
        Summarize the training data currently held by each expert.

        :return: dict with the total number of environments ("N"), the
            number per expert ("N_<i>"), the distinct species present
            ("species") and the environment count per species
            ("envs_by_species").
        """

        # "N" is created first so that it stays the leading key
        stats = {"N": 0}
        symbols = []

        for expert in range(self.n_experts):
            n_envs = self.n_envs_prev[expert]
            stats["N"] += n_envs
            stats[f"N_{expert}"] = n_envs

            # Element symbol of the central atom of each environment
            paired = zip(self.training_data[expert], self.training_labels[expert])
            symbols.extend(
                Z_to_element(env.structure.coded_species[env.atom])
                for env, _ in paired
            )

        stats["species"] = list(set(symbols))
        stats["envs_by_species"] = dict(Counter(symbols))

        return stats
    def update_gp_and_print(
        self,
        frame: Structure,
        train_atoms: List[int],
        uncertainties: List[int] = None,
        train: bool = True,
    ):
        """
        Add the selected atoms of a frame to the GP training set and log it.

        Does nothing when train_atoms is empty. When the model is a mapped
        GP, only a warning is logged and no data is added.

        :param frame: Structure to train on
        :param train_atoms: Indices of the atoms in the frame to train on
        :param uncertainties: Uncertainties to print; when None, the entries
            of frame.stds for the training atoms are used. Pass in [] to
            silence.
        :param train: Re-train the GP after adding the data
        :return: None
        """

        if not train_atoms:
            return

        # Group added atoms by species for easier output
        symbols = [Z_to_element(frame.coded_species[idx]) for idx in train_atoms]
        atoms_by_species = {symbol: [] for symbol in set(symbols)}
        for idx, symbol in zip(train_atoms, symbols):
            atoms_by_species[symbol].append(idx)

        log = logging.getLogger(self.logger_name)
        grouped_json = json.dumps(atoms_by_species, cls=NumpyEncoder)
        log.info(f"Adding atom(s) {grouped_json} to the training set.")

        if uncertainties is None:
            uncertainties = frame.stds[train_atoms]

        have_uncertainties = uncertainties is not None and len(uncertainties) != 0
        if have_uncertainties:
            log.info(f"Uncertainties: {uncertainties}.")

        # NOTE(review): these statistics are logged before update_db runs
        # below; confirm that "New" is the intended label.
        stats_json = json.dumps(self.gp.training_statistics)
        log.info(f"New GP Statistics: {stats_json}\n")

        # A mapped GP cannot take new data; warn and stop here
        if self.gp_is_mapped:
            log.warning("Warning: Adding data to an MGP is not yet supported.")
            return

        energy_label = frame.energy if self.include_energies else None
        self.gp.update_db(
            frame,
            frame.forces,
            custom_range=train_atoms,
            energy=energy_label,
        )

        if train:
            self.train_gp()
示例#6
0
    def __init__(self, **kwargs):
        """
        Build 2-body MGP

        Sets the two-body constants on the instance, forwards all keyword
        arguments to the parent constructor, resets the bounds and derives
        the species code.
        """

        # Constants describing a two-body map; set before the parent
        # constructor is invoked.
        self.bodies = 2
        self.grid_dim = 1
        self.kernel_name = "twobody"
        self.pred_perm = [[0]]

        super().__init__(**kwargs)

        # initialize bounds
        self.set_bounds(None, None)

        # Element symbols of the first two species, joined by an underscore
        pair = self.species
        self.species_code = "_".join(Z_to_element(pair[k]) for k in (0, 1))
示例#7
0
def test_to_xyz(varied_test_struc):
    """Check the plain and the fully annotated output of Structure.to_xyz."""

    # --- plain xyz: symbol followed by the three coordinates ---
    plain_lines = varied_test_struc.to_xyz(
        extended_xyz=False,
        print_stds=False,
        print_forces=False,
        print_max_stds=False,
    ).split("\n")

    # Two leading lines (atom count and comment/header) precede the atoms
    assert len(plain_lines) - 2 == len(varied_test_struc)

    for idx, line in enumerate(plain_lines[2:-1]):
        fields = line.split()
        assert fields[0] == Z_to_element(
            int(varied_test_struc.species_labels[idx]))
        for axis in range(3):
            assert float(fields[axis + 1]) == \
                varied_test_struc.positions[idx][axis]

    # --- extended xyz with stds, forces and max std switched on ---
    rich_lines = varied_test_struc.to_xyz(True, True, True, True).split("\n")

    assert len(rich_lines) - 2 == len(varied_test_struc)

    for idx, line in enumerate(rich_lines[2:-1]):
        fields = line.split()
        assert fields[0] == Z_to_element(
            int(varied_test_struc.species_labels[idx]))

        # (first column, reference array) for each three-component group
        column_groups = (
            (1, varied_test_struc.positions),
            (4, varied_test_struc.stds),
            (7, varied_test_struc.forces),
        )
        for first_col, reference in column_groups:
            for axis in range(3):
                assert float(fields[first_col + axis]) == reference[idx][axis]

        assert float(fields[10]) == np.max(varied_test_struc.stds[idx])
示例#8
0
    def update_gp_and_print(self,
                            frame: Structure,
                            train_atoms: List[int],
                            uncertainties: List[int] = None,
                            train: bool = True):
        """
        Update the internal GP model training set with a list of training
        atoms indexing atoms within the frame. If train is True, re-train
        the GP by optimizing hyperparameters.

        :param frame: Structure to train on
        :param train_atoms: Index atoms to train on
        :param uncertainties: Uncertainties to print. When None, the entries
            of frame.stds for the training atoms are printed instead; pass
            in [] to silence.
        :param train: Train or not
        :return: None
        """

        # Group added atoms by species for easier output
        added_species = [
            Z_to_element(frame.coded_species[at]) for at in train_atoms
        ]
        added_atoms = {spec: [] for spec in set(added_species)}

        for atom, spec in zip(train_atoms, added_species):
            added_atoms[spec].append(atom)

        logger = logging.getLogger(self.logger_name)
        logger.info('Adding atom(s) '
                    f'{json.dumps(added_atoms,cls=NumpyEncoder)}'
                    ' to the training set.')

        # Fall back to the frame's stds only when the caller gave nothing.
        # Previously any non-empty list passed in was overwritten here, so
        # caller-supplied uncertainties were never printed.
        if uncertainties is None:
            uncertainties = frame.stds[train_atoms]

        if uncertainties is not None and len(uncertainties) != 0:
            logger.info(f'Uncertainties: ' f'{uncertainties}.')

        # update gp model; handling differently if it's an MGP
        if not self.mgp:
            self.gp.update_db(frame, frame.forces, custom_range=train_atoms)

            if train:
                self.train_gp()

        else:
            logger.warning("Warning: Adding data to an MGP is not yet "
                           "supported.")
示例#9
0
文件: gp.py 项目: owaisahmad18/flare
    def training_statistics(self) -> dict:
        """
        Summarize the training data currently held by the GP.

        :return: dict with the number of training environments ("N"), the
            distinct species present ("species") and the environment count
            per species ("envs_by_species").
        """

        n_envs = len(self.training_data)

        # Element symbol of the central atom of each training environment
        symbols = [
            Z_to_element(env.structure.coded_species[env.atom])
            for env, _ in zip(self.training_data, self.training_labels)
        ]

        return {
            "N": n_envs,
            "species": list(set(symbols)),
            "envs_by_species": dict(Counter(symbols)),
        }
示例#10
0
    def write_gp_dft_comparison(
        self,
        curr_step,
        frame,
        start_time,
        dft_forces,
        dft_energy,
        error,
        local_energies=None,
        KE=None,
        mgp=False,
        cell=None,
        stress=None,
    ):
        """Log a per-atom comparison of the GP prediction against DFT.

        :param curr_step: current timestep
        :param frame: Structure object that contains the current GP
            calculation results (positions, forces, stds, species).
        :param start_time: start time, passed on to write_wall_time
        :param dft_forces: forces computed by DFT, indexed [atom][component]
        :param dft_energy: total energy computed by DFT
        :param error: force differences between DFT and GP prediction,
            indexed as error[atom, :]
        :param local_energies: local atomic energies; when given, potential
            and total energy lines are added to the log
        :param KE: total kinetic energy, added to the potential energy
        :param mgp: if truthy, the force column is labelled "MGP" instead of
            "GP"
        :param cell: unit cell to print, rounded to 4 decimals
        :param stress: any truthy value raises NotImplementedError

        :return:
        """

        def triplet(vec):
            # Three components, each formatted with the "10.5" spec and
            # followed by a single space
            return "".join(f"{vec[axis]:10.5} " for axis in range(3))

        # Mark if a frame had DFT forces with an asterisk
        chunks = [f"\n*-Frame: {curr_step}"]

        # Construct Header line
        chunks.append("\nEl  Position (A) \t\t\t\t ")
        if mgp:
            chunks.append("M")
        chunks.append("GP Force (ev/A)  \t\t\t\t")
        chunks.append("Std. Dev (ev/A) \t\t\t\t")
        chunks.append("DFT Force (ev/A)  \t\t\t\t \n")

        # Construct atom-by-atom description
        for idx in range(len(frame.positions)):
            chunks.append(
                f"{frame.species_labels[idx]} "
                + triplet(frame.positions[idx]) + "\t"
                + triplet(frame.forces[idx]) + "\t"
                + triplet(frame.stds[idx]) + "\t"
                + triplet(dft_forces[idx]) + "\n"
            )

        chunks.append("\n")

        # Print stress & cell related parameters
        if cell is not None:
            cell_rows = [list(vec) for vec in np.round(cell, 4)]
            chunks.append(f"cell: {cell_rows} \n")
        if stress:
            raise NotImplementedError

        # Compute errors and errors by species (scaled by 1000 for meV/A)
        mae = np.nanmean(error) * 1000
        mac = np.mean(np.abs(dft_forces)) * 1000
        chunks.append(f"mean absolute error: {mae:.2f} meV/A\n")
        chunks.append(f"mean absolute dft component: {mac:.2f} meV/A\n")
        # `stat` is assembled below but not written anywhere in this method
        stat = f"{curr_step} {mae:.2} {mac:.2}"

        symbols = [Z_to_element(code) for code in set(frame.coded_species)]
        err_by_symbol = dict.fromkeys(symbols, 0)
        n_by_symbol = dict.fromkeys(symbols, 0)

        for idx in range(frame.nat):
            symbol = Z_to_element(frame.coded_species[idx])
            atom_error = np.sum(error[idx, :])
            # Atoms whose summed error is NaN are left out of the averages
            if np.isnan(atom_error):
                continue
            err_by_symbol[symbol] += atom_error
            n_by_symbol[symbol] += 1

        chunks.append("mae per species\n")
        for symbol in symbols:
            if n_by_symbol[symbol] > 0:
                # Average over atoms and the 3 components, then put in meV/A
                err_by_symbol[symbol] = (
                    err_by_symbol[symbol] / (n_by_symbol[symbol] * 3) * 1000
                )
                chunks.append(
                    f"type {symbol} mae: {err_by_symbol[symbol]:.2f} meV/A\n"
                )
            stat += f" {err_by_symbol[symbol]:.2f}"

        # calculate potential and total energy
        if local_energies is None:
            pot_en = float("nan")
        else:
            pot_en = np.sum(local_energies)
            tot_en = KE + pot_en
            # NOTE(review): the parenthesis opened before "DFT" is never
            # closed in this message.
            chunks.append(
                f"potential energy: {pot_en:10.6} eV (DFT: {dft_energy} eV\n"
            )
            chunks.append(f"total energy: {tot_en:10.6} eV \n")
            stat += f" {pot_en:10.6} {tot_en:10.6}"

        if self.print_as_xyz:
            self.write_xyz_config(
                curr_step,
                frame,
                forces=frame.forces,
                stds=frame.stds,
                dft_forces=dft_forces,
                dft_energy=dft_energy,
                predict_energy=pot_en,
            )

        log = logging.getLogger(self.basename + "log")
        log.info("".join(chunks))
        self.write_wall_time(start_time)

        if self.always_flush:
            log.handlers[0].flush()
示例#11
0
    def __init__(
        self,
        grid_params: dict,
        unique_species: list = [],
        GP: GaussianProcess = None,
        var_map: str = None,
        container_only: bool = True,
        lmp_file_name: str = "lmp",
        n_cpus: int = None,
        n_sample: int = 10,
    ):
        """
        Store the settings and build one map object per "...body" entry of
        grid_params.

        :param grid_params: settings dict; keys containing "body" must be
            "twobody" or "threebody" and hold the per-map settings. The
            optional keys "load_grid", "update" and "lower_bound_relax" are
            also read.
        :param unique_species: species given as element symbols (str) or
            atomic numbers (int); stored sorted by atomic number.
        :param GP: not referenced in this constructor body.
        :param var_map: stored as an attribute.
        :param container_only: not referenced in this constructor body.
        :param lmp_file_name: stored as an attribute.
        :param n_cpus: stored as an attribute.
        :param n_sample: stored as an attribute.
        :raises KeyError: for a "...body" key other than the two allowed.
        """

        # load all arguments as attributes (order kept: the instance dict
        # is forwarded to the map constructors below)
        self.var_map = var_map
        self.lmp_file_name = lmp_file_name
        self.n_cpus = n_cpus
        self.n_sample = n_sample
        self.grid_params = grid_params
        self.species_labels = []
        self.coded_species = []

        self.hyps_mask = None
        self.cutoffs = None
        self.training_statistics = None

        # Translate every species entry into both a symbol and a number
        symbols, numbers = [], []
        for entry in unique_species:
            if isinstance(entry, str):
                symbols.append(entry)
                numbers.append(element_to_Z(entry))
            elif isinstance(entry, int):
                numbers.append(entry)
                symbols.append(Z_to_element(entry))
            else:
                print("element type not accepted", entry, type(entry))

        # Store both lists ordered by atomic number
        order = np.argsort(numbers)
        self.coded_species = [numbers[k] for k in order]
        self.species_labels = [symbols[k] for k in order]

        self.load_grid = grid_params.get("load_grid", None)
        self.update = grid_params.get("update", False)
        self.lower_bound_relax = grid_params.get("lower_bound_relax", 0.1)

        self.maps = {}

        for key in grid_params:
            if "body" not in key:
                continue

            if key == "twobody":
                map_cls = Map2body
            elif key == "threebody":
                map_cls = Map3body
            else:
                raise KeyError("Only 'twobody' & 'threebody' are allowed")

            body_params = grid_params[key]

            # set to 'auto' if the param is not given
            map_args = {
                name: body_params.get(name, "auto")
                for name in ("lower_bound", "upper_bound", "svd_rank")
            }
            map_args["grid_num"] = body_params.get("grid_num", None)

            # Every user-supplied setting is passed along as well
            map_args.update(body_params)

            self.maps[key] = map_cls(**map_args, **self.__dict__)
示例#12
0
    def run_passive_learning(
        self,
        frames: List[Structure] = (),
        environments: List[AtomicEnvironment] = (),
        max_atoms_per_frame: int = np.inf,
        post_training_iterations: int = 0,
        post_build_matrices: bool = False,
        max_elts_per_frame: Dict[str, int] = None,
        max_model_size: int = np.inf,
        max_model_elts: Dict[str, int] = None,
    ):
        """
        Load the GP with seed environments and atoms sampled from frames.

        ALL environments passed in are added. From each frame, atoms of
        every species are drawn with `sample` and added, subject to the
        limits below.

        If you want to skip frames, splice the input as
        frames[::skip_n].

        If you want to randomize the frame order, try the random module's
        shuffle function.

        :param frames: Structures to draw training atoms from.
        :param environments: Atomic environments added as-is, each labelled
            with its own `force` attribute.
        :param max_atoms_per_frame: Upper bound on the number of atoms taken
            from a single frame.
        :param post_training_iterations: If nonzero, train the GP afterwards
            with this value as max_iter.
        :param post_build_matrices: If True and no post-training is
            requested, call gp.check_L_alpha afterwards.
        :param max_elts_per_frame: Per-species cap on atoms taken from one
            frame, keyed by element symbol or atomic number. The dict passed
            in is not modified.
        :param max_model_size: Cap on the total number of environments in
            the GP training set.
        :param max_model_elts: Per-species cap on environments in the GP
            training set, keyed like max_elts_per_frame. The dict passed in
            is not modified.
        :raises NotImplementedError: if the model is a mapped GP.
        """

        if self.gp_is_mapped:
            raise NotImplementedError(
                "Passive learning not yet configured for MGP")

        # Work on copies: the normalization below inserts extra keys, and
        # the caller's dictionaries must not be altered as a side effect.
        max_elts_per_frame = (
            dict(max_elts_per_frame) if max_elts_per_frame is not None else {}
        )
        max_model_elts = (
            dict(max_model_elts) if max_model_elts is not None else {}
        )

        logger = logging.getLogger(self.logger_name)
        logger.debug("Beginning passive learning.")

        # If seed environments were passed in, add them to the GP.
        for env in environments:
            self.gp.add_one_env(env, env.force, train=False)

        # Ensure compatibility with number / symbol elemental notation:
        # every limit becomes reachable under both its atomic number and
        # its element symbol.
        for cur_dict in [max_elts_per_frame, max_model_elts]:
            for key in list(cur_dict.keys()):
                if isinstance(key, int):
                    cur_dict[Z_to_element(key)] = cur_dict[key]
                elif isinstance(key, str):
                    cur_dict[element_to_Z(key)] = cur_dict[key]

        # Main frame loop
        total_added = 0
        for frame in frames:
            current_stats = self.gp.training_statistics
            available_to_add = max_model_size - current_stats["N"]

            train_atoms = []
            for species_i in set(frame.coded_species):
                # Get a randomized set of atoms of species i from the frame
                # So that it is not always the lowest-indexed atoms chosen
                elt = Z_to_element(species_i)
                atoms_of_specie = frame.indices_of_specie(species_i)
                n_at = len(atoms_of_specie)
                # Determine how many to add based on user defined cutoffs
                n_add = min(
                    n_at,
                    max_elts_per_frame.get(species_i, inf),
                    max_atoms_per_frame - len(train_atoms),
                    available_to_add - len(train_atoms),
                    max_model_elts.get(elt, np.inf) -
                    current_stats["envs_by_species"].get(elt, 0),
                )
                # A limit that is already exceeded yields a negative count
                n_add = max(0, n_add)

                train_atoms += sample(atoms_of_specie, n_add)
                available_to_add -= n_add
                total_added += n_add

            self.update_gp_and_print(
                frame=frame,
                train_atoms=train_atoms,
                uncertainties=[],
                train=False,
            )

        logger.info(f"Added {total_added} atoms to "
                    "GP.\n"
                    "Current GP Statistics: "
                    f"{json.dumps(self.gp.training_statistics)} ")

        if post_training_iterations:
            logger.debug("Now commencing pre-run training of GP (which has "
                         "non-empty training set)")
            time0 = time.time()
            self.train_gp(max_iter=post_training_iterations)
            logger.debug(f"Done train_gp {time.time() - time0}")
        elif post_build_matrices:
            logger.debug(
                "Now commencing pre-run set up of GP (which has non-empty training set)"
            )
            time0 = time.time()
            self.gp.check_L_alpha()
            logger.debug(f"Done check_L_alpha {time.time() - time0}")
示例#13
0
    def from_dict(hyps_mask, verbose=False, init_spec=[]):
        """convert dictionary mask to HM instance
        This function is not tested yet

        :param hyps_mask: dictionary holding at least the keys "hyps",
            "cutoffs", "kernels" and "nspecie"; for more than one species
            also "specie_mask", and per-kernel mask / cutoff-list entries.
        :param verbose: passed on to the ParameterHelper constructor.
        :param init_spec: when non-empty, only elements whose symbol appears
            in it are assigned to a species group.
        :return: the populated ParameterHelper instance.
        """

        Parameters.check_instantiation(
            hyps_mask["hyps"], hyps_mask["cutoffs"], hyps_mask["kernels"], hyps_mask
        )

        pm = ParameterHelper(verbose=verbose)

        nspecie = hyps_mask["nspecie"]
        if nspecie > 1:
            max_species = np.max(hyps_mask["specie_mask"])
            specie_mask = hyps_mask["specie_mask"]
            for i in range(max_species + 1):
                # Positions in the mask assigned to group i; position 0 is
                # skipped below.
                elelist = np.where(specie_mask == i)[0]
                for ele in elelist:
                    if ele == 0:
                        continue
                    elename = Z_to_element(ele)
                    if len(init_spec) == 0 or elename in init_spec:
                        pm.define_group("specie", i, [elename])
        else:
            # Single species: one wildcard group. The loop index `i` is never
            # bound on this path, so the group index is given explicitly.
            pm.define_group("specie", 0, ["*"])

        for kernel in hyps_mask["kernels"] + ParameterHelper.cutoff_types_keys:
            n = hyps_mask.get("n" + kernel, 0)
            if n >= 0:
                if kernel not in ParameterHelper.cutoff_types:
                    chyps, copt = Parameters.get_component_hyps(
                        hyps_mask, kernel, constraint=True, noise=False
                    )
                    sig = chyps[0]
                    ls = chyps[1]
                    csig = copt[0]
                    cls = copt[1]
                    cutoff = hyps_mask["cutoffs"][kernel]
                    pm.set_parameters("cutoff_" + kernel, cutoff)
                    # Default: every group shares the kernel-wide cutoff
                    cutoff_list = hyps_mask.get(
                        f"{kernel}_cutoff_list", np.ones(len(sig)) * cutoff
                    )
                elif kernel in ParameterHelper.cutoff_types and n > 1:
                    cutoff_list = hyps_mask[
                        ParameterHelper.cutoff_types[kernel] + "_cutoff_list"
                    ]

                if n > 1:
                    all_specie = np.arange(nspecie)
                    all_comb = combinations_with_replacement(
                        all_specie, ParameterHelper.ndim[kernel]
                    )
                    for comb in all_comb:
                        # Read the species tuple as digits of a base-nspecie
                        # number to get its position in the kernel mask.
                        mask_id = 0
                        for ele in comb:
                            mask_id += ele
                            mask_id *= nspecie
                        mask_id = mask_id // nspecie
                        ttype = hyps_mask[f"{kernel}_mask"][mask_id]
                        pm.define_group(f"{kernel}", f"{kernel}{ttype}", comb)

                        if (kernel not in ParameterHelper.cutoff_types) and (
                            kernel not in ParameterHelper.cutoff_types_values
                        ):
                            pm.set_parameters(
                                f"{kernel}{ttype}",
                                [sig[ttype], ls[ttype], cutoff_list[ttype]],
                                opt=[csig[ttype], cls[ttype]],
                            )
                        elif kernel in ParameterHelper.cutoff_types_values:
                            pm.set_parameters(
                                f"{kernel}{ttype}",
                                [sig[ttype], ls[ttype]],
                                opt=[csig[ttype], cls[ttype]],
                            )
                        else:
                            pm.set_parameters(f"{kernel}{ttype}", cutoff_list[ttype])
                else:
                    # A single group that matches any species combination
                    pm.define_group(
                        kernel, kernel, ["*"] * ParameterHelper.ndim[kernel]
                    )
                    if kernel not in ParameterHelper.cutoff_types_keys:
                        pm.set_parameters(
                            kernel, parameters=np.hstack([sig, ls, cutoff]), opt=copt
                        )
                    else:
                        # NOTE(review): `cutoff` is only assigned in the
                        # non-cutoff-type branch above, so here it carries
                        # over from an earlier kernel iteration - confirm.
                        pm.set_parameters(kernel, parameters=cutoff)

        # The last hyperparameter is stored as the noise
        hyps = Parameters.get_hyps(hyps_mask)
        pm.set_parameters("noise", hyps[-1])

        if "cutoffs" in hyps_mask:
            cutoffs = hyps_mask["cutoffs"]
            for k in cutoffs:
                pm.set_parameters(f"cutoff_{k}", cutoffs[k])

        return pm
示例#14
0
    def to_xyz(
        self,
        extended_xyz: bool = True,
        print_stds: bool = False,
        print_forces: bool = False,
        print_max_stds: bool = False,
        print_energies: bool = False,
        predict_energy=None,
        dft_forces=None,
        dft_energy=None,
        timestep=-1,
        write_file: str = "",
        append: bool = False,
    ) -> str:
        """
        Convenience function which turns a structure into an extended .xyz
        file; useful for further input into visualization programs like VESTA
        or Ovito. Can be saved to an output file via write_file.

        :param extended_xyz: Write the extended-xyz header line (Lattice,
            optional energies and the Properties description) and allow the
            extra per-atom columns below; otherwise the header line is blank.
        :param print_stds: Add the three stds components of each atom.
        :param print_forces: Add the three force components of each atom.
        :param print_max_stds: Add the largest stds component of each atom.
        :param print_energies: Add the local energy of each atom; ignored
            when self.local_energies is None.
        :param predict_energy: Written as PE=... in the header when truthy.
        :param dft_forces: When given, three DFT force components are
            appended to each atom line.
        :param dft_energy: Written as DFT_PE=... in the header when not None.
        :param timestep: Written as Timestep=... in the header when positive.
        :param write_file: Path to write the result to; nothing is written
            when empty.
        :param append: Append to write_file instead of overwriting it.
        :return: The xyz text, without a trailing newline.
        """
        species_list = [Z_to_element(x) for x in self.coded_species]
        header = f"{len(self.coded_species)} \n"

        # Add header line with info about lattice and properties if extended
        #  xyz option is called.
        if extended_xyz:
            cell = self.cell

            header += f'Lattice="{cell[0,0]} {cell[0,1]} {cell[0,2]}'
            header += f" {cell[1,0]} {cell[1,1]} {cell[1,2]}"
            header += f' {cell[2,0]} {cell[2,1]} {cell[2,2]}"'
            if timestep > 0:
                header += f" Timestep={timestep}"
            if predict_energy:
                header += f" PE={predict_energy}"
            if dft_energy is not None:
                header += f" DFT_PE={dft_energy}"

            # The key must be spelled "Properties" for extended-xyz readers.
            # The fields are declared in the same order in which the columns
            # are written in the atom loop below.
            header += ' Properties="species:S:1:pos:R:3'

            if print_stds:
                header += ":stds:R:3"
                stds = self.stds
            if print_forces:
                header += ":forces:R:3"
                forces = self.forces
            if print_energies:
                if self.local_energies is None:
                    print_energies = False
                else:
                    header += ":local_energy:R:1"
                    local_energies = self.local_energies
            if print_max_stds:
                header += ":max_std:R:1"
                stds = self.stds
            if dft_forces is not None:
                header += ":dft_forces:R:3"
            # Close the quoted Properties value before ending the line
            header += '"\n'
        else:
            header += "\n"

        rows = []
        for i, pos in enumerate(self.positions):
            # Write positions
            row = f"{species_list[i]} {pos[0]} {pos[1]} {pos[2]}"

            # If extended XYZ: Add in extra information
            if extended_xyz:
                if print_stds:
                    row += f" {stds[i,0]} {stds[i,1]} {stds[i,2]}"
                if print_forces:
                    row += f" {forces[i,0]} {forces[i,1]} {forces[i,2]}"
                if print_energies:
                    row += f" {local_energies[i]}"
                if print_max_stds:
                    row += f" {np.max(stds[i,:])} "
            # DFT forces are appended whenever they are supplied, also in
            # the non-extended format.
            if dft_forces is not None:
                row += f" {dft_forces[i, 0]} {dft_forces[i,1]} {dft_forces[i, 2]}"
            rows.append(row)

        # Atom lines are newline-separated, with none after the last one
        xyz_str = header + "\n".join(rows)

        # Write to file, optionally
        if write_file:
            mode = "a" if append else "w"
            with open(write_file, mode) as f:
                f.write(xyz_str)
                f.write("\n")

        return xyz_str
示例#15
0
def test_passive_learning():
    """Exercise run_passive_learning with environments, frames and limits."""
    hyps = np.array([
        3.75996759e-06,
        1.53990678e-02,
        2.50624782e-05,
        5.07884426e-01,
        1.70172923e-03,
    ])
    base_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyps,
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames = Structure.from_file(
        path.join(TEST_FILE_DIR, "methanol_frames.json"))
    envs = AtomicEnvironment.from_file(
        path.join(TEST_FILE_DIR, "methanol_envs.json"))

    model = deepcopy(base_gp)
    trainer = TrajectoryTrainer(frames=None, gp=model)

    def swap_in_fresh_model():
        # Each scenario starts from an untouched copy of the base GP
        fresh = deepcopy(base_gp)
        trainer.gp = fresh
        return fresh

    # TEST ENVIRONMENT ADDITION
    expected_species = {Z_to_element(env.ctype) for env in envs}
    trainer.run_passive_learning(environments=envs, post_build_matrices=False)

    assert model.training_statistics["N"] == len(envs)
    assert set(model.training_statistics["species"]) == expected_species

    # TEST FRAME ADDITION: ALL ARE ADDED
    model = swap_in_fresh_model()
    trainer.run_passive_learning(frames=frames, post_build_matrices=False)
    assert len(model.training_data) == sum(len(fr) for fr in frames)

    # TEST FRAME ADDITION: MAX OUT MODEL SIZE AT 1
    model = swap_in_fresh_model()
    trainer.run_passive_learning(
        frames=frames,
        max_model_size=1,
        post_training_iterations=1,
    )
    assert len(model.training_data) == 1

    # TEST FRAME ADDITION: EXCLUDE OXYGEN, LIMIT CARBON TO 1, 1 H PER FRAME
    model = swap_in_fresh_model()
    model_limits = {"O": 0, "C": 1, "H": 5}
    trainer.run_passive_learning(
        frames=frames,
        max_model_elts=model_limits,
        max_elts_per_frame={"H": 1},
        post_build_matrices=False,
    )

    assert "O" not in model.training_statistics["species"]
    assert model.training_statistics["envs_by_species"]["C"] == 1
    assert model.training_statistics["envs_by_species"]["H"] == 5