Exemplo n.º 1
0
    def test_load_and_reload(self, all_gps, validation_env, multihyps):
        """
        Write a GP to disk as pickle and as JSON, load each file back and
        check that the reloaded model predicts exactly what the original
        predicts. Finally check that an unknown format name is rejected
        with a ValueError.
        """
        reference_gp = all_gps[multihyps]

        def check_predictions_match(candidate_gp):
            # Exact equality is required for d = 0, 1 and 2.
            for d in (0, 1, 2):
                expected = reference_gp.predict(x_t=validation_env, d=d)
                observed = candidate_gp.predict(x_t=validation_env, d=d)
                assert np.all(expected == observed)

        # Pickle round trip.
        reference_gp.write_model('test_gp_write', 'pickle')
        check_predictions_match(
            GaussianProcess.from_file('test_gp_write.pickle'))
        os.remove('test_gp_write.pickle')

        # JSON round trip: the model is rebuilt from the first line of the file.
        reference_gp.write_model('test_gp_write', 'json')
        with open('test_gp_write.json', 'r') as json_file:
            reloaded_gp = GaussianProcess.from_dict(
                json.loads(json_file.readline()))
        check_predictions_match(reloaded_gp)
        os.remove('test_gp_write.json')

        with raises(ValueError):
            reference_gp.write_model('test_gp_write', 'cucumber')
Exemplo n.º 2
0
def test_serialization_method(two_body_gp, test_point):
    """
    Serialize and then un-serialize a GP and ensure that no info was lost.
    Compare one calculation to ensure predictions work correctly.

    :param two_body_gp: GP under test; it is serialized with ``as_dict``.
    :param test_point: input passed to ``predict`` as ``x_t``.
    :return: None
    """
    old_gp_dict = two_body_gp.as_dict()
    new_gp = GaussianProcess.from_dict(old_gp_dict)
    new_gp_dict = new_gp.as_dict()

    assert len(new_gp_dict) == len(old_gp_dict)

    for new_key, old_key in zip(sorted(new_gp_dict), sorted(old_gp_dict)):
        # Equal lengths do not imply equal key sets, so check each pair.
        assert new_key == old_key

        x = new_gp_dict[new_key]
        # Read the reference value from the ORIGINAL dictionary; reading it
        # from new_gp_dict would compare the new dictionary with itself.
        y = old_gp_dict[old_key]

        if isinstance(x, np.ndarray):
            assert np.equal(x, y).all()
        elif hasattr(x, '__len__'):
            # zip() stops at the shorter operand, so a length mismatch would
            # otherwise go unnoticed.
            assert len(x) == len(y)

            if len(x) > 0 and isinstance(x[0], np.ndarray):
                assert np.equal(x, y).all()
            else:
                for xx, yy in zip(x, y):
                    assert xx == yy
        else:
            assert x == y

    for d in [0, 1, 2]:
        assert np.all(
            two_body_gp.predict(x_t=test_point, d=d) == new_gp.predict(
                x_t=test_point, d=d))
Exemplo n.º 3
0
def test_load_and_reload(two_body_gp, test_point):
    """
    Write a GP to disk as pickle and as JSON, load each file back and check
    that the reloaded model predicts exactly what the original predicts.
    Finally check that an unknown format name is rejected with a ValueError.

    :param two_body_gp: GP under test.
    :param test_point: input passed to ``predict`` as ``x_t``.
    :return: None
    """

    def check_predictions_match(candidate_gp):
        # Exact equality is required for d = 0, 1 and 2.
        for d in (0, 1, 2):
            expected = two_body_gp.predict(x_t=test_point, d=d)
            observed = candidate_gp.predict(x_t=test_point, d=d)
            assert np.all(expected == observed)

    # Pickle round trip: the file is read back with pickle.load directly.
    two_body_gp.write_model('two_body', 'pickle')
    with open('two_body.pickle', 'rb') as pickle_file:
        unpickled_gp = pickle.load(pickle_file)
    check_predictions_match(unpickled_gp)
    os.remove('two_body.pickle')

    # JSON round trip: the model is rebuilt from the first line of the file.
    two_body_gp.write_model('two_body', 'json')
    with open('two_body.json', 'r') as json_file:
        reloaded_gp = GaussianProcess.from_dict(
            json.loads(json_file.readline()))
    check_predictions_match(reloaded_gp)
    os.remove('two_body.json')

    with raises(ValueError):
        two_body_gp.write_model('two_body', 'cucumber')
Exemplo n.º 4
0
    def make_gp(
        self,
        cell=None,
        call_no=None,
        hyps=None,
        init_gp=None,
        hyp_no=None,
        **kwargs,
    ):
        """
        Build a GP model from the training frames stored on this object.

        Args:
            cell: Cell handed to ``struc.Structure`` for every frame. Default
                None uses ``self.header["cell"]``.
            call_no (int): Only the first ``call_no`` frames are added to the
                GP. Default None uses every entry of ``self.gp_position_list``.
            hyps: Hyperparameters assigned to the model. Default None uses
                ``self.gp_hyp_list[hyp_no - 1]``.
            init_gp (GaussianProcess): Existing model to extend. It is updated
                in place (its ``hyps`` are overwritten and the frames are
                added to it) and returned. Default None builds a new model
                with ``GaussianProcess.from_dict`` from a deep copy of
                ``self.header``. Required when ``self.header["restart"] > 0``.
            hyp_no (int): 1-based position in ``self.gp_hyp_list`` of the
                hyperparameters to use when ``hyps`` is None. Default None
                selects the last entry.
            kwargs: Only used when ``init_gp`` is None. Every entry whose value
                is not None overrides the header entry of the same name
                before the model is built.

        Returns:
            The GP model after ``update_db`` has been called for each selected
            frame and ``set_L_alpha`` has been called once.

        Raises:
            AssertionError: If the header marks a restarted run
                (``restart > 0``) and ``init_gp`` is None.
        """

        if "restart" in self.header and self.header["restart"] > 0:
            if init_gp is None:
                # Raised explicitly instead of via `assert` so the check is
                # not stripped under `python -O`; the exception type callers
                # may already catch is unchanged.
                raise AssertionError(
                    "Please input the init_gp as the gp model dumped "
                    "before restarting otf."
                )

        if call_no is None:
            call_no = len(self.gp_position_list)
        if hyp_no is None:
            hyp_no = len(self.gp_hyp_list)  # use the last hyps by default
        if hyps is None:
            # hyp_no is 1-based, hence the shift by one
            hyps = self.gp_hyp_list[hyp_no - 1]
        if cell is None:
            cell = self.header["cell"]

        if init_gp is None:
            # Use run's values as extracted from header
            # TODO Allow for kernel gradient in header

            dictionary = deepcopy(self.header)
            dictionary["hyps"] = hyps
            for key, value in kwargs.items():
                if value is not None:
                    dictionary[key] = value

            gp_model = GaussianProcess.from_dict(dictionary)
        else:
            gp_model = init_gp
            gp_model.hyps = hyps

        for (positions, forces, atoms, species) in zip(
                self.gp_position_list[:call_no],
                self.gp_force_list[:call_no],
                self.gp_atom_list[:call_no],
                self.gp_species_list[:call_no],
        ):

            struc_curr = struc.Structure(cell, species, positions)

            gp_model.update_db(struc_curr, forces, custom_range=atoms)

        gp_model.set_L_alpha()

        return gp_model
Exemplo n.º 5
0
    def make_gp(
        self,
        cell=None,
        call_no=None,
        hyps=None,
        init_gp=None,
        hyp_no=None,
        **kwargs,
    ):
        """
        Build a GP model from the training frames stored on this object.

        Args:
            cell: Cell handed to ``struc.Structure`` for every frame. Default
                None uses ``self.header['cell']``.
            call_no (int): Only the first ``call_no`` frames are added.
                Default None uses every entry of ``self.gp_position_list``.
            hyps: Hyperparameters assigned to the model. Default None takes
                the last element of the latest non-empty entry among the
                first ``hyp_no`` entries of ``self.gp_hyp_list``; it stays
                None when no such entry exists.
            init_gp: Existing model to extend; it is updated in place and
                returned. Default None builds a new model with
                ``GaussianProcess.from_dict`` from a deep copy of
                ``self.header``.
            hyp_no (int): Number of leading ``self.gp_hyp_list`` entries
                searched when ``hyps`` is None. Default None uses ``call_no``.
            kwargs: Only used when ``init_gp`` is None. Entries whose value is
                not None override the header entry of the same name.

        Returns:
            The GP model after ``update_db`` has been called for each selected
            frame and ``set_L_alpha`` has been called once.
        """
        if call_no is None:
            call_no = len(self.gp_position_list)
        if hyp_no is None:
            hyp_no = call_no
        if hyps is None:
            # Walk backwards from entry hyp_no - 1 and stop at the first
            # non-empty entry; fall back to None when there is none.
            hyps = next(
                (
                    self.gp_hyp_list[idx][-1]
                    for idx in range(hyp_no - 1, -1, -1)
                    if len(self.gp_hyp_list[idx]) > 0
                ),
                None,
            )
        if cell is None:
            cell = self.header['cell']

        if init_gp is not None:
            gp_model = init_gp
            gp_model.hyps = hyps
        else:
            # Use run's values as extracted from header
            # TODO Allow for kernel gradient in header
            settings = deepcopy(self.header)
            settings['hyps'] = hyps
            for name, value in kwargs.items():
                if value is not None:
                    settings[name] = value
            gp_model = GaussianProcess.from_dict(settings)

        # gp_hyp_list takes part in the zip although its values are unused,
        # so it still bounds the number of frames that are added.
        frames = zip(
            self.gp_position_list[:call_no],
            self.gp_force_list[:call_no],
            self.gp_atom_list[:call_no],
            self.gp_hyp_list[:call_no],
            self.gp_species_list[:call_no],
        )
        for positions, forces, atoms, _unused_hyps, species in frames:
            frame_structure = struc.Structure(cell, species, positions)
            gp_model.update_db(frame_structure, forces, custom_range=atoms)

        gp_model.set_L_alpha()

        return gp_model
Exemplo n.º 6
0
def test_serialization_method(two_body_gp, test_point):
    """
    Round-trip a GP through ``as_dict`` / ``from_dict``, compare the two
    dictionary forms with ``dumpcompare`` and check that both models give
    identical predictions.

    :param two_body_gp: GP under test.
    :param test_point: input passed to ``predict`` as ``x_t``.
    :return: None
    """
    serialized_before = two_body_gp.as_dict()
    restored_gp = GaussianProcess.from_dict(serialized_before)
    serialized_after = restored_gp.as_dict()

    dumpcompare(serialized_after, serialized_before)

    # Exact equality is required for d = 0, 1 and 2.
    for d in (0, 1, 2):
        expected = two_body_gp.predict(x_t=test_point, d=d)
        observed = restored_gp.predict(x_t=test_point, d=d)
        assert np.all(expected == observed)
Exemplo n.º 7
0
    def test_serialization_method(self, all_gps, validation_env, multihyps):
        """
        Round-trip a GP through ``as_dict`` / ``from_dict``, compare the two
        dictionary forms with ``dumpcompare`` and check that both models give
        identical predictions.

        :param all_gps: GP fixtures, indexed by ``multihyps``.
        :param validation_env: input passed to ``predict`` as ``x_t``.
        :param multihyps: key selecting the GP under test from ``all_gps``.
        :return: None
        """
        reference_gp = all_gps[multihyps]
        serialized_before = reference_gp.as_dict()
        restored_gp = GaussianProcess.from_dict(serialized_before)
        serialized_after = restored_gp.as_dict()

        assert len(serialized_after) == len(serialized_before)

        dumpcompare(serialized_after, serialized_before)

        # Exact equality is required for d = 0, 1 and 2.
        for d in (0, 1, 2):
            expected = reference_gp.predict(x_t=validation_env, d=d)
            observed = restored_gp.predict(x_t=validation_env, d=d)
            assert np.all(expected == observed)
Exemplo n.º 8
0
    def from_dict(dct):
        """
        Rebuild a FLARE_Calculator from its dictionary representation.

        NOTE: ``dct`` is modified in place. Its "gp_model" entry (and its
        "mgp_model" entry when "use_mapping" is truthy) is replaced by the
        deserialized object before the dictionary is unpacked into the
        constructor.

        Args:
            dct (dict): Dictionary form of the calculator. The keys
                "gp_model", "use_mapping" and "results" are read here.

        Returns:
            The reconstructed FLARE_Calculator.
        """
        dct["gp_model"] = GaussianProcess.from_dict(dct["gp_model"])
        uses_mapping = dct["use_mapping"]
        if uses_mapping:
            dct["mgp_model"] = MappedGaussianProcess.from_dict(dct["mgp_model"])

        calc = FLARE_Calculator(**dct)

        # Restore stored results: floats are copied as they are, lists are
        # converted to numpy arrays, every other type is skipped.
        stored_results = dct["results"]
        for name in stored_results:
            value = stored_results[name]
            if isinstance(value, float):
                calc.results[name] = value
            elif isinstance(value, list):
                calc.results[name] = np.array(value)

        if uses_mapping:
            # Every map takes its hyps_mask from the GP model.
            for map_name in calc.mgp_model.maps:
                calc.mgp_model.maps[map_name].hyps_mask = calc.gp_model.hyps_mask

        return calc
Exemplo n.º 9
0
    def from_dict(dictionary: dict):
        """
        Create MGP object from dictionary representation.

        NOTE: the ``kernel{i}b_info`` lists inside ``dictionary`` are modified
        in place: their first two entries are replaced by the objects returned
        from ``str_to_kernel_set``.
        """
        constructor_args = {
            'grid_params': dictionary['grid_params'],
            'struc_params': dictionary['struc_params'],
            'GP': None,
            'mean_only': dictionary['mean_only'],
            'container_only': True,
            'lmp_file_name': dictionary['lmp_file_name'],
            'n_cpus': dictionary['n_cpus'],
            'n_sample': dictionary['n_sample'],
            'autorun': False,
        }
        new_mgp = MappedGaussianProcess(**constructor_args)

        # Restore kernel_info
        for body_order in dictionary['bodies']:
            attr_name = f'kernel{body_order}b_info'
            info = dictionary[attr_name]
            # The last entry is treated as the hyps mask; a None mask means
            # the kernel set is requested without multihyps.
            uses_multihyps = info[-1] is not None
            kernel, _, _, efk = str_to_kernel_set(info[0], uses_multihyps)
            info[0] = kernel
            info[1] = efk
            setattr(new_mgp, attr_name, info)

        # Fill up the model with the saved coeffs
        for idx, two_body_map in enumerate(new_mgp.maps_2):
            two_body_map.mean.__coeffs__ = np.array(dictionary['maps_2'][idx])
        for idx, three_body_map in enumerate(new_mgp.maps_3):
            three_body_map.mean.__coeffs__ = np.array(dictionary['maps_3'][idx])

        # Set GP
        gp_dict = dictionary.get('GP')
        if gp_dict:
            new_mgp.GP = GaussianProcess.from_dict(gp_dict)

        return new_mgp
Exemplo n.º 10
0
    def test_load_and_reload(self, all_gps, validation_env, multihyps):
        """
        Write a GP to disk as pickle and as JSON, load each file back and
        check that the reloaded model predicts exactly what the original
        predicts. Then check that an unknown format name raises ValueError
        and that ``write_model`` accepts a file name that already carries
        the extension.
        """

        test_gp = all_gps[multihyps]

        test_gp.write_model("test_gp_write", "pickle")

        new_gp = GaussianProcess.from_file("test_gp_write.pickle")

        for d in [1, 2, 3]:
            assert np.all(
                test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                    x_t=validation_env, d=d))

        # Best-effort cleanup: only filesystem errors are tolerated, so that
        # KeyboardInterrupt and unrelated failures are no longer swallowed.
        try:
            os.remove("test_gp_write.pickle")
        except OSError:
            pass

        test_gp.write_model("test_gp_write", "json")

        with open("test_gp_write.json", "r") as f:
            new_gp = GaussianProcess.from_dict(json.loads(f.readline()))
        for d in [1, 2, 3]:
            assert np.all(
                test_gp.predict(x_t=validation_env, d=d) == new_gp.predict(
                    x_t=validation_env, d=d))
        os.remove("test_gp_write.json")

        with raises(ValueError):
            test_gp.write_model("test_gp_write", "cucumber")

        # Test logic for auto-detecting format in write command
        # (loop variable renamed so it does not shadow the builtin `format`)
        for file_format in ["json", "pickle"]:
            write_string = "format_write_test." + file_format
            if os.path.exists(write_string):
                os.remove(write_string)

            test_gp.write_model(write_string)
            assert os.path.exists(write_string)
            os.remove(write_string)
Exemplo n.º 11
0
    def from_dict(dictionary: dict) -> "MappedGaussianProcess":
        """
        Create MGP object from dictionary representation.

        The constructor arguments are collected in a local dictionary, so
        this method no longer writes helper keys ("GP", "unique_species",
        "container_only") into ``dictionary``.

        Args:
            dictionary: Dictionary form of the MGP. The keys "grid_params",
                "species_labels", "var_map", "lmp_file_name", "n_cpus" and
                "n_sample" are required. "GP" and "container_only" are
                optional, and "maps" is read for every map the new object
                contains.

        Returns:
            The reconstructed MappedGaussianProcess.
        """

        # Set GP: pass the deserialized model to the constructor. Previously
        # the result of from_dict was stored in an unused local and the raw
        # dictionary was forwarded instead.
        gp_dict = dictionary.get("GP")
        gp_model = GaussianProcess.from_dict(gp_dict) if gp_dict else None

        kwargs = {
            key: dictionary[key]
            for key in (
                "grid_params",
                "var_map",
                "lmp_file_name",
                "n_cpus",
                "n_sample",
            )
        }
        kwargs["unique_species"] = list(set(dictionary["species_labels"]))
        kwargs["GP"] = gp_model
        # A missing "container_only" entry defaults to True.
        kwargs["container_only"] = dictionary.get("container_only", True)

        new_mgp = MappedGaussianProcess(**kwargs)

        # Fill up the model with the saved coeffs
        if "twobody" in new_mgp.maps:
            new_mgp.maps["twobody"] = Map2body.from_dict(
                dictionary["maps"]["twobody"], Map2body
            )
        if "threebody" in new_mgp.maps:
            new_mgp.maps["threebody"] = Map3body.from_dict(
                dictionary["maps"]["threebody"], Map3body
            )

        return new_mgp
Exemplo n.º 12
0
    def make_gp(
        self,
        cell=None,
        call_no=None,
        hyps=None,
        init_gp=None,
        hyp_no=None,
        **kwargs,
    ):
        """
        Build GP model from the training frames parsed from the log file.
        The cell, hyps and gp can be reset with customized values.

        Args:
            cell (np.ndarray): Default None to use the cell from the log file.
                A customized cell can be input as a 3x3 numpy array.
            call_no (int): Default None to use all the DFT frames as training
                data for building GP. If not None, then the first `call_no`
                frames will be added to GP.
            hyps (np.ndarray): Default None to use the hyperparameters from the
                log file. Customized hyps can be input as an array.
            init_gp (GaussianProcess): Default to None to use no initial settings
                or training data. An initial GP can be used, and then the
                frames parsed in the log file will be added to the initial GP.
                Then the final GP uses the kernels of `init_gp`, and consists of
                training data from `init_gp` and the data from the log file.
                `init_gp` is updated in place (its `hyps` are overwritten) and
                is the object that gets returned.
                **NOTE**: if a log file from restarted OTF is parsed, then an initial
                GP needs to be parsed from the prior log file as the `init_gp` of the
                restarted log file.
            hyp_no (int): Default None to use the final optimized hyperparameters to
                build GP. If not None, then use the hyps from the `hyp_no`th
                optimization step (1-based).
            kwargs: if a new GP setting is needed without inputting `init_gp`, the GP
                initial args can be input as kwargs. Entries whose value is None
                are ignored.

        Returns:
            The GP model after `update_db` has been called for each selected
            frame and `set_L_alpha` has been called once.

        Raises:
            AssertionError: If the header marks a restarted run
                (`restart > 0`) and `init_gp` is None.
        """
        if "restart" in self.header and self.header["restart"] > 0:
            if init_gp is None:
                # Raised explicitly instead of via `assert` so the check is
                # not stripped under `python -O`; the exception type callers
                # may already catch is unchanged.
                raise AssertionError(
                    "Please input the init_gp as the gp model dumped "
                    "before restarting otf."
                )

        if call_no is None:
            call_no = len(self.gp_position_list)
        if hyp_no is None:
            hyp_no = len(self.gp_hyp_list)  # use the last hyps by default
        if hyps is None:
            # hyp_no is 1-based, hence the shift by one
            hyps = self.gp_hyp_list[hyp_no - 1]
        if cell is None:
            cell = self.header["cell"]

        if init_gp is None:
            # Use run's values as extracted from header
            # TODO Allow for kernel gradient in header

            dictionary = deepcopy(self.header)
            dictionary["hyps"] = hyps
            for key, value in kwargs.items():
                if value is not None:
                    dictionary[key] = value

            gp_model = GaussianProcess.from_dict(dictionary)
        else:
            gp_model = init_gp
            gp_model.hyps = hyps

        for (positions, forces, atoms, species) in zip(
            self.gp_position_list[:call_no],
            self.gp_force_list[:call_no],
            self.gp_atom_list[:call_no],
            self.gp_species_list[:call_no],
        ):

            struc_curr = struc.Structure(cell, species, positions)

            gp_model.update_db(struc_curr, forces, custom_range=atoms)

        gp_model.set_L_alpha()

        return gp_model