Пример #1
0
def compute_si_cpp(job):
    """Run the C++ spherical invariants executable for one job.

    The job statepoint is completed with the hypers of the calculator and
    written to the job input file. The executable selected by
    ``data['nl_type']`` is then run twice on that input: a first time under
    ``memory_usage`` to record the peak memory, and a second time to produce
    the output file. The ``'results'`` entry of that output, together with
    the number of selected features and the memory figures, is written to
    the job result file.

    Parameters
    ----------
    job :
        object providing ``statepoint()`` and ``fn(name)``
        (presumably a signac job -- confirm against the caller).
    """
    # setup input for the script
    data = job.statepoint()
    np.random.seed(data['seed'])

    if data['representation']['coefficient_subselection'] is not None:
        # the full calculator is built first because the random
        # subselection is drawn among the features it defines
        rpr = deepcopy(data['representation'])
        rpr.pop('coefficient_subselection')
        rep = SphericalInvariants(**rpr)
        rep, n_feat = get_randomly_sparsified_soap(data, rep)
    else:
        n_feat = None
        rep = SphericalInvariants(**data['representation'])

    data['calculator'] = rep.hypers
    fn_in = job.fn(group['fn_in'])
    fn_out = job.fn(group['fn_out'])
    tojson(fn_in, data)
    command = [group['executable'][data['nl_type']], fn_in, fn_out]

    # look at memory footprint
    p = Popen(command, stdout=PIPE, stderr=PIPE)
    max_mem = memory_usage(p, interval=0.1, max_usage=True)
    # reap the process and close its pipes before starting the next run
    p.communicate()

    # look at timings
    p = Popen(command, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes and waits for the process to exit.
    # Reading stderr twice (as done previously) always printed an empty
    # message, and an undrained stdout pipe can block the child.
    _, err = p.communicate()
    if err:
        print(err)

    data = fromjson(fn_out)
    data = data['results']
    data['n_features'] = n_feat
    data['mem_max'] = max_mem
    data['mem_unit'] = 'MiB'
    tojson(job.fn(group['fn_res']), data)
def compute_representation(feature_hypers, frames, center_atom_id_mask):
    """Compute (or load) the features selected by ``feature_hypers``.

    ``feature_hypers["feature_type"]`` picks the kind of features:

    * ``"soap"``: spherical invariants computed on ``frames``,
    * ``"wasserstein"``: radial spectrum Wasserstein features,
    * ``"sorted_distances"``: sorted distances features,
    * ``"precomputed"``: features read from a ``.npy`` file, a text file,
      the ``info`` dictionary of each frame, or the hardcoded displaced
      hydrogen dataset; the result is truncated to the total number of
      entries in ``center_atom_id_mask``.

    Returns ``None`` when no case matches.
    """
    feature_type = feature_hypers["feature_type"]

    if feature_type == "soap":
        calculator = SphericalInvariants(**feature_hypers["feature_parameters"])
        return calculator.transform(frames).get_features(calculator)

    if feature_type == "wasserstein":
        return compute_radial_spectrum_wasserstein_features(
            feature_hypers["feature_parameters"], frames)

    if feature_type == "sorted_distances":
        return compute_sorted_distances(
            feature_hypers["feature_parameters"], frames, center_atom_id_mask)

    if feature_type != "precomputed":
        return None

    print("WARNING we assume for the precomputed features that only one environment in each structure was computed.")
    params = feature_hypers['feature_parameters']
    # total number of environments over all structures
    nb_envs = sum(len(structure_mask) for structure_mask in center_atom_id_mask)
    filetype = params["filetype"]

    if filetype in ("npy", "txt"):
        pathname = f"{FEATURES_ROOT}/{params['feature_name']}/{params['filename']}"
        loader = np.load if filetype == "npy" else np.loadtxt
        return loader(pathname)[:nb_envs]

    if filetype == "frame_info":
        per_frame = np.array(
            [frame.info[params['feature_name']] for frame in frames])
        # add a trailing axis so that each frame contributes one row
        return per_frame[:, np.newaxis][:nb_envs]

    # hardcoded case
    if params["feature_name"] == "displaced_hydrogen_distance":
        return load_hydrogen_distance_dataset(frames)[:nb_envs]

    return None
Пример #3
0
def benchmark_spherical_representations(frames, optimization_args,
                                        radial_basis):
    """Time SphericalExpansion then SphericalInvariants on ``frames``.

    Both calculators are built with the same cutoff, basis size and
    gaussian smearing; ``radial_basis`` and ``optimization_args`` are
    forwarded to both of them. Each calculator is handed to
    ``transform_representation`` with ``NB_ITERATIONS_PER_REPRESENTATION``
    iterations.
    """
    expansion_hypers = dict(
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        radial_basis=radial_basis,
        optimization_args=optimization_args,
    )
    invariants_hypers = dict(
        soap_type="PowerSpectrum",
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        normalize=False,
        radial_basis=radial_basis,
        optimization_args=optimization_args,
    )

    benchmarks = (
        ("Timing SphericalExpansion", SphericalExpansion, expansion_hypers),
        ("Timing SphericalInvariants", SphericalInvariants,
         invariants_hypers),
    )
    for message, calculator_class, hypers in benchmarks:
        print(message)
        transform_representation(
            calculator_class(**hypers),
            frames,
            nb_iterations=NB_ITERATIONS_PER_REPRESENTATION)
    def test_representation_transform(self):
        """Check that transforming a single frame and exporting its dense
        feature matrix runs without raising."""
        calculator = SphericalInvariants(**self.hypers)
        managers = calculator.transform([self.frame])
        # only the absence of an exception is checked, the matrix is dropped
        managers.get_dense_feature_matrix(calculator)
    def test_representation_transform(self):
        """Check that the different ways of exporting the features to
        python lead to the same linear kernel matrix."""
        calculator = SphericalInvariants(**self.hypers)
        managers = calculator.transform(self.frames)

        reference = managers.get_features(calculator)
        gram_ref = np.dot(reference, reference.T)

        # the explicit species list, and the same list with one extra
        # species (70), must both reproduce the reference kernel
        for species_list in (self.global_species,
                             self.global_species + [70]):
            exported = managers.get_features(calculator, species_list)
            gram = np.dot(exported, exported.T)
            self.assertTrue(np.allclose(gram, gram_ref))

        # removing one species from the list must change the kernel
        truncated_species = copy(self.global_species)
        truncated_species.pop()
        exported = managers.get_features(calculator, truncated_species)
        gram = np.dot(exported, exported.T)
        self.assertFalse(np.allclose(gram, gram_ref))

        # features exported by species, contracted with the `dot` helper
        by_species = managers.get_features_by_species(calculator)
        gram = dot(by_species, by_species)
        self.assertTrue(np.allclose(gram, gram_ref))
Пример #6
0
    def test_model_call(self):
        """Check that the Cosine kernel can be built and called for both
        target types, and that invalid arguments are rejected."""
        calculator = SphericalInvariants(**self.hypers)
        managers = calculator.transform([self.frame])

        for target_type in ("Atom", "Structure"):
            kernel = Kernel(calculator,
                            name="Cosine",
                            target_type=target_type,
                            zeta=2)
            kernel(managers)

        invalid_arguments = (
            # wrong name
            (RuntimeError,
             dict(name="WrongName", target_type="Structure", zeta=2)),
            (RuntimeError,
             dict(name="cosine", target_type="Structure", zeta=2)),
            # wrong target_type
            (RuntimeError,
             dict(name="Cosine", target_type="WrongType", zeta=2)),
            (RuntimeError,
             dict(name="Cosine", target_type="structure", zeta=2)),
            (RuntimeError,
             dict(name="Cosine", target_type="atom", zeta=2)),
            # wrong zeta
            (ValueError,
             dict(name="Cosine", target_type="Structure", zeta=2.5)),
            (ValueError,
             dict(name="Cosine", target_type="Structure", zeta=-2)),
        )
        for expected_error, kernel_kwargs in invalid_arguments:
            with self.assertRaises(expected_error):
                Kernel(calculator, **kernel_kwargs)
Пример #7
0
def compute_sparse_point(job):
    """Select random sparse points for a job and dump them to disk.

    The selection is done with the representation of the statepoint. The
    coefficient subselection stored in the job feature file is then added
    to the representation hypers, the structures are transformed again and
    the selected points are filtered with this new calculator before being
    dumped to the job sparse point file.
    """
    statepoint = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    # selection of the points with the representation of the statepoint
    calculator = SphericalInvariants(**statepoint['representation'])
    compressor = RandomFilter(calculator,
                              **statepoint['sparse_point_subselection'])
    compressor.select(calculator.transform(frames))

    # rebuild the calculator with the stored coefficient subselection
    stored_selection = fromjson(job.fn(group['feature_fn']))
    statepoint['representation']['coefficient_subselection'] = (
        stored_selection['coefficient_subselection'])
    calculator = SphericalInvariants(**statepoint['representation'])
    sparsified_managers = calculator.transform(frames)

    # swap the calculator held by the compressor before filtering
    compressor._representation = calculator
    sparse_points = compressor.filter(sparsified_managers)
    dump_obj(job.fn(group['sparse_point_fn']), sparse_points)
def get_feature_vector(hypers, frames):
    """Return the spherical invariants feature matrix of ``frames``.

    The computation runs inside ``ostream_redirect()`` and the size of one
    feature vector (8 bytes per coefficient) is printed in MB.
    """
    with ostream_redirect():
        calculator = SphericalInvariants(**hypers)
        managers = calculator.transform(frames)
        size_in_mb = calculator.get_num_coefficients() * 8.0 / 1.0e6
        print('Feature vector size: %.3fMB' % (size_in_mb))
        matrix = managers.get_feature_matrix()
    return matrix
Пример #9
0
 def test_pickle(self):
     """Check that a Cosine kernel survives a pickle round trip, by
     comparing the dictionary form of the original and restored objects."""
     calculator = SphericalInvariants(**self.hypers)
     kernel = Kernel(calculator,
                     name="Cosine",
                     target_type="Structure",
                     zeta=2)
     restored = pickle.loads(pickle.dumps(kernel))
     self.assertTrue(to_dict(kernel) == to_dict(restored))
Пример #10
0
    def test_serialization(self):
        """Check that converting a calculator to a dictionary, rebuilding
        it from that dictionary and converting it again gives the same
        dictionary."""
        calculator = SphericalInvariants(**self.hypers)
        serialized = to_dict(calculator)
        reserialized = to_dict(from_dict(serialized))
        self.assertTrue(serialized == reserialized)
Пример #11
0
    def test_hypers_construction(self):
        """Check that the manual construction of the optimal radial basis
        hypers and the `get_optimal_radial_basis_hypers` helper lead to the
        same SOAP features."""

        # manual route: spherical expansion on the expanded radial basis
        manual_hypers = deepcopy(self.hypers)
        manual_hypers["max_radial"] = self.expanded_max_radial
        expansion = SphericalExpansion(**manual_hypers)
        expansion_feats = expansion.transform(
            self.frames).get_features_by_species(expansion)

        # covariance -> principal components -> projection matrices
        covariance = get_radial_basis_covariance(expansion, expansion_feats)
        _, eigenvectors = get_radial_basis_pca(covariance)
        projections = get_radial_basis_projections(
            eigenvectors, self.hypers["max_radial"])

        # turn the hypers into SOAP hypers using the projected basis
        manual_hypers["max_radial"] = self.hypers["max_radial"]
        manual_hypers["soap_type"] = "PowerSpectrum"
        manual_hypers["optimization"] = {
            "RadialDimReduction": {"projection_matrices": projections},
            "Spline": {"accuracy": 1e-8},
        }
        manual_soap = SphericalInvariants(**manual_hypers)
        manual_feats = manual_soap.transform(
            self.frames).get_features(manual_soap)

        # automatic route: same thing through the compact helper
        helper_hypers = deepcopy(self.hypers)
        helper_hypers["soap_type"] = "PowerSpectrum"
        helper_hypers = get_optimal_radial_basis_hypers(
            helper_hypers,
            self.frames,
            expanded_max_radial=self.expanded_max_radial)
        helper_soap = SphericalInvariants(**helper_hypers)
        helper_feats = helper_soap.transform(
            self.frames).get_features(helper_soap)

        self.assertTrue(np.allclose(manual_feats, helper_feats))
Пример #12
0
def compute_feature_selection(job):
    """Run the random feature selection of a job and store it as json.

    The structures of the job are transformed with the representation of
    the statepoint, a ``RandomFilter`` configured by
    ``sp['feature_subselection']`` selects and filters them, and the result
    is written to the job feature file.
    """
    statepoint = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    calculator = SphericalInvariants(**statepoint['representation'])
    managers = calculator.transform(frames)
    selector = RandomFilter(calculator, **statepoint['feature_subselection'])
    selection = selector.select_and_filter(managers)
    # disabled in the original code:
    # if sp['feature_subselection']['Nselect'] is None:
    #     feature_subselection['coefficient_subselection'] = None
    tojson(job.fn(group['feature_fn']), selection)
Пример #13
0
    def test_serialization(self):
        """Check, for both target types, that a Cosine kernel rebuilt from
        its dictionary form serializes to the same dictionary."""
        calculator = SphericalInvariants(**self.hypers)

        for target_type in ("Atom", "Structure"):
            kernel = Kernel(calculator,
                            name="Cosine",
                            target_type=target_type,
                            zeta=2)
            serialized = to_dict(kernel)
            reserialized = to_dict(from_dict(serialized))
            self.assertTrue(serialized == reserialized)
Пример #14
0
def get_randomly_sparsified_soap(data, rep):
    """Build a calculator restricted to a random subset of the features.

    ``data['representation']['coefficient_subselection']`` is read as the
    fraction of features to keep. The features of ``rep`` are enumerated
    with ``get_feature_index_mapping``, shuffled with the numpy global
    random state seeded by ``data['seed']``, and the first ones are kept.

    Returns the new calculator and the number of selected features.
    """
    hypers = deepcopy(data['representation'])
    fraction = hypers['coefficient_subselection']
    mapping = get_feature_index_mapping(rep, hypers['global_species'])
    n_total = len(mapping)

    # reproducible random order of the feature indices
    order = np.arange(n_total)
    np.random.seed(data['seed'])
    np.random.shuffle(order)

    n_feat = int(fraction * n_total)
    selection = {key: [] for key in ('a', 'b', 'n1', 'n2', 'l')}
    for feature_id in order[:n_feat]:
        for key, value in mapping[feature_id].items():
            # plain ints so that the selection can be serialized
            selection[key].append(int(value))

    hypers['coefficient_subselection'] = selection
    return SphericalInvariants(**hypers), n_feat
Пример #15
0
def compute_knm(job):
    """Compute the KNM matrix of a job and save it with ``np.save``.

    The matrix has one row per structure, followed (when
    ``job.sp.train_with_grad`` is set) by three rows per atom, and one
    column per sparse point. The kernel object is dumped to the job kernel
    file along the way.
    """
    statepoint = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    sparse_points = load_obj(job.fn(group['sparse_point_fn']))

    # same hypers as the sparse points, gradients switched by the job
    hypers = sparse_points.representation._get_init_params()
    hypers['compute_gradients'] = job.sp.train_with_grad
    calculator = SphericalInvariants(**hypers)
    kernel = Kernel(calculator, **statepoint['kernel'])

    n_structures = len(frames)
    # three gradient rows per atom; the gradient rows of frame i span
    # row_bounds[i]:row_bounds[i + 1], after the n_structures first rows
    rows_per_frame = [3 * len(frame) for frame in frames]
    n_gradient_rows = sum(rows_per_frame)
    row_bounds = np.cumsum([0] + rows_per_frame) + n_structures
    dump_obj(job.fn(group['kernel_fn']), kernel)

    if job.sp.train_with_grad:
        n_rows = n_structures + n_gradient_rows
    else:
        n_rows = n_structures
    knm = np.zeros((n_rows, sparse_points.size()))

    for i_frame, frame in enumerate(frames):
        energy_row, gradient_rows = compute(i_frame,
                                            frame,
                                            calculator,
                                            kernel,
                                            sparse_points,
                                            grad=job.sp.train_with_grad)
        knm[i_frame] = energy_row
        if job.sp.train_with_grad:
            start, stop = row_bounds[i_frame], row_bounds[i_frame + 1]
            knm[start:stop] = gradient_rows

    np.save(job.fn(group['knm_fn']), knm)
def benchmark_spherical_representations(frames, optimization_args,
                                        radial_basis):
    """Time SphericalExpansion then SphericalInvariants on ``frames``.

    Both calculators share the cutoff, basis size and gaussian smearing;
    ``radial_basis`` is forwarded as is and ``optimization_args`` is passed
    under the ``"optimization"`` key. Each calculator is handed to
    ``transform_representation`` with ``NB_ITERATIONS_PER_REPRESENTATION``
    iterations.
    """

    def run(message, calculator):
        # announce the benchmark, then time the given calculator
        print(message)
        transform_representation(
            calculator,
            frames,
            nb_iterations=NB_ITERATIONS_PER_REPRESENTATION,
        )

    expansion_hypers = dict(
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        radial_basis=radial_basis,
        optimization=optimization_args,
    )
    run("Timing SphericalExpansion", SphericalExpansion(**expansion_hypers))

    invariants_hypers = dict(
        soap_type="PowerSpectrum",
        interaction_cutoff=INTERACTION_CUTOFF,
        max_radial=8,
        max_angular=6,
        gaussian_sigma_constant=0.5,
        gaussian_sigma_type="Constant",
        cutoff_smooth_width=0.5,
        normalize=False,
        radial_basis=radial_basis,
        optimization=optimization_args,
    )
    run("Timing SphericalInvariants", SphericalInvariants(**invariants_hypers))
Пример #17
0
def compute_benchmark(job):
    """Time the prediction steps of a trained model and dump the timings.

    For ``sp['N_ITERATIONS']`` iterations the neighbour list construction,
    the representation and the energy prediction are timed separately; the
    force prediction is timed as well when ``job.sp.grads_timing`` is set.
    Short sleeps separate the timed sections. The timings, tagged with the
    dataset name and the number of atoms, are written to the job benchmark
    file.
    """
    from rascal.models.krr import compute_sparse_kernel_gradients

    statepoint = _decode(job.statepoint())
    first, count = job.sp.start_structure, job.sp.n_structures
    frames = fromfile(job.sp.filename)[first:first + count]

    model = load_obj(job.fn(group['model_fn']))
    trained_calculator = model.get_representation_calculator()
    grads_timing = job.sp.grads_timing

    # same hypers as the trained model, gradients switched by the job
    hypers = trained_calculator._get_init_params()
    hypers['compute_gradients'] = grads_timing
    soap = SphericalInvariants(**hypers)

    cutoff = statepoint['representation']['interaction_cutoff']
    nl_options = [
        {'name': 'centers', 'args': []},
        {'name': 'neighbourlist', 'args': {'cutoff': cutoff}},
        # {'name': 'halflist', 'args': {}},
        {'name': 'centercontribution', 'args': {}},
        {'name': 'strict', 'args': {'cutoff': cutoff}},
    ]

    kernel = Kernel(soap, **statepoint['kernel'])

    n_iterations = statepoint['N_ITERATIONS']
    rep_tag = 'rep with grad' if grads_timing else 'rep'
    tags = ['NL', rep_tag, 'pred energy']
    if grads_timing:
        tags.append('pred forces')
    timers = {tag: Timer(tag=tag, logger=None) for tag in tags}

    if job.sp.name == 'qm9':
        frames = frames[:100]
    else:
        # a single supercell replicated from the first frame
        frames = [
            make_supercell(frames[0],
                           job.sp.n_replication * np.eye(3),
                           wrap=True,
                           tol=1e-11)
        ]

    for _ in range(n_iterations):
        with timers['NL']:
            managers = AtomsList(frames, nl_options)
        sleep(0.1)
        with timers[rep_tag]:
            managers = soap.transform(managers)
        sleep(0.1)
        # the baseline is computed outside of the timed sections
        Y0 = model._get_property_baseline(managers)
        with timers['pred energy']:
            KNM = kernel(managers, model.X_train, (False, False))
            Y0 + np.dot(KNM, model.weights).reshape((-1))
        sleep(0.1)
        if grads_timing:
            with timers['pred forces']:
                rep = soap._representation
                forces = -compute_sparse_kernel_gradients(
                    rep, model.kernel._kernel, managers.managers,
                    model.X_train._sparse_points,
                    model.weights.reshape((1, -1)))
            sleep(0.1)

        # drop the large objects before the next iteration
        managers, KNM = [], []
        del managers, KNM
        sleep(0.3)

    n_atoms = sum(len(frame) for frame in frames)

    timings = []
    for tag in tags:
        record = timers[tag].dumps()
        record.update({'name': job.sp.name, 'n_atoms': n_atoms})
        timings.append(record)

    tojson(job.fn(group['benchmark_fn']), timings)
def dump_reference_json():
    """Generate the spherical invariants reference data file.

    For each reference structure file and each cutoff, the feature matrix
    is computed for every combination of soap type, gaussian sigma,
    max_radial and max_angular. Each feature matrix is stored together
    with the hypers of the calculator, one list per (file, cutoff) pair,
    and the whole is written with ubjson to
    ``tests/reference_data/spherical_invariants_reference.ubjson``
    relative to ``path``.

    RadialSpectrum always uses ``max_angular = 0``; BiSpectrum always uses
    ``max_radial = 2``, ``max_angular = 1`` and inversion symmetry.

    Relies on ``sys`` being imported at module level.
    """
    import ubjson
    import os
    from copy import copy
    from itertools import product
    path = '../'
    sys.path.insert(0, os.path.join(path, 'build/'))
    sys.path.insert(0, os.path.join(path, 'tests/'))

    cutoffs = [2, 3]
    gaussian_sigmas = [0.2, 0.5]
    max_radials = [4, 10]
    max_angulars = [3, 6]
    soap_types = ["RadialSpectrum", "PowerSpectrum", "BiSpectrum"]

    fns = [
        os.path.join(
            path, "tests/reference_data/CaCrP2O7_mvc-11955_symmetrized.json"),
        os.path.join(path, "tests/reference_data/small_molecule.json")
    ]
    fns_to_write = [
        "reference_data/CaCrP2O7_mvc-11955_symmetrized.json",
        "reference_data/small_molecule.json",
    ]

    data = dict(filenames=fns_to_write,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                rep_info=[])

    for fn in fns:
        frames = [json2ase(load_json(fn))]
        for cutoff in cutoffs:
            print(fn, cutoff)
            data['rep_info'].append([])
            # same iteration order as the equivalent nested loops
            for (soap_type, gaussian_sigma, max_radial,
                 max_angular) in product(soap_types, gaussian_sigmas,
                                         max_radials, max_angulars):
                # The flag is computed for each combination: it used to be
                # a function-level variable set to True by the first
                # BiSpectrum and never reset, so it leaked into the
                # RadialSpectrum and PowerSpectrum hypers of all the
                # following (file, cutoff) passes.
                inversion_symmetry = False
                if 'RadialSpectrum' == soap_type:
                    max_angular = 0
                if "BiSpectrum" == soap_type:
                    max_radial = 2
                    max_angular = 1
                    inversion_symmetry = True

                hypers = {
                    "interaction_cutoff": cutoff,
                    "cutoff_smooth_width": 0.5,
                    "max_radial": max_radial,
                    "max_angular": max_angular,
                    "gaussian_sigma_type": "Constant",
                    "normalize": True,
                    "cutoff_function_type": "Cosine",
                    "radial_basis": "GTO",
                    "gaussian_sigma_constant": gaussian_sigma,
                    "soap_type": soap_type,
                    "inversion_symmetry": inversion_symmetry,
                }

                soap = SphericalInvariants(**hypers)
                soap_vectors = soap.transform(frames)
                x = soap_vectors.get_feature_matrix()
                # x = get_feature_vector(hypers, frames)
                data['rep_info'][-1].append(
                    dict(feature_matrix=x.tolist(),
                         hypers=copy(soap.hypers)))

    with open(
            os.path.join(
                path,
                "tests/reference_data/spherical_invariants_reference.ubjson"),
            'wb') as f:
        ubjson.dump(data, f)
def compute_soap(hypers):
    """Compute the spherical invariants features of the module-level
    ``frames`` for the given hypers and discard them (returns ``None``)."""
    calculator = SphericalInvariants(**hypers)
    managers = calculator.transform(frames)
    managers.get_features(calculator)
Пример #20
0
def dump_reference_json():
    """Generate the spherical invariants reference data file.

    For each input structure file and each cutoff, the feature matrix is
    computed for every combination of soap type, gaussian sigma,
    max_radial, max_angular and radial basis. Each feature matrix is stored
    together with the hypers of the calculator, one list per
    (file, cutoff) pair, and the whole is written with ubjson to
    ``spherical_invariants_reference.ubjson`` under ``root``/``dump_path``.

    RadialSpectrum always uses ``max_angular = 0``; BiSpectrum always uses
    ``max_radial = 2``, ``max_angular = 1`` and inversion symmetry.

    Relies on ``os``, ``sys``, ``root``, ``inputs_path``, ``dump_path`` and
    ``read`` being available at module level.
    """
    import ubjson
    from copy import copy
    from itertools import product

    sys.path.insert(0, os.path.join(root, 'build/'))
    sys.path.insert(0, os.path.join(root, 'tests/'))

    cutoffs = [2, 3]
    gaussian_sigmas = [0.2, 0.5]
    max_radials = [4, 10]
    max_angulars = [3, 6]
    soap_types = ["RadialSpectrum", "PowerSpectrum", "BiSpectrum"]
    radial_basis = ["GTO"]

    fns = [
        os.path.join(inputs_path, "CaCrP2O7_mvc-11955_symmetrized.json"),
        os.path.join(inputs_path, "small_molecule.json")
    ]
    fns_to_write = [
        os.path.join(dump_path, "CaCrP2O7_mvc-11955_symmetrized.json"),
        os.path.join(dump_path, "small_molecule.json"),
    ]

    data = dict(filenames=fns_to_write,
                cutoffs=cutoffs,
                gaussian_sigmas=gaussian_sigmas,
                max_radials=max_radials,
                soap_types=soap_types,
                rep_info=[])

    for fn in fns:
        frames = read(fn)
        for cutoff in cutoffs:
            print(fn, cutoff)
            data['rep_info'].append([])
            for (soap_type, gaussian_sigma, max_radial, max_angular,
                 rad_basis) in product(soap_types, gaussian_sigmas,
                                       max_radials, max_angulars,
                                       radial_basis):
                # The flag is computed for each combination: it used to be
                # a function-level variable set to True by the first
                # BiSpectrum and never reset, so it leaked into the
                # RadialSpectrum and PowerSpectrum hypers of all the
                # following (file, cutoff) passes.
                inversion_symmetry = False
                if 'RadialSpectrum' == soap_type:
                    max_angular = 0
                if "BiSpectrum" == soap_type:
                    max_radial = 2
                    max_angular = 1
                    inversion_symmetry = True

                hypers = {
                    "interaction_cutoff": cutoff,
                    "cutoff_smooth_width": 0.5,
                    "max_radial": max_radial,
                    "max_angular": max_angular,
                    "gaussian_sigma_type": "Constant",
                    "normalize": True,
                    "cutoff_function_type": "ShiftedCosine",
                    "radial_basis": rad_basis,
                    "gaussian_sigma_constant": gaussian_sigma,
                    "soap_type": soap_type,
                    "inversion_symmetry": inversion_symmetry,
                }

                soap = SphericalInvariants(**hypers)
                soap_vectors = soap.transform(frames)
                x = soap_vectors.get_features(soap)
                # values with a magnitude below 1e-300 are stored as 0
                x[np.abs(x) < 1e-300] = 0.
                data['rep_info'][-1].append(
                    dict(feature_matrix=x.tolist(), hypers=copy(soap.hypers)))

    with open(
            os.path.join(root, dump_path,
                         "spherical_invariants_reference.ubjson"), 'wb') as f:
        ubjson.dump(data, f)
Пример #21
0
    def test_numerical_kernel_stress(self):
        """Test that the numerical kernel stress computed on the python side
        matches the one computed on the C++ side.

        For each selected kernel input and each of its structures, the six
        stress components returned by `compute_numerical_kernel_gradients`
        are compared with a central finite difference of the kernel built
        from `displace_strain_tensor`. Every entry has to agree within
        `self.error_threshold`, either relatively or absolutely.
        """

        with open(self.kernel_input_filename, "r") as f:
            kernel_inputs = json.load(f)

        # only the inputs listed in `selected_test_indices` are tested
        kernel_inputs = [kernel_inputs[i] for i in self.selected_test_indices]
        for kernel_input in kernel_inputs:
            structures_filename = kernel_input["filename"]
            # reads the first `n_structures` structures of the file
            frames = ase.io.read(structures_filename,
                                 ":" + str(kernel_input["n_structures"]))
            # displacement used for the finite differences
            h_disp = kernel_input["h"]

            selected_ids = kernel_input["selected_ids"]
            hypers = kernel_input["calculator"]
            # TODO(alex) the cutoff function is kind of hard coded
            #            a general function transformation c++ parameters to
            #            python would be more suitable here
            #            future work
            # translate the nested hypers of the input file into the flat
            # keyword arguments of the python calculator
            calculator = SphericalInvariants(
                soap_type=hypers["soap_type"],
                radial_basis=hypers["radial_contribution"]["type"],
                max_radial=hypers["max_radial"],
                max_angular=hypers["max_angular"],
                cutoff_function_type=hypers["cutoff_function"]["type"],
                interaction_cutoff=hypers["cutoff_function"]["cutoff"]
                ["value"],
                cutoff_smooth_width=hypers["cutoff_function"]["smooth_width"]
                ["value"],
                gaussian_sigma_type=hypers["gaussian_density"]["type"],
                gaussian_sigma_constant=hypers["gaussian_density"]
                ["gaussian_sigma"]["value"],
                compute_gradients=hypers["compute_gradients"],
                normalize=hypers["normalize"],
            )
            kernel = Kernel(calculator,
                            kernel_type="Sparse",
                            **kernel_input["kernel"])
            for j in range(len(frames)):
                # we do this frame by frame to be able to use the function
                # `displace_strain_tensor` as in the
                # `test_displace_strain_tensor` test
                frame = frames[j]
                selected_id = selected_ids[j]
                managers = calculator.transform([frame])
                # sparse points built from the selected ids of this frame
                sparse_points = SparsePoints(calculator)
                sparse_points.extend(managers, [selected_id])

                # the bound C++ function; the minus is because the function
                # returns the negative stress. Only the last 6 rows are kept
                # (presumably the stress components in Voigt notation, with
                # the trailing `True` enabling their computation -- TODO
                # confirm against the binding)
                cpp_site_stress = -compute_numerical_kernel_gradients(
                    kernel, calculator, managers, sparse_points, h_disp,
                    True)[-6:]

                def compute_numerical_kernel_gradient_on_python_site():
                    # central finite difference of the kernel for each of
                    # the 6 index pairs in
                    # `matrix_indices_in_voigt_notation`; `managers` is
                    # local to this function and does not rebind the outer
                    # variable
                    python_site_stress = np.zeros((6, len(selected_id)))
                    for i in range(6):
                        # kernel for the structure displaced by +h_disp
                        frame_displaced_plus = displace_strain_tensor(
                            copy.deepcopy(frame),
                            self.matrix_indices_in_voigt_notation[i][0],
                            self.matrix_indices_in_voigt_notation[i][1],
                            h_disp,
                        )
                        managers = calculator.transform([frame_displaced_plus])
                        kernel_plus = kernel(managers, sparse_points)

                        # kernel for the structure displaced by -h_disp
                        frame_displaced_minus = displace_strain_tensor(
                            copy.deepcopy(frame),
                            self.matrix_indices_in_voigt_notation[i][0],
                            self.matrix_indices_in_voigt_notation[i][1],
                            -h_disp,
                        )
                        managers = calculator.transform(
                            [frame_displaced_minus])
                        kernel_minus = kernel(managers, sparse_points)

                        # the difference quotient is summed over the first
                        # axis of the kernel matrix
                        python_site_stress[i] = np.sum(
                            (kernel_plus - kernel_minus) / (2 * h_disp),
                            axis=0)
                    # normalized by the volume of the undisplaced frame
                    return python_site_stress / frame.get_volume()

                python_site_stress = compute_numerical_kernel_gradient_on_python_site(
                )

                relative_error = compute_relative_error(
                    python_site_stress, cpp_site_stress)
                absolute_error = np.abs(python_site_stress - cpp_site_stress)
                # an entry passes if either of its two errors is below the
                # threshold
                passes_test = np.all(
                    np.logical_or(
                        relative_error < self.error_threshold,
                        absolute_error < self.error_threshold,
                    ))
                if not (passes_test):
                    # print the details of the failure before asserting
                    np.set_printoptions(suppress=True)
                    print("structures_filename:", structures_filename)
                    print("structure index:", j)
                    print()
                    print("relative_error:\n", relative_error)
                    print()
                    print("python_site_stress:\n", python_site_stress)
                    print("cpp_site_stress:\n", cpp_site_stress)

                self.assertTrue(passes_test)
Пример #22
0
 def test_pickle(self):
     """Check that a calculator survives a pickle round trip, by comparing
     the dictionary form of the original and restored objects."""
     calculator = SphericalInvariants(**self.hypers)
     restored = pickle.loads(pickle.dumps(calculator))
     self.assertTrue(to_dict(calculator) == to_dict(restored))
Пример #23
0
    def test_representation_gradient(self):
        """
        Check get_features and get_features_gradient: the linear sparse
        kernel matrices rebuilt by hand from the exported features (and
        feature gradients) must match the ones returned by the kernel.
        """
        grad_hypers = deepcopy(self.hypers)
        grad_hypers["compute_gradients"] = True
        calculator = SphericalInvariants(**grad_hypers)
        managers = calculator.transform(self.frames)

        # one sparse point per species, selected with FPS
        n_sparses = {1: 1, 6: 1, 8: 1, 14: 1, 15: 1, 20: 1, 24: 1}
        selector = FPSFilter(calculator, n_sparses,
                             act_on="sample per species")
        sparse_points = selector.select_and_filter(managers)

        sparse_feats = sparse_points.get_features()
        n_sparse, n_feat = sparse_feats.shape

        # boolean mask of the sparse points of each species, the points
        # being taken consecutively in the order of `n_sparses`
        masks = {}
        offset = 0
        for species, count in n_sparses.items():
            species_mask = np.zeros(n_sparse, dtype=bool)
            species_mask[offset:offset + count] = True
            masks[species] = species_mask
            offset += count

        kernel = Kernel(calculator,
                        name="GAP",
                        zeta=1,
                        target_type="Structure",
                        kernel_type="Sparse")

        # one row per gradient entry: (frame, i, j, species of i, species of j)
        grad_info = managers.get_gradients_info()
        n_atoms = len(np.unique(grad_info[:, 1]))
        n_neigh = grad_info.shape[0]

        # kernel between the structures and the sparse points
        expected = kernel(managers, sparse_points, (False, False))
        feats = managers.get_features(calculator).reshape((n_atoms, n_feat))
        rebuilt = np.zeros((len(self.frames), n_sparse))
        i_center = 0
        for i_frame, manager in enumerate(managers):
            for center in manager:
                mask = masks[center.atom_type]
                rebuilt[i_frame, mask] += np.dot(feats[i_center],
                                                 sparse_feats[mask].T)
                i_center += 1
        self.assertTrue(np.allclose(expected, rebuilt))

        # kernel between the gradients and the sparse points
        expected = kernel(managers, sparse_points, (True, False))

        feats_grad = managers.get_features_gradient(calculator).reshape(
            (n_neigh, 3, n_feat))

        rebuilt = np.zeros((n_atoms, 3, n_sparse))
        for i_row, (_, _, j, i_species, _) in enumerate(grad_info):
            mask = masks[i_species]
            for axis in range(3):
                rebuilt[j, axis, mask] += np.dot(feats_grad[i_row, axis],
                                                 sparse_feats[mask].T)

        rebuilt = rebuilt.reshape((-1, n_sparse))

        self.assertTrue(np.allclose(expected, rebuilt))
Пример #24
0
def dump_reference_json():
    """Compute reference kernel matrices and dump them to a UBJSON file.

    The first ``length`` frames of ``small_molecules-20.json`` are read and,
    for every combination of the hyperparameter lists defined below, a
    ``SphericalInvariants`` representation and a ``Kernel`` are built. Each
    kernel matrix is stored together with the representation hypers,
    ``nl_options`` and the kernel hypers. The collected data is written to
    ``kernel_reference.ubjson`` inside ``root``/``dump_path``.
    """
    import ubjson
    import os
    from copy import copy

    # Make the build and test directories importable ('tests/' ends up first).
    for sub_dir in ('build/', 'tests/'):
        sys.path.insert(0, os.path.join(root, sub_dir))

    # Hyperparameter grid that is scanned below.
    cutoffs = [3.5]
    gaussian_sigmas = [0.5]
    max_radials = [6]
    max_angulars = [6]
    soap_types = ["RadialSpectrum", "PowerSpectrum"]
    representations = ['spherical_invariants']
    kernel_names = ['Cosine']
    target_types = ['Structure', 'Atom']
    dependant_args = dict(Cosine=[dict(zeta=1), dict(zeta=2), dict(zeta=4)])

    # Which structures are read, and the path recorded in the dump.
    start = 0
    length = 5
    input_name = "small_molecules-20.json"
    fn = os.path.join(inputs_path, input_name)
    fn_to_write = os.path.join('reference_data', "inputs", input_name)

    data = {
        'filename': fn_to_write,
        'start': start,
        'length': length,
        'cutoffs': cutoffs,
        'gaussian_sigmas': gaussian_sigmas,
        'max_radials': max_radials,
        'soap_types': soap_types,
        'kernel_names': kernel_names,
        'target_types': target_types,
        'dependant_args': dependant_args,
        'rep_info': {'spherical_invariants': []},
    }

    frames = read(fn, '{}:{}'.format(start, start + length))
    for representation_name in representations:
        for cutoff in cutoffs:
            print(fn, cutoff)
            # one list of records per (representation, cutoff) pair
            cutoff_records = []
            data['rep_info'][representation_name].append(cutoff_records)

            # Lazily enumerate the remaining hyperparameters in the same
            # order as a plain nest of for-loops would (kernel name
            # outermost, max_angular innermost).
            combinations = (
                (kernel_name, target_type, kwargs, soap_type,
                 gaussian_sigma, max_radial, max_angular)
                for kernel_name in kernel_names
                for target_type in target_types
                for kwargs in dependant_args[kernel_name]
                for soap_type in soap_types
                for gaussian_sigma in gaussian_sigmas
                for max_radial in max_radials
                for max_angular in max_angulars
            )
            for (kernel_name, target_type, kwargs, soap_type,
                 gaussian_sigma, max_radial, max_angular) in combinations:
                # RadialSpectrum is always computed with max_angular = 0
                angular = 0 if soap_type == 'RadialSpectrum' else max_angular

                hypers = dict(
                    interaction_cutoff=cutoff,
                    cutoff_smooth_width=0.5,
                    max_radial=max_radial,
                    max_angular=angular,
                    gaussian_sigma_type="Constant",
                    gaussian_sigma_constant=gaussian_sigma,
                    soap_type=soap_type,
                    cutoff_function_type="ShiftedCosine",
                    normalize=True,
                    radial_basis="GTO",
                )
                calculator = SphericalInvariants(**hypers)
                features = calculator.transform(frames)

                hypers_kernel = {'name': kernel_name,
                                 'target_type': target_type,
                                 **kwargs}
                kernel_matrix = Kernel(calculator, **hypers_kernel)(features)

                # expose the 'args' entry under the additional key
                # 'initialization_arguments' in every nl option
                for nl_option in calculator.nl_options:
                    nl_option['initialization_arguments'] = nl_option['args']

                cutoff_records.append({
                    'kernel_matrix': kernel_matrix.tolist(),
                    'hypers_rep': copy(calculator.hypers),
                    'hypers_manager': copy(calculator.nl_options),
                    'hypers_kernel': copy(hypers_kernel),
                })

    out_path = os.path.join(root, dump_path, "kernel_reference.ubjson")
    with open(out_path, 'wb') as f:
        ubjson.dump(data, f)
Пример #25
0
def compute_squared_radial_spectrum_wasserstein_distance(
        feature_paramaters, frames, nb_grid_points=200):
    """Compute a pairwise Wasserstein-type distance matrix from radial spectra.

    A ``SphericalInvariants`` radial spectrum with ``nb_grid_points`` DVR
    basis functions is computed for ``frames``, accumulated into a cumulative
    distribution per environment and species, and the inverse of that
    distribution is sampled on an interpolation grid in [0, 1]. Distances are
    then computed between the sampled inverse distributions.

    Parameters
    ----------
    feature_paramaters : dict
        Must contain

        * ``"feature_parameters"``: keyword arguments for
          ``SphericalInvariants``; ``soap_type`` must be ``"RadialSpectrum"``
          and ``radial_basis`` must be ``"DVR"``. Its ``max_radial`` is used
          as the number of interpolation points and ``normalize`` decides
          whether the Wasserstein features are normalized.
        * ``"hilbert_space_parameters"]["distance_parameters"``: a dict with
          ``grid_type`` (``"gaussian_quadrature"`` or ``"equispaced"``) and
          ``delta_normalization``.

        The dictionary is not modified.
    frames
        Structures handed to ``SphericalInvariants.transform``.
    nb_grid_points : int
        Number of DVR basis functions used to resolve the density.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nb_envs, nb_envs)``.

    Raises
    ------
    ValueError
        If ``soap_type``, ``radial_basis`` or ``grid_type`` is unsupported.
    """
    soap_parameters = feature_paramaters["feature_parameters"]
    if soap_parameters["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can be only computed for soap_type="RadialSpectrum".'
        )
    if soap_parameters["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can be only computed for radial_basis="DVR".'
        )

    # Validate the grid type up front so that a configuration error is
    # reported before the expensive representation is computed.
    distance_parameters = feature_paramaters["hilbert_space_parameters"][
        "distance_parameters"]
    grid_type = distance_parameters["grid_type"]
    if grid_type not in ("gaussian_quadrature", "equispaced"):
        raise ValueError(
            "The wasserstein grid_type={} is not known.".format(grid_type))

    nb_basis_functions = soap_parameters["max_radial"]
    normalize_wasserstein_features = soap_parameters["normalize"]
    cutoff = soap_parameters["interaction_cutoff"]

    # Override max_radial/normalize on a copy: writing into the caller's
    # dictionary would change the outcome of every later call that reuses it.
    soap_parameters = dict(soap_parameters,
                           max_radial=nb_grid_points,
                           normalize=False)

    # compute soap representation for interpolation
    representation = SphericalInvariants(**soap_parameters)
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses gaussian quadrature points as basis functions; we reproduce
    # the original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(
        nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    density_grid = np.hstack((0, density_grid))
    densities /= np.sqrt(density_weights)
    cdf = np.cumsum(densities, axis=1)

    # grid on which the inverse cumulative distribution is sampled
    interp_weights = None
    if grid_type == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(
            nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    else:  # "equispaced"
        interp_grid = np.linspace(0, 1, nb_basis_functions)

    # rows whose total density is nonzero (evaluated before padding)
    nonzero_mask = cdf[:, -1] != 0
    # insert the zero probability point at the beginning to help
    # interpolating at the beginning
    cdf = np.concatenate((np.zeros((cdf.shape[0], 1)), cdf), axis=1)

    if distance_parameters["delta_normalization"]:
        dist = np.zeros((nb_envs, nb_envs))
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points + 1)
        # potential bug when species are present
        # NOTE(review): if max_norm is 0 for a pair (both totals zero) the
        # division below produces NaN - confirm whether that can occur.
        for i in range(nb_envs):
            for j in range(nb_envs):
                for sp in range(nb_species):
                    # lift the last point of both distributions to the
                    # larger of the two totals, then normalize by it
                    max_norm = max(cdf[i, sp, -1], cdf[j, sp, -1])
                    cdf_i = np.copy(cdf[i, sp, :])
                    cdf_j = np.copy(cdf[j, sp, :])
                    cdf_i[-1] = max_norm
                    cdf_j[-1] = max_norm
                    cdf_i /= max_norm
                    cdf_j /= max_norm
                    interpolator_i = interp1d(cdf_i,
                                              density_grid,
                                              assume_sorted=True)
                    interpolator_j = interp1d(cdf_j,
                                              density_grid,
                                              assume_sorted=True)
                    wasserstein_features_i = interpolator_i(interp_grid)
                    wasserstein_features_j = interpolator_j(interp_grid)
                    dist[i, j] += np.sum(
                        (wasserstein_features_i - wasserstein_features_j)**2)
        return dist

    # normalize nonzero environments
    cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]
    wasserstein_features = np.zeros(
        (nb_envs * nb_species, nb_basis_functions))
    for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
        interpolator = interp1d(cdf[i, :],
                                density_grid,
                                assume_sorted=True)
        wasserstein_features[i, :] = interpolator(interp_grid)
    if interp_weights is not None:
        wasserstein_features *= np.sqrt(interp_weights)

    wasserstein_features = wasserstein_features.reshape(
        nb_envs, nb_species * nb_basis_functions)

    if normalize_wasserstein_features:
        wasserstein_features /= np.linalg.norm(wasserstein_features,
                                               axis=1)[:, np.newaxis]
    # NOTE(review): pdist defaults to the (non-squared) Euclidean metric,
    # whereas the delta-normalized branch above accumulates squared
    # differences - confirm which of the two callers expect.
    return squareform(pdist(wasserstein_features))
Пример #26
0
def compute_radial_spectrum_wasserstein_features(feature_paramaters, frames):
    """Compute Wasserstein features from a DVR radial spectrum.

    The radial spectrum of every environment is integrated into a cumulative
    distribution per species, normalized, and its inverse is sampled on an
    interpolation grid in [0, 1]. The sampled values are the features.

    Parameters
    ----------
    feature_paramaters : dict
        Must contain

        * ``"soap_parameters"``: keyword arguments for
          ``SphericalInvariants``; ``soap_type`` must be ``"RadialSpectrum"``
          and ``radial_basis`` must be ``"DVR"``. ``max_radial`` is the number
          of density grid points and ``normalize`` decides whether the
          returned features are normalized per environment.
        * ``"nb_basis_functions"``: number of interpolation points.
        * ``"grid_type"``: ``"gaussian_quadrature"`` or ``"equispaced"``.
        * ``"delta_normalization"``: if truthy, the cumulative distributions
          are modified before normalization (see ``delta_sigma``); the value
          ``2`` additionally zeroes features lying within ``1e-3`` of the
          cutoff.
        * ``"delta_sigma"`` and ``"delta_offset_percentage"``: only read when
          ``delta_normalization`` is truthy.

        The dictionary is not modified.
    frames
        Structures handed to ``SphericalInvariants.transform``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(nb_envs, nb_species * nb_basis_functions)``.

    Raises
    ------
    ValueError
        If ``soap_type``, ``radial_basis`` or ``grid_type`` is unsupported.
    """
    soap_parameters = feature_paramaters["soap_parameters"]
    if soap_parameters["soap_type"] != "RadialSpectrum":
        raise ValueError(
            'Wasserstein features can be only computed for soap_type="RadialSpectrum".'
        )
    if soap_parameters["radial_basis"] != "DVR":
        raise ValueError(
            'Wasserstein features can be only computed for radial_basis="DVR".'
        )

    # Validate the grid type up front so that a configuration error is
    # reported before the expensive representation is computed.
    grid_type = feature_paramaters["grid_type"]
    if grid_type not in ("gaussian_quadrature", "equispaced"):
        raise ValueError(
            "The wasserstein grid_type={} is not known.".format(grid_type))

    nb_basis_functions = feature_paramaters["nb_basis_functions"]
    nb_grid_points = soap_parameters["max_radial"]
    normalize_wasserstein_features = soap_parameters["normalize"]
    cutoff = soap_parameters["interaction_cutoff"]
    delta_normalization = feature_paramaters["delta_normalization"]

    # Switch off normalization on a copy: writing into the caller's
    # dictionary would silently disable the final normalization for every
    # later call that reuses it.
    soap_parameters = dict(soap_parameters, normalize=False)

    # compute soap representation for interpolation
    representation = SphericalInvariants(**soap_parameters)
    densities = representation.transform(frames).get_features(representation)
    nb_envs = densities.shape[0]
    nb_species = densities.shape[1] // nb_grid_points
    densities = densities.reshape(nb_envs * nb_species, nb_grid_points)

    # DVR uses gaussian quadrature points as basis functions; we reproduce
    # the original grid points for the interpolation
    density_grid, density_weights = np.polynomial.legendre.leggauss(
        nb_grid_points)
    density_grid = density_grid * cutoff / 2 + cutoff / 2
    densities /= np.sqrt(density_weights)

    # `cumtrapz` was renamed to `cumulative_trapezoid` and later removed from
    # SciPy; prefer the new name and fall back for old installations.
    cumulative_trapezoid = getattr(scipy.integrate, "cumulative_trapezoid",
                                   None)
    if cumulative_trapezoid is None:
        cumulative_trapezoid = scipy.integrate.cumtrapz
    cdf = cumulative_trapezoid(densities, density_grid)
    # insert the zero probability point at the beginning to help
    # interpolating at the beginning
    cdf = np.hstack((np.zeros((cdf.shape[0], 1)), cdf))

    if delta_normalization:
        cdf = cdf.reshape(nb_envs, nb_species, nb_grid_points)
        delta_sigma = feature_paramaters["delta_sigma"]
        delta_offset_percentage = feature_paramaters["delta_offset_percentage"]
        if delta_sigma is None:
            # set the last point of every distribution of a species to the
            # largest total of that species, enlarged by the given fraction
            for i in range(nb_species):
                max_norm = np.max(cdf[:, i, -1])
                max_norm += delta_offset_percentage * max_norm
                cdf[:, i, -1] += max_norm - cdf[:, i, -1]
        else:
            for i in range(nb_species):
                cdf[:, i, :] = bump_function(density_grid, cdf[:, i, :],
                                             cutoff, delta_sigma)
        cdf = cdf.reshape(nb_envs * nb_species, nb_grid_points)

    # normalize nonzero environments
    nonzero_mask = cdf[:, -1] != 0
    cdf[nonzero_mask] /= cdf[:, -1][nonzero_mask][:, np.newaxis]

    # grid on which the inverse cumulative distribution is sampled
    interp_weights = None
    if grid_type == "gaussian_quadrature":
        interp_grid, interp_weights = np.polynomial.legendre.leggauss(
            nb_basis_functions)
        interp_grid = interp_grid / 2 + 0.5
    else:  # "equispaced"
        interp_grid = np.linspace(0, 1, nb_basis_functions)

    wasserstein_features = np.zeros((nb_envs * nb_species, nb_basis_functions))
    # add jitter for uniqueness (a tiny, column-wise increasing offset)
    jitter = np.finfo(0.1).tiny * np.arange(cdf.shape[1])
    cdf += jitter[np.newaxis, :]
    for i in np.where(nonzero_mask)[0]:  # subset of nb_envs*nb_species
        interpolator = interp1d(cdf[i],
                                density_grid,
                                assume_sorted=True,
                                kind='linear')
        wasserstein_features[i, :] = interpolator(interp_grid)

    # delta normalization 2 sets delta areas to 0 so they cannot be used as
    # features
    if delta_normalization == 2:
        wasserstein_features[cutoff - 1e-3 <= wasserstein_features] = 0

    if interp_weights is not None:
        wasserstein_features *= np.sqrt(interp_weights)

    wasserstein_features = wasserstein_features.reshape(
        nb_envs, nb_species * nb_basis_functions)

    if normalize_wasserstein_features:
        wasserstein_features /= np.linalg.norm(wasserstein_features,
                                               axis=1)[:, np.newaxis]

    return wasserstein_features
Пример #27
0
 def __init__(self, spherical_hypers, target):
     """Build the representation and remember the target type.

     ``spherical_hypers`` is unpacked as keyword arguments into
     ``SphericalInvariants``; ``target`` has to be ``'Atom'`` or
     ``'Structure'`` (checked with an assertion).
     """
     representation = SphericalInvariants(**spherical_hypers)
     self.representation = representation
     # only these two target types are accepted
     assert target in ('Atom', 'Structure')
     self.target = target
def dump_reference_json():
    """Write the inputs of the sparsification reference tests to JSON.

    For every combination of structure file, SOAP type and selection mode a
    dense ``SphericalInvariants`` calculator and a second one restricted to a
    subset of its coefficients (``coefficient_subselection``) are set up. The
    hypers of both, the adaptor parameters of the sparse managers, the input
    file name and the selection mode are collected and written to
    ``sparsification_inputs.json`` inside ``root``/``dump_path``.
    """
    sys.path.insert(0, join(root, "build/"))
    sys.path.insert(0, join(root, "tests/"))
    from rascal.representations import SphericalInvariants
    from ase.io import read

    # fixed seed so that the random selections are reproducible
    np.random.seed(10)
    structure_files = [
        "diamond_2atom_distorted.json",
        "CaCrP2O7_mvc-11955_symmetrized.json",
        "methane.json",
    ]
    soap_types = ["PowerSpectrum"]
    selection_modes = ["all", "all_random", "8_random"]

    sparsification_inputs = []
    for fn, soap_type, Nselect in product(structure_files, soap_types,
                                          selection_modes):
        frames = read(join(inputs_path, fn), ":")

        hypers = {
            "soap_type": soap_type,
            "interaction_cutoff": 3.5,
            "max_radial": 2,
            "max_angular": 2,
            "gaussian_sigma_constant": 0.4,
            "gaussian_sigma_type": "Constant",
            "cutoff_smooth_width": 0.5,
            "normalize": False,
            "compute_gradients": True,
            "expansion_by_species_method": "structure wise",
        }

        soap = SphericalInvariants(**hypers)
        managers = soap.transform(frames)

        # select some features from the possible set
        mapping = soap.get_feature_index_mapping(managers)
        ids = np.array(list(mapping.keys()))
        if Nselect in ("all_random", "8_random"):
            # random order; "8_random" additionally keeps only the first 8
            np.random.shuffle(ids)
            if Nselect == "8_random":
                ids = ids[:8]
        elif Nselect != "all":
            raise NotImplementedError()

        # per coefficient field: the value of each selected feature, in the
        # order given by ids
        selected_features = {
            key: [int(mapping[idx][key]) for idx in ids]
            for key in mapping[0].keys()
        }
        # selected_features_global_ids is important for the tests
        selected_features["selected_features_global_ids"] = ids.tolist()

        sparse_hypers = deepcopy(hypers)
        sparse_hypers["coefficient_subselection"] = selected_features

        soap_s = SphericalInvariants(**sparse_hypers)
        managers_s = soap_s.transform(frames)

        entry = {
            "hypers": {
                "rep": soap.hypers,
                "rep_sparse": soap_s.hypers,
                "adaptors": json.loads(managers_s.managers.get_parameters()),
            },
            "filename": join(read_inputs_path, fn),
            "Nselect": Nselect,
        }
        sparsification_inputs.append(entry)

    fn_out = join(root, dump_path, "sparsification_inputs.json")
    print(fn_out)
    with open(fn_out, "w") as f:
        f.write(prettyjson(sparsification_inputs, indent=2, maxlinelength=80))
Пример #29
0
    mask_center_atoms_by_species(molecule, species_select=[
        'C',
    ])
    # Also works by atomic number
    #mask_center_atoms_by_species(molecule, species_select=[6,])

# Hyperparameters passed as keyword arguments to SphericalInvariants below.
hypers = {
    'interaction_cutoff': 5.0,
    'cutoff_smooth_width': 0.5,
    'max_radial': 8,
    'max_angular': 6,
    'gaussian_sigma_type': "Constant",
    'gaussian_sigma_constant': 0.3
}

# Compute the representation for the molecules as they are now and report
# the number of rows of the feature matrix.
representation = SphericalInvariants(**hypers)
atoms_transformed = representation.transform(molecules)
print("Number of feature vectors computed: {:d}".format(
    atoms_transformed.get_features(representation).shape[0]))

print("Now masking out the first 5 atoms of each molecule.")
# Count, over all molecules, the atoms with atomic number 6 (carbon) located
# after the first five atoms.
# NOTE(review): presumably only carbon centres are still active because of a
# species mask applied earlier - confirm against the preceding code.
n_remaining_centers = sum(
    np.sum((mol.get_atomic_numbers()[5:] == 6)) for mol in molecules)
print("Number of centres remaining: {:d}".format(n_remaining_centers))

# Blacklist atom ids 0-4 as centres in every molecule, then recompute the
# representation with the same calculator and report the new row count.
for molecule in molecules:
    mask_center_atoms_by_id(molecule, id_blacklist=np.arange(5))
atoms_transformed = representation.transform(molecules)
print("Number of feature vectors computed: {:d}".format(
    atoms_transformed.get_features(representation).shape[0]))
Пример #30
0
def get_rascal_ps_kernel(structures, hypers):
    """Return the linear kernel (Gram matrix) of the SOAP features.

    ``structures`` are first passed through ``process_structures``; the
    features of a ``SphericalInvariants`` calculator built from ``hypers``
    are then multiplied with their own transpose.
    """
    processed = process_structures(structures)
    calculator = SphericalInvariants(**hypers)
    managers = calculator.transform(processed)
    features = managers.get_features(calculator)
    return features.dot(features.T)