Exemplo n.º 1
0
    def test_xy(self):
        """Checks that a kernel can be evaluated between two distinct sets of
        structures, which is what kernel-based models need at prediction time.
        """
        # A single SOAP descriptor is shared by every molecule in this test
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )

        # Two structures on the row side, one structure on the column side
        row_feats = [soap.create(molecule(name)) for name in ("H2O", "O2")]
        col_feats = [soap.create(molecule("H2O2"))]

        # REMatch kernel on top of a linear (dot-product) metric
        rematch = REMatchKernel(metric="linear", alpha=0.1, threshold=1e-6)
        gram = rematch.create(row_feats, col_feats)

        # One row per structure in the first set, one column per structure in
        # the second set
        self.assertEqual(gram.shape, (2, 1))
Exemplo n.º 2
0
    def test_convergence_infinity(self):
        """Checks that the REMatch kernel converges to the average kernel in
        the limit of infinite alpha.
        """
        # A single SOAP descriptor is shared by both molecules
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        feats = [soap.create(molecule(name)) for name in ("H2O", "H2O2")]

        # REMatch kernel evaluated with a very large alpha
        rematch = REMatchKernel(metric="linear", alpha=1e20, threshold=1e-6)
        rematch_gram = rematch.create(feats)

        # Average kernel on the same features and metric
        average = AverageKernel(metric="linear")
        average_gram = average.create(feats)

        # Both kernel matrices should agree up to numerical tolerance
        self.assertTrue(np.allclose(rematch_gram, average_gram))
Exemplo n.º 3
0
    def test_difference(self):
        """Checks the similarity values for identical, unrelated and related
        pairs of molecules.
        """
        soap = SOAP(species=[1, 6, 7, 8],
                    rcut=5.0,
                    nmax=2,
                    lmax=2,
                    sigma=0.2,
                    periodic=False,
                    crossover=True,
                    sparse=False)

        def features_of(name):
            """SOAP features of the named molecule."""
            return soap.create(molecule(name))

        # Identical molecules: every kernel entry should be (close to) one
        water = features_of("H2O")
        rematch = REMatchKernel(metric="linear", alpha=1, threshold=1e-6)
        gram = rematch.create([water, water])
        deviation = np.abs(gram - 1)
        self.assertTrue(np.all(deviation < 1e-3))

        # Completely different molecules: the kernel should be (close to) the
        # identity matrix
        gram = rematch.create([features_of("N2"), features_of("H2O")])
        deviation = np.abs(gram - np.eye(2))
        self.assertTrue(np.all(deviation < 1e-3))

        # Somewhat similar molecules: the off-diagonal entry should be high
        gram = rematch.create([features_of("H2O"), features_of("H2O2")])
        self.assertTrue(gram[0, 1] > 0.9)
Exemplo n.º 4
0
    def test_metrics(self):
        """Checks that the kernel can be built with several of the metrics
        defined by scikit-learn."""
        soap = SOAP(
            species=[1, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False,
        )
        water = soap.create(molecule("H2O"))

        # Metric-specific settings: linear dot product, Gaussian, Laplacian
        metric_options = (
            {"metric": "linear"},
            {"metric": "rbf", "gamma": 1},
            {"metric": "laplacian", "gamma": 1},
        )

        # The test only requires that each kernel can be constructed and
        # evaluated; the resulting matrices are not inspected
        for options in metric_options:
            rematch = REMatchKernel(alpha=0.1, threshold=1e-6, **options)
            rematch.create([water, water])
Exemplo n.º 5
0
def remax_listcomp(desc_list):
    """Return the REMatch similarity of every pair of consecutive descriptors.

    For each neighbouring pair in ``desc_list`` the second descriptor is cut
    down to the column count of the first, both are normalised, and the
    off-diagonal entry of their 2x2 REMatch kernel is collected.

    :param desc_list: sequence of 2D descriptor arrays
    :return: list with one similarity per consecutive pair
    """
    rematch = REMatchKernel(metric='rbf', gamma=1, alpha=1, threshold=1e-6)

    def pair_similarity(current, following):
        """Similarity between one descriptor and its successor."""
        width = len(current[0])
        pair = [current, following[:, 0:width]]
        normed = [normalize(member) for member in pair]
        return rematch.create(normed)[0][1]

    return [
        pair_similarity(current, following)
        for current, following in zip(desc_list, desc_list[1:])
    ]
Exemplo n.º 6
0
 def __init__(self, args):
     """Set up the trainable parameters and the REMatch kernel.

     :param args: stored unchanged on the instance as ``self.args``
     """
     super().__init__(active_dims=[0])

     # Two positive-constrained parameters, starting at 10.0 and 1.0
     self.var = gpflow.Parameter(10.0, transform=positive())
     self.mag = gpflow.Parameter(1.0, transform=positive())
     self.args = args

     # Cubic polynomial REMatch kernel with normalised output
     rematch_settings = dict(
         metric="polynomial",
         degree=3,
         gamma=1,
         coef0=0,
         alpha=0.5,
         threshold=1e-6,
         normalize_kernel=True,
     )
     self.re = REMatchKernel(**rematch_settings)
Exemplo n.º 7
0
 def test_sparse(self):
     """Checks that kernels can also be constructed from sparse features.
     """
     # SOAP descriptor configured to return sparse output
     soap = SOAP(species=[1, 8],
                 rcut=5.0,
                 nmax=2,
                 lmax=2,
                 sigma=0.2,
                 periodic=False,
                 crossover=True,
                 sparse=True)
     water_feat = soap.create(molecule('H2O'))

     # The test only requires that the kernel can be evaluated; the result
     # is not inspected
     rematch = REMatchKernel(metric="linear", alpha=0.1, threshold=1e-6)
     rematch.create([water_feat])
Exemplo n.º 8
0
        def update_soap_similarities(struct, all_kwargs):
            """Rank reference structures by their REMatch similarity to the input.

            The submitted structure is compared with every structure returned
            by ``_get_all_structs_from_elements`` for its elements, and one
            graph per reference structure is produced, most similar first.

            :param struct: serialised structure handed to ``self.from_data``;
                a falsy value aborts the callback with ``PreventUpdate``
            :param all_kwargs: not read here; the settings are rebuilt from
                ``callback_context.inputs`` instead
            :return: an ``html.Div`` wrapping the list of similarity graphs
            """
            if not struct:
                raise PreventUpdate

            input_struct = self.from_data(struct)
            structs = {"input": input_struct}
            kwargs = self.reconstruct_kwargs_from_state(
                callback_context.inputs)

            # Reference structures are looked up by the input's element symbols
            input_symbols = [
                str(el) for el in input_struct.composition.elements
            ]
            structs.update(_get_all_structs_from_elements(input_symbols))

            if not structs:
                raise PreventUpdate

            # The descriptor has to know every element of every structure
            all_elements = {
                elem
                for candidate in structs.values()
                for elem in candidate.composition.elements
            }
            # TODO: make sure is_int kwarg information is enforced so that int() conversion is unnecessary
            soap = SOAP(
                species=[elem.number for elem in all_elements],
                sigma=kwargs["sigma"],
                rcut=kwargs["rcut"],
                nmax=int(kwargs["nmax"]),
                lmax=int(kwargs["lmax"]),
                periodic=True,
                crossover=kwargs["crossover"],
                sparse=False,
                average=kwargs["average"],
            )

            # Convert every structure to ASE atoms before featurising
            adaptor = AseAtomsAdaptor()
            atomss = {}
            for mpid, candidate in structs.items():
                atomss[mpid] = adaptor.get_atoms(candidate)

            print(f"Calculating {len(atomss)} SOAP vectors")
            features = {}
            for mpid, atoms in atomss.items():
                raw = soap.create(atoms, n_jobs=cpu_count())
                features[mpid] = normalize(raw)

            rematch = REMatchKernel(
                metric=kwargs["metric"],
                alpha=kwargs["alpha"],
                threshold=kwargs["threshold"],
                # normalize_kernel=kwargs["normalize_kernel"],
            )

            print("Calculating similarity kernel")
            query_feature = features["input"]
            similarities = {}
            for mpid, feature in features.items():
                if mpid == "input":
                    continue
                pairwise = rematch.get_pairwise_matrix(query_feature, feature)
                similarities[mpid] = rematch.get_global_similarity(pairwise)

            # Most similar first
            sorted_mpids = sorted(similarities,
                                  key=lambda mpid: -similarities[mpid])

            print("Generating similarity graphs")
            # TODO: was much slower using px.imshow (see prev commit)
            all_graphs = []
            for mpid in sorted_mpids:
                label = [
                    html.Span(
                        f"{unicodeify(structs[mpid].composition.reduced_formula)}"
                    ),
                    dcc.Markdown(
                        f"[{mpid}](https://materialsproject.org/{mpid})"),
                    html.Span(f"{similarities[mpid]:.5f}"),
                ]
                all_graphs.append(_get_soap_graph(features[mpid], label))

            print("Returning similarity graphs")
            return html.Div(all_graphs)
Exemplo n.º 9
0
        def update_soap_similarities(struct, all_kwargs):
            """Rank queried structures by their REMatch similarity to the input.

            Every chemical system that can be formed from the input's elements
            is queried through ``MPRester``; the returned structures are
            compared with the input and one graph per structure is produced,
            most similar first.

            :param struct: serialised structure handed to ``self.from_data``;
                a falsy value aborts the callback with ``PreventUpdate``
            :param all_kwargs: not read here; the settings are rebuilt from
                ``callback_context.inputs`` instead
            :return: an ``html.Div`` wrapping the list of similarity graphs
            """
            if not struct:
                raise PreventUpdate

            input_struct = self.from_data(struct)
            structs = {"input": input_struct}
            kwargs = self.reconstruct_kwargs_from_state(
                callback_context.inputs)

            # Every non-empty subset of the input's elements, written as a
            # sorted, dash-joined chemical system string
            input_symbols = [
                str(el) for el in input_struct.composition.elements
            ]
            all_chemsyses = [
                "-".join(sorted(subset))
                for size in range(1, len(input_symbols) + 1)
                for subset in itertools.combinations(input_symbols, size)
            ]

            with MPRester() as mpr:
                docs = mpr.query(
                    {"chemsys": {
                        "$in": all_chemsyses
                    }},
                    ["task_id", "structure"],
                )
            for doc in docs:
                structs[doc["task_id"]] = doc["structure"]

            if not structs:
                raise PreventUpdate

            # The descriptor has to know every element of every structure
            all_elements = {
                elem
                for candidate in structs.values()
                for elem in candidate.composition.elements
            }
            # TODO: make sure is_int kwarg information is enforced so that int() conversion is unnecessary
            soap = SOAP(
                species=[elem.number for elem in all_elements],
                sigma=kwargs["sigma"],
                rcut=kwargs["rcut"],
                nmax=int(kwargs["nmax"]),
                lmax=int(kwargs["lmax"]),
                periodic=True,
                crossover=kwargs["crossover"],
                sparse=False,
                average=kwargs["average"],
            )

            # Convert every structure to ASE atoms before featurising
            adaptor = AseAtomsAdaptor()
            atomss = {}
            for mpid, candidate in structs.items():
                atomss[mpid] = adaptor.get_atoms(candidate)

            print(f"Calculating {len(atomss)} SOAP vectors")
            features = {}
            for mpid, atoms in atomss.items():
                raw = soap.create(atoms, n_jobs=cpu_count())
                features[mpid] = normalize(raw)

            rematch = REMatchKernel(
                metric=kwargs["metric"],
                alpha=kwargs["alpha"],
                threshold=kwargs["threshold"],
                normalize_kernel=kwargs["normalize_kernel"],
            )

            print("Calculating similarity kernel")
            gram = rematch.create(list(features.values()))

            # "input" was inserted first, so row 0 of the kernel holds its
            # similarity to every structure, in the key order of `features`
            similarities = {}
            for mpid, score in zip(features, gram[0]):
                if mpid != "input":
                    similarities[mpid] = score

            # Most similar first
            sorted_mpids = sorted(similarities,
                                  key=lambda mpid: -similarities[mpid])

            all_graphs = []
            for mpid in sorted_mpids:
                label = [
                    html.Span(
                        f"{unicodeify(structs[mpid].composition.reduced_formula)}"
                    ),
                    dcc.Markdown(
                        f"[{mpid}](https://materialsproject.org/{mpid})"),
                    html.Span(f"{similarities[mpid]:.5f}"),
                ]
                all_graphs.append(_get_soap_graph(features[mpid], label))

            return html.Div(all_graphs)
Exemplo n.º 10
0
def main(args):
    """Generate SOAP descriptors and, optionally, a REMatch kernel using MPI.

    Molecules are read with ``read_xyz``, split between the MPI processes, and
    described with two SOAP descriptors (a short-range and a long-range one)
    whose outputs are concatenated. Depending on the flags, each process saves
    its per-molecule descriptors and/or the processes jointly build the full
    kernel matrix, which the root process saves.

    :param args: object providing ``task``, ``subtask``, ``save_soap``,
        ``soap_path``, ``save_kernel`` and ``kernel_path``
    """
    # The 'IC50' task keeps its data in a sub-directory with one file per subtask
    if args.task != 'IC50':
        mols, num_list, atom_list, species = read_xyz('data/' + args.task +
                                                      '.xyz')
    else:
        mols, num_list, atom_list, species = read_xyz('data/' + args.task +
                                                      '/' + args.subtask +
                                                      '.xyz')

    dat_size = len(mols)

    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()
    mpi_size = mpi_comm.Get_size()

    # Only the root process reports progress
    if mpi_rank == 0:
        # NOTE(review): `data_name` is not assigned anywhere in this function;
        # presumably it is a module-level global - confirm, otherwise this
        # line raises NameError.
        print("\nEvaluating " + data_name + " rematch on " + str(mpi_size) +
              " MPI processes.\n")
        print('No. of molecules = {}\n'.format(dat_size))
        print('Elements present = {}\n'.format(species))

    # Setting up the SOAP descriptor
    rcut_small = 3.0
    sigma_small = 0.2
    rcut_large = 6.0
    sigma_large = 0.4

    small_soap = SOAP(species=species,
                      periodic=False,
                      rcut=rcut_small,
                      nmax=12,
                      lmax=8,
                      sigma=sigma_small,
                      sparse=True)

    large_soap = SOAP(species=species,
                      periodic=False,
                      rcut=rcut_large,
                      nmax=12,
                      lmax=8,
                      sigma=sigma_large,
                      sparse=True)

    t0 = time.time()
    my_border_low, my_border_high = return_borders(
        mpi_rank, dat_size, mpi_size)  # split indices between MPI processes

    # Descriptors of this process's molecules; the two SOAP outputs are
    # concatenated column-wise
    my_mols = mols[my_border_low:my_border_high]
    soap = scipy.sparse.hstack(
        [small_soap.create(my_mols),
         large_soap.create(my_mols)])  # generate atomic descriptors

    t1 = time.time()
    if mpi_rank == 0:
        print("SOAP: {:.2f}s\n".format(t1 - t0))
        print(
            "rcut_small = {:.1f}, sigma_small = {:.1f}, rcut_large = {:.1f}, sigma_large = {:.1f}"
            .format(rcut_small, sigma_small, rcut_large, sigma_large))

    soap = normalize(soap, copy=False)
    my_soap = split_by_lengths(soap, num_list[my_border_low:my_border_high]
                               )  # group atomic descriptors by molecule
    my_len = len(my_soap)

    t2 = time.time()
    if mpi_rank == 0:
        print("Normalise & Split Descriptors: {:.2f}s\n".format(t2 - t1))

    if args.save_soap:  # save to args.soap_path for use with gpr_onthefly.py
        # File names carry the global molecule index (local index + offset)
        for i, mat in enumerate(my_soap):
            if args.task != 'IC50':
                scipy.sparse.save_npz(
                    args.soap_path + args.task + '_soap_' +
                    str(i + my_border_low), mat)
            else:
                scipy.sparse.save_npz(
                    args.soap_path + args.subtask + '_soap_' +
                    str(i + my_border_low), mat)

    if args.save_kernel:  # save to args.kernel_path for use with gpr_soap.py
        re = REMatchKernel(metric="polynomial",
                           degree=3,
                           gamma=1,
                           coef0=0,
                           alpha=0.5,
                           threshold=1e-6,
                           normalize_kernel=True)

        # This process's block of kernel rows: one row per local molecule,
        # one column per molecule in the whole data set
        K = np.zeros((my_len, dat_size), dtype=np.float32)
        # Number of kernel elements each process contributes, collected on root
        sendcounts = np.array(mpi_comm.gather(my_len * dat_size, root=0))

        # Only the root process allocates the full kernel matrix
        if mpi_rank == 0:
            K_full = np.empty((dat_size, dat_size), dtype=np.float32)
            print("K memory usage(bytes): {}".format(K.nbytes + K_full.nbytes))
        else:
            K_full = None

        # Row-parallelised kernel computation: fill the local rows one column
        # block (one per MPI process) at a time
        for index in range(0, mpi_size):
            if index == mpi_rank:
                # Local molecules against themselves; descriptors already exist
                K[:,
                  my_border_low:my_border_high] += re.create(my_soap).astype(
                      np.float32)
                continue  # skip recomputing descriptors that are already available

            # Recompute the descriptors owned by process `index` locally
            # rather than receiving them from that process
            start, end = return_borders(index, dat_size, mpi_size)
            ref_mols = mols[start:end]
            ref_soap = scipy.sparse.hstack(
                [small_soap.create(ref_mols),
                 large_soap.create(ref_mols)])
            ref_soap = normalize(ref_soap, copy=False)
            ref_soap = split_by_lengths(ref_soap, num_list[start:end])
            K[:, start:end] += re.create(my_soap, ref_soap).astype(np.float32)

        # Gather the kernel rows of every process into K_full on the root
        mpi_comm.Gatherv(sendbuf=K, recvbuf=(K_full, sendcounts), root=0)

        # K_full is None on every process except the root
        K = K_full

        if mpi_rank == 0:
            t3 = time.time()
            print("Normalised Kernel: {:.2f}s\n".format(t3 - t2))

            # NOTE(review): `data_name` is not defined in this function (see
            # the note at the first print above) - confirm it exists.
            np.save(args.kernel_path + data_name + '_soap', K)
            print(K)

    # Wait for every process before shutting MPI down
    mpi_comm.Barrier()
    MPI.Finalize()
Exemplo n.º 11
0
from dscribe.descriptors import SOAP
from dscribe.kernels import REMatchKernel

from ase.build import molecule

# We will compare two similar molecules
a = molecule("H2O")
b = molecule("H2O2")

# First we have to create the features for the atomic environments. Let's
# use SOAP.
desc = SOAP(species=[1, 6, 7, 8],
            rcut=5.0,
            nmax=2,
            lmax=2,
            sigma=0.2,
            periodic=False,
            crossover=True,
            sparse=False)
a_features = desc.create(a)
b_features = desc.create(b)

# Calculates the similarity with a REMatch kernel and a linear metric. The
# result will be a full similarity matrix.
re = REMatchKernel(metric="linear", alpha=1, threshold=1e-6)
re_kernel = re.create([a_features, b_features])

# Any metric supported by scikit-learn will work, e.g. a Gaussian. Note that
# this overwrites `re` and `re_kernel` from the linear example above.
re = REMatchKernel(metric="rbf", gamma=1, alpha=1, threshold=1e-6)
re_kernel = re.create([a_features, b_features])
Exemplo n.º 12
0
def pop_fitness(population,
                rcut,
                sigma,
                kernel,
                tgt_atoms,
                tgt_species,
                tgt_atoms2=None,
                max_score=None):
    """
    Calculates the fitness (ie SOAP similarity score) of the population by generating conformers for each of the
    population molecules, then evaluating their SOAP descriptors and calculating its similarity score with the SOAP
    descriptor of the binding ligand 'field'

    Conformer generation and similarity calculation are the computational bottlenecks  - might be worth splitting the
    task up with MPI. see return_borders.py in helper.py if you want to do that - make sure you only run the
    reproduction on the master node (since there is randomness), then broadcast to the other nodes

    Molecules for which no conformer could be embedded are removed from ``population`` IN PLACE, so that the
    returned fitness array stays index-aligned with ``population``.

    :param population: list of RDKit molecule objects (modified in place, see above)
    :param rcut, sigma: SOAP parameters
    :param kernel: 'rematch' or 'average'
    :param tgt_atoms: list of ASE atom objects of the target ligand field - from read_xyz
    :param tgt_species: list of the atomic species present in the target ligand field - from read_xyz
    :param tgt_atoms2: optional second target field (separate site); when given, the fitness is the product of
                       the similarities to both targets
    :param max_score: [score, SMILES] of the best molecule found so far; defaults to [-9999, '']

    :raises ValueError: if kernel is neither 'rematch' nor 'average'

    :return: fitness (normalised to sum to one), max_score
    """
    # Fail early with a clear message instead of an UnboundLocalError later on
    if kernel not in ('rematch', 'average'):
        raise ValueError(
            "kernel must be 'rematch' or 'average', got {!r}".format(kernel))
    # A fresh list per call, so the default is never shared between callers
    if max_score is None:
        max_score = [-9999, '']

    t0 = time.time()

    # loop over RDKit mols and turn them into lists of ASE atom objects for dscribe SOAP atomic feature generation
    population_ase = []
    num_list = []
    species = ['C']
    failed = []  # indices of molecules without an embedded conformer
    for idx, mol in enumerate(population):
        mol = Chem.AddHs(mol)
        if AllChem.EmbedMolecule(mol, maxAttempts=1000) != 0:
            failed.append(idx)
            continue
        mol = Chem.RemoveHs(mol)
        atoms = mol.GetAtoms()
        num_list.append(len(atoms))
        # Fetch the coordinates once per molecule rather than once per atom
        positions = mol.GetConformer().GetPositions()
        for i, atom in enumerate(atoms):
            symbol = atom.GetSymbol()
            population_ase.append(Atoms(symbol, [positions[i]]))
            if symbol not in species:  # find unique atomic species for SOAP generation
                species.append(symbol)

    # Filter out molecules which have no conformers. Deleting by index (from
    # the back, so earlier indices stay valid) is required here: AddHs and
    # RemoveHs return new molecule objects, so the list can not be searched
    # for them.
    for idx in reversed(failed):
        del population[idx]

    # Check that we also include the atom types present in the ligand targets
    for atom in tgt_species:
        if atom not in species:
            species.append(atom)
    t1 = time.time()
    print('Time taken to generate conformers: {}'.format(t1 - t0))

    # Generate SOAP descriptors using dscribe
    soap_generator = SOAP(species=species,
                          periodic=False,
                          rcut=rcut,
                          nmax=8,
                          lmax=6,
                          sigma=sigma,
                          sparse=True)
    soap = soap_generator.create(population_ase)
    tgt_soap = soap_generator.create(tgt_atoms)
    if tgt_atoms2 is not None:
        tgt_soap2 = [normalize(soap_generator.create(tgt_atoms2), copy=False)]

    # normalize SOAP atom descriptors and group by molecule
    soap = normalize(soap, copy=False)
    tgt_soap = [normalize(tgt_soap, copy=False)]
    soap = split_by_lengths(soap, num_list)

    t2 = time.time()
    print('Time taken to generate SOAP descriptors: {}'.format(t2 - t1))

    # TODO make REMatch kernel args as input args
    if kernel == 'rematch':
        soap_similarity = REMatchKernel(metric="polynomial",
                                        degree=3,
                                        gamma=1,
                                        coef0=0,
                                        alpha=0.1,
                                        threshold=1e-3,
                                        normalize_kernel=True)
    else:  # 'average', validated at the top of the function
        soap_similarity = AverageKernel(metric="polynomial",
                                        degree=3,
                                        gamma=1,
                                        coef0=0,
                                        normalize_kernel=True)

    # Each target is a single structure, so the kernel has one column and
    # flattening gives one score per molecule
    fitness = soap_similarity.create(soap, tgt_soap).flatten()
    if tgt_atoms2 is not None:
        # calculate fitness score as product of the two fitnesses
        fitness = fitness * soap_similarity.create(soap, tgt_soap2).flatten()

    t3 = time.time()
    print('Time taken to calculate fitness: {}'.format(t3 - t2))
    # update max_score, include new champion
    best = np.argmax(fitness)
    if fitness[best] > max_score[0]:
        max_score = [fitness[best], Chem.MolToSmiles(population[best])]

    # Print the top scores (at most 5) and corresponding molecules for a particular generation
    ranking = np.argsort(fitness)[::-1]
    for place, idx in enumerate(ranking[:5]):
        print("Mol {}: {} (fitness = {:.3f})".format(
            place, Chem.MolToSmiles(population[idx]), fitness[idx]))

    fitness = fitness / np.sum(fitness)

    return fitness, max_score
Exemplo n.º 13
0
def pop_fitness(mpi_comm,
                mpi_rank,
                mpi_size,
                population,
                rcut,
                sigma,
                kernel,
                tgt_atoms,
                tgt_species,
                tgt_atoms2=None,
                max_score=None):
    """
    Calculates the fitness (ie SOAP similarity score) of the population by generating conformers for each of the
    population molecules, then evaluating their SOAP descriptors and calculating its similarity score with the SOAP
    descriptor of the binding ligand 'field'

    The population is partitioned between the MPI processes; each process scores its own share, the scores are
    gathered on the root process and then broadcast back to every process. Molecules for which no conformer could
    be embedded stay in the population and receive a fitness of 0.

    :param mpi_comm: MPI communicator used to gather and broadcast the scores
    :param mpi_rank: rank of this process; rank 0 is the root and the only one that prints
    :param mpi_size: number of processes the population is split between
    :param population: list of RDKit molecule objects
    :param rcut, sigma: SOAP parameters
    :param kernel: 'rematch' or 'average'
    :param tgt_atoms: list of ASE atom objects of the target ligand field - from read_xyz
    :param tgt_species: list of the atomic species present in the target ligand field - from read_xyz
    :param tgt_atoms2: optional second target field (separate site); when given, the fitness is the product of
                       the similarities to both targets
    :param max_score: [score, SMILES] of the best molecule found so far; defaults to [-9999, '']

    :raises ValueError: if kernel is neither 'rematch' nor 'average'

    :return: fitness (normalised to sum to one), max_score
    """
    # Fail early, on every process and before any communication, instead of
    # hitting an UnboundLocalError later on
    if kernel not in ('rematch', 'average'):
        raise ValueError(
            "kernel must be 'rematch' or 'average', got {!r}".format(kernel))
    # A fresh list per call, so the default is never shared between callers
    if max_score is None:
        max_score = [-9999, '']

    t0 = time.time()

    # partition the population between the MPI cpus
    my_border_low, my_border_high = return_borders(mpi_rank, len(population),
                                                   mpi_size)
    my_pop = population[my_border_low:my_border_high]

    # loop over RDKit mols and turn them into lists of ASE atom objects for dscribe SOAP atomic feature generation
    population_ase = []
    num_list = []
    species = ['C']
    bad_mols = []  # local indices of molecules without an embedded conformer
    for ind, m in enumerate(my_pop):
        m = Chem.AddHs(m)
        embedded = AllChem.EmbedMolecule(m, maxAttempts=1000) == 0
        m = Chem.RemoveHs(m)
        atoms = m.GetAtoms()
        num_list.append(len(atoms))
        if embedded:
            # Fetch the coordinates once per molecule rather than once per atom
            positions = m.GetConformer().GetPositions()
        else:
            # Recorded once per molecule; its score is zeroed further below
            bad_mols.append(ind)
        # NOTE (from the original author): this is actually wrong, should have
        # an Atoms object for each mol...
        for i, atom in enumerate(atoms):
            symbol = atom.GetSymbol()
            # Failed embeddings get placeholder atoms at the origin so that
            # the molecule keeps its slot in the descriptor list
            coords = positions[i] if embedded else [0, 0, 0]
            population_ase.append(Atoms(symbol, [coords]))
            if symbol not in species:  # find unique atomic species for SOAP generation
                species.append(symbol)

    # Check that we also include the atom types present in the ligand targets
    for atom in tgt_species:
        if atom not in species:
            species.append(atom)

    t1 = time.time()
    if mpi_rank == 0:
        print('Time taken to generate conformers: {}'.format(t1 - t0))

    # Generate SOAP descriptors using dscribe
    soap_generator = SOAP(species=species,
                          periodic=False,
                          rcut=rcut,
                          nmax=8,
                          lmax=6,
                          sigma=sigma,
                          sparse=True)
    soap = soap_generator.create(population_ase)
    tgt_soap = soap_generator.create(tgt_atoms)
    if tgt_atoms2 is not None:
        tgt_soap2 = [normalize(soap_generator.create(tgt_atoms2), copy=False)]

    # normalize SOAP atom descriptors and group by molecule
    soap = normalize(soap, copy=False)
    tgt_soap = [normalize(tgt_soap, copy=False)]
    soap = split_by_lengths(soap, num_list)

    t2 = time.time()
    if mpi_rank == 0:
        print('Time taken to generate SOAP descriptors: {}'.format(t2 - t1))

    # TODO make REMatch kernel args as input args
    if kernel == 'rematch':
        soap_similarity = REMatchKernel(metric="polynomial",
                                        degree=3,
                                        gamma=1,
                                        coef0=0,
                                        alpha=0.1,
                                        threshold=1e-3,
                                        normalize_kernel=True)
    else:  # 'average', validated at the top of the function
        soap_similarity = AverageKernel(metric="polynomial",
                                        degree=3,
                                        gamma=1,
                                        coef0=0,
                                        normalize_kernel=True)

    # Each target is a single structure, so the kernel has one column and
    # flattening gives one score per local molecule
    fitness = soap_similarity.create(soap, tgt_soap).flatten()
    if tgt_atoms2 is not None:
        # calculate fitness score as product of the two fitnesses
        fitness = fitness * soap_similarity.create(soap, tgt_soap2).flatten()

    fitness[bad_mols] = 0  # set fitness of bad conformers to 0

    sendcounts = np.array(mpi_comm.gather(len(fitness), root=0))

    # Only the root process allocates the buffer for the full score array
    if mpi_rank == 0:
        fitness_full = np.empty(len(population))
    else:
        fitness_full = None

    # Gather fitness arrays from MPI cpus into the root cpu, then broadcast the gathered array to all cpus
    mpi_comm.Gatherv(sendbuf=fitness,
                     recvbuf=(fitness_full, sendcounts),
                     root=0)
    fitness = mpi_comm.bcast(fitness_full, root=0)

    t3 = time.time()
    if mpi_rank == 0:
        print('Time taken to calculate fitness: {}'.format(t3 - t2))

    # update max_score, include new champion
    best = np.argmax(fitness)
    if fitness[best] > max_score[0]:
        max_score = [fitness[best], Chem.MolToSmiles(population[best])]

    # Print the top scores (at most 5) and corresponding molecules for a particular generation
    if mpi_rank == 0:
        ranking = np.argsort(fitness)[::-1]
        for place, idx in enumerate(ranking[:5]):
            print("Mol {}: {} (fitness = {:.3f})".format(
                place, Chem.MolToSmiles(population[idx]), fitness[idx]))

    fitness = fitness / np.sum(fitness)

    return fitness, max_score
Exemplo n.º 14
0
                        periodic=True,
                        sparse=False,
                        rbf='gto')
    descriptors = [soapgen_rcut.create(i) for i in structures]
    descdiffs.append(descriptors[1][0][0] - descriptors[0][0][0])

    tic_1 = time.perf_counter()
    re = AverageKernel(metric='linear')
    kern = re.create(descriptors)
    toc_1 = time.perf_counter()
    ctime.append(toc_1 - tic_1)
    kerndiffs.append(kern[0][1])

    tic_2 = time.perf_counter()
    normed = [normalize(i) for i in descriptors]
    rem = REMatchKernel(metric='rbf', gamma=1, alpha=1, threshold=1e-6)
    remkern = rem.create(descriptors)
    toc_2 = time.perf_counter()
    rectime.append(toc_2 - tic_2)
    remkerndiffs.append(remkern[0][1])

    allkerndiffs.append(abs(remkern[0][1] - kern[0][1]))

plt.plot(xax, ctime, label='Average Kernel')
plt.plot(xax, rectime, label='REMatch Kernel')
plt.xlabel('Number of radial basis functions')
plt.ylabel('Kernel comparison time')
plt.title(f'lmax = {lmax}, rcut = {rcut}')
plt.legend()

plt.savefig(outputdir +
Exemplo n.º 15
0
# Report how many frames are held in `images` (built earlier in the script)
print("total number of frames: " + str(len(images)))
# Clean up the temporary XDATCAR file
os.remove('./XDATCAR_temp')

# Normalised SOAP features of the reference structure
reference_frame = read_vasp('./SPOSCAR')
desc = SOAP(species=[55, 51, 53],
            rcut=6.0,
            nmax=9,
            lmax=9,
            sigma=0.3,
            periodic=True,
            crossover=True,
            sparse=False)
ref_features = desc.create(reference_frame)
ref_features = normalize(ref_features)

# NOTE(review): gamma is passed together with metric="linear"; presumably it
# has no effect for this metric - confirm against the dscribe documentation.
re = REMatchKernel(metric="linear", alpha=1, threshold=1e-6, gamma=1)

# REMatch similarity of every frame to the reference structure
similarities = []
for i, image in enumerate(images):
    image_features = desc.create(image)
    image_features = normalize(image_features)
    # 2x2 kernel for the pair (frame, reference); the off-diagonal entry is
    # the similarity between the two
    re_kernel = re.create([image_features, ref_features])
    print(i, re_kernel[0][1])
    similarities.append(re_kernel[0][1])

# Second, independent measure computed from the same reference file; only the
# first value returned by structural_sigma is kept
scorer = AnharmonicScore(md_frames='./vasprun_md.xml',
                         ref_frame='./SPOSCAR',
                         unit_cell_frame='./SPOSCAR')
sigmas, _ = scorer.structural_sigma(return_trajectory=True)

fig, ax1 = plt.subplots(figsize=(7, 5))
Exemplo n.º 16
0
class SOAP_onthefly(gpflow.kernels.Kernel):
    """
    A kernel class that dynamically calculates the SOAP kernel on-the-fly by loading SOAP descriptors from
    args.soap_path by index and calculating the SOAP kernel.

    The inputs X / X2 do not hold features themselves: they hold the integer indices of the molecules whose
    descriptor files '<soap_path><task>_soap_<index>.npz' are to be loaded.
    """
    def __init__(self, args):
        """
        :param args: object providing ``soap_path`` and ``task``, which together locate the descriptor files
        """
        super().__init__(active_dims=[0])
        # Two positive-constrained trainable parameters: `var` scales the
        # kernel-induced distance, `mag` scales the kernel output
        self.var = gpflow.Parameter(10.0, transform=positive())
        self.mag = gpflow.Parameter(1.0, transform=positive())
        self.args = args
        self.re = REMatchKernel(metric="polynomial",
                                degree=3,
                                gamma=1,
                                coef0=0,
                                alpha=0.5,
                                threshold=1e-6,
                                normalize_kernel=True)

    def _load_soap(self, X):
        """Load the descriptors indexed by X, normalise them and group them by molecule."""
        indices = X.numpy().flatten().astype(int)
        # Use the args stored on the instance, so the kernel does not depend
        # on a module-level `args` variable being defined
        prefix = self.args.soap_path + self.args.task + '_soap_'
        mats = [scipy.sparse.load_npz(prefix + str(i) + '.npz') for i in indices]
        # Number of rows contributed by each molecule, needed to split the
        # stacked matrix again after normalisation
        lengths = [mat.get_shape()[0] for mat in mats]
        # Stack everything in a single call instead of growing the matrix
        # one molecule at a time
        stacked = normalize(scipy.sparse.vstack(mats), copy=False)
        return split_by_lengths(stacked, lengths)

    def _matern(self, K_mat):
        """Turn a REMatch kernel matrix into the final kernel (Matern v=3/2 form)."""
        max_rem = K_mat.max()
        # max_rem - K_mat is non-negative, so the square root is well defined
        z = tf.math.sqrt(
            6 * (max_rem - tf.constant(K_mat, dtype=tf.float64))) * self.var
        return self.mag * (1 + z) * tf.math.exp(-z)

    def K(self, X, X2=None, presliced=None):
        """
        Kernel matrix between the molecules indexed by X and those indexed by X2.

        :param X: tensor of molecule indices
        :param X2: optional second tensor of molecule indices; when None the kernel of X with itself is returned
        :param presliced: not used
        :return: tensor holding the kernel matrix
        """
        X_soap = self._load_soap(X)
        if X2 is None:
            K_mat = self.re.create(X_soap)
        else:
            K_mat = self.re.create(X_soap, self._load_soap(X2))
        return self._matern(K_mat)

    def K_diag(self, X, presliced=None):
        """
        Diagonal of the kernel matrix: a flat tensor with one entry equal to self.mag per element of X.

        :param X: tensor of molecule indices
        :param presliced: not used
        """
        return self.mag * tf.reshape(tf.ones_like(X),
                                     -1)  # diagonal of ones * self.mag