示例#1
0
def structk(strucA, strucB, alchem=alchemy(), periodic=False, mode="match", fout=None, peps=0.0, gamma=1.0, zeta=1.0, xspecies=False):
    """Compute the SOAP similarity kernel between two structures.

    The global kernel is obtained by combining atom-centered environment
    kernels (``envk``) according to ``mode``:

      fastavg     : kernel between the two global environments
                    (``strucA.globenv`` and ``strucB.globenv``) raised to
                    ``zeta``
      fastspecies : for every species present in both structures, the
                    kernel (raised to ``zeta``) between the per-species
                    accumulated environments; the sum is divided by
                    ``strucA.nenv * strucB.nenv``
      match       : best-match (Hungarian) kernel,
                    ``1 - best_cost(1 - kk) / nenv``
      permanent   : ``xperm(kk)`` (or ``mcperm(kk, peps)`` when
                    ``peps > 0``) divided by ``nenv! * nenv``
      rematch     : ``rematch(kk, gamma, 1e-6)``
      average     : mean of all entries of the environment kernel matrix

    Parameters
    ----------
    strucA, strucB : structure objects
        Must provide ``zspecies``, ``nenv``, ``getnz``, ``getenv`` and
        ``ismissing`` (plus ``globenv``, ``nmax``, ``lmax``, ``alchem`` for
        the fast modes).
    alchem : alchemy
        Passed to ``envk``. When ``alchem.mu > 0``, a pair of environments
        of which exactly one is flagged by ``ismissing`` gets the fixed
        kernel value ``exp(-alchem.mu)``.
        NOTE: the default instance is created once, when the function is
        defined, and is shared by every call that omits this argument.
    periodic : bool
        If True each structure keeps its own species list and atom counts,
        so the kernel matrix is ``strucA.nenv x strucB.nenv`` and may be
        rectangular. If False both structures are enumerated over the union
        of their species, using for each species the larger of the two atom
        counts (environments beyond a structure's own count are presumably
        isolated-atom placeholders supplied by ``getenv`` -- confirm against
        the structure class).
    mode : str
        One of the modes listed above.
    fout : file-like or None
        If not None, the species of each matrix row/column and the full
        environment kernel matrix are written to it.
    peps : float
        "permanent" mode only: if > 0, ``mcperm(kk, peps)`` is used instead
        of ``xperm(kk)``.
    gamma : float
        "rematch" mode only: forwarded to ``rematch``.
    zeta : float
        Exponent applied to every environment kernel.
    xspecies : bool
        If True, kernel entries between environments centered on different
        species are set to zero.

    Returns
    -------
    tuple
        ``(cost, kk)`` where ``cost`` is the global kernel and ``kk`` the
        environment kernel matrix. The "fastavg" and "fastspecies" modes
        never build the matrix and return ``(cost, 0)``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported modes.

    Notes
    -----
    Unless a fast mode is selected, ``np.set_printoptions`` is called with
    ``linewidth=500, precision=4``; this changes NumPy's process-wide
    print settings.
    """
    # local import: np.math was only an alias of the stdlib module and is
    # no longer available in recent NumPy releases
    import math

    # average kernel. quick & easy!
    if mode == "fastavg":
        return envk(strucA.globenv, strucB.globenv, alchem)**zeta, 0

    if mode == "fastspecies":
        # for now, only implement standard Kronecker alchemy
        kk = 0
        for za in strucA.zspecies:
            if za not in strucB.zspecies:
                continue
            # accumulate all the environments of this species, per structure
            senvA = environ(strucA.nmax, strucA.lmax, strucA.alchem)
            for ia in range(strucA.getnz(za)):
                senvA.add(strucA.getenv(za, ia))
            senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem)
            for ib in range(strucB.getnz(za)):
                senvB.add(strucB.getenv(za, ib))
            kk += envk(senvA, senvB, alchem)**zeta

        kk /= strucA.nenv*strucB.nenv
        return kk, 0

    nenv = 0
    if periodic:
        # replicate structures to match structures of different periodicity.
        # we do not check for compatibility at this stage, just assume that
        # the matching will be done somehow (otherwise it would be
        # exceedingly hard to manage in case of non-standard alchemy)
        nspeciesA = [(z, strucA.getnz(z)) for z in strucA.zspecies]
        nspeciesB = [(z, strucB.getnz(z)) for z in strucB.zspecies]
        nenv = nenvA = strucA.nenv
        nenvB = strucB.nenv
    else:
        # top up missing atoms with isolated environments:
        # first checks which atoms are present
        nspecies = []
        for z in sorted(set(strucB.zspecies + strucA.zspecies)):
            nz = max(strucA.getnz(z), strucB.getnz(z))
            nspecies.append((z, nz))
            nenv += nz
        nenvA = nenvB = nenv
        nspeciesA = nspeciesB = nspecies

    np.set_printoptions(linewidth=500, precision=4)

    # flat (species, index-within-species) lists; the position in each list
    # is the row (A) or column (B) of the environment kernel matrix
    atomsA = [(za, ia) for za, nza in nspeciesA for ia in range(nza)]
    atomsB = [(zb, ib) for zb, nzb in nspeciesB for ib in range(nzb)]

    kk = np.zeros((nenvA, nenvB), float)
    for ika, (za, ia) in enumerate(atomsA):
        envA = strucA.getenv(za, ia)
        for ikb, (zb, ib) in enumerate(atomsB):
            envB = strucB.getenv(zb, ib)
            if alchem.mu > 0 and (strucA.ismissing(za, ia) ^ strucB.ismissing(zb, ib)):
                # includes a penalty dependent on "mu", in a way that is
                # consistent with the definition of kernel distance
                kk[ika, ikb] = exp(-alchem.mu)
            elif za == zb or not xspecies:
                kk[ika, ikb] = envk(envA, envB, alchem)**zeta
            else:
                # xspecies requested: zero out kernels between different species
                kk[ika, ikb] = 0

    if fout is not None:
        # prints out similarity information for the environment pairs
        fout.write("# atomic species in the molecules (possibly topped up with dummy isolated atoms): \n")
        fout.write("".join(" %d " % za for za, _ in atomsA))
        fout.write("\n")
        fout.write("".join(" %d " % zb for zb, _ in atomsB))
        fout.write("\n")

        fout.write("# environment kernel matrix: \n")
        for row in kk:
            fout.write("".join("%20.14e " % e for e in row))
            fout.write("\n")

    # Now we have the matrix of scalar products: combine it into the
    # global kernel according to the requested mode.
    if mode == "match":
        # best_cost minimizes, so the kernel is turned into a cost (1 - kk)
        if periodic and nenvA != nenvB:
            nenv = lcm(nenvA, nenvB)
            hun = lcm_best_cost(1 - kk)
        else:
            hun = best_cost(1.0 - kk)
        cost = 1 - hun/nenv
    elif mode == "permanent":
        # there is no place to hide: cross-species environments are not
        # necessarily zero
        perm = mcperm(kk, peps) if peps > 0 else xperm(kk)
        cost = perm/math.factorial(nenv)/nenv
    elif mode == "rematch":
        # hard-coded residual error for regularized gamma
        cost = rematch(kk, gamma, 1e-6)
    elif mode == "average":
        cost = kk.sum()/(nenvA*nenvB)
    else:
        raise ValueError("Unknown global fingerprint mode %s" % (mode,))

    return cost, kk
示例#2
0
def structk(strucA,
            strucB,
            alchem=alchemy(),
            peratom=False,
            mode="match",
            fout=None,
            peps=0.0,
            gamma=1.0,
            zeta=1.0,
            xspecies=False):
    """Compute the SOAP similarity kernel between two structures.

    Atom-centered environment kernels (``envk``) are combined into a single
    global kernel. Supported values of ``mode``:

      fastavg     : ``envk`` between ``strucA.globenv`` and
                    ``strucB.globenv``, raised to ``zeta``
      fastspecies : sum, over the species found in both structures, of the
                    kernel (raised to ``zeta``) between per-species
                    accumulated environments, divided by
                    ``strucA.nenv * strucB.nenv``
      match       : best-match (Hungarian) kernel,
                    ``1 - best_cost(1 - kk) / nenv``
      permanent   : ``xperm(kk)`` (``mcperm(kk, peps)`` if ``peps > 0``)
                    divided by ``nenv! * nenv``
      rematch     : ``rematch(kk, gamma, 1e-6)``
      average     : mean of the entries of the environment kernel matrix

    Args:
        strucA, strucB: structure objects exposing ``zspecies``, ``nenv``,
            ``getnz``, ``getenv`` and ``ismissing`` (and ``globenv``,
            ``nmax``, ``lmax``, ``alchem`` for the fast modes).
        alchem: forwarded to ``envk``. If ``alchem.mu > 0``, pairs in which
            exactly one environment is reported by ``ismissing`` are given
            the value ``exp(-alchem.mu)``. NOTE: the default object is
            built once at function-definition time and shared by all calls
            that do not pass ``alchem``.
        peratom: if True, each structure is enumerated with its own species
            and atom counts (matrix of shape ``strucA.nenv x strucB.nenv``,
            possibly rectangular). If False, both structures are enumerated
            over the union of their species with, per species, the larger
            of the two atom counts (the extra environments are presumably
            isolated-atom placeholders returned by ``getenv`` -- confirm
            against the structure class).
        mode: one of the modes listed above.
        fout: optional file-like object; when given, the species labels of
            the matrix rows/columns and the kernel matrix are written to it.
        peps: "permanent" mode only; selects ``mcperm`` when > 0.
        gamma: "rematch" mode only; forwarded to ``rematch``.
        zeta: exponent applied to each environment kernel.
        xspecies: if True, entries between different species are zeroed.

    Returns:
        ``(cost, kk)``: the global kernel and the environment kernel
        matrix. The two fast modes return ``(cost, 0)``.

    Raises:
        ValueError: if ``mode`` is not recognised.

    Side effects:
        For the non-fast modes ``np.set_printoptions(linewidth=500,
        precision=4)`` is called, which alters NumPy's process-wide
        printing configuration.
    """
    # np.math was merely an alias of the standard library module and has
    # been dropped from recent NumPy versions, so import math directly
    import math

    # average kernel. quick & easy!
    if mode == "fastavg":
        return envk(strucA.globenv, strucB.globenv, alchem)**zeta, 0

    if mode == "fastspecies":
        # for now, only implement standard Kronecker alchemy
        kk = 0
        for za in strucA.zspecies:
            if za not in strucB.zspecies:
                continue
            # one accumulated environment per structure for this species
            senvA = environ(strucA.nmax, strucA.lmax, strucA.alchem)
            for ia in range(strucA.getnz(za)):
                senvA.add(strucA.getenv(za, ia))
            senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem)
            for ib in range(strucB.getnz(za)):
                senvB.add(strucB.getenv(za, ib))
            kk += envk(senvA, senvB, alchem)**zeta

        kk /= strucA.nenv * strucB.nenv
        return kk, 0

    nenv = 0
    if peratom:
        # keep each structure's own composition. We do not check for
        # compatibility at this stage, just assume that the matching will
        # be done somehow (otherwise it would be exceedingly hard to manage
        # in case of non-standard alchemy)
        nspeciesA = [(z, strucA.getnz(z)) for z in strucA.zspecies]
        nspeciesB = [(z, strucB.getnz(z)) for z in strucB.zspecies]
        nenv = nenvA = strucA.nenv
        nenvB = strucB.nenv
    else:
        # top up missing atoms with isolated environments:
        # first checks which atoms are present
        nspecies = []
        for z in sorted(set(strucB.zspecies + strucA.zspecies)):
            nz = max(strucA.getnz(z), strucB.getnz(z))
            nspecies.append((z, nz))
            nenv += nz
        nenvA = nenvB = nenv
        nspeciesA = nspeciesB = nspecies

    np.set_printoptions(linewidth=500, precision=4)

    # flat (species, index-within-species) lists: list position == matrix
    # row (structure A) or column (structure B)
    atomsA = [(za, ia) for za, nza in nspeciesA for ia in range(nza)]
    atomsB = [(zb, ib) for zb, nzb in nspeciesB for ib in range(nzb)]

    kk = np.zeros((nenvA, nenvB), float)
    for ika, (za, ia) in enumerate(atomsA):
        envA = strucA.getenv(za, ia)
        for ikb, (zb, ib) in enumerate(atomsB):
            envB = strucB.getenv(zb, ib)
            if alchem.mu > 0 and (strucA.ismissing(za, ia)
                                  ^ strucB.ismissing(zb, ib)):
                # includes a penalty dependent on "mu", in a way that is
                # consistent with the definition of kernel distance
                kk[ika, ikb] = exp(-alchem.mu)
            elif za == zb or not xspecies:
                kk[ika, ikb] = envk(envA, envB, alchem)**zeta
            else:
                # xspecies requested: no kernel between different species
                kk[ika, ikb] = 0

    if fout is not None:
        # prints out similarity information for the environment pairs
        fout.write(
            "# atomic species in the molecules (possibly topped up with dummy isolated atoms): \n"
        )
        fout.write("".join(" %d " % za for za, _ in atomsA))
        fout.write("\n")
        fout.write("".join(" %d " % zb for zb, _ in atomsB))
        fout.write("\n")

        fout.write("# environment kernel matrix: \n")
        for row in kk:
            fout.write("".join("%20.14e " % e for e in row))
            fout.write("\n")

    # Now we have the matrix of scalar products: reduce it to the global
    # kernel following the requested mode.
    if mode == "match":
        # best_cost minimizes, hence the kernel is converted to a cost
        if peratom and nenvA != nenvB:
            nenv = lcm(nenvA, nenvB)
            hun = lcm_best_cost(1 - kk)
        else:
            hun = best_cost(1.0 - kk)
        cost = 1 - hun / nenv
    elif mode == "permanent":
        # there is no place to hide: cross-species environments are not
        # necessarily zero
        perm = mcperm(kk, peps) if peps > 0 else xperm(kk)
        cost = perm / math.factorial(nenv) / nenv
    elif mode == "rematch":
        # hard-coded residual error for regularized gamma
        cost = rematch(kk, gamma, 1e-6)
    elif mode == "average":
        cost = kk.sum() / (nenvA * nenvB)
    else:
        raise ValueError("Unknown global fingerprint mode %s" % (mode,))

    return cost, kk
示例#3
0
def structk(strucA,
            strucB,
            alchem=alchemy(),
            periodic=False,
            mode="match",
            fout=None,
            peps=0.0,
            gamma=1.0,
            zeta=1.0,
            xspecies=False):
    """Compute the SOAP "rematch" similarity kernel between two structures.

    Both structures are enumerated over the union of their species, using
    for each species the larger of the two atom counts (environments beyond
    a structure's own count are presumably isolated-atom placeholders
    returned by ``getenv`` -- confirm against the structure class). The
    per-environment, per-species-pair vectors returned by ``getpair`` are
    packed into two arrays, contracted into the environment kernel matrix
    by ``tensordot`` together with the species-pair alchemical matrix, and
    the matrix is finally reduced with ``rematch``.

    Args:
        strucA, strucB: structure objects exposing ``zspecies``, ``getnz``
            and ``getenv``.
        alchem: object whose ``getpair(z1, z2)`` gives the alchemical
            coupling between two species. NOTE: the default instance is
            created once at function-definition time and shared by every
            call that omits this argument.
        mode: only ``"rematch"`` is implemented here. Be aware that the
            default value ``"match"`` is therefore rejected.
        gamma: forwarded to ``rematch``.
        periodic, fout, peps, zeta, xspecies: accepted for signature
            compatibility but not used by this implementation (in
            particular the environment kernels are not raised to ``zeta``
            in this function).

    Returns:
        ``(cost, kk)``: the value returned by ``rematch`` and the
        environment kernel matrix returned by ``tensordot``.

    Raises:
        ValueError: if ``mode`` is not ``"rematch"``, or if the two
            structures contain no environment at all.
    """
    # fail fast: nothing below is useful for an unsupported mode
    if mode != "rematch":
        raise ValueError("Unknown global fingerprint mode %s" % (mode,))

    # top up missing atoms with isolated environments:
    # first checks which atoms are present
    zspecies = sorted(set(strucB.zspecies + strucA.zspecies))
    nspecies = [(z, max(strucA.getnz(z), strucB.getnz(z))) for z in zspecies]

    # mapping from flat environment index to (species, index within species)
    idx_to_spec = [(z, i) for z, nz in nspecies for i in range(nz)]
    nenv = len(idx_to_spec)
    if nenv == 0:
        # without a single environment the feature length is unknown and
        # the arrays below could never be allocated
        raise ValueError("Cannot compute a kernel between structures without environments")

    nsp = len(zspecies)
    # symmetric alchemical matrix for the species pairs
    alchemAB = np.empty((nsp, nsp), dtype=float)
    for sA in range(nsp):
        for sB in range(sA + 1):
            alchemAB[sA, sB] = alchem.getpair(zspecies[sB], zspecies[sA])
            alchemAB[sB, sA] = alchemAB[sA, sB]

    # pack the getpair() vectors of every environment into
    # (nenv, nsp, nsp, nfeat) arrays; nfeat is only known once the first
    # vector has been retrieved, hence the lazy allocation
    arrA = arrB = None
    for ik, (z, i) in enumerate(idx_to_spec):
        envA = strucA.getenv(z, i)
        envB = strucB.getenv(z, i)
        for i1, s1 in enumerate(zspecies):
            for i2, s2 in enumerate(zspecies):
                a = envA.getpair(s1, s2)
                b = envB.getpair(s1, s2)
                if arrA is None:
                    shape = (nenv, nsp, nsp, a.shape[0])
                    arrA = np.empty(shape, dtype=float)
                    arrB = np.empty(shape, dtype=float)
                arrA[ik, i1, i2, :] = a
                arrB[ik, i1, i2, :] = b

    # matrix of scalar products between all pairs of environments
    kk = tensordot(arrA, arrB, alchemAB)

    # hard-coded residual error for regularized gamma
    cost = rematch(kk, gamma, 1e-6)

    return cost, kk