示例#1
0
def _check_and_straighten_at_triple_bond(at, bond, conf):
    """Make the angle around a two-coordinate atom linear if it is not already.

    The angle neighbour0 - ``at`` - neighbour1 is measured in ``conf``; when
    its magnitude differs from pi by more than 0.017 rad (about one degree)
    it is set to exactly pi.

    Args:
        at: the central atom; must report a degree of exactly 2.
        bond: not used by this function.
        conf: the conformer whose coordinates are read and, if needed,
            modified in place.

    Raises:
        ValueError: if ``at`` does not have degree 2.
    """
    if at.GetDegree() != 2:
        raise ValueError("only works with degree 2")

    centre_idx = at.GetIdx()
    neighbour_ids = [nbr.GetIdx() for nbr in at.GetNeighbors()]
    first_idx = neighbour_ids[0]
    second_idx = neighbour_ids[1]

    current = rdMolTransforms.GetAngleRad(conf, first_idx, centre_idx,
                                          second_idx)
    # 0.017 rad is roughly one degree of tolerance around a straight angle.
    deviation = abs(abs(current) - math.pi)
    if deviation > 0.017:
        rdMolTransforms.SetAngleRad(conf, first_idx, centre_idx, second_idx,
                                    math.pi)
示例#2
0
def _cleanup_allenes(m):
    """Straighten every ``*=[C;R0]=*`` centre in the 2D conformer of ``m``.

    Each substructure match of the SMARTS ``*=[C;R0]=*`` is inspected; if the
    angle at the central carbon deviates from pi by more than 0.017 rad
    (about one degree) it is set to exactly pi in the conformer.

    Args:
        m: molecule whose default conformer is modified in place.

    Raises:
        ValueError: if the conformer reports itself as 3D.
    """
    conformer = m.GetConformer()
    if conformer.Is3D():
        raise ValueError("can only operate on 2D conformers")

    allene_query = Chem.MolFromSmarts('*=[C;R0]=*')
    for start, centre, end in m.GetSubstructMatches(allene_query):
        measured = rdMolTransforms.GetAngleRad(conformer, start, centre, end)
        # Only touch the coordinates when we are more than ~1 degree off.
        off_by = abs(abs(measured) - math.pi)
        if off_by > 0.017:
            rdMolTransforms.SetAngleRad(conformer, start, centre, end,
                                        math.pi)
示例#3
0
    def testGetSetAngle(self):
        """Check reading and setting a bond angle in degrees and in radians.

        Loads ``3-cyclohexylpyridine.mol`` from the MolTransforms test data,
        verifies the initial 0-19-21 angle, sets it in degrees, then sets it
        in radians with the atom order reversed (21-19-0) and checks that the
        value read back in the original order matches in both units.
        Every comparison uses one decimal place.
        """
        # `file` shadowed a builtin name; use a descriptive local instead.
        mol_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                                'MolTransforms', 'test_data',
                                '3-cyclohexylpyridine.mol')

        m = Chem.MolFromMolFile(mol_path, True, False)
        conf = m.GetConformer()

        # failUnlessAlmostEqual is a deprecated unittest alias that no longer
        # exists on Python 3.12+; assertAlmostEqual is the supported spelling.
        angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
        self.assertAlmostEqual(angle, 109.7, 1)

        rdmt.SetAngleDeg(conf, 0, 19, 21, 125.0)
        angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
        self.assertAlmostEqual(angle, 125.0, 1)

        # Set with the atoms listed in reverse, read back in forward order.
        rdmt.SetAngleRad(conf, 21, 19, 0, math.pi / 2.)
        angle = rdmt.GetAngleRad(conf, 0, 19, 21)
        self.assertAlmostEqual(angle, math.pi / 2., 1)
        angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
        self.assertAlmostEqual(angle, 90.0, 1)
示例#4
0
def GetRingBondAng(mol, ringpath):
    """
    Measure the bond angle at every position around a ring.

    For each position ``p`` in ``ringpath`` the angle is taken over the three
    consecutive entries ``p``, ``p + 1`` and ``p + 2``, wrapping around the
    end of the list, using the molecule's default conformer.

    Input:

    mol: rdmol

    ringpath: list of atom indices, in ring order

    Return:

    bondang: list of angles in radians, one per entry of ``ringpath``

    """
    ring_size = len(ringpath)

    # Collect the (first, vertex, last) atom triples, wrapping cyclically.
    triples = []
    for start in range(ring_size):
        vertex_pos = (start + 1) % ring_size
        last_pos = (start + 2) % ring_size
        triples.append(
            (ringpath[start], ringpath[vertex_pos], ringpath[last_pos]))

    conformer = mol.GetConformer()
    bondang = []
    for first, vertex, last in triples:
        bondang.append(
            rdMolTransforms.GetAngleRad(conformer, first, vertex, last))
    return bondang
示例#5
0
    def get_3body(self):
        """
        3-body parts: angles spanned by 3 adjacent atoms,
                      must be a valid angle in forcefield

        For every atom ``j`` with ``self.zs[j] > 1`` and at least two
        neighbours, each unordered pair of neighbours ``(i, k)`` defines an
        angle i-j-k.  The angle is labelled ``"z-z-z"`` from the ``self.zs``
        entries of the three atoms (presumably atomic numbers), with the
        terminal atom of smaller ``zs`` value listed first.

        * If ``self.imbt`` is truthy, labels not yet present are appended to
          ``self.mbs3`` and nothing else happens.
        * Otherwise the absolute angle (radians, from the molecule's default
          conformer) is added to ``self.dic3[label]``; afterwards
          ``self.gaussian`` is called once per label in ``self.mbs3`` and the
          resulting ``self.ys`` values are collected into ``self.distr3``.

        Raises:
            ValueError: if a measured angle magnitude exceeds pi.
        """
        # Fetched on first use only: the label-collecting pass (imbt) never
        # needs coordinates, so it must not require a conformer.
        conf = None

        for aj in self.m.GetAtoms():
            j = aj.GetIdx()
            zj = self.zs[j]
            nbr_ids = [nb.GetIdx() for nb in aj.GetNeighbors()]
            if zj > 1 and len(nbr_ids) > 1:
                for pos, i in enumerate(nbr_ids):
                    for k in nbr_ids[pos + 1:]:
                        # Canonical order: smaller zs terminal atom first.
                        if self.zs[i] > self.zs[k]:
                            ias = [k, j, i]
                        else:
                            ias = [i, j, k]
                        type3 = '-'.join('%d' % self.zs[ia] for ia in ias)

                        if self.imbt:
                            if type3 not in self.mbs3:
                                self.mbs3.append(type3)
                            continue

                        if conf is None:
                            conf = self.m.GetConformer()
                        theta = abs(
                            rdMolTransforms.GetAngleRad(
                                conf, ias[0], ias[1], ias[2]))
                        if theta > np.pi:
                            # Raising a bare string is itself a TypeError on
                            # Python 3; raise a real exception instead.
                            raise ValueError(
                                '#ERROR: theta > np.pi? (got %r for %s)' %
                                (theta, type3))
                        self.dic3[type3] += [theta]

        if not self.imbt:
            distr3 = []
            for mb3 in self.mbs3:
                # self.gaussian is expected to leave its result in self.ys
                # (that is what gets collected) -- confirm against its
                # definition.
                self.gaussian(self.xs3, self.dic3[mb3], self.sigmas[1])
                distr3.append(self.ys)
            self.distr3 = distr3
示例#6
0
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, mc, ob_mc,
               scc_mean_and_std):
    """Build the graph tuple (node, edge and coupling data) for one molecule.

    Args:
        molecule_name: group key used to look the molecule up in every
            grouped table.
        gb_structure: grouped structure table; the group is read through the
            columns ``atom_index``, ``atom``, ``x``, ``y`` and ``z``.
        gb_scalar_coupling: grouped coupling table; the group is read through
            ``id``, ``atom_index_0``, ``atom_index_1``, ``type``,
            ``scalar_coupling_constant``, ``fc``, ``sd``, ``pso``, ``dso``.
        mc: grouped table providing ``atom_index`` and ``mulliken_charge``.
        ob_mc: grouped table providing ``atom_index``; every column other
            than ``molecule_name`` and ``atom_index`` is used as a node
            feature.
        scc_mean_and_std: iterable of ``(mean, std)`` pairs applied, in
            order, to the ``fc``, ``sd``, ``pso`` and ``dso`` columns.

    Returns:
        tuple: ``(molecule_name, smiles, [symbols, xyz], node_features,
        edge_features, edge_index, coupling, bond_type_detail)`` where

        * ``node_features`` is ``[symbol, acceptor, donor, aromatic,
          hybridization, num_h, isotope, isin_ring, n_ring, atomic,
          mc_values, ob_mc_values]``;
        * ``edge_features`` is ``[bond_type, distance, angle, dihedrals_min,
          dihedrals_max, dihedrals_diff, conjugate, is_ring_edge]``, one row
          per ordered atom pair ``(i, j)`` with ``i != j``;
        * ``coupling`` is ``(ids, scc_arr, atom_index_pairs, type_index,
          scalar_coupling_constant, norm_scc)``.

    Raises:
        AssertionError: if the atom symbols of the built molecule do not
            match the structure table.
    """
    # https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key
    # ---- coupling targets
    df = gb_scalar_coupling.get_group(molecule_name)
    scc_arr = df[['fc', 'sd', 'pso', 'dso']].values
    # Normalise each contribution column with its own mean / std.
    norm_scc = np.zeros((len(df), 4))
    for i, (mean_v, std_v) in enumerate(scc_mean_and_std):
        norm_scc[:, i] = (scc_arr[:, i] - mean_v) / std_v
    coupling = (
        df.id.values,
        scc_arr,
        df[['atom_index_0', 'atom_index_1']].values,
        # Coupling type stored as its index in COUPLING_TYPE.
        np.array([COUPLING_TYPE.index(t) for t in df.type.values], np.int32),
        df.scalar_coupling_constant.values,
        norm_scc)

    # ---- structure
    df = gb_structure.get_group(molecule_name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values
    mol = mol_from_axyz(a, xyz)
    # Sanity check: the molecule must keep the table's atom order.
    assert (
        a == [
            mol.GetAtomWithIdx(i).GetSymbol() for i in range(mol.GetNumAtoms())
        ])

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    ## ** node **
    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)
    # ring membership
    isotope = np.zeros((num_atom, 1), np.uint8)
    isin_ring = np.zeros((num_atom, 1), np.uint8)
    ring_types = [3, 4, 5, 6, 7, 8]
    n_ring = np.zeros((num_atom, len(ring_types)), np.uint8)
    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        symbol[i] = one_hot_encoding(atom.GetSymbol(), SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(),
                                            HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()
        isotope[i] = atom.GetIsotope()
        isin_ring[i] = atom.IsInRing()
        for j, ring_i in enumerate(ring_types):
            n_ring[i, j] = atom.IsInRingSize(ring_i)

    # Flag every atom that takes part in a Donor / Acceptor feature.
    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # Build the index lists needed for the angle / dihedral edge features:
    # every bond in both directions, then self-joins to chain 3 and 4 atoms.
    bond_arr_0 = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                  for b in mol.GetBonds()]
    bond_arr_1 = [(end, begin) for begin, end in bond_arr_0]
    bond_df = pd.DataFrame(bond_arr_0 + bond_arr_1, columns=["a0", "a1"])
    bond_df3 = bond_df.merge(bond_df.rename(columns={
        "a1": "a2",
        "a0": "a1"
    }),
                             on="a1")
    # Drop degenerate paths that walk back to the starting atom.
    bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2]
    bond_df4 = bond_df3.merge(bond_df.rename(columns={
        "a0": "a2",
        "a1": "a3"
    }),
                              on="a2")
    bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3)
                        & (bond_df4.a1 != bond_df4.a3)]
    bond_idx3 = bond_df3.values
    bond_idx4 = bond_df4.values

    ## ** edge **
    num_edge = num_atom * num_atom - num_atom
    # NOTE(review): uint8 can only hold atom indices up to 255 -- confirm the
    # data never contains larger molecules.
    edge_index = np.zeros((num_edge, 2), np.uint8)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint8)  # category
    distance = np.zeros((num_edge, 1), np.float32)  # real
    angle = np.zeros((num_edge, 1), np.float32)  # real
    dihedrals_min = np.zeros((num_edge, 1), np.float32)
    dihedrals_max = np.zeros((num_edge, 1), np.float32)
    conjugate = np.zeros((num_edge, 1), np.uint8)
    is_ring_edge = np.zeros((num_edge, 1), np.uint8)
    bond_type_detail = np.zeros((num_edge, 1), np.uint16)

    ij = 0
    ij_dict = {}  # (i, j) -> row of that ordered pair in the edge arrays
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]
            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)
                conjugate[ij] = bond.GetIsConjugated()
                # A bonded pair is a ring edge when both atoms are in a ring.
                is_ring_edge[ij] = isin_ring[i, 0] and isin_ring[j, 0]
                # Detailed bond type looked up by "<sym>_<bondtype>_<sym>".
                bond_type_detail[ij] = bond_detail_dict[a[i] + "_" + str(
                    bond.GetBondType()) + "_" + a[j]]
            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            ij_dict[(i, j)] = ij
            ij += 1

    conf = mol.GetConformer(0)
    for i, j, k in bond_idx3:
        # The angle Ni-Nj-Nk is stored on the i-k edge.
        # +2 shifts the cosine into [1, 3] so that a real value can never be
        # confused with the 0 that means "no angle on this edge".
        angle[ij_dict[(i, k)]] = np.cos(
            rdMolTransforms.GetAngleRad(conf, int(i), int(j), int(k))) + 2
    for i, j, k, l in bond_idx4:
        # The dihedral Ni-Nj-Nk-Nl is stored on the i-l edge, shifted by +2
        # for the same reason as the angles.
        dihedral = np.cos(
            rdMolTransforms.GetDihedralRad(conf, int(i), int(j), int(k),
                                           int(l))) + 2
        # A shifted cosine below 1.1 means cos < -0.9, i.e. an almost linear
        # bond angle, for which the torsion is ill-defined.  Both bond angles
        # of the torsion (i-j-k and j-k-l) have to be checked; the original
        # code tested i-j-k twice.
        if angle[ij_dict[(i, k)]] < 1.1 or angle[ij_dict[(j, l)]] < 1.1:
            dihedral = -1
        edge_il = ij_dict[(i, l)]
        # NOTE(review): min/max are taken against the *angle* stored on the
        # i-l edge rather than a previously stored dihedral -- kept as in the
        # original; confirm this is intended.
        dihedrals_min[edge_il] = min(angle[edge_il], dihedral)
        dihedrals_max[edge_il] = max(angle[edge_il], dihedral)\
                            if angle[edge_il] != 0 else dihedral
    dihedrals_diff = dihedrals_max - dihedrals_min

    # Mulliken charges and the additional per-atom feature table.
    mc_df = mc.get_group(molecule_name)
    mc_df = mc_df.sort_values(['atom_index'], ascending=True)
    mc_values = mc_df["mulliken_charge"].values.reshape([-1, 1])
    ob_mc_df = ob_mc.get_group(molecule_name)
    ob_mc_df = ob_mc_df.sort_values(['atom_index'], ascending=True)
    ob_mc_values = ob_mc_df[[
        c for c in ob_mc_df.columns
        if c not in ["molecule_name", "atom_index"]
    ]].values

    ##-------------------
    graph = (
        molecule_name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        # node feature
        [
            symbol, acceptor, donor, aromatic, hybridization, num_h, isotope,
            isin_ring, n_ring, atomic, mc_values, ob_mc_values
        ],
        # edge feature
        [
            bond_type, distance, angle, dihedrals_min, dihedrals_max,
            dihedrals_diff, conjugate, is_ring_edge
        ],
        edge_index,
        coupling,
        # edge bond type detail info
        bond_type_detail,
    )
    return graph
示例#7
0
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, ):
    #https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key
    #----
    df = gb_scalar_coupling.get_group(molecule_name)
    #----
    df = gb_structure.get_group(molecule_name)
    df = df.sort_values(['atom_index'], ascending=True)
    a   = df.atom.values.tolist()
    xyz = df[['x','y','z']].values
    mol = mol_from_axyz(a, xyz)
    #---
    assert( #check
       a == [ mol.GetAtomWithIdx(i).GetSymbol() for i in range(mol.GetNumAtoms())]
    )
    #---
    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)
    ## ** node **
    num_atom = mol.GetNumAtoms()
    symbol   = np.zeros((num_atom,len(SYMBOL)),np.uint8) #category
    acceptor = np.zeros((num_atom,1),np.uint8)
    donor    = np.zeros((num_atom,1),np.uint8)
    aromatic = np.zeros((num_atom,1),np.uint8)
    hybridization = np.zeros((num_atom,len(HYBRIDIZATION)),np.uint8)
    num_h  = np.zeros((num_atom,1),np.float32)#real
    atomic = np.zeros((num_atom,1),np.float32)
    # new
    isotope = np.zeros((num_atom,1),np.uint8)
    isin_ring = np.zeros((num_atom,1),np.uint8) 
    ring_types = [3,4,5,6]
    n_ring = np.zeros((num_atom,len(ring_types)),np.uint8) 
    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        symbol[i]        = one_hot_encoding(atom.GetSymbol(),SYMBOL)
        aromatic[i]      = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(),HYBRIDIZATION)
        num_h[i]  = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()
        #new
        isotope[i] = atom.GetIsotope()
        isin_ring[i] = atom.IsInRing()
        for j,ring_i in enumerate(ring_types):
            n_ring[i,j] = atom.IsInRingSize(ring_i)
    #[f.GetFamily() for f in feature]
    for t in range(0, len(feature)):
        if feature[t].GetFamily() == 'Donor':
            for i in feature[t].GetAtomIds():
                donor[i] = 1
        elif feature[t].GetFamily() == 'Acceptor':
            for i in feature[t].GetAtomIds():
                acceptor[i] = 1
    #edge_angleを出すためのindex_listの作成
    n_bonds = len(mol.GetBonds())
    bond_arr_0 = [(mol.GetBonds()[i].GetBeginAtomIdx(), 
                    mol.GetBonds()[i].GetEndAtomIdx()) for i in range(n_bonds)]
    bond_arr_1 = [(mol.GetBonds()[i].GetEndAtomIdx(), 
                    mol.GetBonds()[i].GetBeginAtomIdx()) for i in range(n_bonds)]
    bond_df = pd.DataFrame(bond_arr_0+bond_arr_1,columns=["a0","a1"])
    bond_df3 = bond_df.merge(bond_df.rename(columns={"a1":"a2","a0":"a1"}), on="a1")
    bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2]
    bond_df4 = bond_df3.merge(bond_df.rename(columns={"a0":"a2","a1":"a3"}), on="a2")
    bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3)&(bond_df4.a1 != bond_df4.a3)]
    bond_idx3 = bond_df3.values
    bond_idx4 = bond_df4.values
    #all_bond_df = all_bond_df.sort_values(["a0","a1","a2"]).reset_index(drop=True)
    ## ** edge **
    num_edge = num_atom*num_atom - num_atom
    edge_index = np.zeros((num_edge,2), np.uint8)
    bond_type  = np.zeros((num_edge,len(BOND_TYPE)), np.uint8)#category
    distance   = np.zeros((num_edge,1),np.float32) #real
    angle      = np.zeros((num_edge,1),np.float32) #real
    dihedrals_min = np.zeros((num_edge,1),np.float32)
    dihedrals_max = np.zeros((num_edge,1),np.float32)
    dihedrals_diff = np.zeros((num_edge,1),np.float32)
    conjugate = np.zeros((num_edge,1),np.uint8)
    #GetIsConjugated
    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    ij=0
    ij_dict = {}
    for i in range(num_atom):
        for j in range(num_atom):
            if i==j: continue
            edge_index[ij] = [i,j]
            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(),BOND_TYPE)
                conjugate[ij] = bond.GetIsConjugated()
            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            #angle[ij] = (norm_xyz[i]*norm_xyz[j]).sum()
            ij_dict[(i,j)] = ij
            ij+=1
    conf = mol.GetConformer(0)
    for i,j,k in bond_idx3:
        #角Ni-Nj-Nkはi-kエッジとして格納
        ## なにもない0とcos = 0を区別できるように-1する
        angle[ij_dict[(i,k)]] = np.cos(rdMolTransforms.GetAngleRad(conf,int(i),int(j),int(k)))-1
    for i,j,k,l in bond_idx4:
        #角Ni-Nj-Nk-Nlはi-lエッジとして格納
        ## なにもない0とcos = 0を区別できるように-1する
        dihedral = np.cos(rdMolTransforms.GetDihedralRad(conf,int(i),int(j),int(k),int(l)))-1
        dihedrals_min[ij_dict[(i,l)]] = min(angle[ij_dict[(i,l)]], dihedral)
        dihedrals_max[ij_dict[(i,l)]]= max(angle[ij_dict[(i,l)]], dihedral)\
                            if angle[ij_dict[(i,l)]] != 0 else dihedral
    dihedrals_diff = dihedrals_max - dihedrals_min
    ##-------------------
    atom_ret = pd.DataFrame({
            "molecule_name":molecule_name,
            "atom_index": df["atom_index"].values,
            "is_accepter": acceptor,
            "is_donor": donor,
            "is_aromatic":aromatic,
            "is_sp1": hybridization[:,0],
            "is_sp2": hybridization[:,1],
            "is_sp3": hybridization[:,2],
            "is_isotope": isotope,
            "isin_ring": isin_ring,
            "is_ring3": n_ring[:,0],
            "is_ring4": n_ring[:,1],
            "is_ring5": n_ring[:,2],
            "is_ring6": n_ring[:,3],
        })
        [bond_type, distance, angle, dihedrals_min, dihedrals_max, 
            dihedrals_diff, conjugate,],
        edge_index,