Пример #1
0
def getChannels(mol, aromaticNitrogen=False, version=2, validitychecks=True):
    """Compute the per-atom channel array for a molecule.

    The function works on ``mol.copy()`` and returns that copy alongside the
    channels, because the ``version=2`` path overwrites the copy's
    ``atomtype`` and ``charge`` fields.

    Parameters
    ----------
    mol : SmallMol or Molecule
        Molecule to featurise. ``SmallMol`` input is handled by
        ``_getPropertiesRDkit``; ``Molecule`` input is handled according to
        ``version``.
    aromaticNitrogen : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges``. Only used for
        ``Molecule`` input with ``version=2``.
    version : int
        Featurisation used for ``Molecule`` input: ``1`` calls
        ``_getAtomtypePropertiesPDBQT``; ``2`` assigns PDBQT atom types and
        charges and then calls ``getFeatures``. Ignored for ``SmallMol``.
    validitychecks : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges``. Only used for
        ``Molecule`` input with ``version=2``.

    Returns
    -------
    channels : numpy.ndarray
        The channel array. When the featuriser returns a boolean array it is
        converted to float and every row is multiplied by the matching
        per-atom value from ``_getChannelRadii``.
    mol : SmallMol or Molecule
        The copy of the input that was actually featurised.

    Raises
    ------
    TypeError
        If ``mol`` is neither a ``SmallMol`` nor a ``Molecule``.
    ValueError
        If ``mol`` is a ``Molecule`` and ``version`` is not 1 or 2.
    """
    from moleculekit.smallmol.smallmol import SmallMol
    from moleculekit.molecule import Molecule

    mol = mol.copy()

    if isinstance(mol, SmallMol):
        channels = _getPropertiesRDkit(mol)
    elif isinstance(mol, Molecule):
        if version == 1:
            channels = _getAtomtypePropertiesPDBQT(mol)
        elif version == 2:
            from moleculekit.tools.atomtyper import (
                getFeatures,
                getPDBQTAtomTypesAndCharges,
            )

            mol.atomtype, mol.charge = getPDBQTAtomTypesAndCharges(
                mol,
                aromaticNitrogen=aromaticNitrogen,
                validitychecks=validitychecks,
            )
            channels = getFeatures(mol)
        else:
            # Without this branch `channels` would stay unbound and the
            # function would fail below with an UnboundLocalError.
            raise ValueError(
                "Unsupported version {!r}; expected 1 or 2.".format(version)
            )
    else:
        raise TypeError(
            "mol must be a SmallMol or a Molecule, got {}.".format(
                type(mol).__name__
            )
        )

    if channels.dtype == bool:
        # Boolean channels are on/off flags: scale each atom's row by its
        # per-atom value (looked up from the atom's element).
        sigmas = _getChannelRadii(mol.get("element"))
        channels = sigmas[:, np.newaxis] * channels.astype(float)
    return channels, mol
Пример #2
0
def getChannels(mol,
                aromaticNitrogen=False,
                version=2,
                validitychecks=True,
                protein=True):
    """Compute the per-atom channel array for a molecule.

    The function works on ``mol.copy()`` and returns that copy alongside the
    channels, because the ``version=2`` path overwrites the copy's
    ``atomtype`` and ``charge`` fields. ``SmallMol``, ``Molecule`` and the
    feature helpers used here are resolved from module scope.

    For ``Molecule`` input with ``version=2`` the result is the column-wise
    concatenation, in this order, of:

    1. ``getFeatures(mol)`` restricted to the columns in ``features_to_keep``
    2. ``amino_acid_feature`` of every atom's residue name
    3. one boolean column per entry of ``arpeggio_atomtypes_unique``, set
       when the atom carries that arpeggio atom type
    4. ``get_atomic_feature`` of every atom's element
    5. a single protein/ligand flag column

    Parameters
    ----------
    mol : SmallMol or Molecule
        Molecule to featurise. ``SmallMol`` input is handled by
        ``_getPropertiesRDkit``.
    aromaticNitrogen : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges`` (``version=2`` only).
    version : int
        Featurisation used for ``Molecule`` input: ``1`` calls
        ``_getAtomtypePropertiesPDBQT``; ``2`` builds the extended feature
        set described above. Ignored for ``SmallMol``.
    validitychecks : bool
        Forwarded to ``getPDBQTAtomTypesAndCharges`` (``version=2`` only).
    protein : bool
        Value of the protein/ligand flag column (1 for protein, 0 for
        ligand). Only used for ``Molecule`` input with ``version=2``.

    Returns
    -------
    channels : numpy.ndarray
        The channel array. When it is boolean it is converted to float and
        every row is multiplied by the matching per-atom value from
        ``_getChannelRadii``.
    mol : SmallMol or Molecule
        The copy of the input that was actually featurised.

    Raises
    ------
    TypeError
        If ``mol`` is neither a ``SmallMol`` nor a ``Molecule``.
    ValueError
        If ``version`` is not 1 or 2 for a ``Molecule``, or if arpeggio
        returns a different number of atoms than ``getFeatures`` has rows.
    """
    mol = mol.copy()

    if isinstance(mol, SmallMol):
        channels = _getPropertiesRDkit(mol)
    elif isinstance(mol, Molecule):
        if version == 1:
            channels = _getAtomtypePropertiesPDBQT(mol)
        elif version == 2:
            from tempfile import TemporaryDirectory

            mol.atomtype, mol.charge = getPDBQTAtomTypesAndCharges(
                mol,
                aromaticNitrogen=aromaticNitrogen,
                validitychecks=validitychecks)

            # Per-atom residue and element encodings.
            amacid_features = np.stack(
                [amino_acid_feature(r) for r in mol.resname], axis=0)
            els = np.stack(
                [get_atomic_feature(e) for e in mol.element], axis=0)

            # Arpeggio reads the structure from disk. A private temporary
            # directory avoids reusing the name of an already-deleted temp
            # file and guarantees cleanup even if arpeggio raises.
            with TemporaryDirectory() as tmpdir:
                pdb_path = os.path.join(tmpdir, 'mol.pdb')
                mol.write(pdb_path)
                s_atoms = process_arpeggio(pdb_path)

            # Keep only the selected columns of the standard features.
            channels = getFeatures(mol)
            channels = channels[:, features_to_keep]

            # Multi-hot encoding of each atom's arpeggio atom types; the
            # order of s_atoms is expected to match the order of channels.
            arpeggio_features = []
            for atom in s_atoms:
                raw_features = sorted(atom.atom_types)
                feature_vector = np.zeros(len(arpeggio_atomtypes_unique),
                                          dtype=bool)
                for rf in raw_features:
                    # Atom types missing from the lookup are ignored.
                    if rf in arpeggio_atomtypes_unique:
                        feature_vector[arpeggio_atomtypes_unique[rf]] = True
                arpeggio_features.append(feature_vector)

                # Diagnostic output kept from the original implementation:
                # report atoms whose last arpeggio slot is set.
                if feature_vector[-1] != 0:
                    print(raw_features, feature_vector)

            arpeggio_features = np.stack(arpeggio_features, axis=0)

            if arpeggio_features.shape[0] != channels.shape[0]:
                raise ValueError(
                    'arpeggio returned {} atoms but the molecule has {} '
                    'feature rows.'.format(arpeggio_features.shape[0],
                                           channels.shape[0]))

            # 1 for protein, 0 for ligand. The flag now follows the
            # `protein` argument, which was previously accepted but unused.
            protein_feature = np.full((len(channels), 1),
                                      bool(protein),
                                      dtype=bool)

            channels = np.concatenate([
                channels, amacid_features, arpeggio_features, els,
                protein_feature
            ], axis=-1)
        else:
            # Without this branch `channels` would stay unbound and the
            # function would fail below with an UnboundLocalError.
            raise ValueError(
                'Unsupported version {!r}; expected 1 or 2.'.format(version))
    else:
        raise TypeError(
            'mol must be a SmallMol or a Molecule, got {}.'.format(
                type(mol).__name__))

    if channels.dtype == bool:
        # Boolean channels are on/off flags: scale each atom's row by its
        # per-atom value (looked up from the atom's element).
        sigmas = _getChannelRadii(mol.get('element'))
        channels = sigmas[:, np.newaxis] * channels.astype(float)

    return channels, mol