Exemplo n.º 1
0
 def __getitem__(self, index: int):
     """Load the example at position ``index`` from its SDF file.

     Returns a dict with keys ``'atoms'`` (output of ``fo.bp_to_df`` on the
     parsed structure), ``'id'`` (the structure's ``id``) and
     ``'file_path'`` (the path as a string). When ``self._read_bonds`` is
     truthy, a ``'bonds'`` entry built from the first molecule in the file
     is added. If ``self._transform`` is set, its return value for the
     assembled dict is returned instead.

     Raises:
         IndexError: If ``index`` is outside ``[0, self._num_examples)``;
             negative indices are not supported.
     """
     in_range = 0 <= index < self._num_examples
     if not in_range:
         raise IndexError(index)

     sdf_path = str(self._file_list[index])
     # Both readers are called with the same options: no sanitization and
     # hydrogens neither added nor removed.
     reader_opts = {'sanitize': False, 'add_hs': False, 'remove_hs': False}

     # Parse the file into a structure (a Biopython structure, per the
     # original comment) and assemble the bond-free item.
     structure = fo.read_sdf(sdf_path, **reader_opts)
     item = {
         'atoms': fo.bp_to_df(structure),
         'id': structure.id,
         'file_path': sdf_path,
     }

     # Bonds require a second pass over the same file with the mol reader;
     # only the first molecule it returns is used.
     if self._read_bonds:
         mols = fo.read_sdf_to_mol(sdf_path, **reader_opts)
         item['bonds'] = fo.get_bonds_list_from_mol(mols[0])

     if self._transform:
         item = self._transform(item)
     return item
Exemplo n.º 2
0
def _get_rdkit_data(smiles):
    """Build the bond table and per-atom feature lists for a SMILES string.

    The SMILES is parsed with RDKit and explicit hydrogens are added, so the
    per-atom lists cover hydrogens as well as heavy atoms.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        A tuple ``(bonds_df, atom_feats)``. ``bonds_df`` is whatever
        ``fo.get_bonds_list_from_mol`` returns for the hydrogen-added
        molecule (presumably a DataFrame -- confirm against ``fo``).
        ``atom_feats`` is a list of eight equally long lists, with one
        entry per atom in ``mol.GetAtoms()`` order:
        ``[atomic_number, acceptor, donor, aromatic, sp, sp2, sp3, num_hs]``.
        All flag lists hold 0/1 integers.

    Raises:
        ValueError: If RDKit cannot parse ``smiles``.
        KeyError: If the molecule contains an element other than
            H, C, N, O or F.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with a clear message instead of an opaque error inside AddHs.
        raise ValueError(f'Could not parse SMILES: {smiles!r}')
    mol = Chem.AddHs(mol)
    bonds_df = fo.get_bonds_list_from_mol(mol)

    # Only these elements are accepted. The per-atom type index that used to
    # be derived from them was never returned, so only the check is kept.
    supported_symbols = frozenset({'H', 'C', 'N', 'O', 'F'})

    atomic_number = []
    aromatic = []
    sp = []
    sp2 = []
    sp3 = []
    num_hs = []
    for atom in mol.GetAtoms():
        symbol = atom.GetSymbol()
        if symbol not in supported_symbols:
            raise KeyError(symbol)
        atomic_number.append(atom.GetAtomicNum())
        aromatic.append(int(atom.GetIsAromatic()))
        hybridization = atom.GetHybridization()
        sp.append(int(hybridization == HybridizationType.SP))
        sp2.append(int(hybridization == HybridizationType.SP2))
        sp3.append(int(hybridization == HybridizationType.SP3))
        # includeNeighbors=True also counts explicit H atoms bonded to this
        # atom, which matters because hydrogens were made explicit above.
        num_hs.append(atom.GetTotalNumHs(includeNeighbors=True))

    # Donor/acceptor flags default to 0 and are switched on for every atom
    # that belongs to a matching feature reported by the feature factory.
    num_atoms = len(atomic_number)
    donor = [0] * num_atoms
    acceptor = [0] * num_atoms
    flags_by_family = {'Donor': donor, 'Acceptor': acceptor}
    for feat in factory.GetFeaturesForMol(mol):
        flags = flags_by_family.get(feat.GetFamily())
        if flags is None:
            continue
        for atom_idx in feat.GetAtomIds():
            flags[atom_idx] = 1

    atom_feats = [
        atomic_number, acceptor, donor, aromatic, sp, sp2, sp3, num_hs
    ]

    return bonds_df, atom_feats