示例#1
0
    def preprocess(self, mol):
        """Precompute the per-molecule data selected by the ``set_*`` flags.

        Each enabled flag triggers one step:

        * ``set_hydrogen_explicit``: continue with the molecule returned by
          ``Chem.AddHs``.
        * ``set_morgan_identifier``: store ``self.atomidx_hash_dict``, mapping
          atom index -> Morgan fingerprint bit id.
        * ``set_ring_membership``: store ``self.ringlist_atom`` and
          ``self.ringlist_bond`` as returned by ``self.get_ringlist``.
        * ``set_TPSA``: store the per-atom TPSA contributions in ``self.TPSA``.
        * ``set_partial_charge``: run the Gasteiger charge computation on the
          molecule.

        Args:
            mol: RDKit molecule to analyse.
        """
        if self.set_hydrogen_explicit:
            # NOTE(review): the result of AddHs is only bound to the local
            # name and this method returns nothing, so the caller never sees
            # the hydrogen-expanded molecule -- confirm this is intended.
            mol = Chem.AddHs(mol)

        if self.set_morgan_identifier:
            # Calculate the Morgan substructure hashing value of every atom.
            # bitInfo is filled with {bit id: ((atom index, radius), ...)}.
            morgan_info = {}
            atomidx_hash_dict = {}
            radius = self.morgan_radius
            Chem.GetMorganFingerprint(mol,
                                      radius,
                                      bitInfo=morgan_info,
                                      useChirality=False)
            num_atoms = mol.GetNumAtoms()
            # Walk from the largest radius down so each atom keeps the bit of
            # the first (largest) radius at which it is seen.  The
            # ``radius >= 0`` guard stops the loop once every radius has been
            # tried; without it an unmapped atom would make it spin forever.
            while len(atomidx_hash_dict) != num_atoms and radius >= 0:
                for key, environments in morgan_info.items():
                    # Only the first entry's radius decides whether this bit
                    # is handled in the current pass.
                    if environments[0][1] != radius:
                        continue
                    for environment in environments:
                        if environment[0] not in atomidx_hash_dict:
                            atomidx_hash_dict[environment[0]] = key
                radius -= 1
            self.atomidx_hash_dict = atomidx_hash_dict
        if self.set_ring_membership:
            self.ringlist_atom = self.get_ringlist(mol)
            self.ringlist_bond = self.get_ringlist(mol, type='bond')
        if self.set_TPSA:
            self.TPSA = rdMolDescriptors._CalcTPSAContribs(mol)
        if self.set_partial_charge:
            Chem.ComputeGasteigerCharges(mol)
def atom_level_descriptors(mol,
                           include=['functional'],
                           asOneHot=False,
                           ORIGINAL_VERSION=False):
    '''
    Given an RDKit mol, returns an N_atom-long list of lists,
    each of which contains atom-level descriptors, and their names.

    Parameters:
        mol: RDKit molecule.
        include: collection of descriptor families to compute; the families
            recognised here are 'functional' and 'structural'.  The default
            list is only read, never mutated.
        asOneHot: forwarded unchanged to atom_structural.
        ORIGINAL_VERSION: forwarded unchanged to atom_structural.

    returns: (labels, attributes)
        labels holds one name per functional descriptor plus a single
        placeholder entry for the whole structural family; attributes[i]
        is the descriptor list of atom i.
    '''

    attributes = [[] for _ in mol.GetAtoms()]
    labels = []

    if 'functional' in include:
        # Each Crippen contribution carries both the logp and the mr term,
        # so compute the contributions once and append both values.
        for atom_attrs, contrib in zip(
                attributes, rdMolDescriptors._CalcCrippenContribs(mol)):
            atom_attrs.append(contrib[0])
            atom_attrs.append(contrib[1])
        labels.append('Crippen contribution to logp')
        labels.append('Crippen contribution to mr')

        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcTPSAContribs(mol)):
            atom_attrs.append(value)
        labels.append('TPSA contribution')

        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
            atom_attrs.append(value)
        labels.append('Labute ASA contribution')

        for atom_attrs, value in zip(attributes, EState.EStateIndices(mol)):
            atom_attrs.append(value)
        labels.append('EState Index')

        rdPartialCharges.ComputeGasteigerCharges(mol)
        gasteiger_props = (
            ('_GasteigerCharge', 'Gasteiger partial charge'),
            ('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
        )
        for prop_name, label in gasteiger_props:
            for atom_attrs, atom in zip(attributes, mol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes come out as NaN or
                # +/-inf; replace any non-finite value with 0.0.
                atom_attrs.append(charge if np.isfinite(charge) else 0.0)
            labels.append(label)

    if 'structural' in include:
        for idx, atom_attrs in enumerate(attributes):
            atom_attrs.extend(
                atom_structural(mol.GetAtomWithIdx(idx),
                                asOneHot=asOneHot,
                                ORIGINAL_VERSION=ORIGINAL_VERSION))
        labels.append('--many structural--')

    return (labels, attributes)
def mol_to_nx(mol) -> nx.Graph:
    """Build a NetworkX graph whose nodes are atoms and whose edges are bonds.

    The molecule must already carry a conformer, because every node stores
    the atom position taken from ``mol.GetConformer()``.  The molecule is
    sanitized (all steps except ``SANITIZE_PROPERTIES``) and Gasteiger
    charges are computed on it before the attributes are read.

    Node attributes: pos, formal_charge, chiral_tag, hybridization,
    is_aromatic, num_atom_rings, is_in_ring_size3..6, symbol, total_valence,
    gasteiger_charge, num_implicit_hs, total_degree, crippen_logp,
    crippen_mr, tpsa.
    Edge attributes: bond_type, is_conjugated.
    """
    graph = nx.Graph()
    conformer = mol.GetConformer()

    sanitize_ops = (SanitizeFlags.SANITIZE_ALL
                    ^ SanitizeFlags.SANITIZE_PROPERTIES)
    SanitizeMol(mol, sanitize_ops)

    ComputeGasteigerCharges(mol)
    rings = mol.GetRingInfo()
    crippen_per_atom = rdMolDescriptors._CalcCrippenContribs(mol)
    tpsa_per_atom = rdMolDescriptors._CalcTPSAContribs(mol)

    for atom in mol.GetAtoms():
        atom_idx = atom.GetIdx()
        node_attrs = {
            'pos': conformer.GetAtomPosition(atom_idx),
            # The formal charge is taken from the atom as-is; it is not
            # re-derived from the element and valence.
            'formal_charge': atom.GetFormalCharge(),
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'is_aromatic': atom.GetIsAromatic(),
            'num_atom_rings': rings.NumAtomRings(atom_idx),
            'is_in_ring_size3': atom.IsInRingSize(3),
            'is_in_ring_size4': atom.IsInRingSize(4),
            'is_in_ring_size5': atom.IsInRingSize(5),
            'is_in_ring_size6': atom.IsInRingSize(6),
            'symbol': atom.GetSymbol(),
            'total_valence': atom.GetTotalValence(),
            # NOTE(review): read with GetProp, so this is presumably stored
            # as text rather than as a float -- confirm consumers expect it.
            'gasteiger_charge': atom.GetProp('_GasteigerCharge'),
            'num_implicit_hs': atom.GetNumImplicitHs(),
            'total_degree': atom.GetTotalDegree(),
            'crippen_logp': crippen_per_atom[atom_idx][0],
            'crippen_mr': crippen_per_atom[atom_idx][1],
            'tpsa': tpsa_per_atom[atom_idx],
        }
        graph.add_node(atom_idx, **node_attrs)

    for bond in mol.GetBonds():
        start_idx = bond.GetBeginAtomIdx()
        stop_idx = bond.GetEndAtomIdx()
        edge_attrs = {
            'bond_type': bond.GetBondType(),
            'is_conjugated': bond.GetIsConjugated(),
        }
        graph.add_edge(start_idx, stop_idx, **edge_attrs)

    return graph
示例#4
0
def assignProperties(mol):
    '''
    Attach atom-level descriptors to the atoms of `mol` as double properties
    so they can be used in featurization.

    Properties written on each atom: 'crippen_logp', 'crippen_mr', 'tpsa',
    'asa' and 'estate'.  Gasteiger charges are computed last.  Nothing is
    returned; the molecule is annotated in place.
    '''
    atom_at = mol.GetAtomWithIdx

    # Each Crippen contribution holds the logp term followed by the mr term.
    for idx, contrib in enumerate(rdMolDescriptors._CalcCrippenContribs(mol)):
        atom = atom_at(idx)
        atom.SetDoubleProp('crippen_logp', contrib[0])
        atom.SetDoubleProp('crippen_mr', contrib[1])

    # The remaining descriptors are one scalar per atom.  The callables keep
    # each computation deferred until its own turn in the sequence.
    scalar_sources = (
        ('tpsa', lambda: rdMolDescriptors._CalcTPSAContribs(mol)),
        ('asa', lambda: rdMolDescriptors._CalcLabuteASAContribs(mol)[0]),
        ('estate', lambda: EState.EStateIndices(mol)),
    )
    for prop_name, compute in scalar_sources:
        for idx, value in enumerate(compute()):
            atom_at(idx).SetDoubleProp(prop_name, value)

    # Provides '_GasteigerCharge' and '_GasteigerHCharge'.
    rdPartialCharges.ComputeGasteigerCharges(mol)
示例#5
0
def get_molecular_attributes(rdmol):
    """Return the atom-level descriptor vector of every atom in ``rdmol``.

    Each per-atom vector contains, in this order:
      [Crippen contribution to logp,
       Crippen contribution to mr,
       TPSA contribution,
       Labute ASA contribution,
       EState Index,
       Gasteiger partial charge,
       Gasteiger hydrogen partial charge]

    Non-finite Gasteiger values (NaN, +inf and -inf) are replaced by 0.0.

    Parameters
    ----------
    rdmol : rdkit.Chem.rdchem.Mol
      rdkit molecule class

    Returns
    -------
    attributes : list
      one feature vector (list) per atom, in atom order

    """
    attributes = [[] for _ in rdmol.GetAtoms()]

    for atom_attrs, contrib in zip(
            attributes, rdMolDescriptors._CalcCrippenContribs(rdmol)):
        atom_attrs.append(contrib[0])
        atom_attrs.append(contrib[1])
    for atom_attrs, value in zip(
            attributes, rdMolDescriptors._CalcTPSAContribs(rdmol)):
        atom_attrs.append(value)
    for atom_attrs, value in zip(
            attributes, rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        atom_attrs.append(value)
    for atom_attrs, value in zip(attributes, EState.EStateIndices(rdmol)):
        atom_attrs.append(value)

    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
        for atom_attrs, atom in zip(attributes, rdmol.GetAtoms()):
            charge = float(atom.GetProp(prop_name))
            # isfinite rejects NaN and both infinities; a plain ``< inf``
            # comparison would let -inf through.
            atom_attrs.append(charge if np.isfinite(charge) else 0.0)

    return attributes
示例#6
0
文件: atom.py 项目: yccai/scikit-chem
def tpsa_contrib(a):
    """ Total polar surface area contribution of atom `a`.

    Hacky: the contributions of the whole owning molecule are requested and
    only the entry at this atom's index is returned.
    """

    atom_index = a.GetIdx()
    per_atom_contribs = rdMolDescriptors._CalcTPSAContribs(a.GetOwningMol())
    return per_atom_contribs[atom_index]
示例#7
0
def molToGraph(rdmol, bondtype_list_order, atomtype_list_order, molecular_attributes = False):
    '''Converts an RDKit molecule to an attributed undirected graph.

    rdmol: RDKit molecule.
    bondtype_list_order: categories used to one-hot encode each bond's
        '<lower atomic number>_<higher atomic number>' key.
    atomtype_list_order: categories used to one-hot encode each atom's
        atomic number.
    molecular_attributes: when true, every atom additionally receives the
        Crippen logp/mr, TPSA, Labute ASA, EState and the two Gasteiger
        charge descriptors as extra attributes.

    Returns the populated Graph, with num_edges and num_nodes set.
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes
    graph.bondtype_list_order = bondtype_list_order
    graph.atomtype_list_order = atomtype_list_order

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Each Crippen contribution holds the logp term followed by the mr
        # term, so one computation provides both descriptors.
        for atom_attrs, contrib in zip(
                attributes, rdMolDescriptors._CalcCrippenContribs(rdmol)):
            atom_attrs.append(contrib[0])
            atom_attrs.append(contrib[1])

        # TPSA contribution
        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcTPSAContribs(rdmol)):
            atom_attrs.append(value)

        # Labute ASA contribution
        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            atom_attrs.append(value)

        # EState Index
        for atom_attrs, value in zip(attributes, EState.EStateIndices(rdmol)):
            atom_attrs.append(value)

        # Gasteiger partial charge, then Gasteiger hydrogen partial charge
        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for atom_attrs, atom in zip(attributes, rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf
                atom_attrs.append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.orderAtt = list(oneHotVector(bond.GetBondTypeAsDouble(), [1.0, 1.5, 2.0, 3.0]))
        edge.aromAtt = list(oneHotVector(bond.GetIsAromatic(), [1.0, 0.0]))
        edge.conjAtt = list(oneHotVector(bond.GetIsConjugated(), [1.0, 0.0]))
        edge.ringAtt = list(oneHotVector(bond.IsInRing(), [1.0, 0.0]))

        # The bond key is direction independent: the smaller atomic number
        # of the two end atoms always comes first.
        low_z, high_z = sorted((bond.GetBeginAtom().GetAtomicNum(),
                                bond.GetEndAtom().GetAtomicNum()))
        bond_type = str(low_z) + '_' + str(high_z)
        edge.attributesAtt = np.array(
            list(oneHotVector(bond_type, bondtype_list_order)), dtype=att_dtype)

        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)
    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        atom_idx = atom.GetIdx()
        node = Node()
        node.i = atom_idx
        node.attributes = atomAttributes(atom, extra_attributes = attributes[k])
        node.attributesAtt = np.array(
            list(oneHotVector(atom.GetAtomicNum(), atomtype_list_order)), dtype=att_dtype)
        # Each neighbor is recorded as (neighbor atom index, connecting bond index).
        for neighbor in atom.GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            node.neighbors.append((
                neighbor_idx,
                rdmol.GetBondBetweenAtoms(atom_idx, neighbor_idx).GetIdx()
            ))
        graph.nodes.append(node)
    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph
示例#8
0
def atom_level_descriptors(mol, include = ['functional'], asOneHot = False, ORIGINAL_VERSION = False):
	"""
	Given an RDKit mol, returns an N_atom-long list of lists,
	each of which contains atom-level descriptors, and their names.

	Args:
		mol: RDKit molecule.
		include: collection of descriptor families to compute; the families
			recognised here are 'functional', 'structural' and 'dftb'.  The
			default list is only read, never mutated.
		asOneHot: forwarded unchanged to atom_structural.
		ORIGINAL_VERSION: forwarded unchanged to atom_structural.

	Returns:
		(labels, attributes)
		labels holds one name per functional descriptor plus one placeholder
		entry each for the structural and DFTB families; attributes[i] is
		the descriptor list of atom i.
	"""

	attributes = [[] for _ in mol.GetAtoms()]
	labels = []

	if 'functional' in include:
		# Each Crippen contribution carries both the logp and the mr term,
		# so compute the contributions once and append both values.
		for atom_attrs, contrib in zip(attributes, rdMolDescriptors._CalcCrippenContribs(mol)):
			atom_attrs.append(contrib[0])
			atom_attrs.append(contrib[1])
		labels.append('Crippen contribution to logp')
		labels.append('Crippen contribution to mr')

		for atom_attrs, value in zip(attributes, rdMolDescriptors._CalcTPSAContribs(mol)):
			atom_attrs.append(value)
		labels.append('TPSA contribution')

		for atom_attrs, value in zip(attributes, rdMolDescriptors._CalcLabuteASAContribs(mol)[0]):
			atom_attrs.append(value)
		labels.append('Labute ASA contribution')

		for atom_attrs, value in zip(attributes, EState.EStateIndices(mol)):
			atom_attrs.append(value)
		labels.append('EState Index')

		rdPartialCharges.ComputeGasteigerCharges(mol)
		gasteiger_props = (
			('_GasteigerCharge', 'Gasteiger partial charge'),
			('_GasteigerHCharge', 'Gasteiger hydrogen partial charge'),
		)
		for prop_name, label in gasteiger_props:
			for atom_attrs, atom in zip(attributes, mol.GetAtoms()):
				charge = float(atom.GetProp(prop_name))
				# Gasteiger partial charges sometimes come out as NaN or
				# +/-inf; replace any non-finite value with 0.0.
				atom_attrs.append(charge if np.isfinite(charge) else 0.0)
			labels.append(label)

	if 'structural' in include:
		for idx, atom_attrs in enumerate(attributes):
			atom_attrs.extend(atom_structural(mol.GetAtomWithIdx(idx), asOneHot = asOneHot, ORIGINAL_VERSION = ORIGINAL_VERSION))
		labels.append('--many structural--')

	if 'dftb' in include:
		num_atoms = mol.GetNumAtoms()
		try:
			dftb_atom_atts = atom_dftb(mol)
		except (ValueError, KeyError) as e:  # often, an invalid element
			print(e)
			# Fall back to 18 zeros per atom.
			# NOTE(review): 18 presumably matches the width of atom_dftb's
			# per-atom output -- confirm.
			dftb_atom_atts = [[0] * 18 for _ in range(num_atoms)]
		for idx in range(num_atoms):
			attributes[idx].extend(dftb_atom_atts[idx])
		labels.append('--many DFTB--')

	return (labels, attributes)
示例#9
0
def molToGraph(rdmol):
    '''
    Converts an RDKit molecule to an attributed undirected graph.

    Every atom becomes a Node whose features come from getAtomFeatures
    (fed with the descriptors computed here) and every bond becomes an Edge
    whose features come from getBondFeatures, cast to float32.

    graph.nodeNum is always set.  graph.nodeFeatureDim and
    graph.edgeFeatureDim are only set when the graph has at least one node
    or edge, respectively.

    @param rdmol: RDKit molecule
    @return: Graph
    '''
    graph = Graph()

    # Calculate atom-level molecule descriptors
    nodesFeatures = [[] for _ in rdmol.GetAtoms()]

    #6 (25) Crippen contribution to logp
    #7 (26) Crippen contribution to mr
    # Both terms come from the same contribution pair, so compute it once.
    for atomFeatures, contrib in zip(
            nodesFeatures, rdMolDescriptors._CalcCrippenContribs(rdmol)):
        atomFeatures.append(contrib[0])
        atomFeatures.append(contrib[1])

    #8 (27) TPSA contribution
    for atomFeatures, value in zip(
            nodesFeatures, rdMolDescriptors._CalcTPSAContribs(rdmol)):
        atomFeatures.append(value)

    #9 (28) Labute ASA contribution
    for atomFeatures, value in zip(
            nodesFeatures, rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
        atomFeatures.append(value)

    #10 (29) EState Index
    for atomFeatures, value in zip(nodesFeatures, EState.EStateIndices(rdmol)):
        atomFeatures.append(value)

    # Calculate Gasteiger charges for features 30 and 31
    rdPartialCharges.ComputeGasteigerCharges(rdmol)
    # The computed charges are stored on each atom with computed property
    # under the name _GasteigerCharge and _GasteigerHCharge.
    # Values could be NaN or infinite; those are replaced by 0.0.

    #11 (30) and #12 (31)
    for propName in ('_GasteigerCharge', '_GasteigerHCharge'):
        for atomFeatures, atom in zip(nodesFeatures, rdmol.GetAtoms()):
            # Parse the property once instead of once per check.
            charge = float(atom.GetProp(propName))
            atomFeatures.append(charge if np.isfinite(charge) else 0.0)

    # Add edges to graph
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.id = bond.GetIdx()
        edge.features = getBondFeatures(bond).astype('float32')
        edge.ends = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)

    # Add nodes to graph
    for i, atom in enumerate(rdmol.GetAtoms()):
        atomIdx = atom.GetIdx()
        node = Node()
        node.id = atomIdx
        node.features = getAtomFeatures(atom, nodesFeatures[i])

        # Each neighbor is recorded as (neighbor atom index, bond index).
        for neighbor in atom.GetNeighbors():
            neighborIdx = neighbor.GetIdx()
            node.neighbors.append(
                (neighborIdx,
                 rdmol.GetBondBetweenAtoms(atomIdx, neighborIdx).GetIdx()))

        graph.nodes.append(node)

    graph.nodeNum = len(graph.nodes)
    # Guard the node lookup the same way as the edge lookup, so a molecule
    # without atoms does not fail with an IndexError.
    if len(graph.nodes) > 0:
        graph.nodeFeatureDim = len(graph.nodes[0].features)
    if len(graph.edges) > 0:
        graph.edgeFeatureDim = len(graph.edges[0].features)

    return graph
示例#10
0
文件: preparing.py 项目: HadXu/PMP
def make_graph(name, gb_structure, gb_scalar_coupling):
    """Build the Graph sample (atom, edge and coupling features) of one molecule.

    Args:
        name: molecule name; used as the group key in both grouped frames
            and to locate the molecule's ``.xyz`` structure file.
        gb_structure: grouped structures; ``get_group(name)`` must provide
            the columns atom_index, atom, x, y, z.
        gb_scalar_coupling: grouped couplings; ``get_group(name)`` must
            provide the columns id, atom_index_0, atom_index_1, type,
            scalar_coupling_constant, fc, sd, pso, dso.

    Returns:
        Graph built from the molecule name, its SMILES, ``[symbols, xyz]``,
        the list of per-atom feature arrays, the list of per-edge feature
        arrays, the edge index array and the Coupling record.

    Raises:
        ValueError: if a coupling row refers to an atom pair that is not a
            directed edge of the graph.
    """
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type','scalar_coupling_constant']
    coupling_df = gb_scalar_coupling.get_group(name)

    # [molecule_name,atom_index,atom,x,y,z]
    df = gb_structure.get_group(name)
    df = df.sort_values(['atom_index'], ascending=True)
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values

    mol = mol_from_axyz(a, xyz)
    mol_op = openbabel.OBMol()
    obConversion.ReadFile(mol_op, f'../input/champs-scalar-coupling/structures/{name}.xyz')

    factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # new features
    degree = np.zeros((num_atom, 1), np.uint8)
    formalCharge = np.zeros((num_atom, 1), np.float32)
    chiral_tag = np.zeros((num_atom, 1), np.uint8)
    crippen_contribs = np.zeros((num_atom, 2), np.float32)
    tpsa = np.zeros((num_atom, 1), np.float32)
    labute_asac = np.zeros((num_atom, 1), np.float32)
    gasteiger_charges = np.zeros((num_atom, 1), np.float32)
    esataindices = np.zeros((num_atom, 1), np.float32)
    atomic_radiuss = np.zeros((num_atom, 1), np.float32)
    electronegate = np.zeros((num_atom, 1), np.float32)
    electronegate_sqre = np.zeros((num_atom, 1), np.float32)
    mass = np.zeros((num_atom, 1), np.float32)
    van = np.zeros((num_atom, 1), np.float32)
    cov = np.zeros((num_atom, 1), np.float32)
    ion = np.zeros((num_atom, 1), np.float32)

    # These descriptors are computed for the whole molecule at once, so do
    # it a single time here instead of once per atom inside the loop.
    mol_crippen = rdMolDescriptors._CalcCrippenContribs(mol)
    mol_tpsa = rdMolDescriptors._CalcTPSAContribs(mol)
    mol_labute = rdMolDescriptors._CalcLabuteASAContribs(mol)[0]
    mol_estate = EState.EStateIndices(mol)

    for i in range(num_atom):
        rd_atom = mol.GetAtomWithIdx(i)
        atom_op = mol_op.GetAtomById(i)
        atom_symbol = rd_atom.GetSymbol()

        symbol[i] = one_hot_encoding(atom_symbol, SYMBOL)
        aromatic[i] = rd_atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(rd_atom.GetHybridization(), HYBRIDIZATION)
        num_h[i] = rd_atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = rd_atom.GetAtomicNum()

        degree[i] = rd_atom.GetTotalDegree()
        formalCharge[i] = rd_atom.GetFormalCharge()
        chiral_tag[i] = int(rd_atom.GetChiralTag())

        crippen_contribs[i] = mol_crippen[i]
        tpsa[i] = mol_tpsa[i]
        labute_asac[i] = mol_labute[i]
        # The partial charge is read from the Open Babel atom, not from RDKit.
        gasteiger_charges[i] = atom_op.GetPartialCharge()
        esataindices[i] = mol_estate[i]

        # Per-element constants looked up by atom symbol.
        atomic_radiuss[i] = atomic_radius[atom_symbol]
        electronegate[i] = electronegativity[atom_symbol]
        electronegate_sqre[i] = electronegativity_square[atom_symbol]
        mass[i] = atomic_mass[atom_symbol]
        van[i] = vanderwaalsradius[atom_symbol]
        cov[i] = covalenzradius[atom_symbol]
        ion[i] = ionization_energy[atom_symbol]

    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # One directed edge for every ordered pair of distinct atoms.
    num_edge = num_atom * num_atom - num_atom
    edge_index = np.zeros((num_edge, 2), np.uint32)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint32)
    distance = np.zeros((num_edge, 1), np.float32)
    angle = np.zeros((num_edge, 1), np.float32)

    norm_xyz = preprocessing.normalize(xyz, norm='l2')

    # (i, j) -> row of edge_index, so the coupling rows below can be
    # resolved without scanning the edge list once per coupling.
    edge_row = {}
    ij = 0
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]
            edge_row[(i, j)] = ij

            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            # Dot product of the two l2-normalized position vectors.
            angle[ij] = (norm_xyz[i] * norm_xyz[j]).sum()

            ij += 1

    # NOTE(review): 1.889726133921252 is presumably the Angstrom -> Bohr
    # conversion factor; the distances above use the unscaled coordinates.
    xyz = xyz * 1.889726133921252

    system = System(symbols=a, positions=xyz)
    acsf = ACSF_GENERATOR.create(system)

    coupling_pairs = coupling_df[['atom_index_0', 'atom_index_1']].values
    coupling_rows = []
    for pair in coupling_pairs.tolist():
        try:
            coupling_rows.append(edge_row[tuple(pair)])
        except KeyError:
            # Keep the exception type a list search would have raised.
            raise ValueError(f'{pair} is not in list') from None

    coupling_rows = np.array(coupling_rows)

    coupling_edge_index = np.concatenate(
        [coupling_pairs, coupling_rows.reshape(len(coupling_rows), 1)],
        axis=1)

    coupling = Coupling(coupling_df['id'].values,
                        coupling_df[['fc', 'sd', 'pso', 'dso']].values,
                        coupling_edge_index,
                        np.array([COUPLING_TYPE.index(t) for t in coupling_df.type.values], np.int32),
                        coupling_df['scalar_coupling_constant'].values,
                        )

    graph = Graph(
        name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        [acsf, symbol, acceptor, donor, aromatic, hybridization, num_h, atomic, degree, formalCharge, chiral_tag,
         crippen_contribs, tpsa, labute_asac, gasteiger_charges, esataindices, atomic_radiuss, electronegate,
         electronegate_sqre, mass, van, cov, ion],
        [bond_type, distance, angle, ],
        edge_index,
        coupling,
    )

    return graph
示例#11
0
def molToGraph(rdmol, molecular_attributes=False):
    '''Converts an RDKit molecule to an attributed undirected graph.

    rdmol: RDKit molecule.
    molecular_attributes: when true, every atom additionally receives the
        Crippen logp/mr, TPSA, Labute ASA, EState and the two Gasteiger
        charge descriptors as extra attributes.

    Returns the populated Graph, with num_edges and num_nodes set.
    '''
    # Initialize
    graph = Graph()
    graph.molecular_attributes = molecular_attributes

    # Calculate atom-level molecule descriptors
    attributes = [[] for _ in rdmol.GetAtoms()]
    if molecular_attributes:
        # Each Crippen contribution holds the logp term followed by the mr
        # term, so one computation provides both descriptors.
        for atom_attrs, contrib in zip(
                attributes, rdMolDescriptors._CalcCrippenContribs(rdmol)):
            atom_attrs.append(contrib[0])
            atom_attrs.append(contrib[1])

        # TPSA contribution
        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcTPSAContribs(rdmol)):
            atom_attrs.append(value)

        # Labute ASA contribution
        for atom_attrs, value in zip(
                attributes, rdMolDescriptors._CalcLabuteASAContribs(rdmol)[0]):
            atom_attrs.append(value)

        # EState Index
        for atom_attrs, value in zip(attributes, EState.EStateIndices(rdmol)):
            atom_attrs.append(value)

        # Gasteiger partial charge, then Gasteiger hydrogen partial charge
        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        for prop_name in ('_GasteigerCharge', '_GasteigerHCharge'):
            for atom_attrs, atom in zip(attributes, rdmol.GetAtoms()):
                charge = float(atom.GetProp(prop_name))
                # Gasteiger partial charges sometimes give NaN or inf
                atom_attrs.append(charge if np.isfinite(charge) else 0.0)

    # Add bonds
    for bond in rdmol.GetBonds():
        edge = Edge()
        edge.i = bond.GetIdx()
        edge.attributes = bondAttributes(bond)
        edge.connects = (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
        graph.edges.append(edge)
    # Add atoms
    for k, atom in enumerate(rdmol.GetAtoms()):
        atom_idx = atom.GetIdx()
        node = Node()
        node.i = atom_idx
        node.attributes = atomAttributes(atom, extra_attributes=attributes[k])
        # Each neighbor is recorded as (neighbor atom index, bond index).
        for neighbor in atom.GetNeighbors():
            neighbor_idx = neighbor.GetIdx()
            node.neighbors.append(
                (neighbor_idx,
                 rdmol.GetBondBetweenAtoms(atom_idx, neighbor_idx).GetIdx()))
        graph.nodes.append(node)
    # Add counts, for convenience
    graph.num_edges = len(graph.edges)
    graph.num_nodes = len(graph.nodes)
    return graph