def can_assemble(self, node_x, node_y):
    """Return True if node_x can be assembled with its neighbors plus node_y.

    Every node in ``node_x.neighbors + [node_y]`` is assigned a fresh ``nid``
    equal to its position in that list (this mutates the nodes). The nodes
    are then ordered as: single-atom clusters first, followed by multi-atom
    clusters in descending order of atom count, and handed to
    ``enum_assemble``.

    Args:
        node_x: Node whose attachment configurations are enumerated.
        node_y: Additional node considered as a neighbor of ``node_x``.

    Returns:
        bool: True when ``enum_assemble`` yields at least one candidate.
    """
    all_neighbors = node_x.neighbors + [node_y]
    for idx, neighbor_node in enumerate(all_neighbors):
        neighbor_node.nid = idx

    # Multi-atom clusters, largest first.
    multi_atom = sorted(
        (node for node in all_neighbors if node.mol.GetNumAtoms() > 1),
        key=lambda node: node.mol.GetNumAtoms(),
        reverse=True,
    )

    # Singleton clusters must be picked from the *unfiltered* list; picking
    # them from the multi-atom list would always produce an empty result and
    # silently drop single-atom neighbors from the enumeration.
    singletons = [node for node in all_neighbors if node.mol.GetNumAtoms() == 1]

    # Retrieve all candidate attachment configurations of node_x with the
    # ordered neighbor nodes.
    candidates = enum_assemble(node_x, singletons + multi_atom)
    return len(candidates) > 0
def dfs_assemble_graph_conv(self, x_mol_vecs, all_nodes, cur_mol, global_amap,
                            fa_amap, cur_node, fa_node):
    """Depth-first assembly of a molecule, ranking candidates with ``self.graph_enc``.

    For ``cur_node`` the candidate attachments returned by ``enum_assemble``
    are encoded from their SMILES, scored by a matrix-vector product with
    ``x_mol_vecs`` and tried in descending score order. For each candidate
    the children are attached to a fresh copy of ``cur_mol`` and the
    non-leaf children are assembled recursively; the first candidate for
    which every recursive call succeeds wins.

    Args:
        x_mol_vecs: Vector multiplied against the candidate encodings.
        all_nodes: Passed through unchanged to recursive calls.
        cur_mol: Molecule assembled so far (copied via ``Chem.RWMol``).
        global_amap: Lookup indexed by node id and then atom index; it is
            deep-copied per candidate, so the caller's object is not mutated.
        fa_amap: Iterable of ``(nid, a1, a2)`` triples chosen at the parent.
        cur_node: Node currently being expanded.
        fa_node: Parent of ``cur_node`` or ``None`` at the root.

    Returns:
        The assembled molecule, or ``None`` when no candidate leads to a
        complete assembly.
    """
    fa_nid = fa_node.nid if fa_node is not None else -1
    prev_nodes = [fa_node] if fa_node is not None else []

    # Children are ordered singletons first, then larger clusters by
    # descending atom count.
    children = [nei for nei in cur_node.neighbors if nei.nid != fa_nid]
    neighbors = [nei for nei in children if nei.mol.GetNumAtoms() > 1]
    neighbors = sorted(neighbors, key=lambda x: x.mol.GetNumAtoms(), reverse=True)
    singletons = [nei for nei in children if nei.mol.GetNumAtoms() == 1]
    neighbors = singletons + neighbors

    # Re-express the parent's triples that mention this node from this
    # node's point of view (atom indices swapped, id replaced by fa_nid).
    cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node.nid]
    cands = enum_assemble(cur_node, neighbors, prev_nodes, cur_amap)
    if len(cands) == 0:
        return None

    cand_smiles, _, cand_amap = zip(*cands)

    # Score every candidate and visit them best-first.
    jt_graph_enc_holder = MolGraphEncoder.tensorize(cand_smiles)
    cand_vecs = self.graph_enc(*jt_graph_enc_holder)
    scores = torch.mv(cand_vecs, x_mol_vecs)
    _, cand_idx = torch.sort(scores, descending=True)

    backup_mol = Chem.RWMol(cur_mol)
    for i in range(cand_idx.numel()):
        # Every attempt starts from the untouched molecule.
        cur_mol = Chem.RWMol(backup_mol)
        pred_amap = cand_amap[cand_idx[i].item()]
        new_global_amap = copy.deepcopy(global_amap)

        for nei_id, ctr_atom, nei_atom in pred_amap:
            if nei_id == fa_nid:
                continue
            new_global_amap[nei_id][nei_atom] = new_global_amap[cur_node.nid][ctr_atom]

        # Father is already attached, hence the empty prev-node list.
        cur_mol = attach_mols(cur_mol, children, [], new_global_amap)
        new_mol = cur_mol.GetMol()
        new_mol = Chem.MolFromSmiles(Chem.MolToSmiles(new_mol))
        if new_mol is None:
            # SMILES round trip failed: skip this candidate.
            continue

        result = True
        for nei_node in children:
            if nei_node.is_leaf:
                continue
            cur_mol = self.dfs_assemble_graph_conv(
                x_mol_vecs, all_nodes, cur_mol, new_global_amap,
                pred_amap, nei_node, cur_node,
            )
            if cur_mol is None:
                result = False
                break

        if result:
            return cur_mol

    # Every candidate failed.
    return None
def dfs_assemble(self, tree_mess, mol_vec, all_nodes, cur_mol, global_amap,
                 fa_amap, cur_node, fa_node, prob_decode):
    """Depth-first assembly of a molecule, ranking candidates with ``self.jtmpn``.

    Candidate attachments of ``cur_node`` (from ``enum_assemble``) are
    encoded by ``self.jtmpn`` followed by ``self.G_mean``, scored against
    ``mol_vec`` (scores are multiplied by 20) and tried either in sampled
    order (``prob_decode``) or in descending score order. The first
    candidate whose non-leaf children can all be assembled recursively is
    returned.

    Args:
        tree_mess: Passed to ``self.jtmpn`` together with the candidates.
        mol_vec: Vector the candidate encodings are scored against; it is
            squeezed before use.
        all_nodes: Bundled into each candidate tuple given to ``self.jtmpn``.
        cur_mol: Molecule assembled so far (copied via ``Chem.RWMol``).
        global_amap: Lookup indexed by node id and then atom index; it is
            deep-copied per candidate.
        fa_amap: Iterable of ``(nid, a1, a2)`` triples chosen at the parent.
        cur_node: Node currently being expanded.
        fa_node: Parent of ``cur_node`` or ``None`` at the root.
        prob_decode: When true, the candidate order is drawn with
            ``torch.multinomial`` from the softmax of the scores.

    Returns:
        The assembled molecule, or ``None`` when no candidate works.
    """
    fa_nid = fa_node.nid if fa_node is not None else -1
    prev_nodes = [fa_node] if fa_node is not None else []

    # Children are ordered singletons first, then larger clusters by
    # descending atom count.
    children = [nei for nei in cur_node.neighbors if nei.nid != fa_nid]
    neighbors = [nei for nei in children if nei.mol.GetNumAtoms() > 1]
    neighbors = sorted(neighbors, key=lambda x: x.mol.GetNumAtoms(), reverse=True)
    singletons = [nei for nei in children if nei.mol.GetNumAtoms() == 1]
    neighbors = singletons + neighbors

    cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node.nid]
    cands = enum_assemble(cur_node, neighbors, prev_nodes, cur_amap)
    if len(cands) == 0:
        return None
    cand_smiles, cand_mols, cand_amap = zip(*cands)

    cands = [(candmol, all_nodes, cur_node) for candmol in cand_mols]

    cand_vecs = self.jtmpn(cands, tree_mess)
    cand_vecs = self.G_mean(cand_vecs)
    mol_vec = mol_vec.squeeze()
    scores = torch.mv(cand_vecs, mol_vec) * 20

    if prob_decode:
        # dim=1 is what the implicit-dim softmax picked for this 2-D view;
        # spelling it out avoids the deprecation warning.
        probs = nn.Softmax(dim=1)(scores.view(1, -1)).squeeze() + 1e-5  # prevent prob = 0
        cand_idx = torch.multinomial(probs, probs.numel())
    else:
        _, cand_idx = torch.sort(scores, descending=True)

    backup_mol = Chem.RWMol(cur_mol)
    # range / .item(): xrange does not exist on Python 3 and indexing a
    # 0-dim tensor with [0] is rejected by current PyTorch.
    for i in range(cand_idx.numel()):
        cur_mol = Chem.RWMol(backup_mol)
        pred_amap = cand_amap[cand_idx[i].item()]
        new_global_amap = copy.deepcopy(global_amap)

        for nei_id, ctr_atom, nei_atom in pred_amap:
            if nei_id == fa_nid:
                continue
            new_global_amap[nei_id][nei_atom] = new_global_amap[cur_node.nid][ctr_atom]

        # Father is already attached, hence the empty prev-node list.
        cur_mol = attach_mols(cur_mol, children, [], new_global_amap)
        new_mol = cur_mol.GetMol()
        new_mol = Chem.MolFromSmiles(Chem.MolToSmiles(new_mol))
        if new_mol is None:
            continue

        result = True
        for nei_node in children:
            if nei_node.is_leaf:
                continue
            cur_mol = self.dfs_assemble(
                tree_mess, mol_vec, all_nodes, cur_mol, new_global_amap,
                pred_amap, nei_node, cur_node, prob_decode,
            )
            if cur_mol is None:
                result = False
                break

        if result:
            return cur_mol

    return None
def can_assemble(node_x, node_y):
    """Tell whether node_x admits any assembly with its neighbors and node_y.

    Side effect: each node in ``node_x.neighbors + [node_y]`` has its ``nid``
    overwritten with its index in that list.

    Returns:
        bool: True if ``enum_assemble`` produces at least one candidate.
    """
    def atom_count(node):
        return node.mol.GetNumAtoms()

    pool = node_x.neighbors + [node_y]
    for position, member in enumerate(pool):
        member.nid = position

    # Larger clusters, biggest first ...
    larger = [member for member in pool if atom_count(member) > 1]
    larger.sort(key=atom_count, reverse=True)
    # ... preceded by the single-atom clusters.
    single = [member for member in pool if atom_count(member) == 1]

    found = enum_assemble(node_x, single + larger)
    return len(found) > 0
def assemble(self):
    """Enumerate attachment candidates for this node and store them on it.

    Neighbors are passed to ``enum_assemble`` with single-atom clusters
    first, followed by the remaining clusters in descending atom count.
    Afterwards ``self.cands`` holds the first component of every candidate
    triple and ``self.cand_mols`` the second; both are empty lists when no
    candidate exists.
    """
    def atom_count(node):
        return node.mol.GetNumAtoms()

    larger = [node for node in self.neighbors if atom_count(node) > 1]
    larger.sort(key=atom_count, reverse=True)
    single = [node for node in self.neighbors if atom_count(node) == 1]

    found = enum_assemble(self, single + larger)
    if len(found) == 0:
        self.cands = []
        self.cand_mols = []
        return

    # Transpose the list of triples; the third component is not kept.
    first_parts, second_parts, _ = zip(*found)
    self.cands = list(first_parts)
    self.cand_mols = list(second_parts)
def assemble(self):
    """Enumerate attachment candidates for this node, preferring non-negative scores.

    ``enum_assemble`` returns the candidates together with a parallel
    sequence of scores. Candidates whose score is negative are dropped,
    unless that would leave nothing, in which case the unfiltered
    candidates are kept. ``self.cands`` receives the first component of
    each remaining candidate pair (or an empty list).
    """
    def atom_count(node):
        return node.mol.GetNumAtoms()

    larger = [node for node in self.neighbors if atom_count(node) > 1]
    larger.sort(key=atom_count, reverse=True)
    single = [node for node in self.neighbors if atom_count(node) == 1]

    found, aroma = enum_assemble(self, single + larger)

    # Keep only candidates with a non-negative score when any exist.
    preferred = [entry for pos, entry in enumerate(found) if aroma[pos] >= 0]
    if len(preferred) > 0:
        found = preferred

    if len(found) == 0:
        self.cands = []
        return

    first_parts, _ = zip(*found)
    self.cands = list(first_parts)
def can_assemble(node_x, node_y):
    """Tell whether node_x admits any assembly with its neighbors and node_y.

    Side effects: ``node_x`` gets ``nid = 1`` and ``is_leaf = False``; every
    node in ``node_x.neighbors + [node_y]`` gets ``nid`` 2, 3, ... in list
    order and ``is_leaf`` set from its own neighbor count. ``set_atommap``
    is called on each molecule with the node's ``nid`` (0 for leaves).

    Returns:
        bool: True if ``enum_assemble`` yields at least one candidate. The
        scores it also returns are not used for the decision.
    """
    def atom_count(node):
        return node.mol.GetNumAtoms()

    node_x.nid = 1
    node_x.is_leaf = False
    set_atommap(node_x.mol, node_x.nid)

    pool = node_x.neighbors + [node_y]
    for nid, member in enumerate(pool, start=2):
        member.nid = nid
        member.is_leaf = len(member.neighbors) <= 1
        # Leaves are mapped with 0, inner nodes with their own id.
        set_atommap(member.mol, 0 if member.is_leaf else member.nid)

    larger = [member for member in pool if atom_count(member) > 1]
    larger.sort(key=atom_count, reverse=True)
    single = [member for member in pool if atom_count(member) == 1]

    found, _aroma_scores = enum_assemble(node_x, single + larger)
    return len(found) > 0
def assemble(self):
    """
    Enumerate the possible molecular attachment configurations of this
    "cluster-node" with its neighbor "cluster-nodes" and store them on the
    node.

    Nothing is returned. After the call ``self.candidates`` holds the first
    component of every candidate triple (SMILES) and ``self.candidate_mols``
    the second (molecules); both are empty lists when no configuration
    exists.
    """
    # Split the neighbors into singleton clusters (exactly one atom) and
    # larger clusters in a single pass.
    singleton_nodes = []
    larger_nodes = []
    for neighbor in self.neighbors:
        num_atoms = neighbor.mol.GetNumAtoms()
        if num_atoms > 1:
            larger_nodes.append(neighbor)
        elif num_atoms == 1:
            singleton_nodes.append(neighbor)

    # Larger clusters go in descending order of number of atoms, after all
    # the singletons.
    larger_nodes.sort(key=lambda node: node.mol.GetNumAtoms(), reverse=True)
    ordered_neighbors = singleton_nodes + larger_nodes

    # All valid combinations of this cluster with its ordered neighbors.
    found = enum_assemble(self, ordered_neighbors)

    if len(found) == 0:
        self.candidates = []
        self.candidate_mols = []
        return

    # SMILES, molecules; the third component is not kept.
    smiles_parts, mol_parts, _ = zip(*found)
    self.candidates = list(smiles_parts)
    self.candidate_mols = list(mol_parts)
def dfs_assemble(self, y_tree_mess, x_mol_vecs, all_nodes, cur_mol, global_amap,
                 fa_amap, cur_node, fa_node, prob_decode, check_aroma):
    """Depth-first assembly of a molecule with score-adjusted candidate ranking.

    Candidate attachments of ``cur_node`` come from ``enum_assemble`` together
    with one score per candidate. With more than one candidate they are
    encoded by ``self.jtmpn`` and ranked by
    ``mv(cand_vecs, x_mol_vecs) + aroma_score``; a single candidate gets a
    constant score. Candidates are tried in sampled order (``prob_decode``)
    or descending score order, and the first one whose non-leaf children all
    assemble recursively is returned.

    Args:
        y_tree_mess: Indexable pair; element 1 is passed to
            ``JTMPN.tensorize`` and element 0 to ``self.jtmpn``.
        x_mol_vecs: Vector the candidate encodings are scored against. The
            score tensor is created on this tensor's device.
        all_nodes: Bundled into each candidate tuple for tensorization.
        cur_mol: Molecule assembled so far (copied via ``Chem.RWMol``).
        global_amap: Lookup indexed by node id and then atom index; it is
            deep-copied per candidate.
        fa_amap: Iterable of ``(nid, a1, a2)`` triples chosen at the parent.
        cur_node: Node currently being expanded.
        fa_node: Parent of ``cur_node`` or ``None`` at the root.
        prob_decode: When true, the candidate order is drawn with
            ``torch.multinomial`` from the softmax of the scores.
        check_aroma: When true, give up immediately if the candidate scores
            sum to a negative value.

    Returns:
        tuple: ``(mol, mol)`` on success. On failure ``(None, partial)``,
        where ``partial`` is the incoming ``cur_mol`` or, if the first-tried
        candidate failed inside a child, the partial molecule that child
        reported.
    """
    fa_nid = fa_node.nid if fa_node is not None else -1
    prev_nodes = [fa_node] if fa_node is not None else []

    # Children are ordered singletons first, then larger clusters by
    # descending atom count.
    children = [nei for nei in cur_node.neighbors if nei.nid != fa_nid]
    neighbors = [nei for nei in children if nei.mol.GetNumAtoms() > 1]
    neighbors = sorted(neighbors, key=lambda x: x.mol.GetNumAtoms(), reverse=True)
    singletons = [nei for nei in children if nei.mol.GetNumAtoms() == 1]
    neighbors = singletons + neighbors

    cur_amap = [(fa_nid, a2, a1) for nid, a1, a2 in fa_amap if nid == cur_node.nid]
    cands, aroma_score = enum_assemble(cur_node, neighbors, prev_nodes, cur_amap)
    if len(cands) == 0 or (sum(aroma_score) < 0 and check_aroma):
        return None, cur_mol

    cand_smiles, cand_amap = zip(*cands)
    cands = [(smiles, all_nodes, cur_node) for smiles in cand_smiles]

    if len(cands) > 1:
        # Build the score tensor on the device of x_mol_vecs instead of a
        # hard-coded .cuda(), so CPU-only runs work and the addition below
        # never mixes devices.
        aroma_score = torch.Tensor(aroma_score).to(x_mol_vecs.device)
        jtmpn_holder = JTMPN.tensorize(cands, y_tree_mess[1])
        fatoms, fbonds, agraph, bgraph, scope = jtmpn_holder
        cand_vecs = self.jtmpn(fatoms, fbonds, agraph, bgraph, scope, y_tree_mess[0])
        scores = torch.mv(cand_vecs, x_mol_vecs) + aroma_score
    else:
        # A lone candidate needs no encoding.
        scores = torch.Tensor([1.0])

    if prob_decode:
        probs = F.softmax(scores.view(1, -1), dim=1).squeeze() + 1e-7  # prevent prob = 0
        cand_idx = torch.multinomial(probs, probs.numel())
    else:
        _, cand_idx = torch.sort(scores, descending=True)

    backup_mol = Chem.RWMol(cur_mol)
    pre_mol = cur_mol
    for i in range(cand_idx.numel()):
        # Every attempt starts from the untouched molecule.
        cur_mol = Chem.RWMol(backup_mol)
        pred_amap = cand_amap[cand_idx[i].item()]
        new_global_amap = copy.deepcopy(global_amap)

        for nei_id, ctr_atom, nei_atom in pred_amap:
            if nei_id == fa_nid:
                continue
            new_global_amap[nei_id][nei_atom] = new_global_amap[cur_node.nid][ctr_atom]

        # Father is already attached, hence the empty prev-node list.
        cur_mol = attach_mols(cur_mol, children, [], new_global_amap)
        new_mol = cur_mol.GetMol()
        new_mol = Chem.MolFromSmiles(Chem.MolToSmiles(new_mol))
        if new_mol is None:
            continue

        has_error = False
        for nei_node in children:
            if nei_node.is_leaf:
                continue
            tmp_mol, tmp_mol2 = self.dfs_assemble(
                y_tree_mess, x_mol_vecs, all_nodes, cur_mol, new_global_amap,
                pred_amap, nei_node, cur_node, prob_decode, check_aroma,
            )
            if tmp_mol is None:
                has_error = True
                # Remember the partial result of the first-tried candidate
                # only; later failures do not overwrite it.
                if i == 0:
                    pre_mol = tmp_mol2
                break
            cur_mol = tmp_mol

        if not has_error:
            return cur_mol, cur_mol

    return None, pre_mol