def do_one_smiles2graph(datum):
    """Generate the candidate products for one example in the model's input format.

    ``datum`` is unpacked as ``(reaction, cand_bonds, gbonds)``. The reaction
    string is split on ``'>'`` into exactly three fields: reactants, a middle
    field that is ignored, and products. Those pieces are forwarded to
    ``smiles2graph`` together with the ``cutoff`` and ``core_size`` values
    found in the enclosing scope.

    Returns:
        The result of ``smiles2graph``, or ``(None, None)`` when
        ``count(reactants)`` is at most 2 or greater than 100, or when any
        exception is raised along the way (the exception is printed to
        stdout before returning).
    """
    try:
        rxn_smiles, candidates, gold_bonds = datum
        reactants, _, products = rxn_smiles.split('>')
        size = count(reactants)
        # for the sake of training, ignore huge examples
        if size > 100 or size <= 2:
            return (None, None)
        graph_inputs = smiles2graph(
            reactants,
            products,
            candidates,
            gold_bonds,
            cutoff=cutoff,
            core_size=core_size,
        )
        return graph_inputs
    except Exception as err:
        # Best-effort: report the problem and let the caller skip this example.
        print(err)
        sys.stdout.flush()
        return None, None
def read_data(coord):
    """Load test reactions with their candidate bonds and feed them to the model.

    Phase 1 reads ``opts.test_path`` and ``opts.cand_path`` in lockstep (one
    candidate line per reaction line). Each reaction line must split on
    whitespace into exactly two fields: the reactant SMILES and the true
    edits (the edits are not used here). On the candidate line the first
    token is skipped; the remaining tokens alternate between ``"x-y-t"`` and
    a likelihood score.

    Phase 2 calls ``smiles2graph`` for every reaction, lowering the core size
    by one until at most ``MAX_NCAND`` candidates come back, then puts
    ``(reactants, conf)`` on ``queue`` and runs ``enqueue`` with the graph
    tensors. A final ``(None, None)`` is put on ``queue`` once all reactions
    have been processed.

    Args:
        coord: object exposing ``request_stop()``; it is always called when
            this function exits, whether or not an error occurred.

    Any exception is reported on stderr and swallowed; in that case the
    ``(None, None)`` end marker is not queued.
    """
    try:
        data = []
        # Context managers make sure both files are closed even if parsing fails.
        with open(opts.test_path, 'r') as data_f, open(opts.cand_path, 'r') as cand_f:
            for line in data_f:
                # reactant smiles, true edits (edits unused in this function)
                r, _edits = line.strip("\r\n ").split()
                # candidate bond changes from CoreFinder
                cand = cand_f.readline()
                cand_split = cand.strip("\r\n ").split()
                cbonds = []  # list of (x, y, t, v)
                for i in range(1, len(cand_split), 2):
                    x, y, t = cand_split[i].split('-')
                    # Atom numbers in the file are 1-based; store them 0-based
                    # with the smaller index first.
                    x, y = tuple(sorted([int(x) - 1, int(y) - 1]))
                    # record candidate bond as
                    # (atom num, atom num, bond order, likelihood score)
                    cbonds.append((x, y, float(t), float(cand_split[i + 1])))
                data.append((r, cbonds))

        for reaction, cand_bonds in data:
            r = reaction.split('>')[0]
            ncore = core_size
            # Shrink the core until the candidate count fits within MAX_NCAND.
            while True:
                src_tuple, conf = smiles2graph(
                    r, None, cand_bonds, None,
                    core_size=ncore, cutoff=MAX_NCAND, testing=True)
                if len(conf) <= MAX_NCAND:
                    break
                ncore -= 1
            queue.put((r, conf))
            feed_map = dict(zip(_src_holder, src_tuple))
            session.run(enqueue, feed_dict=feed_map)
        queue.put((None, None))
    except Exception as e:
        # stderr.write() only accepts strings; passing the exception object
        # itself would raise TypeError here and hide the real error.
        sys.stderr.write(str(e) + "\n")
        sys.stderr.flush()
    finally:
        coord.request_stop()
def do_one_smiles2graph(datum):
    """Run ``smiles2graph`` on a single ``(reaction, cand_bonds, gbonds)`` record.

    The reaction string must contain exactly three ``'>'``-separated fields;
    the first (reactants) and last (products) are passed on, the middle one
    is discarded. ``cutoff`` and ``core_size`` are read from the enclosing
    scope.

    Returns ``(None, None)`` instead of the ``smiles2graph`` result when
    ``count(reactants)`` is not in the range 3..100, or when anything raises
    (the exception is printed to stdout and flushed first).
    """
    try:
        reaction, cand_bonds, gbonds = datum
        fields = reaction.split('>')
        reactant_smiles, _unused, product_smiles = fields
        n_atoms = count(reactant_smiles)
        if n_atoms <= 2:
            return (None, None)
        if n_atoms > 100:
            # for the sake of training, ignore huge examples
            return (None, None)
        return smiles2graph(reactant_smiles, product_smiles, cand_bonds, gbonds,
                            cutoff=cutoff, core_size=core_size)
    except Exception as exc:
        print(exc)
        sys.stdout.flush()
        return None, None
def predict(self, react, top_cand_bonds, top_cand_scores=None):
    """Rank candidate outcomes for one set of reactants.

    Args:
        react: atom mapped reactant smiles.
        top_cand_bonds: list of strings "ai-aj-bo" (1-based atom numbers and
            a bond order).
        top_cand_scores: optional scores, indexed in parallel with
            ``top_cand_bonds``. When omitted or empty, every candidate bond
            is given a score of 0.0.

    Returns:
        A list of ``(score, smiles)`` tuples in the order of the model's
        predictions. ``score`` is the softmax over all raw prediction
        scores, and ``smiles`` is the '.'-joined set of strings returned by
        ``edit_mol`` for that prediction.
    """
    if not top_cand_scores:
        top_cand_scores = [0.0 for _ in top_cand_bonds]

    cand_bonds = []
    for i, b in enumerate(top_cand_bonds):
        x, y, t = b.split('-')
        # Atom numbers arrive 1-based (possibly formatted as floats).
        x, y, t = int(float(x)) - 1, int(float(y)) - 1, float(t)
        cand_bonds.append((x, y, t, float(top_cand_scores[i])))

    # Shrink the core until the candidate count fits within MAX_NCAND.
    # ncore has to start from core_size and be the value actually passed to
    # smiles2graph; otherwise the retry raises UnboundLocalError and could
    # never reduce the number of candidates.
    ncore = core_size
    while True:
        src_tuple, conf = smiles2graph(
            react, None, cand_bonds, None,
            core_size=ncore, cutoff=MAX_NCAND, testing=True)
        if len(conf) <= MAX_NCAND:
            break
        ncore -= 1

    feed_map = dict(zip(self._src_holder, src_tuple))
    self.session.run(self.enqueue, feed_dict=feed_map)
    cur_pred_scores, cur_pred = self.session.run(self.predict_vars)

    bond_types = [
        Chem.rdchem.BondType.SINGLE,
        Chem.rdchem.BondType.DOUBLE,
        Chem.rdchem.BondType.TRIPLE,
        Chem.rdchem.BondType.AROMATIC,
    ]
    bond_types_as_double = {0.0: 0, 1.0: 1, 2.0: 2, 3.0: 3, 1.5: 4}

    # Don't waste predictions on bond changes that aren't actually changes
    rmol = Chem.MolFromSmiles(react)
    rbonds = {}  # (smaller map num, larger map num) -> 1-based bond_types index
    for bond in rmol.GetBonds():
        a1 = bond.GetBeginAtom().GetAtomMapNum()
        a2 = bond.GetEndAtom().GetAtomMapNum()
        t = bond_types.index(bond.GetBondType()) + 1
        a1, a2 = min(a1, a2), max(a1, a2)
        rbonds[(a1, a2)] = t

    outcomes = []
    for i in range(len(cur_pred)):
        idx = cur_pred[i]
        cbonds = []
        # Define edits from prediction
        for x, y, t, v in conf[idx]:
            x, y = x + 1, y + 1  # back to 1-based atom map numbers
            # NOTE(review): rbonds stores AROMATIC as 4 while the candidate
            # order is looked up in bond_types_as_double (keyed by 1.5 for
            # that code); confirm both sides use the same scale.
            if ((x, y) not in rbonds and t > 0) or (
                    (x, y) in rbonds and rbonds[(x, y)] != t):
                cbonds.append((x, y, bond_types_as_double[t]))
        pred_smiles = edit_mol(rmol, cbonds)
        outcomes.append((cur_pred_scores[i], '.'.join(set(pred_smiles))))

    # Replace the raw scores with their softmax over all outcomes.
    all_scores = softmax(np.array([score for score, _ in outcomes]))
    return [(all_scores[i], smiles) for i, (_, smiles) in enumerate(outcomes)]