Exemplo n.º 1
0
def do_one_smiles2graph(datum):
    '''Turn one training example into the candidate-product inputs the model expects.

    datum is a (reaction, cand_bonds, gbonds) triple. The reaction string is
    split on '>' into exactly three fields; the first (reactants) and third
    (products) are handed to smiles2graph together with the candidate and
    gold bond changes, while the middle field is ignored.

    Returns the result of smiles2graph, or (None, None) when the example is
    skipped because of its size or when anything goes wrong (the error is
    printed to stdout rather than raised).
    '''
    skipped = (None, None)
    try:
        rxn_smiles, candidates, gold_bonds = datum
        reactants, _, products = rxn_smiles.split('>')
        size = count(reactants)
        # for the sake of training, only keep examples with 2 < size <= 100
        if not (2 < size <= 100):
            return skipped
        return smiles2graph(reactants,
                            products,
                            candidates,
                            gold_bonds,
                            cutoff=cutoff,
                            core_size=core_size)
    except Exception as err:
        # Best-effort worker: report the problem and let the caller drop the example.
        print(err)
        sys.stdout.flush()
        return skipped
def read_data(coord):
    '''Read the test reactions and their candidate bonds and feed them to the model.

    Each line of opts.test_path holds two whitespace-separated fields
    (reactant smiles, true edits); the matching line of opts.cand_path holds
    the candidate bond changes as alternating "x-y-t" and score tokens, with
    the very first token skipped. For every reaction the graph inputs are
    built with smiles2graph, (reactants, conf) is pushed onto `queue`, and
    the source tensors are enqueued through `session`. A final (None, None)
    is pushed onto `queue` once all reactions have been processed.

    coord: object whose request_stop() is always called on exit, whether or
        not an error occurred (presumably a TensorFlow Coordinator -- confirm
        against the caller).

    Errors are written to stderr instead of being raised.
    '''
    try:
        data = []
        # Context managers make sure both files are closed even if parsing fails.
        with open(opts.test_path, 'r') as data_f, open(opts.cand_path, 'r') as cand_f:
            for line in data_f:
                # reactant smiles, true edits (the edits are not needed here)
                r, _ = line.strip("\r\n ").split()
                cand = cand_f.readline()  # candidate bond changes from CoreFinder

                cand_split = cand.strip("\r\n ").split()
                cbonds = []  # list of (x, y, t, v)
                # Token 0 is skipped; after that tokens alternate bond / score.
                for i in range(1, len(cand_split), 2):
                    x, y, t = cand_split[i].split('-')
                    # zero-based atom numbers, smaller one first
                    x, y = sorted((int(x) - 1, int(y) - 1))

                    # record candidate bond as (atom num, atom num, bond order, likelihood score)
                    cbonds.append((x, y, float(t), float(cand_split[i + 1])))

                data.append((r, cbonds))

        for reaction, cand_bonds in data:
            r = reaction.split('>')[0]
            ncore = core_size
            # Shrink the core size until the number of candidates fits.
            # NOTE(review): there is no lower bound on ncore; this relies on
            # smiles2graph eventually returning <= MAX_NCAND candidates.
            while True:
                src_tuple, conf = smiles2graph(r,
                                               None,
                                               cand_bonds,
                                               None,
                                               core_size=ncore,
                                               cutoff=MAX_NCAND,
                                               testing=True)
                if len(conf) <= MAX_NCAND:
                    break
                ncore -= 1
            queue.put((r, conf))
            feed_map = {x: y for x, y in zip(_src_holder, src_tuple)}
            session.run(enqueue, feed_dict=feed_map)

        # Sentinel pushed after the last reaction.
        queue.put((None, None))

    except Exception as e:
        # stderr.write() only accepts text; passing the exception object
        # itself would raise TypeError and hide the real error.
        sys.stderr.write('{}\n'.format(e))
        sys.stderr.flush()

    finally:
        coord.request_stop()
def do_one_smiles2graph(datum):
    '''Build the smiles2graph output for a single (reaction, cand_bonds, gbonds) example.

    The reaction must contain exactly three '>'-separated fields; the first
    and last are forwarded to smiles2graph and the middle one is dropped.
    Examples whose count() of the first field is not in the range 3..100 are
    skipped. Skipped examples and any failure both yield (None, None);
    failures are printed to stdout.
    '''
    try:
        rxn, bond_cands, gold = datum
        fields = rxn.split('>')
        lhs, _, rhs = fields
        n = count(lhs)
        if 2 < n <= 100:
            return smiles2graph(lhs, rhs, bond_cands, gold,
                                cutoff=cutoff, core_size=core_size)
        # for the sake of training, ignore huge examples
        return None, None
    except Exception as exc:
        print(exc)
        sys.stdout.flush()
        return None, None
    def predict(self, react, top_cand_bonds, top_cand_scores=None):
        '''Score the candidate outcomes for one set of reactants.

        react: atom mapped reactant smiles
        top_cand_bonds: list of strings "ai-aj-bo"
        top_cand_scores: optional list with one score per candidate bond;
            when omitted or empty every candidate bond gets a score of 0.0

        Returns a list of (score, smiles) tuples, one per prediction returned
        by the session, where score is the softmax over the raw prediction
        scores and smiles is the '.'-joined set of strings from edit_mol.
        '''
        if not top_cand_scores:
            top_cand_scores = [0.0 for _ in top_cand_bonds]

        cand_bonds = []
        for i, b in enumerate(top_cand_bonds):
            x, y, t = b.split('-')
            # atom numbers become zero-based; bond order stays a float
            x, y, t = int(float(x)) - 1, int(float(y)) - 1, float(t)

            cand_bonds.append((x, y, t, float(top_cand_scores[i])))

        # Shrink the core size until the number of candidates fits. The
        # shrinking value must be the one handed to smiles2graph, otherwise
        # the retry can never produce a different result.
        ncore = core_size
        while True:
            src_tuple, conf = smiles2graph(react,
                                           None,
                                           cand_bonds,
                                           None,
                                           core_size=ncore,
                                           cutoff=MAX_NCAND,
                                           testing=True)
            if len(conf) <= MAX_NCAND:
                break
            ncore -= 1

        feed_map = {x: y for x, y in zip(self._src_holder, src_tuple)}
        self.session.run(self.enqueue, feed_dict=feed_map)

        cur_pred_scores, cur_pred = self.session.run(self.predict_vars)

        bond_types = [
            Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE,
            Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC
        ]
        bond_types_as_double = {0.0: 0, 1.0: 1, 2.0: 2, 3.0: 3, 1.5: 4}

        # Don't waste predictions on bond changes that aren't actually changes
        rmol = Chem.MolFromSmiles(react)
        rbonds = {}  # (smaller map num, larger map num) -> position in bond_types + 1
        for bond in rmol.GetBonds():
            a1 = bond.GetBeginAtom().GetAtomMapNum()
            a2 = bond.GetEndAtom().GetAtomMapNum()
            rbonds[(min(a1, a2), max(a1, a2))] = bond_types.index(bond.GetBondType()) + 1

        outcomes = []
        for i, idx in enumerate(cur_pred):
            cbonds = []
            # Define edits from prediction
            for x, y, t, _ in conf[idx]:
                x, y = x + 1, y + 1  # back to one-based atom map numbers
                # NOTE(review): rbonds encodes aromatic as 4 while the keys of
                # bond_types_as_double suggest t is 1.5 for aromatic, so an
                # existing aromatic bond presumably always counts as changed
                # -- confirm this is intended.
                if ((x, y) not in rbonds and t > 0) or (
                        (x, y) in rbonds and rbonds[(x, y)] != t):
                    cbonds.append((x, y, bond_types_as_double[t]))
            pred_smiles = edit_mol(rmol, cbonds)
            outcomes.append((cur_pred_scores[i], '.'.join(set(pred_smiles))))

        # Replace the raw scores with their softmax.
        all_scores = softmax(np.array([x[0] for x in outcomes]))

        return [(all_scores[i], smiles) for i, (_, smiles) in enumerate(outcomes)]