예제 #1
0
def generate(agent_path,
             out,
             num=10000,
             environ_path='output/RF_cls_ecfp6.pkg'):
    """Sample molecules as SMILES strings and write them to a tab-separated file.

    Arguments:
        agent_path (str): path of the saved state dict for the RNN agent (generator).
        out (str): path of the output file. It always has a CANONICAL_SMILES
            column and, when an environment is used, a SCORE column.
        num (int, optional): total number of SMILES to generate. (Default: 10000)
        environ_path (str or None): path of the predictor used to build the
            environment. Pass None to skip scoring.
    """
    batch_size = 500
    voc = util.Voc("data/voc.txt")
    agent = model.Generator(voc)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    agent.load_state_dict(torch.load(agent_path))
    # The predictor does not change between batches, so build it once.
    environ = util.Environment(environ_path) if environ_path is not None else None

    # The remainder (if any) is sampled first, followed by the full batches.
    remainder = num % batch_size
    sizes = ([remainder] if remainder else []) + [batch_size] * (num // batch_size)

    batches = []
    for size in sizes:
        batch = pd.DataFrame()
        samples = agent.sample(size)
        smiles, valids = util.check_smiles(samples, agent.voc)
        if environ is not None:
            # Reward of each SMILES given by the environment (predictor);
            # invalid SMILES are forced to a score of 0.
            scores = environ(smiles)
            scores[valids == 0] = 0
            batch['SCORE'] = scores
        batch['CANONICAL_SMILES'] = smiles
        batches.append(batch)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    df = pd.concat(batches) if batches else pd.DataFrame()
    df.to_csv(out, sep='\t', index=None)
예제 #2
0
    def fit(self, loader_train, out, loader_valid=None, epochs=100, lr=1e-3):
        """Train the generator by maximum likelihood and keep the best checkpoint.

        Progress is written to ``out + '.log'``. The best parameters seen so far
        are saved to ``out + '.pkg'`` and loaded back into the model at the end.

        Arguments:
            loader_train (DataLoader): training batches; each batch is moved to
                ``util.dev`` and passed to ``self.likelihood`` (per the original
                notes: an m x n LongTensor, m samples by n maximum sequence
                length).
            out (str): path prefix for the model file ('.pkg') and the log
                file ('.log').
            loader_valid (DataLoader, optional): validation batches with the
                same structure as loader_train. When given, the checkpoint is
                selected by validation loss; otherwise by the invalid-SMILES
                rate of 1000 sampled sequences.
            epochs (int, optional): number of training epochs (default: 100).
            lr (float, optional): Adam learning rate (default: 1e-3).
        """
        optimizer = optim.Adam(self.parameters(), lr=lr)
        best_error = np.inf
        # The context manager closes the log even if training raises.
        with open(out + '.log', 'w') as log:
            for epoch in tqdm(range(epochs)):
                steps = tqdm(enumerate(loader_train), total=len(loader_train),
                             desc='Epoch %d' % epoch)
                for i, batch in steps:
                    optimizer.zero_grad()
                    loss_train = self.likelihood(batch.to(util.dev))
                    loss_train = -loss_train.mean()
                    loss_train.backward()
                    optimizer.step()
                    # Performance evaluation.
                    # NOTE(review): with a validation loader this condition is
                    # true on every step, not only every 10th -- confirm that
                    # 'or' (rather than 'and') is intended.
                    if i % 10 == 0 or loader_valid is not None:
                        # 1000 SMILES are sampled and checked for validity.
                        seqs = self.sample(1000)
                        smiles, valids = util.check_smiles(seqs, self.voc)
                        error = 1 - sum(valids) / len(seqs)
                        info = "Epoch: %d step: %d error_rate: %.3f loss_train: %.3f" % (epoch, i, error, loss_train.item())
                        if loader_valid is not None:
                            # With a validation set, the checkpoint criterion is
                            # the validation loss. No gradients are needed here,
                            # so avoid building autograd graphs.
                            loss_valid, size = 0, 0
                            with torch.no_grad():
                                for batch_valid in loader_valid:
                                    size += batch_valid.size(0)
                                    loss_valid += -self.likelihood(batch_valid.to(util.dev)).sum()
                            loss_valid = loss_valid / size / self.voc.max_len
                            if loss_valid.item() < best_error:
                                torch.save(self.state_dict(), out + '.pkg')
                                best_error = loss_valid.item()
                            info += ' loss_valid: %.3f' % loss_valid.item()
                        elif error < best_error:
                            # Without a validation set, the criterion is the
                            # fraction of invalid sampled SMILES.
                            torch.save(self.state_dict(), out + '.pkg')
                            best_error = error
                        print(info, file=log)
                        for j, smile in enumerate(smiles):
                            print('%d\t%s' % (valids[j], smile), file=log)
        # Restore the best checkpoint written during training.
        self.load_state_dict(torch.load(out + '.pkg'))
예제 #3
0
def main():
    """Train the generator adversarially against a discriminator and a predictor.

    Loads the pre-trained generator, pre-trains the discriminator if no saved
    one exists, then alternates policy-gradient updates of the generator with
    one epoch of discriminator training. After each epoch 1000 sequences are
    sampled and scored; the generator with the highest fraction of unique
    SMILES scoring >= 0.5 is saved to ``agent_path + '.pkg'``, and per-epoch
    results are written to ``agent_path + '.log'``.
    """
    voc = util.Voc(init_from_file="data/voc_b.txt")
    netR_path = 'output/rf_dis.pkg'
    netG_path = 'output/net_p'
    netD_path = 'output/net_d'
    agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10,
                                                 BATCH_SIZE, MC)

    netR = util.Environment(netR_path)

    agent = model.Generator(voc)
    agent.load_state_dict(T.load(netG_path + '.pkg'))

    # Only compounds with PCHEMBL_VALUE >= 6.5 are used as real samples.
    df = pd.read_table('data/CHEMBL251.txt')
    df = df[df['PCHEMBL_VALUE'] >= 6.5]
    data = util.MolData(df, voc)
    loader = DataLoader(data,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        drop_last=True,
                        collate_fn=data.collate_fn)

    netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER)
    # Pre-train the discriminator only when no saved one is available.
    if not os.path.exists(netD_path + '.pkg'):
        Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path)
    netD.load_state_dict(T.load(netD_path + '.pkg'))

    best_score = 0
    # The context manager closes the log even if an epoch raises.
    with open(agent_path + '.log', 'w') as log:
        for epoch in range(1000):
            print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
            print('\nPolicy Gradient Training Generator : ')
            Train_GAN(agent, netD, netR)

            print('\nAdversarial Training Discriminator : ')
            Train_dis_BCE(netD, agent, loader, epochs=1)

            # Evaluate on the unique sequences among 1000 samples.
            seqs = agent.sample(1000)
            ix = util.unique(seqs)
            smiles, valids = util.check_smiles(seqs[ix], agent.voc)
            scores = netR(smiles)
            scores[valids == False] = 0
            # Divided by the number sampled (1000), not by the unique count.
            unique = (scores >= 0.5).sum() / 1000
            if best_score < unique:
                T.save(agent.state_dict(), agent_path + '.pkg')
                best_score = unique
            print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" %
                  (epoch, scores.mean(), valids.mean(), unique),
                  file=log)
            for i, smile in enumerate(smiles):
                print('%f\t%s' % (scores[i], smile), file=log)

            # Learning rate decays by 1% per epoch.
            for param_group in agent.optim.param_groups:
                param_group['lr'] *= (1 - 0.01)
예제 #4
0
def Rollout_PG(agent, environ, explore=None):
    """Run one policy-gradient update with per-token rewards from rollouts.

    A batch of sequences is sampled from the agent. For each position, the
    prefix up to that position is completed MC times by sampling from the
    agent, and the environment scores (minus Baseline) of the completed
    sequences are averaged to form the reward of that token.

    agent (model.Generator): the exploitation network for SMILES string generation
    environ (util.Activity): the environment that provides the final reward for each SMILES
    explore (model.Generator): the exploration network for SMILES string generation,
        it has the same architecture as the agent.

    Returns:
        tuple: ``(0, valids.mean(), smiles, preds)`` for the initially sampled
        batch, where ``preds`` are the baseline-subtracted environment scores.
    """

    agent.optim.zero_grad()
    seqs = agent.sample(BATCH_SIZE, explore=explore, epsilon=Epsilon)
    batch_size = seqs.size(0)
    seq_len = seqs.size(1)
    rewards = np.zeros((batch_size, seq_len))
    smiles, valids = util.check_smiles(seqs, agent.voc)
    preds = environ(smiles) - Baseline
    # Invalid SMILES get a raw score of 0, i.e. -Baseline after subtraction.
    preds[valids == False] = -Baseline
    scores, hiddens = agent.likelihood(seqs)

    # Monte Carlo rollouts for step-wise reward generation
    for _ in tqdm(range(MC)):
        for i in range(0, seq_len):
            if (seqs[:, i] != 0).any():
                # Complete every prefix seqs[:, :i + 1] from its hidden state.
                h = hiddens[:, :, i, :]
                subseqs = agent.sample(batch_size,
                                       inits=(seqs[:, i], h, i + 1, None))
                subseqs = torch.cat([seqs[:, :i + 1], subseqs], dim=1)
                subsmile, subvalid = util.check_smiles(subseqs, voc=agent.voc)
                subpred = environ(subsmile) - Baseline
                # Boolean mask, consistent with `preds` above. The previous
                # `1 - subvalid` produced an integer index array and only
                # overwrote positions 0 and 1.
                subpred[subvalid == False] = -Baseline
            else:
                # Every sequence has token 0 at this position: reuse the
                # reward of the full sequence.
                subpred = preds
            rewards[:, i] += subpred
    loss = agent.PGLoss(scores, seqs, torch.FloatTensor(rewards / MC))
    loss.backward()
    agent.optim.step()
    return 0, valids.mean(), smiles, preds
예제 #5
0
def main():
    """Train the generator with policy gradient against the environment.

    Loads the pre-trained exploitation and exploration networks, then runs
    1000 epochs of policy-gradient training. After each epoch 1000 sequences
    are sampled and scored; the agent with the highest fraction of unique
    SMILES scoring >= 0.5 is saved to ``agent_path + '.pkg'``, and per-epoch
    results are written to ``agent_path + '.log'``.
    """
    # Vocabulary containing all of the tokens for SMILES construction
    voc = util.Voc("data/voc.txt")
    # File path of predictor in the environment
    environ_path = 'output/RF_cls_ecfp6.pkg'
    # File path of the saved network used for initialization
    initial_path = 'output/net_p'
    # File path for the optimal exploitation network
    agent_path = 'output/net_e_%.2f_%.1f_%dx%d' % (Epsilon, Baseline,
                                                   BATCH_SIZE, MC)
    # File path of the saved exploration network
    explore_path = 'output/net_p'

    # Environment (predictor)
    environ = util.Environment(environ_path)
    # Agent (generator, exploitation network)
    agent = model.Generator(voc)
    agent.load_state_dict(torch.load(initial_path + '.pkg'))

    # Exploration network
    explore = model.Generator(voc)
    explore.load_state_dict(torch.load(explore_path + '.pkg'))

    best_score = 0
    # The context manager closes the log even if an epoch raises.
    with open(agent_path + '.log', 'w') as log:
        for epoch in range(1000):
            print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
            print('\nForward Policy Gradient Training Generator : ')
            Policy_gradient(agent, environ, explore=explore)

            # Evaluate on the unique sequences among 1000 samples.
            seqs = agent.sample(1000)
            ix = util.unique(seqs)
            smiles, valids = util.check_smiles(seqs[ix], agent.voc)
            scores = environ(smiles)
            scores[valids == False] = 0
            # Divided by the number sampled (1000), not by the unique count.
            unique = (scores >= 0.5).sum() / 1000
            # The model with the best percentage of unique desired SMILES is
            # persisted on the hard drive.
            if best_score < unique:
                torch.save(agent.state_dict(), agent_path + '.pkg')
                best_score = unique
            print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" %
                  (epoch, scores.mean(), valids.mean(), unique),
                  file=log)
            for i, smile in enumerate(smiles):
                print('%f\t%s' % (scores[i], smile), file=log)

            # Learning rate exponential decay (1% per epoch)
            for param_group in agent.optim.param_groups:
                param_group['lr'] *= (1 - 0.01)
예제 #6
0
def Train_GAN(netG, netD, netR, sigma=SIGMA):
    """Run one round of policy-gradient updates of the generator.

    MC batches of BATCH_SIZE sequences are sampled from ``netG`` and
    de-duplicated. Each sequence is rewarded with a blend of the predictor
    score and the discriminator output (weights ``sigma`` and ``1 - sigma``);
    invalid SMILES get 0, and ``BL`` is subtracted from every reward. The
    generator is then updated batch by batch with its ``PGLoss``.

    netG: generator; must expose sample, likelihood, PGLoss, optim and voc.
    netD: discriminator, called on the sampled sequences.
    netR: predictor, called on the decoded SMILES.
    sigma: weight of the predictor score in the blended reward.
    """
    sampled = T.cat([netG.sample(BATCH_SIZE) for _ in range(MC)], dim=0)
    sampled = sampled[util.unique(sampled)]
    smiles, valids = util.check_smiles(sampled, netG.voc)

    # Blend the two reward sources, then zero out invalid SMILES and shift
    # everything by the baseline.
    env_reward = netR(smiles)
    dis_reward = netD(util.Variable(sampled)).data.cpu().numpy()[:, 0]
    rewards = sigma * env_reward + (1 - sigma) * dis_reward
    rewards[valids == False] = 0
    rewards -= BL

    reward_column = T.Tensor(rewards.reshape(-1, 1))
    batches = DataLoader(TensorDataset(sampled, reward_column),
                         batch_size=BATCH_SIZE)
    for seq_batch, reward_batch in batches:
        score, _ = netG.likelihood(seq_batch)
        netG.optim.zero_grad()
        loss = netG.PGLoss(score, seq_batch, reward_batch)
        loss.backward()
        netG.optim.step()
예제 #7
0
def Policy_gradient(agent, environ, explore=None):
    """Update the generator with policy gradient using only the final reward.

    MC batches of BATCH_SIZE sequences are sampled (optionally mixing in the
    exploration network), de-duplicated and scored by the environment.
    Invalid SMILES get 0, and ``Baseline`` is subtracted from every reward
    before the agent is updated batch by batch.

    agent (model.Generator): the exploitation network for SMILES string generation
    environ (util.Activity): the environment that provides the final reward for each SMILES
    explore (model.Generator): the exploration network for SMILES string generation,
        it has the same architecture as the agent.
    """
    # Repeated sampling, MC times, merged into a single pool of unique sequences.
    draws = [
        agent.sample(BATCH_SIZE, explore=explore, epsilon=Epsilon)
        for _ in range(MC)
    ]
    pool = torch.cat(draws, dim=0)
    pool = pool[util.unique(pool)]
    smiles, valids = util.check_smiles(pool, agent.voc)

    # Rewards: environment score, 0 for invalid SMILES, shifted by the baseline.
    rewards = environ(smiles)
    rewards[valids == False] = 0
    rewards -= Baseline
    rewards = torch.Tensor(rewards.reshape(-1, 1)).to(util.dev)

    batches = DataLoader(TensorDataset(pool, rewards), batch_size=BATCH_SIZE)

    # Training loop
    for seq_batch, reward_batch in batches:
        score = agent.likelihood(seq_batch)
        agent.optim.zero_grad()
        loss = agent.PGLoss(score, reward_batch)
        loss.backward()
        agent.optim.step()
예제 #8
0
    def fit(self, pair_loader, tgt_loader, epochs=100, out=None):
        """Train the model on (target, compound) pairs and keep the best checkpoint.

        Every 10th step (except step 0) the model generates sequences for
        all targets in ``tgt_loader`` four times. The checkpoint with the
        highest fraction of valid SMILES is saved to ``out + '.pkg'``, and
        progress is written to ``out + '.log'``.

        Arguments:
            pair_loader: iterable of ``(tgts, cmps)`` tensor batches used for
                the NLL training loss.
            tgt_loader: iterable of ``(ix, tgt)`` batches used for evaluation;
                only ``tgt`` is used.
            epochs (int, optional): number of training epochs (default: 100).
            out (str): path prefix for the '.log' and '.pkg' files. Required
                despite the ``None`` default.

        Raises:
            TypeError: if ``out`` is None.
        """
        if out is None:
            # Previously this surfaced as an opaque `None + str` TypeError.
            raise TypeError("fit() requires 'out': the path prefix for the "
                            "'.log' and '.pkg' files")
        best_valid = 0.
        net = nn.DataParallel(self, device_ids=util.devices)
        optimizer = torch.optim.Adam(self.parameters())
        # The context manager closes the log even if training raises.
        with open(out + '.log', 'w') as log:
            for epoch in range(epochs):
                for i, (tgts, cmps) in enumerate(pair_loader):
                    tgts, cmps = tgts.to(util.dev), cmps.to(util.dev)
                    optimizer.zero_grad()
                    output = net(tgts, cmps)
                    loss = F.nll_loss(output.view(-1, self.voc_cmp.size),
                                      cmps.view(-1))
                    loss.backward()
                    optimizer.step()

                    # Evaluate only on every 10th step, skipping step 0.
                    if i % 10 != 0 or i == 0:
                        continue
                    smiles, valids = [], []
                    for _ in range(4):
                        for _, tgt in tgt_loader:
                            seqs = net(tgt.to(util.dev))
                            smile, valid = util.check_smiles(seqs, self.voc_cmp)
                            smiles.extend(smile)
                            valids.extend(valid)
                    valid_rate = sum(valids) / len(valids)
                    print("Epoch: %d step: %d loss: %.3f valid: %.3f" %
                          (epoch, i, loss.item(), valid_rate),
                          file=log)
                    for j, smile in enumerate(smiles):
                        print('%d\t%s' % (valids[j], smile), file=log)
                    if best_valid < valid_rate:
                        torch.save(self.state_dict(), out + '.pkg')
                        best_valid = valid_rate