def generate(agent_path, out, num=10000, environ_path='output/RF_cls_ecfp6.pkg'):
    """Generate novel molecules as SMILES strings and store them on the hard drive as a data frame.

    Arguments:
        agent_path (str): file path of the neural states for the RNN agent (generator).
        out (str): file path for the generated molecules (and the scores given by the environment).
        num (int, optional): the total number of SMILES to be generated. (Default: 10000)
        environ_path (str): file path of the predictor used to construct the environment.
    """
    batch_size = 500
    df = pd.DataFrame()
    voc = util.Voc("data/voc.txt")
    agent = model.Generator(voc)
    agent.load_state_dict(torch.load(agent_path))
    for i in range(num // batch_size + 1):
        if i == 0 and num % batch_size == 0:
            continue
        batch = pd.DataFrame()
        samples = agent.sample(batch_size if i != 0 else num % batch_size)
        smiles, valids = util.check_smiles(samples, agent.voc)
        if environ_path is not None:
            # Calculating the reward of each SMILES with the environment (predictor);
            # invalid SMILES get a score of 0.
            environ = util.Environment(environ_path)
            scores = environ(smiles)
            scores[valids == 0] = 0
            valids = scores
        batch['SCORE'] = valids
        batch['CANONICAL_SMILES'] = smiles
        df = df.append(batch)
    df.to_csv(out, sep='\t', index=None)
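# A minimal usage sketch for generate(). The paths below follow the naming used elsewhere in this
# project, but they are assumptions here: point them at your own trained generator, predictor and
# output file before running.
if __name__ == '__main__':
    generate(agent_path='output/net_p.pkg',           # assumed pre-trained generator state file
             out='output/generated_smiles.tsv',       # hypothetical output path (tab-separated)
             num=10000,
             environ_path='output/RF_cls_ecfp6.pkg')  # predictor path, as in the default argument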
def fit(self, loader_train, out, loader_valid=None, epochs=100, lr=1e-3):
    """Train the RNN generative model, similar to the scikit-learn or Keras style.
    In the end, the optimal parameter values are also persisted on the hard drive.

    Arguments:
        loader_train (DataLoader): data loader for the training set; it wraps a Dataset with
            util.MolData, and each iteration yields an m x n LongTensor, where m is the number
            of samples and n is the maximum sequence length.
        out (str): file path for the model file (suffix '.pkg') and the log file (suffix '.log').
        loader_valid (DataLoader, optional): data loader for the validation set;
            its data structure is the same as loader_train.
        epochs (int, optional): the maximum number of training epochs (default: 100)
        lr (float, optional): learning rate (default: 1e-3)
    """
    optimizer = optim.Adam(self.parameters(), lr=lr)
    log = open(out + '.log', 'w')
    best_error = np.inf
    for epoch in tqdm(range(epochs)):
        for i, batch in tqdm(enumerate(loader_train), total=len(loader_train), desc='Epoch %d' % epoch):
            optimizer.zero_grad()
            loss_train = self.likelihood(batch.to(util.dev))
            loss_train = -loss_train.mean()
            loss_train.backward()
            optimizer.step()
            # Performance evaluation
            if i % 10 == 0 or loader_valid is not None:
                # 1000 SMILES are sampled
                seqs = self.sample(1000)
                # ix = util.unique(seqs)
                # seqs = seqs[ix]
                # Checking the validity of each SMILES
                smiles, valids = util.check_smiles(seqs, self.voc)
                error = 1 - sum(valids) / len(seqs)
                info = "Epoch: %d step: %d error_rate: %.3f loss_train: %.3f" % (epoch, i, error, loss_train.item())
                # Saving the model parameters that give the minimum loss value.
                if loader_valid is not None:
                    # If a validation set is given, the loss function is
                    # calculated on the validation set.
                    loss_valid, size = 0, 0
                    for j, batch_valid in enumerate(loader_valid):
                        size += batch_valid.size(0)
                        loss_valid += -self.likelihood(batch_valid.to(util.dev)).sum()
                    loss_valid = loss_valid / size / self.voc.max_len
                    if loss_valid.item() < best_error:
                        torch.save(self.state_dict(), out + '.pkg')
                        best_error = loss_valid.item()
                    info += ' loss_valid: %.3f' % loss_valid.item()
                elif error < best_error:
                    # If no validation set is given, the error rate on the
                    # sampled SMILES is used instead.
                    torch.save(self.state_dict(), out + '.pkg')
                    best_error = error
                print(info, file=log)
                for k, smile in enumerate(smiles):
                    print('%d\t%s' % (valids[k], smile), file=log)
    log.close()
    self.load_state_dict(torch.load(out + '.pkg'))
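# A minimal training sketch for fit(), assuming it is a method of model.Generator and that the
# corpus file below (a tab-separated table of SMILES) exists. The file name and the 90/10 split
# are illustrative, not taken from this project; the DataLoader construction mirrors how
# util.MolData is used elsewhere in these scripts.
import pandas as pd
from torch.utils.data import DataLoader

import model
import util

voc = util.Voc("data/voc.txt")
df = pd.read_table('data/chembl_corpus.txt')        # hypothetical corpus file
df_valid = df.sample(frac=0.1, random_state=0)      # hold out 10% for validation
df_train = df.drop(df_valid.index)

data_train = util.MolData(df_train, voc)
data_valid = util.MolData(df_valid, voc)
loader_train = DataLoader(data_train, batch_size=512, shuffle=True,
                          drop_last=True, collate_fn=data_train.collate_fn)
loader_valid = DataLoader(data_valid, batch_size=512, collate_fn=data_valid.collate_fn)

prior = model.Generator(voc)
prior.fit(loader_train, out='output/net_p', loader_valid=loader_valid, epochs=100, lr=1e-3)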
def main():
    voc = util.Voc(init_from_file="data/voc_b.txt")
    netR_path = 'output/rf_dis.pkg'
    netG_path = 'output/net_p'
    netD_path = 'output/net_d'
    agent_path = 'output/net_gan_%d_%d_%dx%d' % (SIGMA * 10, BL * 10, BATCH_SIZE, MC)

    # Environment (predictor), pre-trained generator and training data
    netR = util.Environment(netR_path)
    agent = model.Generator(voc)
    agent.load_state_dict(T.load(netG_path + '.pkg'))

    df = pd.read_table('data/CHEMBL251.txt')
    df = df[df['PCHEMBL_VALUE'] >= 6.5]
    data = util.MolData(df, voc)
    loader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, collate_fn=data.collate_fn)

    # Discriminator: pre-train it if no saved parameters exist yet
    netD = model.Discriminator(VOCAB_SIZE, EMBED_DIM, FILTER_SIZE, NUM_FILTER)
    if not os.path.exists(netD_path + '.pkg'):
        Train_dis_BCE(netD, agent, loader, epochs=100, out=netD_path)
    netD.load_state_dict(T.load(netD_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')
    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nPolicy Gradient Training Generator : ')
        Train_GAN(agent, netD, netR)
        print('\nAdversarial Training Discriminator : ')
        Train_dis_BCE(netD, agent, loader, epochs=1)

        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = netR(smiles)
        scores[valids == False] = 0
        unique = (scores >= 0.5).sum() / 1000
        # The model with the best proportion of unique desired SMILES is persisted on the hard drive.
        if best_score < unique:
            T.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" % (epoch, scores.mean(), valids.mean(), unique),
              file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        # Learning rate exponential decay
        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
    log.close()
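# The training script above relies on several module-level constants. The names are taken from the
# code; the values below are placeholders for illustration and are not the authors' settings.
SIGMA = 0.5        # weight of the predictor reward vs. the discriminator reward in Train_GAN
BL = 0.1           # reward baseline subtracted from the blended reward
BATCH_SIZE = 64    # mini-batch size for sampling and policy-gradient updates
MC = 10            # number of repeated sampling rounds
# VOCAB_SIZE, EMBED_DIM, FILTER_SIZE and NUM_FILTER configure model.Discriminator and must match
# whatever that constructor expects; suitable values depend on its implementation.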
def Rollout_PG(agent, environ, explore=None):
    """Train the generator under the reinforcement learning framework.
    A reward is given for each token in the SMILES; it is generated by Monte Carlo Tree Search
    based on the final reward given by the environment.

    Arguments:
        agent (model.Generator): the exploitation network for SMILES string generation
        environ (util.Environment): the environment that provides the final reward for each SMILES
        explore (model.Generator): the exploration network for SMILES string generation;
            it has the same architecture as the agent.
    """
    agent.optim.zero_grad()
    seqs = agent.sample(BATCH_SIZE, explore=explore, epsilon=Epsilon)
    batch_size = seqs.size(0)
    seq_len = seqs.size(1)
    rewards = np.zeros((batch_size, seq_len))
    smiles, valids = util.check_smiles(seqs, agent.voc)
    preds = environ(smiles) - Baseline
    preds[valids == False] = -Baseline
    scores, hiddens = agent.likelihood(seqs)
    # Monte Carlo Tree Search for step-wise reward generation
    for _ in tqdm(range(MC)):
        for i in range(0, seq_len):
            if (seqs[:, i] != 0).any():
                # Roll out the remainder of each sequence from position i and score the completions.
                h = hiddens[:, :, i, :]
                subseqs = agent.sample(batch_size, inits=(seqs[:, i], h, i + 1, None))
                subseqs = torch.cat([seqs[:, :i + 1], subseqs], dim=1)
                subsmile, subvalid = util.check_smiles(subseqs, voc=agent.voc)
                subpred = environ(subsmile) - Baseline
                subpred[subvalid == False] = -Baseline
            else:
                subpred = preds
            rewards[:, i] += subpred
    loss = agent.PGLoss(scores, seqs, torch.FloatTensor(rewards / MC))
    loss.backward()
    agent.optim.step()
    return 0, valids.mean(), smiles, preds
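# Tiny numeric illustration (made-up values) of the rollout averaging above: the step reward for a
# token position is the mean, over MC rollouts, of the baseline-subtracted environment score of the
# completed sequences.
import numpy as np

MC_example, baseline_example = 2, 0.1
completion_scores = np.array([0.9, 0.7])                        # hypothetical scores of two rollouts
step_reward = (completion_scores - baseline_example).sum() / MC_example
print(step_reward)                                              # ~0.7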
def main():
    global Epsilon
    # Vocabulary containing all of the tokens for SMILES construction
    voc = util.Voc("data/voc.txt")
    # File path of the predictor in the environment
    environ_path = 'output/RF_cls_ecfp6.pkg'
    # File path of the RNN states (parameters) used for initialization
    initial_path = 'output/net_p'
    # File path of the states of the optimal exploitation network
    agent_path = 'output/net_e_%.2f_%.1f_%dx%d' % (Epsilon, Baseline, BATCH_SIZE, MC)
    # File path of the states of the exploration network
    explore_path = 'output/net_p'

    # Environment (predictor)
    environ = util.Environment(environ_path)
    # Agent (generator, exploitation network)
    agent = model.Generator(voc)
    agent.load_state_dict(torch.load(initial_path + '.pkg'))
    # Exploration network
    explore = model.Generator(voc)
    explore.load_state_dict(torch.load(explore_path + '.pkg'))

    best_score = 0
    log = open(agent_path + '.log', 'w')
    for epoch in range(1000):
        print('\n--------\nEPOCH %d\n--------' % (epoch + 1))
        print('\nForward Policy Gradient Training Generator : ')
        Policy_gradient(agent, environ, explore=explore)

        seqs = agent.sample(1000)
        ix = util.unique(seqs)
        smiles, valids = util.check_smiles(seqs[ix], agent.voc)
        scores = environ(smiles)
        scores[valids == False] = 0
        unique = (scores >= 0.5).sum() / 1000
        # The model with the best percentage of unique desired SMILES is persisted on the hard drive.
        if best_score < unique:
            torch.save(agent.state_dict(), agent_path + '.pkg')
            best_score = unique
        print("Epoch+: %d average: %.4f valid: %.4f unique: %.4f" % (epoch, scores.mean(), valids.mean(), unique),
              file=log)
        for i, smile in enumerate(smiles):
            print('%f\t%s' % (scores[i], smile), file=log)

        # Learning rate exponential decay
        for param_group in agent.optim.param_groups:
            param_group['lr'] *= (1 - 0.01)
    log.close()
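# Module-level settings assumed by this script. The names come from the code; the values below are
# placeholders only.
Epsilon = 0.1      # probability of sampling tokens from the exploration network
Baseline = 0.1     # reward baseline used in Policy_gradient / Rollout_PG
BATCH_SIZE = 64
MC = 10
# Note on the decay step above: multiplying the learning rate by (1 - 0.01) every epoch shrinks it
# to roughly 0.99 ** 1000 ~ 4.3e-5 of its initial value over the 1000 training epochs.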
def Train_GAN(netG, netD, netR, sigma=SIGMA):
    # Repeated sampling, MC times
    seqs = []
    for _ in range(MC):
        seq = netG.sample(BATCH_SIZE)
        seqs.append(seq)
    seqs = T.cat(seqs, dim=0)
    ix = util.unique(seqs)
    seqs = seqs[ix]
    smiles, valids = util.check_smiles(seqs, netG.voc)

    # Reward: weighted combination of the predictor (netR) and discriminator (netD) outputs,
    # zeroed for invalid SMILES and shifted by the baseline.
    preds = sigma * netR(smiles) + (1 - sigma) * netD(util.Variable(seqs)).data.cpu().numpy()[:, 0]
    preds[valids == False] = 0
    preds -= BL
    ds = TensorDataset(seqs, T.Tensor(preds.reshape(-1, 1)))
    loader = DataLoader(ds, batch_size=BATCH_SIZE)

    # Policy-gradient update of the generator
    for seq, pred in loader:
        score, _ = netG.likelihood(seq)
        netG.optim.zero_grad()
        loss = netG.PGLoss(score, seq, pred)
        loss.backward()
        netG.optim.step()
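# Worked example (illustrative numbers) of the blended reward computed above: with sigma = 0.5 and
# BL = 0.1, a sequence scored 0.8 by the predictor (netR) and 0.4 by the discriminator (netD)
# receives 0.5 * 0.8 + 0.5 * 0.4 - 0.1 = 0.5 before the policy-gradient update.
sigma_example, BL_example = 0.5, 0.1
reward = sigma_example * 0.8 + (1 - sigma_example) * 0.4 - BL_example
print(round(reward, 3))    # 0.5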
def Policy_gradient(agent, environ, explore=None):
    """Train the generator under the reinforcement learning framework.
    The reward is only the final reward given by the environment (predictor).

    Arguments:
        agent (model.Generator): the exploitation network for SMILES string generation
        environ (util.Environment): the environment that provides the final reward for each SMILES
        explore (model.Generator): the exploration network for SMILES string generation;
            it has the same architecture as the agent.
    """
    seqs = []
    # Repeated sampling, MC times
    for _ in range(MC):
        seq = agent.sample(BATCH_SIZE, explore=explore, epsilon=Epsilon)
        seqs.append(seq)
    seqs = torch.cat(seqs, dim=0)
    ix = util.unique(seqs)
    seqs = seqs[ix]
    smiles, valids = util.check_smiles(seqs, agent.voc)

    # Obtaining the reward
    preds = environ(smiles)
    preds[valids == False] = 0
    preds -= Baseline
    preds = torch.Tensor(preds.reshape(-1, 1)).to(util.dev)
    ds = TensorDataset(seqs, preds)
    loader = DataLoader(ds, batch_size=BATCH_SIZE)

    # Training loop
    for seq, pred in loader:
        score = agent.likelihood(seq)
        agent.optim.zero_grad()
        loss = agent.PGLoss(score, pred)
        loss.backward()
        agent.optim.step()
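# Tiny numeric illustration (made-up values) of the reward shaping above: invalid SMILES are first
# zeroed and then, like low-scoring valid ones, end up below zero after the baseline is subtracted.
import numpy as np

Baseline_example = 0.1
preds_example = np.array([0.9, 0.05, 0.7])       # environment scores of three sampled SMILES
valids_example = np.array([1, 1, 0])             # the third SMILES is invalid
preds_example[valids_example == False] = 0
preds_example -= Baseline_example
print(preds_example)                             # [ 0.8  -0.05 -0.1 ]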
def fit(self, pair_loader, tgt_loader, epochs=100, out=None):
    """Train the network on (target, compound) pairs; every 10 steps, SMILES are generated for the
    targets in tgt_loader, and the parameters with the best validity rate are saved to out + '.pkg'
    (progress is written to out + '.log')."""
    log = open(out + '.log', 'w')
    best_valid = 0.
    net = nn.DataParallel(self, device_ids=util.devices)
    optimizer = torch.optim.Adam(self.parameters())
    for epoch in range(epochs):
        for i, (tgts, cmps) in enumerate(pair_loader):
            tgts, cmps = tgts.to(util.dev), cmps.to(util.dev)
            optimizer.zero_grad()
            output = net(tgts, cmps)
            loss = F.nll_loss(output.view(-1, self.voc_cmp.size), cmps.view(-1))
            loss.backward()
            optimizer.step()
            # Evaluate every 10 steps (skipping step 0).
            if i % 10 != 0 or i == 0:
                continue
            ids, smiles, valids = [], [], []
            for _ in range(4):
                for ix, tgt in tgt_loader:
                    seqs = net(tgt.to(util.dev))
                    # ix = util.unique(seqs)
                    # seqs = seqs[ix]
                    smile, valid = util.check_smiles(seqs, self.voc_cmp)
                    smiles += smile
                    valids += valid
                    ids += ix.tolist()
            valid = sum(valids) / len(valids)
            print("Epoch: %d step: %d loss: %.3f valid: %.3f" % (epoch, i, loss.item(), valid), file=log)
            for k, smile in enumerate(smiles):
                print('%d\t%s' % (valids[k], smile), file=log)
            # Persist the model with the best validity rate.
            if best_valid < valid:
                torch.save(self.state_dict(), out + '.pkg')
                best_valid = valid
    log.close()
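# Shape check (toy tensors, no real model required) for the flattened negative log-likelihood loss
# used above: a (batch, seq_len, vocab) tensor of log-probabilities is viewed as
# (batch * seq_len, vocab) and the (batch, seq_len) targets as (batch * seq_len,).
import torch
import torch.nn.functional as F

batch, seq_len, vocab = 2, 5, 30
log_probs = torch.log_softmax(torch.randn(batch, seq_len, vocab), dim=-1)
targets = torch.randint(0, vocab, (batch, seq_len))
loss = F.nll_loss(log_probs.view(-1, vocab), targets.view(-1))
print(loss.item())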