def train_agent(restore_agent_from='data/Prior.ckpt',
                scoring_function='activity_model',
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=1000,
                sigma=100):
    """Fine-tune an Agent RNN against a frozen Prior RNN using a scoring function.

    Each step samples ``batch_size`` sequences from the Agent, removes
    duplicates, scores the decoded SMILES, and minimises the squared difference
    between the Agent likelihood and ``prior_likelihood + sigma * score``, plus
    a ``-(1 / agent_likelihood).mean()`` regularisation term. Per-step
    statistics are written to ``step_score_1000step.csv`` inside ``save_dir``
    once all steps have run.

    Args:
        restore_agent_from: Checkpoint path used to initialise the Agent.
            The Prior is always loaded from ``data/Prior.ckpt``.
        scoring_function: Name passed to ``get_scoring_function``.
        save_dir: Output directory. When falsy, a timestamped directory under
            ``experiments/manuscript/`` is used. The directory is created with
            ``os.makedirs`` and must not already exist.
        learning_rate: Learning rate for the Adam optimiser.
        batch_size: Number of sequences sampled per step. Also the denominator
            of the ``Prob`` column.
        n_steps: Number of optimisation steps.
        sigma: Weight of the score in the augmented likelihood.
    """
    # Scores at or above this value count as "hits" in the per-step statistics.
    score_threshold = 0.4
    # Upper bound on how many sampled sequences are echoed to stdout per step.
    n_preview = 10

    voc = Vocabulary(init_from_file="data/voc")

    Prior = RNN(voc)
    Agent = RNN(voc)

    # Without CUDA, remap stored tensors onto the CPU. ``None`` is
    # torch.load's default, so the CUDA path loads exactly as before.
    if torch.cuda.is_available():
        map_location = None
    else:
        map_location = lambda storage, loc: storage
    Prior.rnn.load_state_dict(
        torch.load('data/Prior.ckpt', map_location=map_location))
    Agent.rnn.load_state_dict(
        torch.load(restore_agent_from, map_location=map_location))

    # The Prior is only a reference model; it must never be updated.
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)
    scoring_function = get_scoring_function(scoring_function=scoring_function)

    # step_score[0] holds 1-based step numbers, step_score[1] the mean score.
    step_score = [[], []]

    print("Model initialized, starting training...")

    if not save_dir:
        save_dir = 'experiments/manuscript/1000steps_probtest_rewardonlynosmaller40_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir)

    # Per-step statistics of the sequences whose score reaches the threshold:
    # fraction of the batch, mean score and standard deviation of the score.
    prob = []
    mean_ = []
    std_ = []

    for step in range(n_steps):
        seqs, agent_likelihood, _ = Agent.sample(batch_size)

        # Keep a single copy of every sampled sequence.
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]

        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = [voc.decode(seq) for seq in seqs.cpu().numpy()]
        score = scoring_function(smiles)

        score_filter = [s for s in score if s >= score_threshold]
        # Divide by the requested batch size rather than a literal 64 so the
        # fraction stays meaningful when batch_size is not the default.
        prob.append(len(score_filter) / batch_size)
        # np.mean/np.std of an empty list yield NaN plus a RuntimeWarning;
        # record NaN directly so a step without hits stays quiet.
        if score_filter:
            mean_.append(np.mean(score_filter))
            std_.append(np.std(score_filter))
        else:
            mean_.append(float('nan'))
            std_.append(float('nan'))

        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
        loss = loss.mean()

        # NOTE(review): presumably agent_likelihood is a negative
        # log-likelihood, making this term a penalty that grows as the
        # likelihood approaches zero -- confirm against RNN.sample.
        regularization = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * regularization

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print out information during the training. De-duplication can leave
        # fewer than n_preview sequences, so clamp the preview length.
        print("Agent Prior Target Score SMILES")
        for i in range(min(n_preview, len(smiles))):
            print("{:6.3f} {:6.3f} {:6.3f} {:6.3f} {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # Disabled: periodic checkpointing and sampling every 100 steps.
        # if step > 98 and (step+1) % 100 == 0:
        # # if step == 0:
        #     torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'agent_baseline_{}.ckpt'.format(step+1)))
        #     seqs, agent_likelihood, entropy = Agent.sample(1000)
        #     prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        #     prior_likelihood = prior_likelihood.data.cpu().numpy()
        #     smiles = []
        #     for seq in seqs.cpu().numpy():
        #         smiles.append(voc.decode(seq))
        #     score = scoring_function(smiles)
        #     with open(os.path.join(save_dir, "sampled_{}".format(step+1)), 'w') as f:
        #         f.write("SMILES Score PriorLogP\n")
        #         for s, sc, pri in zip(smiles, score, prior_likelihood):
        #             f.write("{} {:5.3f} {:6.3f}\n".format(s, sc, pri))

    step_score_data = pd.DataFrame({
        'Step': step_score[0],
        'Score': step_score[1],
        'Prob': prob,
        'MEAN': mean_,
        'STD': std_
    })
    step_score_data.to_csv(os.path.join(save_dir, "step_score_1000step.csv"),
                           index=None)
def main(restore_from=None, visualize=False,
         plot_path='/Users/ruiminma/Desktop/validratio.png'):
    """Train the Prior RNN on ``data/mols_filtered.smi`` and save it.

    Runs a single epoch of maximum-likelihood training. Every 5 steps (except
    step 0) the learning rate is decreased, 128 sequences are sampled, the
    percentage that RDKit can parse is logged, and the weights are written to
    ``data/Prior.ckpt``. The weights are written once more when the epoch ends.

    Args:
        restore_from: Optional checkpoint path; when given, the RNN weights
            are loaded from it (mapped to CPU) before training.
        visualize: When true, plot the collected valid-SMILES percentages and
            save the figure to ``plot_path``.
        plot_path: Destination of the figure. The default keeps the location
            that was previously hard-coded; pass another path on machines
            where that directory does not exist.
    """
    # Read vocabulary from a file.
    voc = Vocabulary(init_from_file="data/voc")

    # Create a dataset from a SMILES file.
    moldata = MolData("data/mols_filtered.smi", voc)
    data = DataLoader(moldata, batch_size=10, shuffle=True, drop_last=True,
                      collate_fn=MolData.collate_fn)

    agent = RNN(voc)

    # Can restore from a saved RNN.
    if restore_from:
        agent.rnn.load_state_dict(
            torch.load(restore_from, map_location=torch.device('cpu')))

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=0.001)

    # NOTE: anomaly detection is a process-wide autograd setting; it is kept
    # enabled here exactly as before.
    torch.autograd.set_detect_anomaly(True)

    valid_ratios = list()
    for epoch in range(1, 2):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # Sample from DataLoader.
            seqs = batch.long()

            # Calculate loss: mean negative log-likelihood of the batch.
            log_p, _ = agent.likelihood(seqs)
            loss = -log_p.mean()

            # Calculate gradients and take a step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Every n steps we decrease the learning rate and print out some
            # information; n can be customized.
            if step % 5 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("#" * 50)
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                    epoch, step, loss.data))

                seqs, likelihood, _ = agent.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    # Echo the first few samples regardless of validity.
                    if i < 5:
                        tqdm.write(smile)

                # Computed once and reused for both the history and the log.
                valid_ratio = 100 * valid / len(seqs)
                valid_ratios.append(valid_ratio)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(valid_ratio))
                tqdm.write("#" * 50 + "\n")
                torch.save(agent.rnn.state_dict(), "data/Prior.ckpt")

        # Save the Prior at the end of the epoch.
        torch.save(agent.rnn.state_dict(), "data/Prior.ckpt")

    if visualize:
        plt.plot(range(len(valid_ratios)), valid_ratios, color='red',
                 linewidth=5)
        plt.savefig(plot_path, bbox_inches='tight', dpi=400)
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):
    """Train, validate and test a Theano RNN classifier one item at a time.

    Python 2 code (print statements, ``xrange``). The vocabulary is built from
    ``path_name + 'train'``; data providers are created for the ``train``,
    ``valid`` and ``test`` files under ``path_name``. Training repeats whole
    passes over the training data until the learning-rate schedule reports a
    rate of 0; each pass is followed by a validation pass. After training, the
    summed log-likelihood over the test data is printed.

    Command-line inputs read inside the function:
        sys.argv[1]: hidden layer size (``int``).
        sys.argv[2]: starting learning rate (``float``).

    Args:
        num_batches_per_bunch: Not referenced in this function body.
        batch_size: Not referenced in this function body.
        num_bunches_queue: Not referenced in this function body.
        offset: Not referenced in this function body.
        path_name: Prefix to which 'train', 'valid' and 'test' are appended.

    NOTE(review): loop nesting below was reconstructed from a source whose
    indentation was lost; ``progress`` is assumed to count iterations of the
    outer data-provider loops ("bunches" in the log text) -- confirm.
    """
    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )

    print '..building the model'

    # Symbolic variables: input vector x, initial hidden vector h0, target
    # vector y and the learning rate. `index` is declared but not used below.
    index = T.lscalar('index')
    x = T.fvector('x')
    h0 = T.fvector('h0')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    # Theano shared variables for train, valid and test. They start as
    # one-element placeholders and are overwritten with set_value() before
    # every call of the compiled functions.
    train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    # Unseeded generator: results differ from run to run.
    rng = numpy.random.RandomState()

    classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size, n_hidden = int(sys.argv[1]), n_out = vocab_size)

    cost = classifier.negative_log_likelihood(y)

    # Shared vector of ones, sized by sys.argv[1]; it is substituted for h0 in
    # all three compiled functions below.
    ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32')

    ht1 = theano.shared(value = ht1_values, name = 'hidden_state')

    # Learning-rate schedule object; the start rate comes from sys.argv[2].
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    # Symbolic outputs used for evaluation. NOTE: the Python name
    # `log_likelihood` is later rebound to plain lists; the functions compiled
    # below keep the symbolic expression captured here.
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    # Test function: returns [log_likelihood, likelihood] for the current
    # contents of the test shared variables.
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x: test_set_x1,
                                           y: test_set_y,
                                           h0: ht1})
    # Validation function: returns [log_likelihood] for the current contents
    # of the validation shared variables.
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x: valid_set_x1,
                                               y: valid_set_y,
                                               h0: ht1})

    gradient_param = []
    # Gradient of the cost with respect to each parameter, cast to float32.
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    # Update rule per parameter:
    #   param <- param - learning_rate * gradient - 0.000001 * param
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32')))

    #hidden_output = classifier.inputlayer.output

    # Training function: takes the learning rate, applies `updates`, and
    # returns [cost, classifier.inputlayer.output].
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output], updates = updates, \
                                 givens = {x: train_set_x1,
                                           y: train_set_y,
                                           h0:ht1})

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    # One iteration of this loop is one training pass plus one validation
    # pass. It ends once the schedule's rate is 0 (set explicitly below when
    # validation stops improving).
    while(learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()

        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]

            # zip() stops at the shorter of `features` and `labels`.
            for temp, i in zip(features, xrange(len(labels))):
                # One-hot vector of length vocab_size with position temp[0] set.
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))
                # Feed classifier.inputlayer.output from this step back in as
                # h0 for the next call (presumably the hidden activation --
                # TODO confirm against the RNN class).
                ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)

            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            # Reset the training shared variables to one-element placeholders.
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
        #classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        #save_mlp(classifier, path+exp_name1 , classifier_name)

        print 'Validating...'
        valid_losses = []  # not used below
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            # NOTE(review): ht1 is not updated in this loop, so every
            # validation call uses the hidden vector left by training.
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                log_likelihood.append(validate_model())
            # Reset the validation shared variables to one-element placeholders.
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        # Negated summed validation output divided by the validation frame
        # count. Divides by zero if no validation frames were seen.
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            # Improvement: let the schedule pick the next rate. The local name
            # `learning_rate` is rebound here but not read afterwards.
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            # No improvement: force the rate to 0 so the while loop exits.
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        # As in validation, ht1 is not updated while testing.
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    # Final result: summed log-likelihood over the test data.
    print numpy.sum(log_likelihood)