Example #1
    def sample(self, num=1, minlen=1, maxlen=100, start='G'):
        """
        Function that generates the SMILES string, token by token, depending on 
        the previous computed sequence 
        """
        sampled = []
        token_table = SmilesToTokens()

        for i in tqdm(range(num)):
            start_a = start
            sequence = start_a
            contador = 0
            while sequence[-1] != 'E' and len(sequence) <= maxlen:
                x, _ = token_table.one_hot_encode(
                    token_table.tokenize(sequence))
                if self.training:

                    e = round(random.uniform(0.0, 1.0), 5)

                    if e < self.threshold:  # explore: sample from the unbiased model
                        preds = self.model_unbiased.model.predict(x)[0][-1]
                    else:
                        preds = self.model_biased.model.predict(x)[0][-1]
                else:
                    preds = self.model_unbiased.model.predict(x)[0][-1]

                next_a = self.sample_with_temp(preds)
                sequence += self.table[next_a]
                contador = contador + 1
            sequence = sequence[1:].rstrip('E')  # drop the start token 'G' and the trailing end token 'E'
            if len(sequence) >= minlen:
                sampled.append(sequence)
        return sampled
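
The helper sample_with_temp called above is not shown in these examples. A minimal sketch of the temperature-scaled sampling it is assumed to perform (the default temperature and the smoothing constant are assumptions):

import numpy as np

def sample_with_temp(preds, temperature=0.9):
    """Sample a token index from the softmax output of the last time step.

    `preds` is the probability vector returned by model.predict;
    temperature < 1 sharpens the distribution, > 1 flattens it
    (the default value here is an assumption).
    """
    preds = np.asarray(preds).astype('float64')
    # Re-scale the log-probabilities by the temperature and re-normalise
    logits = np.log(preds + 1e-9) / temperature
    probs = np.exp(logits) / np.sum(np.exp(logits))
    # Draw one token index from the adjusted distribution
    return np.random.choice(len(probs), p=probs)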
Example #2
    def __init__(self, model_unbiased, model_biased, training, threshold,
                 config):
        super(PredictSMILES, self).__init__(model_unbiased, model_biased,
                                            training, threshold, config)
        self.model_unbiased = model_unbiased
        self.model_biased = model_biased
        token_table = SmilesToTokens()
        self.table = token_table.table
        self.training = training
        self.threshold = threshold
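
Examples #6 and #8 below construct this class inside the policy-gradient loop. A condensed usage sketch, assuming trained generator objects, an exploration threshold, and a configuration bunch already exist (the names below refer to those assumed objects):

# Sketch only: generator_unbiased, generator_biased, threshold_greedy and
# configReinforce are assumed to exist already (see Examples #3 to #6)
predict_smiles = PredictSMILES(generator_unbiased, generator_biased,
                               True, threshold_greedy, configReinforce)

# Sample a handful of SMILES strings from the epsilon-greedy mixture of the
# unbiased and biased generators
trajectories = predict_smiles.sample(num=5, minlen=10, maxlen=100)
for smi in trajectories:
    print(smi)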
Example #3
    def __init__(self, generator, predictor, configReinforce,
                 property_identifier):
        """
        Constructor for the Reinforcement object.
        Parameters
        ----------
        generator: generative model object that produces string of characters 
            (trajectories)
        predictor: object of any predictive model type
            predictor accepts a trajectory and returns a numerical
            prediction of desired property for the given trajectory
        configReinforce: bunch
            Parameters to use in the predictive model and get_reward function 
        property_identifier: string
            It indicates what property we want to optimize
        Returns
        -------
        object Reinforcement used for implementation of Reinforcement Learning 
        model to bias the Generator
        """

        super(Reinforcement, self).__init__()
        # generator_unbiased and generator_biased start out as the same object;
        # both are loaded below with the unbiased weights
        self.generator_unbiased = generator
        self.generator_biased = generator
        self.generator = generator
        self.configReinforce = configReinforce
        self.generator_unbiased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        self.generator_biased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        self.token_table = SmilesToTokens()
        self.table = self.token_table.table
        self.predictor = predictor
        self.get_reward = get_reward
        self.property_identifier = property_identifier
        self.all_rewards = []
        self.all_losses = []
        self.threshold_greedy = 0.1
        self.n_table = len(self.token_table.table)
        #        self.sgd = optimizers.SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
        #        self.adadelta = optimizers.Adadelta(learning_rate=0.0001, rho=0.95, epsilon=1e-07)
        self.adam = optimizers.Adam(learning_rate=0.001,
                                    beta_1=0.9,
                                    beta_2=0.999,
                                    amsgrad=False,
                                    clipvalue=3)
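
A minimal end-to-end usage sketch for this constructor, assuming the generator wrapper from Example #7, a predictor object, and a JSON configuration file; the module names, file name, and property identifier are assumptions:

import json
from bunch import Bunch

# Hypothetical module paths; the real project layout may differ
from model import Model                # generator wrapper (see Example #7)
from prediction import Predictor       # property predictor (assumed)
from reinforcement import Reinforcement

# Load the RL configuration into a bunch, as the constructor expects
with open('configReinforce.json') as f:    # file name is an assumption
    configReinforce = Bunch(json.load(f))

generator = Model(configReinforce)
predictor = Predictor(configReinforce)

# Bias the generator toward a single property, e.g. one identified as 'kor'
rl = Reinforcement(generator, predictor, configReinforce,
                   property_identifier='kor')
rl.policy_gradient()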
Example #4
    def __init__(self, generator, predictor_a2d, predictor_bbb,
                 configReinforce):
        """
        Constructor for the Reinforcement object.
        Parameters
        ----------
        generator: generative model object that produces string of characters 
            (trajectories)
        predictor: object of any predictive model type
            predictor accepts a trajectory and returns a numerical
            prediction of desired property for the given trajectory
        configReinforce: bunch
            Configuration file containing all the necessary specification and
            parameters. 
        Returns
        -------
        object Reinforcement used for implementation of Reinforcement Learning 
        model to bias the Generator
        """

        super(Reinforcement, self).__init__()
        self.generator_unbiased = generator
        self.generator_biased = generator
        self.generator = generator
        self.configReinforce = configReinforce
        self.generator_unbiased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        self.generator_biased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        self.token_table = SmilesToTokens()
        self.table = self.token_table.table
        self.predictor_a2d = predictor_a2d
        self.predictor_bbb = predictor_bbb
        self.get_reward_MO = get_reward_MO
        self.threshold_greedy = 0.1
        self.n_table = len(self.token_table.table)
        self.preds_range = [3., 1.28, 1.284,
                            1.015]  #[2.,1.4,1.284,1.015] #3.2,1.29
        self.best_model = '0.5'
        #        self.adam = optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
        self.adam = optimizers.Adam(clipvalue=4)
        self.scalarization_mode = 'chebyshev'  # it can be 'linear' or 'chebyshev'
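
The scalarization function that consumes this mode (used in Example #6) is not shown in these examples. A minimal sketch of how linear and Chebyshev (weighted max-norm) scalarization of the two rewards might look; the simplified signature, the ideal point of 1.0, and the way the distance is turned into a reward are all assumptions:

import numpy as np

def scalarization(rewards, mode, weights):
    """Collapse per-property rewards (e.g. [r_a2d, r_bbb]) into one scalar."""
    rewards = np.asarray(rewards, dtype=float)
    weights = np.asarray(weights, dtype=float)
    if mode == 'linear':
        # Weighted sum of the individual rewards
        return float(np.dot(weights, rewards))
    if mode == 'chebyshev':
        # Chebyshev scalarization focuses on the worst-performing objective:
        # the reward is high only when every weighted deviation from an
        # assumed ideal point of 1.0 is small
        ideal = np.ones_like(rewards)
        return float(1.0 - np.max(weights * np.abs(ideal - rewards)))
    raise ValueError("mode must be 'linear' or 'chebyshev'")

# Example: two objectives weighted equally
print(scalarization([0.8, 0.6], 'chebyshev', [0.5, 0.5]))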
Example #5
    def __init__(self, generator, predictor, configReinforce,
                 property_identifier):
        """
        Constructor for the Reinforcement object.
        Parameters
        ----------
        generator: generative model that produces string of characters 
            (trajectories)
        predictor: object of any predictive model type
            predictor accepts a trajectory and returns a numerical
            prediction of desired property for the given trajectory
        configReinforce: bunch
            Parameters to use in the predictive model and get_reward function 
        property_identifier: string
            It indicates what property we want to optimize
        Returns
        -------
        object Reinforcement used for implementation of Reinforcement Learning 
        model to bias the Generator
        """

        super(Reinforcement, self).__init__()
        self.generator_unbiased = generator
        self.generator_biased = generator
        self.generator = generator
        self.configReinforce = configReinforce
        self.generator_unbiased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        self.generator_biased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        token_table = SmilesToTokens()
        self.table = token_table.table
        self.predictor = predictor
        self.get_reward = get_reward
        self.property_identifier = property_identifier
        self.all_rewards = []
        self.all_losses = []
        self.threshold_greedy = 0.1
Example #6
    def policy_gradient(self, gamma=1):
        """
        Implementation of the policy gradient algorithm.

        Parameters:
        -----------
        self.n_batch: int
            number of trajectories to sample per batch.    
        gamma: float (default 0.97)
            factor by which rewards will be discounted within one trajectory.
            Usually this number will be somewhat close to 1.0.

        Returns
        -------
        This function returns, at each iteration, the graphs with average 
        reward and averaged loss from the batch of generated trajectories 
        (SMILES). Moreover it returns the average reward for QED and KOR properties.
        Also, it returns the used weights and the averaged scaled reward for
         """

        pol = 0.5
        cumulative_rewards = []
        cumulative_rewards_a2d = []
        cumulative_rewards_bbb = []
        previous_weights = []

        w_a2d = 0.5

        weights = [w_a2d, 1 - w_a2d]

        # Initialize the variable that will contain the output of each prediction
        dimen = len(self.table)
        states = np.empty(0).reshape(0, dimen)
        pol_rewards_a2d = []
        pol_rewards_bbb = []

        all_rewards = []
        all_losses = []
        # Re-compile the model to adapt the loss function and optimizer to the RL problem
        self.generator_biased.model = self.get_policy_model(np.arange(43))
        self.generator_biased.model.load_weights(
            self.configReinforce.model_name_unbiased)
        memory_smiles = []
        for i in range(self.configReinforce.n_iterations):

            for j in trange(self.configReinforce.n_policy,
                            desc='Policy gradient progress'):

                cur_reward = 0
                cur_reward_a2d = 0
                cur_reward_bbb = 0

                # Object needed to transform newly generated SMILES into one-hot encoding
                token_table = SmilesToTokens()
                aux_matrix = np.zeros((65, 1))  # 65 is the maximum padded trajectory length

                ii = 0

                for m in range(self.configReinforce.batch_size):
                    # Sampling new trajectory
                    reward = 0
                    uniq = True

                    while reward == 0:
                        predictSMILES = PredictSMILES(
                            self.generator_unbiased, self.generator_biased,
                            True, self.threshold_greedy, self.configReinforce,
                            False)  # generate new trajectory
                        trajectory = predictSMILES.sample()

                        try:
                            s = trajectory[
                                0]  # because predictSMILES returns a list of smiles strings
                            if 'A' in s:  # A is the padding character
                                s = remove_padding(trajectory[0])

                            print("Validation of: ", s)

                            mol = Chem.MolFromSmiles(s)

                            trajectory = 'G' + Chem.MolToSmiles(mol) + 'E'
                            #                                trajectory = 'GCCE'

                            if len(memory_smiles) > 30:
                                memory_smiles.remove(memory_smiles[0])
                            memory_smiles.append(s)

                            if len(trajectory) > 65:
                                reward = 0
                            else:
                                rewards = self.get_reward_MO(
                                    self.predictor_a2d, self.predictor_bbb,
                                    trajectory[1:-1], memory_smiles)
                                print(rewards)
                                reward = scalarization(rewards,
                                                       self.scalarization_mode,
                                                       weights,
                                                       self.preds_range, m)

                            print(reward)

                        except Exception:
                            reward = 0
                            print("\nInvalid SMILES!")

                    # Converting the string of characters to one-hot encoding
                    trajectory_input, _ = token_table.one_hot_encode(
                        token_table.tokenize(trajectory))
                    ti, _ = token_table.one_hot_encode(
                        token_table.tokenize(trajectory))
                    discounted_reward = reward
                    cur_reward += reward
                    cur_reward_a2d += rewards[0]
                    cur_reward_bbb += rewards[1]

                    # "Following" the trajectory and accumulating the loss
                    idxs = 0
                    for p in range(1, len(trajectory_input[0, :, ])):

                        state = []
                        state = np.reshape(trajectory_input[0, p, :],
                                           [1, dimen])
                        idx = np.nonzero(state)
                        state[idx] = state[:, idx[1]] * discounted_reward
                        #                            output = self.generator_biased.model.predict(trajectory_input[:,0:p,:])
                        #
                        inp = ti[:, 0:p, :]

                        inp_p = padding_one_hot(inp, self.table)  # pad the input to the maximum length
                        mat = np.zeros((1, 65))
                        mat[:, idxs] = 1

                        if ii == 0:
                            inputs = inp_p
                            aux_matrix = mat
                        else:
                            inputs = np.dstack([inputs, inp_p])
                            aux_matrix = np.dstack([aux_matrix, mat])

                        discounted_reward = discounted_reward * gamma

                        states = np.vstack([states, state])
                        ii += 1
                        idxs += 1

                # Doing backward pass and parameters update

                states = states[:, np.newaxis, :]
                inputs = np.moveaxis(inputs, -1, 0)

                aux_matrix = np.squeeze(aux_matrix)
                aux_matrix = np.moveaxis(aux_matrix, -1, 0)

                self.generator_biased.model.compile(
                    optimizer=self.adam, loss=self.custom_loss(aux_matrix))
                # Update weights based on the provided collection of samples, without regard to any fixed batch size.
                loss = self.generator_biased.model.train_on_batch(
                    inputs, states)  # update the weights with a batch

                # Clear out variables
                states = np.empty(0).reshape(0, dimen)
                inputs = np.empty(0).reshape(0, 0, dimen)

                cur_reward = cur_reward / self.configReinforce.batch_size
                cur_reward_a2d = cur_reward_a2d / self.configReinforce.batch_size
                cur_reward_bbb = cur_reward_bbb / self.configReinforce.batch_size

                # serialize model to JSON
                model_json = self.generator_biased.model.to_json()
                with open(
                        self.configReinforce.model_name_biased + "_" +
                        self.scalarization_mode + '_' + str(pol) + ".json",
                        "w") as json_file:
                    json_file.write(model_json)
                # serialize weights to HDF5
                self.generator_biased.model.save_weights(
                    self.configReinforce.model_name_biased + "_" +
                    self.scalarization_mode + '_' + str(pol) + ".h5")
                print("Updated model saved to disk")

                # decide the threshold for the next generated batch
                if len(all_rewards) > 2:
                    self.threshold_greedy = compute_thresh(
                        all_rewards[-3:], self.configReinforce.threshold_set)

                all_rewards.append(moving_average(all_rewards, cur_reward))
                pol_rewards_a2d.append(
                    moving_average(pol_rewards_a2d, cur_reward_a2d))
                pol_rewards_bbb.append(
                    moving_average(pol_rewards_bbb, cur_reward_bbb))

                all_losses.append(moving_average(all_losses, loss))

            plot_training_progress(all_rewards, all_losses)
            plot_individual_rewds(pol_rewards_a2d, pol_rewards_bbb)
        cumulative_rewards.append(np.mean(all_rewards[-15:]))
        cumulative_rewards_a2d.append(np.mean(pol_rewards_a2d[-15:]))
        cumulative_rewards_bbb.append(np.mean(pol_rewards_bbb[-15:]))
        pol += 1

        plot_MO(cumulative_rewards_a2d, cumulative_rewards_bbb,
                cumulative_rewards, previous_weights)
        return cumulative_rewards_a2d, cumulative_rewards_bbb, cumulative_rewards, previous_weights
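
The helpers moving_average and compute_thresh used above are not shown in these examples. A minimal sketch of plausible implementations; the window size, the reward cut-offs, and the default threshold values are assumptions:

import numpy as np

def moving_average(previous_values, new_value, window=10):
    """Smooth a newly observed value against the recent history.

    Returns the mean of the last `window` recorded values together with the
    new one (the window size is an assumption).
    """
    recent = list(previous_values[-window:]) + [new_value]
    return float(np.mean(recent))

def compute_thresh(last_rewards, threshold_set=(0.05, 0.2, 0.5)):
    """Choose the exploration threshold for the next batch.

    The higher the recent average reward, the less often the unbiased
    generator needs to be sampled, so a smaller threshold is returned.
    """
    low, mid, high = threshold_set
    avg = float(np.mean(last_rewards))
    if avg >= 0.75:
        return low
    if avg >= 0.5:
        return mid
    return high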
Example #7
    def __init__(self, config):
        super(Model, self).__init__(config)
        self.weight_init = RandomNormal(mean=0.0, stddev=0.05, seed=config.seed)
        token_table = SmilesToTokens()
        self.build_model(len(token_table.table))
Example #8
    def policy_gradient(self, n_batch=7, gamma=0.98):
        """
            Implementation of the policy gradient algorithm.
    
            Parameters:
            -----------
    
            i,j: int
                indexes of the number of iterations and number of policies, 
                respectively, to load the models properly, i.e, it's necessary 
                to load the original model just when i=0 and j=0, after that 
                it is loaded the updated one 
            n_batch: int (default 2)
                number of trajectories to sample per batch.    
            gamma: float (default 0.97)
                factor by which rewards will be discounted within one trajectory.
                Usually this number will be somewhat close to 1.0.

            Returns
            -------
            total_reward: float
                value of the reward averaged through n_batch sampled trajectories
    
            rl_loss: float
                value for the policy_gradient loss averaged through n_batch sampled
                trajectories
             """
        #            opt = tf.train.AdamOptimizer(learning_rate=0.0001)
        #            sess.run(tf.initialize_all_variables())
        training_rewards = []
        training_losses = []
        for i in range(self.configReinforce.n_iterations):
            for j in trange(self.configReinforce.n_policy,
                            desc='Policy gradient progress'):

                self.opt = tf.train.GradientDescentOptimizer(
                    learning_rate=0.001)
                #            opt = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False,name='Adam')
                # loss scalar in tensor format
                self.loss = tf.zeros(dtype=tf.float32, shape=1)

                cur_reward = 0

                # Necessary object to transform new generated smiles
                token_table = SmilesToTokens()

                for _ in range(n_batch):

                    # Sampling new trajectory
                    reward = 0

                    while reward == 0:
                        predictSMILES = PredictSMILES(
                            self.generator_unbiased, self.generator_biased,
                            True, self.threshold_greedy,
                            self.configReinforce)  # generate new trajectory
                        trajectory = predictSMILES.sample()

                        try:
                            s = trajectory[
                                0]  # because predictSMILES returns a list of smiles strings
                            if 'A' in s:  # A is the padding character
                                s = remove_padding(trajectory[0])

                            print("Validation of: ", s)

                            mol = Chem.MolFromSmiles(s)

                            trajectory = 'G' + Chem.MolToSmiles(mol) + 'E'
                            reward = self.get_reward(self.predictor,
                                                     trajectory[1:-1],
                                                     self.property_identifier)

                            print(reward)

                        except Exception:
                            reward = 0
                            print("\nInvalid SMILES!")

                    # Converting the string of characters to one-hot encoding
                    trajectory_input, _ = token_table.one_hot_encode(
                        token_table.tokenize(trajectory))
                    discounted_reward = reward
                    cur_reward += reward

                    # "Following" the trajectory and accumulating the loss
                    for p in range(1, len(trajectory_input[0, :, ])):

                        output = self.generator_biased.model.predict(
                            trajectory_input[:, 0:p, :])[0][-1]
                        c = tf.compat.v1.math.log_softmax(
                            self.generator_biased.model.output[0, 0, :])
                        idx = np.nonzero(trajectory_input[0, p, :])
                        # log-probability of the token actually generated at step p
                        l = c[idx[0].item()]
                        #                    l = losses.categorical_crossentropy(-trajectory_input[0,p,:],self.generator.model.output[0,0,:])
                        self.loss = tf.math.subtract(
                            self.loss,
                            tf.math.multiply(
                                l,
                                tf.constant(discounted_reward,
                                            dtype="float32")))
                        discounted_reward = discounted_reward * gamma

                # Doing backward pass and parameters update
                self.loss = tf.math.divide(
                    self.loss, tf.constant(n_batch, dtype="float32"))

                # `sess` is assumed to be an active tf.compat.v1.Session created elsewhere
                cur_loss = sess.run(self.loss,
                                    feed_dict={
                                        self.generator_biased.model.input:
                                        trajectory_input
                                    })

                # Compute the gradients for a list of variables.
                #            grads_and_vars = opt.compute_gradients(self.loss, self.generator_biased.model.trainable_weights[0:-2])
                self.grads_and_vars = self.opt.compute_gradients(
                    self.loss, self.generator_biased.model.trainable_weights)
                # Ask the optimizer to apply the calculated gradients.
                sess.run(self.opt.apply_gradients(self.grads_and_vars),
                         feed_dict={
                             self.generator_biased.model.input:
                             trajectory_input
                         })

                cur_reward = cur_reward / n_batch

                # serialize model to JSON
                model_json = self.generator_biased.model.to_json()
                with open(self.configReinforce.model_name_biased + ".json",
                          "w") as json_file:
                    json_file.write(model_json)
                # serialize weights to HDF5
                self.generator_biased.model.save_weights(
                    self.configReinforce.model_name_biased + ".h5")
                print("Updated model saved to disk")

                self.all_rewards.append(cur_reward)

                if len(self.all_rewards) > 2:
                    self.threshold_greedy = compute_thresh(
                        self.all_rewards[-3:])

                self.all_rewards.append(
                    moving_average(self.all_rewards, cur_reward))
                self.all_losses.append(
                    moving_average(self.all_losses, cur_loss))

            plot_training_progress(self.all_rewards, self.all_losses)
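
For reference, the quantity accumulated in self.loss above is the standard REINFORCE objective: the negative log-probability of each generated token, weighted by the reward discounted along the trajectory. A self-contained numpy illustration of that computation for a single trajectory (the token probabilities and the reward are made up for the example):

import numpy as np

def reinforce_loss(step_probs, chosen_idx, reward, gamma=0.98):
    """Policy-gradient loss for one sampled trajectory.

    step_probs: per-step probability vectors produced by the model
    chosen_idx: index of the token actually generated at each step
    reward:     scalar reward assigned to the whole trajectory
    """
    loss = 0.0
    discounted_reward = reward
    for probs, idx in zip(step_probs, chosen_idx):
        log_p = np.log(probs[idx] + 1e-9)    # log-probability of the chosen token
        loss -= log_p * discounted_reward    # accumulate the negative weighted log-prob
        discounted_reward *= gamma           # discount within the trajectory
    return loss

# Made-up example: a 3-step trajectory over a 4-token vocabulary
step_probs = [np.array([0.1, 0.6, 0.2, 0.1]),
              np.array([0.3, 0.3, 0.3, 0.1]),
              np.array([0.05, 0.05, 0.1, 0.8])]
chosen_idx = [1, 0, 3]
print(reinforce_loss(step_probs, chosen_idx, reward=0.7))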