def __init__(self,
             env,
             policy,
             max_search_depth=100,
             n_search_episodes=1000,
             discount_factor=0.99,
             value_weight=1.0,
             optimizer=Adam(),
             model_dir=None):
    """Set up an object that optimizes a policy.

    Parameters
    ----------
    env: Environment
      the Environment to interact with. A deep copy of it is stored, so the
      caller's instance is not the one held by this object.
    policy: Policy
      the Policy to optimize.  Its create_layers() method must return a dict
      containing the keys 'action_prob' and 'value', corresponding to the
      action probabilities and value estimate
    max_search_depth: int
      the maximum depth of the tree search, measured in steps
    n_search_episodes: int
      the number of episodes to simulate (up to max_search_depth, if they do
      not terminate first) for each tree search
    discount_factor: float
      the discount factor to use when computing rewards
    value_weight: float
      a scale factor for the value loss term in the loss function
    optimizer: Optimizer
      the optimizer to use.  If None is passed explicitly, an Adam optimizer
      with learning_rate=0.001, beta1=0.9, beta2=0.999 is created.
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary
      directory will be created.
    """
    self._env = copy.deepcopy(env)
    self._policy = policy

    # Plain search/loss hyperparameters, stored as given.
    self.value_weight = value_weight
    self.discount_factor = discount_factor
    self.n_search_episodes = n_search_episodes
    self.max_search_depth = max_search_depth

    # Whether the first entry of the state shape is itself a sequence.
    first_shape_entry = env.state_shape[0]
    self._state_is_list = isinstance(first_shape_entry, SequenceCollection)

    self._optimizer = (Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
                       if optimizer is None else optimizer)

    # The graph is built only after every attribute above has been set.
    graph_parts = self._build_graph(None, 'global', model_dir)
    (self._graph, self._features, self._pred_prob, self._pred_value,
     self._search_prob, self._search_value) = graph_parts

    with self._graph._get_tf("Graph").as_default(), \
            tf.variable_scope('global'):
        checkpoint = tf.train.Checkpoint()
        self._checkpoint = checkpoint
        checkpoint.save_counter  # Ensure the variable has been created
        checkpoint.listed = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                              scope='global')
        self._graph.session.run(checkpoint.save_counter.initializer)
def test_multitask_regression_overfit(self):
    """Check that a multitask regressor overfits a tiny dataset."""
    n_tasks, n_samples, n_features, n_classes = 10, 10, 3, 2

    # Build a dummy dataset: random features, all-zero labels, unit weights.
    np.random.seed(123)
    features = np.random.rand(n_samples, n_features)
    label_shape = (n_samples, n_tasks)
    train_set = dc.data.NumpyDataset(features, np.zeros(label_shape),
                                     np.ones(label_shape),
                                     np.arange(n_samples))

    mse_metric = dc.metrics.Metric(dc.metrics.mean_squared_error,
                                   task_averager=np.mean,
                                   mode="regression")
    adam = Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999)
    regressor = dc.models.MultitaskRegressor(n_tasks,
                                             n_features,
                                             dropouts=[0.0],
                                             weight_init_stddevs=[0.1],
                                             batch_size=n_samples,
                                             optimizer=adam)

    # Train, then score on the same data that was used for training.
    regressor.fit(train_set, nb_epoch=50)
    train_scores = regressor.evaluate(train_set, [mse_metric])
    assert train_scores[mse_metric.name] < 0.1
def test_save_load(self):
    """Check that a saved TensorGraph reloads with matching predictions.

    The model directory is moved to a new location before reloading, and the
    predictions before and after are compared with an absolute tolerance of
    0.01.
    """
    n_data_points = 20
    n_features = 2
    inputs = np.random.rand(n_data_points, n_features)
    targets = [[0, 1] for _ in range(n_data_points)]
    train_set = NumpyDataset(inputs, targets)

    # Layers are created in the same order as before.
    feature_layer = Feature(shape=(None, n_features))
    dense_layer = Dense(out_channels=2, in_layers=[feature_layer])
    softmax_out = SoftMax(in_layers=[dense_layer])
    label_layer = Label(shape=(None, 2))
    cross_entropy = SoftMaxCrossEntropy(in_layers=[label_layer, dense_layer])
    mean_loss = ReduceMean(in_layers=[cross_entropy])

    graph = dc.models.TensorGraph(learning_rate=0.01)
    graph.add_output(softmax_out)
    graph.set_loss(mean_loss)

    # A submodel is registered on the graph; its handle is not used again.
    sub_loss = ReduceSum(in_layers=cross_entropy)
    sub_optimizer = Adam(learning_rate=0.002)
    _submodel = graph.create_submodel(layers=[dense_layer],
                                      loss=sub_loss,
                                      optimizer=sub_optimizer)

    graph.fit(train_set, nb_epoch=1)
    before = np.squeeze(graph.predict_on_batch(inputs))
    graph.save()

    # mkdtemp only reserves a unique path; the directory itself is removed so
    # the model directory can be moved onto that path.
    relocated = tempfile.mkdtemp()
    shutil.rmtree(relocated)
    shutil.move(graph.model_dir, relocated)

    reloaded = TensorGraph.load_from_dir(relocated)
    after = np.squeeze(reloaded.predict_on_batch(inputs))
    assert_true(np.all(np.isclose(before, after, atol=0.01)))
def test_fittransform_regression_overfit(self):
    """Check that MultitaskFitTransformRegressor overfits a small regression set."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Build a dummy dataset: random square-matrix inputs, all-zero labels,
    # unit weights.
    np.random.seed(123)
    inputs = np.random.rand(n_samples, n_features, n_features)
    label_shape = (n_samples, n_tasks)
    train_set = dc.data.NumpyDataset(inputs, np.zeros(label_shape),
                                     np.ones(label_shape),
                                     np.arange(n_samples))

    transformers = [dc.trans.CoulombFitTransformer(train_set)]
    mse_metric = dc.metrics.Metric(dc.metrics.mean_squared_error)
    adam = Adam(learning_rate=0.003, beta1=0.9, beta2=0.999)
    init_stddev = np.sqrt(6) / np.sqrt(1000)
    regressor = dc.models.MultitaskFitTransformRegressor(
        n_tasks, [n_features, n_features],
        dropouts=[0.01],
        weight_init_stddevs=[init_stddev],
        batch_size=n_samples,
        fit_transformers=transformers,
        n_evals=1,
        optimizer=adam)

    # Train, then score on the same data that was used for training.
    regressor.fit(train_set, nb_epoch=100)
    train_scores = regressor.evaluate(train_set, [mse_metric])
    assert train_scores[mse_metric.name] < 0.1
def test_multitask_classification_overfit(self):
    """Check that MultitaskClassifier overfits a tiny dataset."""
    n_tasks, n_samples, n_features, n_classes = 10, 10, 3, 2

    # Build a dummy dataset: random features, all-zero labels, unit weights.
    np.random.seed(123)
    features = np.random.rand(n_samples, n_features)
    label_shape = (n_samples, n_tasks)
    train_set = dc.data.NumpyDataset(features, np.zeros(label_shape),
                                     np.ones(label_shape),
                                     np.arange(n_samples))

    accuracy_metric = dc.metrics.Metric(dc.metrics.accuracy_score,
                                        task_averager=np.mean)
    adam = Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999)
    classifier = dc.models.MultitaskClassifier(n_tasks,
                                               n_features,
                                               dropouts=[0.0],
                                               weight_init_stddevs=[0.1],
                                               batch_size=n_samples,
                                               optimizer=adam)

    # Train with the default number of epochs, then score on the training data.
    classifier.fit(train_set)
    train_scores = classifier.evaluate(train_set, [accuracy_metric])
    assert train_scores[accuracy_metric.name] > 0.9
def __init__(self,
             learner,
             learning_rate=0.001,
             optimization_steps=1,
             meta_batch_size=10,
             optimizer=Adam(),
             model_dir=None):
    """Set up an object that performs meta-optimization.

    Parameters
    ----------
    learner: MetaLearner
      defines the meta-learning problem
    learning_rate: float or Tensor
      the learning rate to use for optimizing each task (not to be confused
      with the one used for meta-learning).  This can optionally be made a
      variable (represented as a Tensor), in which case the learning rate will
      itself be learnable.
    optimization_steps: int
      the number of steps of gradient descent to perform for each task
    meta_batch_size: int
      the number of tasks to use for each step of meta-learning
    optimizer: Optimizer
      the optimizer to use for meta-learning (not to be confused with the
      gradient descent optimization performed for each task)
    model_dir: str
      the directory in which the model will be saved.  If None, a temporary
      directory will be created.
    """
    # Keep the caller's settings.
    self.learner = learner
    self.learning_rate = learning_rate
    self.optimization_steps = optimization_steps
    self.meta_batch_size = meta_batch_size
    self.optimizer = optimizer

    # Resolve the output directory: a fresh temporary one when none is given,
    # otherwise the requested one (created if it does not exist yet).
    using_temp_dir = model_dir is None
    if using_temp_dir:
        model_dir = tempfile.mkdtemp()
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    self._model_dir_is_temp = using_temp_dir
    self.model_dir = model_dir
    self.save_file = "%s/%s" % (self.model_dir, "model")

    # Both the meta-optimizer and the per-task gradient descent optimizer are
    # created against the same global step variable.
    global_step = tf.Variable(0, trainable=False)
    self._global_step = global_step
    self._tf_optimizer = optimizer._create_tf_optimizer(global_step)
    per_task_optimizer = GradientDescent(learning_rate=self.learning_rate)
    self._tf_task_optimizer = per_task_optimizer._create_tf_optimizer(
        global_step)

    # Checkpoint object used for saving; it tracks the learner's variables.
    checkpoint = tf.train.Checkpoint()
    self._checkpoint = checkpoint
    checkpoint.listed = learner.variables
def test_ppo_reload():
    """Check that a PPO model restored from disk predicts what the trained one did."""
    env = RouletteEnvironment()
    policy = TestPolicy(env)

    # Train a model and record its prediction for one state.
    trained = dc.rl.PPO(env,
                        policy,
                        max_rollout_length=20,
                        optimization_epochs=8,
                        optimizer=Adam(learning_rate=0.003))
    trained.fit(1000)
    expected_prob, expected_value = trained.predict([[0]])

    # Create a second model on the same directory and restore into it.
    saved_dir = trained._model.model_dir
    restored = dc.rl.PPO(env, policy, model_dir=saved_dir)
    restored.restore()
    restored_prob, restored_value = restored.predict([[0]])

    assert np.all(expected_prob == restored_prob)
    assert expected_value == restored_value
def test_a2c_reload():
    """Check that an A2C model restored from disk predicts what the trained one did."""
    env = RouletteEnvironment()
    policy = TestPolicy(env)

    # Train a model and record its prediction for one state.
    trained = dc.rl.A2C(env,
                        policy,
                        max_rollout_length=20,
                        optimizer=Adam(learning_rate=0.001))
    trained.fit(1000)
    expected_prob, expected_value = trained.predict([[0]])

    # Create a second model on the same directory and restore into it.
    saved_dir = trained._model.model_dir
    restored = dc.rl.A2C(env, policy, model_dir=saved_dir)
    restored.restore()
    restored_prob, restored_value = restored.predict([[0]])

    assert np.all(expected_prob == restored_prob)
    assert expected_value == restored_value
def eval_tic_tac_toe(value_weight,
                     num_epoch_rounds=1,
                     games=10**4,
                     rollouts=10**5):
    """Train an A3C agent on tic-tac-toe and report its average reward.

    Any existing model directory is removed first.  Then, for each round, an
    A3C model is created on that directory, restored from a checkpoint if
    possible, trained for `rollouts` more steps, and evaluated by playing
    `games` games.

    Parameters
    ----------
    value_weight: float
      passed through to A3C as its value_weight argument
    num_epoch_rounds: int
      the number of train-then-evaluate rounds to run
    games: int
      the number of evaluation games played after each round of training
    rollouts: int
      the argument passed to A3C.fit() in each round

    Returns
    -------
    a list with one single-entry dict per round, mapping
    (round index + 1) * rollouts to the mean over all evaluation games of the
    last reward returned by env.step() in each game
    """
    env = deepchem.rl.envs.tictactoe.TicTacToeEnvironment()
    policy = TicTacToePolicy()
    model_dir = "/tmp/tictactoe"

    # Best-effort cleanup: the directory may simply not exist yet.  Unlike a
    # bare `except:`, this does not swallow KeyboardInterrupt or SystemExit.
    shutil.rmtree(model_dir, ignore_errors=True)

    avg_rewards = []
    for j in range(num_epoch_rounds):
        a3c = dc.rl.A3C(env,
                        policy,
                        entropy_weight=0.01,
                        value_weight=value_weight,
                        model_dir=model_dir,
                        optimizer=Adam(learning_rate=0.001))
        # Restoring is optional (there is nothing to restore in the first
        # round), so a failure is reported and training starts from the
        # model's current state.
        try:
            a3c.restore()
        except Exception:
            print("unable to restore")
        a3c.fit(rollouts)

        rewards = []
        for _ in range(games):
            env.reset()
            # Stays at -inf only if the environment is already terminated
            # right after reset, i.e. no step is taken.
            reward = -float('inf')
            while not env._terminated:
                action = a3c.select_action(env._state)
                reward = env.step(action)
            rewards.append(reward)
        avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
    return avg_rewards
class KerasModel(Model): """This is a DeepChem model implemented by a Keras model. This class provides several advantages over using the Keras model's fitting and prediction methods directly. 1. It provides better integration with the rest of DeepChem, such as direct support for Datasets and Transformers. 2. It defines the loss in a more flexible way. In particular, Keras does not support multidimensional weight matrices, which makes it impossible to implement most multitask models with Keras. 3. It provides various additional features not found in the Keras Model class, such as uncertainty prediction and saliency mapping. The loss function for a model can be defined in two different ways. For models that have only a single output and use a standard loss function, you can simply provide a dc.models.losses.Loss object. This defines the loss for each sample or sample/task pair. The result is automatically multiplied by the weights and averaged over the batch. Any additional losses computed by model layers, such as weight decay penalties, are also added. For more complicated cases, you can instead provide a function that directly computes the total loss. It must be of the form f(outputs, labels, weights), taking the list of outputs from the model, the expected values, and any weight matrices. It should return a scalar equal to the value of the loss function for the batch. No additional processing is done to the result; it is up to you to do any weighting, averaging, adding of penalty terms, etc. You can optionally provide an output_types argument, which describes how to interpret the model's outputs. This should be a list of strings, one for each output. You can use an arbitrary output_type for a output, but some output_types are special and will undergo extra processing: - 'prediction': This is a normal output, and will be returned by predict(). If output types are not specified, all outputs are assumed to be of this type. 
- 'loss': This output will be used in place of the normal outputs for computing the loss function. For example, models that output probability distributions usually do it by computing unbounded numbers (the logits), then passing them through a softmax function to turn them into probabilities. When computing the cross entropy, it is more numerically stable to use the logits directly rather than the probabilities. You can do this by having the model produce both probabilities and logits as outputs, then specifying output_types=['prediction', 'loss']. When predict() is called, only the first output (the probabilities) will be returned. But during training, it is the second output (the logits) that will be passed to the loss function. - 'variance': This output is used for estimating the uncertainty in another output. To create a model that can estimate uncertainty, there must be the same number of 'prediction' and 'variance' outputs. Each variance output must have the same shape as the corresponding prediction output, and each element is an estimate of the variance in the corresponding prediction. Also be aware that if a model supports uncertainty, it MUST use dropout on every layer, and dropout most be enabled during uncertainty prediction. Otherwise, the uncertainties it computes will be inaccurate. - other: Arbitrary output_types can be used to extract outputs produced by the model, but will have no additional processing performed. """ def __init__(self, model, loss, output_types=None, batch_size=100, model_dir=None, learning_rate=0.001, optimizer=None, tensorboard=False, tensorboard_log_frequency=100, **kwargs): """Create a new KerasModel. 
Parameters ---------- model: tf.keras.Model the Keras model implementing the calculation loss: dc.models.losses.Loss or function a Loss or function defining how to compute the training loss for each batch, as described above output_types: list of strings the type of each output from the model, as described above batch_size: int default batch size for training and evaluating model_dir: str the directory on disk where the model will be stored. If this is None, a temporary directory is created. learning_rate: float or LearningRateSchedule the learning rate to use for fitting. If optimizer is specified, this is ignored. optimizer: Optimizer the optimizer to use for fitting. If this is specified, learning_rate is ignored. tensorboard: bool whether to log progress to TensorBoard during training tensorboard_log_frequency: int the frequency at which to log data to TensorBoard, measured in batches """ super(KerasModel, self).__init__(model_instance=model, model_dir=model_dir, **kwargs) self.model = model if isinstance(loss, Loss): self._loss_fn = _StandardLoss(model, loss) else: self._loss_fn = loss self.batch_size = batch_size if optimizer is None: self.optimizer = Adam(learning_rate=learning_rate) else: self.optimizer = optimizer self.tensorboard = tensorboard self.tensorboard_log_frequency = tensorboard_log_frequency if self.tensorboard: self._summary_writer = tf.summary.create_file_writer( self.model_dir) if output_types is None: self._prediction_outputs = None self._loss_outputs = None self._variance_outputs = None self._other_outputs = None else: self._prediction_outputs = [] self._loss_outputs = [] self._variance_outputs = [] self._other_outputs = [] for i, type in enumerate(output_types): if type == 'prediction': self._prediction_outputs.append(i) elif type == 'loss': self._loss_outputs.append(i) elif type == 'variance': self._variance_outputs.append(i) else: self._other_outputs.append(i) if len(self._loss_outputs) == 0: self._loss_outputs = self._prediction_outputs 
self._built = False self._inputs_built = False self._training_ops_built = False self._output_functions = {} self._gradient_fn_for_vars = {} def _ensure_built(self): """The first time this is called, create internal data structures.""" if self._built: return self._built = True self._global_step = tf.Variable(0, trainable=False) self._tf_optimizer = self.optimizer._create_optimizer( self._global_step) self._checkpoint = tf.train.Checkpoint(optimizer=self._tf_optimizer, model=self.model) def _create_inputs(self, example_inputs): """The first time this is called, create tensors representing the inputs and outputs.""" if self._inputs_built: return self._ensure_built() self._inputs_built = True if (self.model.inputs is not None) and len(self.model.inputs) > 0: self._input_shapes = [t.shape for t in self.model.inputs] self._input_dtypes = [ t.dtype.as_numpy_dtype for t in self.model.inputs ] else: self._input_shapes = [(None, ) + i.shape[1:] for i in example_inputs] self._input_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_inputs ] def _create_training_ops(self, example_batch): """The first time this is called, create tensors used in optimization.""" if self._training_ops_built: return self._create_inputs(example_batch[0]) self._training_ops_built = True self._label_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_batch[1] ] self._weights_dtypes = [ np.float32 if x.dtype == np.float64 else x.dtype for x in example_batch[2] ] def fit(self, dataset, nb_epoch=10, max_checkpoints_to_keep=5, checkpoint_interval=1000, deterministic=False, restore=False, variables=None, loss=None, callbacks=[]): """Train this model on a dataset. Parameters ---------- dataset: Dataset the Dataset to train on nb_epoch: int the number of epochs to train for max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. 
checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. deterministic: bool if True, the samples are processed in order. If False, a different random order is used for each epoch. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. variables: list of tf.Variable the variables to train. If None (the default), all trainable variables in the model are used. loss: function a function of the form f(outputs, labels, weights) that computes the loss for each batch. If None (the default), the model's standard loss function is used. callbacks: function or list of functions one or more functions of the form f(model, step) that will be invoked after every step. This can be used to perform validation, logging, etc. """ return self.fit_generator( self.default_generator(dataset, epochs=nb_epoch, deterministic=deterministic), max_checkpoints_to_keep, checkpoint_interval, restore, variables, loss, callbacks) def fit_generator(self, generator, max_checkpoints_to_keep=5, checkpoint_interval=1000, restore=False, variables=None, loss=None, callbacks=[]): """Train this model on data from a generator. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. checkpoint_interval: int the frequency at which to write checkpoints, measured in training steps. Set this to 0 to disable automatic checkpointing. restore: bool if True, restore the model from the most recent checkpoint and continue training from there. If False, retrain the model from scratch. variables: list of tf.Variable the variables to train. If None (the default), all trainable variables in the model are used. 
loss: function a function of the form f(outputs, labels, weights) that computes the loss for each batch. If None (the default), the model's standard loss function is used. callbacks: function or list of functions one or more functions of the form f(model, step) that will be invoked after every step. This can be used to perform validation, logging, etc. Returns ------- the average loss over the most recent checkpoint interval """ if not isinstance(callbacks, Sequence): callbacks = [callbacks] self._ensure_built() if checkpoint_interval > 0: manager = tf.train.CheckpointManager(self._checkpoint, self.model_dir, max_checkpoints_to_keep) avg_loss = 0.0 averaged_batches = 0 train_op = None if loss is None: loss = self._loss_fn var_key = None if variables is not None: var_key = tuple(v.ref() for v in variables) # The optimizer creates internal variables the first time apply_gradients() # is called for a new set of variables. If that happens inside a function # annotated with tf.function it throws an exception, so call it once here. zero_grads = [tf.zeros(v.shape) for v in variables] self._tf_optimizer.apply_gradients(zip(zero_grads, variables)) if var_key not in self._gradient_fn_for_vars: self._gradient_fn_for_vars[var_key] = self._create_gradient_fn( variables) apply_gradient_for_batch = self._gradient_fn_for_vars[var_key] time1 = time.time() # Main training loop. for batch in generator: self._create_training_ops(batch) if restore: self.restore() restore = False inputs, labels, weights = self._prepare_batch(batch) # Execute the loss function, accumulating the gradients. if len(inputs) == 1: inputs = inputs[0] batch_loss = apply_gradient_for_batch(inputs, labels, weights, loss) current_step = self._global_step.numpy() avg_loss += batch_loss # Report progress and write checkpoints. 
averaged_batches += 1 should_log = (current_step % self.tensorboard_log_frequency == 0) if should_log: avg_loss = float(avg_loss) / averaged_batches logger.info('Ending global_step %d: Average loss %g' % (current_step, avg_loss)) avg_loss = 0.0 averaged_batches = 0 if checkpoint_interval > 0 and current_step % checkpoint_interval == checkpoint_interval - 1: manager.save() for c in callbacks: c(self, current_step) if self.tensorboard and should_log: with self._summary_writer.as_default(): tf.summary.scalar('loss', batch_loss, current_step) # Report final results. if averaged_batches > 0: avg_loss = float(avg_loss) / averaged_batches logger.info('Ending global_step %d: Average loss %g' % (current_step, avg_loss)) if checkpoint_interval > 0: manager.save() time2 = time.time() logger.info("TIMING: model fitting took %0.3f s" % (time2 - time1)) return avg_loss def _create_gradient_fn(self, variables): """Create a function that computes gradients and applies them to the model. Because of the way TensorFlow function tracing works, we need to create a separate function for each new set of variables. """ @tf.function(experimental_relax_shapes=True) def apply_gradient_for_batch(inputs, labels, weights, loss): with tf.GradientTape() as tape: outputs = self.model(inputs, training=True) if isinstance(outputs, tf.Tensor): outputs = [outputs] if self._loss_outputs is not None: outputs = [outputs[i] for i in self._loss_outputs] batch_loss = loss(outputs, labels, weights) if variables is None: vars = self.model.trainable_variables else: vars = variables grads = tape.gradient(batch_loss, vars) self._tf_optimizer.apply_gradients(zip(grads, vars)) self._global_step.assign_add(1) return batch_loss return apply_gradient_for_batch def fit_on_batch(self, X, y, w, variables=None, loss=None, callbacks=[]): """Perform a single step of training. 
Parameters ---------- X: ndarray the inputs for the batch y: ndarray the labels for the batch w: ndarray the weights for the batch variables: list of tf.Variable the variables to train. If None (the default), all trainable variables in the model are used. loss: function a function of the form f(outputs, labels, weights) that computes the loss for each batch. If None (the default), the model's standard loss function is used. callbacks: function or list of functions one or more functions of the form f(model, step) that will be invoked after every step. This can be used to perform validation, logging, etc. """ if not self.built: self.build() dataset = NumpyDataset(X, y, w) return self.fit(dataset, nb_epoch=1, variables=variables, loss=loss, callbacks=callbacks) def _predict(self, generator, transformers, outputs, uncertainty, other_output_types): """ Predict outputs for data provided by a generator. This is the private implementation of prediction. Do not call it directly. Instead call one of the public prediction methods. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. outputs: Tensor or list of Tensors The outputs to return. If this is None, the model's standard prediction outputs will be returned. Alternatively one or more Tensors within the model may be specified, in which case the output of those Tensors will be returned. uncertainty: bool specifies whether this is being called as part of estimating uncertainty. If True, it sets the training flag so that dropout will be enabled, and returns the values of the uncertainty outputs. other_output_types: list, optional Provides a list of other output_types (strings) to predict from model. 
Returns: a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ results = None variances = None if (outputs is not None) and (other_output_types is not None): raise ValueError( 'This model cannot compute outputs and other output_types simultaneously. Please invoke one at a time.' ) if uncertainty and (other_output_types is not None): raise ValueError( 'This model cannot compute uncertainties and other output types simultaneously. Please invoke one at a time.' ) if uncertainty: assert outputs is None if self._variance_outputs is None or len( self._variance_outputs) == 0: raise ValueError('This model cannot compute uncertainties') if len(self._variance_outputs) != len(self._prediction_outputs): raise ValueError( 'The number of variances must exactly match the number of outputs' ) if other_output_types: assert outputs is None if self._other_outputs is None or len(self._other_outputs) == 0: raise ValueError( 'This model cannot compute other outputs since no other output_types were specified.' ) if (outputs is not None and self.model.inputs is not None and len(self.model.inputs) == 0): raise ValueError( "Cannot use 'outputs' argument with a model that does not specify its inputs. Note models defined in imperative subclassing style cannot specify outputs" ) if isinstance(outputs, tf.Tensor): outputs = [outputs] for batch in generator: inputs, labels, weights = batch self._create_inputs(inputs) inputs, _, _ = self._prepare_batch((inputs, None, None)) # Invoke the model. 
if len(inputs) == 1: inputs = inputs[0] if outputs is not None: outputs = tuple(outputs) key = tuple(t.ref() for t in outputs) if key not in self._output_functions: self._output_functions[key] = tf.keras.backend.function( self.model.inputs, outputs) output_values = self._output_functions[key](inputs) else: output_values = self._compute_model(inputs) if isinstance(output_values, tf.Tensor): output_values = [output_values] output_values = [t.numpy() for t in output_values] # Apply tranformers and record results. if uncertainty: var = [output_values[i] for i in self._variance_outputs] if variances is None: variances = [var] else: for i, t in enumerate(var): variances[i].append(t) access_values = [] if other_output_types: access_values += self._other_outputs elif self._prediction_outputs is not None: access_values += self._prediction_outputs if len(access_values) > 0: output_values = [output_values[i] for i in access_values] if len(transformers) > 0: if len(output_values) > 1: raise ValueError( "predict() does not support Transformers for models with multiple outputs." ) elif len(output_values) == 1: output_values = [ undo_transforms(output_values[0], transformers) ] if results is None: results = [[] for i in range(len(output_values))] for i, t in enumerate(output_values): results[i].append(t) # Concatenate arrays to create the final results. 
final_results = [] final_variances = [] for r in results: final_results.append(np.concatenate(r, axis=0)) if uncertainty: for v in variances: final_variances.append(np.concatenate(v, axis=0)) return zip(final_results, final_variances) if len(final_results) == 1: return final_results[0] else: return final_results @tf.function(experimental_relax_shapes=True) def _compute_model(self, inputs): """Evaluate the model for a set of inputs.""" return self.model(inputs, training=False) def predict_on_generator(self, generator, transformers=[], outputs=None, output_types=None): """ Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. outputs: Tensor or list of Tensors The outputs to return. If this is None, the model's standard prediction outputs will be returned. Alternatively one or more Tensors within the model may be specified, in which case the output of those Tensors will be returned. If outputs is specified, output_types must be None. output_types: String or list of Strings If specified, all outputs of this type will be retrieved from the model. If output_types is specified, outputs must be None. Returns: a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ return self._predict(generator, transformers, outputs, False, output_types) def predict_on_batch(self, X, transformers=[], outputs=None): """Generates predictions for input samples, processing samples in a batch. Parameters ---------- X: ndarray the input data, as a Numpy array. transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. 
outputs: Tensor or list of Tensors The outputs to return. If this is None, the model's standard prediction outputs will be returned. Alternatively one or more Tensors within the model may be specified, in which case the output of those Tensors will be returned. Returns ------- a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ dataset = NumpyDataset(X=X, y=None) return self.predict(dataset, transformers, outputs) def predict_uncertainty_on_batch(self, X, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. Parameters ---------- X: ndarray the input data, as a Numpy array. masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ dataset = NumpyDataset(X=X, y=None) return self.predict_uncertainty(dataset, masks) def predict(self, dataset, transformers=[], outputs=None, output_types=None): """ Uses self to make predictions on provided Dataset object. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. outputs: Tensor or list of Tensors The outputs to return. If this is None, the model's standard prediction outputs will be returned. 
Alternatively one or more Tensors within the model may be specified, in which case the output of those Tensors will be returned. output_types: list of Strings The output types to return. Will retrieve all outputs of these types from the model. Returns ------- a NumPy array of the model produces a single output, or a list of arrays if it produces multiple outputs """ generator = self.default_generator(dataset, mode='predict', pad_batches=False) return self.predict_on_generator(generator, transformers=transformers, outputs=outputs, output_types=output_types) def predict_embedding(self, dataset): """ Predicts embeddings created by underlying model if any exist. An embedding must be specified to have `output_type` of `'embedding'` in the model definition. Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on Returns ------- a NumPy array of the embeddings model produces, or a list of arrays if it produces multiple embeddings """ generator = self.default_generator(dataset, mode='predict', pad_batches=False) return self._predict(generator, [], None, False, ['embedding']) def predict_uncertainty(self, dataset, masks=50): """ Predict the model's outputs, along with the uncertainty in each one. The uncertainty is computed as described in https://arxiv.org/abs/1703.04977. It involves repeating the prediction many times with different dropout masks. The prediction is computed as the average over all the predictions. The uncertainty includes both the variation among the predicted values (epistemic uncertainty) and the model's own estimates for how well it fits the data (aleatoric uncertainty). Not all models support uncertainty prediction. 
Parameters ---------- dataset: dc.data.Dataset Dataset to make prediction on masks: int the number of dropout masks to average over Returns ------- for each output, a tuple (y_pred, y_std) where y_pred is the predicted value of the output, and each element of y_std estimates the standard deviation of the corresponding element of y_pred """ sum_pred = [] sum_sq_pred = [] sum_var = [] for i in range(masks): generator = self.default_generator(dataset, mode='uncertainty', pad_batches=False) results = self._predict(generator, [], None, True, None) if len(sum_pred) == 0: for p, v in results: sum_pred.append(p) sum_sq_pred.append(p * p) sum_var.append(v) else: for j, (p, v) in enumerate(results): sum_pred[j] += p sum_sq_pred[j] += p * p sum_var[j] += v output = [] std = [] for i in range(len(sum_pred)): p = sum_pred[i] / masks output.append(p) std.append( np.sqrt(sum_sq_pred[i] / masks - p * p + sum_var[i] / masks)) if len(output) == 1: return (output[0], std[0]) else: return zip(output, std) def evaluate_generator(self, generator, metrics, transformers=[], per_task_metrics=False): """Evaluate the performance of this model on the data produced by a generator. Parameters ---------- generator: generator this should generate batches, each represented as a tuple of the form (inputs, labels, weights). metric: deepchem.metrics.Metric Evaluation metric transformers: list of dc.trans.Transformers Transformers that the input data has been transformed by. The output is passed through these transformers to undo the transformations. per_task_metrics: bool If True, return per-task scores. Returns ------- dict Maps tasks to scores under metric. """ evaluator = GeneratorEvaluator(self, generator, transformers) return evaluator.compute_model_performance(metrics, per_task_metrics) def compute_saliency(self, X): """Compute the saliency map for an input sample. This computes the Jacobian matrix with the derivative of each output element with respect to each input element. 
More precisely, - If this model has a single output, it returns a matrix of shape (output_shape, input_shape) with the derivatives. - If this model has multiple outputs, it returns a list of matrices, one for each output. This method cannot be used on models that take multiple inputs. Parameters ---------- X: ndarray the input data for a single sample Returns ------- the Jacobian matrix, or a list of matrices """ input_shape = X.shape X = np.reshape(X, [1] + list(X.shape)) self._create_inputs([X]) X, _, _ = self._prepare_batch(([X], None, None)) # Use a GradientTape to compute gradients. X = tf.constant(X[0]) with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape: tape.watch(X) outputs = self._compute_model(X) if isinstance(outputs, tf.Tensor): outputs = [outputs] final_result = [] for output in outputs: output_shape = tuple(output.shape.as_list()[1:]) output = tf.reshape(output, [-1]) result = [] for i in range(output.shape[0]): result.append(tape.gradient(output[i], X)) final_result.append( tf.reshape(tf.stack(result), output_shape + input_shape).numpy()) if len(final_result) == 1: return final_result[0] return final_result def _prepare_batch(self, batch): inputs, labels, weights = batch inputs = [ x if x.dtype == t else x.astype(t) for x, t in zip(inputs, self._input_dtypes) ] if labels is not None: labels = [ x if x.dtype == t else x.astype(t) for x, t in zip(labels, self._label_dtypes) ] if weights is not None: weights = [ x if x.dtype == t else x.astype(t) for x, t in zip(weights, self._weights_dtypes) ] for i in range(len(inputs)): shape = inputs[i].shape dims = len(shape) expected_dims = len(self._input_shapes[i]) if dims < expected_dims: inputs[i] = inputs[i].reshape(shape + (1, ) * (expected_dims - dims)) elif dims > expected_dims and all(d == 1 for d in shape[expected_dims:]): inputs[i] = inputs[i].reshape(shape[:expected_dims]) return (inputs, labels, weights) def default_generator(self, dataset, epochs=1, mode='fit', 
deterministic=True, pad_batches=True): """Create a generator that iterates batches for a dataset. Subclasses may override this method to customize how model inputs are generated from the data. Parameters ---------- dataset: Dataset the data to iterate epochs: int the number of times to iterate over the full dataset mode: str allowed values are 'fit' (called during training), 'predict' (called during prediction), and 'uncertainty' (called during uncertainty prediction) deterministic: bool whether to iterate over the dataset in order, or randomly shuffle the data for each epoch pad_batches: bool whether to pad each batch up to this model's preferred batch size Returns ------- a generator that iterates batches, each represented as a tuple of lists: ([inputs], [outputs], [weights]) """ for epoch in range(epochs): for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(batch_size=self.batch_size, deterministic=deterministic, pad_batches=pad_batches): yield ([X_b], [y_b], [w_b]) def save_checkpoint(self, max_checkpoints_to_keep=5, model_dir=None): """Save a checkpoint to disk. Usually you do not need to call this method, since fit() saves checkpoints automatically. If you have disabled automatic checkpointing during fitting, this can be called to manually write checkpoints. Parameters ---------- max_checkpoints_to_keep: int the maximum number of checkpoints to keep. Older checkpoints are discarded. model_dir: str, default None Model directory to save checkpoint to. If None, revert to self.model_dir """ self._ensure_built() if model_dir is None: model_dir = self.model_dir if not os.path.exists(model_dir): os.makedirs(model_dir) manager = tf.train.CheckpointManager(self._checkpoint, model_dir, max_checkpoints_to_keep) manager.save() def get_checkpoints(self, model_dir=None): """Get a list of all available checkpoint files. Parameters ---------- model_dir: str, default None Directory to get list of checkpoints from. 
Reverts to self.model_dir if None """ if model_dir is None: model_dir = self.model_dir return tf.train.get_checkpoint_state( model_dir).all_model_checkpoint_paths def restore(self, checkpoint=None, model_dir=None, session=None): """Reload the values of all variables from a checkpoint file. Parameters ---------- checkpoint: str the path to the checkpoint file to load. If this is None, the most recent checkpoint will be chosen automatically. Call get_checkpoints() to get a list of all available checkpoints. model_dir: str, default None Directory to restore checkpoint from. If None, use self.model_dir. session: tf.Session(), default None Session to run restore ops under. If None, self.session is used. """ self._ensure_built() if model_dir is None: model_dir = self.model_dir if checkpoint is None: checkpoint = tf.train.latest_checkpoint(model_dir) if checkpoint is None: raise ValueError('No checkpoint found') self._checkpoint.restore(checkpoint) def get_global_step(self): """Get the number of steps of fitting that have been performed.""" return int(self._global_step) def _create_assignment_map(self, source_model, include_top=True, **kwargs): """ Creates a default assignment map between variables of source and current model. This is used only when a custom assignment map is missing. This assumes the model is made of different layers followed by a dense layer for mapping to output tasks. include_top is used to control whether or not the final dense layer is used. The default assignment map is useful in cases where the type of task is different (classification vs regression) and/or number of tasks. Parameters ---------- source_model: dc.models.KerasModel Source model to copy variable values from. 
include_top: bool, default True if true, copies the last dense layer """ assignment_map = {} source_vars = source_model.model.trainable_variables dest_vars = self.model.trainable_variables if not include_top: source_vars = source_vars[:-2] dest_vars = dest_vars[:-2] for source_var, dest_var in zip(source_vars, dest_vars): assignment_map[source_var.ref()] = dest_var return assignment_map def _create_value_map(self, source_model, **kwargs): """ Creates a value map between variables in the source model and their current values. This is used only when a custom value map is missing, and assumes the restore method has been called under self.session. Parameters ---------- source_model: dc.models.KerasModel Source model to create value map from """ value_map = {} source_vars = source_model.model.trainable_variables for source_var in source_vars: value_map[source_var.ref()] = source_var.numpy() return value_map def load_from_pretrained(self, source_model, assignment_map=None, value_map=None, checkpoint=None, model_dir=None, include_top=True, inputs=None, **kwargs): """Copies variable values from a pretrained model. `source_model` can either be a pretrained model or a model with the same architecture. `value_map` is a variable-value dictionary. If no `value_map` is provided, the variable values are restored to the `source_model` from a checkpoint and a default `value_map` is created. `assignment_map` is a dictionary mapping variables from the `source_model` to the current model. If no `assignment_map` is provided, one is made from scratch and assumes the model is composed of several different layers, with the final one being a dense layer. include_top is used to control whether or not the final dense layer is used. The default assignment map is useful in cases where the type of task is different (classification vs regression) and/or number of tasks in the setting. 
Parameters ---------- source_model: dc.KerasModel, required source_model can either be the pretrained model or a dc.KerasModel with the same architecture as the pretrained model. It is used to restore from a checkpoint, if value_map is None and to create a default assignment map if assignment_map is None assignment_map: Dict, default None Dictionary mapping the source_model variables and current model variables value_map: Dict, default None Dictionary containing source_model trainable variables mapped to numpy arrays. If value_map is None, the values are restored and a default variable map is created using the restored values checkpoint: str, default None the path to the checkpoint file to load. If this is None, the most recent checkpoint will be chosen automatically. Call get_checkpoints() to get a list of all available checkpoints model_dir: str, default None Restore model from custom model directory if needed include_top: bool, default True if True, copies the weights and bias associated with the final dense layer. Used only when assignment map is None inputs: List, input tensors for model if not None, then the weights are built for both the source and self. This option is useful only for models that are built by subclassing tf.keras.Model, and not using the functional API by tf.keras """ if inputs is not None: # Ensure weights for both models are built. source_model.model(inputs) self.model(inputs) self._ensure_built() if value_map is None: logger.info( "No value map provided. Creating default value map from restored model." ) source_model.restore(model_dir=model_dir, checkpoint=checkpoint) value_map = self._create_value_map(source_model=source_model) if assignment_map is None: logger.info( "No assignment map provided. 
Creating custom assignment map.") assignment_map = self._create_assignment_map( source_model=source_model, include_top=include_top) for source_var, dest_var in assignment_map.items(): assert source_var.deref().shape == dest_var.shape dest_var.assign(value_map[source_var])
def __init__(self,
             learner,
             learning_rate=0.001,
             optimization_steps=1,
             meta_batch_size=10,
             optimizer=Adam(),
             model_dir=None):
    """Create an object for performing meta-optimization.

    The constructor builds the whole TensorFlow graph up front: input
    placeholders, the per-task loss, the meta-loss obtained after
    `optimization_steps` gradient-descent updates, gradient accumulators,
    both training ops, a session, and a checkpoint object.

    Parameters
    ----------
    learner: MetaLearner
        defines the meta-learning problem
    learning_rate: float or Tensor
        the learning rate to use for optimizing each task (not to be
        confused with the one used for meta-learning).  This can optionally
        be made a variable (represented as a Tensor), in which case the
        learning rate will itself be learnable.
    optimization_steps: int
        the number of steps of gradient descent to perform for each task
    meta_batch_size: int
        the number of tasks to use for each step of meta-learning
    optimizer: Optimizer
        the optimizer to use for meta-learning (not to be confused with the
        gradient descent optimization performed for each task)
    model_dir: str
        the directory in which the model will be saved.  If None, a
        temporary directory will be created.
    """
    # Remember the caller's settings.
    self.learner = learner
    self._learning_rate = learning_rate
    self.meta_batch_size = meta_batch_size
    self.optimizer = optimizer

    # Use the requested output directory (creating it on demand), or fall
    # back to a fresh temporary one and remember that it is temporary.
    self._model_dir_is_temp = False
    if model_dir is None:
        model_dir = tempfile.mkdtemp()
        self._model_dir_is_temp = True
    elif not os.path.exists(model_dir):
        os.makedirs(model_dir)
    self.model_dir = model_dir
    self.save_file = "%s/%s" % (self.model_dir, "model")

    # Pull one example batch to discover the shape and dtype of every
    # input; the leading (batch) dimension is left unspecified.
    learner.select_task()
    example_inputs = learner.get_batch()
    self._input_shapes = [(None,) + arr.shape[1:] for arr in example_inputs]
    self._input_dtypes = [arr.dtype for arr in example_inputs]

    def make_placeholders():
        # One placeholder per model input, matching the example batch.
        return [
            tf.placeholder(dtype=tf.as_dtype(dtype), shape=shape)
            for shape, dtype in zip(self._input_shapes, self._input_dtypes)
        ]

    self._input_placeholders = make_placeholders()
    self._meta_placeholders = make_placeholders()

    base_variables = learner.variables
    self._loss, self._outputs = learner.compute_model(
        self._input_placeholders, base_variables, False)
    loss, _ = learner.compute_model(self._input_placeholders,
                                    base_variables, True)

    # Build the meta-learning model by unrolling the task optimization.
    updated_variables = base_variables
    last_step = optimization_steps - 1
    for step in range(optimization_steps):
        step_gradients = tf.gradients(loss, updated_variables)
        updated_variables = [
            var if grad is None else var - self._learning_rate * grad
            for var, grad in zip(updated_variables, step_gradients)
        ]
        # In the final loss, use different placeholders for all inputs so
        # the loss will be computed from a different batch.
        if step == last_step:
            step_inputs = self._meta_placeholders
        else:
            step_inputs = self._input_placeholders
        loss, _ = learner.compute_model(step_inputs, updated_variables, True)
    self._meta_loss = loss

    # Create variables for accumulating the gradients.  Variables that the
    # meta-loss does not depend on (gradient is None) are dropped.
    candidate_variables = list(learner.variables)
    candidate_gradients = tf.gradients(self._meta_loss, candidate_variables)
    kept_pairs = [(var, grad)
                  for var, grad in zip(candidate_variables,
                                       candidate_gradients)
                  if grad is not None]
    variables = [var for var, _ in kept_pairs]
    gradients = [grad for _, grad in kept_pairs]

    zero_gradients = [tf.zeros(grad.shape, grad.dtype) for grad in gradients]
    summed_gradients = [
        tf.Variable(zero, trainable=False) for zero in zero_gradients
    ]
    clear_ops = [
        acc.assign(zero) for acc, zero in zip(summed_gradients, zero_gradients)
    ]
    self._clear_gradients = tf.group(*clear_ops)
    accumulate_ops = [
        acc.assign_add(grad) for acc, grad in zip(summed_gradients, gradients)
    ]
    self._add_gradients = tf.group(*accumulate_ops)

    # Create the optimizers for meta-optimization and task optimization.
    self._global_step = tf.placeholder(tf.int32, [])
    grads_and_vars = list(zip(summed_gradients, variables))
    meta_tf_optimizer = optimizer._create_optimizer(self._global_step)
    self._meta_train_op = meta_tf_optimizer.apply_gradients(grads_and_vars)
    task_optimizer = GradientDescent(learning_rate=self._learning_rate)
    task_tf_optimizer = task_optimizer._create_optimizer(self._global_step)
    self._task_train_op = task_tf_optimizer.minimize(self._loss)

    self._session = tf.Session()
    self._session.run(tf.global_variables_initializer())

    # Create a Checkpoint for saving.
    self._checkpoint = tf.train.Checkpoint()
    self._checkpoint.listed = learner.variables
def __init__(self,
             model: tf.keras.Model,
             loss: Union[Loss, LossFn],
             output_types: Optional[List[str]] = None,
             batch_size: int = 100,
             model_dir: Optional[str] = None,
             learning_rate: Union[float, LearningRateSchedule] = 0.001,
             optimizer: Optional[Optimizer] = None,
             tensorboard: bool = False,
             wandb: bool = False,
             log_frequency: int = 100,
             wandb_logger: Optional[WandbLogger] = None,
             **kwargs) -> None:
    """Create a new KerasModel.

    Parameters
    ----------
    model: tf.keras.Model
        the Keras model implementing the calculation
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings
        the type of each output from the model, as described above
    batch_size: int
        default batch size for training and evaluating
    model_dir: str
        the directory on disk where the model will be stored.  If this is
        None, a temporary directory is created.
    learning_rate: float or LearningRateSchedule
        the learning rate to use for fitting.  If optimizer is specified,
        this is ignored.
    optimizer: Optimizer
        the optimizer to use for fitting.  If this is specified,
        learning_rate is ignored.
    tensorboard: bool
        whether to log progress to TensorBoard during training
    wandb: bool
        whether to log progress to Weights & Biases during training
        (deprecated)
    log_frequency: int
        The frequency at which to log data. Data is logged using `logging`
        by default. If `tensorboard` is set, data is also logged to
        TensorBoard. If `wandb` is set, data is also logged to Weights &
        Biases. Logging happens at global steps. Roughly, a global step
        corresponds to one batch of training. If you'd like a printout
        every 10 batch steps, you'd set `log_frequency=10` for example.
    wandb_logger: WandbLogger
        the Weights & Biases logger object used to log data and metrics
    """
    super(KerasModel, self).__init__(model=model,
                                     model_dir=model_dir,
                                     **kwargs)
    self.loss = loss  # not used
    self.learning_rate = learning_rate  # not used
    self.output_types = output_types  # not used

    # A Loss object is wrapped so it can be called like a plain loss
    # function; anything else is used directly.
    self._loss_fn: LossFn = (_StandardLoss(model, loss)
                             if isinstance(loss, Loss) else loss)
    self.batch_size = batch_size
    self.optimizer: Optimizer = (Adam(learning_rate=learning_rate)
                                 if optimizer is None else optimizer)
    self.tensorboard = tensorboard

    # W&B flag support (DEPRECATED)
    if wandb:
        logger.warning(
            "`wandb` argument is deprecated. Please use `wandb_logger` instead. "
            "This argument will be removed in a future release of DeepChem.")
        if not _has_wandb:
            logger.warning(
                "You set wandb to True but W&B is not installed. To use wandb logging, "
                "run `pip install wandb; wandb login`")
    self.wandb = wandb and _has_wandb

    # If `wandb=True` and no logger is provided, initialize default logger
    self.wandb_logger = wandb_logger
    if self.wandb and (self.wandb_logger is None):
        self.wandb_logger = WandbLogger()

    # Setup and initialize W&B logging
    if (self.wandb_logger is not None) and (not self.wandb_logger.initialized):
        self.wandb_logger.setup()

    # Update config with KerasModel params
    wandb_logger_config = {
        'loss': loss,
        'output_types': output_types,
        'batch_size': batch_size,
        'model_dir': model_dir,
        'learning_rate': learning_rate,
        'optimizer': optimizer,
        'tensorboard': tensorboard,
        'log_frequency': log_frequency,
    }
    wandb_logger_config.update(**kwargs)
    if self.wandb_logger is not None:
        self.wandb_logger.update_config(wandb_logger_config)

    # Backwards compatibility: the legacy keyword overrides log_frequency.
    if "tensorboard_log_frequency" in kwargs:
        logger.warning(
            "tensorboard_log_frequency is deprecated. Please use log_frequency instead. This argument will be removed in a future release of DeepChem."
        )
        self.log_frequency = kwargs["tensorboard_log_frequency"]
    else:
        self.log_frequency = log_frequency

    if self.tensorboard:
        self._summary_writer = tf.summary.create_file_writer(self.model_dir)

    if output_types is None:
        self._prediction_outputs = None
        self._loss_outputs = None
        self._variance_outputs = None
        self._other_outputs = None
    else:
        # Sort every output index into the list for its declared type;
        # unrecognized types land in the catch-all list.
        self._prediction_outputs = []
        self._loss_outputs = []
        self._variance_outputs = []
        self._other_outputs = []
        index_lists = {
            'prediction': self._prediction_outputs,
            'loss': self._loss_outputs,
            'variance': self._variance_outputs,
        }
        for index, output_type in enumerate(output_types):
            index_lists.get(output_type, self._other_outputs).append(index)
        # With no dedicated loss outputs, the prediction outputs are used.
        if not self._loss_outputs:
            self._loss_outputs = self._prediction_outputs

    self._built = False
    self._inputs_built = False
    self._training_ops_built = False
    self._output_functions: Dict[Any, Any] = {}
    self._gradient_fn_for_vars: Dict[Any, Any] = {}
def __init__(self,
             model: torch.nn.Module,
             loss: Union[Loss, LossFn],
             output_types: Optional[List[str]] = None,
             batch_size: int = 100,
             model_dir: Optional[str] = None,
             learning_rate: Union[float, LearningRateSchedule] = 0.001,
             optimizer: Optional[Optimizer] = None,
             tensorboard: bool = False,
             wandb: bool = False,
             log_frequency: int = 100,
             device: Optional[torch.device] = None,
             **kwargs) -> None:
    """Create a new TorchModel.

    Parameters
    ----------
    model: torch.nn.Module
        the PyTorch model implementing the calculation
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings
        the type of each output from the model, as described above
    batch_size: int
        default batch size for training and evaluating
    model_dir: str
        the directory on disk where the model will be stored.  If this is
        None, a temporary directory is created.
    learning_rate: float or LearningRateSchedule
        the learning rate to use for fitting.  If optimizer is specified,
        this is ignored.
    optimizer: Optimizer
        the optimizer to use for fitting.  If this is specified,
        learning_rate is ignored.
    tensorboard: bool
        whether to log progress to TensorBoard during training
    wandb: bool
        whether to log progress to Weights & Biases during training
    log_frequency: int
        The frequency at which to log data. Data is logged using `logging`
        by default. If `tensorboard` is set, data is also logged to
        TensorBoard. If `wandb` is set, data is also logged to Weights &
        Biases. Logging happens at global steps. Roughly, a global step
        corresponds to one batch of training. If you'd like a printout
        every 10 batch steps, you'd set `log_frequency=10` for example.
    device: torch.device
        the device on which to run computations.  If None, a device is
        chosen automatically.
    """
    super(TorchModel, self).__init__(model_instance=model,
                                     model_dir=model_dir,
                                     **kwargs)
    self.model = model

    # A Loss object is wrapped so it can be called like a plain loss
    # function; anything else is used directly.
    self._loss_fn: LossFn = (_StandardLoss(model, loss)
                             if isinstance(loss, Loss) else loss)
    self.batch_size = batch_size
    self.optimizer: Optimizer = (Adam(learning_rate=learning_rate)
                                 if optimizer is None else optimizer)
    self.tensorboard = tensorboard

    # Select a device: prefer CUDA when it is available, otherwise the CPU.
    if device is None:
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        device = torch.device(device_name)
    self.device = device
    model.to(device)

    # W&B logging
    if wandb and not is_wandb_available():
        logger.warning(
            "You set wandb to True but W&B is not installed. To use wandb logging, "
            "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
        )
    self.wandb = wandb and is_wandb_available()

    self.log_frequency = log_frequency
    if self.tensorboard:
        self._summary_writer = torch.utils.tensorboard.SummaryWriter(
            self.model_dir)

    if output_types is None:
        self._prediction_outputs = None
        self._loss_outputs = None
        self._variance_outputs = None
        self._other_outputs = None
    else:
        # Sort every output index into the list for its declared type;
        # unrecognized types land in the catch-all list.
        self._prediction_outputs = []
        self._loss_outputs = []
        self._variance_outputs = []
        self._other_outputs = []
        index_lists = {
            'prediction': self._prediction_outputs,
            'loss': self._loss_outputs,
            'variance': self._variance_outputs,
        }
        for index, output_type in enumerate(output_types):
            index_lists.get(output_type, self._other_outputs).append(index)
        # With no dedicated loss outputs, the prediction outputs are used.
        if not self._loss_outputs:
            self._loss_outputs = self._prediction_outputs

    self._built = False
    self._output_functions: Dict[Any, Any] = {}
    self._optimizer_for_vars: Dict[Any, Any] = {}
def __init__(self,
             model,
             loss,
             output_types=None,
             batch_size=100,
             model_dir=None,
             learning_rate=0.001,
             optimizer=None,
             tensorboard=False,
             tensorboard_log_frequency=100,
             **kwargs):
    """Create a new KerasModel.

    Parameters
    ----------
    model: tf.keras.Model
        the Keras model implementing the calculation
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings
        the type of each output from the model, as described above
    batch_size: int
        default batch size for training and evaluating
    model_dir: str
        the directory on disk where the model will be stored.  If this is
        None, a temporary directory is created.
    learning_rate: float or LearningRateSchedule
        the learning rate to use for fitting.  If optimizer is specified,
        this is ignored.
    optimizer: Optimizer
        the optimizer to use for fitting.  If this is specified,
        learning_rate is ignored.
    tensorboard: bool
        whether to log progress to TensorBoard during training
    tensorboard_log_frequency: int
        the frequency at which to log data to TensorBoard, measured in
        batches
    """
    super(KerasModel, self).__init__(model_instance=model,
                                     model_dir=model_dir,
                                     **kwargs)
    self.model = model

    # A Loss object is wrapped so it can be called like a plain loss
    # function; anything else is used directly.
    self._loss_fn = _StandardLoss(model, loss) if isinstance(loss,
                                                             Loss) else loss
    self.batch_size = batch_size
    self.optimizer = (Adam(learning_rate=learning_rate)
                      if optimizer is None else optimizer)

    self.tensorboard = tensorboard
    self.tensorboard_log_frequency = tensorboard_log_frequency
    if self.tensorboard:
        self._summary_writer = tf.summary.create_file_writer(self.model_dir)

    if output_types is None:
        self._prediction_outputs = None
        self._loss_outputs = None
        self._variance_outputs = None
        self._other_outputs = None
    else:
        # Sort every output index into the list for its declared type;
        # unrecognized types land in the catch-all list.
        self._prediction_outputs = []
        self._loss_outputs = []
        self._variance_outputs = []
        self._other_outputs = []
        index_lists = {
            'prediction': self._prediction_outputs,
            'loss': self._loss_outputs,
            'variance': self._variance_outputs,
        }
        for index, output_type in enumerate(output_types):
            index_lists.get(output_type, self._other_outputs).append(index)
        # With no dedicated loss outputs, the prediction outputs are used.
        if not self._loss_outputs:
            self._loss_outputs = self._prediction_outputs

    self._built = False
    self._inputs_built = False
    self._training_ops_built = False
    self._output_functions = {}
    self._gradient_fn_for_vars = {}
def __init__(self,
             env,
             policy,
             max_rollout_length=20,
             optimization_rollouts=8,
             optimization_epochs=4,
             batch_size=64,
             clipping_width=0.2,
             discount_factor=0.99,
             advantage_lambda=0.98,
             value_weight=1.0,
             entropy_weight=0.01,
             optimizer=None,
             model_dir=None,
             use_hindsight=False):
    """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
        the Environment to interact with
    policy: Policy
        the Policy to optimize.  It must have outputs with the names
        'action_prob' and 'value', corresponding to the action
        probabilities and value estimate
    max_rollout_length: int
        the maximum length of rollouts to generate
    optimization_rollouts: int
        the number of rollouts to generate for each iteration of
        optimization
    optimization_epochs: int
        the number of epochs of optimization to perform within each
        iteration
    batch_size: int
        the batch size to use during optimization.  If this is 0, each
        rollout will be used as a separate batch.
    clipping_width: float
        in computing the PPO loss function, the probability ratio is
        clipped to the range (1-clipping_width, 1+clipping_width)
    discount_factor: float
        the discount factor to use when computing rewards
    advantage_lambda: float
        the parameter for trading bias vs. variance in Generalized
        Advantage Estimation
    value_weight: float
        a scale factor for the value loss term in the loss function
    entropy_weight: float
        a scale factor for the entropy term in the loss function
    optimizer: Optimizer
        the optimizer to use.  If None, a default optimizer is used.
    model_dir: str
        the directory in which the model will be saved.  If None, a
        temporary directory will be created.
    use_hindsight: bool
        if True, use Hindsight Experience Replay

    Raises
    ------
    ValueError
        if the policy has recurrent initial states and batch_size is not 0
    """
    # `collections.Sequence` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.  Imported locally so this method does not depend
    # on the file's import block.
    from collections.abc import Sequence

    self._env = env
    self._policy = policy
    self.max_rollout_length = max_rollout_length
    self.optimization_rollouts = optimization_rollouts
    self.optimization_epochs = optimization_epochs
    self.batch_size = batch_size
    self.clipping_width = clipping_width
    self.discount_factor = discount_factor
    self.advantage_lambda = advantage_lambda
    self.value_weight = value_weight
    self.entropy_weight = entropy_weight
    self.use_hindsight = use_hindsight

    # If the first entry of state_shape is itself a sequence, the state is
    # treated as a list of arrays rather than a single array.
    self._state_is_list = isinstance(env.state_shape[0], Sequence)

    if optimizer is None:
        self._optimizer = Adam(learning_rate=0.001, beta1=0.9, beta2=0.999)
    else:
        self._optimizer = optimizer

    self._model = self._build_model(model_dir)

    # Look up the model's output tensors by the names the policy declares.
    output_names = policy.output_names
    output_tensors = self._model._output_tensors
    self._value = output_tensors[output_names.index('value')]
    self._action_prob = output_tensors[output_names.index('action_prob')]
    rnn_outputs = [
        i for i, name in enumerate(output_names) if name == 'rnn_state'
    ]
    self._rnn_final_states = [output_tensors[i] for i in rnn_outputs]

    self._session = tf.Session()
    self._train_op = self._model._tf_optimizer.minimize(
        self._model._loss_tensor)

    self._rnn_states = policy.rnn_initial_states
    if len(self._rnn_states) > 0 and batch_size != 0:
        raise ValueError(
            'Cannot batch rollouts when the policy contains a recurrent layer.  Set batch_size to 0.'
        )

    self._checkpoint = tf.train.Checkpoint()
    self._checkpoint.save_counter  # Ensure the variable has been created
    self._checkpoint.listed = self._model.model.trainable_variables
    self._session.run(self._checkpoint.save_counter.initializer)
def test_roulette(self):
    """Test training a policy for the roulette environment.

    Trains PPO on a roulette game, checks the learned value and preferred
    action, then checks that the parameters can be reloaded into new PPO
    objects both via restore() and via fit(restore=True).
    """

    # This is modeled after the Roulette-v0 environment from OpenAI Gym.
    # The player can bet on any number from 0 to 36, or walk away (which
    # ends the game).  The average reward for any bet is slightly negative,
    # so the best strategy is to walk away.

    class RouletteEnvironment(dc.rl.Environment):

        def __init__(self):
            super(RouletteEnvironment, self).__init__([(1,)], 38)
            self._state = [np.array([0])]

        def step(self, action):
            if action == 37:
                self._terminated = True  # Walk away.
                return 0.0
            wheel = np.random.randint(37)
            if wheel == 0:
                # Only a bet on 0 itself wins when the wheel lands on 0.
                if action == 0:
                    return 35.0
                return -1.0
            # Any non-zero bet wins when it has the same parity as the wheel.
            if action != 0 and wheel % 2 == action % 2:
                return 1.0
            return -1.0

        def reset(self):
            self._terminated = False

    env = RouletteEnvironment()

    # This policy just learns a constant probability for each action, and a
    # constant for the value.

    class TestPolicy(dc.rl.Policy):

        def __init__(self):
            super(TestPolicy, self).__init__(['action_prob', 'value'])

        def create_model(self, **kwargs):

            class TestModel(tf.keras.Model):

                def __init__(self):
                    super(TestModel, self).__init__(**kwargs)
                    self.action = tf.Variable(
                        np.ones(env.n_actions, np.float32))
                    # The dtype must be passed by keyword: the second
                    # positional parameter of tf.Variable is `trainable`.
                    self.value = tf.Variable([0.0], dtype=tf.float32)

                def call(self, inputs, **kwargs):
                    prob = tf.nn.softmax(
                        tf.reshape(self.action, (-1, env.n_actions)))
                    return (prob, self.value)

            return TestModel()

    # Optimize it.

    ppo = dc.rl.PPO(env,
                    TestPolicy(),
                    max_rollout_length=20,
                    optimization_epochs=8,
                    optimizer=Adam(learning_rate=0.003))
    ppo.fit(100000)

    # It should have learned that the expected value is very close to zero,
    # and that the best action is to walk away.  (To keep the test fast, we
    # allow that to be either of the two top actions).

    action_prob, value = ppo.predict([[0]])
    assert -0.8 < value[0] < 0.5
    assert 37 in np.argsort(action_prob.flatten())[-2:]
    assert ppo.select_action([[0]],
                             deterministic=True) == np.argmax(action_prob)

    # Verify that we can create a new PPO object, reload the parameters from
    # the first one, and get the same result.

    new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._model.model_dir)
    new_ppo.restore()
    action_prob2, value2 = new_ppo.predict([[0]])
    assert value2 == value

    # Do the same thing, only using the "restore" argument to fit().

    new_ppo = dc.rl.PPO(env, TestPolicy(), model_dir=ppo._model.model_dir)
    new_ppo.fit(0, restore=True)
    action_prob2, value2 = new_ppo.predict([[0]])
    assert value2 == value
def test_sine_x():
    """Fit a PINNModel to the differential equation f'(x) = -sin(x), f(0) = 1.

    Initial (boundary) values are supplied at the five points
    ``np.linspace(-np.pi, np.pi, 5)`` and the network is expected to
    approximate the solution over the domain (-np.pi, np.pi).
    """

    # The PINNModel requires two user-supplied factories:
    # `create_eval_fn`, which tells the model how to evaluate the network at
    # inference time, and `gradient_fn`, which tells the model how to compute
    # the loss (boundary term plus the differential-equation regulariser).

    def create_eval_fn(forward_fn, params):
        """Build the function used to evaluate the model."""

        @jax.jit
        def eval_model(x, rng=None):
            return jnp.squeeze(forward_fn(params, rng, x))

        return eval_model

    def gradient_fn(forward_fn, loss_outputs, initial_data):
        """Build the loss function used for backpropagation."""
        boundary_data = initial_data['X0']
        boundary_target = initial_data['u0']

        @jax.jit
        def model_loss(params, target, weights, rng, x_train):

            @functools.partial(jax.vmap, in_axes=(None, 0))
            def periodic_loss(params, x):
                """Residual of the differential equation.

                grad(f(x)) = -sin(x), so minimize grad(f(x)) + sin(x).
                """
                x = jnp.expand_dims(x, 0)
                du_dx = jacrev(forward_fn, argnums=(2))(params, rng, x)
                return du_dx + jnp.sin(x)

            boundary_pred = forward_fn(params, rng, boundary_data)
            boundary_loss = jnp.mean((boundary_pred - boundary_target)**2)
            residual = periodic_loss(params, x_train)
            residual_loss = jnp.mean((residual**2))
            return boundary_loss + residual_loss

        return model_loss

    # Defining the Haiku model.
    def network(x):
        mlp = hk.nets.MLP(output_sizes=[256, 128, 1],
                          activation=jax.nn.softplus)
        return mlp(x)

    init_params, forward_fn = hk.transform(network)
    rng = jax.random.PRNGKey(500)
    params = init_params(rng, np.random.rand(1000, 1))
    optimizer = Adam(learning_rate=1e-2)

    # Initial boundary condition at 5 points in [-pi, pi]; these are used in
    # the l2 (boundary) loss.
    boundary_x = np.linspace(-1 * np.pi, 1 * np.pi, 5)
    boundary_u = np.cos(boundary_x)
    initial_data = {
        'X0': jnp.expand_dims(boundary_x, 1),
        'u0': jnp.expand_dims(boundary_u, 1)
    }

    pinn = PINNModel(forward_fn=forward_fn,
                     params=params,
                     initial_data=initial_data,
                     batch_size=1000,
                     optimizer=optimizer,
                     grad_fn=gradient_fn,
                     eval_fn=create_eval_fn,
                     deterministic=True,
                     log_frequency=1000)

    # Training data: 100 unlabeled points in [-pi, pi], used for the
    # differential loss (regulariser).
    train_x = np.expand_dims(np.linspace(-1 * np.pi, 1 * np.pi, 100), 1)
    _ = pinn.fit(NumpyDataset(train_x), nb_epochs=1000)

    # The expected solution must be close to cos(x).
    test_x = np.expand_dims(np.linspace(-1 * np.pi, 1 * np.pi, 1000), 1)
    predicted = pinn.predict(NumpyDataset(test_x))
    expected = np.cos(test_x).squeeze()
    assert np.allclose(expected, predicted, atol=1e-01)
def test_continuous(self):
    """Test A2C on an environment with a continuous action space.

    The state consists of two numbers: a current value and a target value.
    The policy just needs to learn to output the target value (or at least
    move toward it).
    """

    class TestEnvironment(dc.rl.Environment):

        def __init__(self):
            super(TestEnvironment, self).__init__((2,), action_shape=(1,))

        def reset(self):
            target = np.random.uniform(-50, 50)
            self._state = np.array([0, target], dtype=np.float32)
            self._terminated = False
            self.count = 0

        def step(self, action):
            current, target = self._state[0], self._state[1]
            proposed = action[0]
            # Reward is how much closer to the target this step moved us.
            reward = np.abs(target - current) - np.abs(target - proposed)
            self._state = np.array([proposed, target], dtype=np.float32)
            self.count += 1
            # Episodes are cut off after ten steps.
            self._terminated = (self.count == 10)
            return reward

    # A simple policy with no hidden layers.
    class TestPolicy(dc.rl.Policy):

        def __init__(self):
            super(TestPolicy,
                  self).__init__(['action_mean', 'action_std', 'value'])

        def create_model(self, **kwargs):

            class TestModel(tf.keras.Model):

                def __init__(self):
                    # `kwargs` is captured from the enclosing create_model().
                    super(TestModel, self).__init__(**kwargs)
                    self.mean = Dense(1, kernel_initializer='zeros')
                    self.std = tf.constant([10.0])
                    self.value = Dense(1)

                def call(self, inputs, **kwargs):
                    state = inputs[0]
                    action_mean = self.mean(state)
                    value = self.value(state)
                    return (action_mean, self.std, value)

            return TestModel()

    # Optimize it.
    env = TestEnvironment()
    schedule = PolynomialDecay(initial_rate=0.005,
                               final_rate=0.0005,
                               decay_steps=25000)
    a2c = dc.rl.A2C(env,
                    TestPolicy(),
                    discount_factor=0,
                    optimizer=Adam(learning_rate=schedule))
    a2c.fit(25000)

    # Try running it and see if it reaches the target.
    env.reset()
    while not env.terminated:
        action = a2c.select_action(env.state, deterministic=True)
        env.step(action)
    final_value, target = env.state[0], env.state[1]
    distance = np.abs(final_value - target)
    tolerance = max(1.0, 0.1 * np.abs(target))
    assert distance < tolerance
def __init__(self,
             forward_fn: hk.State,
             params: hk.Params,
             loss: Optional[Union[Loss, LossFn]],
             output_types: Optional[List[str]] = None,
             batch_size: int = 100,
             learning_rate: float = 0.001,
             optimizer: Union[optax.GradientTransformation, Optimizer] = None,
             grad_fn: Callable = create_default_gradient_fn,
             update_fn: Callable = create_default_update_fn,
             eval_fn: Callable = create_default_eval_fn,
             rng=jax.random.PRNGKey(1),
             log_frequency: int = 100,
             **kwargs):
    """Create a new JaxModel.

    Parameters
    ----------
    forward_fn: hk.State or Function
        Any Jax based model that has an `apply` method for computing the
        network. Currently only haiku models are supported.
    params: hk.Params
        The parameters of the Jax based network.
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings, optional (default None)
        the type of each output from the model, as described above
    batch_size: int, optional (default 100)
        default batch size for training and evaluating
    learning_rate: float or LearningRateSchedule, optional (default 0.001)
        the learning rate to use for fitting. If optimizer is specified,
        this is ignored.
    optimizer: optax object or Optimizer, optional (default None)
        the optimizer to use for fitting. An
        `optax.GradientTransformation` is used as-is; anything else is
        converted through its `_create_jax_optimizer()` method. If None,
        Adam is created with `learning_rate`.
    grad_fn: Callable, optional
        stored as the factory used to create the gradient function
    update_fn: Callable, optional
        stored as the factory used to create the update function
    eval_fn: Callable, optional
        stored as the factory used to create the evaluation function
    rng: jax.random.PRNGKey, optional (default 1)
        A default global PRNG key to use for drawing random numbers.
    log_frequency: int, optional (default 100)
        The frequency at which to log data. Data is logged using
        `logging` by default.

    Miscellaneous Parameters Yet To Add
    -----------------------------------
    model_dir: str, optional (default None)
        Will be added along with the save & load method
    tensorboard: bool, optional (default False)
        whether to log progress to TensorBoard during training
    wandb: bool, optional (default False)
        whether to log progress to Weights & Biases during training

    Work in Progress
    ----------------
    [1] Integrate the optax losses, optimizers, schedulers with Deepchem
    [2] Support for saving & loading the model.
    """
    super(JaxModel, self).__init__(model=(forward_fn, params), **kwargs)
    warnings.warn(
        'JaxModel is still in active development and all features may not yet be implemented'
    )
    self._loss_fn = loss  # lambda pred, tar: jnp.mean(optax.l2_loss(pred, tar))
    self.batch_size = batch_size
    self.learning_rate = learning_rate

    # Honor the documented `learning_rate` argument when no optimizer is
    # supplied (previously a hard-coded 1e-3 was used regardless).
    if optimizer is None:
        optimizer = Adam(learning_rate=learning_rate)
    if isinstance(optimizer, optax.GradientTransformation):
        self.optimizer = optimizer
    else:
        self.optimizer = optimizer._create_jax_optimizer()

    self.forward_fn = forward_fn
    self.params = params
    self._built = False
    self.log_frequency = log_frequency
    self.rng = rng
    self._create_gradient_fn = grad_fn
    self._create_update_fn = update_fn
    self._create_eval_fn = eval_fn

    if output_types is None:
        self._prediction_outputs = None
        self._loss_outputs = None
        self._variance_outputs = None
        self._other_outputs = None
    else:
        self._prediction_outputs = []
        self._loss_outputs = []
        self._variance_outputs = []
        self._other_outputs = []
        # Record, for each declared output type, the indices of the model
        # outputs of that type.
        for index, output_type in enumerate(output_types):
            if output_type == 'prediction':
                self._prediction_outputs.append(index)
            elif output_type == 'loss':
                self._loss_outputs.append(index)
            elif output_type == 'variance':
                self._variance_outputs.append(index)
            else:
                self._other_outputs.append(index)
        # With no explicit loss outputs, the prediction outputs are used.
        if not self._loss_outputs:
            self._loss_outputs = self._prediction_outputs
def __init__(self,
             model,
             loss,
             output_types=None,
             batch_size=100,
             model_dir=None,
             learning_rate=0.001,
             optimizer=None,
             tensorboard=False,
             log_frequency=100,
             **kwargs):
    """Create a new KerasModel.

    Parameters
    ----------
    model: tf.keras.Model
        the Keras model implementing the calculation
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings
        the type of each output from the model, as described above
    batch_size: int
        default batch size for training and evaluating
    model_dir: str
        the directory on disk where the model will be stored. If this is
        None, a temporary directory is created.
    learning_rate: float or LearningRateSchedule
        the learning rate to use for fitting. If optimizer is specified,
        this is ignored.
    optimizer: Optimizer
        the optimizer to use for fitting. If this is specified,
        learning_rate is ignored.
    tensorboard: bool
        whether to log progress to TensorBoard during training
    log_frequency: int
        The frequency at which to log data. Data is logged using `logging`
        by default. If `tensorboard` is set, data is also logged to
        TensorBoard. Logging happens at global steps. Roughly, a global
        step corresponds to one batch of training. If you'd like a
        printout every 10 batch steps, you'd set `log_frequency=10` for
        example.
    """
    super(KerasModel, self).__init__(model_instance=model,
                                     model_dir=model_dir,
                                     **kwargs)
    self.model = model
    # Loss objects are wrapped; plain callables are used directly.
    self._loss_fn = _StandardLoss(model, loss) if isinstance(loss,
                                                             Loss) else loss
    self.batch_size = batch_size
    self.optimizer = Adam(
        learning_rate=learning_rate) if optimizer is None else optimizer
    self.tensorboard = tensorboard

    # Backwards compatibility: the deprecated keyword, when present, takes
    # precedence over `log_frequency`.
    if "tensorboard_log_frequency" in kwargs:
        logger.warning(
            "tensorboard_log_frequency is deprecated. Please use log_frequency instead. This argument will be removed in a future release of DeepChem."
        )
        log_frequency = kwargs["tensorboard_log_frequency"]
    self.log_frequency = log_frequency

    if self.tensorboard:
        self._summary_writer = tf.summary.create_file_writer(self.model_dir)

    if output_types is None:
        prediction_idx = loss_idx = variance_idx = other_idx = None
    else:
        prediction_idx, loss_idx, variance_idx, other_idx = [], [], [], []
        for index, output_type in enumerate(output_types):
            if output_type == 'prediction':
                prediction_idx.append(index)
            elif output_type == 'loss':
                loss_idx.append(index)
            elif output_type == 'variance':
                variance_idx.append(index)
            else:
                other_idx.append(index)
        # With no explicit loss outputs, the prediction outputs are used.
        if not loss_idx:
            loss_idx = prediction_idx
    self._prediction_outputs = prediction_idx
    self._loss_outputs = loss_idx
    self._variance_outputs = variance_idx
    self._other_outputs = other_idx

    self._built = False
    self._inputs_built = False
    self._training_ops_built = False
    self._output_functions = {}
    self._gradient_fn_for_vars = {}
def __init__(self,
             env,
             policy,
             max_rollout_length=20,
             discount_factor=0.99,
             advantage_lambda=0.98,
             value_weight=1.0,
             entropy_weight=0.01,
             optimizer=None,
             model_dir=None,
             use_hindsight=False):
    """Create an object for optimizing a policy.

    Parameters
    ----------
    env: Environment
        the Environment to interact with
    policy: Policy
        the Policy to optimize. It must have outputs with the names
        'action_prob' and 'value' (for discrete action spaces) or
        'action_mean', 'action_std', and 'value' (for continuous action
        spaces)
    max_rollout_length: int
        the maximum length of rollouts to generate
    discount_factor: float
        the discount factor to use when computing rewards
    advantage_lambda: float
        the parameter for trading bias vs. variance in Generalized
        Advantage Estimation
    value_weight: float
        a scale factor for the value loss term in the loss function
    entropy_weight: float
        a scale factor for the entropy term in the loss function
    optimizer: Optimizer
        the optimizer to use. If None, a default optimizer is used.
    model_dir: str
        the directory in which the model will be saved. If None, a
        temporary directory will be created.
    use_hindsight: bool
        if True, use Hindsight Experience Replay
    """
    self._env = env
    self._policy = policy

    # Hyperparameters.
    self.max_rollout_length = max_rollout_length
    self.discount_factor = discount_factor
    self.advantage_lambda = advantage_lambda
    self.value_weight = value_weight
    self.entropy_weight = entropy_weight
    self.use_hindsight = use_hindsight

    self._state_is_list = isinstance(env.state_shape[0], SequenceCollection)
    self._optimizer = Adam(
        learning_rate=0.001, beta1=0.9,
        beta2=0.999) if optimizer is None else optimizer

    # Locate each required output by name in the policy's output list.
    names = policy.output_names
    self.continuous = ('action_mean' in names)
    self._value_index = names.index('value')
    if self.continuous:
        self._action_mean_index = names.index('action_mean')
        self._action_std_index = names.index('action_std')
    else:
        self._action_prob_index = names.index('action_prob')
    self._rnn_final_state_indices = [
        index for index, name in enumerate(names) if name == 'rnn_state'
    ]
    self._rnn_states = policy.rnn_initial_states

    self._model = self._build_model(model_dir)
    self._checkpoint = tf.train.Checkpoint()
    self._checkpoint.save_counter  # Ensure the variable has been created
    self._checkpoint.listed = self._model.model.trainable_variables
def __init__(self,
             model: torch.nn.Module,
             loss: Union[Loss, LossFn],
             output_types: Optional[List[str]] = None,
             batch_size: int = 100,
             model_dir: Optional[str] = None,
             learning_rate: Union[float, LearningRateSchedule] = 0.001,
             optimizer: Optional[Optimizer] = None,
             tensorboard: bool = False,
             wandb: bool = False,
             log_frequency: int = 100,
             device: Optional[torch.device] = None,
             regularization_loss: Optional[Callable] = None,
             wandb_logger: Optional[WandbLogger] = None,
             **kwargs) -> None:
    """Create a new TorchModel.

    Parameters
    ----------
    model: torch.nn.Module
        the PyTorch model implementing the calculation
    loss: dc.models.losses.Loss or function
        a Loss or function defining how to compute the training loss for
        each batch, as described above
    output_types: list of strings, optional (default None)
        the type of each output from the model, as described above
    batch_size: int, optional (default 100)
        default batch size for training and evaluating
    model_dir: str, optional (default None)
        the directory on disk where the model will be stored. If this is
        None, a temporary directory is created.
    learning_rate: float or LearningRateSchedule, optional (default 0.001)
        the learning rate to use for fitting. If optimizer is specified,
        this is ignored.
    optimizer: Optimizer, optional (default None)
        the optimizer to use for fitting. If this is specified,
        learning_rate is ignored.
    tensorboard: bool, optional (default False)
        whether to log progress to TensorBoard during training
    wandb: bool, optional (default False)
        whether to log progress to Weights & Biases during training
    log_frequency: int, optional (default 100)
        The frequency at which to log data. Data is logged using `logging`
        by default. If `tensorboard` is set, data is also logged to
        TensorBoard. If `wandb` is set, data is also logged to Weights &
        Biases. Logging happens at global steps. Roughly, a global step
        corresponds to one batch of training. If you'd like a printout
        every 10 batch steps, you'd set `log_frequency=10` for example.
    device: torch.device, optional (default None)
        the device on which to run computations. If None, a device is
        chosen automatically.
    regularization_loss: Callable, optional
        a function that takes no arguments, and returns an extra
        contribution to add to the loss function
    wandb_logger: WandbLogger
        the Weights & Biases logger object used to log data and metrics
    """
    super(TorchModel, self).__init__(model=model,
                                     model_dir=model_dir,
                                     **kwargs)
    self.loss = loss  # not used
    self.learning_rate = learning_rate  # not used
    self.output_types = output_types  # not used
    # Loss objects are wrapped; plain callables are used directly.
    self._loss_fn: LossFn = _StandardLoss(self, loss) if isinstance(
        loss, Loss) else loss
    self.batch_size = batch_size
    self.optimizer: Optimizer = Adam(
        learning_rate=learning_rate) if optimizer is None else optimizer
    self.tensorboard = tensorboard
    self.regularization_loss = regularization_loss

    # Select a device: prefer CUDA when available, otherwise the CPU.
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.device = device
    self.model = model.to(device)

    # W&B logging
    if wandb:
        logger.warning(
            "`wandb` argument is deprecated. Please use `wandb_logger` instead. "
            "This argument will be removed in a future release of DeepChem."
        )
        if not _has_wandb:
            logger.warning(
                "You set wandb to True but W&B is not installed. To use wandb logging, "
                "run `pip install wandb; wandb login` see https://docs.wandb.com/huggingface."
            )
    self.wandb = wandb and _has_wandb

    self.wandb_logger = wandb_logger
    # If `wandb=True` and no logger is provided, initialize default logger
    if self.wandb and (self.wandb_logger is None):
        self.wandb_logger = WandbLogger()

    # Setup and initialize W&B logging
    if (self.wandb_logger
            is not None) and (not self.wandb_logger.initialized):
        self.wandb_logger.setup()

    # Update config with TorchModel params
    wandb_logger_config = dict(loss=loss,
                               output_types=output_types,
                               batch_size=batch_size,
                               model_dir=model_dir,
                               learning_rate=learning_rate,
                               optimizer=optimizer,
                               tensorboard=tensorboard,
                               log_frequency=log_frequency,
                               regularization_loss=regularization_loss)
    wandb_logger_config.update(**kwargs)
    if self.wandb_logger is not None:
        self.wandb_logger.update_config(wandb_logger_config)

    self.log_frequency = log_frequency
    if self.tensorboard:
        if not _has_tensorboard:
            raise ImportError(
                "This class requires tensorboard to be installed.")
        self._summary_writer = torch.utils.tensorboard.SummaryWriter(
            self.model_dir)

    if output_types is None:
        prediction_idx = loss_idx = variance_idx = other_idx = None
    else:
        prediction_idx, loss_idx, variance_idx, other_idx = [], [], [], []
        for index, output_type in enumerate(output_types):
            if output_type == 'prediction':
                prediction_idx.append(index)
            elif output_type == 'loss':
                loss_idx.append(index)
            elif output_type == 'variance':
                variance_idx.append(index)
            else:
                other_idx.append(index)
        # With no explicit loss outputs, the prediction outputs are used.
        if not loss_idx:
            loss_idx = prediction_idx
    self._prediction_outputs = prediction_idx
    self._loss_outputs = loss_idx
    self._variance_outputs = variance_idx
    self._other_outputs = other_idx

    self._built = False
    self._output_functions: Dict[Any, Any] = {}
    self._optimizer_for_vars: Dict[Any, Any] = {}
def test_hindsight(self):
    """Test Hindsight Experience Replay.

    The environment is a plane in which the agent moves by steps until it
    reaches a randomly positioned goal. No reward is given until it reaches
    the goal. That makes it very hard to learn by standard methods, since it
    may take a very long time to receive any feedback at all. Using
    hindsight makes it much easier.
    """

    class TestEnvironment(dc.rl.Environment):

        def __init__(self):
            super(TestEnvironment, self).__init__((4,), 4)
            self.moves = [(-1, 0), (1, 0), (0, -1), (0, 1)]

        def reset(self):
            # State layout: the first two entries are the position, the
            # last two are the goal.
            goal = np.random.randint(-50, 50, 2)
            self._state = np.concatenate([[0, 0], goal])
            self._terminated = False
            self.count = 0

        def step(self, action):
            moved = self._state.copy()
            moved[:2] += self.moves[action]
            self._state = moved
            self.count += 1
            reached_goal = np.array_equal(moved[:2], moved[2:])
            # The episode ends on success or after 1000 steps.
            if reached_goal or self.count == 1000:
                self._terminated = True
            return 1 if reached_goal else 0

        def apply_hindsight(self, states, actions, goal):
            new_states = []
            rewards = []
            goal_pos = goal[:2]
            for state, action in zip(states, actions):
                relabeled = state.copy()
                relabeled[2:] = goal_pos
                new_states.append(relabeled)
                pos_after_action = relabeled[:2] + self.moves[action]
                hit_goal = np.array_equal(pos_after_action, goal_pos)
                rewards.append(1 if hit_goal else 0)
                if hit_goal:
                    break
            return new_states, rewards

    # A simple policy with two hidden layers.
    class TestPolicy(dc.rl.Policy):

        def __init__(self):
            super(TestPolicy, self).__init__(['action_prob', 'value'])

        def create_model(self, **kwargs):
            state = Input(shape=(4,))
            hidden1 = Dense(8, activation=tf.nn.relu)(state)
            hidden2 = Dense(8, activation=tf.nn.relu)(hidden1)
            action_prob = Dense(4, activation=tf.nn.softmax,
                                use_bias=False)(hidden2)
            value = Dense(1)(hidden2)
            return tf.keras.Model(inputs=state, outputs=[action_prob, value])

    # Optimize it.
    env = TestEnvironment()
    ppo = dc.rl.PPO(env,
                    TestPolicy(),
                    use_hindsight=True,
                    optimization_epochs=1,
                    batch_size=0,
                    optimizer=Adam(learning_rate=0.001))
    ppo.fit(1500000)

    def run_episode():
        """Play one episode and report whether the goal was reached."""
        env.reset()
        while not env.terminated:
            env.step(ppo.select_action(env.state))
        return np.array_equal(env.state[:2], env.state[2:])

    # Try running it a few times and see if it succeeds.
    pass_count = 0
    for _ in range(5):
        if run_episode():
            pass_count += 1
    assert pass_count >= 3
def test_roulette(self):
    """Train an MCTS policy on a roulette game and check checkpoint restoring.

    The environment is modeled after Roulette-v0 from OpenAI Gym: the player
    may bet on any number from 0 to 36, or walk away (action 37), which ends
    the game. Every bet has a slightly negative average reward, so the best
    strategy is to walk away.
    """

    class RouletteEnvironment(dc.rl.Environment):

        def __init__(self):
            super(RouletteEnvironment, self).__init__([(1,)], 38)
            self._state = [np.array([0])]

        def step(self, action):
            if action == 37:
                # Walk away.
                self._terminated = True
                return 0.0
            wheel = np.random.randint(37)
            if wheel == 0:
                return 35.0 if action == 0 else -1.0
            won = action != 0 and wheel % 2 == action % 2
            return 1.0 if won else -1.0

        def reset(self):
            self._terminated = False

    env = RouletteEnvironment()

    # This policy just learns a constant probability for each action, and a
    # constant for the value.
    class TestPolicy(dc.rl.Policy):

        def create_layers(self, state, **kwargs):
            action = Variable(np.ones(env.n_actions))
            reshaped = Reshape(in_layers=[action],
                               shape=(-1, env.n_actions))
            action_prob = SoftMax(in_layers=[reshaped])
            value = Variable([0.0])
            return {'action_prob': action_prob, 'value': value}

    # Optimize it.
    mcts = dc.rl.MCTS(env,
                      TestPolicy(),
                      max_search_depth=5,
                      n_search_episodes=200,
                      optimizer=Adam(learning_rate=0.005))
    mcts.fit(10, steps_per_iteration=50, epochs_per_iteration=50)

    # It should have learned that the expected value is very close to zero,
    # and that the best action is to walk away.
    action_prob, value = mcts.predict([[0]])
    assert -0.5 < value[0] < 0.5
    assert action_prob.argmax() == 37
    assert mcts.select_action([[0]], deterministic=True) == 37

    # Verify that we can create a new MCTS object, reload the parameters
    # from the first one, and get the same result.
    restored_mcts = dc.rl.MCTS(env,
                               TestPolicy(),
                               model_dir=mcts._graph.model_dir)
    restored_mcts.restore()
    _, restored_value = restored_mcts.predict([[0]])
    assert restored_value == value

    # Do the same thing, only using the "restore" argument to fit().
    restored_mcts = dc.rl.MCTS(env,
                               TestPolicy(),
                               model_dir=mcts._graph.model_dir)
    restored_mcts.fit(0, restore=True)
    _, restored_value = restored_mcts.predict([[0]])
    assert restored_value == value