def predict_on_batch(self, X):
    """Return model predictions for a single batch of inputs.

    If the model has not been restored yet, ``self.restore()`` is called
    first.

    Args:
      X: Batch of input features; passed unchanged to
        ``self.construct_feed_dict``.

    Returns:
      A single numpy array (a fresh copy): the first ``self.num_tasks``
      fetched outputs stacked, rearranged so the second axis of the stack
      comes first (presumably the batch axis -- confirm against the model
      definition), squeezed, and passed through
      ``from_one_hot(..., axis=-1)``.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is true.
      ValueError: If the stacked output is neither 2D nor 3D.
    """
    if not self._restored_model:
        self.restore()
    with self.graph.as_default():
        assert not model_ops.is_training()
        self.require_attributes(['output'])

        # run eval data through the model
        num_tasks = self.num_tasks
        with self._get_shared_session().as_default():
            feed_dict = self.construct_feed_dict(X)
            data = self._get_shared_session().run(
                self.output, feed_dict=feed_dict)
            batch_output = np.asarray(data[:num_tasks], dtype=float)
            # reshape to batch_size x num_tasks x ...
            if batch_output.ndim == 3:
                batch_output = batch_output.transpose((1, 0, 2))
            elif batch_output.ndim == 2:
                batch_output = batch_output.transpose((1, 0))
            else:
                raise ValueError(
                    'Unrecognized rank combination for output: %s' %
                    (batch_output.shape,))

            # Only one batch is ever evaluated here, so there is nothing to
            # accumulate or concatenate before post-processing.
            outputs = np.array(
                from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
def predict_on_batch(self, X):
    """Return model predictions for a single batch of inputs.

    If the model has not been restored yet, ``self.restore()`` is called
    first.

    Args:
      X: Batch of input features; passed unchanged to
        ``self.construct_feed_dict``.

    Returns:
      A single numpy array (a fresh copy): the first ``self.num_tasks``
      fetched outputs stacked, rearranged so the second axis of the stack
      comes first (presumably the batch axis -- confirm against the model
      definition), squeezed, and passed through
      ``from_one_hot(..., axis=-1)``.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is true.
      ValueError: If the stacked output is neither 2D nor 3D.
    """
    if not self._restored_model:
        self.restore()
    with self.graph.as_default():
        assert not model_ops.is_training()
        self.require_attributes(['output'])

        # run eval data through the model
        num_tasks = self.num_tasks
        with self._get_shared_session().as_default():
            feed_dict = self.construct_feed_dict(X)
            data = self._get_shared_session().run(
                self.output, feed_dict=feed_dict)
            batch_output = np.asarray(data[:num_tasks], dtype=float)
            # reshape to batch_size x num_tasks x ...
            if batch_output.ndim == 3:
                batch_output = batch_output.transpose((1, 0, 2))
            elif batch_output.ndim == 2:
                batch_output = batch_output.transpose((1, 0))
            else:
                raise ValueError(
                    'Unrecognized rank combination for output: %s' %
                    (batch_output.shape,))

            # Only one batch is ever evaluated here, so there is nothing to
            # accumulate or concatenate before post-processing.
            outputs = np.array(
                from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
def restore(self):
    """Restore model variables from the most recent training checkpoint.

    Takes no arguments: the checkpoint is whatever
    ``self._find_last_checkpoint()`` returns. Calling this on an object that
    is already marked as restored is a no-op.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is true.
    """
    if self._restored_model:
        # Already restored; do not touch the graph or the session again.
        return
    with self.graph.as_default():
        assert not model_ops.is_training()
        last_checkpoint = self._find_last_checkpoint()

        saver = tf.train.Saver()
        saver.restore(self._get_shared_session(), last_checkpoint)
        # Only flag success once the restore call has returned.
        self._restored_model = True
def fit(self, dataset, summaries=False, max_checkpoints_to_keep=5):
    """Fit the model.

    Runs ``self.model_params["nb_epoch"]`` passes over ``dataset`` in batches
    of ``self.model_params["batch_size"]``. A checkpoint is written before
    training, after every epoch, and once more when training completes.

    Args:
      dataset: Dataset object that represents data on disk. Must provide
        ``iterbatches(batch_size)`` yielding ``(X, y, w, ids)`` tuples.
      summaries: Accepted for interface compatibility; not used by this
        implementation.
      max_checkpoints_to_keep: Integer. Maximum number of checkpoints to keep;
        older checkpoints will be deleted.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is false.
    """
    batch_size = self.model_params["batch_size"]
    nb_epoch = self.model_params["nb_epoch"]
    log("Training for %d epochs" % nb_epoch, self.verbosity)
    with self.graph.as_default():
        assert model_ops.is_training()
        self.require_attributes(['loss', 'global_step', 'updates'])
        train_op = self.get_training_op()
        # Kept from the original so the graph written below is unchanged;
        # the returned op itself is not used.
        tf.no_op()
        tf.train.write_graph(
            tf.get_default_graph().as_graph_def(), self.logdir, 'train.pbtxt')
        with self._get_shared_session() as sess:
            sess.run(tf.initialize_all_variables())
            saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
            # Save an initial checkpoint.
            saver.save(sess, self._save_path, global_step=self.global_step)
            # Defined up front so the per-epoch log line cannot hit an
            # unbound name when the dataset yields no batches.
            loss = float('nan')
            for epoch in range(nb_epoch):
                for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(batch_size):
                    # Run training op and compute summaries.
                    feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
                    _, loss, _ = sess.run(
                        [train_op.values()[0], self.loss, self.updates],
                        feed_dict=feed_dict)
                # Save model checkpoints at end of epoch
                saver.save(sess, self._save_path,
                           global_step=self.global_step)
                log('Ending epoch %d: loss %g' % (epoch, loss),
                    self.verbosity)
            # Always save a final checkpoint when complete.
            saver.save(sess, self._save_path, global_step=self.global_step)
def predict_on_batch(self, X):
    """Return model output for the provided input.

    If the model has not been restored yet, ``self.restore()`` is called
    first.

    Args:
      X: Batch of input features; passed unchanged to
        ``self.construct_feed_dict``.

    Returns:
      A single numpy array (a fresh copy) holding the model outputs: the
      stacked per-task outputs rearranged so the second axis of the stack
      comes first, restricted to the entries flagged valid in the feed dict,
      squeezed, and passed through ``from_one_hot(..., axis=-1)``.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is true.
      ValueError: If output and labels are not both 3D or both 2D.
    """
    if not self._restored_model:
        self.restore()
    with self.graph.as_default():
        assert not model_ops.is_training()
        self.require_attributes(['output', 'labels', 'weights'])

        # run eval data through the model
        num_tasks = self.num_tasks
        start = time.time()
        with self._get_shared_session().as_default():
            feed_dict = self.construct_feed_dict(X)
            # Labels and weights are still fetched so the session call and
            # the documented rank check stay as before; only the model
            # output is returned.
            data = self._get_shared_session().run(
                self.output + self.labels + self.weights,
                feed_dict=feed_dict)
            batch_output = np.asarray(data[:num_tasks], dtype=float)
            batch_labels = np.asarray(
                data[num_tasks:num_tasks * 2], dtype=float)
            # reshape to batch_size x num_tasks x ...
            if batch_output.ndim == 3 and batch_labels.ndim == 3:
                batch_output = batch_output.transpose((1, 0, 2))
            elif batch_output.ndim == 2 and batch_labels.ndim == 2:
                batch_output = batch_output.transpose((1, 0))
            else:
                raise ValueError(
                    'Unrecognized rank combination for output and labels: '
                    '%s %s' % (batch_output.shape, batch_labels.shape))
            valid = feed_dict[self.valid.name]
            # only take valid outputs
            if np.count_nonzero(~valid):
                batch_output = batch_output[valid]
            logging.info('Eval batch took %g seconds', time.time() - start)

            # Previously the labels were post-processed and returned here,
            # so callers never received the predictions.
            outputs = np.array(
                from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
def predict_on_batch(self, X):
    """Return model predictions for a single batch of inputs.

    If the model has not been restored yet, ``self.restore()`` is called
    first. The batch is passed through ``pad_features`` with the configured
    ``batch_size`` before evaluation, and rows beyond the original
    ``len(X)`` are dropped from the result.

    Args:
      X: Batch of input features. Must support ``len()``.

    Returns:
      A single numpy array (a fresh copy): the stacked per-task outputs
      rearranged so the second axis of the stack comes first, truncated to
      the original number of samples, then squeezed.

    Raises:
      AssertionError: If ``model_ops.is_training()`` is true.
      ValueError: If the stacked output is not 1D, 2D or 3D.
    """
    if not self._restored_model:
        self.restore()
    with self.graph.as_default():
        assert not model_ops.is_training()
        self.require_attributes(['output'])

        # run eval data through the model
        num_tasks = self.num_tasks
        with self._get_shared_session().as_default():
            n_samples = len(X)
            # Some tensorflow models can't handle variadic batches,
            # especially models using tf.pack, tf.split. Pad batch-size
            # to handle these cases.
            X = pad_features(self.model_params["batch_size"], X)
            # Tracked separately from n_samples so the padding prune below
            # still uses the caller's original sample count.
            n_padded = len(X)
            feed_dict = self.construct_feed_dict(X)
            data = self._get_shared_session().run(
                self.output, feed_dict=feed_dict)
            batch_outputs = np.asarray(data[:num_tasks], dtype=float)
            # reshape to batch_size x num_tasks x ...
            if batch_outputs.ndim == 3:
                batch_outputs = batch_outputs.transpose((1, 0, 2))
            elif batch_outputs.ndim == 2:
                batch_outputs = batch_outputs.transpose((1, 0))
            # Handle edge case when batch-size is 1.
            elif batch_outputs.ndim == 1:
                batch_outputs = batch_outputs.reshape((n_padded, num_tasks))
            else:
                # The shape must be wrapped in a 1-tuple: passing the shape
                # tuple itself to % raises TypeError, not this ValueError.
                raise ValueError(
                    'Unrecognized rank combination for output: %s' %
                    (batch_outputs.shape,))
            # Prune away any padding that was added
            batch_outputs = batch_outputs[:n_samples]
            outputs = np.squeeze(batch_outputs)

    return np.copy(outputs)