Пример #1
0
    def predict_on_batch(self, X):
        """Run the model on one batch of inputs and return its decoded output.

        The model is restored via `self.restore()` first if it has not been
        restored yet.

        Args:
          X: Batch of input features; handed unchanged to
            `self.construct_feed_dict`.

        Returns:
          A new numpy array: the first `self.num_tasks` fetched outputs,
          rearranged to batch_size x num_tasks (x ...), squeezed and passed
          through `from_one_hot(..., axis=-1)`.
          NOTE(review): `from_one_hot` is defined elsewhere; presumably it
          collapses the last axis into class indices -- confirm.

        Raises:
          AssertionError: If model is not in evaluation mode.
          ValueError: If the stacked task outputs are neither 2D nor 3D.
        """
        if not self._restored_model:
            self.restore()
        with self.graph.as_default():
            assert not model_ops.is_training()
            self.require_attributes(['output'])

            # run eval data through the model
            num_tasks = self.num_tasks
            with self._get_shared_session().as_default():
                feed_dict = self.construct_feed_dict(X)
                data = self._get_shared_session().run(self.output,
                                                      feed_dict=feed_dict)
                batch_output = np.asarray(data[:num_tasks], dtype=float)
                # reshape to batch_size x num_tasks x ...
                if batch_output.ndim == 3:
                    batch_output = batch_output.transpose((1, 0, 2))
                elif batch_output.ndim == 2:
                    batch_output = batch_output.transpose((1, 0))
                else:
                    raise ValueError(
                        'Unrecognized rank combination for output: %s' %
                        (batch_output.shape, ))

                # Only one batch is processed, so no accumulation or
                # concatenation across batches is needed.
                outputs = np.array(
                    from_one_hot(np.squeeze(batch_output), axis=-1))

        return np.copy(outputs)
Пример #2
0
  def predict_on_batch(self, X):
    """Run the model on one batch of inputs and return its decoded output.

    The model is restored via `self.restore()` first if it has not been
    restored yet.

    Args:
      X: Batch of input features; handed unchanged to
        `self.construct_feed_dict`.

    Returns:
      A new numpy array: the first `self.num_tasks` fetched outputs,
      rearranged to batch_size x num_tasks (x ...), squeezed and passed
      through `from_one_hot(..., axis=-1)`.
      NOTE(review): `from_one_hot` is defined elsewhere; presumably it
      collapses the last axis into class indices -- confirm.

    Raises:
      AssertionError: If model is not in evaluation mode.
      ValueError: If the stacked task outputs are neither 2D nor 3D.
    """
    if not self._restored_model:
      self.restore()
    with self.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output'])

      # run eval data through the model
      num_tasks = self.num_tasks
      with self._get_shared_session().as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session().run(
            self.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:num_tasks], dtype=float)
        # reshape to batch_size x num_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))

        # Only one batch is processed, so no accumulation or concatenation
        # across batches is needed.
        outputs = np.array(from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
Пример #3
0
    def restore(self):
        """Load saved variables into the shared session, at most once.

        Takes no arguments: the checkpoint to load is whatever
        `self._find_last_checkpoint()` returns. If `self._restored_model` is
        already set, the call returns immediately without touching the graph.

        Raises:
          AssertionError: If the model is in training mode.
        """
        if not self._restored_model:
            with self.graph.as_default():
                assert not model_ops.is_training()
                checkpoint_path = self._find_last_checkpoint()

                tf.train.Saver().restore(self._get_shared_session(),
                                         checkpoint_path)
                # Mark as done so later calls become no-ops.
                self._restored_model = True
Пример #4
0
  def restore(self):
    """Load saved variables into the shared session, at most once.

    Takes no arguments: the checkpoint to load is whatever
    `self._find_last_checkpoint()` returns. If `self._restored_model` is
    already set, the call returns immediately without touching the graph.

    Raises:
      AssertionError: If the model is in training mode.
    """
    if not self._restored_model:
      with self.graph.as_default():
        assert not model_ops.is_training()
        checkpoint_path = self._find_last_checkpoint()

        tf.train.Saver().restore(self._get_shared_session(), checkpoint_path)
        # Mark as done so later calls become no-ops.
        self._restored_model = True
Пример #5
0
  def fit(self,
          dataset,
          summaries=False,
          max_checkpoints_to_keep=5):
    """Fit the model.

    Trains for `self.model_params["nb_epoch"]` epochs using batches of size
    `self.model_params["batch_size"]`. A checkpoint is saved before training
    starts, at the end of every epoch, and once more when training completes.

    Args:
      dataset: Dataset object that represents data on disk. It must provide
        `iterbatches(batch_size)` yielding (X, y, w, ids) tuples.
      summaries: If True, add summaries for model parameters.
        NOTE(review): this flag is not read anywhere in this method.
      max_checkpoints_to_keep: Integer. Maximum number of checkpoints to keep;
        older checkpoints will be deleted.

    Raises:
      AssertionError: If model is not in training mode.
    """
    batch_size = self.model_params["batch_size"]
    nb_epoch = self.model_params["nb_epoch"]
    log("Training for %d epochs" % nb_epoch, self.verbosity)
    with self.graph.as_default():
      assert model_ops.is_training()
      self.require_attributes(['loss', 'global_step', 'updates'])
      train_op = self.get_training_op()
      # The result is unused, but the call happens before the graph is
      # written out below, so it is retained to leave that graph unchanged.
      # NOTE(review): confirm whether this op can simply be dropped.
      tf.no_op()
      tf.train.write_graph(
          tf.get_default_graph().as_graph_def(), self.logdir, 'train.pbtxt')
      with self._get_shared_session() as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
        # Save an initial checkpoint.
        saver.save(sess, self._save_path, global_step=self.global_step)
        for epoch in range(nb_epoch):
          # Reset per epoch so an epoch without batches is reported as such
          # instead of failing on an unbound (or stale) loss value.
          loss = None
          for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(batch_size):
            # Run training op and compute summaries.
            feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
            _, loss, _ = sess.run(
                [train_op.values()[0], self.loss, self.updates],
                feed_dict=feed_dict)
          # Save model checkpoints at end of epoch
          saver.save(sess, self._save_path, global_step=self.global_step)
          if loss is None:
            log('Ending epoch %d: no batches were processed' % epoch,
                self.verbosity)
          else:
            log('Ending epoch %d: loss %g' % (epoch, loss), self.verbosity)
        # Always save a final checkpoint when complete.
        saver.save(sess, self._save_path, global_step=self.global_step)
Пример #6
0
  def predict_on_batch(self, X):
    """Return model output for the provided input.

    The model is restored via `self.restore()` first if it has not been
    restored yet.

    This method previously decoded and returned the fetched *labels* rather
    than the model output; it now returns the output, as its name and summary
    state.

    Args:
      X: Batch of input features; handed unchanged to
        `self.construct_feed_dict`.

    Returns:
      A new numpy array: the first `self.num_tasks` fetched outputs,
      rearranged to batch_size x num_tasks (x ...), restricted to the rows
      flagged valid in the feed dict, squeezed and passed through
      `from_one_hot(..., axis=-1)`.
      NOTE(review): `from_one_hot` is defined elsewhere; presumably it
      collapses the last axis into class indices -- confirm.

    Raises:
      AssertionError: If model is not in evaluation mode.
      ValueError: If output and labels are not both 3D or both 2D.
    """
    if not self._restored_model:
      self.restore()
    with self.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output', 'labels', 'weights'])

      # run eval data through the model
      num_tasks = self.num_tasks
      start = time.time()
      with self._get_shared_session().as_default():
        feed_dict = self.construct_feed_dict(X)
        # Labels and weights are still fetched so the session call is
        # unchanged; labels are used only for the rank check below.
        data = self._get_shared_session().run(
            self.output + self.labels + self.weights,
            feed_dict=feed_dict)
        batch_output = np.asarray(data[:num_tasks], dtype=float)
        batch_labels = np.asarray(data[num_tasks:num_tasks * 2], dtype=float)
        # reshape to batch_size x num_tasks x ...
        if batch_output.ndim == 3 and batch_labels.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2 and batch_labels.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output and labels: %s %s' %
              (batch_output.shape, batch_labels.shape))
        valid = feed_dict[self.valid.name]
        # only take valid outputs
        if np.count_nonzero(~valid):
          batch_output = batch_output[valid]

        logging.info('Eval batch took %g seconds', time.time() - start)

        outputs = np.array(from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)
Пример #7
0
  def predict_on_batch(self, X):
    """Return model output for the provided input.

    The model is restored via `self.restore()` first if it has not been
    restored yet. The batch is padded with `pad_features` before being fed to
    the model, and the padding rows are removed from the result again.

    Args:
      X: Batch of input features. Must support `len()`.

    Returns:
      A new numpy array: the first `self.num_tasks` fetched outputs,
      rearranged to batch_size x num_tasks (x ...), truncated to the original
      `len(X)` rows and squeezed.

    Raises:
      AssertionError: If model is not in evaluation mode.
      ValueError: If the stacked task outputs are not 1D, 2D or 3D.
    """
    if not self._restored_model:
      self.restore()
    with self.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output'])

      # run eval data through the model
      num_tasks = self.num_tasks
      with self._get_shared_session().as_default():
        # Remember the true batch length so padding rows can be pruned later.
        n_samples = len(X)
        # Some tensorflow models can't handle variadic batches,
        # especially models using tf.pack, tf.split. Pad batch-size
        # to handle these cases.
        X_padded = pad_features(self.model_params["batch_size"], X)
        feed_dict = self.construct_feed_dict(X_padded)
        data = self._get_shared_session().run(
            self.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:num_tasks], dtype=float)
        # reshape to batch_size x num_tasks x ...
        if batch_outputs.ndim == 3:
          batch_outputs = batch_outputs.transpose((1, 0, 2))
        elif batch_outputs.ndim == 2:
          batch_outputs = batch_outputs.transpose((1, 0))
        # Handle edge case when batch-size is 1.
        elif batch_outputs.ndim == 1:
          # Reshape with the padded length (what was actually fed); keep
          # n_samples untouched so the pruning step below still applies.
          batch_outputs = batch_outputs.reshape((len(X_padded), num_tasks))
        else:
          # The shape must be wrapped in a 1-tuple, otherwise `%` unpacks it
          # as the argument list and raises TypeError instead.
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_outputs.shape,))
        # Prune away any padding that was added
        batch_outputs = batch_outputs[:n_samples]

        outputs = np.squeeze(batch_outputs)

    return np.copy(outputs)