Python pad_features示例，deepchem.data.pad_features Python示例

示例#1

0

显示文件

文件： lr.py 项目： prasadkawthekar/deepchem

    def predict_proba_on_batch(self, X):
        """Return two-class probabilities for a single batch of features.

        The fetched per-task values are used as the second class column; the
        first column is computed as ``1 - value`` and both are joined along
        the last axis.  When ``self.pad_batches`` is set, the batch is padded
        with ``pad_features`` first and the padded rows are kept in the
        result.

        Args:
          X: Batch of input features accepted by ``construct_feed_dict``.

        Returns:
          A copy of the joined array with its first two axes swapped
          (intended layout per the original comment: batch_size x n_tasks
          x ...).

        Raises:
          ValueError: If the joined array is neither 2D nor 3D.
        """
        features = pad_features(self.batch_size, X) if self.pad_batches else X
        if not self._restored_model:
            self.restore()

        # Axis orders that swap the first two axes, keyed by array rank.
        axis_orders = {3: (1, 0, 2), 2: (1, 0)}

        with self.eval_graph.graph.as_default():
            task_count = self.n_tasks
            with self._get_shared_session(train=False).as_default():
                feeds = self.construct_feed_dict(features)
                fetched = self._get_shared_session(train=False).run(
                    self.eval_graph.output, feed_dict=feeds)
                positive = np.asarray(fetched[:task_count], dtype=float)
                # The other class gets the complementary probability.
                negative = 1.0 - positive
                joined = np.concatenate([negative, positive], axis=-1)
                if joined.ndim not in axis_orders:
                    raise ValueError(
                        'Unrecognized rank combination for output: %s ' %
                        (joined.shape, ))
                probabilities = joined.transpose(axis_orders[joined.ndim])

        return np.copy(probabilities)

示例#2

0

显示文件

文件： lr.py 项目： joegomes/deepchem

  def predict_proba_on_batch(self, X):
    """Compute two-class probabilities for one batch.

    Each fetched per-task value becomes the second class column and
    ``1 - value`` becomes the first; the two are concatenated on the last
    axis.  If ``self.pad_batches`` is true the input is padded with
    ``pad_features`` beforehand, and the padding rows stay in the output.

    Args:
      X: Batch of features understood by ``construct_feed_dict``.

    Returns:
      A copy of the concatenated array with axes 0 and 1 exchanged.

    Raises:
      ValueError: If the concatenated array is not 2D or 3D.
    """
    if self.pad_batches:
      X = pad_features(self.batch_size, X)
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():
      n_outputs = self.n_tasks
      with self._get_shared_session(train=False).as_default():
        feeds = self.construct_feed_dict(X)
        raw = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feeds)
        prob_one = np.asarray(raw[:n_outputs], dtype=float)
        prob_zero = 1.0 - prob_one
        both = np.concatenate((prob_zero, prob_one), axis=-1)
        # Only rank 2 and rank 3 results are supported.
        if both.ndim not in (2, 3):
          raise ValueError('Unrecognized rank combination for output: %s ' %
                           (both.shape,))
        # Exchange the first two axes (original comment: reshape to
        # batch_size x n_tasks x ...).
        result = np.swapaxes(both, 0, 1)

    return np.copy(result)

示例#3

0

显示文件

  def predict_on_batch(self, X, pad_batch=False):
    """Return model output for one batch of input features.

    The model is restored via ``self.restore()`` first if
    ``self._restored_model`` is not set.

    Args:
      X: Batch of input features; must support ``len()`` and be accepted by
        ``construct_feed_dict``.
      pad_batch: If True, pad ``X`` with ``pad_features`` to
        ``self.batch_size`` before running the model.  Rows added by the
        padding are dropped from the returned array.

    Returns:
      Numpy array of model outputs with the sample axis first, truncated to
      the original ``len(X)``.  Size-one axes after the sample axis (for
      example a single task) are squeezed away; the sample axis itself is
      always kept.

    Raises:
      ValueError: If the stacked per-task output is not 1D, 2D or 3D.
    """
    len_unpadded = len(X)
    if pad_batch:
      X = pad_features(self.batch_size, X)

    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      with self._get_shared_session(train=False).as_default():
        n_samples = len(X)
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_outputs.ndim == 3:
          batch_outputs = batch_outputs.transpose((1, 0, 2))
        elif batch_outputs.ndim == 2:
          batch_outputs = batch_outputs.transpose((1, 0))
        # Handle edge case when batch-size is 1.
        elif batch_outputs.ndim == 1:
          batch_outputs = batch_outputs.reshape((n_samples, n_tasks))
        else:
          # The shape must be wrapped in a one-element tuple; otherwise the
          # shape tuple itself is consumed as the format arguments and a
          # TypeError is raised instead of this ValueError.
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_outputs.shape,))
        # Prune away any padding that was added
        batch_outputs = batch_outputs[:n_samples]
        # Squeeze only the axes after the sample axis.  An unrestricted
        # squeeze would also drop the sample axis for a one-sample batch,
        # making the final slice below cut the wrong axis (or fail on a
        # 0-d array).
        unit_axes = tuple(
            axis for axis in range(1, batch_outputs.ndim)
            if batch_outputs.shape[axis] == 1)
        outputs = np.squeeze(batch_outputs, axis=unit_axes)

    outputs = np.copy(outputs)
    return outputs[:len_unpadded]

示例#4

0

显示文件

文件： __init__.py 项目： deepchem/deepchem

  def predict_on_batch(self, X, pad_batch=False):
    """Return model output for one batch of input features.

    The model is restored via ``self.restore()`` first if
    ``self._restored_model`` is not set.

    Args:
      X: Batch of input features; must support ``len()`` and be accepted by
        ``construct_feed_dict``.
      pad_batch: If True, pad ``X`` with ``pad_features`` to
        ``self.batch_size`` before running the model.  Rows added by the
        padding are dropped from the returned array.

    Returns:
      Numpy array of model outputs with the sample axis first, truncated to
      the original ``len(X)``.  Size-one axes after the sample axis (for
      example a single task) are squeezed away; the sample axis itself is
      always kept.

    Raises:
      ValueError: If the stacked per-task output is not 1D, 2D or 3D.
    """
    len_unpadded = len(X)
    if pad_batch:
      X = pad_features(self.batch_size, X)

    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      with self._get_shared_session(train=False).as_default():
        n_samples = len(X)
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_outputs.ndim == 3:
          batch_outputs = batch_outputs.transpose((1, 0, 2))
        elif batch_outputs.ndim == 2:
          batch_outputs = batch_outputs.transpose((1, 0))
        # Handle edge case when batch-size is 1.
        elif batch_outputs.ndim == 1:
          batch_outputs = batch_outputs.reshape((n_samples, n_tasks))
        else:
          # Wrap the shape in a one-element tuple so "%s" formats the whole
          # shape; passing the bare shape tuple makes the formatting itself
          # fail with a TypeError before the ValueError can be raised.
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_outputs.shape,))
        # Prune away any padding that was added
        batch_outputs = batch_outputs[:n_samples]
        # Drop size-one axes but never the leading sample axis: squeezing
        # it for a one-sample batch would make the final slice operate on
        # the task axis (or raise on a 0-d array).
        unit_axes = tuple(
            axis for axis in range(1, batch_outputs.ndim)
            if batch_outputs.shape[axis] == 1)
        outputs = np.squeeze(batch_outputs, axis=unit_axes)

    outputs = np.copy(outputs)
    return outputs[:len_unpadded]

示例#5

0

显示文件

文件： graph_models.py 项目： AhlamMD/deepchem

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """Yield one feed dict per batch of ``dataset``, for ``epochs`` passes.

    The original author describes this as the same generator as the Weave
    models.  Batches are always requested unpadded from
    ``dataset.iterbatches`` and only the features are then padded with
    ``pad_features``.

    Args:
      dataset: Object providing ``iterbatches(batch_size, deterministic,
        pad_batches)`` that yields ``(X_b, y_b, w_b, ids_b)`` tuples.
      epochs: Number of passes over ``dataset``.
      predict: Unused by this generator; kept for interface compatibility.
      deterministic: Forwarded to ``dataset.iterbatches``.
      pad_batches: Unused by this generator (features are always padded
        with ``pad_features``); kept for interface compatibility.

    Yields:
      A dict keyed by the model's placeholders: labels (one-hot encoded in
      'classification' mode) and task weights when present, plus the
      concatenated atom features, pair features, per-atom molecule indices
      and atom-pair index table for the batch.
    """
    for _ in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=False):

        # NOTE(review): only the features are padded here; y_b and w_b keep
        # their original length - confirm the graph tolerates that on a
        # short final batch.
        X_b = pad_features(self.batch_size, X_b)
        feed_dict = dict()
        if y_b is not None:
          if self.mode == 'classification':
            feed_dict[self.labels[0]] = to_one_hot(y_b.flatten(),
                                                   self.n_classes).reshape(
                                                       -1, self.n_tasks,
                                                       self.n_classes)
          else:
            feed_dict[self.labels[0]] = y_b
        if w_b is not None:
          feed_dict[self.task_weights[0]] = w_b

        atom_feat = []
        pair_feat = []
        atom_split = []
        atom_to_pair = []
        # Offset of the current molecule's first atom within the batch.
        start = 0
        for im, mol in enumerate(X_b):
          n_atoms = mol.get_num_atoms()
          # molecule index for every atom of this molecule
          atom_split.extend([im] * n_atoms)
          # all (i, j) atom index pairs of this molecule, shifted by the
          # batch-wide atom offset
          C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
          atom_to_pair.append(
              np.transpose(
                  np.array([C1.flatten() + start,
                            C0.flatten() + start])))
          start = start + n_atoms

          # atom features
          atom_feat.append(mol.get_atom_features())
          # pair features, flattened to one row per atom pair
          pair_feat.append(
              np.reshape(mol.get_pair_features(),
                         (n_atoms * n_atoms, self.n_pair_feat)))

        feed_dict[self.atom_features] = np.concatenate(atom_feat, axis=0)
        feed_dict[self.pair_features] = np.concatenate(pair_feat, axis=0)
        feed_dict[self.atom_split] = np.array(atom_split)
        feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
        yield feed_dict

示例#6

0

显示文件

文件： __init__.py 项目： amoliu/deepchem

 def predict_on_batch(self, X, pad_batch=True):
   """
   Predict on one batch by delegating to ``self.model_instance``.

   With ``pad_batch`` set, ``X`` is padded with ``pad_features`` to the
   wrapped model's ``batch_size`` and the predictions are cut back to the
   original length of ``X``; otherwise ``X`` is passed through unchanged.
   """
   model = self.model_instance
   if not pad_batch:
     return model.predict_on_batch(X)

   n_real = len(X)
   padded = pad_features(model.batch_size, X)
   predictions = model.predict_on_batch(padded)
   # Keep only the rows that correspond to real (unpadded) samples.
   return predictions[:n_real]

示例#7

0

显示文件

文件： graph_models.py 项目： zzachw/deepchem

    def default_generator(self,
                          dataset,
                          epochs=1,
                          mode='fit',
                          deterministic=True,
                          pad_batches=True):
        """Yield ``(inputs, [labels], [weights])`` per batch of ``dataset``.

        Args:
          dataset: Object providing ``iterbatches(batch_size, deterministic,
            pad_batches)`` that yields ``(X_b, y_b, w_b, ids_b)`` tuples.
          epochs: Number of passes over ``dataset``.
          mode: Unused by this generator (``self.mode`` decides whether
            labels are one-hot encoded); kept for interface compatibility.
          deterministic: Forwarded to ``dataset.iterbatches``.
          pad_batches: Forwarded to ``dataset.iterbatches``.

        Yields:
          A tuple ``(inputs, [y_b], [w_b])`` where ``inputs`` is the list
          ``[atom features, pair features, per-atom molecule indices,
          atom-pair index table, n_samples]``.  ``n_samples`` is the batch
          length measured before ``pad_features`` is applied.
        """
        for _ in range(epochs):
            for (X_b, y_b, w_b,
                 ids_b) in dataset.iterbatches(batch_size=self.batch_size,
                                               deterministic=deterministic,
                                               pad_batches=pad_batches):

                # Record the batch length before the features are padded.
                n_samples = np.array(X_b.shape[0])
                X_b = pad_features(self.batch_size, X_b)
                if y_b is not None and self.mode == 'classification':
                    y_b = to_one_hot(y_b.flatten(), self.n_classes).reshape(
                        -1, self.n_tasks, self.n_classes)

                atom_feat = []
                pair_feat = []
                atom_split = []
                atom_to_pair = []
                # Offset of the current molecule's first atom in the batch.
                start = 0
                for im, mol in enumerate(X_b):
                    n_atoms = mol.get_num_atoms()
                    # molecule index for every atom of this molecule
                    atom_split.extend([im] * n_atoms)
                    # all (i, j) atom index pairs of this molecule, shifted
                    # by the batch-wide atom offset
                    C0, C1 = np.meshgrid(np.arange(n_atoms),
                                         np.arange(n_atoms))
                    atom_to_pair.append(
                        np.transpose(
                            np.array(
                                [C1.flatten() + start,
                                 C0.flatten() + start])))
                    start = start + n_atoms

                    # atom features
                    atom_feat.append(mol.get_atom_features())
                    # pair features, flattened to one row per atom pair
                    pair_feat.append(
                        np.reshape(mol.get_pair_features(),
                                   (n_atoms * n_atoms, self.n_pair_feat)))

                inputs = [
                    np.concatenate(atom_feat, axis=0),
                    np.concatenate(pair_feat, axis=0),
                    np.array(atom_split),
                    np.concatenate(atom_to_pair, axis=0), n_samples
                ]
                yield (inputs, [y_b], [w_b])

示例#8

0

显示文件

文件： multitask_classifier.py 项目： XericZephyr/deepchem

    def predict_proba_on_batch(self, X, n_classes=2):
        """Return class probabilities for one batch.

        Runs ``self.outputs`` in ``self.sess`` and copies each returned
        per-task array into a ``(len(X), n_tasks, n_classes)`` array.  When
        ``self.pad_batches`` is set, ``X`` is padded with ``pad_features``
        first and the padded length is used for the result.
        """
        features = pad_features(self.batch_size, X) if self.pad_batches else X
        task_count = self.n_tasks
        with self.sess.as_default():
            feeds = self.construct_feed_dict(features)
            per_task = self.sess.run(self.outputs, feed_dict=feeds)

        probabilities = np.zeros((len(features), task_count, n_classes))
        for task_index, task_probs in enumerate(per_task):
            probabilities[:, task_index, :] = task_probs
        return probabilities

示例#9

0

显示文件

文件： multitask_classifier.py 项目： joegomes/deepchem

  def predict_proba_on_batch(self, X, n_classes=2):
    """Compute per-task class probabilities for a batch.

    ``self.outputs`` is evaluated in ``self.sess``; every per-task result is
    written into one slot of a ``(len(X), n_tasks, n_classes)`` array.  If
    ``self.pad_batches`` is true, ``X`` is padded with ``pad_features``
    before evaluation and the result has the padded length.
    """
    if self.pad_batches:
      X = pad_features(self.batch_size, X)
    n_outputs = self.n_tasks
    with self.sess.as_default():
      feeds = self.construct_feed_dict(X)
      task_results = self.sess.run(self.outputs, feed_dict=feeds)

    result = np.zeros((len(X), n_outputs, n_classes))
    for slot, task_result in enumerate(task_results):
      # Fill the (samples x classes) plane belonging to this task.
      result[:, slot] = task_result
    return result

示例#10

0

显示文件

文件： multitask_classifier.py 项目： XericZephyr/deepchem

    def predict_on_batch(self, X):
        """Return the highest-scoring class index per sample and task.

        Runs ``self.outputs`` in ``self.sess`` and takes the argmax along
        axis 1 of each per-task result.  The returned float array has shape
        ``(len(X), n_tasks)``; when ``self.pad_batches`` is set, ``X`` is
        padded with ``pad_features`` first and the padded length is used.
        """
        features = pad_features(self.batch_size, X) if self.pad_batches else X
        task_count = self.n_tasks
        with self.sess.as_default():
            feeds = self.construct_feed_dict(features)
            per_task = self.sess.run(self.outputs, feed_dict=feeds)

        predictions = np.zeros((len(features), task_count))
        for task_index, task_scores in enumerate(per_task):
            predictions[:, task_index] = np.argmax(task_scores, axis=1)
        return predictions

示例#11

0

显示文件

文件： multitask_classifier.py 项目： joegomes/deepchem

  def predict_on_batch(self, X):
    """Predict a class index for every sample and task of a batch.

    ``self.outputs`` is evaluated in ``self.sess`` and each per-task result
    is reduced with an argmax over axis 1.  The float result has shape
    ``(len(X), n_tasks)``; with ``self.pad_batches`` set, ``X`` is padded
    with ``pad_features`` beforehand and the padded length applies.
    """
    if self.pad_batches:
      X = pad_features(self.batch_size, X)
    n_outputs = self.n_tasks
    with self.sess.as_default():
      feeds = self.construct_feed_dict(X)
      task_results = self.sess.run(self.outputs, feed_dict=feeds)

    labels = np.zeros((len(X), n_outputs))
    for slot, scores in enumerate(task_results):
      # Winning class per sample for this task.
      labels[:, slot] = np.argmax(scores, axis=1)
    return labels

示例#12

0

显示文件

文件： __init__.py 项目： calebgeniesse/deepchem

    def predict_proba_on_batch(self, X):
        """Return the model's per-task outputs for one batch.

        The model is restored via ``self.restore()`` first if
        ``self._restored_model`` is not set.  When ``self.pad_batches`` is
        set, ``X`` is padded with ``pad_features`` and the padded rows stay
        in the result.

        Args:
          X: Batch of input features accepted by ``construct_feed_dict``.

        Returns:
          A copy of the stacked per-task outputs with the first two axes
          swapped (intended layout per the original comment: batch_size x
          n_tasks x ...).  The array may be 3D, e.g. for classifier models
          that return class probabilities.

        Raises:
          ValueError: If the stacked output is neither 2D nor 3D.
        """
        features = pad_features(self.batch_size, X) if self.pad_batches else X
        if not self._restored_model:
            self.restore()

        # Axis orders that swap the first two axes, keyed by array rank.
        axis_orders = {3: (1, 0, 2), 2: (1, 0)}

        with self.eval_graph.graph.as_default():
            task_count = self.n_tasks
            with self._get_shared_session(train=False).as_default():
                feeds = self.construct_feed_dict(features)
                fetched = self._get_shared_session(train=False).run(
                    self.eval_graph.output, feed_dict=feeds)
                stacked = np.asarray(fetched[:task_count], dtype=float)
                if stacked.ndim not in axis_orders:
                    raise ValueError(
                        'Unrecognized rank combination for output: %s ' %
                        (stacked.shape, ))
                # Per the original author's note, softmax has already been
                # applied during graph construction, so no further
                # normalization happens here.
                probabilities = stacked.transpose(axis_orders[stacked.ndim])

        return np.copy(probabilities)

示例#13

0

显示文件

文件： __init__.py 项目： apappu97/deepchem

  def predict_proba_on_batch(self, X, pad_batch=False):
    """Return the model's per-task outputs for one batch.

    The model is restored via ``self.restore()`` first if
    ``self._restored_model`` is not set.

    Args:
      X: Batch of input features accepted by ``construct_feed_dict``.
      pad_batch: If True, pad ``X`` with ``pad_features`` to
        ``self.batch_size`` before running the model.  The padded rows are
        not removed from the result.

    Returns:
      A copy of the stacked per-task outputs with axes 0 and 1 exchanged.
      The array may be 3D, e.g. for classifier models that return class
      probabilities.

    Raises:
      ValueError: If the stacked output is neither 2D nor 3D.
    """
    if pad_batch:
      X = pad_features(self.batch_size, X)
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():
      n_outputs = self.n_tasks
      with self._get_shared_session(train=False).as_default():
        feeds = self.construct_feed_dict(X)
        raw = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feeds)
        stacked = np.asarray(raw[:n_outputs], dtype=float)
        # Only rank 2 and rank 3 results are supported.
        if stacked.ndim not in (2, 3):
          raise ValueError(
              'Unrecognized rank combination for output: %s ' %
              (stacked.shape,))
        # Exchange the first two axes (original comment: reshape to
        # batch_size x n_tasks x ...).  Per the original author's note,
        # softmax has already been applied during graph construction.
        result = np.swapaxes(stacked, 0, 1)

    return np.copy(result)

示例#14

0

显示文件

文件： lr.py 项目： bowenliu16/deepchem

  def predict_on_batch(self, X, pad_batch=False):
    """Return ``from_one_hot`` labels for one batch of input features.

    The fetched per-task values are paired with their complement
    (``1 - value``) on a new axis 2 and the result is passed to
    ``from_one_hot`` along the last axis.  The model is restored via
    ``self.restore()`` first if ``self._restored_model`` is not set.

    Args:
      X: Batch of input features accepted by ``construct_feed_dict``.
      pad_batch: If True, pad ``X`` with ``pad_features`` to
        ``self.batch_size`` before running the model.  Padded rows are not
        removed from the result.

    Returns:
      A copy of the array produced by ``from_one_hot``.

    Raises:
      ValueError: If the two-class array is neither 2D nor 3D after
        squeezing.
    """
    if pad_batch:
      X = pad_features(self.batch_size, X)

    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():

      # run eval data through the model
      n_tasks = self.n_tasks
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # transfer 2D prediction tensor to 2D x n_classes(=2): the other
        # class gets the complementary value.
        complementary = 1.0 - batch_output
        # NOTE(review): this squeeze drops every size-one axis, including a
        # size-one task or sample axis, which changes which axes the
        # transpose below swaps - confirm single-task / single-sample
        # batches behave as intended.
        batch_output = np.squeeze(
            np.stack([complementary, batch_output], axis=2))
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
          batch_output = batch_output.transpose((1, 0, 2))
        elif batch_output.ndim == 2:
          batch_output = batch_output.transpose((1, 0))
        else:
          raise ValueError(
              'Unrecognized rank combination for output: %s' %
              (batch_output.shape,))

        outputs = np.array(from_one_hot(np.squeeze(batch_output), axis=-1))

    return np.copy(outputs)

示例#15

0

显示文件

文件： lr.py 项目： minghao2016/deepchem

    def predict_on_batch(self, X, pad_batch=False):
        """Return ``from_one_hot`` labels for one batch of input features.

        The fetched per-task values are paired with their complement
        (``1 - value``) on a new axis 2 and the result is passed to
        ``from_one_hot`` along the last axis.  The model is restored via
        ``self.restore()`` first if ``self._restored_model`` is not set.

        Args:
          X: Batch of input features accepted by ``construct_feed_dict``.
          pad_batch: If True, pad ``X`` with ``pad_features`` to
            ``self.batch_size`` before running the model.  Padded rows are
            not removed from the result.

        Returns:
          A copy of the array produced by ``from_one_hot``.

        Raises:
          ValueError: If the two-class array is neither 2D nor 3D after
            squeezing.
        """
        if pad_batch:
            X = pad_features(self.batch_size, X)

        if not self._restored_model:
            self.restore()
        with self.eval_graph.graph.as_default():

            # run eval data through the model
            n_tasks = self.n_tasks
            with self._get_shared_session(train=False).as_default():
                feed_dict = self.construct_feed_dict(X)
                data = self._get_shared_session(train=False).run(
                    self.eval_graph.output, feed_dict=feed_dict)
                batch_output = np.asarray(data[:n_tasks], dtype=float)
                # transfer 2D prediction tensor to 2D x n_classes(=2): the
                # other class gets the complementary value.
                complementary = 1.0 - batch_output
                # NOTE(review): this squeeze drops every size-one axis,
                # including a size-one task or sample axis, which changes
                # which axes the transpose below swaps - confirm
                # single-task / single-sample batches behave as intended.
                batch_output = np.squeeze(
                    np.stack([complementary, batch_output], axis=2))
                # reshape to batch_size x n_tasks x ...
                if batch_output.ndim == 3:
                    batch_output = batch_output.transpose((1, 0, 2))
                elif batch_output.ndim == 2:
                    batch_output = batch_output.transpose((1, 0))
                else:
                    raise ValueError(
                        'Unrecognized rank combination for output: %s' %
                        (batch_output.shape, ))

                outputs = np.array(
                    from_one_hot(np.squeeze(batch_output), axis=-1))

        return np.copy(outputs)

示例#16

0

显示文件

文件： graph_models.py 项目： domsooch/dsdc_demo

    def default_generator(self,
                          dataset,
                          epochs=1,
                          predict=False,
                          deterministic=True,
                          pad_batches=True):
        """Yield one feed dict per batch of ``dataset``, for ``epochs`` passes.

        The original author describes this as the same generator as the
        Weave models.  Batches are always requested unpadded from
        ``dataset.iterbatches`` and only the features are then padded with
        ``pad_features``.

        Args:
          dataset: Object providing ``iterbatches(batch_size, deterministic,
            pad_batches)`` that yields ``(X_b, y_b, w_b, ids_b)`` tuples.
          epochs: Number of passes over ``dataset``.
          predict: Unused by this generator; kept for interface
            compatibility.
          deterministic: Forwarded to ``dataset.iterbatches``.
          pad_batches: Unused by this generator (features are always padded
            with ``pad_features``); kept for interface compatibility.

        Yields:
          A dict keyed by the model's placeholders: labels (one-hot encoded
          in 'classification' mode) and task weights when present, plus the
          concatenated atom features, pair features, per-atom molecule
          indices and atom-pair index table for the batch.
        """
        for _ in range(epochs):
            for (X_b, y_b, w_b,
                 ids_b) in dataset.iterbatches(batch_size=self.batch_size,
                                               deterministic=deterministic,
                                               pad_batches=False):

                # NOTE(review): only the features are padded here; y_b and
                # w_b keep their original length - confirm the graph
                # tolerates that on a short final batch.
                X_b = pad_features(self.batch_size, X_b)
                feed_dict = dict()
                if y_b is not None:
                    if self.mode == 'classification':
                        feed_dict[self.labels[0]] = to_one_hot(
                            y_b.flatten(),
                            self.n_classes).reshape(-1, self.n_tasks,
                                                    self.n_classes)
                    else:
                        feed_dict[self.labels[0]] = y_b
                if w_b is not None:
                    feed_dict[self.task_weights[0]] = w_b

                atom_feat = []
                pair_feat = []
                atom_split = []
                atom_to_pair = []
                # Offset of the current molecule's first atom in the batch.
                start = 0
                for im, mol in enumerate(X_b):
                    n_atoms = mol.get_num_atoms()
                    # molecule index for every atom of this molecule
                    atom_split.extend([im] * n_atoms)
                    # all (i, j) atom index pairs of this molecule, shifted
                    # by the batch-wide atom offset
                    C0, C1 = np.meshgrid(np.arange(n_atoms),
                                         np.arange(n_atoms))
                    atom_to_pair.append(
                        np.transpose(
                            np.array(
                                [C1.flatten() + start,
                                 C0.flatten() + start])))
                    start = start + n_atoms

                    # atom features
                    atom_feat.append(mol.get_atom_features())
                    # pair features, flattened to one row per atom pair
                    pair_feat.append(
                        np.reshape(mol.get_pair_features(),
                                   (n_atoms * n_atoms, self.n_pair_feat)))

                feed_dict[self.atom_features] = np.concatenate(atom_feat,
                                                               axis=0)
                feed_dict[self.pair_features] = np.concatenate(pair_feat,
                                                               axis=0)
                feed_dict[self.atom_split] = np.array(atom_split)
                feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair,
                                                              axis=0)
                yield feed_dict