Example #1
def test_weave_gather_gaussian_histogram():
    """Test Gaussian histogram expansion in WeaveGather."""
    import numpy as np
    from rdkit import Chem

    import deepchem as dc
    from deepchem.models import layers

    n_atoms = 4  # CCC and C contain 4 atoms in total
    raw_smiles = ['CCC', 'C']
    mols = [Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = dc.feat.WeaveFeaturizer()
    mols = featurizer.featurize(mols)
    gather = layers.WeaveGather(batch_size=2, n_input=75)
    atom_feat = []
    atom_split = []
    for im, mol in enumerate(mols):
        n_atoms = mol.get_num_atoms()
        atom_split.extend([im] * n_atoms)

        # atom features
        atom_feat.append(mol.get_atom_features())
    inputs = [
        np.array(np.concatenate(atom_feat, axis=0), dtype=np.float32),
        np.array(atom_split)
    ]
    # Call gaussian_histogram directly on the concatenated atom features;
    # the full gather would also segment-sum per molecule using inputs[1].
    outputs = gather.gaussian_histogram(inputs[0])
    # The Gaussian histogram expands each feature into 11 Gaussian buckets.
    assert np.array(outputs).shape == (
        4,
        11 * 75,
    )
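For intuition, here is a hedged NumPy sketch of what the 11-bucket expansion computes: each scalar feature becomes its normalized membership under 11 fixed Gaussians. The (mean, std) pairs below follow the values used by DeepChem's WeaveGather, but treat the exact constants and the helper name as assumptions of this sketch, not a library API.

import numpy as np

# (mean, std) of the 11 membership Gaussians; assumed to match WeaveGather.
GAUSSIAN_MEMBERSHIPS = [(-1.645, 0.283), (-1.080, 0.170), (-0.739, 0.134),
                        (-0.468, 0.118), (-0.228, 0.114), (0.000, 0.114),
                        (0.228, 0.114), (0.468, 0.118), (0.739, 0.134),
                        (1.080, 0.170), (1.645, 0.283)]

def gaussian_histogram_sketch(x):
    """Hypothetical helper: expand (n_atoms, n_feat) to (n_atoms, 11 * n_feat)."""
    dists = []
    for mean, std in GAUSSIAN_MEMBERSHIPS:
        pdf = np.exp(-0.5 * ((x - mean) / std) ** 2) / (std * np.sqrt(2 * np.pi))
        peak = 1.0 / (std * np.sqrt(2 * np.pi))  # pdf value at its own mean
        dists.append(pdf / peak)                 # scale each bucket to [0, 1]
    stacked = np.stack(dists, axis=2)                # (n_atoms, n_feat, 11)
    stacked /= stacked.sum(axis=2, keepdims=True)    # memberships sum to 1
    return stacked.reshape(x.shape[0], -1)           # (n_atoms, n_feat * 11)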
Example #2
def test_weave_gather():
    """Test invoking WeaveGather."""
    import numpy as np
    from rdkit import Chem

    import deepchem as dc
    from deepchem.models import layers

    n_atoms = 4  # CCC and C contain 4 atoms in total
    raw_smiles = ['CCC', 'C']
    mols = [Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = dc.feat.WeaveFeaturizer()
    mols = featurizer.featurize(mols)
    atom_feat = []
    atom_split = []
    for im, mol in enumerate(mols):
        n_atoms = mol.get_num_atoms()
        atom_split.extend([im] * n_atoms)

        # atom features
        atom_feat.append(mol.get_atom_features())
    inputs = [
        np.array(np.concatenate(atom_feat, axis=0), dtype=np.float32),
        np.array(atom_split)
    ]
    # Try without compression
    gather = layers.WeaveGather(batch_size=2, n_input=75, gaussian_expand=True)
    # Outputs should be [mol1_vec, mol2_vec]
    outputs = gather(inputs)
    assert len(outputs) == 2
    assert np.array(outputs[0]).shape == (11 * 75, )
    assert np.array(outputs[1]).shape == (11 * 75, )

    # Try with compression
    gather = layers.WeaveGather(batch_size=2,
                                n_input=75,
                                gaussian_expand=True,
                                compress_post_gaussian_expansion=True)
    # Outputs should be [mol1_vec, mol2_vec]
    outputs = gather(inputs)
    assert len(outputs) == 2
    assert np.array(outputs[0]).shape == (75, )
    assert np.array(outputs[1]).shape == (75, )
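The gather reduces the expanded per-atom rows to one row per molecule with a segment sum keyed by atom_split, which is why the test gets exactly two output vectors. A minimal runnable sketch of that reduction on toy data (tf.math.segment_sum is the actual TensorFlow op; the shapes are stand-ins):

import numpy as np
import tensorflow as tf

# Toy stand-in for the test's inputs: 4 atoms with 3 features each; the
# first molecule owns atoms 0-2 and the second owns atom 3.
atom_feat = np.arange(12, dtype=np.float32).reshape(4, 3)
atom_split = np.array([0, 0, 0, 1])

# Row i of the result is the sum of the rows of atom_feat whose segment
# id in atom_split equals i, i.e. one pooled vector per molecule.
per_mol = tf.math.segment_sum(atom_feat, atom_split)
assert per_mol.shape == (2, 3)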
Example #3
  def __init__(self,
               n_tasks: int,
               n_atom_feat: OneOrMany[int] = 75,
               n_pair_feat: OneOrMany[int] = 14,
               n_hidden: int = 50,
               n_graph_feat: int = 128,
               n_weave: int = 2,
               fully_connected_layer_sizes: List[int] = [2000, 100],
               weight_init_stddevs: OneOrMany[float] = [0.01, 0.04],
               bias_init_consts: OneOrMany[float] = [0.5, 3.0],
               weight_decay_penalty: float = 0.0,
               weight_decay_penalty_type: str = "l2",
               dropouts: OneOrMany[float] = 0.25,
               activation_fns: OneOrMany[KerasActivationFn] = tf.nn.relu,
               batch_normalize: bool = True,
               batch_normalize_kwargs: Dict = {
                   "renorm": True,
                   "fused": False
               },
               gaussian_expand: bool = True,
               compress_post_gaussian_expansion: bool = False,
               mode: str = "classification",
               n_classes: int = 2,
               batch_size: int = 100,
               **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
      Number of tasks
    n_atom_feat: int, optional
      Number of features per atom.
    n_pair_feat: int, optional
      Number of features per pair of atoms.
    n_hidden: int, optional
      Number of units (convolution depths) in the corresponding hidden layer
    n_graph_feat: int, optional
      Number of output features for each molecule (graph)
    n_weave: int, optional
      The number of weave layers in this model.
    fully_connected_layer_sizes: list
      The size of each dense layer in the network.  The length of
      this list determines the number of layers.
    weight_init_stddevs: list or float
      The standard deviation of the distribution to use for weight
      initialization of each layer.  The length of this list should
      equal len(fully_connected_layer_sizes).  Alternatively this may
      be a single value instead of a list, in which case the same
      value is used for every layer.
    bias_init_consts: list or float
      The value to initialize the biases in each layer to.  The
      length of this list should equal
      len(fully_connected_layer_sizes).  Alternatively this may be a
      single value instead of a list, in which case the same value is
      used for every layer.
    weight_decay_penalty: float
      The magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
      The type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
      The dropout probability to use for each layer.  The length of this
      list should equal len(fully_connected_layer_sizes).  Alternatively
      this may be a single value instead of a list, in which case the
      same value is used for every layer.
    activation_fns: list or object
      The TensorFlow activation function to apply to each layer.  The
      length of this list should equal len(fully_connected_layer_sizes).
      Alternatively this may be a single value instead of a list, in
      which case the same value is used for every layer.
    batch_normalize: bool, optional (default True)
      If this is turned on, apply batch normalization before applying
      activation functions on convolutional and fully connected layers.
    batch_normalize_kwargs: Dict, optional (default `{"renorm": True, "fused": False}`)
      Batch normalization is a complex layer with many potential arguments
      that change its behavior. This layer accepts user-defined parameters
      which are passed to all `BatchNormalization` layers in `WeaveModel`,
      `WeaveLayer`, and `WeaveGather`.
    gaussian_expand: boolean, optional (default True)
      Whether to expand each dimension of the atomic features by a
      Gaussian histogram
    compress_post_gaussian_expansion: bool, optional (default False)
      If True, compress the results of the Gaussian expansion back to the
      original dimensions of the input.
    mode: str
      Either "classification" or "regression" for type of model.
    n_classes: int
      Number of classes to predict (only used in classification mode)
    """
    if mode not in ['classification', 'regression']:
      raise ValueError("mode must be either 'classification' or 'regression'")

    if not isinstance(n_atom_feat, collections.abc.Sequence):
      n_atom_feat = [n_atom_feat] * n_weave
    if not isinstance(n_pair_feat, collections.abc.Sequence):
      n_pair_feat = [n_pair_feat] * n_weave
    n_layers = len(fully_connected_layer_sizes)
    if not isinstance(weight_init_stddevs, collections.abc.Sequence):
      weight_init_stddevs = [weight_init_stddevs] * n_layers
    if not isinstance(bias_init_consts, collections.abc.Sequence):
      bias_init_consts = [bias_init_consts] * n_layers
    if not isinstance(dropouts, collections.abc.Sequence):
      dropouts = [dropouts] * n_layers
    if not isinstance(activation_fns, collections.abc.Sequence):
      activation_fns = [activation_fns] * n_layers
    if weight_decay_penalty != 0.0:
      if weight_decay_penalty_type == 'l1':
        regularizer = tf.keras.regularizers.l1(weight_decay_penalty)
      else:
        regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
    else:
      regularizer = None

    self.n_tasks = n_tasks
    self.n_atom_feat = n_atom_feat
    self.n_pair_feat = n_pair_feat
    self.n_hidden = n_hidden
    self.n_graph_feat = n_graph_feat
    self.mode = mode
    self.n_classes = n_classes

    # Build the model.

    atom_features = Input(shape=(self.n_atom_feat[0],))
    pair_features = Input(shape=(self.n_pair_feat[0],))
    pair_split = Input(shape=tuple(), dtype=tf.int32)
    atom_split = Input(shape=tuple(), dtype=tf.int32)
    atom_to_pair = Input(shape=(2,), dtype=tf.int32)
    inputs = [atom_features, pair_features, pair_split, atom_to_pair]
    for ind in range(n_weave):
      n_atom = self.n_atom_feat[ind]
      n_pair = self.n_pair_feat[ind]
      if ind < n_weave - 1:
        n_atom_next = self.n_atom_feat[ind + 1]
        n_pair_next = self.n_pair_feat[ind + 1]
      else:
        n_atom_next = n_hidden
        n_pair_next = n_hidden
      weave_layer_ind_A, weave_layer_ind_P = layers.WeaveLayer(
          n_atom_input_feat=n_atom,
          n_pair_input_feat=n_pair,
          n_atom_output_feat=n_atom_next,
          n_pair_output_feat=n_pair_next,
          batch_normalize=batch_normalize)(inputs)
      inputs = [weave_layer_ind_A, weave_layer_ind_P, pair_split, atom_to_pair]
    # Final atom-layer convolution. Note this differs slightly from the paper
    # since we use a tanh activation. This seems necessary for numerical
    # stability.
    dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer_ind_A)
    if batch_normalize:
      dense1 = BatchNormalization(**batch_normalize_kwargs)(dense1)
    weave_gather = layers.WeaveGather(
        batch_size,
        n_input=self.n_graph_feat,
        gaussian_expand=gaussian_expand,
        compress_post_gaussian_expansion=compress_post_gaussian_expansion)(
            [dense1, atom_split])

    if n_layers > 0:
      # Now fully connected layers
      input_layer = weave_gather
      for layer_size, weight_stddev, bias_const, dropout, activation_fn in zip(
          fully_connected_layer_sizes, weight_init_stddevs, bias_init_consts,
          dropouts, activation_fns):
        layer = Dense(
            layer_size,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=weight_stddev),
            bias_initializer=tf.constant_initializer(value=bias_const),
            kernel_regularizer=regularizer)(input_layer)
        if dropout > 0.0:
          layer = Dropout(rate=dropout)(layer)
        if batch_normalize:
          # Should this allow for training?
          layer = BatchNormalization(**batch_normalize_kwargs)(layer)
        layer = Activation(activation_fn)(layer)
        input_layer = layer
      output = input_layer
    else:
      output = weave_gather

    n_tasks = self.n_tasks
    if self.mode == 'classification':
      n_classes = self.n_classes
      logits = Reshape((n_tasks, n_classes))(Dense(n_tasks * n_classes)(output))
      output = Softmax()(logits)
      outputs = [output, logits]
      output_types = ['prediction', 'loss']
      loss: Loss = SoftmaxCrossEntropy()
    else:
      output = Dense(n_tasks)(output)
      outputs = [output]
      output_types = ['prediction']
      loss = L2Loss()
    model = tf.keras.Model(
        inputs=[
            atom_features, pair_features, pair_split, atom_split, atom_to_pair
        ],
        outputs=outputs)
    super(WeaveModel, self).__init__(
        model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
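For context, a hedged usage sketch of this constructor. The MolNet loader and keyword arguments follow the usual DeepChem pattern (load_delaney with featurizer='Weave'), but exact dataset availability and defaults are assumptions of this sketch rather than guarantees:

import deepchem as dc

# Hedged sketch: train a regression WeaveModel on Delaney solubility.
tasks, (train, valid, test), transformers = dc.molnet.load_delaney(
    featurizer='Weave')
model = dc.models.WeaveModel(n_tasks=len(tasks),
                             mode='regression',
                             batch_size=100)
model.fit(train, nb_epoch=1)
scores = model.evaluate(
    valid, [dc.metrics.Metric(dc.metrics.pearson_r2_score)], transformers)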
Example #4
    def __init__(self,
                 n_tasks,
                 n_atom_feat=75,
                 n_pair_feat=14,
                 n_hidden=50,
                 n_graph_feat=128,
                 mode="classification",
                 n_classes=2,
                 batch_size=100,
                 **kwargs):
        """
    Parameters
    ----------
    n_tasks: int
      Number of tasks
    n_atom_feat: int, optional
      Number of features per atom.
    n_pair_feat: int, optional
      Number of features per pair of atoms.
    n_hidden: int, optional
      Number of units(convolution depths) in corresponding hidden layer
    n_graph_feat: int, optional
      Number of output features for each molecule(graph)
    mode: str
      Either "classification" or "regression" for type of model.
    n_classes: int
      Number of classes to predict (only used in classification mode)
    """
        if mode not in ['classification', 'regression']:
            raise ValueError(
                "mode must be either 'classification' or 'regression'")
        self.n_tasks = n_tasks
        self.n_atom_feat = n_atom_feat
        self.n_pair_feat = n_pair_feat
        self.n_hidden = n_hidden
        self.n_graph_feat = n_graph_feat
        self.mode = mode
        self.n_classes = n_classes

        # Build the model.

        atom_features = Input(shape=(self.n_atom_feat, ))
        pair_features = Input(shape=(self.n_pair_feat, ))
        pair_split = Input(shape=tuple(), dtype=tf.int32)
        atom_split = Input(shape=tuple(), dtype=tf.int32)
        atom_to_pair = Input(shape=(2, ), dtype=tf.int32)
        weave_layer1A, weave_layer1P = layers.WeaveLayer(
            n_atom_input_feat=self.n_atom_feat,
            n_pair_input_feat=self.n_pair_feat,
            n_atom_output_feat=self.n_hidden,
            n_pair_output_feat=self.n_hidden)(
                [atom_features, pair_features, pair_split, atom_to_pair])
        weave_layer2A, weave_layer2P = layers.WeaveLayer(
            n_atom_input_feat=self.n_hidden,
            n_pair_input_feat=self.n_hidden,
            n_atom_output_feat=self.n_hidden,
            n_pair_output_feat=self.n_hidden,
            update_pair=False)(
                [weave_layer1A, weave_layer1P, pair_split, atom_to_pair])
        dense1 = Dense(self.n_graph_feat, activation=tf.nn.tanh)(weave_layer2A)
        batch_norm1 = BatchNormalization(epsilon=1e-5)(dense1)
        weave_gather = layers.WeaveGather(batch_size,
                                          n_input=self.n_graph_feat,
                                          gaussian_expand=True)(
                                              [batch_norm1, atom_split])

        n_tasks = self.n_tasks
        if self.mode == 'classification':
            n_classes = self.n_classes
            logits = Reshape(
                (n_tasks, n_classes))(Dense(n_tasks * n_classes)(weave_gather))
            output = Softmax()(logits)
            outputs = [output, logits]
            output_types = ['prediction', 'loss']
            loss = SoftmaxCrossEntropy()
        else:
            output = Dense(n_tasks)(weave_gather)
            outputs = [output]
            output_types = ['prediction']
            loss = L2Loss()
        model = tf.keras.Model(inputs=[
            atom_features, pair_features, pair_split, atom_split, atom_to_pair
        ],
                               outputs=outputs)
        super(WeaveModel, self).__init__(model,
                                         loss,
                                         output_types=output_types,
                                         batch_size=batch_size,
                                         **kwargs)
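The five Input tensors above are fed from flattened per-batch arrays. Below is a hedged sketch of assembling such a batch from featurized WeaveMols; it mirrors what WeaveModel's batch generator does internally, but the helper name and the exact pair ordering are assumptions of this sketch, not the library's API:

import numpy as np
from rdkit import Chem

import deepchem as dc

def build_weave_batch(mols):
    """Hypothetical helper: flatten WeaveMols into the five model inputs."""
    atom_feat, pair_feat = [], []
    atom_split, pair_split, atom_to_pair = [], [], []
    start = 0
    for im, mol in enumerate(mols):
        n_atoms = mol.get_num_atoms()
        atom_split.extend([im] * n_atoms)
        atom_feat.append(mol.get_atom_features())
        # Pair features flattened over all (i, j) atom pairs of this molecule.
        pair_feat.append(mol.get_pair_features().reshape(n_atoms * n_atoms, -1))
        # Segment id of each pair is the global index of its first atom...
        pair_split.extend(np.repeat(np.arange(n_atoms) + start, n_atoms))
        # ...and atom_to_pair holds the global (i, j) indices of every pair.
        idx = np.arange(n_atoms) + start
        atom_to_pair.append(
            np.stack(np.meshgrid(idx, idx, indexing='ij'), -1).reshape(-1, 2))
        start += n_atoms
    return [np.concatenate(atom_feat).astype(np.float32),
            np.concatenate(pair_feat).astype(np.float32),
            np.array(pair_split, dtype=np.int32),
            np.array(atom_split, dtype=np.int32),
            np.concatenate(atom_to_pair).astype(np.int32)]

mols = dc.feat.WeaveFeaturizer().featurize(
    [Chem.MolFromSmiles(s) for s in ['CCC', 'C']])
inputs = build_weave_batch(mols)  # lines up with the five Inputs above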