Example #1
  # From DeepChem's layer tests; assumes the enclosing test class and these
  # module-level imports:
  #   import numpy as np
  #   import deepchem as dc
  #   from deepchem.models import layers
  def test_graph_conv(self):
    """Test invoking GraphConv."""
    out_channels = 2
    n_atoms = 4  # 'CCC' has 3 atoms and 'C' has 1, so 4 atoms in total
    raw_smiles = ['CCC', 'C']
    from rdkit import Chem
    mols = [Chem.MolFromSmiles(s) for s in raw_smiles]
    featurizer = dc.feat.graph_features.ConvMolFeaturizer()
    mols = featurizer.featurize(mols)
    multi_mol = dc.feat.mol_graphs.ConvMol.agglomerate_mols(mols)
    atom_features = multi_mol.get_atom_features().astype(np.float32)
    degree_slice = multi_mol.deg_slice
    membership = multi_mol.membership
    deg_adjs = multi_mol.get_deg_adjacency_lists()[1:]
    args = [atom_features, degree_slice, membership] + deg_adjs
    layer = layers.GraphConv(out_channels)
    result = layer(args)
    assert result.shape == (n_atoms, out_channels)
    num_deg = 2 * layer.max_degree + (1 - layer.min_degree)
    assert len(layer.trainable_variables) == 2 * num_deg
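    # With GraphConv's defaults min_deg=0 and max_deg=10, num_deg works out
    # to 2 * 10 + (1 - 0) = 21: the layer builds one weight matrix and one
    # bias vector for each of those degree buckets, giving the
    # 2 * num_deg = 42 trainable variables asserted above.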
Example #2
  def __init__(self,
               n_tasks,
               graph_conv_layers,
               dense_layer_size=128,
               dropout=0.0,
               mode="classification",
               number_atom_features=75,
               n_classes=2,
               batch_normalize=True,
               uncertainty=False,
               batch_size=100):
    """An internal keras model class.

    The graph convolutions use a nonstandard control flow so the
    standard Keras functional API can't support them. We instead
    use the imperative "subclassing" API to implement the graph
    convolutions.

    All arguments have the same meaning as in GraphConvModel.
    """
    super(_GraphConvKerasModel, self).__init__()
    if mode not in ['classification', 'regression']:
      raise ValueError("mode must be either 'classification' or 'regression'")

    self.mode = mode
    self.uncertainty = uncertainty

    # collections.Sequence was removed in Python 3.10; use collections.abc.
    if not isinstance(dropout, collections.abc.Sequence):
      dropout = [dropout] * (len(graph_conv_layers) + 1)
    if len(dropout) != len(graph_conv_layers) + 1:
      raise ValueError('Wrong number of dropout probabilities provided')
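    # Uncertainty is estimated by sampling predictions with dropout enabled
    # at inference time, which is why every layer must have a nonzero
    # dropout rate.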
    if uncertainty:
      if mode != "regression":
        raise ValueError("Uncertainty is only supported in regression mode")
      if any(d == 0.0 for d in dropout):
        raise ValueError(
            'Dropout must be included in every layer to predict uncertainty')

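    # One GraphConv + GraphPool block per entry in graph_conv_layers, then an
    # atom-level Dense layer and a GraphGather readout that pools atom
    # features into a per-molecule fingerprint.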
    self.graph_convs = [
        layers.GraphConv(layer_size, activation_fn=tf.nn.relu)
        for layer_size in graph_conv_layers
    ]
    self.batch_norms = [
        BatchNormalization(fused=False) if batch_normalize else None
        for _ in range(len(graph_conv_layers) + 1)
    ]
    self.dropouts = [
        Dropout(rate=rate) if rate > 0.0 else None for rate in dropout
    ]
    self.graph_pools = [layers.GraphPool() for _ in graph_conv_layers]
    self.dense = Dense(dense_layer_size, activation=tf.nn.relu)
    self.graph_gather = layers.GraphGather(
        batch_size=batch_size, activation_fn=tf.nn.tanh)
    self.trim = TrimGraphOutput()
    if self.mode == 'classification':
      self.reshape_dense = Dense(n_tasks * n_classes)
      self.reshape = Reshape((n_tasks, n_classes))
      self.softmax = Softmax()
    else:
      self.regression_dense = Dense(n_tasks)
      if self.uncertainty:
        self.uncertainty_dense = Dense(n_tasks)
        self.uncertainty_trim = TrimGraphOutput()
        self.uncertainty_activation = Activation(tf.exp)
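
  # The excerpt stops at __init__. A minimal sketch of the `call` method that
  # would wire these layers, inferred from the functional-API construction in
  # Example #3 below (the training-time dropout handling and the exact output
  # ordering are assumptions):
  def call(self, inputs, training=False):
    atom_features, degree_slice, membership, n_samples = inputs[:4]
    deg_adjs = inputs[4:]
    in_layer = atom_features
    for i in range(len(self.graph_convs)):
      gc_in = [in_layer, degree_slice, membership] + deg_adjs
      out = self.graph_convs[i](gc_in)
      if self.batch_norms[i] is not None:
        out = self.batch_norms[i](out, training=training)
      if self.dropouts[i] is not None:
        out = self.dropouts[i](out, training=training)
      in_layer = self.graph_pools[i]([out, degree_slice, membership] +
                                     deg_adjs)
    out = self.dense(in_layer)
    if self.batch_norms[-1] is not None:
      out = self.batch_norms[-1](out, training=training)
    if self.dropouts[-1] is not None:
      out = self.dropouts[-1](out, training=training)
    readout = self.graph_gather([out, degree_slice, membership] + deg_adjs)
    if self.mode == 'classification':
      logits = self.trim(
          [self.reshape(self.reshape_dense(readout)), n_samples])
      return [self.softmax(logits), logits]
    output = self.trim([self.regression_dense(readout), n_samples])
    if self.uncertainty:
      log_var = self.uncertainty_trim(
          [self.uncertainty_dense(readout), n_samples])
      return [output, self.uncertainty_activation(log_var), output, log_var]
    return [output]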
Example #3
    def __init__(self,
                 n_tasks,
                 graph_conv_layers=[64, 64],
                 dense_layer_size=128,
                 dropout=0.0,
                 mode="classification",
                 number_atom_features=75,
                 n_classes=2,
                 uncertainty=False,
                 batch_size=100,
                 **kwargs):
        """
    Parameters
    ----------
    n_tasks: int
      Number of tasks
    graph_conv_layers: list of int
      Width of channels for the Graph Convolution Layers
    dense_layer_size: int
      Width of channels for Atom Level Dense Layer before GraphPool
    dropout: list or float
      the dropout probablity to use for each layer.  The length of this list should equal
      len(graph_conv_layers)+1 (one value for each convolution layer, and one for the
      dense layer).  Alternatively this may be a single value instead of a list, in which
      case the same value is used for every layer.
    mode: str
      Either "classification" or "regression"
    number_atom_features: int
        75 is the default number of atom features created, but
        this can vary if various options are passed to the
        function atom_features in graph_features
    n_classes: int
      the number of classes to predict (only used in classification mode)
    uncertainty: bool
      if True, include extra outputs and loss terms to enable the uncertainty
      in outputs to be predicted
    """
        if mode not in ['classification', 'regression']:
            raise ValueError(
                "mode must be either 'classification' or 'regression'")
        self.n_tasks = n_tasks
        self.mode = mode
        self.dense_layer_size = dense_layer_size
        self.graph_conv_layers = graph_conv_layers
        self.number_atom_features = number_atom_features
        self.n_classes = n_classes
        self.uncertainty = uncertainty
        # collections.Sequence was removed in Python 3.10; use collections.abc.
        if not isinstance(dropout, collections.abc.Sequence):
            dropout = [dropout] * (len(graph_conv_layers) + 1)
        if len(dropout) != len(graph_conv_layers) + 1:
            raise ValueError('Wrong number of dropout probabilities provided')
        self.dropout = dropout
        if uncertainty:
            if mode != "regression":
                raise ValueError(
                    "Uncertainty is only supported in regression mode")
            if any(d == 0.0 for d in dropout):
                raise ValueError(
                    'Dropout must be included in every layer to predict uncertainty'
                )

        # Build the model.

        atom_features = Input(shape=(self.number_atom_features, ))
        degree_slice = Input(shape=(2, ), dtype=tf.int32)
        membership = Input(shape=tuple(), dtype=tf.int32)
        n_samples = Input(shape=tuple(), dtype=tf.int32)
        dropout_switch = tf.keras.Input(shape=tuple())

        self.deg_adjs = []
        for i in range(0, 10 + 1):
            deg_adj = Input(shape=(i + 1, ), dtype=tf.int32)
            self.deg_adjs.append(deg_adj)
        in_layer = atom_features
        for layer_size, dropout_rate in zip(self.graph_conv_layers,
                                            self.dropout):
            gc1_in = [in_layer, degree_slice, membership] + self.deg_adjs
            gc1 = layers.GraphConv(layer_size,
                                   activation_fn=tf.nn.relu)(gc1_in)
            batch_norm1 = BatchNormalization(fused=False)(gc1)
            if dropout_rate > 0.0:
                batch_norm1 = layers.SwitchedDropout(rate=dropout_rate)(
                    [batch_norm1, dropout_switch])
            gp_in = [batch_norm1, degree_slice, membership] + self.deg_adjs
            in_layer = layers.GraphPool()(gp_in)
        dense = Dense(self.dense_layer_size, activation=tf.nn.relu)(in_layer)
        batch_norm3 = BatchNormalization(fused=False)(dense)
        if self.dropout[-1] > 0.0:
            batch_norm3 = layers.SwitchedDropout(rate=self.dropout[-1])(
                [batch_norm3, dropout_switch])
        self.neural_fingerprint = layers.GraphGather(
            batch_size=batch_size,
            activation_fn=tf.nn.tanh)([batch_norm3, degree_slice, membership] +
                                      self.deg_adjs)

        n_tasks = self.n_tasks
        if self.mode == 'classification':
            n_classes = self.n_classes
            logits = Reshape((n_tasks, n_classes))(Dense(n_tasks * n_classes)(
                self.neural_fingerprint))
            logits = TrimGraphOutput()([logits, n_samples])
            output = Softmax()(logits)
            outputs = [output, logits]
            output_types = ['prediction', 'loss']
            loss = SoftmaxCrossEntropy()
        else:
            output = Dense(n_tasks)(self.neural_fingerprint)
            output = TrimGraphOutput()([output, n_samples])
            if self.uncertainty:
                log_var = Dense(n_tasks)(self.neural_fingerprint)
                log_var = TrimGraphOutput()([log_var, n_samples])
                var = Activation(tf.exp)(log_var)
                outputs = [output, var, output, log_var]
                output_types = ['prediction', 'variance', 'loss', 'loss']

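                # outputs[1] is the predicted log variance, so this
                # heteroscedastic loss scales the squared error by
                # exp(-log_var) and penalizes large predicted variances.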
                def loss(outputs, labels, weights):
                    diff = labels[0] - outputs[0]
                    return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) +
                                          outputs[1])
            else:
                outputs = [output]
                output_types = ['prediction']
                loss = L2Loss()
        model = tf.keras.Model(inputs=[
            atom_features, degree_slice, membership, n_samples, dropout_switch
        ] + self.deg_adjs,
                               outputs=outputs)
        super(GraphConvModel, self).__init__(model,
                                             loss,
                                             output_types=output_types,
                                             batch_size=batch_size,
                                             **kwargs)
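
# A minimal usage sketch (not part of the source above), assuming a standard
# DeepChem installation; MolNet loaders accept featurizer='GraphConv' to
# produce the ConvMol inputs this model expects:
import deepchem as dc

tasks, (train, valid, test), transformers = dc.molnet.load_delaney(
    featurizer='GraphConv')
model = dc.models.GraphConvModel(n_tasks=len(tasks), mode='regression')
model.fit(train, nb_epoch=10)
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score)
print(model.evaluate(valid, [metric], transformers))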