def test_graph_conv(self):
  """Test invoking GraphConv."""
  out_channels = 2
  n_atoms = 4  # In CCC and C, there are 4 atoms
  raw_smiles = ['CCC', 'C']
  from rdkit import Chem
  mols = [Chem.MolFromSmiles(s) for s in raw_smiles]
  featurizer = dc.feat.graph_features.ConvMolFeaturizer()
  mols = featurizer.featurize(mols)
  # Merge the individual molecules into a single disconnected graph.
  multi_mol = dc.feat.mol_graphs.ConvMol.agglomerate_mols(mols)
  atom_features = multi_mol.get_atom_features().astype(np.float32)
  degree_slice = multi_mol.deg_slice
  membership = multi_mol.membership
  deg_adjs = multi_mol.get_deg_adjacency_lists()[1:]
  args = [atom_features, degree_slice, membership] + deg_adjs
  layer = layers.GraphConv(out_channels)
  result = layer(args)
  assert result.shape == (n_atoms, out_channels)
  # GraphConv keeps one weight matrix and one bias vector per degree bucket.
  num_deg = 2 * layer.max_degree + (1 - layer.min_degree)
  assert len(layer.trainable_variables) == 2 * num_deg
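For reference, the featurization pipeline this test exercises can be pulled into a small standalone helper. The sketch below is illustrative only; smiles_to_graph_inputs is a hypothetical name and does not exist in the repo.

import numpy as np
import deepchem as dc
from rdkit import Chem


def smiles_to_graph_inputs(smiles):
  """Hypothetical helper: build the flat argument list GraphConv expects."""
  mols = [Chem.MolFromSmiles(s) for s in smiles]
  featurizer = dc.feat.graph_features.ConvMolFeaturizer()
  # Merge the featurized molecules into one disconnected graph.
  multi_mol = dc.feat.mol_graphs.ConvMol.agglomerate_mols(
      featurizer.featurize(mols))
  atom_features = multi_mol.get_atom_features().astype(np.float32)
  # Drop the degree-0 adjacency list, matching the [1:] slice in the test.
  deg_adjs = multi_mol.get_deg_adjacency_lists()[1:]
  return [atom_features, multi_mol.deg_slice, multi_mol.membership] + deg_adjs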
def __init__(self,
             n_tasks,
             graph_conv_layers,
             dense_layer_size=128,
             dropout=0.0,
             mode="classification",
             number_atom_features=75,
             n_classes=2,
             batch_normalize=True,
             uncertainty=False,
             batch_size=100):
  """An internal Keras model class.

  The graph convolutions use a nonstandard control flow, so the standard
  Keras functional API can't support them. We instead use the imperative
  "subclassing" API to implement the graph convolutions.

  All arguments have the same meaning as in GraphConvModel.
  """
  super(_GraphConvKerasModel, self).__init__()
  if mode not in ['classification', 'regression']:
    raise ValueError("mode must be either 'classification' or 'regression'")

  self.mode = mode
  self.uncertainty = uncertainty

  # Broadcast a scalar dropout rate to one value per layer.
  if not isinstance(dropout, collections.abc.Sequence):
    dropout = [dropout] * (len(graph_conv_layers) + 1)
  if len(dropout) != len(graph_conv_layers) + 1:
    raise ValueError('Wrong number of dropout probabilities provided')
  if uncertainty:
    if mode != "regression":
      raise ValueError("Uncertainty is only supported in regression mode")
    if any(d == 0.0 for d in dropout):
      raise ValueError(
          'Dropout must be included in every layer to predict uncertainty')

  self.graph_convs = [
      layers.GraphConv(layer_size, activation_fn=tf.nn.relu)
      for layer_size in graph_conv_layers
  ]
  self.batch_norms = [
      BatchNormalization(fused=False) if batch_normalize else None
      for _ in range(len(graph_conv_layers) + 1)
  ]
  self.dropouts = [
      Dropout(rate=rate) if rate > 0.0 else None for rate in dropout
  ]
  self.graph_pools = [layers.GraphPool() for _ in graph_conv_layers]
  self.dense = Dense(dense_layer_size, activation=tf.nn.relu)
  self.graph_gather = layers.GraphGather(
      batch_size=batch_size, activation_fn=tf.nn.tanh)
  self.trim = TrimGraphOutput()
  if self.mode == 'classification':
    self.reshape_dense = Dense(n_tasks * n_classes)
    self.reshape = Reshape((n_tasks, n_classes))
    self.softmax = Softmax()
  else:
    self.regression_dense = Dense(n_tasks)
    if self.uncertainty:
      self.uncertainty_dense = Dense(n_tasks)
      self.uncertainty_trim = TrimGraphOutput()
      self.uncertainty_activation = Activation(tf.exp)
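The constructor above only instantiates the layers; for orientation, a forward pass consistent with those attributes is sketched below. This is an assumption-laden illustration of how the pieces connect (input ordering follows the functional-API build later in this section), not the repo's actual call method.

def call(self, inputs, training=False):
  # Sketch only: mirrors the layer wiring from __init__ above.
  atom_features = inputs[0]
  degree_slice = tf.cast(inputs[1], tf.int32)
  membership = tf.cast(inputs[2], tf.int32)
  n_samples = tf.cast(inputs[3], tf.int32)
  deg_adjs = [tf.cast(x, tf.int32) for x in inputs[4:]]

  in_layer = atom_features
  for i in range(len(self.graph_convs)):
    gc = self.graph_convs[i]([in_layer, degree_slice, membership] + deg_adjs)
    if self.batch_norms[i] is not None:
      gc = self.batch_norms[i](gc, training=training)
    if training and self.dropouts[i] is not None:
      gc = self.dropouts[i](gc, training=training)
    in_layer = self.graph_pools[i]([gc, degree_slice, membership] + deg_adjs)
  dense = self.dense(in_layer)
  if self.batch_norms[-1] is not None:
    dense = self.batch_norms[-1](dense, training=training)
  if training and self.dropouts[-1] is not None:
    dense = self.dropouts[-1](dense, training=training)
  fingerprint = self.graph_gather(
      [dense, degree_slice, membership] + deg_adjs)
  if self.mode == 'classification':
    logits = self.trim(
        [self.reshape(self.reshape_dense(fingerprint)), n_samples])
    return [self.softmax(logits), logits]
  output = self.trim([self.regression_dense(fingerprint), n_samples])
  if self.uncertainty:
    log_var = self.uncertainty_trim(
        [self.uncertainty_dense(fingerprint), n_samples])
    return [output, self.uncertainty_activation(log_var), output, log_var]
  return [output]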
def __init__(self,
             n_tasks,
             graph_conv_layers=[64, 64],
             dense_layer_size=128,
             dropout=0.0,
             mode="classification",
             number_atom_features=75,
             n_classes=2,
             uncertainty=False,
             batch_size=100,
             **kwargs):
  """
  Parameters
  ----------
  n_tasks: int
    Number of tasks
  graph_conv_layers: list of int
    Width of channels for the Graph Convolution Layers
  dense_layer_size: int
    Width of channels for Atom Level Dense Layer before GraphPool
  dropout: list or float
    The dropout probability to use for each layer. The length of this list
    should equal len(graph_conv_layers)+1 (one value for each convolution
    layer, and one for the dense layer). Alternatively, this may be a single
    value instead of a list, in which case the same value is used for every
    layer.
  mode: str
    Either "classification" or "regression"
  number_atom_features: int
    75 is the default number of atom features created, but this can vary if
    various options are passed to the function atom_features in
    graph_features
  n_classes: int
    the number of classes to predict (only used in classification mode)
  uncertainty: bool
    if True, include extra outputs and loss terms to enable the uncertainty
    in outputs to be predicted
  """
  if mode not in ['classification', 'regression']:
    raise ValueError("mode must be either 'classification' or 'regression'")
  self.n_tasks = n_tasks
  self.mode = mode
  self.dense_layer_size = dense_layer_size
  self.graph_conv_layers = graph_conv_layers
  self.number_atom_features = number_atom_features
  self.n_classes = n_classes
  self.uncertainty = uncertainty
  if not isinstance(dropout, collections.abc.Sequence):
    dropout = [dropout] * (len(graph_conv_layers) + 1)
  if len(dropout) != len(graph_conv_layers) + 1:
    raise ValueError('Wrong number of dropout probabilities provided')
  self.dropout = dropout
  if uncertainty:
    if mode != "regression":
      raise ValueError("Uncertainty is only supported in regression mode")
    if any(d == 0.0 for d in dropout):
      raise ValueError(
          'Dropout must be included in every layer to predict uncertainty')

  # Build the model.
  atom_features = Input(shape=(self.number_atom_features,))
  degree_slice = Input(shape=(2,), dtype=tf.int32)
  membership = Input(shape=tuple(), dtype=tf.int32)
  n_samples = Input(shape=tuple(), dtype=tf.int32)
  dropout_switch = tf.keras.Input(shape=tuple())
  # One adjacency-list input per atom degree (0 through 10).
  self.deg_adjs = []
  for i in range(0, 10 + 1):
    deg_adj = Input(shape=(i + 1,), dtype=tf.int32)
    self.deg_adjs.append(deg_adj)
  in_layer = atom_features
  for layer_size, dropout in zip(self.graph_conv_layers, self.dropout):
    gc1_in = [in_layer, degree_slice, membership] + self.deg_adjs
    gc1 = layers.GraphConv(layer_size, activation_fn=tf.nn.relu)(gc1_in)
    batch_norm1 = BatchNormalization(fused=False)(gc1)
    if dropout > 0.0:
      batch_norm1 = layers.SwitchedDropout(rate=dropout)(
          [batch_norm1, dropout_switch])
    gp_in = [batch_norm1, degree_slice, membership] + self.deg_adjs
    in_layer = layers.GraphPool()(gp_in)
  dense = Dense(self.dense_layer_size, activation=tf.nn.relu)(in_layer)
  batch_norm3 = BatchNormalization(fused=False)(dense)
  if self.dropout[-1] > 0.0:
    batch_norm3 = layers.SwitchedDropout(rate=self.dropout[-1])(
        [batch_norm3, dropout_switch])
  self.neural_fingerprint = layers.GraphGather(
      batch_size=batch_size,
      activation_fn=tf.nn.tanh)([batch_norm3, degree_slice, membership] +
                                self.deg_adjs)
  n_tasks = self.n_tasks
  if self.mode == 'classification':
    n_classes = self.n_classes
    logits = Reshape((n_tasks, n_classes))(
        Dense(n_tasks * n_classes)(self.neural_fingerprint))
    logits = TrimGraphOutput()([logits, n_samples])
    output = Softmax()(logits)
    outputs = [output, logits]
    output_types = ['prediction', 'loss']
    loss = SoftmaxCrossEntropy()
  else:
    output = Dense(n_tasks)(self.neural_fingerprint)
    output = TrimGraphOutput()([output, n_samples])
    if self.uncertainty:
      log_var = Dense(n_tasks)(self.neural_fingerprint)
      log_var = TrimGraphOutput()([log_var, n_samples])
      var = Activation(tf.exp)(log_var)
      outputs = [output, var, output, log_var]
      output_types = ['prediction', 'variance', 'loss', 'loss']

      # Heteroscedastic loss: squared error scaled by the predicted
      # variance plus a log-variance penalty (the Gaussian negative
      # log-likelihood up to constants). Here outputs[1] is log_var.
      def loss(outputs, labels, weights):
        diff = labels[0] - outputs[0]
        return tf.reduce_mean(diff * diff / tf.exp(outputs[1]) + outputs[1])
    else:
      outputs = [output]
      output_types = ['prediction']
      loss = L2Loss()
  model = tf.keras.Model(
      inputs=[
          atom_features, degree_slice, membership, n_samples, dropout_switch
      ] + self.deg_adjs,
      outputs=outputs)
  super(GraphConvModel, self).__init__(
      model, loss, output_types=output_types, batch_size=batch_size, **kwargs)
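A usage sketch for the finished model, assuming a dataset featurized with ConvMolFeaturizer, here via the MoleculeNet Tox21 loader (loader signatures vary across DeepChem versions):

import numpy as np
import deepchem as dc

tasks, (train, valid, test), transformers = dc.molnet.load_tox21(
    featurizer='GraphConv')
model = dc.models.GraphConvModel(
    n_tasks=len(tasks), mode='classification', batch_size=50)
model.fit(train, nb_epoch=10)
metric = dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)
print(model.evaluate(valid, [metric], transformers))

# For regression with uncertainty, dropout must be nonzero in every layer:
# model = dc.models.GraphConvModel(
#     n_tasks=1, mode='regression', dropout=0.1, uncertainty=True)
# y_pred, y_std = model.predict_uncertainty(test)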