def create_loss(self):
    """Return a newly constructed ``L2Loss`` instance."""
    l2_loss = L2Loss()
    return l2_loss
def __init__(self,
             n_tasks: int,
             graph_attention_layers: list = None,
             n_attention_heads: int = 8,
             agg_modes: list = None,
             activation=F.elu,
             residual: bool = True,
             dropout: float = 0.,
             alpha: float = 0.2,
             predictor_hidden_feats: int = 128,
             predictor_dropout: float = 0.,
             mode: str = 'regression',
             number_atom_features: int = 30,
             n_classes: int = 2,
             self_loop: bool = True,
             **kwargs):
    """Build the GAT network, pick its loss, and initialise the base model.

    Parameters
    ----------
    n_tasks: int
        Number of tasks.
    graph_attention_layers: list of int
        Per-head channel width of every GAT layer;
        ``graph_attention_layers[i]`` is the width used by each attention
        head of the i-th layer. When both ``graph_attention_layers`` and
        ``agg_modes`` are given they should have the same length. If not
        specified, the default value will be [8, 8].
    n_attention_heads: int
        Number of attention heads in each GAT layer.
    agg_modes: list of str
        How the multi-head results of each GAT layer are combined:
        'flatten' concatenates the heads, 'mean' averages them.
        ``agg_modes[i]`` applies to the i-th layer. When both
        ``graph_attention_layers`` and ``agg_modes`` are given they should
        have the same length. If not specified, intermediate layers flatten
        the heads and the last layer takes their mean.
    activation: activation function or None
        Activation applied to the aggregated multi-head result of each GAT
        layer. Defaults to ELU.
    residual: bool
        Whether each GAT layer uses a residual connection. Default to True.
    dropout: float
        Dropout probability inside each GAT layer. Default to 0.
    alpha: float
        Negative-side slope of the LeakyReLU. Default to 0.2.
    predictor_hidden_feats: int
        Hidden size of the output MLP predictor. Default to 128.
    predictor_dropout: float
        Dropout probability of the output MLP predictor. Default to 0.
    mode: str
        The model type, 'classification' or 'regression'. Default to
        'regression'.
    number_atom_features: int
        Length of the initial atom feature vectors. Default to 30.
    n_classes: int
        Number of classes to predict per task (only used when ``mode`` is
        'classification'). Default to 2.
    self_loop: bool
        Whether to add self loops for the nodes, i.e. edges from nodes to
        themselves. When input graphs have isolated nodes, self loops allow
        preserving their original features in message passing. Stored on
        the instance as ``_self_loop``. Default to True.
    kwargs
        This can include any keyword argument of TorchModel.
    """
    # Everything except ``self_loop`` is handed straight to the network.
    gat_config = dict(
        n_tasks=n_tasks,
        graph_attention_layers=graph_attention_layers,
        n_attention_heads=n_attention_heads,
        agg_modes=agg_modes,
        activation=activation,
        residual=residual,
        dropout=dropout,
        alpha=alpha,
        predictor_hidden_feats=predictor_hidden_feats,
        predictor_dropout=predictor_dropout,
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes,
    )
    network = GAT(**gat_config)

    # Any mode other than 'regression' takes the classification settings.
    is_regression = mode == 'regression'
    loss: Loss = L2Loss() if is_regression else SparseSoftmaxCrossEntropy()
    output_types = ['prediction'] if is_regression else ['prediction', 'loss']

    super(GATModel, self).__init__(network,
                                   loss=loss,
                                   output_types=output_types,
                                   **kwargs)
    self._self_loop = self_loop
def __init__(self,
             dist_kernel: str = 'softmax',
             n_encoders=8,
             lambda_attention: float = 0.33,
             lambda_distance: float = 0.33,
             h: int = 16,
             sa_hsize: int = 1024,
             sa_dropout_p: float = 0.0,
             output_bias: bool = True,
             d_input: int = 1024,
             d_hidden: int = 1024,
             d_output: int = 1024,
             activation: str = 'leakyrelu',
             n_layers: int = 1,
             ff_dropout_p: float = 0.0,
             encoder_hsize: int = 1024,
             encoder_dropout_p: float = 0.0,
             embed_input_hsize: int = 36,
             embed_dropout_p: float = 0.0,
             gen_aggregation_type: str = 'mean',
             gen_dropout_p: float = 0.0,
             gen_n_layers: int = 1,
             gen_attn_hidden: int = 128,
             gen_attn_out: int = 4,
             gen_d_output: int = 1,
             **kwargs):
    """The wrapper class for the Molecular Attention Transformer.

    Because the input is a custom data class (MATEncoding), the
    default_generator function from DiskDataset has been overridden and
    customized to work with a batch of MATEncoding classes.

    Parameters
    ----------
    dist_kernel: str
        Kernel activation for the self-attention layer: 'softmax' for
        softmax or 'exp' for exponential.
    n_encoders: int
        Number of encoder layers in the encoder block.
    lambda_attention: float
        Constant multiplied with the attention matrix in the self-attention
        layer.
    lambda_distance: float
        Constant multiplied with the distance matrix in the self-attention
        layer.
    h: int
        Number of attention heads for the self-attention layer.
    sa_hsize: int
        Size of dense layer in the self-attention layer.
    sa_dropout_p: float
        Dropout probability for the self-attention layer.
    output_bias: bool
        If True, dense layers will use bias vectors in the self-attention
        layer.
    d_input: int
        Size of input layer in the feed-forward layer.
    d_hidden: int
        Size of hidden layer in the feed-forward layer. Will also be used as
        d_output for the MATEmbedding layer.
    d_output: int
        Size of output layer in the feed-forward layer.
    activation: str
        Activation function for the feed-forward layer. One of 'relu' for
        ReLU, 'leakyrelu' for LeakyReLU, 'prelu' for PReLU, 'tanh' for TanH,
        'selu' for SELU, 'elu' for ELU and 'linear' for linear activation.
    n_layers: int
        Number of layers in the feed-forward layer.
    ff_dropout_p: float
        Dropout probability in the feed-forward layer.
    encoder_hsize: int
        Size of Dense layer for the encoder itself.
    encoder_dropout_p: float
        Dropout probability for connections in the encoder layer.
    embed_input_hsize: int
        Size of input layer for the MATEmbedding layer.
    embed_dropout_p: float
        Dropout probability for the MATEmbedding layer.
    gen_aggregation_type: str
        Type of aggregation to be used. Can be 'grover', 'mean' or
        'contextual'.
    gen_dropout_p: float
        Dropout probability for the MATGenerator layer.
    gen_n_layers: int
        Number of layers in MATGenerator.
    gen_attn_hidden: int
        Size of hidden attention layer in the MATGenerator layer.
    gen_attn_out: int
        Size of output attention layer in the MATGenerator layer.
    gen_d_output: int
        Size of output layer in the MATGenerator layer.
    """
    # Every named argument is forwarded unchanged to the MAT network.
    mat_config = dict(
        dist_kernel=dist_kernel,
        n_encoders=n_encoders,
        lambda_attention=lambda_attention,
        lambda_distance=lambda_distance,
        h=h,
        sa_hsize=sa_hsize,
        sa_dropout_p=sa_dropout_p,
        output_bias=output_bias,
        d_input=d_input,
        d_hidden=d_hidden,
        d_output=d_output,
        activation=activation,
        n_layers=n_layers,
        ff_dropout_p=ff_dropout_p,
        encoder_hsize=encoder_hsize,
        encoder_dropout_p=encoder_dropout_p,
        embed_input_hsize=embed_input_hsize,
        embed_dropout_p=embed_dropout_p,
        gen_aggregation_type=gen_aggregation_type,
        gen_dropout_p=gen_dropout_p,
        gen_n_layers=gen_n_layers,
        gen_attn_hidden=gen_attn_hidden,
        gen_attn_out=gen_attn_out,
        gen_d_output=gen_d_output,
    )
    network = MAT(**mat_config)

    # This wrapper always uses an L2 loss with a single prediction output.
    super(MATModel, self).__init__(network,
                                   loss=L2Loss(),
                                   output_types=['prediction'],
                                   **kwargs)
def __init__(self,
             frag1_num_atoms=70,
             frag2_num_atoms=634,
             complex_num_atoms=701,
             max_num_neighbors=12,
             batch_size=24,
             atom_types=[
                 6, 7., 8., 9., 11., 12., 15., 16., 17., 20., 25., 30., 35.,
                 53., -1.
             ],
             radial=[[
                 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0,
                 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0
             ], [0.0, 4.0, 8.0], [0.4]],
             layer_sizes=[32, 32, 16],
             learning_rate=0.001,
             **kwargs):
    """Build the atomic-convolution Keras model and initialise the base class.

    Three groups of inputs (fragment 1, fragment 2, complex) are each passed
    through an ``AtomicConvolution`` layer, and the three results plus the
    three ``*_z`` inputs are combined by ``AtomicConvScore``.

    Params
    ------
    frag1_num_atoms: int
        Number of atoms in first fragment
    frag2_num_atoms: int
        Number of atoms in second fragment
    complex_num_atoms: int
        Number of atoms in the complex
    max_num_neighbors: int
        Maximum number of neighbors possible for an atom. Recall neighbors
        are spatial neighbors.
    batch_size: int
        Batch size; stored on the instance and forwarded to the base-class
        constructor.
    atom_types: list
        List of atoms recognized by model. Atoms are indicated by their
        nuclear numbers.
    radial: list
        List of lists of radial parameters. The Cartesian product of the
        sub-lists is passed as ``radial_params`` to every
        ``AtomicConvolution`` layer.
    layer_sizes: list
        Layer sizes handed to ``AtomicConvScore``.
    learning_rate: float
        Learning rate for the model. Forwarded to the base-class constructor.
    kwargs
        Additional keyword arguments forwarded to the base-class constructor.

    Notes
    -----
    The list defaults are shared between calls; this constructor stores
    ``atom_types`` on the instance but does not mutate any of them.
    """
    # TODO: Turning off queue for now. Safe to re-activate?
    self.complex_num_atoms = complex_num_atoms
    self.frag1_num_atoms = frag1_num_atoms
    self.frag2_num_atoms = frag2_num_atoms
    self.max_num_neighbors = max_num_neighbors
    self.batch_size = batch_size
    self.atom_types = atom_types

    # Every combination of the radial parameter lists.
    rp = list(itertools.product(*radial))

    frag1_X = Input(shape=(frag1_num_atoms, 3))
    frag1_nbrs = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_nbrs_z = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_z = Input(shape=(frag1_num_atoms,))

    frag2_X = Input(shape=(frag2_num_atoms, 3))
    frag2_nbrs = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_nbrs_z = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_z = Input(shape=(frag2_num_atoms,))

    complex_X = Input(shape=(complex_num_atoms, 3))
    complex_nbrs = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_nbrs_z = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_z = Input(shape=(complex_num_atoms,))

    frag1_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag1_X, frag1_nbrs, frag1_nbrs_z])

    frag2_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag2_X, frag2_nbrs, frag2_nbrs_z])

    complex_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([complex_X, complex_nbrs, complex_nbrs_z])

    score = AtomicConvScore(self.atom_types, layer_sizes)(
        [frag1_conv, frag2_conv, complex_conv, frag1_z, frag2_z, complex_z])

    model = tf.keras.Model(
        inputs=[
            frag1_X, frag1_nbrs, frag1_nbrs_z, frag1_z, frag2_X, frag2_nbrs,
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            complex_z
        ],
        outputs=score)
    # ``learning_rate`` is a named parameter, so it never appears in
    # ``kwargs``; pass it on explicitly or the caller's value is dropped.
    super(AtomicConvModel, self).__init__(
        model,
        L2Loss(),
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs)
def __init__(self,
             n_tasks: int,
             node_out_feats: int = 64,
             edge_hidden_feats: int = 128,
             num_step_message_passing: int = 3,
             num_step_set2set: int = 6,
             num_layer_set2set: int = 3,
             mode: str = 'regression',
             number_atom_features: int = 30,
             number_bond_features: int = 11,
             n_classes: int = 2,
             self_loop: bool = False,
             **kwargs):
    """Build the MPNN network, pick its loss, and initialise the base model.

    Parameters
    ----------
    n_tasks: int
        Number of tasks.
    node_out_feats: int
        Length of the final node representation vectors. Default to 64.
    edge_hidden_feats: int
        Length of the hidden edge representation vectors. Default to 128.
    num_step_message_passing: int
        Number of rounds of message passing. Default to 3.
    num_step_set2set: int
        Number of set2set steps. Default to 6.
    num_layer_set2set: int
        Number of set2set layers. Default to 3.
    mode: str
        The model type, 'classification' or 'regression'. Default to
        'regression'.
    number_atom_features: int
        Length of the initial atom feature vectors. Default to 30.
    number_bond_features: int
        Length of the initial bond feature vectors. Default to 11.
    n_classes: int
        Number of classes to predict per task (only used when ``mode`` is
        'classification'). Default to 2.
    self_loop: bool
        Whether to add self loops for the nodes, i.e. edges from nodes to
        themselves. Generally, an MPNNModel does not require self loops.
        Stored on the instance as ``_self_loop``. Default to False.
    kwargs
        This can include any keyword argument of TorchModel.
    """
    # Everything except ``self_loop`` is handed straight to the network.
    mpnn_config = dict(
        n_tasks=n_tasks,
        node_out_feats=node_out_feats,
        edge_hidden_feats=edge_hidden_feats,
        num_step_message_passing=num_step_message_passing,
        num_step_set2set=num_step_set2set,
        num_layer_set2set=num_layer_set2set,
        mode=mode,
        number_atom_features=number_atom_features,
        number_bond_features=number_bond_features,
        n_classes=n_classes,
    )
    network = MPNN(**mpnn_config)

    # Any mode other than 'regression' takes the classification settings.
    is_regression = mode == 'regression'
    loss: Loss = L2Loss() if is_regression else SparseSoftmaxCrossEntropy()
    output_types = ['prediction'] if is_regression else ['prediction', 'loss']

    super(MPNNModel, self).__init__(network,
                                    loss=loss,
                                    output_types=output_types,
                                    **kwargs)
    self._self_loop = self_loop
def __init__(
        self,
        n_tasks: int,
        frag1_num_atoms: int = 70,
        frag2_num_atoms: int = 634,
        complex_num_atoms: int = 701,
        max_num_neighbors: int = 12,
        batch_size: int = 24,
        atom_types: Sequence[float] = [
            6, 7., 8., 9., 11., 12., 15., 16., 17., 20., 25., 30., 35., 53.,
            -1.
        ],
        radial: Sequence[Sequence[float]] = [[
            1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5,
            8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5, 12.0
        ], [0.0, 4.0, 8.0], [0.4]],
        layer_sizes=[100],
        weight_init_stddevs: OneOrMany[float] = 0.02,
        bias_init_consts: OneOrMany[float] = 1.0,
        weight_decay_penalty: float = 0.0,
        weight_decay_penalty_type: str = "l2",
        dropouts: OneOrMany[float] = 0.5,
        activation_fns: OneOrMany[ActivationFn] = tf.nn.relu,
        residual: bool = False,
        learning_rate=0.001,
        **kwargs) -> None:
    """Build the atomic-convolution Keras model and initialise the base class.

    Three groups of inputs (fragment 1, fragment 2, complex) are each passed
    through an ``AtomicConvolution`` layer. The three results are flattened
    and concatenated, fed through a stack of dense layers, and mapped to an
    output that is reshaped to ``(n_tasks, 1)``.

    Parameters
    ----------
    n_tasks: int
        number of tasks
    frag1_num_atoms: int
        Number of atoms in first fragment
    frag2_num_atoms: int
        Number of atoms in second fragment
    complex_num_atoms: int
        Number of atoms in the complex
    max_num_neighbors: int
        Maximum number of neighbors possible for an atom. Recall neighbors
        are spatial neighbors.
    batch_size: int
        Batch size; stored on the instance and forwarded to the base-class
        constructor.
    atom_types: list
        List of atoms recognized by model. Atoms are indicated by their
        nuclear numbers.
    radial: list
        Radial parameters used in the atomic convolution transformation. The
        Cartesian product of the sub-lists is passed as ``radial_params`` to
        every ``AtomicConvolution`` layer.
    layer_sizes: list
        the size of each dense layer in the network. The length of this
        list determines the number of layers.
    weight_init_stddevs: list or float
        the standard deviation of the distribution to use for weight
        initialization of each layer. The length of this list should equal
        len(layer_sizes). Alternatively this may be a single value instead
        of a list, in which case the same value is used for every layer.
        The last entry is also used for the output layer.
    bias_init_consts: list or float
        the value to initialize the biases in each layer to. The length of
        this list should equal len(layer_sizes). Alternatively this may be a
        single value instead of a list, in which case the same value is used
        for every layer. The last entry is also used for the output layer.
    weight_decay_penalty: float
        the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
        the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list or float
        the dropout probability to use for each layer. The length of this
        list should equal len(layer_sizes). Alternatively this may be a
        single value instead of a list, in which case the same value is used
        for every layer.
    activation_fns: list or object
        the Tensorflow activation function to apply to each layer. The
        length of this list should equal len(layer_sizes). Alternatively
        this may be a single value instead of a list, in which case the same
        value is used for every layer.
    residual: bool
        if True, the model will be composed of pre-activation residual
        blocks instead of a simple stack of dense layers.
    learning_rate: float
        Learning rate for the model. Forwarded to the base-class constructor.
    kwargs
        Additional keyword arguments forwarded to the base-class constructor.

    Notes
    -----
    The list defaults are shared between calls; this constructor stores
    ``atom_types`` on the instance but does not mutate any of them.
    """
    self.complex_num_atoms = complex_num_atoms
    self.frag1_num_atoms = frag1_num_atoms
    self.frag2_num_atoms = frag2_num_atoms
    self.max_num_neighbors = max_num_neighbors
    self.batch_size = batch_size
    self.atom_types = atom_types

    # Every combination of the radial parameter lists.
    rp = list(itertools.product(*radial))

    frag1_X = Input(shape=(frag1_num_atoms, 3))
    frag1_nbrs = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_nbrs_z = Input(shape=(frag1_num_atoms, max_num_neighbors))
    frag1_z = Input(shape=(frag1_num_atoms,))

    frag2_X = Input(shape=(frag2_num_atoms, 3))
    frag2_nbrs = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_nbrs_z = Input(shape=(frag2_num_atoms, max_num_neighbors))
    frag2_z = Input(shape=(frag2_num_atoms,))

    complex_X = Input(shape=(complex_num_atoms, 3))
    complex_nbrs = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_nbrs_z = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_z = Input(shape=(complex_num_atoms,))

    self._frag1_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag1_X, frag1_nbrs, frag1_nbrs_z])
    flattened1 = Flatten()(self._frag1_conv)

    self._frag2_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag2_X, frag2_nbrs, frag2_nbrs_z])
    flattened2 = Flatten()(self._frag2_conv)

    self._complex_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([complex_X, complex_nbrs, complex_nbrs_z])
    flattened3 = Flatten()(self._complex_conv)

    concat = Concatenate()([flattened1, flattened2, flattened3])

    # Broadcast scalar hyperparameters to one value per dense layer.
    n_layers = len(layer_sizes)
    if not isinstance(weight_init_stddevs, SequenceCollection):
        weight_init_stddevs = [weight_init_stddevs] * n_layers
    if not isinstance(bias_init_consts, SequenceCollection):
        bias_init_consts = [bias_init_consts] * n_layers
    if not isinstance(dropouts, SequenceCollection):
        dropouts = [dropouts] * n_layers
    if not isinstance(activation_fns, SequenceCollection):
        activation_fns = [activation_fns] * n_layers

    # Any penalty type other than 'l1' falls back to l2.
    if weight_decay_penalty != 0.0:
        if weight_decay_penalty_type == 'l1':
            regularizer = tf.keras.regularizers.l1(weight_decay_penalty)
        else:
            regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
    else:
        regularizer = None

    prev_layer = concat
    # ``prev_size`` is compared against layer widths below, so it must be
    # the feature axis of ``concat``. Axis 0 of a Keras tensor is the batch
    # axis and would never match a layer size.
    prev_size = concat.shape[-1]
    next_activation = None

    # Add the dense layers. Each layer's activation is deferred to the start
    # of the next iteration (pre-activation), and the last one is applied
    # after the loop.
    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
            layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
            activation_fns):
        layer = prev_layer
        if next_activation is not None:
            layer = Activation(next_activation)(layer)
        layer = Dense(
            size,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=weight_stddev),
            bias_initializer=tf.constant_initializer(value=bias_const),
            kernel_regularizer=regularizer)(layer)
        if dropout > 0.0:
            layer = Dropout(rate=dropout)(layer)
        # A residual sum is only possible when input and output widths match.
        if residual and prev_size == size:
            prev_layer = Lambda(lambda x: x[0] + x[1])([prev_layer, layer])
        else:
            prev_layer = layer
        prev_size = size
        next_activation = activation_fn
    if next_activation is not None:
        # Use the tracked activation rather than the leaked loop variable.
        prev_layer = Activation(next_activation)(prev_layer)
    self.neural_fingerprint = prev_layer

    output = Reshape((n_tasks, 1))(Dense(
        n_tasks,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=weight_init_stddevs[-1]),
        bias_initializer=tf.constant_initializer(
            value=bias_init_consts[-1]))(prev_layer))

    model = tf.keras.Model(
        inputs=[
            frag1_X, frag1_nbrs, frag1_nbrs_z, frag1_z, frag2_X, frag2_nbrs,
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            complex_z
        ],
        outputs=output)
    # ``learning_rate`` is a named parameter, so it never appears in
    # ``kwargs``; pass it on explicitly or the caller's value is dropped.
    super(AtomicConvModel, self).__init__(
        model,
        L2Loss(),
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs)
def __init__(self,
             n_tasks: int,
             graph_conv_layers: list = None,
             activation=None,
             residual: bool = True,
             batchnorm: bool = False,
             dropout: float = 0.,
             predictor_hidden_feats: int = 128,
             predictor_dropout: float = 0.,
             mode: str = 'regression',
             number_atom_features=75,
             n_classes: int = 2,
             nfeat_name: str = 'x',
             self_loop: bool = True,
             **kwargs):
    """Build the GCN network, pick its loss, and initialise the base model.

    Parameters
    ----------
    n_tasks: int
        Number of tasks.
    graph_conv_layers: list of int
        Channel width of every GCN layer; ``graph_conv_layers[i]`` is the
        width of the i-th layer. If not specified, the default value will
        be [64, 64].
    activation: callable
        Activation applied to the output of each GCN layer. By default, no
        activation function will be applied.
    residual: bool
        Whether each GCN layer uses a residual connection. Default to True.
    batchnorm: bool
        Whether to apply batch normalization to the output of each GCN
        layer. Default to False.
    dropout: float
        Dropout probability for the output of each GCN layer. Default to 0.
    predictor_hidden_feats: int
        Hidden size of the output MLP predictor. Default to 128.
    predictor_dropout: float
        Dropout probability of the output MLP predictor. Default to 0.
    mode: str
        The model type, 'classification' or 'regression'.
    number_atom_features: int
        Length of the initial atom feature vectors. Default to 75.
    n_classes: int
        Number of classes to predict per task (only used when ``mode`` is
        'classification').
    nfeat_name: str
        For an input graph ``g``, the model assumes that node features are
        stored in ``g.ndata[nfeat_name]`` and reads them from there.
    self_loop: bool
        Whether to add self loops for the nodes, i.e. edges from nodes to
        themselves. Stored on the instance as ``_self_loop``. Default to
        True.
    kwargs
        This can include any keyword argument of TorchModel.
    """
    # Everything except ``self_loop`` is handed straight to the network.
    gcn_config = dict(
        graph_conv_layers=graph_conv_layers,
        activation=activation,
        residual=residual,
        batchnorm=batchnorm,
        dropout=dropout,
        predictor_hidden_feats=predictor_hidden_feats,
        predictor_dropout=predictor_dropout,
        n_tasks=n_tasks,
        mode=mode,
        number_atom_features=number_atom_features,
        n_classes=n_classes,
        nfeat_name=nfeat_name,
    )
    network = GCN(**gcn_config)

    # Any mode other than 'regression' takes the classification settings.
    is_regression = mode == 'regression'
    loss: Loss = L2Loss() if is_regression else SparseSoftmaxCrossEntropy()
    output_types = ['prediction'] if is_regression else ['prediction', 'loss']

    super(GCNModel, self).__init__(network,
                                   loss=loss,
                                   output_types=output_types,
                                   **kwargs)
    self._self_loop = self_loop