def build_model(args: Namespace) -> nn.Module:
    """
    Builds a message passing neural network including final linear layers and initializes parameters.

    :param args: Arguments.
    :return: An nn.Module containing the MPN encoder along with final linear layers with parameters initialized.
    """
    output_size = args.num_tasks  # TODO: is this the width of the output, i.e. the number of classes?

    # Obtain the message passing network component
    encoder = MPNEncoder(args, args.atom_fdim, args.bond_fdim)

    first_linear_dim = args.hidden_size * (1 + args.jtnn)  # TODO: what exactly is jtnn?

    drop_layer = lambda p: nn.Dropout(p)
    linear_layer = lambda input_dim, output_dim, p: nn.Linear(input_dim, output_dim)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [
            drop_layer(args.ffn_input_dropout),
            linear_layer(first_linear_dim, output_size, args.ffn_input_dropout)
        ]
    else:
        ffn = [
            drop_layer(args.ffn_input_dropout),
            linear_layer(first_linear_dim, args.ffn_hidden_size, args.ffn_input_dropout)
        ]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                get_activation_function(args.activation),
                drop_layer(args.ffn_dropout),
                linear_layer(args.ffn_hidden_size, args.ffn_hidden_size, args.ffn_dropout),
            ])
        ffn.extend([
            get_activation_function(args.activation),
            drop_layer(args.ffn_dropout),
            linear_layer(args.ffn_hidden_size, output_size, args.ffn_dropout),
        ])

    # Classification
    if args.dataset_type == 'classification':
        ffn.append(nn.Sigmoid())

    # Combined model
    ffn = nn.Sequential(*ffn)
    model = MoleculeModel(encoder, ffn)

    initialize_weights(model, args)

    return model
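# For orientation, a minimal sketch of how build_model is driven by an argparse Namespace.
# The attribute values below are assumptions and the set is incomplete: the real Namespace
# comes from the project's argument parser and also carries everything MPNEncoder reads
# (depth, bias, dropout, atom_messages, etc.).
from argparse import Namespace

example_args = Namespace(
    num_tasks=1, atom_fdim=133, bond_fdim=147, hidden_size=300, jtnn=False,
    ffn_num_layers=2, ffn_hidden_size=300, ffn_input_dropout=0.0, ffn_dropout=0.0,
    activation='ReLU', dataset_type='regression',
)
# model = build_model(example_args)  # requires the full set of encoder arguments as well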
def __init__(self, args: Namespace, num_mols: int, num_tasks: int, embedding_size: int,
             hidden_size: int, dropout: float, activation: str, classification: bool):
    super(MatrixFactorizer, self).__init__()

    self.num_mols = num_mols
    self.num_tasks = num_tasks
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.dropout = dropout
    self.activation = activation
    self.classification = classification
    self.random_mol_embeddings = args.random_mol_embeddings

    if args.random_mol_embeddings:
        self.mol_embedding = nn.Embedding(self.num_mols, self.embedding_size)
    else:
        self.mol_embedding = MPN(args)
    self.task_embedding = nn.Embedding(self.num_tasks, self.embedding_size)

    self.W1 = nn.Linear(2 * self.embedding_size, self.hidden_size)
    self.W2 = nn.Linear(self.hidden_size, 1)
    self.dropout_layer = nn.Dropout(self.dropout)
    self.act_func = get_activation_function(self.activation)

    if self.classification:
        self.sigmoid = nn.Sigmoid()
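# The forward pass of this matrix factorizer is not shown above. A plausible sketch,
# consistent with the layers defined in __init__ (the index-tensor inputs and the
# concatenation order are assumptions, not the original implementation; with
# random_mol_embeddings=False the molecule input would go through MPN instead):
def forward(self, mol_indices, task_indices):
    mol_vecs = self.mol_embedding(mol_indices)        # (batch, embedding_size)
    task_vecs = self.task_embedding(task_indices)     # (batch, embedding_size)
    hidden = torch.cat([mol_vecs, task_vecs], dim=1)  # (batch, 2 * embedding_size)
    hidden = self.dropout_layer(self.act_func(self.W1(hidden)))
    output = self.W2(hidden).squeeze(-1)              # one score per (molecule, task) pair
    if self.classification:
        output = self.sigmoid(output)
    return output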
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_dim

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, args.output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, args.output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)
def create_FPEncoder(args: TrainArgs):
    """
    Encodes a molecular fingerprint using a feed-forward network.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    :return: An nn.Sequential of feed-forward layers for the model.
    """
    first_linear_dim = args.features_size

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers (uses the fingerprint-FFN layer count throughout)
    if args.fp_ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, args.fp_ffn_output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.fp_ffn_hidden_size)]
        for _ in range(args.fp_ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.fp_ffn_hidden_size, args.fp_ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.fp_ffn_hidden_size, args.fp_ffn_output_size),
        ])

    # Return FFN model
    return nn.Sequential(*ffn)
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_dim

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)
    wd, dd = get_cc_dropout_hyper(args.train_data_size, args.regularization_scale)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [
            dropout,
        ]
        last_linear_dim = first_linear_dim
    else:
        ffn = [
            dropout,
            ConcreteDropout(layer=nn.Linear(first_linear_dim, args.ffn_hidden_size),
                            reg_acc=args.reg_acc,
                            weight_regularizer=wd,
                            dropout_regularizer=dd)
            if self.mc_dropout else nn.Linear(first_linear_dim, args.ffn_hidden_size)
        ]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                ConcreteDropout(layer=nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
                                reg_acc=args.reg_acc,
                                weight_regularizer=wd,
                                dropout_regularizer=dd)
                if self.mc_dropout else nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size)
            ])
        ffn.extend([
            activation,
            dropout,
        ])
        last_linear_dim = args.ffn_hidden_size

    # Create FFN model
    self._ffn = nn.Sequential(*ffn)

    if self.aleatoric:
        self.output_layer = nn.Linear(last_linear_dim, args.output_size)
        self.logvar_layer = nn.Linear(last_linear_dim, args.output_size)
    else:
        self.output_layer = nn.Linear(last_linear_dim, args.output_size)
def __init__(self,
             atom_fdim: int = 133,
             bond_fdim: int = 147,
             activation: str = 'ReLU',
             hidden_size: int = 300,
             bias: bool = False,
             atom_messages: bool = False):
    """
    Configures a message passing graph encoder.

    Args:
        atom_fdim (int): feature dimensions to use for atoms, default 133
        bond_fdim (int): feature dimensions to use for bonds, default 147
        activation (str): 'ReLU', 'LeakyReLU', 'PReLU', 'tanh', 'SELU', or 'ELU', default 'ReLU'
        hidden_size (int): dimension of messages, default 300
        bias (bool): include bias in internal linear layers, default False
        atom_messages (bool): pass messages from atoms to atoms along bonds, default False
    """
    super().__init__()
    self.act_func = get_activation_function(activation)
    self.cached_zero_vector = T.zeros(hidden_size, requires_grad=False)

    # Input
    input_dim = atom_fdim if atom_messages else bond_fdim
    self.W_i = nn.Linear(input_dim, hidden_size, bias=bias)

    self.atom_messages = atom_messages
def __init__(self, args: Namespace, atom_fdim: int, bond_fdim: int):
    """Initializes the MPNEncoder.

    :param args: Arguments.
    :param atom_fdim: Atom features dimension.
    :param bond_fdim: Bond features dimension.
    """
    super(MPNEncoder, self).__init__()
    self.atom_fdim = atom_fdim  # 133
    self.bond_fdim = bond_fdim  # 147
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.depth = args.depth
    self.dropout = args.dropout
    self.layers_per_message = 1
    self.undirected = args.undirected
    self.atom_messages = args.atom_messages  # Use messages on atoms instead of messages on bonds, default=False
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.epistemic = args.epistemic
    self.mc_dropout = self.epistemic == 'mc_dropout'
    self.args = args
    self.features_generator = args.features_generator

    if self.features_only or self.features_generator:
        return

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Cached zeros
    self.cached_zero_vector = nn.Parameter(torch.zeros(self.hidden_size), requires_grad=False)

    # Concrete Dropout for Bayesian NN
    wd, dd = get_cc_dropout_hyper(args.train_data_size, args.regularization_scale)

    # Input (default input dim -> bond_fdim, 147)
    input_dim = self.atom_fdim if self.atom_messages else self.bond_fdim
    if self.mc_dropout:
        self.W_i = ConcreteDropout(layer=nn.Linear(input_dim, self.hidden_size, bias=self.bias),
                                   reg_acc=args.reg_acc,
                                   weight_regularizer=wd,
                                   dropout_regularizer=dd)
    else:
        self.W_i = nn.Linear(input_dim, self.hidden_size, bias=self.bias)  # in 147, out 1000; self.bias=False (no bias)

    if self.atom_messages:
        w_h_input_size = self.hidden_size + self.bond_fdim
    else:
        w_h_input_size = self.hidden_size  # 1000

    # Shared weight matrix across depths (default)
    if self.mc_dropout:
        self.W_h = ConcreteDropout(layer=nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias),
                                   reg_acc=args.reg_acc,
                                   weight_regularizer=wd,
                                   dropout_regularizer=dd,
                                   depth=self.depth - 1)
        self.W_o = ConcreteDropout(layer=nn.Linear(self.atom_fdim + self.hidden_size, self.hidden_size),
                                   reg_acc=args.reg_acc,
                                   weight_regularizer=wd,
                                   dropout_regularizer=dd)
    else:
        self.W_h = nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias)  # in 1000, out 1000 (no bias)
        self.W_o = nn.Linear(self.atom_fdim + self.hidden_size, self.hidden_size)  # in 1000 + 133, out 1000 (with bias)
def __init__(self, args: TrainArgs, atom_fdim: int, bond_fdim: int):
    """
    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    :param atom_fdim: Atom feature vector dimension.
    :param bond_fdim: Bond feature vector dimension.
    """
    super(MPNEncoder, self).__init__()
    self.atom_fdim = atom_fdim
    self.bond_fdim = bond_fdim
    self.atom_messages = args.atom_messages
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.depth = args.depth
    self.dropout = args.dropout
    self.layers_per_message = 1
    self.undirected = args.undirected
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.device = args.device
    self.aggregation = args.aggregation
    self.aggregation_norm = args.aggregation_norm

    if self.features_only:
        return

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Cached zeros
    self.cached_zero_vector = nn.Parameter(torch.zeros(self.hidden_size), requires_grad=False)

    # Input
    input_dim = self.atom_fdim if self.atom_messages else self.bond_fdim
    self.W_i = nn.Linear(input_dim, self.hidden_size, bias=self.bias)

    if self.atom_messages:
        w_h_input_size = self.hidden_size + self.bond_fdim
    else:
        w_h_input_size = self.hidden_size

    # Shared weight matrix across depths (default)
    self.W_h = nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias)

    self.W_o = nn.Linear(self.atom_fdim + self.hidden_size, self.hidden_size)

    # Layer applied after concatenating the extra atom descriptors, used when args.atom_descriptors == 'descriptor'
    if args.atom_descriptors == 'descriptor':
        self.atom_descriptors_size = args.atom_descriptors_size
        self.atom_descriptors_layer = nn.Linear(
            self.hidden_size + self.atom_descriptors_size,
            self.hidden_size + self.atom_descriptors_size,
        )
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_size

    # When using dropout for uncertainty, use dropouts for evaluation in addition to training.
    if args.uncertainty == 'dropout':
        dropout = EvaluationDropout(args.dropout)
    else:
        dropout = nn.Dropout(args.dropout)

    activation = get_activation_function(args.activation)

    output_size = args.output_size
    if self.uncertainty:
        output_size *= 2

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 3):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, args.last_hidden_size),
        ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.last_hidden_size, output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)
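# A minimal, self-contained sketch of how the doubled head output is typically consumed
# downstream: split into predictions and uncertainties. The half-and-half chunking
# convention is an assumption, not taken from the original code.
import torch

num_tasks = 3
ffn_output = torch.randn(8, 2 * num_tasks)          # what the doubled head emits for a batch of 8
means, logvars = torch.chunk(ffn_output, 2, dim=1)  # first half: predictions, second half: log-variances
variances = torch.exp(logvars)                      # exponentiate to get positive uncertainties
print(means.shape, variances.shape)                 # torch.Size([8, 3]) torch.Size([8, 3])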
def __init__(self, args: TrainArgs, featurizer: bool = False):
    """
    Initializes the MoleculeModel.

    :param args: Arguments.
    :param featurizer: Whether the model should act as a featurizer, i.e. outputting learned
                       features in the final layer before prediction.
    """
    super(MoleculeModelDUN, self).__init__()

    self.ffn_num_layers = args.ffn_num_layers
    self.output_size = args.num_tasks
    self.prior_sig = args.prior_sig_dun

    ######### ENCODER
    self.encoder = MPNDUN(args)

    ######### ACTIVATION LAYER
    self.act_func = get_activation_function(args.activation)

    ######### LINEAR LAYERS (handles up to 4 layers)
    # set first linear dimension
    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_size

    # if single layer
    if args.ffn_num_layers == 1:
        self.layer_single = BayesLinear(first_linear_dim, self.output_size, self.prior_sig)
    # if multiple layers
    else:
        self.layer_in = BayesLinear(first_linear_dim, args.ffn_hidden_size, self.prior_sig)
        if args.ffn_num_layers > 2:
            self.layer_hid_1 = BayesLinear(args.ffn_hidden_size, args.ffn_hidden_size, self.prior_sig)
        if args.ffn_num_layers > 3:
            self.layer_hid_2 = BayesLinear(args.ffn_hidden_size, args.ffn_hidden_size, self.prior_sig)
        self.layer_out = BayesLinear(args.ffn_hidden_size, self.output_size, self.prior_sig)

    # create log noise parameter
    self.create_log_noise(args)
def __init__(self,
             atom_fdim: int = 133,
             bond_fdim: int = 147,
             activation: str = 'ReLU',
             hidden_size: int = 300,
             bias: bool = False,
             depth: int = 3,
             dropout: float = 0.0,
             layers_per_message: int = 1,
             undirected: bool = False,
             atom_messages: bool = False):
    """
    Configures a message passing graph encoder.

    Args:
        atom_fdim (int): feature dimensions to use for atoms, default 133
        bond_fdim (int): feature dimensions to use for bonds, default 147
        activation (str): 'ReLU', 'LeakyReLU', 'PReLU', 'tanh', 'SELU', or 'ELU', default 'ReLU'
        hidden_size (int): dimension of messages, default 300
        bias (bool): include bias in internal linear layers, default False
        depth (int): number of message passing steps, default 3
        dropout (float): dropout rate on messages, default 0.0
        layers_per_message (int): linear layers included in message update function, default 1
        undirected (bool): propagate messages bidirectionally, default False
        atom_messages (bool): pass messages from atoms to atoms along bonds, default False
    """
    super().__init__()
    self.dropout_layer = nn.Dropout(p=dropout)
    self.act_func = get_activation_function(activation)
    self.cached_zero_vector = T.zeros(hidden_size, requires_grad=False)

    # Input
    input_dim = atom_fdim if atom_messages else bond_fdim
    self.W_i = nn.Linear(input_dim, hidden_size, bias=bias)

    if atom_messages:
        w_h_input_size = hidden_size + bond_fdim
    else:
        w_h_input_size = hidden_size

    self.W_h = nn.Sequential(*(
        [nn.Linear(w_h_input_size, hidden_size, bias=bias)] +
        sum([[self.act_func, nn.Linear(hidden_size, hidden_size, bias=bias)]
             for _ in range(layers_per_message - 1)], [])
    ))

    self.W_o = nn.Linear(atom_fdim + hidden_size, hidden_size)

    self.atom_messages = atom_messages
    self.depth = depth
    self.undirected = undirected
def __init__(self,
             atom_fdim: int = 133,
             bond_fdim: int = 147,
             activation: str = 'ReLU',
             hidden_size: int = 300,
             context_size: int = 300,
             bias: bool = False,
             dropout: float = 0.0,
             layers_per_message: int = 1,
             undirected: bool = False,
             atom_messages: bool = False,
             messages_per_pass: int = 2):
    """
    Configures a message passing graph encoder.

    Args:
        atom_fdim (int): feature dimensions to use for atoms, default 133
        bond_fdim (int): feature dimensions to use for bonds, default 147
        activation (str): 'ReLU', 'LeakyReLU', 'PReLU', 'tanh', 'SELU', or 'ELU', default 'ReLU'
        hidden_size (int): dimension of messages, default 300
        context_size (int): dimension of the context vector appended to messages, default 300
        bias (bool): include bias in internal linear layers, default False
        dropout (float): dropout rate on messages, default 0.0
        layers_per_message (int): linear layers included in message update function, default 1
        undirected (bool): propagate messages bidirectionally, default False
        atom_messages (bool): pass messages from atoms to atoms along bonds, default False
        messages_per_pass (int): messages passed between context updates, default 2
    """
    super().__init__()
    assert not (undirected and atom_messages), "Cannot have undirected atom messages -- sorry"

    self.dropout_layer = nn.Dropout(p=dropout)
    self.act_func = get_activation_function(activation)

    if atom_messages:
        w_h_input_size = hidden_size + bond_fdim + context_size
    else:
        w_h_input_size = hidden_size + context_size

    self.W_h = nn.Sequential(*(
        [nn.Linear(w_h_input_size, hidden_size, bias=bias)] +
        sum([[self.act_func, nn.Linear(hidden_size, hidden_size, bias=bias)]
             for _ in range(layers_per_message - 1)], [])
    ))

    self.atom_messages = atom_messages
    self.undirected = undirected
    self.depth = messages_per_pass
def __init__(self, args: TrainArgs, atom_fdim: int, bond_fdim: int):
    """Initializes the MPNEncoder.

    :param args: Arguments.
    :param atom_fdim: Atom features dimension.
    :param bond_fdim: Bond features dimension.
    """
    super(MPNEncoder, self).__init__()
    self.atom_fdim = atom_fdim
    self.bond_fdim = bond_fdim
    self.atom_messages = args.atom_messages
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.depth = args.depth
    self.dropout = args.dropout
    self.layers_per_message = 1
    self.undirected = args.undirected
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.device = args.device
    self.args = args

    if self.features_only:
        return

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Cached zeros
    self.cached_zero_vector = nn.Parameter(torch.zeros(self.hidden_size), requires_grad=False)

    # Input
    input_dim = self.atom_fdim if self.atom_messages else self.bond_fdim
    self.W_i = nn.Linear(input_dim, self.hidden_size, bias=self.bias)

    if self.atom_messages:
        w_h_input_size = self.hidden_size + self.bond_fdim
    else:
        w_h_input_size = self.hidden_size

    # Shared weight matrix across depths (default)
    self.W_h = nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias)

    self.W_o = nn.Linear(self.atom_fdim + self.hidden_size, self.hidden_size)
def create_ffn(self, args: TrainArgs) -> None:
    """
    Creates the feed-forward layers for the model.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size * args.number_of_molecules
        if args.use_input_features:
            first_linear_dim += args.features_size

    if args.atom_descriptors == 'descriptor':
        first_linear_dim += args.atom_descriptors_size

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, self.output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, self.output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)

    if args.checkpoint_frzn is not None:
        if args.frzn_ffn_layers > 0:
            # Freeze weights and bias for given number of layers
            for param in list(self.ffn.parameters())[0:2 * args.frzn_ffn_layers]:
                param.requires_grad = False
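# Why the slice [0:2 * args.frzn_ffn_layers] freezes whole layers: each nn.Linear contributes
# exactly two parameter tensors (weight and bias), while Dropout and the activation contribute
# none. A small self-contained check of that convention, with arbitrary toy sizes:
import torch.nn as nn

toy_ffn = nn.Sequential(nn.Dropout(0.0), nn.Linear(10, 20), nn.ReLU(), nn.Dropout(0.0), nn.Linear(20, 1))
frzn_ffn_layers = 1
for param in list(toy_ffn.parameters())[:2 * frzn_ffn_layers]:
    param.requires_grad = False
print([p.requires_grad for p in toy_ffn.parameters()])  # [False, False, True, True]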
def __init__(self,
             atom_fdim: int = 133,
             hidden_size: int = 300,
             activation: str = 'ReLU',
             dropout: float = 0.0,
             atom_messages: bool = False):
    super().__init__()
    self.W_o = nn.Linear(atom_fdim + hidden_size, hidden_size)
    self.dropout_layer = nn.Dropout(p=dropout)
    self.act_func = get_activation_function(activation)
    self.cached_zero_vector = T.zeros(hidden_size, requires_grad=False)
    self.atom_messages = atom_messages
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_dim

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    output_sizes = [args.output_size] if args.multitask_split is None else args.multitask_split
    ffns = []
    for output_size in output_sizes:
        if args.ffn_num_layers == 1:
            ffn = [
                dropout,
                nn.Linear(first_linear_dim, output_size)
            ]
        else:
            ffn = [
                dropout,
                nn.Linear(first_linear_dim, args.ffn_hidden_size)
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn.extend([
                    activation,
                    dropout,
                    nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
                ])
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, output_size),
            ])
        ffns.append(nn.Sequential(*ffn))

    # Create FFN model
    self.ffns = nn.ModuleList(ffns)
def create_ffn_from_tuple(self, args: TrainArgs) -> None:
    """
    Creates the feed-forward layers for the model.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size * args.number_of_molecules
        if args.use_input_features:
            first_linear_dim += args.features_size

    if args.atom_descriptors == 'descriptor':
        first_linear_dim += args.atom_descriptors_size

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [
            dropout,
            nn.Linear(first_linear_dim, self.output_size)
        ]
    else:
        ffn = [
            dropout,
            nn.Linear(first_linear_dim, args.ffn_hidden_size[0])
        ]
        for i in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size[i], args.ffn_hidden_size[i + 1]),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size[-1], self.output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)
def __init__(self, args: Namespace, atom_fdim: int, bond_fdim: int):
    super(MPNEncoder, self).__init__()
    self.atom_fdim = atom_fdim
    self.bond_fdim = bond_fdim
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.depth = args.depth
    self.dropout = args.dropout
    self.layers_per_message = 1
    self.undirected = args.undirected
    self.atom_messages = args.atom_messages
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.args = args

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Input
    input_dim = self.atom_fdim
    self.W_i_atom = nn.Linear(input_dim, self.hidden_size, bias=self.bias)
    input_dim = self.bond_fdim
    self.W_i_bond = nn.Linear(input_dim, self.hidden_size, bias=self.bias)

    w_h_input_size_atom = self.hidden_size + self.bond_fdim
    self.W_h_atom = nn.Linear(w_h_input_size_atom, self.hidden_size, bias=self.bias)

    w_h_input_size_bond = self.hidden_size
    for depth in range(self.depth - 1):
        self._modules[f'W_h_{depth}'] = nn.Linear(w_h_input_size_bond, self.hidden_size, bias=self.bias)

    self.W_o = nn.Linear(self.hidden_size * 2, self.hidden_size)

    self.gru = BatchGRU(self.hidden_size)

    self.lr = nn.Linear(self.hidden_size * 3, self.hidden_size, bias=self.bias)
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    self.ops = args.ops
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size * 2  # To account for 2 molecules
        if args.use_input_features:
            first_linear_dim += args.features_dim

    if args.drug_only or args.cmpd_only or self.ops != 'concat':
        first_linear_dim = int(first_linear_dim / 2)

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, args.output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, args.output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)
def __init__(self, args: TrainArgs, atom_fdim: int, bond_fdim: int):
    """Initializes the MPNEncoder.

    :param args: Arguments.
    :param atom_fdim: Atom features dimension.
    :param bond_fdim: Bond features dimension.
    """
    super(MPNEncoderDUN, self).__init__()
    self.atom_fdim = atom_fdim
    self.bond_fdim = bond_fdim
    self.atom_messages = args.atom_messages
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.layers_per_message = 1
    self.undirected = args.undirected
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.device = args.device
    self.dropout_mpnn = args.dropout_mpnn

    # DUN args
    self.prior_sig = args.prior_sig_dun
    self.depth_min = args.depth_min
    self.depth_max = args.depth_max

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout_mpnn)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Cached zeros
    self.cached_zero_vector = nn.Parameter(torch.zeros(self.hidden_size), requires_grad=False)

    # Input
    input_dim = self.atom_fdim if self.atom_messages else self.bond_fdim
    w_h_input_size = self.hidden_size

    # Bayes linear layers
    self.W_i = BayesLinear(input_dim, self.hidden_size, self.prior_sig, bias=self.bias)
    self.W_h = BayesLinear(w_h_input_size, self.hidden_size, self.prior_sig, bias=self.bias)
    self.W_o = BayesLinear(self.atom_fdim + self.hidden_size, self.hidden_size, self.prior_sig)
def create_ffn(self, args: TrainArgs):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size
        if args.use_input_features:
            first_linear_dim += args.features_size

    dropout = nn.Dropout(args.dropout_ffn)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, self.output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, self.output_size),
        ])

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)
def create_ffn(output_size: int, input_size: int, args: TrainArgs):
    """
    Creates the feed-forward layers for the model.

    :param output_size: Dimension of the FFN output.
    :param input_size: Dimension of the FFN input.
    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    """
    first_linear_dim = args.hidden_size * args.number_of_molecules
    # need to also add other 2 network outputs
    if args.use_input_features:
        first_linear_dim += args.features_size
    if args.atom_descriptors == "descriptor":
        first_linear_dim += args.atom_descriptors_size
    # The explicitly supplied input size overrides the dimension computed above
    first_linear_dim = input_size

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, output_size),
        ])

    # return FFN model
    return nn.Sequential(*ffn)
def create_ffn(self, args: Namespace):
    """
    Creates the feed-forward network for the model.

    :param args: Arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        if args.AM_dim1:
            first_linear_dim = args.hidden_size * 2
            if args.use_input_features:
                first_linear_dim += args.features_dim
        else:
            first_linear_dim = args.hidden_size
            if args.use_input_features:
                first_linear_dim += args.features_dim

    dropout_d0 = nn.Dropout(args.dropout)     # wei, for atomic fingerprint, depth = 0
    dropout = nn.Dropout(args.dropout)
    dropout_d2 = nn.Dropout(args.dropout)     # wei, for atomic fingerprint, depth = 2
    dropout_final = nn.Dropout(args.dropout)  # wei, for atomic fingerprint, final depth
    dropout_mol = nn.Dropout(args.dropout)    # wei, for molecular fingerprint
    activation = get_activation_function(args.activation)

    if args.AM_dim1:
        # Create FFN layers
        if args.ffn_num_layers == 1:
            ffn = [
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size * 2))
            ]
            ffn.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size * 2, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])
        else:
            ffn = [
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size * 2))
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn.extend([
                    TimeDistributed_wrapper(activation),
                    TimeDistributed_wrapper(dropout),
                    TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size * 2, args.ffn_hidden_size * 2))
                ])
            ffn.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size * 2, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])

        # Create FFN model
        self.ffn = nn.Sequential(*ffn)
    else:
        # Create FFN layers, for atomic fp, depth=0
        if args.ffn_num_layers == 1:
            ffn_d0 = [
                TimeDistributed_wrapper(dropout_d0),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.output_size))
            ]
            ffn_d0.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_d0),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])
        else:
            ffn_d0 = [
                TimeDistributed_wrapper(dropout_d0),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size))
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn_d0.extend([
                    TimeDistributed_wrapper(activation),
                    TimeDistributed_wrapper(dropout_d0),
                    TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size)),
                ])
            ffn_d0.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_d0),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])

        # Create FFN layers, for atomic fp, depth=1
        if args.ffn_num_layers == 1:
            ffn_d1 = [
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.output_size))
            ]
            ffn_d1.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])
        else:
            ffn_d1 = [
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size))
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn_d1.extend([
                    TimeDistributed_wrapper(activation),
                    TimeDistributed_wrapper(dropout),
                    TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size)),
                ])
            ffn_d1.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])

        # Create FFN layers, for atomic fp, depth=2
        if args.ffn_num_layers == 1:
            ffn_d2 = [
                TimeDistributed_wrapper(dropout_d2),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.output_size))
            ]
            ffn_d2.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_d2),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])
        else:
            ffn_d2 = [
                TimeDistributed_wrapper(dropout_d2),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size))
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn_d2.extend([
                    TimeDistributed_wrapper(activation),
                    TimeDistributed_wrapper(dropout_d2),
                    TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size)),
                ])
            ffn_d2.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_d2),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])

        # Create FFN layers, for atomic fp, final depth
        if args.ffn_num_layers == 1:
            ffn_final = [
                TimeDistributed_wrapper(dropout_final),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size))
            ]
            ffn_final.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_final),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])
        else:
            ffn_final = [
                TimeDistributed_wrapper(dropout_final),
                TimeDistributed_wrapper(nn.Linear(first_linear_dim, args.ffn_hidden_size))
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn_final.extend([
                    TimeDistributed_wrapper(activation),
                    TimeDistributed_wrapper(dropout_final),
                    TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size))
                ])
            ffn_final.extend([
                TimeDistributed_wrapper(activation),
                TimeDistributed_wrapper(dropout_final),
                TimeDistributed_wrapper(nn.Linear(args.ffn_hidden_size, args.output_size)),
                LambdaLayer(lambda x: torch.sum(x, 1))
            ])

        # Create FFN layers, for molecular fp
        if args.ffn_num_layers == 1:
            ffn_mol = [
                dropout_mol,
                nn.Linear(first_linear_dim, args.output_size)
            ]
        else:
            ffn_mol = [
                dropout_mol,
                nn.Linear(first_linear_dim, args.ffn_hidden_size)
            ]
            for _ in range(args.ffn_num_layers - 2):
                ffn_mol.extend([
                    activation,
                    dropout_mol,
                    nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
                ])
            ffn_mol.extend([
                activation,
                dropout_mol,
                nn.Linear(args.ffn_hidden_size, args.output_size),
            ])

        # Create FFN model for atomic fp, depth=0
        self.ffn_d0 = nn.Sequential(*ffn_d0)
        # Create FFN model for atomic fp, depth=1
        self.ffn_d1 = nn.Sequential(*ffn_d1)
        # Create FFN model for atomic fp, depth=2
        self.ffn_d2 = nn.Sequential(*ffn_d2)
        # Create FFN model for atomic fp, final depth
        self.ffn_final = nn.Sequential(*ffn_final)
        # Create FFN model for molecular fp
        self.ffn_mol = nn.Sequential(*ffn_mol)
def create_ffn(self, args: TrainArgs) -> None:
    """
    Creates the feed-forward layers for the model.

    :param args: A :class:`~chemprop.args.TrainArgs` object containing model arguments.
    """
    self.multiclass = args.dataset_type == 'multiclass'
    if self.multiclass:
        self.num_classes = args.multiclass_num_classes

    if args.features_only:
        first_linear_dim = args.features_size
    else:
        first_linear_dim = args.hidden_size * args.number_of_molecules
        if args.use_input_features:
            first_linear_dim += args.features_size

    if args.atom_descriptors == 'descriptor':
        first_linear_dim += args.atom_descriptors_size

    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)

    # Create FFN layers
    if args.ffn_num_layers == 1:
        ffn = [dropout, nn.Linear(first_linear_dim, self.output_size)]
    else:
        ffn = [dropout, nn.Linear(first_linear_dim, args.ffn_hidden_size)]
        for _ in range(args.ffn_num_layers - 2):
            ffn.extend([
                activation,
                dropout,
                nn.Linear(args.ffn_hidden_size, args.ffn_hidden_size),
            ])
        ffn.extend([
            activation,
            dropout,
            nn.Linear(args.ffn_hidden_size, self.output_size),
        ])

    # If spectra model, also include spectra activation
    if args.dataset_type == 'spectra':
        if args.spectra_activation == 'softplus':
            spectra_activation = nn.Softplus()
        else:
            # default exponential activation which must be made into a custom nn module
            class nn_exp(torch.nn.Module):
                def __init__(self):
                    super(nn_exp, self).__init__()

                def forward(self, x):
                    return torch.exp(x)

            spectra_activation = nn_exp()
        ffn.append(spectra_activation)

    # Create FFN model
    self.ffn = nn.Sequential(*ffn)

    if args.checkpoint_frzn is not None:
        if args.frzn_ffn_layers > 0:
            # Freeze weights and bias for given number of layers
            for param in list(self.ffn.parameters())[0:2 * args.frzn_ffn_layers]:
                param.requires_grad = False
def create_ffn(self, args: Namespace, params: Dict[str, nn.Parameter] = None):
    # Learning virtual edges
    if args.learn_virtual_edges:
        args.lve_model = self.encoder  # to make this accessible during featurization, to select virtual edges

    if args.dataset_type == 'bert_pretraining':
        self.ffn = lambda x: x
        return

    if args.dataset_type == 'regression_with_binning':
        output_size = args.num_bins * args.num_tasks
    elif args.dataset_type == 'unsupervised':
        output_size = args.unsupervised_n_clusters
    else:
        output_size = args.num_tasks

    # Additional features
    if args.features_only:
        first_linear_dim = args.features_dim
    else:
        first_linear_dim = args.hidden_size * (1 + args.jtnn)
        if args.use_input_features:
            first_linear_dim += args.features_dim

    if args.mayr_layers:
        if params is not None:
            raise ValueError('Setting parameters not yet supported for mayr_layers')

        drop_layer = lambda p: MayrDropout(p)
        linear_layer = lambda input_dim, output_dim, p, idx: MayrLinear(input_dim, output_dim, p)
    else:
        drop_layer = lambda p: nn.Dropout(p)

        def linear_layer(input_dim: int, output_dim: int, p: float, idx: int):
            if params is not None:
                return MAMLLinear(weight=params['ffn.{}.weight'.format(idx)],
                                  bias=params['ffn.{}.bias'.format(idx)])
            return nn.Linear(input_dim, output_dim)

    # Create FFN layers
    idx = 1
    if args.ffn_num_layers == 1:
        ffn = [
            drop_layer(args.ffn_input_dropout),
            linear_layer(first_linear_dim, args.output_size, args.ffn_input_dropout, idx)
        ]
    else:
        ffn = [
            drop_layer(args.ffn_input_dropout),
            linear_layer(first_linear_dim, args.ffn_hidden_size, args.ffn_input_dropout, idx)
        ]
        for _ in range(args.ffn_num_layers - 2):
            idx += 3
            ffn.extend([
                get_activation_function(args.activation),
                drop_layer(args.ffn_dropout),
                linear_layer(args.ffn_hidden_size, args.ffn_hidden_size, args.ffn_dropout, idx),
            ])
        idx += 3
        ffn.extend([
            get_activation_function(args.activation),
            drop_layer(args.ffn_dropout),
            linear_layer(args.ffn_hidden_size, args.output_size, args.ffn_dropout, idx),
        ])

    # Classification
    if args.dataset_type == 'classification':
        ffn.append(nn.Sigmoid())

    # Combined model
    self.ffn = nn.Sequential(*ffn)

    if args.dataset_type == 'kernel':
        self.kernel_output_layer = LearnedKernel(args)

    if args.gradual_unfreezing:
        self.create_unfreeze_queue(args)
def __init__(self, args: Namespace, atom_targets, bond_targets=None,
             atom_constraints=None, bond_constraints=None, attention=False):
    """
    :param args: Arguments.
    :param atom_targets: Atom-level targets; one readout FFN is created per target.
    :param bond_targets: Bond-level targets; one readout FFN is created per target.
    :param atom_constraints: Optional constraints applied to the atom readouts.
    :param bond_constraints: Optional constraints applied to the bond readouts.
    :param attention: Whether to use an attention readout (FFNAtten) for the atom targets.
    """
    features_size = args.hidden_size
    hidden_size = args.ffn_hidden_size
    num_layers = args.ffn_num_layers
    output_size = args.output_size
    dropout = nn.Dropout(args.dropout)
    activation = get_activation_function(args.activation)
    super(MultiReadout, self).__init__()

    for i, a_target in enumerate(atom_targets):
        constraint = atom_constraints[i] if atom_constraints is not None and i < len(atom_constraints) else None
        if attention:
            self.add_module(f'readout_{i}',
                            FFNAtten(features_size, hidden_size, num_layers, output_size,
                                     dropout, activation, constraint, ffn_type='atom'))
        else:
            self.add_module(f'readout_{i}',
                            FFN(features_size, hidden_size, num_layers, output_size,
                                dropout, activation, constraint, ffn_type='atom'))

    i += 1
    bond_targets = bond_targets or []  # allow the default of no bond targets
    for j, b_target in enumerate(bond_targets):
        i += j
        # Guard and index both use j so the constraints line up with the bond targets
        constraint = bond_constraints[j] if bond_constraints and j < len(bond_constraints) else None
        self.add_module(f'readout_{i}',
                        FFN(features_size, hidden_size, num_layers, output_size,
                            dropout, activation, constraint, ffn_type='bond'))

    self.ffn_list = AttrProxy(self, 'readout_')
def __init__(self,
             args: Namespace,
             atom_fdim: int,
             bond_fdim: int,
             params: Dict[str, nn.Parameter] = None,
             param_prefix: str = 'encoder.encoder.'):
    """Initializes the MPN.

    :param args: Arguments.
    :param atom_fdim: Atom feature dimension.
    :param bond_fdim: Bond feature dimension.
    :param params: Parameters to use instead of creating parameters.
    :param param_prefix: Prefix of parameter names.
    """
    super(MPNEncoder, self).__init__()
    self.atom_fdim = atom_fdim
    self.bond_fdim = bond_fdim
    self.param_prefix = param_prefix
    self.hidden_size = args.hidden_size
    self.bias = args.bias
    self.depth = args.depth
    self.diff_depth_weights = args.diff_depth_weights
    self.layers_per_message = args.layers_per_message
    self.normalize_messages = args.normalize_messages
    self.use_layer_norm = args.layer_norm
    self.dropout = args.dropout
    self.attention = args.attention
    self.message_attention = args.message_attention
    self.global_attention = args.global_attention
    self.message_attention_heads = args.message_attention_heads
    self.master_node = args.master_node
    self.master_dim = args.master_dim
    self.use_master_as_output = args.use_master_as_output
    self.deepset = args.deepset
    self.set2set = args.set2set
    self.set2set_iters = args.set2set_iters
    self.learn_virtual_edges = args.learn_virtual_edges
    self.bert_pretraining = args.dataset_type == 'bert_pretraining'
    if self.bert_pretraining:
        self.output_size = args.vocab.output_size
        self.features_size = args.features_size
    self.undirected = args.undirected
    self.atom_messages = args.atom_messages
    self.features_only = args.features_only
    self.use_input_features = args.use_input_features
    self.args = args

    if self.features_only:
        return  # won't use any of the graph stuff in this case

    if self.atom_messages:
        # Not supported with atom messages
        assert not (self.message_attention or self.global_attention or self.learn_virtual_edges
                    or self.master_node or self.bert_pretraining or self.undirected)
        assert self.layers_per_message == 1

    # Layer norm
    if self.use_layer_norm:
        self.layer_norm = nn.LayerNorm(self.hidden_size)

    # Dropout
    self.dropout_layer = nn.Dropout(p=self.dropout)

    # Activation
    self.act_func = get_activation_function(args.activation)

    # Using pre-specified parameters (for meta learning)
    if params is not None:
        params = defaultdict(lambda: None, params)  # nonexistent parameters default to None

        self.cached_zero_vector = params[self.param_prefix + 'cached_zero_vector']
        self.W_i = partial(F.linear,
                           weight=params[self.param_prefix + 'W_i.weight'],
                           bias=params[self.param_prefix + 'W_i.bias'])

        if self.message_attention:
            self.num_heads = self.message_attention_heads
            self.W_ma = [
                partial(F.linear,
                        weight=params[self.param_prefix + f'W_ma.{i}.weight'],
                        bias=params[self.param_prefix + f'W_ma.{i}.bias'])
                for i in range(self.num_heads)
            ]

        if args.learn_virtual_edges:
            self.lve = partial(F.linear,
                               weight=params[self.param_prefix + 'lve.weight'],
                               bias=params[self.param_prefix + 'lve.bias'])

        if self.diff_depth_weights:
            self.W_h = [[
                partial(F.linear,
                        weight=params[self.param_prefix + f'W_h.{i}.{j}.weight'],
                        bias=params[self.param_prefix + f'W_h.{i}.{j}.bias'])
                for j in range(self.depth - 1)
            ] for i in range(self.layers_per_message)]
        else:
            # TODO this option is currently broken; the params are None
            self.W_h = [[
                partial(F.linear,
                        weight=params[self.param_prefix + f'W_h.{i}.{j}.weight'],
                        bias=params[self.param_prefix + f'W_h.{i}.{j}.bias'])
                for j in range(self.depth - 1)
            ] for i in range(self.layers_per_message)]

        self.W_ga1 = partial(F.linear,
                             weight=params[self.param_prefix + 'W_ga1.weight'],
                             bias=params[self.param_prefix + 'W_ga1.bias'])
        self.W_ga2 = partial(F.linear,
                             weight=params[self.param_prefix + 'W_ga2.weight'],
                             bias=params[self.param_prefix + 'W_ga2.bias'])
        self.W_master_in = partial(F.linear,
                                   weight=params[self.param_prefix + 'W_master_in.weight'],
                                   bias=params[self.param_prefix + 'W_master_in.bias'])
        self.W_master_out = partial(F.linear,
                                    weight=params[self.param_prefix + 'W_master_out.weight'],
                                    bias=params[self.param_prefix + 'W_master_out.bias'])
        self.W_o = partial(F.linear,
                           weight=params[self.param_prefix + 'W_o.weight'],
                           bias=params[self.param_prefix + 'W_o.bias'])
        self.W_s2s_a = partial(F.linear,
                               weight=params[self.param_prefix + 'W_s2s_a.weight'],
                               bias=params[self.param_prefix + 'W_s2s_a.bias'])
        self.W_s2s_b = partial(F.linear,
                               weight=params[self.param_prefix + 'W_s2s_b.weight'],
                               bias=params[self.param_prefix + 'W_s2s_b.bias'])

        if self.set2set:
            raise ValueError('Setting params of LSTM not supported yet.')

        self.W_a = partial(F.linear,
                           weight=params[self.param_prefix + 'W_a.weight'],
                           bias=params[self.param_prefix + 'W_a.bias'])
        self.W_b = partial(F.linear,
                           weight=params[self.param_prefix + 'W_b.weight'],
                           bias=params[self.param_prefix + 'W_b.bias'])
        self.W_v = partial(F.linear,
                           weight=params[self.param_prefix + 'W_v.weight'],
                           bias=params[self.param_prefix + 'W_v.bias'])
        self.W_f = partial(F.linear,
                           weight=params[self.param_prefix + 'W_f.weight'],
                           bias=params[self.param_prefix + 'W_f.bias'])

        return

    # Cached zeros
    self.cached_zero_vector = nn.Parameter(torch.zeros(self.hidden_size), requires_grad=False)

    # Input
    input_dim = self.atom_fdim if self.atom_messages else self.bond_fdim
    self.W_i = nn.Linear(input_dim, self.hidden_size, bias=self.bias)

    # Message attention
    if self.message_attention:
        self.num_heads = self.message_attention_heads
        w_h_input_size = self.num_heads * self.hidden_size
        self.W_ma = nn.ModuleList([
            nn.Linear(self.hidden_size, self.hidden_size, bias=self.bias)
            for _ in range(self.num_heads)
        ])
    elif self.atom_messages:
        w_h_input_size = self.hidden_size + self.bond_fdim
    else:
        w_h_input_size = self.hidden_size

    if self.learn_virtual_edges:
        self.lve = nn.Linear(self.atom_fdim, self.atom_fdim)

    # Message passing
    if self.diff_depth_weights:
        # Different weight matrix for each depth
        self.W_h = nn.ModuleList([
            nn.ModuleList([
                nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias)
                for _ in range(self.depth - 1)
            ]) for _ in range(self.layers_per_message)
        ])
    else:
        # Shared weight matrix across depths (default)
        self.W_h = nn.ModuleList([
            nn.ModuleList([nn.Linear(w_h_input_size, self.hidden_size, bias=self.bias)] * (self.depth - 1))
            for _ in range(self.layers_per_message)
        ])

    if self.global_attention:
        self.W_ga1 = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.W_ga2 = nn.Linear(self.hidden_size, self.hidden_size)

    if self.master_node:
        self.W_master_in = nn.Linear(self.hidden_size, self.master_dim)
        self.W_master_out = nn.Linear(self.master_dim, self.hidden_size)

    # Readout
    if not (self.master_node and self.use_master_as_output):
        self.W_o = nn.Linear(self.atom_fdim + self.hidden_size, self.hidden_size)

    if self.deepset:
        self.W_s2s_a = nn.Linear(self.hidden_size, self.hidden_size, bias=self.bias)
        self.W_s2s_b = nn.Linear(self.hidden_size, self.hidden_size, bias=self.bias)

    if self.set2set:
        self.set2set_rnn = nn.LSTM(
            input_size=self.hidden_size,
            hidden_size=self.hidden_size,
            dropout=self.dropout,
            bias=False  # no bias so that an input of all zeros stays all zero
        )

    if self.attention:
        self.W_a = nn.Linear(self.hidden_size, self.hidden_size, bias=self.bias)
        self.W_b = nn.Linear(self.hidden_size, self.hidden_size)

    if self.bert_pretraining:
        if args.bert_vocab_func == 'feature_vector':
            self.W_v = nn.Linear(self.hidden_size, args.vocab.output_size)
        else:
            self.W_v = nn.Linear(self.hidden_size, self.output_size)

        if self.features_size is not None:
            self.W_f = nn.Linear(self.hidden_size, self.features_size)