class BagNet(BaseNet):
    """
    Bag-level network with a fixed pooling step.

    Every conformer is encoded by a ``MainNet`` instance, the encodings that
    belong to one molecule are collapsed into a single bag vector by the
    configured pooling method, and a linear layer maps that vector to one
    output value.

    This class is not meant to be used directly: its regressor/classifier
    subclasses define the loss and enable training.
    """

    def __init__(self, ndim: Sequence, pool: str = 'mean', init_cuda: bool = False) -> None:
        """
        Parameters
        ----------
        ndim: Sequence
            Hyperparameter for MainNet: each entry gives the number of nodes
            of one layer, the length of the sequence gives the number of
            layers. The last entry is also the input width of the estimator.
        pool: str, default is 'mean'
            Pooling method, forwarded to ``Pooling``.
        init_cuda: bool, default is False
            Use Cuda GPU or not?
        """
        super().__init__(init_cuda=init_cuda)
        self.main_net = MainNet(ndim)
        self.pooling = Pooling(pool)
        self.estimator = Linear(ndim[-1], 1)

        if not self.init_cuda:
            return
        # Only the encoder and the estimator are moved; the pooling object
        # is left where it is, exactly as before.
        for gpu_module in (self.main_net, self.estimator):
            gpu_module.cuda()

    def forward(self, x: torch.Tensor, m: torch.Tensor) -> Tuple[None, torch.Tensor]:
        """
        Feed forward input data.

        Parameters
        ----------
        x: torch.Tensor
            Input passed to the main net.
        m: torch.Tensor
            Second argument of the pooling step (the padding mask in the
            example below).

        Returns
        --------
        Tuple ``(None, out)``. ``None`` stands for the weights of conformers,
        which are not applicable here. ``out`` is the estimator output; when
        the instance is a ``BaseClassifier`` a sigmoid has already been
        applied to it.

        Examples
        --------
        >>> import torch
        >>> import numpy as np
        >>> from torch import randn
        >>> from miqsar.estimators.mi_nets import BagNet
        >>> x_train = randn((3, 3, 3))
        >>> bag_net = BagNet(ndim=(x_train[0].shape[-1], 4, 6, 4), init_cuda=False)
        >>> _, m = bag_net.add_padding(x_train)
        >>> m = torch.from_numpy(m.astype('float32'))
        >>> _ = bag_net.forward(x_train, m) # (assign result to a variable to suppress std output)
        """
        bag_vector = self.pooling(self.main_net(x), m)
        prediction = self.estimator(bag_vector)
        if isinstance(self, BaseClassifier):
            prediction = Sigmoid()(prediction)
        return None, prediction
# NOTE(review): the two indented statements below are the tail of a method
# whose ``def`` line lies outside this view; they are reproduced unchanged.
        x = LeakyReLU()(self.bn2(self.fc1(x)))
        return self.fc2(x), LeakyReLU()(self.bn_q1(self.fc1_q(x)))


''' 生成网络 '''
# (English for the string above: "build the networks".)
# NOTE(review): the source arrived collapsed onto one line; the nesting under
# the ``if`` statements below is reconstructed -- confirm against the original.

# Instantiate generator and discriminator and wrap both in DataParallel.
Net_G = Generator()
Net_D = Discriminator()
Net_G = DataParallel(Net_G)
Net_D = DataParallel(Net_D)

# Linear head mapping an embedding_len-wide input to c1_len outputs.
Q_cat = Linear(embedding_len, c1_len)

# NOTE(review): as laid out here, this branch rebinds Net_G / Net_D to fresh
# instances, which drops the DataParallel wrappers created above -- confirm
# that this is intended.
# NOTE(review): the test is ``> 1``, so GPU_NUMS == 1 takes the CPU path here
# and in every conditional expression below -- confirm.
if GPU_NUMS > 1:
    Net_G = Generator().cuda()
    Net_D = Discriminator().cuda()
    Q_cat = Q_cat.cuda()
qcat_optim = Adam(Q_cat.parameters(), lr = 2e-4)

# Optional second head and its optimizer, created only when c2_len is truthy.
if c2_len:
    Q_con = Linear(embedding_len, c2_len).cuda() if GPU_NUMS > 1 else Linear(embedding_len, c2_len)
    qcon_optim = Adam(Q_con.parameters(), lr = 2e-4)

# Optional third head and its optimizer, created only when c3_len is truthy.
if c3_len:
    Q_bin = Linear(embedding_len, c3_len).cuda() if GPU_NUMS > 1 else Linear(embedding_len, c3_len)
    qbin_optim = Adam(Q_bin.parameters(), lr = 2e-4)

# Optimizers for the two networks; the generator uses the larger step (1e-3).
g_optim = Adam(Net_G.parameters(), lr = 1e-3)
d_optim = Adam(Net_D.parameters(), lr = 2e-4)

# Loss objects, moved to the GPU under the same GPU_NUMS condition.
nll = NLLLoss().cuda() if GPU_NUMS > 1 else NLLLoss()
mse = MSELoss().cuda() if GPU_NUMS > 1 else MSELoss()
bce = BCELoss().cuda() if GPU_NUMS > 1 else BCELoss()
class MLP(nn.Module):
    """
    Feed-forward network made of a ``MainNet`` encoder followed by a single
    linear output unit, with a small training loop around it.

    The class itself does not define ``self.loss``; ``loss_batch`` expects it
    to exist (presumably supplied by a regressor/classifier subclass --
    confirm against the rest of the file).
    """

    def __init__(self, ndim=None, init_cuda=False):
        """
        Parameters
        ----------
        ndim: sequence
            Passed to ``MainNet``; its last entry is the input width of the
            estimator. The default ``None`` is not usable, because
            ``ndim[-1]`` is read right away.
        init_cuda: bool, default is False
            Move the sub-modules (and later the data) to the GPU.
        """
        super().__init__()
        self.init_cuda = init_cuda
        self.main_net = MainNet(ndim)
        self.estimator = Linear(ndim[-1], 1)

        if self.init_cuda:
            self.main_net.cuda()
            self.estimator.cuda()

    def train_val_split(self, x, y, val_size=0.2, random_state=42):
        """
        Split ``x``/``y`` into a training and a validation part with
        ``train_test_split`` and convert all four pieces to tensors.

        Returns
        -------
        x_train, x_val, y_train, y_val
        """
        x, y = np.asarray(x), np.asarray(y)
        x_train, x_val, y_train, y_val = train_test_split(
            x, y, test_size=val_size, random_state=random_state)
        x_train, y_train = self.array_to_tensor(x_train, y_train)
        x_val, y_val = self.array_to_tensor(x_val, y_val)
        return x_train, x_val, y_train, y_val

    def get_mini_batches(self, x, y, batch_size=16):
        """Return a shuffling ``DataLoader`` over ``MBSplitter(x, y)``."""
        data = MBSplitter(x, y)
        return DataLoader(data, batch_size=batch_size, shuffle=True)

    def array_to_tensor(self, x, y):
        """
        Convert two numpy arrays to float32 tensors.

        A one-dimensional ``y`` is reshaped to a column (``-1, 1``). Both
        tensors are moved to the GPU when ``self.init_cuda`` is set.
        """
        x = torch.from_numpy(x.astype('float32'))
        y = torch.from_numpy(y.astype('float32'))
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        if self.init_cuda:
            x, y = x.cuda(), y.cuda()
        return x, y

    def loss_batch(self, x_mb, y_mb, optimizer=None):
        """
        Compute ``self.loss`` on one batch and return it as a Python number.

        When an optimizer is given, one optimisation step is performed as
        well; with ``optimizer=None`` the call only evaluates the loss.
        """
        y_out = self.forward(x_mb)
        total_loss = self.loss(y_out, y_mb)
        if optimizer is not None:
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
        return total_loss.item()

    def forward(self, x):
        """
        Run the encoder and the estimator; apply a sigmoid on top when the
        instance is a ``BaseClassifier``.
        """
        out = self.main_net(x)
        out = self.estimator(out)
        if isinstance(self, BaseClassifier):
            out = Sigmoid()(out)
        return out

    def fit(self, x, y, n_epoch=100, batch_size=128, lr=0.001, weight_decay=0, dropout=0, verbose=False):
        """
        Train the network and restore the weights of the best epoch.

        After every epoch the loss on the held-out validation part is
        computed; the parameters of the epoch with the lowest validation loss
        (first one on ties) are loaded back at the end.

        Parameters
        ----------
        x, y: array-like
            Training data; split internally by ``train_val_split``.
        n_epoch: int
            Number of epochs. With ``0`` the model is returned untouched.
        batch_size: int
            Mini-batch size.
        lr, weight_decay: float
            Forwarded to ``optim.Yogi``.
        dropout:
            Accepted for interface compatibility; not used by this method.
        verbose: bool
            Print ``epoch, validation_loss`` after every epoch.

        Returns
        -------
        self
        """
        # Function-scope import keeps the change local to this block.
        import copy

        x_train, x_val, y_train, y_val = self.train_val_split(x, y)
        optimizer = optim.Yogi(self.parameters(), lr=lr, weight_decay=weight_decay)

        best_loss = None
        best_parameters = None
        for epoch in range(n_epoch):
            mb = self.get_mini_batches(x_train, y_train, batch_size=batch_size)
            self.train()
            for x_mb, y_mb in mb:
                self.loss_batch(x_mb, y_mb, optimizer=optimizer)

            self.eval()
            with torch.no_grad():
                loss = self.loss_batch(x_val, y_val, optimizer=None)

            # Strict improvement only, so the first epoch wins ties.
            if best_loss is None or loss < best_loss:
                best_loss = loss
                # state_dict() hands out references to the live tensors,
                # which the optimizer keeps updating in place. Without a deep
                # copy the "best" snapshot would always equal the weights of
                # the final epoch.
                best_parameters = copy.deepcopy(self.state_dict())
            if verbose:
                print(epoch, loss)

        if best_parameters is not None:
            self.load_state_dict(best_parameters, strict=True)
        return self

    def predict(self, x):
        """
        Predict for a numpy array ``x`` and return the result as a numpy
        array. Runs in eval mode without gradient tracking.
        """
        x = torch.from_numpy(x.astype('float32'))
        self.eval()
        with torch.no_grad():
            if self.init_cuda:
                x = x.cuda()
            y_pred = self.forward(x)
        return np.asarray(y_pred.cpu())
def make_model_and_optim():
    """
    Build a bias-free two-output linear model on the GPU together with its
    optimizer.

    The optimizer is SGD (lr 0.1, momentum 0.9) wrapped in ``AdaScale`` with
    ``num_gradients_to_accumulate=accum_steps``. ``in_dim`` and
    ``accum_steps`` are taken from the enclosing scope.

    Returns
    -------
    (model, optim)
    """
    model = Linear(in_dim, 2, bias=False).cuda()
    base_sgd = SGD(model.parameters(), lr=0.1, momentum=0.9)
    optim = AdaScale(base_sgd, num_gradients_to_accumulate=accum_steps)
    return model, optim
def test_against_fp(self): """Test whether FP is same as is_perfect inference tile.""" # pylint: disable-msg=too-many-locals # Prepare the datasets (input and expected output). x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]]) y = Tensor([[1.0, 0.5], [0.7, 0.3]]) # Define a single-layer network, using a constant step device type. rpu_config = self.get_rpu_config() rpu_config.forward.is_perfect = True model_torch = Linear(4, 2, bias=True) model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config) model.set_weights(model_torch.weight, model_torch.bias) model_fp = AnalogLinear(4, 2, bias=True, rpu_config=FloatingPointRPUConfig()) model_fp.set_weights(model_torch.weight, model_torch.bias) self.assertTensorAlmostEqual(model.get_weights()[0], model_torch.weight) self.assertTensorAlmostEqual(model.get_weights()[0], model_fp.get_weights()[0]) # Move the model and tensors to cuda if it is available. if self.use_cuda: x = x.cuda() y = y.cuda() model.cuda() model_fp.cuda() model_torch.cuda() # Define an analog-aware optimizer, preparing it for using the layers. opt = AnalogSGD(model.parameters(), lr=0.1) opt_fp = AnalogSGD(model_fp.parameters(), lr=0.1) opt_torch = SGD(model_torch.parameters(), lr=0.1) for _ in range(100): # inference opt.zero_grad() pred = model(x) loss = mse_loss(pred, y) loss.backward() opt.step() # same for fp opt_fp.zero_grad() pred_fp = model_fp(x) loss_fp = mse_loss(pred_fp, y) loss_fp.backward() opt_fp.step() # same for torch opt_torch.zero_grad() pred_torch = model_torch(x) loss_torch = mse_loss(pred_torch, y) loss_torch.backward() opt_torch.step() self.assertTensorAlmostEqual(pred_torch, pred) self.assertTensorAlmostEqual(loss_torch, loss) self.assertTensorAlmostEqual(model.get_weights()[0], model_torch.weight) self.assertTensorAlmostEqual(pred_fp, pred) self.assertTensorAlmostEqual(loss_fp, loss) self.assertTensorAlmostEqual(model.get_weights()[0], model_fp.get_weights()[0])
class FFClassifier(Classifier):
    """
    Feed-forward classifier module for the unsupervised neural translation task.

    Two LeakyReLU-activated linear layers followed by a linear output layer.
    """

    abstract = False

    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 output_size: int,
                 learning_rate: float,
                 optimizer_type: str,
                 cuda: bool):
        """
        An instance of a feed-forward discriminator.

        :param input_size:
            int, input size of the feed-forward network.

        :param hidden_size:
            int, hidden size of the feed forward neural network.

        :param output_size:
            int, forwarded to the base class; the output layer is sized from
            the resulting ``self._output_size``.

        :param learning_rate:
            float, learning rate of the optimizer.

        :param optimizer_type:
            str, forwarded to ``Optimizer``.

        :param cuda:
            bool, true if cuda support is enabled.
        """
        super().__init__(input_size=input_size,
                         hidden_size=hidden_size,
                         output_size=output_size,
                         learning_rate=learning_rate,
                         optimizer_type=optimizer_type,
                         cuda=cuda)

        self._input_layer = Linear(input_size, hidden_size)
        self._hidden_layer = Linear(hidden_size, hidden_size)
        self._output_layer = Linear(hidden_size, self._output_size)
        self._activation = LeakyReLU()

        if self._cuda:
            # Same four re-assignments as before, driven by attribute name.
            for attribute in ('_input_layer', '_hidden_layer', '_output_layer', '_activation'):
                setattr(self, attribute, getattr(self, attribute).cuda())

        self._optimizer = Optimizer(parameters=self.parameters(),
                                    optimizer_type=optimizer_type,
                                    scheduler_type='ReduceLROnPlateau',
                                    learning_rate=self._learning_rate)

    def forward(self, *args, inputs, **kwargs):
        """
        Forward step for the classifier.

        :param inputs:
            Variable, (batch_size, input_size), where input_size is equal to the encoder's hidden_size.

        :return:
            tuple ``(output, softmax_output)``: the raw output of the last
            layer and its softmax over dimension 1.
        """
        hidden = self._activation(self._input_layer(inputs))
        hidden = self._activation(self._hidden_layer(hidden))
        logits = self._output_layer(hidden)
        return logits, softmax(logits, dim=1)

    @property
    def optimizer(self):
        """Optimizer created in the constructor."""
        return self._optimizer
class Layer(Module):
    """
    A single linear projection that carries its own optimizer and can be
    frozen, unfrozen and (de)serialised through the ``state`` property.
    """

    def __init__(self, input_size, output_size, use_cuda):
        """
        :param input_size: width of the input of the linear layer.
        :param output_size: width of its output; also stored as ``size``.
        :param use_cuda: move the linear layer to the GPU when truthy.
        """
        super().__init__()
        self.size = output_size

        projection = Linear(input_size, output_size)
        self._weights = projection.cuda() if use_cuda else projection

        self._optimizer = Optimizer(
            parameters=self.parameters(),
            optimizer_type='Adam',
            scheduler_type='ReduceLROnPlateau',
            learning_rate=0.001,
        )

    def forward(self, inputs):
        """
        :param inputs: tensor handed to the linear layer.
        :return outputs: result of the linear layer.
        """
        outputs = self._weights(inputs)
        return outputs

    def freeze(self):
        """Turn off gradient tracking for every parameter."""
        for tensor in self.parameters():
            tensor.requires_grad = False

    def unfreeze(self):
        """Turn gradient tracking back on for every parameter."""
        for tensor in self.parameters():
            tensor.requires_grad = True

    @property
    def optimizer(self):
        """
        Property for the optimizer of the layer.
        """
        return self._optimizer

    @property
    def state(self):
        """
        Snapshot with two entries: the module's ``state_dict()`` under
        ``'weight'`` and the optimizer's ``state`` under ``'optimizer'``.
        """
        return dict(weight=self.state_dict(), optimizer=self._optimizer.state)

    @state.setter
    def state(self, states):
        """
        Restore a snapshot produced by the ``state`` getter.

        :param states: dict, containing the state of the weights and optimizer.
        """
        weight_state = states['weight']
        self.load_state_dict(weight_state)
        self._optimizer.state = states['optimizer']
class RNNClassifier(Classifier):
    """
    Recurrent discriminator module for the unsupervised neural translation task.

    A (possibly multi-layer) unidirectional GRU reads the padded input
    sequences; a linear layer maps the final hidden state of the top GRU
    layer to the output classes.
    """

    abstract = False

    # Extends the base interface with a ``num_layers`` entry placed right
    # after the last key of the base dictionary.
    interface = Interface(**{
        **Classifier.interface.dictionary,
        'num_layers': (Interface.last_key(Classifier.interface.dictionary) + 1, None)
    })

    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 output_size: int,
                 learning_rate: float,
                 num_layers: int,
                 optimizer_type: str,
                 cuda: bool):
        """
        An instance of a recurrent discriminator.

        :param input_size:
            int, input size of the recurrent layer.

        :param hidden_size:
            int, hidden size of the recurrent layer.

        :param output_size:
            int, forwarded to the base class; the output layer is sized from
            the resulting ``self._output_size``.

        :param learning_rate:
            float, learning rate of the optimizer.

        :param num_layers:
            int, number of stacked GRU layers.

        :param optimizer_type:
            str, forwarded to ``Optimizer``.

        :param cuda:
            bool, true if cuda support is enabled.
        """
        super().__init__(hidden_size=hidden_size,
                         input_size=input_size,
                         output_size=output_size,
                         learning_rate=learning_rate,
                         optimizer_type=optimizer_type,
                         cuda=cuda)

        self._num_layers = num_layers

        self._recurrent_layer = torch.nn.GRU(input_size=input_size,
                                             num_layers=num_layers,
                                             hidden_size=hidden_size,
                                             batch_first=True)

        self._output_layer = Linear(self._hidden_size, self._output_size)

        if self._cuda:
            self._recurrent_layer = self._recurrent_layer.cuda()
            self._output_layer = self._output_layer.cuda()

        self._optimizer = Optimizer(parameters=self.parameters(),
                                    optimizer_type=optimizer_type,
                                    scheduler_type='ReduceLROnPlateau',
                                    learning_rate=self._learning_rate)

    def forward(self, *args, inputs, lengths, **kwargs):
        """
        Forward step for the discriminator.

        :param inputs:
            Variable, batch-first padded sequences handed to
            ``pack_padded_sequence``.

        :param lengths:
            true lengths of the sequences in ``inputs``, as required by
            ``pack_padded_sequence``.

        :return:
            tuple ``(outputs, softmax_outputs)``: the raw output of the
            linear layer and its softmax over dimension 1.
        """
        initial_state = self._init_hidden(inputs.size(0))
        packed_inputs = pack_padded_sequence(inputs, lengths=lengths, batch_first=True)

        self._recurrent_layer.flatten_parameters()

        # For packed input the GRU's second return value holds, per sequence,
        # the hidden state after its LAST VALID step. Indexing the re-padded
        # output at position -1 (as done before) returned zero padding for
        # every sequence shorter than the longest one in the batch.
        _, final_state = self._recurrent_layer(packed_inputs, initial_state)

        # final_state is (num_layers, batch, hidden); [-1] selects the top layer.
        outputs = self._output_layer(final_state[-1])
        softmax_outputs = softmax(outputs, dim=1)

        return outputs, softmax_outputs

    def _init_hidden(self, batch_size):
        """
        Creates the initial hidden state of the recurrent layer.

        :return:
            tensor of shape (num_layers, batch_size, hidden_size) filled with
            samples from a standard normal distribution, located on the same
            device as the recurrent layer.
        """
        state = torch.randn(self._num_layers, batch_size, self._hidden_size)
        # Follow the GRU's own device instead of a separate flag: the
        # constructor reads ``self._cuda`` while this method used to read
        # ``self._use_cuda``.
        target_device = next(self._recurrent_layer.parameters()).device
        return state.to(target_device)

    @property
    def optimizer(self):
        """Optimizer created in the constructor."""
        return self._optimizer
class Reduce(StructuredEmbedding):
    """
    Reduce a list of embedded fields or messages using a feed forward.
    Requires list to be a constant size.

    The inputs are concatenated along dimension 1 and passed through two
    ReLU-activated linear layers.
    """

    def __init__(self, input_dims, encoding_output_dim, use_cuda):
        """
        :param input_dims: dimensions of the elements to reduce (list of size the number of elements, integer dimension).
        :param encoding_output_dim: dimension of the reduce output.
        :param use_cuda: forwarded to the base class; when ``self.use_cuda``
            is set afterwards, both linear layers are moved to the GPU.
        """
        super().__init__(embedding_size=encoding_output_dim, use_cuda=use_cuda)
        self.encoding_dim = encoding_output_dim
        self.input_dims = input_dims
        self.sum_input_dims = sum(input_dims)

        self.linear1 = Linear(self.sum_input_dims, self.sum_input_dims * 2)
        self.linear2 = Linear(self.sum_input_dims * 2, encoding_output_dim)
        if self.use_cuda:
            self.linear1 = self.linear1.cuda()
            self.linear2 = self.linear2.cuda()

    def forward(self, input_list, cuda=None, pad_missing=False):
        """
        Concatenate the inputs and reduce them.

        :param input_list: list of 2-D tensors, one per declared input, each
            of width ``input_dims[i]``. The list itself is not modified.
        :param cuda: passed to ``self.is_cuda`` to decide whether padding
            tensors are moved to the GPU.
        :param pad_missing: when True, a list that is too short is completed
            with zero mini-batches for the missing trailing inputs.
        :return: tensor of shape (-1, encoding_dim).
        :raises ValueError: when the number of inputs differs from
            ``len(input_dims)`` and ``pad_missing`` is False.
        """
        if len(input_list) != len(self.input_dims) and not pad_missing:
            raise ValueError(
                "The input_list dimension does not match input_dims, and pad_missing is not specified. "
            )
        batch_size = input_list[0].size(0)

        if pad_missing:
            # Work on a copy so the caller's list is not extended behind its back.
            input_list = list(input_list)
            while len(input_list) < len(self.input_dims):
                # Pad with the width of the slot being filled (the next
                # missing position), not with the widths of the first slots.
                missing_dim = self.input_dims[len(input_list)]
                # pad input list with minibatches of zeros:
                variable = Variable(torch.zeros(batch_size, missing_dim))
                if self.is_cuda(cuda):
                    # `async` is a reserved word since Python 3.7; the
                    # keyword argument is named `non_blocking`.
                    variable = variable.cuda(non_blocking=True)
                input_list.append(variable)

        # Diagnostic only; list() makes the comparison independent of
        # input_dims being given as a list or a tuple.
        if any(tensor.dim() != 2 for tensor in input_list) \
                or [tensor.size(1) for tensor in input_list] != list(self.input_dims):
            print("STOP: input dims={} sizes={}".format(
                [tensor.dim() for tensor in input_list],
                [tensor.size() for tensor in input_list]))

        for index, tensor in enumerate(input_list):
            assert tensor.size(1) == self.input_dims[index], \
                "input dimension must match declared for input {} ".format(index)

        x = torch.cat(input_list, dim=1)
        return self.forward_flat_inputs(x)

    def forward_flat_inputs(self, x):
        """
        Apply linear1 -> ReLU -> linear2 -> ReLU to an already concatenated
        input and reshape the result to (-1, encoding_dim).
        """
        x = self.linear1(x)
        x = torch.nn.functional.relu(x)
        x = self.linear2(x)
        x = torch.nn.functional.relu(x)
        return x.view(-1, self.encoding_dim)

    # NOTE(review): the NoCache() default is created once at definition time
    # and shared between calls; it is not used in this method.
    def collect_inputs(self, x, phase=0, tensor_cache=NoCache(), cuda=None, batcher=None):
        """
        Register a pre-concatenated input with the batcher.

        :param x: tensor whose last dimension equals ``sum_input_dims``.
        :param batcher: object providing ``store_inputs``.
        :return: whatever ``batcher.store_inputs(self, x)`` returns.
        """
        assert x.size(-1) == self.sum_input_dims, \
            "you must provided a pre-padded input to Reduce in batch mode."
        return batcher.store_inputs(self, x)

    def forward_batch(self, batcher, phase=0):
        """Reduce the batched input the batcher holds for this module."""
        return self.forward_flat_inputs(batcher.get_batched_input(self))
class AttentionNet(BaseNet):
    """
    Bag-level network with learnt attention pooling.

    Every conformer is encoded by a ``MainNet`` instance. A small detector
    sub-network scores each encoded conformer; the scores are turned into
    weights and the conformer encodings of a molecule are combined into one
    bag vector with those weights. A linear layer maps the bag vector to a
    single output.

    This class is not meant to be used directly: its regressor/classifier
    subclasses define the loss and enable training.
    """

    def __init__(self, ndim: Sequence, det_ndim: Sequence, init_cuda: bool = False):
        """
        Parameters
        ----------
        ndim: Sequence
            Hyperparameter for MainNet: each entry gives the number of nodes
            of one layer, the length of the sequence gives the number of
            layers.
        det_ndim: Sequence
            Hyperparameter for the attention subnet: each entry gives the
            number of nodes of one hidden layer, the length of the sequence
            gives the number of hidden layers.
        init_cuda: bool, default is False
            Use Cuda GPU or not?
        """
        super().__init__(init_cuda=init_cuda)
        self.main_net = MainNet(ndim)
        self.estimator = Linear(ndim[-1], 1)

        # Detector: (Linear -> Sigmoid) per entry of det_ndim, starting from
        # the encoder width, closed by a Linear layer with one output.
        widths = [ndim[-1], *det_ndim]
        detector_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            detector_layers += [Linear(n_in, n_out), Sigmoid()]
        detector_layers.append(Linear(widths[-1], 1))
        self.detector = Sequential(*detector_layers)

        if init_cuda:
            for gpu_module in (self.main_net, self.detector, self.estimator):
                gpu_module.cuda()

    def forward(self, x: torch.Tensor, m: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Feed forward input data.

        Parameters
        ----------
        x: torch.Tensor
            Input passed to the main net.
        m: torch.Tensor
            Multiplied element-wise with the detector scores (the padding
            mask in the example below).

        Returns
        --------
        Tuple ``(w, out)``. ``w`` holds the weights of conformers, obtained
        with ``gumbel_softmax`` over dimension 2 using
        ``self.instance_dropout`` as temperature (that attribute is not set
        in this class). ``out`` is the estimator output reshaped to
        ``(-1, 1)``; when the instance is a ``BaseClassifier`` a sigmoid has
        already been applied to it.

        Examples
        --------
        >>> import torch
        >>> import numpy as np
        >>> from torch import randn
        >>> from miqsar.estimators.attention_nets import AttentionNet
        >>> x_train = randn((3, 3, 3))
        >>> at_net = AttentionNet(ndim=(x_train[0].shape[-1], 4, 6, 4), det_ndim = (4,4), init_cuda=False)
        >>> _, m = at_net.add_padding(x_train)
        >>> m = torch.from_numpy(m.astype('float32'))
        >>> _ = at_net.forward(x_train, m) # (assign result to a variable to suppress std output)
        """
        encoded = self.main_net(x)

        masked_scores = m * self.detector(encoded)
        scores = torch.transpose(masked_scores, 2, 1)
        w = nn.functional.gumbel_softmax(scores, tau=self.instance_dropout, dim=2)

        bag_vector = torch.bmm(w, encoded)
        out = self.estimator(bag_vector)
        if isinstance(self, BaseClassifier):
            out = Sigmoid()(out)
        return w, out.view(-1, 1)