def __init__(self, config):
    super().__init__(config)
    # NOTE: `args` is not defined in this snippet; it is
    # presumably a module-level argparse namespace
    if args.grad_ckpt:
        # wrap every transformer layer so activations are
        # recomputed during backward instead of stored
        self.transformer.layer = ModuleList([
            CkptXLNetLayer(layer)
            for layer in self.transformer.layer
        ])
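# CkptXLNetLayer is referenced above but not defined here. A minimal
# sketch of what such a wrapper could look like, assuming it uses
# torch.utils.checkpoint to trade compute for memory; this is an
# illustration, not the snippet's actual class.
import torch.nn as nn
import torch.utils.checkpoint


class CkptXLNetLayer(nn.Module):
    def __init__(self, layer):
        super().__init__()
        self.layer = layer

    def forward(self, *inputs):
        # rerun self.layer's forward pass during backward
        # rather than keeping its activations in memory
        return torch.utils.checkpoint.checkpoint(self.layer, *inputs)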
def __init__(self, input_size, hidden_size, pad_idx, vocab_size):
    super().__init__()
    self.embedding = Embedding(
        num_embeddings=vocab_size,
        embedding_dim=input_size,
        padding_idx=pad_idx)

    self.dropout = Dropout(p=0.1)

    self.merge = Linear(
        in_features=hidden_size * 2,
        out_features=hidden_size,
        bias=False)

    # creating rnn layer as module list so locked
    # dropout can be applied between each layer
    # NOTE: currently not using weight drop, because
    # it is incompatible with apex
    self.rnn = ModuleList([
        GRU(input_size=input_size,
            hidden_size=hidden_size,
            bidirectional=True,
            batch_first=True)] + [
        GRU(input_size=hidden_size,
            hidden_size=hidden_size,
            batch_first=True)
        for _ in range(2)
    ])
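# The comment above mentions applying locked dropout between the RNN
# layers, but no implementation appears in this snippet. A minimal
# sketch of the usual technique, offered as an assumption: a single
# dropout mask is sampled per sequence and reused at every time step.
import torch.nn as nn


class LockedDropout(nn.Module):
    def __init__(self, p=0.1):
        super().__init__()
        self.p = p

    def forward(self, x):
        # x: (batch, seq_len, features)
        if not self.training or self.p == 0:
            return x
        # one mask per batch element, broadcast over the time axis
        mask = x.new_empty(x.size(0), 1, x.size(2))
        mask = mask.bernoulli_(1 - self.p) / (1 - self.p)
        return x * mask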
def __init__(self, config):
    super().__init__(config)
    # TODO remove temporary solution that we
    # are only using the first 4 layers of
    # XLNet for faster testing speed
    self.transformer.layer = ModuleList(
        [layer for layer in self.transformer.layer[:4]])

    self.lm_loss = Linear(config.d_model, config.n_token)

    self.apply(self.init_weights)
    self.tie_weights()
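# tie_weights() comes from the pretrained-model base class (this looks
# like pytorch-transformers-era XLNet). A sketch of the usual tying
# pattern, assuming the standard attribute names; the real base-class
# method may differ in detail.
def tie_weights(self):
    # make the LM head and the input embedding share one Parameter
    self.lm_loss.weight = self.transformer.word_embedding.weight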
def __init__(self, input_dim, hidden_dim=None, num_classes=0,
             dropout=0.5, config=None):
    super(SBMGNN, self).__init__()
    self.num_classes = num_classes
    self.dropout = dropout
    self.config = config
    # NOTE: despite the None default, hidden_dim must be a sequence
    # of layer widths, or the next line raises a TypeError
    self.hidden = [int(x) for x in hidden_dim]
    self.num_layers = len(self.hidden)

    # input layer; dropout=-1. presumably disables dropout here
    self.h = GraphConvolution(in_features=input_dim,
                              out_features=self.hidden[0],
                              act=nn.LeakyReLU(negative_slope=0.2),
                              dropout=-1.)
    h_mid = []
    for i in range(self.num_layers - 2):
        h_mid.append(GraphConvolution(in_features=self.hidden[i],
                                      out_features=self.hidden[i + 1],
                                      act=nn.LeakyReLU(negative_slope=0.2),
                                      dropout=self.dropout))
    self.h_mid = ModuleList(h_mid)

    # three parallel heads with identity activations, one per
    # variational parameter
    self.h1 = GraphConvolution(in_features=self.hidden[-2],
                               out_features=self.hidden[-1],
                               act=lambda x: x,
                               dropout=self.dropout)
    self.h2 = GraphConvolution(in_features=self.hidden[-2],
                               out_features=self.hidden[-1],
                               act=lambda x: x,
                               dropout=self.dropout)
    self.h3 = GraphConvolution(in_features=self.hidden[-2],
                               out_features=self.hidden[-1],
                               act=lambda x: x,
                               dropout=self.dropout)

    self.deep_decoder = nn.Sequential(
        nn.Linear(self.hidden[self.num_layers - 1],
                  config.model.g_hidden),
        nn.LeakyReLU(negative_slope=0.2),
        nn.Linear(config.model.g_hidden, config.model.g_hidden // 2))

    # placeholders populated during the forward pass
    self.mean, self.logv, self.pi_logit = None, None, None
    self.a, self.beta_a, self.b, self.beta_b, self.v = (None,) * 5
    self.x_hat, self.logit_post, self.log_prior = None, None, None
    self.z_discrete, self.z_real, self.y_sample = None, None, None
    self.reconstructions = None

    self.get_alpha_beta(config=self.config)
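# A hypothetical construction call, only to illustrate the expected
# argument shapes; the dimensions and the config stub are invented,
# and a real config presumably comes from a configuration file.
from types import SimpleNamespace

config = SimpleNamespace(model=SimpleNamespace(g_hidden=16))
model = SBMGNN(input_dim=1433,          # node feature size
               hidden_dim=[32, 16, 8],  # one width per GCN layer
               num_classes=7,
               dropout=0.5,
               config=config)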
def __init__(self, input_size, hidden_size, vocab_size):
    super().__init__()
    self.embedding = Embedding(
        num_embeddings=vocab_size,
        embedding_dim=input_size)

    self.dropout = Dropout(p=0.1)

    self.rnn = ModuleList([
        GRU(input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True)] + [
        GRU(input_size=hidden_size,
            hidden_size=hidden_size,
            batch_first=True)
        for _ in range(2)
    ])

    self.attn = Attention(hidden_size=hidden_size)

    # output projection is tied to the input embedding, so only
    # the bias is a fresh parameter; note the tie presupposes
    # hidden_size == input_size
    self.out_bias = Parameter(torch.zeros((vocab_size, )))
    self.out_weight = self.embedding.weight
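# The forward pass is not shown, but with out_weight tied to the
# embedding the vocabulary logits are presumably computed like this
# (a sketch, not the snippet's actual method):
import torch.nn.functional as F


def output_layer(self, hidden):
    # hidden: (batch, seq_len, hidden_size) from the last GRU
    return F.linear(hidden, self.out_weight, self.out_bias)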
def _get_clones(module: nn.Module, N: int) -> ModuleList:
    return ModuleList([copy.deepcopy(module) for _ in range(N)])
def _get_clones(module, n):
    return ModuleList([copy.deepcopy(module) for _ in range(n)])
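# Typical usage of _get_clones: stacking n independent copies of a
# layer. The Linear layer here is a stand-in for any nn.Module.
import copy

import torch.nn as nn
from torch.nn import ModuleList

block = nn.Linear(16, 16)
layers = _get_clones(block, 4)
assert all(layer is not block for layer in layers)  # deep copies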