def __init__(self, act_fn, combinator, neurons, normalize=None, init='random',
             alpha_dropout=None, hr_test=None):
    super(MIX, self).__init__()
    self.combinator = combinator        # name of the combinator, e.g. "Linear"
    self.act_fn = act_fn                # basic activation functions to be mixed, e.g. "Tanh", "Sigmoid"
    self.normalize = normalize          # normalize alpha, e.g. with a Sigmoid
    self.neurons = neurons              # number of neurons of the layer
    self.alpha_dropout = alpha_dropout  # apply a dropout on alpha (only for MLP_ATT)
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.act_module = {                 # dictionary of the supported activation modules
        'relu': nn.ReLU(),
        'sigmoid': nn.Sigmoid(),
        'tanh': nn.Tanh(),
        'antirelu': Antirelu(),
        'identity': Identity(),
        'softmax': nn.Softmax(dim=-1)
    }
    self.hr_test = hr_test  # TODO: assert hr_test != False implies combinator == 'MLP_ATT_b'

    if combinator == 'Linear':
        # three different alpha initializations for the Linear combinator
        assert init in ['normal', 'uniform', 'random'], "init must be 'normal', 'uniform' or 'random'"
        if init == 'normal':
            # samples taken from a Gaussian N(0, 1)
            self.alpha = nn.Parameter(torch.randn(neurons, len(act_fn)), requires_grad=True)
        elif init == 'uniform':
            # same init for each alpha, equal to 1 / (number of act_fn)
            self.alpha = nn.Parameter(torch.ones(neurons, len(act_fn)) / len(act_fn),
                                      requires_grad=True)
        elif init == 'random':
            # alpha sampled from a uniform interval
            self.alpha = nn.Parameter(torch.FloatTensor(neurons, len(act_fn)).uniform_(-0.5, 0.5),
                                      requires_grad=True)
    elif combinator in MLP_list + ATT_list:
        # create a list of MLPs, one per neuron
        self.MLP_list = nn.ModuleList([MLP(combinator).to(self.device)
                                       for _ in range(neurons)]).to(self.device)
        if combinator == 'MLP_ATT_b':
            self.beta = nn.Parameter(torch.FloatTensor(neurons).uniform_(-0.5, 0.5),
                                     requires_grad=True).to(self.device)
    elif combinator == 'Hybrid':
        # Hybrid uses two ReLUs and one MLP1 for every group of three neurons
        self.MLP_list = nn.ModuleList([])
        for i in range(neurons // 3):
            self.MLP_list.extend([self.act_module['relu']])
            self.MLP_list.extend([self.act_module['relu']])
            self.MLP_list.extend([MLP('MLP1')])
    elif combinator == 'MLPr':
        # MLPr is a mix of MLP1 and MLP2
        self.MLP_list = nn.ModuleList([])
        for i in range(neurons // 2):
            self.MLP_list.extend([MLP('MLP1')])
            self.MLP_list.extend([MLP('MLP2')])
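# Hedged usage sketch (not from the source): instantiating the MIX layer above with the
# 'Linear' combinator. The class name MIX is taken from the super() call; the activation
# names are assumed to be keys of act_module, and all hyperparameter values are illustrative.
mix_layer = MIX(act_fn=['relu', 'tanh', 'sigmoid'],  # activations to be mixed per neuron
                combinator='Linear',                 # one learnable alpha per (neuron, activation)
                neurons=128,
                init='uniform')                      # alpha initialized to 1 / len(act_fn)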
def __init__(self, args):
    super(Model, self).__init__()

    self.args = args
    # the embedding layer
    self.word_embed = nn.Embedding(num_embeddings=args.n_words,
                                   embedding_dim=args.n_embed)
    if args.feat == 'char':
        self.feat_embed = CHAR_LSTM(n_chars=args.n_feats,
                                    n_embed=args.n_char_embed,
                                    n_out=args.n_embed)
    elif args.feat == 'bert':
        self.feat_embed = BertEmbedding(model=args.bert_model,
                                        n_layers=args.n_bert_layers,
                                        n_out=args.n_embed)
    else:
        self.feat_embed = nn.Embedding(num_embeddings=args.n_feats,
                                       embedding_dim=args.n_embed)
    self.embed_dropout = IndependentDropout(p=args.embed_dropout)

    # the word-lstm layer
    self.lstm = BiLSTM(input_size=args.n_embed * 2,
                       hidden_size=args.n_lstm_hidden,
                       num_layers=args.n_lstm_layers,
                       dropout=args.lstm_dropout)
    self.lstm_dropout = SharedDropout(p=args.lstm_dropout)

    # the MLP layers
    self.mlp_arc_h = MLP(n_in=args.n_lstm_hidden * 2,
                         n_hidden=args.n_mlp_arc,
                         dropout=args.mlp_dropout)
    self.mlp_arc_d = MLP(n_in=args.n_lstm_hidden * 2,
                         n_hidden=args.n_mlp_arc,
                         dropout=args.mlp_dropout)
    self.mlp_rel_h = MLP(n_in=args.n_lstm_hidden * 2,
                         n_hidden=args.n_mlp_rel,
                         dropout=args.mlp_dropout)
    self.mlp_rel_d = MLP(n_in=args.n_lstm_hidden * 2,
                         n_hidden=args.n_mlp_rel,
                         dropout=args.mlp_dropout)

    # the Biaffine layers
    self.arc_attn = Biaffine(n_in=args.n_mlp_arc,
                             bias_x=True,
                             bias_y=False)
    self.rel_attn = Biaffine(n_in=args.n_mlp_rel,
                             n_out=args.n_rels,
                             bias_x=True,
                             bias_y=True)
    self.pad_index = args.pad_index
    self.unk_index = args.unk_index
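# Hedged usage sketch (not from the source): the constructor above only reads attributes from
# `args`, so a SimpleNamespace carrying the referenced fields is enough to build the model,
# assuming CHAR_LSTM, BiLSTM, MLP, Biaffine, etc. are importable from the project. All numeric
# values below are illustrative assumptions, not the project's defaults.
from types import SimpleNamespace

args = SimpleNamespace(
    n_words=10000, n_embed=100,                     # word embedding table
    feat='char', n_feats=500, n_char_embed=50,      # char-LSTM feature embeddings
    embed_dropout=0.33,
    n_lstm_hidden=400, n_lstm_layers=3, lstm_dropout=0.33,
    n_mlp_arc=500, n_mlp_rel=100, mlp_dropout=0.33,
    n_rels=40, pad_index=0, unk_index=1,
    bert_model='bert-base-cased', n_bert_layers=4,  # only read when feat == 'bert'
)
model = Model(args)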
def __init__(self, in_dim, out_dim, aggregator='softmax',
             beta=1.0, learn_beta=False, p=1.0, learn_p=False,
             msg_norm=False, learn_msg_scale=False,
             mlp_layers=1, eps=1e-7):
    super(GENConv, self).__init__()

    self.aggr = aggregator
    self.eps = eps

    # channel sizes of the internal MLP: hidden layers of width 2 * in_dim
    channels = [in_dim]
    for _ in range(mlp_layers - 1):
        channels.append(in_dim * 2)
    channels.append(out_dim)

    self.mlp = MLP(channels)
    self.msg_norm = MessageNorm(learn_msg_scale) if msg_norm else None

    # beta and p are scalars used by the softmax / power-mean aggregators; they may be learnable
    self.beta = nn.Parameter(torch.Tensor([beta]),
                             requires_grad=True) if learn_beta and self.aggr == 'softmax' else beta
    self.p = nn.Parameter(torch.Tensor([p]), requires_grad=True) if learn_p else p

    self.edge_encoder = BondEncoder(in_dim)
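# Hedged sketch (not from the source): how the channel list for the internal MLP is built.
# With in_dim = 256, out_dim = 256 and mlp_layers = 2, the loop above yields [256, 512, 256];
# with mlp_layers = 1 it is simply [256, 256]. The values are illustrative.
in_dim, out_dim, mlp_layers = 256, 256, 2
channels = [in_dim]
for _ in range(mlp_layers - 1):
    channels.append(in_dim * 2)
channels.append(out_dim)
assert channels == [256, 512, 256]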
def __init__(self, input_dim, hps, use_global=False):
    super(Generator, self).__init__()
    dim = hps.gen_dim
    style_dim = hps.gen_style_dim
    n_downsample = hps.gen_n_downsample
    n_res = hps.gen_n_res
    activ = hps.gen_activ
    pad_type = hps.gen_pad_type
    mlp_dim = hps.gen_mlp_dim

    # style encoder
    self.enc_style = StyleEncoder(4, input_dim, dim, style_dim,
                                  norm='none', activ=activ, pad_type=pad_type)

    # content encoder
    if use_global:
        self.enc_content = ContentEncoder(n_downsample, n_res, input_dim, dim,
                                          'global_in', activ, pad_type=pad_type)
        self.dec = Decoder(n_downsample, n_res, self.enc_content.output_dim, input_dim,
                           res_norm='global_adain', activ=activ, pad_type=pad_type,
                           use_global=use_global)
    else:
        self.enc_content = ContentEncoder(n_downsample, n_res, input_dim, dim,
                                          'in', activ, pad_type=pad_type)
        self.dec = Decoder(n_downsample, n_res, self.enc_content.output_dim, input_dim,
                           res_norm='adain', activ=activ, pad_type=pad_type,
                           use_global=use_global)

    # MLP to generate the AdaIN parameters
    self.mlp = MLP(style_dim, self.get_num_adain_params(self.dec), mlp_dim, 3,
                   norm='none', activ=activ)
def __init__(self, word_vocab_size, word_emb_dim, pos_vocab_size, pos_emb_dim,
             emb_dropout, lstm_hidden, lstm_depth, lstm_dropout,
             arc_hidden, arc_depth, arc_dropout, arc_activation,
             lab_hidden, lab_depth, lab_dropout, lab_activation, n_labels):
    super(BiAffineParser, self).__init__()

    # Embeddings
    self.word_embedding = TimeDistributed(
        nn.Embedding(word_vocab_size, word_emb_dim, padding_idx=0))
    self.pos_embedding = TimeDistributed(
        nn.Embedding(pos_vocab_size, pos_emb_dim, padding_idx=0))
    self.emb_dropout = nn.Dropout(p=emb_dropout)

    # LSTM
    lstm_input = word_emb_dim + pos_emb_dim
    self.lstm = nn.LSTM(input_size=lstm_input, hidden_size=lstm_hidden,
                        num_layers=lstm_depth, batch_first=True,
                        dropout=lstm_dropout, bidirectional=True)

    # MLPs
    self.arc_mlp_h = TimeDistributed(
        MLP(lstm_hidden * 2, arc_hidden, arc_depth, arc_activation, arc_dropout))
    self.arc_mlp_d = TimeDistributed(
        MLP(lstm_hidden * 2, arc_hidden, arc_depth, arc_activation, arc_dropout))
    self.lab_mlp_h = TimeDistributed(
        MLP(lstm_hidden * 2, lab_hidden, lab_depth, lab_activation, lab_dropout))
    self.lab_mlp_d = TimeDistributed(
        MLP(lstm_hidden * 2, lab_hidden, lab_depth, lab_activation, lab_dropout))

    # BiAffine layers
    self.arc_attn = BiAffineAttn(arc_hidden, 1, bias_head=False, bias_dep=True)
    self.lab_attn = BiAffineAttn(lab_hidden, n_labels, bias_head=True, bias_dep=True)
def __init__(self, num_atoms, v_dim, e_dim, num_edge_types):
    super().__init__()
    self.num_atoms = num_atoms
    self.num_edge_types = num_edge_types
    self.mlp_v = MLP(n_in=e_dim, n_hid=64, n_out=v_dim)
    # one edge MLP per edge type
    self.mlp_e = nn.ModuleList([
        MLP(n_in=v_dim * 2, n_hid=64, n_out=e_dim)
        for _ in range(num_edge_types)
    ])

    # fully connected graph without self-loops: one-hot receiver/sender matrix per directed edge
    off_diag = np.ones([num_atoms, num_atoms]) - np.eye(num_atoms)
    rel_rec = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
    rel_send = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
    self.rel_rec = torch.FloatTensor(rel_rec).cuda()
    self.rel_send = torch.FloatTensor(rel_send).cuda()
def __init__(self, num_atoms, v_dim, e_dim):
    super().__init__()
    self.num_atoms = num_atoms
    self.mlp_v = MLP(n_in=e_dim, n_hid=32, n_out=v_dim)
    # nn.Sequential(nn.Linear(e_dim, 16), nn.Tanh(), nn.Linear(16, v_dim))
    self.mlp_e = MLP(n_in=v_dim * 2, n_hid=32, n_out=e_dim)
    # nn.Sequential(nn.Linear(v_dim * 2, 16), nn.Tanh(), nn.Linear(16, e_dim))

    off_diag = np.ones([num_atoms, num_atoms]) - np.eye(num_atoms)
    rel_rec = np.array(encode_onehot(np.where(off_diag)[1]), dtype=np.float32)
    rel_send = np.array(encode_onehot(np.where(off_diag)[0]), dtype=np.float32)
    self.rel_rec = torch.FloatTensor(rel_rec).cuda()
    self.rel_send = torch.FloatTensor(rel_send).cuda()
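# Hedged sketch (not from the source): the rel_rec / rel_send construction used in the two
# snippets above. For num_atoms = N there are N * (N - 1) directed edges (the off-diagonal
# entries); rel_rec one-hot encodes each edge's receiver and rel_send its sender, so both have
# shape [N * (N - 1), N]. np.eye indexing is used here in place of the project's encode_onehot
# helper, which is assumed to produce the same one-hot rows for indices 0..N-1.
import numpy as np

num_atoms = 4
off_diag = np.ones([num_atoms, num_atoms]) - np.eye(num_atoms)
send_idx, recv_idx = np.where(off_diag)          # rows are senders, columns are receivers
rel_rec = np.eye(num_atoms, dtype=np.float32)[recv_idx]
rel_send = np.eye(num_atoms, dtype=np.float32)[send_idx]
assert rel_rec.shape == (num_atoms * (num_atoms - 1), num_atoms)
assert rel_send.shape == (num_atoms * (num_atoms - 1), num_atoms)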
def __init__(self, sr=SAMPLE_RATE, rnn_channels=512,
             out_size_in_seconds=SAMPLE_LENGTH_IN_SECONDS,
             n_oscillators=101, filter_size=256, z_size=16):
    """Construct a DDSPDecoder.

    Args:
        sr (int, optional): Sample rate. Defaults to SAMPLE_RATE.
        rnn_channels (int, optional): Number of RNN hidden states. Defaults to 512.
        out_size_in_seconds (float, optional): Length of the output in seconds.
            Defaults to SAMPLE_LENGTH_IN_SECONDS.
        n_oscillators (int, optional): Number of oscillators in the harmonic
            oscillator bank. Defaults to 101.
        filter_size (int, optional): Length of the FIR filter impulse response.
            Defaults to 256.
        z_size (int, optional): Size of the latent dimension. Defaults to 16.
    """
    super().__init__()
    self.out_size = int(out_size_in_seconds * sr)

    self.f0_mlp = MLP(in_size=1)
    self.loudness_mlp = MLP(in_size=1)
    self.z_mlp = MLP(in_size=z_size)
    self.gru = nn.GRU(input_size=rnn_channels * 3, hidden_size=rnn_channels)
    self.final_mlp = MLP(in_size=rnn_channels * 3)

    self.H_out = nn.Linear(rnn_channels, filter_size // 2 + 1)
    self.amp_out = nn.Linear(rnn_channels, n_oscillators)
    self.output_activation_H = nn.Sigmoid()    # ScaledSigmoid()
    self.output_activation_amp = nn.Sigmoid()  # ScaledSigmoid()
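# Hedged usage sketch (not from the source): constructing the decoder above with explicit
# values in place of the module-level SAMPLE_RATE / SAMPLE_LENGTH_IN_SECONDS constants.
# The class name is taken from the docstring and the values are illustrative;
# out_size becomes int(4.0 * 16000) = 64000 samples.
decoder = DDSPDecoder(sr=16000,
                      rnn_channels=512,
                      out_size_in_seconds=4.0,
                      n_oscillators=101,   # harmonic oscillator bank size
                      filter_size=256,     # FIR impulse-response length
                      z_size=16)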
def __init__(self, useritem_embeds, model, hidden_size, bidirectional, input_size, layer_num):
    nn.Module.__init__(self)
    self.useritem_embeds = useritem_embeds
    self.model = model
    self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                        batch_first=False, bidirectional=bidirectional)
    self.support_proj = MLP(input_size=2 * input_size,
                            hidden_layers=[(input_size, True, 0.2)],
                            normalization='layer_norm',
                            activation='relu')
    self.bidirectional = bidirectional
    self.layer_num = layer_num
    if bidirectional:
        hidden_size *= 2
    self.user_attn = nn.Linear(hidden_size, layer_num * input_size * 2)
    self.item_attn = nn.Linear(hidden_size, layer_num * input_size * 2)
def __init__(self, dataset, in_dim, out_dim, aggregator='softmax',
             beta=1.0, learn_beta=False, p=1.0, learn_p=False,
             msg_norm=False, learn_msg_scale=False,
             norm='batch', mlp_layers=1, eps=1e-7):
    super(GENConv, self).__init__()

    self.aggr = aggregator
    self.eps = eps

    channels = [in_dim]
    for i in range(mlp_layers - 1):
        channels.append(in_dim * 2)
    channels.append(out_dim)

    self.mlp = MLP(channels, norm=norm)
    self.msg_norm = MessageNorm(learn_msg_scale) if msg_norm else None

    self.beta = nn.Parameter(torch.Tensor([beta]),
                             requires_grad=True) if learn_beta and self.aggr == 'softmax' else beta
    self.p = nn.Parameter(torch.Tensor([p]), requires_grad=True) if learn_p else p

    if dataset == 'ogbg-molhiv':
        self.edge_encoder = BondEncoder(in_dim)
    elif dataset == 'ogbg-ppa':
        self.edge_encoder = nn.Linear(in_dim, in_dim)
    else:
        raise ValueError(f'Dataset {dataset} is not supported.')
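# Hedged usage sketch (not from the source): unlike the earlier GENConv variant, this one
# picks its edge encoder from the dataset name -- BondEncoder for 'ogbg-molhiv', a Linear
# layer for 'ogbg-ppa', and a ValueError otherwise. Dimensions below are illustrative.
conv = GENConv(dataset='ogbg-ppa', in_dim=128, out_dim=128,
               aggregator='softmax', learn_beta=True,
               norm='batch', mlp_layers=2)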
def __init__(self,
             num_steps,
             x_size,
             window_size,
             z_what_size,
             rnn_hidden_size,
             encoder_net=[],
             decoder_net=[],
             predict_net=[],
             embed_net=None,
             bl_predict_net=[],
             non_linearity='ReLU',
             decoder_output_bias=None,
             decoder_output_use_sigmoid=False,
             use_masking=True,
             use_baselines=True,
             baseline_scalar=None,
             scale_prior_mean=3.0,
             scale_prior_sd=0.1,
             pos_prior_mean=0.0,
             pos_prior_sd=1.0,
             likelihood_sd=0.3,
             use_cuda=False):
    super(AIR, self).__init__()

    self.num_steps = num_steps
    self.x_size = x_size
    self.window_size = window_size
    self.z_what_size = z_what_size
    self.rnn_hidden_size = rnn_hidden_size
    self.use_masking = use_masking
    self.use_baselines = use_baselines
    self.baseline_scalar = baseline_scalar
    self.likelihood_sd = likelihood_sd
    self.use_cuda = use_cuda
    prototype = torch.tensor(0.).cuda() if use_cuda else torch.tensor(0.)
    self.options = dict(dtype=prototype.dtype, device=prototype.device)

    self.z_pres_size = 1
    self.z_where_size = 3
    # By making these parameters they will be moved to the gpu
    # when necessary. (They are not registered with pyro for
    # optimization.)
    self.z_where_loc_prior = nn.Parameter(
        torch.FloatTensor([scale_prior_mean, pos_prior_mean, pos_prior_mean]),
        requires_grad=False)
    self.z_where_scale_prior = nn.Parameter(
        torch.FloatTensor([scale_prior_sd, pos_prior_sd, pos_prior_sd]),
        requires_grad=False)

    # Create nn modules.
    rnn_input_size = x_size**2 if embed_net is None else embed_net[-1]
    rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
    nl = getattr(nn, non_linearity)

    self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.encode = Encoder(window_size**2, encoder_net, z_what_size, nl)
    self.decode = Decoder(window_size**2, decoder_net, z_what_size,
                          decoder_output_bias, decoder_output_use_sigmoid, nl)
    self.predict = Predict(rnn_hidden_size, predict_net,
                           self.z_pres_size, self.z_where_size, nl)
    self.embed = Identity() if embed_net is None else MLP(x_size**2, embed_net, nl, True)

    self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
    self.bl_embed = Identity() if embed_net is None else MLP(x_size**2, embed_net, nl, True)

    # Create parameters.
    self.h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.z_where_init = nn.Parameter(torch.zeros(1, self.z_where_size))
    self.z_what_init = nn.Parameter(torch.zeros(1, self.z_what_size))

    if use_cuda:
        self.cuda()
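# Hedged sketch (not from the source): the RNN input size computed above, worked through for
# illustrative settings. With no embed_net, the flattened image contributes x_size**2 features,
# plus z_where (3), z_what (z_what_size) and z_pres (1).
x_size, z_what_size = 50, 50            # illustrative values
rnn_input_size = x_size ** 2            # 2500, since embed_net is None
rnn_input_size += 3 + z_what_size + 1   # + z_where + z_what + z_pres
assert rnn_input_size == 2554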
def __init__(self,
             num_steps,
             x_size,
             window_size,
             z_what_size,
             rnn_hidden_size,
             encoder_net=[],
             decoder_net=[],
             predict_net=[],
             embed_net=None,
             bl_predict_net=[],
             non_linearity='ReLU',
             decoder_output_bias=None,
             decoder_output_use_sigmoid=False,
             use_masking=True,
             use_baselines=True,
             baseline_scalar=None,
             fudge_z_pres=False,
             use_cuda=False):
    super(AIR, self).__init__()

    self.num_steps = num_steps
    self.x_size = x_size
    self.window_size = window_size
    self.z_what_size = z_what_size
    self.rnn_hidden_size = rnn_hidden_size
    self.use_masking = use_masking and not fudge_z_pres
    self.use_baselines = use_baselines and not fudge_z_pres
    self.baseline_scalar = baseline_scalar
    self.fudge_z_pres = fudge_z_pres
    self.use_cuda = use_cuda

    self.z_pres_size = 1
    self.z_where_size = 3
    # By making these parameters they will be moved to the gpu
    # when necessary. (They are not registered with pyro for
    # optimization.)
    self.z_where_mu_prior = nn.Parameter(torch.FloatTensor([3.0, 0, 0]),
                                         requires_grad=False)
    self.z_where_sigma_prior = nn.Parameter(torch.FloatTensor([0.1, 1, 1]),
                                            requires_grad=False)

    # Create nn modules.
    rnn_input_size = x_size**2 if embed_net is None else embed_net[-1]
    rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
    nl = getattr(nn, non_linearity)

    self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.encode = Encoder(window_size**2, encoder_net, z_what_size, nl)
    self.decode = Decoder(window_size**2, decoder_net, z_what_size,
                          decoder_output_bias, decoder_output_use_sigmoid, nl)
    self.predict = Predict(rnn_hidden_size, predict_net,
                           self.z_pres_size, self.z_where_size, nl)
    self.embed = Identity() if embed_net is None else MLP(x_size**2, embed_net, nl, True)

    self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
    self.bl_embed = Identity() if embed_net is None else MLP(x_size**2, embed_net, nl, True)

    # Create parameters.
    self.h_init = zeros(1, rnn_hidden_size)
    self.c_init = zeros(1, rnn_hidden_size)
    self.bl_h_init = zeros(1, rnn_hidden_size)
    self.bl_c_init = zeros(1, rnn_hidden_size)
    self.z_where_init = zeros(1, self.z_where_size)
    self.z_what_init = zeros(1, self.z_what_size)

    if use_cuda:
        self.cuda()
def __init__(
    self,
    token_embeddings: TokenEmbeddings,
    relations_dictionary: Dictionary,
    lstm_hidden_size: int = 400,
    n_mlp_arc: int = 500,
    n_mlp_rel: int = 100,
    n_lstm_layers: int = 3,
    mlp_dropout: float = .33,
    lstm_dropout: float = 0.2,
    relearn_embeddings: bool = True,
    beta: float = 1.0,
    pickle_module: str = "pickle",
):
    super(BiAffineParser, self).__init__()

    self.token_embeddings = token_embeddings
    self.beta = beta
    self.relations_dictionary: Dictionary = relations_dictionary
    self.relearn_embeddings = relearn_embeddings
    self.lstm_hidden_size = lstm_hidden_size
    self.n_mlp_arc = n_mlp_arc
    self.n_mlp_rel = n_mlp_rel
    self.n_lstm_layers = n_lstm_layers
    self.lstm_dropout = lstm_dropout
    self.mlp_dropout = mlp_dropout

    lstm_input_dim: int = self.token_embeddings.embedding_length
    if self.relations_dictionary:
        self.embedding2nn = torch.nn.Linear(lstm_input_dim, lstm_input_dim)

    self.lstm = BiLSTM(input_size=lstm_input_dim,
                       hidden_size=self.lstm_hidden_size,
                       num_layers=self.n_lstm_layers,
                       dropout=self.lstm_dropout)

    self.mlp_arc_h = MLP(n_in=self.lstm_hidden_size * 2,
                         n_hidden=self.n_mlp_arc,
                         dropout=self.mlp_dropout)
    self.mlp_arc_d = MLP(n_in=self.lstm_hidden_size * 2,
                         n_hidden=self.n_mlp_arc,
                         dropout=self.mlp_dropout)
    self.mlp_rel_h = MLP(n_in=self.lstm_hidden_size * 2,
                         n_hidden=self.n_mlp_rel,
                         dropout=self.mlp_dropout)
    self.mlp_rel_d = MLP(n_in=self.lstm_hidden_size * 2,
                         n_hidden=self.n_mlp_rel,
                         dropout=self.mlp_dropout)

    # the Biaffine layers
    self.arc_attn = Biaffine(n_in=self.n_mlp_arc,
                             bias_x=True,
                             bias_y=False)
    self.rel_attn = Biaffine(n_in=self.n_mlp_rel,
                             n_out=len(relations_dictionary),
                             bias_x=True,
                             bias_y=True)

    self.loss_function = torch.nn.CrossEntropyLoss()

    self.to(flair.device)
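# Hedged usage sketch (not from the source): `token_embeddings` is expected to be a flair
# TokenEmbeddings instance and `relations_dictionary` a flair Dictionary of relation labels;
# both are assumed to have been built elsewhere. Hyperparameter values are illustrative.
parser = BiAffineParser(token_embeddings=token_embeddings,
                        relations_dictionary=relations_dictionary,
                        lstm_hidden_size=400,
                        n_mlp_arc=500,
                        n_mlp_rel=100,
                        n_lstm_layers=3)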