def __init__(self, vocab_size, embedding_dim=256, hidden_size=512, num_layers=1,
             kernel_size=3, dropout=0.25, max_length=100):
    super().__init__()
    assert kernel_size % 2 == 1, f"Kernel size must be odd, got {kernel_size}"
    self.embedding_dim = embedding_dim
    self.vocab_size = vocab_size
    self.max_length = max_length
    self.num_layers = num_layers
    # Residual scaling factor; kept as a plain tensor attribute as in the
    # original, so note it will not move with the module across devices.
    self.scale = torch.sqrt(torch.FloatTensor([0.5]))
    self.token_embedding = Embedding(vocab_size, embedding_dim)
    self.position_embedding = Embedding(max_length, embedding_dim)
    self.embed2hidden = Linear(embedding_dim, hidden_size)
    self.hidden2embed = Linear(hidden_size, embedding_dim)
    convs = [
        EncoderConv(hidden_size, 2 * hidden_size, kernel_size,
                    (kernel_size - 1) // 2, dropout)
        for _ in range(num_layers)
    ]
    self.convs = Sequential(*convs)
    self.dropout = Dropout(dropout)
def __init__(self, hidden_dim=512, embedding_dim=256, vocab_size=10000,
             num_layers=10, kernel_size=3, dropout=0.25, PAD_token=0, max_len=50):
    super().__init__()
    self.kernel_size = kernel_size
    self.PAD_token = PAD_token
    self.num_layers = num_layers
    self.vocab_size = vocab_size
    self.max_len = max_len
    self.token_embedding = Embedding(vocab_size, embedding_dim)
    self.position_embedding = Embedding(max_len, embedding_dim)
    self.embedd2hidden = Linear(embedding_dim, hidden_dim)
    self.hidden2embedd = Linear(hidden_dim, embedding_dim)
    self.attention_layer = Attention(embedding_dim, hidden_dim)
    self.decoder_conv = DecoderConv(kernel_size, dropout, PAD_token)
    self.convs = ModuleList([
        Conv1d(in_channels=hidden_dim, out_channels=2 * hidden_dim,
               kernel_size=kernel_size)
        for _ in range(num_layers)
    ])
    self.out = Linear(embedding_dim, vocab_size)
    self.dropout = Dropout(dropout)
def __init__(self, hyper_param, word_embedding, char_embedding, vocabs,
             task_vocab_size, domain_vocab_size, param_types, device):
    super().__init__()
    word_embeddings_weight = torch.FloatTensor(word_embedding)
    self.word_matrix = Embedding.from_pretrained(word_embeddings_weight, freeze=False)
    char_embeddings_weight = torch.FloatTensor(char_embedding)
    self.char_matrix = Embedding.from_pretrained(char_embeddings_weight, freeze=False)
    self.char_cnn = CharCNN(hyper_param.drop_out, hyper_param.char_embed_dim,
                            hyper_param.char_cnn_kernels)
    self.task_embeddings = Embedding.from_pretrained(
        torch.from_numpy(self.random_embedding(task_vocab_size, 8)),
        freeze=False).float()
    self.domain_embeddings = Embedding.from_pretrained(
        torch.from_numpy(self.random_embedding(domain_vocab_size, 8)),
        freeze=False).float()
    self.lstm_input_size = (hyper_param.word_embed_dim
                            + hyper_param.char_cnn_kernels * 3)
    self.rnn = NoParamLSTM(bidirectional=True, num_layers=1,
                           input_size=self.lstm_input_size,
                           hidden_size=hyper_param.lstm_hidden, batch_first=True)
    self.drop_out = Dropout(p=hyper_param.drop_out)
    # A plain dict does not register submodules; the setattr below is what
    # makes each output head visible to the module so its parameters are tracked.
    self.fc = {}
    for key in param_types:
        self.fc[key] = Linear(hyper_param.lstm_hidden * 2, len(vocabs[key]))
        setattr(self, 'fc_' + '_'.join(key), self.fc[key])
    self.device = device
def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers,
             bidirectional, max_length, dropout_rate, embedding_weights=None):
    super(Encoder, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.max_length = max_length
    self.num_directions = 2 if bidirectional else 1
    self.embedding_layer = Embedding(self.vocab_size, self.embedding_dim,
                                     padding_idx=0)
    # `embedding_weights == None` would compare elementwise on a tensor;
    # use an identity check instead.
    if embedding_weights is not None:
        self.embedding_layer.load_state_dict({'weight': embedding_weights})
    self.dropout = Dropout(p=dropout_rate)
    self.lstm_layer = LSTM(self.embedding_dim, self.hidden_dim,
                           num_layers=self.num_layers, bidirectional=bidirectional,
                           batch_first=True, dropout=dropout_rate)
def load_embedding_model(args, vocab):
    embedding_model = Embedding(vocab.size(), args.input_dim)
    if args.cuda:
        embedding_model = embedding_model.cuda()
    emb_file = os.path.join(
        args.data,
        args.emb_dir.split("/")[-1] + "_" + args.emb_file + '_emb.pth')
    # Load the cached embedding matrix once (the original loaded it twice).
    emb = None
    if os.path.isfile(emb_file):
        cached = torch.load(emb_file)
        if cached.size(1) == args.input_dim:
            emb = cached
    if emb is None:
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.emb_dir, args.emb_file))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(), glove_emb.size(1))
        not_known = []
        for word in vocab.token_to_idx.keys():
            if glove_vocab.get_index(word):
                emb[vocab.get_index(word)] = glove_emb[glove_vocab.get_index(word)]
            else:
                not_known.append(word)
                emb[vocab.get_index(word)] = torch.Tensor(
                    emb[vocab.get_index(word)].size()).normal_(-0.05, 0.05)
        if args.calculate_new_words:
            emb = apply_not_known_words(emb, args, not_known, vocab)
        torch.save(emb, emb_file)
    if args.cuda:
        emb = emb.cuda()
    # Plug these into the embedding matrix inside the model.
    embedding_model.state_dict()['weight'].copy_(emb)
    return embedding_model
def __init__(
    self,
    num_nodes: int,
    embedding_dim: int,
    num_layers: int,
    alpha: Optional[Union[float, Tensor]] = None,
    **kwargs,
):
    super().__init__()
    self.num_nodes = num_nodes
    self.embedding_dim = embedding_dim
    self.num_layers = num_layers
    # Default: weight the input embeddings and every layer output equally.
    if alpha is None:
        alpha = 1. / (num_layers + 1)
    if isinstance(alpha, Tensor):
        assert alpha.size(0) == num_layers + 1
    else:
        alpha = torch.tensor([alpha] * (num_layers + 1))
    self.register_buffer('alpha', alpha)
    self.embedding = Embedding(num_nodes, embedding_dim)
    self.convs = ModuleList([LGConv(**kwargs) for _ in range(num_layers)])
    self.reset_parameters()
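# Hedged sketch (an assumption; the snippet above shows only __init__) of how
# the registered `alpha` buffer is typically consumed, LightGCN-style: a
# weighted sum of the raw embeddings and each LGConv output.
#
# def get_embedding(self, edge_index):
#     x = self.embedding.weight
#     out = x * self.alpha[0]
#     for i, conv in enumerate(self.convs):
#         x = conv(x, edge_index)
#         out = out + x * self.alpha[i + 1]
#     return out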
def __init__(self, config, num_classes, vocab_size):
    super().__init__(config=config)
    w_embed_dim = config.word_embedding_dim
    filter_sizes = config.window_sizes
    filter_num = config.filter_num
    max_filter_size = max(config.window_sizes)
    self.fix_embedding = config.fix_embedding
    position_embed_dim = config.position_embedding_dim
    self.word_embed = Embedding(vocab_size, w_embed_dim)
    self.position_embed = Embedding(2 * max_filter_size + 1, position_embed_dim)
    self.full_embed_dim = w_embed_dim + position_embed_dim
    self.convs1 = ModuleList([
        Conv2d(1, filter_num, (fs, self.full_embed_dim)) for fs in filter_sizes
    ])
    self.dropout = nn.Dropout(config.dropout)
    self.linear = nn.Linear(len(filter_sizes) * filter_num, num_classes)
    # Relative position ids 0 .. 2 * max_filter_size; a batch dimension is
    # added when they are used in the forward pass.
    self.positions = torch.IntTensor(range(2 * max_filter_size + 1))
def __init__(self, energy_and_force=False, cutoff=10.0, num_layers=6,
             hidden_channels=128, num_filters=128, num_gaussians=50):
    super(SchNet, self).__init__()
    self.energy_and_force = energy_and_force
    self.cutoff = cutoff
    self.num_layers = num_layers
    self.hidden_channels = hidden_channels
    self.num_filters = num_filters
    self.num_gaussians = num_gaussians
    # Atom-type embedding (indexed by atomic number) and Gaussian distance expansion.
    self.init_v = Embedding(100, hidden_channels)
    self.dist_emb = emb(0.0, cutoff, num_gaussians)
    self.update_vs = torch.nn.ModuleList([
        update_v(hidden_channels, num_filters) for _ in range(num_layers)
    ])
    self.update_es = torch.nn.ModuleList([
        update_e(hidden_channels, num_filters, num_gaussians, cutoff)
        for _ in range(num_layers)
    ])
    self.update_u = update_u(hidden_channels)
    self.reset_parameters()
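# Hedged sketch (an assumption; the snippet above shows only __init__) of the
# usual SchNet message-passing loop over the modules built there:
#
# v = self.init_v(z)                 # z: atomic numbers per node
# dist_emb = self.dist_emb(dist)     # Gaussian-expanded pairwise distances
# for update_e, update_v in zip(self.update_es, self.update_vs):
#     e = update_e(v, dist, dist_emb, edge_index)
#     v = update_v(v, e, edge_index)
# u = self.update_u(v, batch)        # readout to a per-graph prediction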
def __init__(self, categories_nums, num_layers=2, hidden=64, features_num=16,
             num_class=2, node_num=10000):
    super(GCN, self).__init__()
    embed_size = 8
    self.dropout_p = 0.1
    id_embed_size = 8
    self.id_embedding = Embedding(categories_nums[0], id_embed_size)
    self.lin0_id_emb = Linear(id_embed_size, id_embed_size)
    self.embeddings = torch.nn.ModuleList()
    for max_nums in categories_nums[1:]:
        self.embeddings.append(Embedding(max_nums, embed_size))
    n = max(0, len(categories_nums) - 1)
    if n > 0:
        self.lin0_emb = Linear(embed_size * n, embed_size * n)
    if features_num > 0:
        self.lin0 = Linear(features_num, hidden)
        self.ln0 = torch.nn.LayerNorm(id_embed_size + embed_size * n + hidden)
        self.conv1 = GCNConv(id_embed_size + embed_size * n + hidden, hidden)
    else:
        self.ln0 = torch.nn.LayerNorm(id_embed_size + embed_size * n)
        self.conv1 = GCNConv(id_embed_size + embed_size * n, hidden)
    self.ln1 = torch.nn.LayerNorm(hidden)
    self.conv2 = GCNConv(hidden, hidden)
    self.ln2 = torch.nn.LayerNorm(hidden)
    self.lin1 = Linear(hidden, num_class)
def __init__(self):
    super(Net, self).__init__()
    self.embedder1 = Embedding(num_embeddings=316, embedding_dim=16)
    self.embedder2 = Embedding(num_embeddings=289, embedding_dim=8)
    self.cnn1_1 = Conv2d(in_channels=1, out_channels=128, kernel_size=(3, 16),
                         stride=1, padding=(1, 0))
    self.cnn1_2 = Conv2d(in_channels=1, out_channels=128, kernel_size=(4, 16),
                         stride=1)
    self.cnn1_3 = Conv2d(in_channels=1, out_channels=128, kernel_size=(5, 16),
                         stride=1, padding=(2, 0))
    self.cnn2 = Conv2d(in_channels=1, out_channels=128, kernel_size=(4, 8),
                       stride=4)
    self.lstm = LSTM(input_size=512, hidden_size=100, bidirectional=True,
                     batch_first=True)
    self.lin1 = Linear(200, 64)
    self.lin2 = Linear(64, 32)
    self.lin3 = Linear(32, 1)
def __init__(self, input_embeddings, input_vocabulary_size: int,
             input_embeddings_size: int, clear_text: bool, tokenize_model: str):
    super().__init__()
    # `input_embeddings` is a pretrained embedding tensor or None (the
    # original `str` annotation was inaccurate: the value is passed directly
    # to Embedding.from_pretrained). `pad_token_index` is assumed to be a
    # module-level constant.
    if clear_text:
        assert tokenize_model is not None
        from pytorch_pretrained_bert import BertTokenizer
        self.bert_tokenizer = BertTokenizer.from_pretrained(
            tokenize_model, do_lower_case=False)
        input_vocabulary_size = len(self.bert_tokenizer.vocab)
        self.lut_embeddings = Embedding(
            num_embeddings=input_vocabulary_size,
            embedding_dim=input_embeddings_size,
            padding_idx=pad_token_index)
        self._is_fixed = False
    else:
        self.bert_tokenizer = None
        if input_embeddings is not None:
            self.lut_embeddings = Embedding.from_pretrained(
                embeddings=input_embeddings, freeze=True)
            self._is_fixed = True
        else:
            self.lut_embeddings = Embedding(
                num_embeddings=input_vocabulary_size,
                embedding_dim=input_embeddings_size,
                padding_idx=pad_token_index)
            self._is_fixed = False
    self._output_dim = input_embeddings_size
def __init__(self, cfg):
    super(GRU_model, self).__init__()
    self.input_dim = cfg.model.input_size
    self.hidden_dim = cfg.model.hidden_size
    self.output_dim = cfg.model.output_size
    self.n_layers = cfg.model.n_layers
    self.cfg = cfg
    self.accuracy = pl.metrics.Accuracy()
    self.hidden_state = 0
    self.embed_seq = Embedding(26, cfg.model.embed_dim_seq)
    self.embed_creature = Embedding(10, cfg.model.embed_dim_creature)
    self.gru = nn.GRU(
        input_size=cfg.model.embed_dim_seq + cfg.model.embed_dim_creature,
        hidden_size=cfg.model.hidden_size,
        num_layers=self.n_layers,
        batch_first=True,
        dropout=cfg.model.dropout,
        bidirectional=cfg.model.bi)
    self.num_directions = 2 if cfg.model.bi else 1
    self.fc_1 = nn.Sequential(
        nn.Linear(in_features=cfg.model.hidden_size * self.num_directions,
                  out_features=20)
        # nn.BatchNorm1d(20),
        # nn.ReLU(),
        # nn.Dropout(p=cfg.model.dropout),
        # nn.Linear(in_features=20, out_features=20)
    )
    self.correct = []
    self.count = []
    self.correct_test = []
    self.count_test = []
def __init__(self):
    super().__init__()
    self.node_emb = Embedding(21, 75)
    self.edge_emb = Embedding(4, 50)
    aggregators = ['mean', 'min', 'max', 'std']
    scalers = ['identity', 'amplification', 'attenuation']
    self.convs = ModuleList()
    self.batch_norms = ModuleList()
    for _ in range(4):
        # `deg` is the node-degree histogram PNAConv expects; it is assumed
        # to be defined in the enclosing scope.
        conv = PNAConv(in_channels=75, out_channels=75, aggregators=aggregators,
                       scalers=scalers, deg=deg, edge_dim=50, towers=5,
                       pre_layers=1, post_layers=1, divide_input=False)
        self.convs.append(conv)
        self.batch_norms.append(BatchNorm(75))
    self.mlp = Sequential(Linear(75, 50), ReLU(), Linear(50, 25), ReLU(),
                          Linear(25, 1))
def from_table(table: Union[array, Tensor], frozen: bool) -> Embedding:
    ne, ed = table.shape
    embedder = Embedding(ne, ed, padding_idx=0)
    embedder.weight.data = tensor(table).to(float32)
    # Row 0 is the padding vector; zero it explicitly.
    embedder.weight.data[0] = 0.
    embedder.requires_grad_(not frozen)
    return embedder
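# Illustrative usage of from_table above (hypothetical values; assumes numpy
# and torch are installed and that from_table and its imports are in scope):
import numpy as np
import torch

_demo_table = np.random.rand(100, 16)            # row 0 reserved for padding
_demo_embedder = from_table(_demo_table, frozen=True)
_demo_vectors = _demo_embedder(torch.tensor([[1, 2, 0]]))
assert _demo_vectors.shape == (1, 3, 16)         # (batch, tokens, embedding_dim)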
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.save_hyperparameters()
    kwargs = self.sanetize_kwargs(kwargs)
    self.node_emb = Embedding(kwargs["node_vocab"], kwargs["node_dim"])
    self.edge_emb = Embedding(kwargs["edge_vocab"], kwargs["edge_dim"])
    self.convs = ModuleList()
    self.batch_norms = ModuleList()
    for _ in range(kwargs["num_layers"]):
        conv = PNAConv(
            in_channels=kwargs["node_dim"],
            out_channels=kwargs["node_dim"],
            aggregators=kwargs["aggregators"],
            scalers=kwargs["scalers"],
            deg=torch.tensor(kwargs["deg"]),
            edge_dim=kwargs["edge_dim"],
            towers=kwargs["towers"],
            pre_layers=kwargs["pre_layers"],
            post_layers=kwargs["post_layers"],
            divide_input=kwargs["divide_input"],
        )
        self.convs.append(conv)
        self.batch_norms.append(BatchNorm(kwargs["node_dim"]))
    self.mlp = Sequential(
        Linear(kwargs["node_dim"], kwargs["edge_dim"]),
        ReLU(),
        Linear(kwargs["edge_dim"], kwargs["hidden_channels"]),
        ReLU(),
        Linear(kwargs["hidden_channels"], kwargs["num_classes"]),
    )
def __init__(
    self,
    src_vocab_size: int,
    tgt_vocab_size: int,
    tgt_sequence_size: int,
    word_emb_dim: int = 32,
    nhead: int = 1,
    num_encoder_layers: int = 2,
    num_decoder_layers: int = 2,
    dim_feedforward: int = 128,
    dropout_prob: float = 0.1,
) -> None:
    super(Transformer, self).__init__()
    # Parameters
    self.word_emb_dim = word_emb_dim
    self.nhead = nhead
    self.tgt_sequence_size = tgt_sequence_size
    # Layers
    self.src_word_emb = Embedding(src_vocab_size, word_emb_dim)
    self.tgt_word_emb = Embedding(tgt_vocab_size, word_emb_dim)
    self.position_encoding = PositionalEncoding(word_emb_dim, dropout_prob)
    encoder_layer = TransformerEncoderLayer(word_emb_dim, nhead,
                                            dim_feedforward, dropout_prob)
    encoder_norm = LayerNorm(word_emb_dim)
    self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers,
                                      encoder_norm)
    decoder_layer = TransformerDecoderLayer(word_emb_dim, nhead,
                                            dim_feedforward, dropout_prob)
    decoder_norm = LayerNorm(word_emb_dim)
    self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers,
                                      decoder_norm)
    # The generator consumes the decoder output flattened across the whole
    # target sequence, hence the word_emb_dim * tgt_sequence_size input size.
    self.generator = Linear(word_emb_dim * tgt_sequence_size, tgt_vocab_size)
def __init__(self):
    super(Net_without_BiLSTM, self).__init__()
    self.embedder1 = Embedding(num_embeddings=316, embedding_dim=16)
    self.embedder2 = Embedding(num_embeddings=289, embedding_dim=8)
    self.cnn1_1 = Conv2d(in_channels=1, out_channels=128, kernel_size=(3, 16),
                         stride=1, padding=(1, 0))
    self.cnn1_2 = Conv2d(in_channels=1, out_channels=128, kernel_size=(4, 16),
                         stride=1)
    self.cnn1_3 = Conv2d(in_channels=1, out_channels=128, kernel_size=(5, 16),
                         stride=1, padding=(2, 0))
    self.cnn2 = Conv2d(in_channels=1, out_channels=128, kernel_size=(4, 8),
                       stride=4)
    self.lin1 = Linear(512000, 32)
    self.lin2 = Linear(32, 16)
    self.lin3 = Linear(16, 1)
def __init__(self, config, dissimilarity):
    super().__init__()
    self.ent_emb_dim = config.entities_embedding_dimension
    self.rel_emb_dim = config.relations_embedding_dimension
    self.number_entities = config.number_entities
    self.number_relations = config.number_relations
    self.norm_type = config.norm_type
    self.dissimilarity = dissimilarity

    # Initialize embedding objects.
    self.entity_embeddings = Embedding(self.number_entities, self.ent_emb_dim)
    self.relation_embeddings = Embedding(self.number_relations, self.rel_emb_dim)

    # Fill the embedding weights with Xavier-initialized values.
    self.entity_embeddings.weight = Parameter(xavier_uniform_(
        empty(size=(self.number_entities, self.ent_emb_dim))))
    self.relation_embeddings.weight = Parameter(xavier_uniform_(
        empty(size=(self.number_relations, self.rel_emb_dim))))

    # Normalize the embeddings.
    self.entity_embeddings.weight.data = normalize(
        self.entity_embeddings.weight.data, p=self.norm_type, dim=1)
    self.relation_embeddings.weight.data = normalize(
        self.relation_embeddings.weight.data, p=self.norm_type, dim=1)
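# Standalone check (illustrative, with p=2 as an example norm) of the
# normalization step above: after normalize(..., p=2, dim=1), every embedding
# row has unit L2 norm.
import torch
from torch.nn.functional import normalize

_w = torch.randn(7, 4)
_w = normalize(_w, p=2, dim=1)
assert torch.allclose(_w.norm(p=2, dim=1), torch.ones(7), atol=1e-6)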
def __init__(self, edge_index, embedding_dim, walk_length, context_size,
             walks_per_node=1, p=1, q=1, num_negative_samples=1, num_nodes=None,
             sparse=False):
    super().__init__()

    if random_walk is None:
        raise ImportError('`Node2Vec` requires `torch-cluster`.')

    N = maybe_num_nodes(edge_index, num_nodes)
    row, col = edge_index
    self.adj = SparseTensor(row=row, col=col, sparse_sizes=(N, N))
    self.adj = self.adj.to('cpu')

    assert walk_length >= context_size

    self.embedding_dim = embedding_dim
    # torch-cluster's random_walk returns one more node than the requested
    # length, so walk_length - 1 yields walks of exactly walk_length nodes.
    self.walk_length = walk_length - 1
    self.context_size = context_size
    self.walks_per_node = walks_per_node
    self.p = p
    self.q = q
    self.num_negative_samples = num_negative_samples

    self.embedding = Embedding(N, embedding_dim, sparse=sparse)
    self.reset_parameters()
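# Hedged usage sketch (assumes the enclosing class is named Node2Vec, per the
# ImportError message above, and that torch-cluster is installed):
#
# model = Node2Vec(edge_index, embedding_dim=128, walk_length=20,
#                  context_size=10, walks_per_node=10, sparse=True)
# z = model.embedding.weight   # node embeddings, trained with a skip-gram loss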
def __init__(self, num_classes: int, dim: int, dropout_rate: float = 0.1):
    super(ComplexEmbedding, self).__init__()
    # One lookup table for amplitudes and one for frequencies of the
    # complex-valued embedding.
    self.amplitudes = Embedding(num_embeddings=num_classes, embedding_dim=dim)
    self.frequencies = Embedding(num_embeddings=num_classes, embedding_dim=dim)
    self.dropout = Dropout(dropout_rate)
def __init__(self, in_channels: int, out_channels: int, num_node_types: int,
             num_edge_types: int, edge_type_emb_dim: int, edge_dim: int,
             edge_attr_emb_dim: int, heads: int = 1, concat: bool = True,
             negative_slope: float = 0.2, dropout: float = 0.0,
             root_weight: bool = True, bias: bool = True, **kwargs):
    kwargs.setdefault('aggr', 'add')
    super().__init__(node_dim=0, **kwargs)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.heads = heads
    self.concat = concat
    self.negative_slope = negative_slope
    self.dropout = dropout
    self.root_weight = root_weight

    self.hetero_lin = HeteroLinear(in_channels, out_channels, num_node_types,
                                   bias=bias)
    self.edge_type_emb = Embedding(num_edge_types, edge_type_emb_dim)
    self.edge_attr_emb = Linear(edge_dim, edge_attr_emb_dim, bias=False)
    self.att = Linear(2 * out_channels + edge_type_emb_dim + edge_attr_emb_dim,
                      self.heads, bias=False)
    self.lin = Linear(out_channels + edge_attr_emb_dim, out_channels, bias=bias)
    self.reset_parameters()
def __init__(self, vocab_size, embed_dim):
    super(BayesianSG, self).__init__()
    # Sizes
    self.vocab_size = vocab_size
    self.embed_dim = embed_dim
    # Priors
    self.prior_locs = Embedding(vocab_size, embed_dim)
    self.prior_scales = Embedding(vocab_size, embed_dim)
    # Inference
    self.embeddings = Embedding(vocab_size, embed_dim)
    self.encoder = Linear(2 * embed_dim, 2 * embed_dim)
    self.affine_loc = Linear(2 * embed_dim, embed_dim)
    self.affine_scale = Linear(2 * embed_dim, embed_dim)
    self.std_normal = MultivariateNormal(torch.zeros(embed_dim),
                                         torch.eye(embed_dim))
    # Generation
    self.affine_vocab = Linear(embed_dim, vocab_size)
    # Functions
    self.softmax = Softmax(dim=1)
    self.softplus = Softplus()
    self.relu = ReLU()
def __init__(self, emb_dim, hidden_dim, rank_dim, n_layers, dropout):
    super(Net, self).__init__()
    self.node_emb = Embedding(21, emb_dim)
    self.edge_emb = Embedding(4, emb_dim)
    self.n_layers = n_layers
    self.dropout = dropout
    aggregators = ['mean', 'min', 'max', 'std']
    scalers = ['identity', 'amplification', 'attenuation']
    self.convs = ModuleList()
    self.pool = graph_cp_pooling(hidden_dim)
    self.batch_norms = ModuleList()
    for _ in range(n_layers):
        # conv = PNAConv(in_channels=75, out_channels=75,
        #                aggregators=aggregators, scalers=scalers, deg=deg,
        #                edge_dim=50, towers=5, pre_layers=1, post_layers=1,
        #                divide_input=False)
        conv = GCNConv(emb_dim=emb_dim, hidden_dim=hidden_dim, rank_dim=rank_dim)
        self.convs.append(conv)
        self.batch_norms.append(BatchNorm(hidden_dim))
    self.mlp = Sequential(Linear(hidden_dim, 50), ReLU(), Linear(50, 25), ReLU(),
                          Linear(25, 1))
def __init__(
    self,
    num_embeddings: int = 1024,
    embedding_dim: int = 128,
    embedding_initial_weights: Optional[Tensor] = None,
    freeze_embedding: bool = False,
    rnn_style: str = "LSTM",
    rnn_num_layers: int = 1,
    hidden_dim: int = 128,
    bidirectional: bool = False,
):
    super().__init__()
    self.embedding = Embedding(num_embeddings, embedding_dim)
    if embedding_initial_weights is not None:
        self.embedding.load_state_dict({"weight": embedding_initial_weights})
    if freeze_embedding:
        for param in self.embedding.parameters():
            param.requires_grad = False
    self.rnn = RNN_CLASS_MAPPING[rnn_style](
        embedding_dim,
        hidden_dim,
        rnn_num_layers,
        bidirectional=bidirectional,
        batch_first=True,
    )
    num_directions = 2 if bidirectional else 1
    self.rnn_output_dim = num_directions * hidden_dim
    self.hidden_state_dim = rnn_num_layers * num_directions * hidden_dim
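# Standalone check (illustrative) of the load-and-freeze pattern used above:
# copy weights in via load_state_dict, then disable gradients on the table.
import torch
from torch.nn import Embedding

_emb = Embedding(4, 3)
_init = torch.zeros(4, 3)
_emb.load_state_dict({"weight": _init})
for _p in _emb.parameters():
    _p.requires_grad = False
assert not _emb.weight.requires_grad
assert torch.equal(_emb.weight, _init)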
def __init__(self, num_embeddings=12, embedding_dim=64, num_layers=1, num_heads=1,
             num_classes=10, dropout=0.2,
             edge_encoding=EDGE_ENCODING_TYPE.RELATIVE_POSITION, log=False,
             name=None, use_cuda=False):
    super(TransformerModel, self).__init__(name=name, log=log, use_cuda=use_cuda)
    if (edge_encoding == EDGE_ENCODING_TYPE.RELATIVE_POSITION
            or edge_encoding == EDGE_ENCODING_TYPE.GRAPH_RELATIVE):
        self.edges_embedding = ModuleList([Embedding(5, embedding_dim),
                                           Embedding(5, embedding_dim)])
    else:
        self.edges_embedding = None
    self.embeddings = Embedding(num_embeddings=num_embeddings,
                                embedding_dim=embedding_dim, padding_idx=0)
    self.softmax = Softmax(dim=-1)
    self.l_out = Linear(in_features=embedding_dim, out_features=num_classes)
    self.attention_layers = ModuleList([
        TransformerLayer(in_features=embedding_dim, hidden_dim=embedding_dim,
                         num_heads=num_heads, dropout=dropout,
                         edge_encoding=edge_encoding)
        for _ in range(num_layers)
    ])
def __init__(self, config):
    super().__init__()
    self.word_embeddings = Embedding(config.vocab_size, config.embedding_size,
                                     padding_idx=0)
    self.position_embeddings = Embedding(config.max_position_embeddings,
                                         config.embedding_size)
    self.token_type_embeddings = Embedding(config.type_vocab_size,
                                           config.embedding_size)
    self.LayerNorm = LayerNorm(config.embedding_size, eps=1e-12)
    self.dropout = Dropout(config.dropout_prob)
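# Hedged sketch (an assumption; the snippet above shows only __init__) of the
# usual BERT-style forward for this embedding block: sum the three lookups,
# then apply LayerNorm and dropout.
#
# def forward(self, input_ids, token_type_ids, position_ids):
#     e = (self.word_embeddings(input_ids)
#          + self.position_embeddings(position_ids)
#          + self.token_type_embeddings(token_type_ids))
#     return self.dropout(self.LayerNorm(e))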
def __init__(self, token_vocabulary: Union[Vocabulary, PretrainedVocabulary],
             label_vocabulary: LabelVocabulary, word_embedding_dim: int,
             hidden_size: int, num_layer: int, dropout: float):
    super().__init__()
    self.word_embedding_dim = word_embedding_dim
    self.token_vocabulary = token_vocabulary
    self.label_vocabulary = label_vocabulary

    if isinstance(token_vocabulary, Vocabulary):
        self.embedding: Embedding = Embedding(
            num_embeddings=token_vocabulary.size,
            embedding_dim=word_embedding_dim,
            padding_idx=token_vocabulary.padding_index)
    elif isinstance(token_vocabulary, PretrainedVocabulary):
        self.embedding: Embedding = Embedding.from_pretrained(
            token_vocabulary.embedding_matrix,
            freeze=True,
            padding_idx=token_vocabulary.padding_index)

    self.hidden_size = hidden_size
    self.lstm = LSTM(input_size=word_embedding_dim,
                     hidden_size=hidden_size,
                     num_layers=num_layer,
                     bidirectional=True,
                     dropout=dropout)
    self.liner = Linear(in_features=hidden_size * 2,
                        out_features=label_vocabulary.label_size)
    self.reset_parameters()
def __init__(self, config):
    super(BiDAF, self).__init__()
    self.config = config
    self.logits = None
    self.yp = None
    self.dc, self.dw, self.dco = (config.char_emb_size, config.word_emb_size,
                                  config.char_out_size)
    self.N, self.M, self.JX, self.JQ, self.VW, self.VC, self.d, self.W = (
        config.batch_size, config.max_num_sents, config.max_sent_size,
        config.max_ques_size, config.word_vocab_size, config.char_vocab_size,
        config.hidden_size, config.max_word_size)

    self.word_embed = Embedding(config.word_vocab_size, config.glove_vec_size)
    self.char_embed = Embedding(config.char_vocab_size, config.char_emb_size)

    # Char-level convolutions.
    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
    heights = list(map(int, config.filter_heights.split(',')))
    self.filter_sizes = filter_sizes
    self.heights = heights
    self.multiconv_1d = L.MultiConv1D(config.is_train, config.keep_prob)
    self.multiconv_1d_qq = L.MultiConv1D(config.is_train, config.keep_prob)

    if config.use_char_emb:
        highway_outsize = self.dco + self.dw
    else:
        highway_outsize = self.dw
    self.highway = L.HighwayNet(config.highway_num_layers, highway_outsize)

    self.prepro = L.BiEncoder(config, highway_outsize,
                              hidden_size=config.hidden_size)
    self.prepro_x = L.BiEncoder(config, highway_outsize,
                                hidden_size=config.hidden_size)
    self.attention_layer = L.AttentionLayer(config, self.JX, self.M, self.JQ,
                                            2 * config.hidden_size)

    # Because p0 = torch.cat([h, u_a, torch.mul(h, u_a), torch.mul(h, h_a)], 3)
    # and the last dim of these matrices is d.
    self.g0_biencoder = L.BiEncoder(config, 8 * config.hidden_size,
                                    hidden_size=config.hidden_size)
    self.g1_biencoder = L.BiEncoder(config, 2 * config.hidden_size,
                                    hidden_size=config.hidden_size)
    # p0: 8 * d, g1: 2 * d.
    self.g1_logits = L.GetLogits(config, 10 * config.hidden_size,
                                 input_keep_prob=config.input_keep_prob,
                                 function=config.answer_func)
    # p0: [60, 1, 161, 800], g1: [60, 1, 161, 200], a1: [60, 1, 161, 200],
    # g1 * a1: [60, 1, 161, 200].
    self.g2_biencoder = L.BiEncoder(config, 14 * config.hidden_size,
                                    hidden_size=config.hidden_size)
    # p0: 8 * d, g2: 2 * d.
    self.g2_logits = L.GetLogits(config, 10 * config.hidden_size,
                                 input_keep_prob=config.input_keep_prob,
                                 function=config.answer_func)
def __init__(self, args, train_dataset, dataset, hidden_channels, num_layers,
             max_z, GNN=GCNConv, k=0.6, use_feature=False, dataset_name=None,
             node_embedding=None):
    super(WLGNN_model, self).__init__()
    self.use_feature = use_feature
    self.node_embedding = node_embedding
    self.args = args

    if k <= 1:  # Transform percentile to a node count.
        if args.dynamic_train:
            sampled_train = train_dataset[:1000]
        else:
            sampled_train = train_dataset
        num_nodes = sorted([g.num_nodes for g in sampled_train])
        k = num_nodes[int(math.ceil(k * len(num_nodes))) - 1]
        k = max(10, k)
    self.k = int(k)

    self.max_z = max_z
    if "social" in dataset_name:
        self.w_z = 50000
    else:
        self.w_z = max_z
    self.w_embedding = Embedding(self.w_z, hidden_channels)
    self.z1_embedding = Embedding(self.max_z, hidden_channels)
    self.z2_embedding = Embedding(self.max_z, hidden_channels)

    self.convs = ModuleList()
    initial_channels = hidden_channels * 3
    if self.use_feature:
        initial_channels += dataset.num_features * 2
    if self.node_embedding is not None:
        initial_channels += node_embedding.embedding_dim
    self.convs.append(GNN(initial_channels, hidden_channels))
    for i in range(num_layers - 1):
        self.convs.append(GNN(hidden_channels, hidden_channels))
    self.convs.append(GNN(hidden_channels, 1))

    conv1d_channels = [16, 32]
    total_latent_dim = hidden_channels * num_layers + 1
    conv1d_kws = [total_latent_dim, 5]
    self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0])
    self.maxpool1d = MaxPool1d(2, 2)
    self.conv2 = Conv1d(conv1d_channels[0], conv1d_channels[1], conv1d_kws[1], 1)
    dense_dim = int((self.k - 2) / 2 + 1)
    dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1]
    self.lin1 = Linear(dense_dim, 128)
    self.lin2 = Linear(128, 1)
def __init__(self, categories_nums, features_num=16, num_class=2, sparse=False):
    super(GAT_noLN, self).__init__()
    if sparse:
        print('---sparse---')
        hidden = 16
        heads = 8
    else:
        print('---no sparse---')
        hidden = 8
        heads = 4
    dropout = 0.1
    self.dropout_p = dropout
    embed_size = 8
    id_embed_size = 8
    self.id_embedding = Embedding(categories_nums[0], id_embed_size)
    self.lin0_id_emb = Linear(id_embed_size, id_embed_size)
    self.embeddings = torch.nn.ModuleList()
    for max_nums in categories_nums[1:]:
        self.embeddings.append(Embedding(max_nums, embed_size))
    n = max(0, len(categories_nums) - 1)
    if n > 0:
        self.lin0_emb = Linear(embed_size * n, embed_size * n)
    if features_num > 0:
        self.lin0 = Linear(features_num, hidden)
        self.ln0 = torch.nn.LayerNorm(id_embed_size + embed_size * n + hidden)
        self.conv1 = GATConv(id_embed_size + embed_size * n + hidden, hidden,
                             heads=heads, concat=True, dropout=dropout)
    else:
        self.ln0 = torch.nn.LayerNorm(id_embed_size + embed_size * n)
        self.conv1 = GATConv(id_embed_size + embed_size * n, hidden,
                             heads=heads, concat=True, dropout=dropout)
    self.ln1 = torch.nn.LayerNorm(hidden * heads)
    self.conv2 = GATConv(hidden * heads, hidden, heads=heads, concat=True,
                         dropout=dropout)
    self.ln2 = torch.nn.LayerNorm(hidden * heads)
    self.lin1 = Linear(hidden * heads, num_class)
def test_time_distributed_reshapes_correctly(self):
    char_embedding = Embedding(2, 2)
    char_embedding.weight = Parameter(torch.FloatTensor([[.4, .4], [.5, .5]]))
    distributed_embedding = TimeDistributed(char_embedding)
    char_input = torch.LongTensor([[[1, 0], [1, 1]]])
    output = distributed_embedding(char_input)
    assert_almost_equal(output.data.numpy(),
                        [[[[.5, .5], [.4, .4]], [[.5, .5], [.5, .5]]]])