def __init__(self, embedding_dim, vocab_size):
    super(EmbeddingLayer, self).__init__()
    self.word_encoder = nn.Sequential(
        nn.Embedding(vocab_size, embedding_dim),
        nn.Dropout(0.3))
def __init__(self, embedding_sizelist, embedding_dimlist):
    nn.Module.__init__(self)
    self.embeddinglist = nn.ModuleList(
        [nn.Embedding(size, dim).cuda()
         for size, dim in zip(embedding_sizelist, embedding_dimlist)]
    )
num_epochs = 20
learning_rate = 0.01

# Load the raw data
dataset, idx_to_token, token_to_idx, counter = read_data('./data/ptb.train.txt')
dataset = subsample_dataset(dataset)
centers, contexts = get_centers_and_contexts(dataset, 5)
negatives = get_negatives(contexts, 5)

# Convert to an iterable data loader
dataset = MyDataset(centers, contexts, negatives)
data_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True,
                                        collate_fn=collate, num_workers=4)

# Define the model
Net = nn.Sequential(
    nn.Embedding(num_embeddings=len(idx_to_token), embedding_dim=embedding_size),
    nn.Embedding(num_embeddings=len(idx_to_token), embedding_dim=embedding_size)
    # The embedding layer takes a tensor of indices as input and outputs the
    # corresponding vectors
)
loss = SigmoidBinaryCrossEntropyLoss()
optimizer = torch.optim.Adam(Net.parameters(), lr=learning_rate)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train(net=Net, loss=loss, optimizer=optimizer, device=device,
      num_epochs=num_epochs, lr=learning_rate)

word2vec = Net[0].weight.data
print(word2vec[token_to_idx['have']])

"""
Summary:
1. A skip-gram model can be trained in PyTorch with negative sampling.
2. Subsampling tries to minimize the influence of high-frequency words on the
   trained word embeddings.
3. Examples of different lengths can be padded to equal length within a mini-batch;
   a mask variable distinguishes padding from non-padding, and only non-padding
   positions take part in the loss computation.
"""
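# Point 3 above in action: a minimal sketch (not from the source) of a masked
# sigmoid binary cross-entropy, assuming `labels` are 1 for context words and 0 for
# negatives, and `mask` is 1.0 at real tokens and 0.0 at padding, so padded
# positions contribute nothing to the per-example loss.
import torch
import torch.nn.functional as F

def masked_sigmoid_bce(logits, labels, mask):
    # logits, labels, mask: (batch_size, max_len)
    per_token = F.binary_cross_entropy_with_logits(logits, labels, reduction='none')
    return (per_token * mask).sum(dim=1) / mask.sum(dim=1)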
def __init__(self, config):
    super().__init__()
    self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size,
                                        padding_idx=0)
    self.position_embeddings = nn.Embedding(config.max_position_embeddings,
                                            config.hidden_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
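# A plausible forward for the embedding module above (a sketch, not from the source;
# assumes `torch` is imported): BERT-style layers typically sum word and position
# embeddings and then apply dropout.
def forward(self, input_ids):
    seq_length = input_ids.size(1)
    position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    embeddings = self.word_embeddings(input_ids) + self.position_embeddings(position_ids)
    return self.dropout(embeddings)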
def __init__(self, d_model, vocab):
    super(Embeddings, self).__init__()
    self.lut = nn.Embedding(vocab, d_model)
    self.d_model = d_model
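# Implementations of this Embeddings class (from "Attention Is All You Need"
# codebases) conventionally scale the lookup by sqrt(d_model). A sketch of that
# forward, assuming `import math` at the top of the file:
def forward(self, x):
    return self.lut(x) * math.sqrt(self.d_model)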
def __init__(self, input_size, hidden_size, ctx_size_dict, ctx_name, n_vocab,
             rnn_type, tied_emb=False, dec_init='zero', dec_init_activ='tanh',
             dec_init_size=None, att_type='mlp', att_activ='tanh',
             att_bottleneck='ctx', att_temp=1.0, att_ctx2hid=True,
             transform_ctx=True, mlp_bias=False, dropout_out=0,
             emb_maxnorm=None, emb_gradscale=False, sched_sample=0,
             bos_type='emb', bos_dim=None, bos_activ=None, bos_bias=False,
             out_logic='simple', emb_interact=None, emb_interact_dim=None,
             emb_interact_activ=None, dec_inp_activ=None):
    super().__init__()

    # Normalize case
    self.rnn_type = rnn_type.upper()
    self.out_logic = out_logic

    # A persistent dictionary to save activations for further debugging
    # Currently only used in MMT decoder
    self.persistence = defaultdict(list)

    # Safety checks
    assert self.rnn_type in ('GRU', 'LSTM'), \
        "rnn_type '{}' not known".format(rnn_type)
    assert bos_type in ('emb', 'feats', 'zero'), "Unknown bos_type"
    assert dec_init.startswith(('zero', 'feats', 'sum_ctx', 'mean_ctx',
                                'max_ctx', 'last_ctx')), \
        "dec_init '{}' not known".format(dec_init)

    RNN = getattr(nn, '{}Cell'.format(self.rnn_type))

    # LSTMs have also the cell state
    self.n_states = 1 if self.rnn_type == 'GRU' else 2

    # Set custom handlers for GRU/LSTM
    if self.rnn_type == 'GRU':
        self._rnn_unpack_states = lambda x: x
        self._rnn_pack_states = lambda x: x
    elif self.rnn_type == 'LSTM':
        self._rnn_unpack_states = self._lstm_unpack_states
        self._rnn_pack_states = self._lstm_pack_states

    # Set decoder initializer
    self._init_func = getattr(self, '_rnn_init_{}'.format(dec_init))

    # Other arguments
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.ctx_size_dict = ctx_size_dict
    self.ctx_name = ctx_name
    self.n_vocab = n_vocab
    self.tied_emb = tied_emb
    self.dec_init = dec_init
    self.dec_init_size = dec_init_size
    self.dec_init_activ = dec_init_activ
    self.att_bottleneck = att_bottleneck
    self.att_activ = att_activ
    self.att_type = att_type
    self.att_temp = att_temp
    self.transform_ctx = transform_ctx
    self.att_ctx2hid = att_ctx2hid
    self.mlp_bias = mlp_bias
    self.dropout_out = dropout_out
    self.emb_maxnorm = emb_maxnorm
    self.emb_gradscale = emb_gradscale
    self.sched_sample = sched_sample
    self.bos_type = bos_type
    self.bos_dim = bos_dim
    self.bos_activ = bos_activ
    self.bos_bias = bos_bias
    self.emb_interact = emb_interact
    self.emb_interact_dim = emb_interact_dim
    self.emb_interact_activ = emb_interact_activ
    self.dec_inp_activ_fn = get_activation_fn(dec_inp_activ)

    # no-ops
    self.emb_fn = lambda e, f: e
    self.v_emb = None

    if self.emb_interact and self.emb_interact.startswith('trg'):
        self.ff_feats = FF(
            self.emb_interact_dim, self.input_size,
            bias=True, activ=self.emb_interact_activ)
        if self.emb_interact == 'trgmul':
            self.emb_fn = lambda e, f: e * f
        elif self.emb_interact == 'trgsum':
            self.emb_fn = lambda e, f: e + f
        self.emb_interact = True
    else:
        self.emb_interact = False

    if self.bos_type == 'feats':
        # Learn a visual <bos> embedding
        self.ff_bos = FF(self.bos_dim, self.input_size,
                         bias=self.bos_bias, activ=self.bos_activ)

    # Create target embeddings
    self.emb = nn.Embedding(self.n_vocab, self.input_size,
                            padding_idx=0, max_norm=self.emb_maxnorm,
                            scale_grad_by_freq=self.emb_gradscale)

    if self.att_type:
        # Create attention layer
        Attention = get_attention(self.att_type)
        self.att = Attention(
            self.ctx_size_dict[self.ctx_name], self.hidden_size,
            transform_ctx=self.transform_ctx, ctx2hid=self.att_ctx2hid,
            mlp_bias=self.mlp_bias, att_activ=self.att_activ,
            att_bottleneck=self.att_bottleneck, temp=self.att_temp)

    if self.dec_init != 'zero':
        # For source-based inits, input size is the encoding size
        # For 'feats', it's given by dec_init_size, no need to infer
        if self.dec_init.endswith('_ctx'):
            self.dec_init_size = self.ctx_size_dict[self.ctx_name]
        # Add a FF layer for decoder initialization
        self.ff_dec_init = FF(
            self.dec_init_size,
            self.hidden_size * self.n_states,
            activ=self.dec_init_activ)

    # Create decoders
    self.dec0 = RNN(self.input_size, self.hidden_size)
    if self.att_type:
        # If no attention, do not add the 2nd GRU
        self.dec1 = RNN(self.hidden_size, self.hidden_size)

    # Output dropout
    if self.dropout_out > 0:
        self.do_out = nn.Dropout(p=self.dropout_out)

    # Output bottleneck: maps hidden states to target emb dim
    #   simple: tanh(W*h)
    #   deep:   tanh(W*h + U*emb + V*ctx)
    out_inp_size = self.hidden_size
    # Dummy op to return back the hidden state for simple output
    self.out_merge_fn = lambda h, e, c: h

    if self.out_logic == 'deep':
        out_inp_size += (self.input_size + self.hidden_size)
        self.out_merge_fn = lambda h, e, c: torch.cat((h, e, c), dim=1)

    # Final transformation that receives concatenated outputs or only h
    self.hid2out = FF(out_inp_size, self.input_size,
                      bias_zero=True, activ='tanh')

    # Final softmax
    self.out2prob = FF(self.input_size, self.n_vocab)

    # Tie input embedding matrix and output embedding matrix
    if self.tied_emb:
        self.out2prob.weight = self.emb.weight

    self.nll_loss = nn.NLLLoss(reduction="sum", ignore_index=0)
def __init__(self, rnn_type, ntoken, ninp, nhid, nlayers, dropout=0.5,
             tie_weights=False, use_dni=False):
    super(RNNModel, self).__init__()
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    if rnn_type in ['LSTM', 'GRU']:
        self.rnn = getattr(nn, rnn_type)(ninp, nhid, nlayers, dropout=dropout)
    else:
        try:
            nonlinearity = {'RNN_TANH': 'tanh', 'RNN_RELU': 'relu'}[rnn_type]
        except KeyError:
            raise ValueError(
                """An invalid option for `--model` was supplied,
                options are ['LSTM', 'GRU', 'RNN_TANH' or 'RNN_RELU']""")
        self.rnn = nn.RNN(ninp, nhid, nlayers, nonlinearity=nonlinearity,
                          dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)

    # Optionally tie weights as in:
    # "Using the Output Embedding to Improve Language Models" (Press & Wolf 2016)
    # https://arxiv.org/abs/1608.05859
    # and
    # "Tying Word Vectors and Word Classifiers: A Loss Framework for Language
    # Modeling" (Inan et al. 2016)
    # https://arxiv.org/abs/1611.01462
    if tie_weights:
        if nhid != ninp:
            raise ValueError('When using the tied flag, nhid must be equal to emsize')
        self.decoder.weight = self.encoder.weight

    self.init_weights()

    self.rnn_type = rnn_type
    self.nhid = nhid
    self.nlayers = nlayers

    if use_dni:
        # LSTM states carry both h and c, so the synthesizer output is twice as wide
        output_dim = 2 * nhid if rnn_type == 'LSTM' else nhid
        self.backward_interface = dni.BackwardInterface(
            dni.BasicSynthesizer(output_dim, n_hidden=2))
    else:
        self.backward_interface = None
def __init__(self, input_size, hidden_size):
    super(EncoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(input_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
def __init__(self, which_attn_g, which_attn_c, generate_bahd, copy_bahd,
             vocab_size, embed_size, hidden_size, max_oovs=12,
             local_attn_cp=0, D=0, bi=0):
    super(CopyDecoder, self).__init__()
    self.vocab_size = vocab_size
    self.hidden_size = hidden_size
    self.time = time.time()
    self.embed = nn.Embedding(vocab_size, embed_size, padding_idx=0)
    self.max_oovs = max_oovs  # largest number of OOVs available per sample
    # self.Ws = nn.Linear(hidden_size, hidden_size)      # only used at initial stage
    # self.Wo = nn.Linear(hidden_size * 2, vocab_size)   # generate mode
    # self.Wc = nn.Linear(hidden_size, hidden_size * 2)  # copy mode
    self.nonlinear = nn.Tanh()
    self.which_attn_g, self.which_attn_c = which_attn_g, which_attn_c
    self.generate_bahd, self.copy_bahd = generate_bahd, copy_bahd

    # weights
    # self.attn = Attn('concat', hidden_size)
    self.attn = Attn(which_attn_g, hidden_size, bi=1)
    self.local_attn_cp = local_attn_cp
    if self.local_attn_cp != 0:
        self.sig = nn.Sigmoid()
        self.D = D
        self.Vp = nn.Parameter(torch.randn(self.hidden_size, 1))
        self.Wp = nn.Linear(self.hidden_size, self.hidden_size)
    # elif which_attn_g == 'dot':
    #     self.Wo = nn.Linear(hidden_size, vocab_size)

    if generate_bahd in [0, 2]:
        self.gru = nn.GRU(input_size=embed_size + 3 * hidden_size,
                          hidden_size=hidden_size, batch_first=True)
    # elif generate_bahd or copy_bahd:
    #     self.gru = nn.GRU(input_size=embed_size + hidden_size,
    #                       hidden_size=hidden_size, batch_first=True)
    # else:
    #     self.gru = nn.GRU(input_size=embed_size, hidden_size=hidden_size,
    #                       batch_first=True)
    elif generate_bahd == 1:
        self.gru = nn.GRU(input_size=embed_size + 2 * hidden_size,
                          hidden_size=hidden_size, batch_first=True)

    if generate_bahd == 0:
        self.Wo = nn.Linear(hidden_size, vocab_size)
    elif generate_bahd in [1, 2]:
        self.Wo = nn.Linear(hidden_size * 3, vocab_size)

    if copy_bahd:
        self.Wc = nn.Linear(hidden_size * 2, hidden_size)
        if which_attn_c == 'concat':
            self.Wc = nn.Linear(hidden_size * 3, hidden_size)
            self.v = nn.Parameter(torch.randn(1, hidden_size, 1))
        elif which_attn_c == 'location':
            self.Wc = nn.Linear(hidden_size, 1)
    else:
        self.Wc = nn.Linear(hidden_size, hidden_size * 3)
        if which_attn_c == 'concat':
            self.Wc = nn.Linear(hidden_size * 3, hidden_size * 3)
            self.v = nn.Parameter(torch.randn(1, hidden_size * 3, 1))
        elif which_attn_c == 'location':
            self.Wc = nn.Linear(hidden_size, 1)

    # =========================================================================
    # if bi == 1:
    #     self.Ws = nn.Linear(2 * hidden_size, hidden_size)
    # else:
    # =========================================================================
    self.Ws = nn.Linear(2 * hidden_size, hidden_size)  # only used at initial stage
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

############################################
# Word Embeddings
############################################

word2idx = {'hello': 0, 'world': 1}
embeds = nn.Embedding(2, 5)
lookup_tensor = torch.tensor([word2idx['hello']], dtype=torch.long)
hello_embed = embeds(lookup_tensor)
print(hello_embed)

###########################################
# N-Gram Language Model
###########################################

CONTEXT_SIZE = 2
EMBEDDING_DIM = 10
# We will use Shakespeare Sonnet 2 (truncated here)
test_sentence = """When forty winters shall besiege thy brow,
And dig deep trenches in thy beauty's field,
Thy youth's proud livery so gazed on now,
Will be a totter'd weed of small worth held:
Then being asked, where all thy beauty lies,
Where all the treasure of thy lusty days;
To say, within thine own deep sunken eyes,
Were an all-eating shame, and thriftless praise."""
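# A short sketch (assumed from the standard PyTorch n-gram tutorial this snippet
# follows, not part of the source) of how (context, target) pairs would be built
# from test_sentence for the model defined below:
tokens = test_sentence.split()
ngrams = [
    ([tokens[i - j - 1] for j in range(CONTEXT_SIZE)], tokens[i])
    for i in range(CONTEXT_SIZE, len(tokens))
]
print(ngrams[:2])  # [(['forty', 'When'], 'winters'), (['winters', 'forty'], 'shall')]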
def __init__(self, vocab_size, embedding_dim, context_size):
    super(NGramLanguageModeler, self).__init__()
    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.linear1 = nn.Linear(context_size * embedding_dim, 128)
    self.linear2 = nn.Linear(128, vocab_size)
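# A sketch of the matching forward (following the usual tutorial implementation;
# an assumption, not part of the source; F is torch.nn.functional): embed the
# context indices, flatten, and return log-probabilities over the vocabulary.
def forward(self, inputs):
    embeds = self.embeddings(inputs).view((1, -1))
    out = F.relu(self.linear1(embeds))
    out = self.linear2(out)
    return F.log_softmax(out, dim=1)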
def __init__(self, vocab_size, context_size, embedding_size):
    super(CBOW, self).__init__()
    self.embeddings = nn.Embedding(vocab_size, embedding_size)
    self.linear1 = nn.Linear(context_size * embedding_size, 128)
    self.linear2 = nn.Linear(128, vocab_size)
def __init__(self, user_num, item_num, factor_num, num_layers, dropout,
             lr, epochs, lamda, model_name, GMF_model=None, MLP_model=None,
             gpuid='0', loss_type='BPR', early_stop=True):
    super(PairNeuMF, self).__init__()
    """
    user_num: number of users;
    item_num: number of items;
    factor_num: number of predictive factors;
    num_layers: the number of layers in the MLP model;
    dropout: dropout rate between fully connected layers;
    model_name: 'MLP', 'GMF', 'NeuMF-end', or 'NeuMF-pre';
    GMF_model: pre-trained GMF weights;
    MLP_model: pre-trained MLP weights.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = gpuid
    cudnn.benchmark = True

    self.lr = lr
    self.epochs = epochs
    self.lamda = lamda
    self.dropout = dropout
    self.model = model_name
    self.GMF_model = GMF_model
    self.MLP_model = MLP_model

    self.embed_user_GMF = nn.Embedding(user_num, factor_num)
    self.embed_item_GMF = nn.Embedding(item_num, factor_num)
    self.embed_user_MLP = nn.Embedding(user_num,
                                       factor_num * (2 ** (num_layers - 1)))
    self.embed_item_MLP = nn.Embedding(item_num,
                                       factor_num * (2 ** (num_layers - 1)))

    MLP_modules = []
    for i in range(num_layers):
        input_size = factor_num * (2 ** (num_layers - i))
        MLP_modules.append(nn.Dropout(p=self.dropout))
        MLP_modules.append(nn.Linear(input_size, input_size // 2))
        MLP_modules.append(nn.ReLU())
    self.MLP_layers = nn.Sequential(*MLP_modules)

    if self.model in ['MLP', 'GMF']:
        predict_size = factor_num
    else:
        predict_size = factor_num * 2
    self.predict_layer = nn.Linear(predict_size, 1)

    self._init_weight_()

    self.loss_type = loss_type
    self.early_stop = early_stop
def Embedding(num_embeddings, embedding_dim, padding_idx):
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.uniform_(m.weight, -0.1, 0.1)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m
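# Illustrative usage of the helper above (not from the source): the padding row is
# zeroed at init, and because nn.Embedding does not update the gradient of the
# padding_idx row, it stays zero throughout training.
emb = Embedding(num_embeddings=100, embedding_dim=16, padding_idx=0)
print(emb.weight[0].abs().sum())  # tensor(0., grad_fn=<SumBackward0>)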
def __init__(self, vocab_size, emb_size, decoder_model, attention):
    super(Decoder, self).__init__()
    self.embedding = nn.Embedding(vocab_size, emb_size)
    self.attention = attention
    self.decoder_model = decoder_model
    self.lm_input_layer = nn.Linear(emb_size + 2 * attention.encoder_hidden_size,
                                    emb_size, bias=True)
def __init__(self, args):
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

    # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
    doc_input_size = (2 * self.embedding_dim + args.pos_emb_dim
                      + args.ner_emb_dim + 5 + args.rel_emb_dim)

    # Max passage size
    p_max_size = args.p_max_size
    self.p_max_size = p_max_size
    # Max question size
    q_max_size = args.q_max_size
    self.q_max_size = q_max_size

    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)

    # Output sizes of rnn encoders
    doc_hidden_size = 2 * args.hidden_size
    self.doc_hidden_size = doc_hidden_size
    question_hidden_size = 2 * args.hidden_size
    self.question_hidden_size = question_hidden_size
    # print('p_mask : ', doc_input_size)

    # Attention over passage and question
    self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)
    self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)

    # Bilinear layer and sigmoid to proba
    self.p_q_bilinear_start = nn.Bilinear(question_hidden_size,
                                          question_hidden_size, 1)
    self.p_q_bilinear_end = nn.Bilinear(question_hidden_size,
                                        question_hidden_size, 1)
    self.p_linear_start = nn.Linear(question_hidden_size, 1)
    self.p_linear_end = nn.Linear(question_hidden_size, 1)

    # Attention start end
    self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
    self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

    # Feed forward
    self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
    self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
def __init__(self, vocab_size, emb_size, hidden_size, num_layers=2,
             bidirectional=True, dropout=0.3):
    super(Encoder, self).__init__()
    self.embedding = nn.Embedding(vocab_size, emb_size)
    self.num_layers = num_layers
    self.rnn = nn.GRU(emb_size, hidden_size, num_layers, batch_first=True,
                      bidirectional=bidirectional, dropout=dropout)
def __init__(self, input_dim, output_dim):
    super(Embedding, self).__init__()
    self.embedding = nn.Embedding(input_dim, output_dim)
def __init__(self, model_args):
    '''
    Implementation of a Relation Network for VQA that includes a basic
    late fusion model and a text-only LSTM as special cases.
    '''
    super().__init__()
    self.model_args = model_args
    self.kind = model_args['model']
    if model_args.get('act_f') in [None, 'relu']:
        act_f = nn.ReLU()
    elif model_args['act_f'] == 'elu':
        act_f = nn.ELU()
    else:
        # Guard added: without it, an unrecognized act_f would leave act_f unbound
        raise ValueError("Unknown act_f: {}".format(model_args['act_f']))
    self.num_classes = 2

    # question embedding
    self.qembedding = nn.Embedding(model_args['vocab_size'],
                                   model_args['word_embed_dim'])
    self.qlstm = nn.LSTM(model_args['word_embed_dim'],
                         model_args['ques_rnn_hidden_dim'],
                         model_args['ques_num_layers'],
                         batch_first=True, dropout=0)
    ques_dim = model_args['ques_rnn_hidden_dim']

    # text-only classifier
    if self.kind == 'lstm':
        self.qclassifier = nn.Sequential(
            nn.Linear(ques_dim, 512), act_f,
            nn.Linear(512, 512), nn.Dropout(), act_f,
            nn.Linear(512, self.num_classes),
        )

    # image embedding
    if self.kind in ['cnn+lstm', 'rn']:
        img_net_dim = model_args.get('img_net_dim', 64)
        self.img_net = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64), act_f,
            nn.Conv2d(64, img_net_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(img_net_dim), act_f,
            nn.Conv2d(img_net_dim, img_net_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(img_net_dim), act_f,
            nn.Conv2d(img_net_dim, img_net_dim, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(img_net_dim), act_f,
        )
        self.img_net_last_layer = nn.Sequential(
            nn.Conv2d(img_net_dim, 64, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(64), act_f,
        )
        img_net_out_dim = 64

    # late fusion classifier
    if self.kind == 'cnn+lstm':
        self.cnn_lstm_classifier = nn.Sequential(
            nn.Linear(ques_dim + 8 * 8 * img_net_out_dim, 512), act_f,
            nn.Linear(512, 512), nn.Dropout(), act_f,
            nn.Linear(512, self.num_classes),
        )

    # relation network modules
    if self.kind == 'rn':
        g_in_dim = 2 * (img_net_out_dim + 2) + ques_dim
        # maybe batchnorm
        if model_args.get('rn_bn', False):
            f_act = nn.Sequential(nn.BatchNorm1d(model_args['rn_f_dim']), act_f)
            g_act = nn.Sequential(nn.BatchNorm1d(model_args['rn_g_dim']), act_f)
        else:
            f_act = g_act = act_f
        self.g = nn.Sequential(
            nn.Linear(g_in_dim, model_args['rn_g_dim']), g_act,
            nn.Linear(model_args['rn_g_dim'], model_args['rn_g_dim']), g_act,
            nn.Linear(model_args['rn_g_dim'], model_args['rn_g_dim']), g_act,
            nn.Linear(model_args['rn_g_dim'], model_args['rn_g_dim']), g_act,
        )
        self.f = nn.Sequential(
            nn.Linear(model_args['rn_g_dim'], model_args['rn_f_dim']), f_act,
            nn.Linear(model_args['rn_f_dim'], model_args['rn_f_dim']), f_act,
            nn.Dropout(),
            nn.Linear(model_args['rn_f_dim'], self.num_classes),
        )
    self.loc_feat_cache = {}

    # random init
    self.apply(self.init_parameters)
def __init__(self, args, base_model):
    super(SudokuRRNLatent, self).__init__()
    self.args = args
    embed_size = args.sudoku_embed_size
    hidden_dim = args.sudoku_hidden_dim
    edge_drop = args.latent_sudoku_do
    self.num_steps = args.latent_sudoku_num_steps
    self.basic_graph = sd._basic_sudoku_graph()
    self.sudoku_indices = torch.arange(0, 81)
    if args.use_gpu:
        self.sudoku_indices = self.sudoku_indices.cuda()
    self.rows = self.sudoku_indices // 9
    self.cols = self.sudoku_indices % 9

    self.row_embed = nn.Embedding(9, embed_size)
    self.col_embed = nn.Embedding(9, embed_size)
    # Pdb().set_trace()
    self.row_embed.weight.data = \
        base_model.sudoku_solver.row_embed.weight.data.clone().detach()
    self.col_embed.weight.data = \
        base_model.sudoku_solver.col_embed.weight.data.clone().detach()

    if args.latent_sudoku_input_type in ['dif', 'cat']:
        self.digit_embed = nn.Embedding(10, embed_size)
        self.digit_embed.weight.data = \
            base_model.sudoku_solver.digit_embed.weight.data.clone().detach()

    input_dim = (2 * embed_size + 10 if args.latent_sudoku_input_type == 'pae'
                 else 3 * embed_size)
    if args.latent_sudoku_input_type == 'cat':
        input_dim = 4 * embed_size
    elif args.latent_sudoku_input_type == 'pae':
        input_dim = 2 * embed_size + 10
    elif args.latent_sudoku_input_type == 'dif':
        input_dim = 3 * embed_size

    self.input_layer = nn.Sequential(
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
    )

    self.lstm = nn.LSTMCell(hidden_dim * 2, hidden_dim, bias=False)

    msg_layer = nn.Sequential(
        nn.Linear(2 * hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
    )

    self.rrn = RRN(msg_layer, self.node_update_func, self.num_steps, edge_drop)

    if args.selector_model:
        self.output_layer = nn.Linear(hidden_dim, 1)
    else:
        self.output_layer = nn.Linear(hidden_dim, args.nlm_nullary_dim)
def __init__(
    self,
    observation_spaces: SpaceDict,
    goal_sensor_uuid: str,
    # rel_position_uuid: str,
    gesture_sensor_uuid: str,
    human_pose_uuid: str,
    rgb_resnet_preprocessor_uuid: str,
    depth_resnet_preprocessor_uuid: str,
    class_dims: int = 512,
    gesture_compressor_hidden_out_dim: int = 512,
    human_pose_hidden_out_dim: int = 512,
    resnet_compressor_hidden_out_dims: Tuple[int, int] = (128, 32),
    combiner_hidden_out_dims: Tuple[int, int] = (128, 32),
) -> None:
    super().__init__()
    self.goal_uuid = goal_sensor_uuid
    # self.rel_position_uuid = rel_position_uuid
    self.gesture_uuid = gesture_sensor_uuid
    self.human_pose_uuid = human_pose_uuid
    self.rgb_resnet_uuid = rgb_resnet_preprocessor_uuid
    self.depth_resnet_uuid = depth_resnet_preprocessor_uuid
    self.class_dims = class_dims
    self.gesture_hid_out_dim = gesture_compressor_hidden_out_dim
    self.human_pose_hid_out_dim = human_pose_hidden_out_dim
    self.resnet_hid_out_dims = resnet_compressor_hidden_out_dims
    self.combine_hid_out_dims = combiner_hidden_out_dims

    self.embed_class = nn.Embedding(
        num_embeddings=observation_spaces.spaces[self.goal_uuid].n,
        embedding_dim=self.class_dims,
    )

    self.blind = (
        self.rgb_resnet_uuid not in observation_spaces.spaces
        or self.depth_resnet_uuid not in observation_spaces.spaces
    )
    if not self.blind:
        self.resnet_tensor_shape = observation_spaces.spaces[
            self.rgb_resnet_uuid
        ].shape
        self.rgb_resnet_compressor = nn.Sequential(
            nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
            nn.ReLU(),
            nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(
                self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
                * self.resnet_hid_out_dims[-1],
                512,
            ),
            nn.ReLU(),
        )
        self.depth_resnet_compressor = nn.Sequential(
            nn.Conv2d(self.resnet_tensor_shape[0], self.resnet_hid_out_dims[0], 1),
            nn.ReLU(),
            nn.Conv2d(*self.resnet_hid_out_dims[0:2], 1),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(
                self.resnet_tensor_shape[1]
                * self.resnet_tensor_shape[2]
                * self.resnet_hid_out_dims[-1],
                512,
            ),
            nn.ReLU(),
        )
        self.rgb_target_obs_combiner = nn.Sequential(
            nn.Conv2d(
                self.resnet_hid_out_dims[1] + self.class_dims,
                self.combine_hid_out_dims[0],
                1,
            ),
            nn.ReLU(),
            nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
        )
        self.depth_target_obs_combiner = nn.Sequential(
            nn.Conv2d(
                self.resnet_hid_out_dims[1] + self.class_dims,
                self.combine_hid_out_dims[0],
                1,
            ),
            nn.ReLU(),
            nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
        )
        self.rgb_target_gesture_obs_combiner = nn.Sequential(
            nn.Conv2d(
                self.resnet_hid_out_dims[1] + self.class_dims
                + self.gesture_hid_out_dim + self.human_pose_hid_out_dim,
                self.combine_hid_out_dims[0],
                1,
            ),
            nn.ReLU(),
            nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
        )
        self.depth_target_gesture_obs_combiner = nn.Sequential(
            nn.Conv2d(
                self.resnet_hid_out_dims[1] + self.class_dims
                + self.gesture_hid_out_dim + self.human_pose_hid_out_dim,
                self.combine_hid_out_dims[0],
                1,
            ),
            nn.ReLU(),
            nn.Conv2d(*self.combine_hid_out_dims[0:2], 1),
        )
    # self.vision_target_obs_combiner_compressor = nn.Sequential(
    #     nn.Flatten(),
    #     nn.Linear(
    #         in_features=self.resnet_tensor_shape[1] * self.resnet_tensor_shape[2]
    #                     * self.resnet_hid_out_dims[1],
    #         out_features=self.gesture_hid_out_dim,
    #     ),
    #     # nn.ReLU(),
    #     # nn.Linear(
    #     #     in_features=self.gesture_hid_out_dim * 2,
    #     #     out_features=self.gesture_hid_out_dim,
    #     # )
    # )

    self.gesture_tensor_shape = observation_spaces.spaces[self.gesture_uuid].shape
    # self.gesture_compressor = nn.LSTM(
    #     input_size=self.gesture_tensor_shape[1],
    #     hidden_size=self.gesture_hid_out_dim,
    #     num_layers=1,
    #     batch_first=True,
    # )
    self.gesture_compressor = nn.Sequential(
        nn.Flatten(),
        nn.Linear(
            in_features=self.gesture_tensor_shape[0] * self.gesture_tensor_shape[1],
            out_features=self.gesture_hid_out_dim,
        ),
        nn.ReLU(),
        # nn.Linear(
        #     in_features=self.gesture_hid_out_dim * 4,
        #     out_features=self.gesture_hid_out_dim,
        # ),
    )

    self.human_pose_shape = observation_spaces.spaces[self.human_pose_uuid].shape
    self.human_pose_compressor = nn.Sequential(
        nn.Linear(
            in_features=self.human_pose_shape[0],
            out_features=self.human_pose_hid_out_dim,
        ),
        nn.ReLU(),
    )
def main(args):
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print("Start Time =", current_time)

    writer = SummaryWriter('./logs/{0}'.format('chatbot'))

    # Load/assemble voc and pairs
    corpus_name = "cornell movie-dialogs corpus"
    corpus = os.path.join("data", corpus_name)
    datafile = os.path.join(corpus, "formatted_movie_lines.txt")
    save_dir = os.path.join("model", "checkpoints")
    voc, pairs = loadPrepareData(corpus_name, datafile, args.max_length)

    # Print some pairs to validate
    print("\npairs:")
    for pair in pairs[:10]:
        print(pair)

    # Trim voc and pairs
    pairs = trimRareWords(voc, pairs, args.min_count)

    # Configure models
    model_name = 'cb_model'
    dropout = 0.1

    # Set checkpoint to load from; set to None if starting from scratch
    loadFilename = None

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, args.hidden_size)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(args.hidden_size, embedding, args.encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(args.attn_model, embedding, args.hidden_size,
                                  voc.num_words, args.decoder_n_layers, dropout)
    # Move models to the appropriate args.device
    encoder = encoder.to(args.device)
    decoder = decoder.to(args.device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    decoder_learning_ratio = 5.0
    n_iteration = args.epochs
    print_every = 100
    save_every = 100

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=args.lr * decoder_learning_ratio)

    # If you have cuda, move the optimizer state to the GPU
    for state in encoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()
    for state in decoder_optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()

    # Run training iterations
    print("Starting Training!")
    trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer,
               decoder_optimizer, embedding, args.encoder_n_layers,
               args.decoder_n_layers, save_dir, n_iteration, print_every,
               save_every, clip, corpus_name, loadFilename, args, writer)

    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print("End Time =", current_time)
def __init__(self, config):  # , dico, is_encoder, with_output):
    super(XLMModel, self).__init__(config)
    self.output_attentions = config.output_attentions
    self.output_hidden_states = config.output_hidden_states

    # encoder / decoder, output layer
    self.is_encoder = config.is_encoder
    self.is_decoder = not config.is_encoder
    if self.is_decoder:
        raise NotImplementedError("Currently XLM can only be used as an encoder")
    # self.with_output = with_output
    self.causal = config.causal

    # dictionary / languages
    self.n_langs = config.n_langs
    self.use_lang_emb = config.use_lang_emb
    self.n_words = config.n_words
    self.eos_index = config.eos_index
    self.pad_index = config.pad_index
    # self.dico = dico
    # self.id2lang = config.id2lang
    # self.lang2id = config.lang2id
    # assert len(self.dico) == self.n_words
    # assert len(self.id2lang) == len(self.lang2id) == self.n_langs

    # model parameters
    self.dim = config.emb_dim  # 512 by default
    self.hidden_dim = self.dim * 4  # 2048 by default
    self.n_heads = config.n_heads  # 8 by default
    self.n_layers = config.n_layers
    self.dropout = config.dropout
    self.attention_dropout = config.attention_dropout
    assert self.dim % self.n_heads == 0, \
        "transformer dim must be a multiple of n_heads"

    # embeddings
    self.position_embeddings = nn.Embedding(config.max_position_embeddings, self.dim)
    if config.sinusoidal_embeddings:
        create_sinusoidal_embeddings(config.max_position_embeddings, self.dim,
                                     out=self.position_embeddings.weight)
    if config.n_langs > 1 and config.use_lang_emb:
        self.lang_embeddings = nn.Embedding(self.n_langs, self.dim)
    self.embeddings = nn.Embedding(self.n_words, self.dim,
                                   padding_idx=self.pad_index)
    self.layer_norm_emb = nn.LayerNorm(self.dim, eps=config.layer_norm_eps)

    # transformer layers
    self.attentions = nn.ModuleList()
    self.layer_norm1 = nn.ModuleList()
    self.ffns = nn.ModuleList()
    self.layer_norm2 = nn.ModuleList()
    # if self.is_decoder:
    #     self.layer_norm15 = nn.ModuleList()
    #     self.encoder_attn = nn.ModuleList()

    for _ in range(self.n_layers):
        self.attentions.append(
            MultiHeadAttention(self.n_heads, self.dim, config=config))
        self.layer_norm1.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps))
        # if self.is_decoder:
        #     self.layer_norm15.append(nn.LayerNorm(self.dim,
        #                                           eps=config.layer_norm_eps))
        #     self.encoder_attn.append(
        #         MultiHeadAttention(self.n_heads, self.dim,
        #                            dropout=self.attention_dropout))
        self.ffns.append(
            TransformerFFN(self.dim, self.hidden_dim, self.dim, config=config))
        self.layer_norm2.append(nn.LayerNorm(self.dim, eps=config.layer_norm_eps))

    if hasattr(config, "pruned_heads"):
        pruned_heads = config.pruned_heads.copy().items()
        config.pruned_heads = {}
        for layer, heads in pruned_heads:
            if self.attentions[int(layer)].n_heads == config.n_heads:
                self.prune_heads({int(layer): list(map(int, heads))})

    self.init_weights()
def __init__(self, args, data, ss_vectors=None):
    super(NN4EMO_SEMI_HIERARCHICAL, self).__init__()

    self.args = args
    self.class_size = args.class_size
    self.dropout = args.dropout
    self.d_e = args.d_e
    self.d_ff = args.d_ff
    self.device = args.device

    # GloVe embedding
    self.glove_emb = nn.Embedding(args.word_vocab_size, args.word_dim)
    # initialize word embedding with GloVe
    self.glove_emb.weight.data.copy_(data.TEXT.vocab.vectors)
    if args.datastories:
        embeddings_dict = build_datastories_vectors(data)
        for word in embeddings_dict:
            index = data.TEXT.vocab.stoi[word]
            self.glove_emb.weight.data[index] = torch.tensor(embeddings_dict[word])
    # fine-tune the word embedding
    if not args.tune_embeddings:
        self.glove_emb.weight.requires_grad = False
    # the <unk> vector is randomly initialized
    nn.init.uniform_(self.glove_emb.weight.data[0], -0.05, 0.05)

    # word2vec + emoji2vec embeddings
    self.word2vec_emb = nn.Embedding(args.word_vocab_size, args.word_dim)
    word2vec = gsm.KeyedVectors.load_word2vec_format(
        'data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
    emoji2vec = gsm.KeyedVectors.load_word2vec_format(
        'data/emoji/emoji2vec.bin', binary=True)
    for i in range(args.word_vocab_size):
        word = data.TEXT.vocab.itos[i]
        if word in emoji.UNICODE_EMOJI and word in emoji2vec.vocab:
            self.word2vec_emb.weight.data[i] = torch.tensor(emoji2vec[word])
        elif word in word2vec.vocab:
            self.word2vec_emb.weight.data[i] = torch.tensor(word2vec[word])
        else:
            nn.init.uniform_(self.word2vec_emb.weight.data[i], -0.05, 0.05)
    if not args.tune_embeddings:
        self.word2vec_emb.weight.requires_grad = False

    # character embedding
    self.char_emb = nn.Embedding(args.char_vocab_size, args.char_dim, padding_idx=0)
    self.charCNN = CharCNN(args)

    # utterance encoders
    self.utterance_encoder_turn1 = UtteranceEncoder(args, data)
    if args.share_encoder:
        self.utterance_encoder_turn2 = self.utterance_encoder_turn1
        self.utterance_encoder_turn3 = self.utterance_encoder_turn1
    else:
        self.utterance_encoder_turn2 = UtteranceEncoder(args, data)
        self.utterance_encoder_turn3 = UtteranceEncoder(args, data)

    # hierarchical LSTM encoder
    self.lstm_input_dim = 2 * 2 * args.d_e
    self.hierarchical_lstm = LSTMEncoder(args, input_dim=self.lstm_input_dim,
                                         last_hidden=True)

    # feed-forward layers over u1, u2, u3, u1 - u2 + u3 and the lstm output
    self.fc_dim = 2 * 2 * args.d_e * 4 + 2 * args.lstm_hidden_dim
    self.fc1 = nn.Linear(self.fc_dim, args.d_e)
    self.fc2 = nn.Linear(self.fc_dim + args.d_e, args.d_e)
    self.fc_out = nn.Linear(args.d_e, args.class_size)

    self.layer_norm = nn.LayerNorm(args.d_e)
    self.dropout = nn.Dropout(args.dropout)
    self.relu = nn.ReLU()
def __init__(self, num_instances, latent_dim, tracing_steps, has_params=False,
             fit_single_srn=False, use_unet_renderer=False, freeze_networks=False):
    super().__init__()

    self.latent_dim = latent_dim
    self.has_params = has_params

    self.num_hidden_units_phi = 256
    self.phi_layers = 4  # includes the in and out layers
    self.rendering_layers = 5  # includes the in and out layers
    self.sphere_trace_steps = tracing_steps
    self.freeze_networks = freeze_networks
    self.fit_single_srn = fit_single_srn

    if self.fit_single_srn:
        # Fit a single scene with a single SRN (no hypernetworks)
        self.phi = pytorch_prototyping.FCBlock(
            hidden_ch=self.num_hidden_units_phi,
            num_hidden_layers=self.phi_layers - 2,
            in_features=3,
            out_features=self.num_hidden_units_phi)
    else:
        # Auto-decoder: each scene instance gets its own code vector z
        self.latent_codes = nn.Embedding(num_instances, latent_dim).cuda()
        nn.init.normal_(self.latent_codes.weight, mean=0, std=0.01)

        self.hyper_phi = hyperlayers.HyperFC(
            hyper_in_ch=self.latent_dim,
            hyper_num_hidden_layers=1,
            hyper_hidden_ch=self.latent_dim,
            hidden_ch=self.num_hidden_units_phi,
            num_hidden_layers=self.phi_layers - 2,
            in_ch=3,
            out_ch=self.num_hidden_units_phi)

    self.ray_marcher = custom_layers.Raymarcher(
        num_feature_channels=self.num_hidden_units_phi,
        raymarch_steps=self.sphere_trace_steps)

    if use_unet_renderer:
        self.pixel_generator = custom_layers.DeepvoxelsRenderer(
            nf0=32, in_channels=self.num_hidden_units_phi,
            input_resolution=128, img_sidelength=128)
    else:
        self.pixel_generator = pytorch_prototyping.FCBlock(
            hidden_ch=self.num_hidden_units_phi,
            num_hidden_layers=self.rendering_layers - 1,
            in_features=self.num_hidden_units_phi,
            out_features=3,
            outermost_linear=True)

    if self.freeze_networks:
        # parameters() returns generators, which cannot be joined with `+`;
        # chain them instead (requires `import itertools`)
        all_network_params = itertools.chain(self.pixel_generator.parameters(),
                                             self.ray_marcher.parameters(),
                                             self.hyper_phi.parameters())
        for param in all_network_params:
            param.requires_grad = False

    # Losses
    self.l2_loss = nn.MSELoss(reduction="mean")

    # List of logs
    self.logs = list()

    print(self)
    print("Number of parameters:")
    util.print_network(self)
def __init__(self, args, data, ss_vectors=None):
    super(NN4EMO_SEPARATE, self).__init__()

    self.args = args
    self.class_size = args.class_size
    self.dropout = args.dropout
    self.d_e = args.d_e
    self.d_ff = args.d_ff
    self.device = args.device

    # GloVe embedding
    self.glove_emb = nn.Embedding(args.word_vocab_size, args.word_dim)
    # initialize word embedding with GloVe
    self.glove_emb.weight.data.copy_(data.TEXT.vocab.vectors)

    # initialize emoji vectors
    with open('data/emoji/emoji-vectors.pkl', 'rb') as f:
        emoji_vectors = pickle.load(f)
    for i in range(args.word_vocab_size):
        word = data.TEXT.vocab.itos[i]
        if word in emoji_vectors:
            self.glove_emb.weight.data[i] = torch.tensor(emoji_vectors[word])

    if args.datastories:
        embeddings_dict = build_datastories_vectors(data)
        for word in embeddings_dict:
            index = data.TEXT.vocab.stoi[word]
            self.glove_emb.weight.data[index] = torch.tensor(embeddings_dict[word])

    # fine-tune the word embedding
    if not args.tune_embeddings:
        self.glove_emb.weight.requires_grad = False
    # the <unk> vector is randomly initialized
    nn.init.uniform_(self.glove_emb.weight.data[0], -0.05, 0.05)

    # sentiment-specific embedding
    self.ss_emb = nn.Embedding(args.word_vocab_size, args.word_dim)
    if args.ss_emb:
        self.ss_emb.weight.data.copy_(ss_vectors)
        if not args.ss_emb_tune:
            self.ss_emb.weight.requires_grad = False
    if args.fasttext:
        self.ss_emb.weight.data.copy_(data.FASTTEXT.vocab.vectors)
        if not args.fasttext_tune:
            self.ss_emb.weight.requires_grad = False
    if args.word2vec:
        word2vec = gsm.KeyedVectors.load_word2vec_format(
            'data/word2vec/GoogleNews-vectors-negative300.bin', binary=True)
        emoji2vec = gsm.KeyedVectors.load_word2vec_format(
            'data/emoji/emoji2vec.bin', binary=True)
        for i in range(args.word_vocab_size):
            word = data.TEXT.vocab.itos[i]
            if word in emoji.UNICODE_EMOJI and word in emoji2vec.vocab:
                self.ss_emb.weight.data[i] = torch.tensor(emoji2vec[word])
            elif word in word2vec.vocab:
                self.ss_emb.weight.data[i] = torch.tensor(word2vec[word])
            else:
                nn.init.uniform_(self.ss_emb.weight.data[i], -0.05, 0.05)
        if not args.word2vec_tune:
            self.ss_emb.weight.requires_grad = False

    # character embedding
    self.char_emb = nn.Embedding(args.char_vocab_size, args.char_dim, padding_idx=0)
    self.charCNN = CharCNN(args)

    if args.uni_encoder:
        self.sentence_encoder_turn1 = UniEncoder(args, data)
        if args.simple_encoder:
            self.fc_dim = 2 * args.lstm_hidden_dim * 5
            self.lstm_dim = 2 * args.lstm_hidden_dim
        else:
            self.fc_dim = 2 * args.lstm_hidden_dim * 4 * 2 + 2 * args.lstm_hidden_dim
            self.lstm_dim = 2 * args.lstm_hidden_dim * 2
    elif args.simple_encoder:
        self.sentence_encoder_turn1 = SimpleEncoder(args, data)
        self.fc_dim = 2 * args.d_e * 4 + 2 * args.lstm_hidden_dim
        self.lstm_dim = 2 * args.d_e
    else:
        self.sentence_encoder_turn1 = SentenceEncoder(args, data)
        self.fc_dim = 2 * args.d_e * 2 * 4 + 2 * args.lstm_hidden_dim
        self.lstm_dim = 2 * args.d_e * 2

    if args.share_encoder:
        self.sentence_encoder_turn3 = self.sentence_encoder_turn1
        if args.turn2:
            if args.uni_encoder:
                self.sentence_encoder_turn2 = UniEncoder(args, data)
            elif args.simple_encoder:
                self.sentence_encoder_turn2 = SimpleEncoder(args, data)
            else:
                self.sentence_encoder_turn2 = SentenceEncoder(args, data)
        else:
            self.sentence_encoder_turn2 = self.sentence_encoder_turn1
    else:
        if args.uni_encoder:
            self.sentence_encoder_turn2 = UniEncoder(args, data)
            self.sentence_encoder_turn3 = UniEncoder(args, data)
        elif args.simple_encoder:
            self.sentence_encoder_turn2 = SimpleEncoder(args, data)
            self.sentence_encoder_turn3 = SimpleEncoder(args, data)
        else:
            self.sentence_encoder_turn2 = SentenceEncoder(args, data)
            self.sentence_encoder_turn3 = SentenceEncoder(args, data)

    self.lstm = LSTMEncoder(args, input_dim=self.lstm_dim, last_hidden=True)

    if args.no_turn2:
        if args.simple_encoder:
            self.fc_dim = 2 * args.d_e * 4
        else:
            self.fc_dim = 2 * args.d_e * 2 * 4

    self.fc1 = nn.Linear(self.fc_dim, args.d_e)
    self.fc2 = nn.Linear(self.fc_dim + args.d_e, args.d_e)
    self.fc_out = nn.Linear(args.d_e, args.class_size)

    self.layer_norm = nn.LayerNorm(args.d_e)
    self.dropout = nn.Dropout(args.dropout)
    self.relu = nn.ReLU()
params_to_optimize = list(image_model.parameters())

if args.comparison == 'dotp':
    scorer_model = DotPScorer()
elif args.comparison == 'bilinear':
    # FIXME: This won't work with --poe
    scorer_model = BilinearScorer(512,
                                  dropout=args.dropout,
                                  identity_debug=args.debug_bilinear)
else:
    raise NotImplementedError
scorer_model = scorer_model.to(device)
params_to_optimize.extend(scorer_model.parameters())

if args.use_hyp:
    embedding_model = nn.Embedding(train_vocab_size, 512)

if args.decode_hyp:
    proposal_model = TextProposal(embedding_model)
    proposal_model = proposal_model.to(device)
    params_to_optimize.extend(proposal_model.parameters())

if args.encode_hyp:
    hint_model = TextRep(embedding_model)
    hint_model = hint_model.to(device)
    params_to_optimize.extend(hint_model.parameters())

if args.multimodal_concept:
    multimodal_model = MultimodalRep()
    # multimodal_model = MultimodalLinearRep()
    # multimodal_model = MultimodalWeightedRep()
def __init__(self):
    # input: [-1, 2, 1]
    # output: [-1, 1]
    super().__init__()
    self.embedding = nn.Embedding(7, 4)
    self.fc = nn.Linear(8, 1)
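# A forward implied by the shape comments above (an assumption, not from the
# source): the two indices are embedded into 4 dims each, flattened to 8 features,
# and mapped to a single output.
def forward(self, x):
    # x: (batch, 2, 1) integer indices in [0, 7)
    e = self.embedding(x.squeeze(-1))       # (batch, 2, 4)
    return self.fc(e.view(e.size(0), -1))   # (batch, 1)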
def Embedding(num_embeddings, embedding_dim, padding_idx):
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    nn.init.normal_(m.weight, mean=0, std=embedding_dim ** -0.5)
    nn.init.constant_(m.weight[padding_idx], 0)
    return m
def __init__(self, num_features: int, out_features: int):
    super().__init__()
    self.weight = nn.Embedding(num_embeddings=num_features,
                               embedding_dim=out_features)
    self.bias = nn.Parameter(torch.zeros((out_features,)))
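# This pattern stores linear-layer weights in an nn.Embedding so sparse categorical
# features can be handled by index lookup. A plausible forward (a sketch, assuming
# `x` holds one feature index per field; not part of the source): sum the looked-up
# weight rows per example and add the bias.
def forward(self, x):
    # x: (batch, num_fields) long tensor of feature indices
    return torch.sum(self.weight(x), dim=1) + self.bias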