] + itos  # + itos_lemmas[:vocab_size] + itos_morph

assert len(itos_total) == outVocabSize

# could also provide per-word subcategorization frames from the treebank as input???

# baseline = nn.Linear(args.emb_dim, 1).cuda()

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(2 * args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)  # in-place variant; nn.init.constant is deprecated
    elif 'weight' in name:
        nn.init.xavier_normal_(param)  # in-place variant; nn.init.xavier_normal is deprecated

decoder = nn.Linear(args.rnn_dim, outVocabSize).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

startHidden = nn.Linear(1, args.rnn_dim).cuda()
startHidden.bias.data.fill_(0)

components = [rnn_both, decoder, word_pos_morph_embeddings, startHidden]

hiddenToLogSDHidden = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
cellToMean = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
sampleToHidden = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
hiddenToLogSDHidden.bias.data.fill_(0)
cellToMean.bias.data.fill_(0)
sampleToHidden.bias.data.fill_(0)
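# Minimal usage sketch (not part of the original script) of how `startHidden`
# can produce the initial GRU state: map a constant input to a single state
# vector and broadcast it across layers and batch. The all-ones input, the
# function name, and the shapes are assumptions.
def initial_state_sketch(batch_size):
    ones = torch.ones(1, 1).cuda()          # constant input for startHidden
    h0 = startHidden(ones)                  # (1, rnn_dim)
    h0 = h0.unsqueeze(0).expand(args.rnn_layers, batch_size, args.rnn_dim).contiguous()
    return h0                               # shape expected by nn.GRU as initial hidden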
] + itos  # + itos_lemmas[:vocab_size] + itos_morph

assert len(itos_total) == outVocabSize

# could also provide per-word subcategorization frames from the treebank as input???

# baseline = nn.Linear(args.emb_dim, 1).cuda()

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(2 * args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

decoder = nn.Linear(args.rnn_dim, outVocabSize).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

startHidden = nn.Linear(1, args.rnn_dim).cuda()

components = [rnn_both, decoder, word_pos_morph_embeddings, startHidden]

import torchkit.nn as nn_

character_embeddings = torch.nn.Embedding(
    num_embeddings=len(itos_chars_total) + 3,
    embedding_dim=args.char_emb_dim).cuda()
char_composition = torch.nn.LSTM(args.char_emb_dim, args.char_enc_hidden_dim, 1,
                                 bidirectional=True).cuda()  # truncated call completed by analogy with the bidirectional GRU variant below
    embedding_dim=args.emb_dim).cuda()

outVocabSize = 2 * (3 + vocab_size)

itos_total = ["EOS", "OOV", "SOS"] + itos_1 + ["EOS", "OOV", "SOS"] + itos_2
assert len(itos_total) == outVocabSize

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(2 * args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

decoder_1 = nn.Linear(args.rnn_dim, 50003).cuda()  # 3 + vocab_size, assuming vocab_size == 50000
decoder_2 = nn.Linear(args.rnn_dim, 50003).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

startHidden = nn.Linear(1, args.rnn_dim).cuda()

components = [
    rnn_both, decoder_1, decoder_2, word_pos_morph_embeddings, startHidden
]

hiddenToLogSDHidden = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
cellToMean = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
sampleToHidden = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()

import torchkit.nn as nn_
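# Hypothetical sketch (not from the original script) of how the two decoders
# could cover the two halves of itos_total: decoder_1 scores
# ["EOS", "OOV", "SOS"] + itos_1, decoder_2 scores ["EOS", "OOV", "SOS"] + itos_2,
# and concatenating their logits yields one distribution over all
# outVocabSize symbols. The function name and the concatenation are assumptions.
def joint_logits_sketch(hidden_states):
    logits_1 = decoder_1(hidden_states)             # (..., 3 + vocab_size)
    logits_2 = decoder_2(hidden_states)             # (..., 3 + vocab_size)
    return torch.cat([logits_1, logits_2], dim=-1)  # (..., outVocabSize)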
assert len(itos_total) == outVocabSize

# could also provide per-word subcategorization frames from the treebank as input???

# baseline = nn.Linear(args.emb_dim, 1).cuda()

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

decoder = nn.Linear(args.rnn_dim, outVocabSize).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

components = [rnn_both, decoder, word_pos_morph_embeddings]

# klLoss = [None for _ in inputEmbeddings]
# logStandardDeviationHidden = hiddenToLogSDHidden(hidden[1][0])
# sampled = torch.normal(meanHidden, torch.exp(logStandardDeviationHidden))
# klLoss = 0.5 * (-1 - 2 * logStandardDeviationHidden + torch.pow(meanHidden, 2) + torch.exp(2 * logStandardDeviationHidden))
# hiddenNew = sampleToHidden(sampled)
# cellNew = sampleToCell(sampled)
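# A runnable completion (an assumption-based sketch, not the original code) of
# the commented-out block above: encode the final recurrent state into a
# Gaussian posterior, sample with the reparameterization trick, and compute
# the analytic KL divergence against a standard-normal prior. `hidden` is
# assumed to be an LSTM-style (h, c) pair, as the dead code suggests, and
# `sampleToCell` an nn.Linear head like the others in this file.
def reparameterize_sketch(hidden):
    meanHidden = cellToMean(hidden[1][0])
    logStandardDeviationHidden = hiddenToLogSDHidden(hidden[1][0])
    standardDeviation = torch.exp(logStandardDeviationHidden)
    noise = torch.randn_like(meanHidden)             # reparameterization: mu + sigma * eps
    sampled = meanHidden + standardDeviation * noise
    klLoss = 0.5 * (-1 - 2 * logStandardDeviationHidden
                    + torch.pow(meanHidden, 2)
                    + torch.exp(2 * logStandardDeviationHidden))
    hiddenNew = sampleToHidden(sampled)
    cellNew = sampleToCell(sampled)
    return hiddenNew, cellNew, klLoss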
] + itos  # + itos_lemmas[:vocab_size] + itos_morph

assert len(itos_total) == outVocabSize

# could also provide per-word subcategorization frames from the treebank as input???

# baseline = nn.Linear(args.emb_dim, 1).cuda()

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(2 * args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

decoder = nn.Linear(args.rnn_dim, outVocabSize).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

startHidden = nn.Linear(1, args.rnn_dim).cuda()
startHidden.bias.data.fill_(0)

components = [rnn_both, decoder, word_pos_morph_embeddings, startHidden]

char_embeddings = torch.nn.Embedding(num_embeddings=len(itos_chars_total) + 3,
                                     embedding_dim=args.char_emb_dim).cuda()  # .cuda() added so all modules live on the GPU
char_composition = nn.GRU(args.char_emb_dim, args.char_rnn_dim, 1,
                          bidirectional=True).cuda()
char_composition_output = nn.Linear(2 * args.char_rnn_dim, args.emb_dim).cuda()  # .cuda() added for device consistency
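# Sketch (assumed usage, not taken from the original script) of composing a
# word embedding from its characters: embed the character ids, run the
# bidirectional GRU over them, concatenate the final forward and backward
# states, and project down to the word-embedding size.
def embed_word_from_chars_sketch(char_ids):
    # char_ids: LongTensor of shape (word_length, batch)
    embedded = char_embeddings(char_ids)             # (word_length, batch, char_emb_dim)
    _, final_state = char_composition(embedded)      # (2, batch, char_rnn_dim)
    both_directions = torch.cat([final_state[0], final_state[1]], dim=1)
    return char_composition_output(both_directions)  # (batch, emb_dim)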
] + itos  # + itos_lemmas[:vocab_size] + itos_morph

assert len(itos_total) == outVocabSize

# could also provide per-word subcategorization frames from the treebank as input???

# baseline = nn.Linear(args.emb_dim, 1).cuda()

dropout = nn.Dropout(args.dropout_rate).cuda()

rnn_both = nn.GRU(args.emb_dim, args.rnn_dim, args.rnn_layers).cuda()
for name, param in rnn_both.named_parameters():
    if 'bias' in name:
        nn.init.constant_(param, 0.0)
    elif 'weight' in name:
        nn.init.xavier_normal_(param)

decoder = nn.Linear(args.rnn_dim, outVocabSize).cuda()
# pos_ptb_decoder = nn.Linear(128, len(posFine) + 3).cuda()

components = [rnn_both, decoder, word_pos_morph_embeddings]

# klLoss = [None for _ in inputEmbeddings]
# logStandardDeviationHidden = hiddenToLogSDHidden(hidden[1][0])
# sampled = torch.normal(meanHidden, torch.exp(logStandardDeviationHidden))
# klLoss = 0.5 * (-1 - 2 * logStandardDeviationHidden + torch.pow(meanHidden, 2) + torch.exp(2 * logStandardDeviationHidden))
# hiddenNew = sampleToHidden(sampled)
# cellNew = sampleToCell(sampled)

hiddenToLogSDHidden = nn.Linear(args.rnn_dim, args.rnn_dim).cuda()
hiddenToLogSDHidden.weight.data.fill_(0)  # zero init: log SD starts at 0, i.e. unit variance
hiddenToLogSDHidden.bias.data.fill_(0)
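# Why the zero initialization matters: with both weight and bias at 0,
# hiddenToLogSDHidden returns log SD = 0 for every input, so the posterior
# starts with unit standard deviation and (for zero means) the KL term starts
# near zero. A quick illustrative check, not part of the original script:
with torch.no_grad():
    probe = torch.randn(4, args.rnn_dim).cuda()
    assert hiddenToLogSDHidden(probe).abs().max().item() == 0.0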