class BiLSTM_CRF(nn.Module):
    """BiLSTM encoder followed by a CRF layer (char / bichar inputs).

    The encoder turns the inputs into per-position label scores; the CRF
    converts those scores into a training loss and a Viterbi tag sequence.
    """

    def __init__(self, data):
        """Build the encoder and the CRF from the ``data`` configuration.

        Side effect: ``data.label_alphabet_size`` is increased by two before
        the encoder is built (extra START and END labels for the lower LSTM
        layer), while the CRF receives the original label count.
        """
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        crf_label_count = data.label_alphabet_size
        data.label_alphabet_size = crf_label_count + 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(crf_label_count, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, char_inputs, bichar_inputs,
                                char_seq_lengths, batch_label, mask):
        """Return ``(total_loss, tag_seq)`` for one batch.

        The loss comes from the CRF's negative log-likelihood on the encoder
        scores; the tag sequence is the CRF's Viterbi decoding of the same
        scores.
        """
        emissions = self.lstm.get_output_score(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        loss = self.crf.neg_log_likelihood_loss(emissions, mask, batch_label)
        _, best_path = self.crf._viterbi_decode(emissions, mask)
        return loss, best_path

    def forward(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths,
                mask):
        """Return the Viterbi-decoded tag sequence for one batch."""
        emissions = self.lstm.get_output_score(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        _, best_path = self.crf._viterbi_decode(emissions, mask)
        return best_path

    def get_lstm_features(self, gaz_list, char_inputs, bichar_inputs,
                          char_seq_lengths):
        """Delegate to the encoder's ``get_lstm_features``."""
        encoder = self.lstm
        return encoder.get_lstm_features(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
class BiLSTM_CRF(nn.Module):
    """BiLSTM encoder followed by a CRF layer (word / biword / char inputs).

    The encoder turns the inputs into per-position label scores; the CRF
    converts those scores into a training loss and a Viterbi tag sequence.
    """

    def __init__(self, data):
        """Build the encoder and the CRF from the ``data`` configuration.

        Side effect: ``data.label_alphabet_size`` is increased by two before
        the encoder is built (two more labels for the lower LSTM layer),
        while the CRF receives the original label count.
        """
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # Add two more labels for the lower LSTM layer; the CRF itself uses
        # the original label size.
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask):
        """Return ``(total_loss, tag_seq)`` for one batch.

        ``total_loss`` is the CRF negative log-likelihood of ``batch_label``
        under the encoder scores; ``tag_seq`` is the Viterbi decoding of the
        same scores.
        """
        outs = self.lstm.get_output_score(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        _, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):
        """Return the Viterbi-decoded tag sequence for one batch."""
        outs = self.lstm.get_output_score(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        # The batch size and sequence length used to be read from
        # ``word_inputs`` here but were never used, so they are gone.
        _, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs,
                          word_seq_lengths, char_inputs, char_seq_lengths,
                          char_seq_recover):
        """Delegate to the encoder's ``get_lstm_features``."""
        return self.lstm.get_lstm_features(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
def __init__(self, data):
    """Build the BiLSTM encoder and the CRF layer from ``data``.

    Side effect: ``data.label_alphabet_size`` is raised by two before the
    encoder is constructed (extra START and END labels for the lower LSTM
    layer); the CRF is given the original label count.
    """
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu

    crf_label_count = data.label_alphabet_size
    data.label_alphabet_size = crf_label_count + 2

    self.lstm = BiLSTM(data)
    self.crf = CRF(crf_label_count, self.gpu)
def __init__(self, data):
    """Build the BiLSTM encoder with softmax / cross-entropy heads.

    No CRF layer is created here. Besides the encoder, the constructor
    stores a ``LogSoftmax`` module, a ``CrossEntropyLoss`` module and the
    index of the ``"NEGATIVE"`` label looked up in ``data.label_alphabet``.
    """
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu

    encoder = BiLSTM(data)
    self.lstm = encoder
    self.softmax = nn.LogSoftmax()
    self.loss_op = nn.CrossEntropyLoss()

    negative_index = data.label_alphabet.get_index("NEGATIVE")
    self.negid = negative_index
def __init__(self, data):
    """Create the encoder / CRF pair described by ``data``.

    The CRF is sized with the label count found in ``data`` on entry. The
    encoder is built afterwards from ``data`` whose ``label_alphabet_size``
    has been bumped by two (two more labels for the lower LSTM layer); that
    bump stays visible to the caller.
    """
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu

    original_label_count = data.label_alphabet_size
    data.label_alphabet_size = original_label_count + 2

    self.lstm = BiLSTM(data)
    self.crf = CRF(original_label_count, self.gpu)
def __init__(self, config):
    """Copy hyper-parameters from ``config`` and build the sub-model.

    Every setting is mirrored onto ``self``. When ``config.model_bilstm`` is
    truthy, a ``BiLSTM`` is created from those mirrored values and stored as
    ``self.model``.
    """
    super(Joint, self).__init__()
    self.config = config

    # Embedding settings.
    self.embed_num = config.embed_num
    self.embed_dim = config.embed_dim
    self.label_num = config.label_num
    self.paddingId = config.paddingId

    # Dropout rates.
    self.dropout_emb = config.dropout_emb
    self.dropout = config.dropout

    # LSTM geometry.
    self.lstm_hiddens = config.lstm_hiddens
    self.lstm_layers = config.lstm_layers

    # Pre-trained embeddings.
    self.pretrained_embed = config.pretrained_embed
    self.pretrained_weight = config.pretrained_weight

    # CNN parameters.
    self.wide_conv = config.wide_conv
    self.conv_filter_sizes = self._conv_filter(config.conv_filter_sizes)
    self.conv_filter_nums = config.conv_filter_nums

    self.use_cuda = config.use_cuda

    if self.config.model_bilstm:
        bilstm_kwargs = dict(
            embed_num=self.embed_num,
            embed_dim=self.embed_dim,
            label_num=self.label_num,
            paddingId=self.paddingId,
            dropout_emb=self.dropout_emb,
            dropout=self.dropout,
            lstm_hiddens=self.lstm_hiddens,
            lstm_layers=self.lstm_layers,
            pretrained_embed=self.pretrained_embed,
            pretrained_weight=self.pretrained_weight,
            use_cuda=self.use_cuda,
        )
        self.model = BiLSTM(**bilstm_kwargs)
def __init__(self, data):
    """Instantiate the BiLSTM encoder and its CRF output layer.

    ``data.label_alphabet_size`` is incremented by two as a side effect so
    that the lower LSTM layer sees two more labels; the CRF keeps the
    label size that ``data`` had on entry.
    """
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu

    num_crf_labels = data.label_alphabet_size
    data.label_alphabet_size = num_crf_labels + 2

    self.lstm = BiLSTM(data)
    self.crf = CRF(num_crf_labels, self.gpu)
class BiLSTM_CRF(nn.Module):
    """BiLSTM tagger trained with per-position cross-entropy.

    Despite its name this variant builds no CRF layer: the encoder scores
    are fed straight into ``nn.CrossEntropyLoss`` and predictions are the
    arg-max label of every position.
    """

    def __init__(self, data):
        """Build the encoder, the loss modules and the NEGATIVE label index."""
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        self.lstm = BiLSTM(data)
        # Constructed without an explicit ``dim`` exactly as before, so any
        # external user of ``self.softmax`` sees unchanged behaviour.
        self.softmax = nn.LogSoftmax()
        self.loss_op = nn.CrossEntropyLoss()
        self.negid = data.label_alphabet.get_index("NEGATIVE")

    def count_weight_loss(self, output, targets):
        """Class-balanced negative log-likelihood plus a small L2 penalty.

        The log-likelihood is averaged separately over the rows whose target
        equals ``self.negid`` and over all other rows, and the two averages
        are added. A group without members contributes nothing; previously
        the mean over an empty selection produced NaN when no NEGATIVE
        target was present.

        The L2 term sums the 2-norms of this module's trainable parameters.
        The earlier code read the undefined globals ``args`` and ``model``
        and accumulated in place into a leaf tensor that requires grad.

        Args:
            output: 2-D score tensor indexed as ``[row, label]``.
            targets: 1-D tensor of label indices, one per row of ``output``.

        Returns:
            The loss as a 0-dim tensor.
        """
        # When switched on, all rows are averaged together instead of being
        # balanced per group.
        ONLY_POSITIVE = False

        # ``dim=1`` is what the implicit-dim rule picked for 2-D input.
        log_probs = Fun.log_softmax(output, dim=1)
        rows = torch.arange(targets.shape[0], device=targets.device)
        log_loss = log_probs[rows, targets]

        if ONLY_POSITIVE:
            classify_loss = -torch.mean(log_loss)
        else:
            classify_loss = output.new_zeros(())
            group_masks = (targets != self.negid, targets == self.negid)
            for group_mask in group_masks:
                if group_mask.any():
                    classify_loss = classify_loss - log_loss[group_mask].mean()

        l2_reg = output.new_zeros(())
        for weight in self.parameters():
            if weight.requires_grad:
                l2_reg = l2_reg + weight.norm(2)

        loss = classify_loss + 1e-5 * l2_reg
        return loss.squeeze()

    def neg_log_likelihood_loss(self, gaz_list, batch_word, batch_entity,
                                batch_gloss, batch_label, mask):
        """Return ``(loss, predictions)`` for one batch.

        The encoder scores are flattened to ``[-1, num_labels]`` and the
        labels to ``[-1]`` before the cross-entropy is taken; predictions
        are the arg-max label index of every flattened position.
        ``count_weight_loss`` is an alternative loss that is not used here.
        """
        outs = self.lstm.get_output_score(
            gaz_list, batch_word, batch_entity, batch_gloss, batch_label,
            mask)
        outs = outs.view([-1, outs.shape[2]])
        batch_label = batch_label.view([-1])
        loss = self.loss_op(outs, batch_label).sum()
        return loss, torch.max(outs, 1)[1]
def __init__(self, vocab_size, emb_size, hidden_size, out_size):
    """Set up the BiLSTM encoder and the CRF transition matrix.

    Args:
        vocab_size: size of the vocabulary
        emb_size: dimensionality of the word embeddings
        hidden_size: dimensionality of the hidden state
        out_size: number of tag types
    """
    super(BiLSTM_CRF, self).__init__()
    self.bilstm = BiLSTM(vocab_size, emb_size, hidden_size, out_size)

    # The CRF part simply learns one extra transition matrix of shape
    # [out_size, out_size]; every entry starts at the uniform value
    # 1 / out_size.
    uniform_init = torch.ones(out_size, out_size) * 1 / out_size
    self.transition = nn.Parameter(uniform_init)
del test_word_freq, test_label_freq # build natwork if args.model == "bilstm_crf": net = BiLSTM_CRF(vocab.num_words, config["word_dim"], config["layers"], config["word_hidden"], vocab.num_labels, config["dropout"], ) elif args.model == "bilstm": net = BiLSTM(vocab.num_words, config["word_dim"], config["layers"], config["word_hidden"], vocab.num_labels, config["dropout"], ) print(net) # init optim if config["optimizer"] == 'adam': print('Using Adam optimizer...', flush = True) optimizer = optim.Adam(net.parameters(), lr=config["lr"]) # if use GPU , move all needed tensors to CUDA if config.get("use_cuda", False) and not config.get("useMultiGPU", False) and not config.get("useDistGPU", False): net.cuda() elif config.get("useMultiGPU", False):
class BiLSTM_CRF(nn.Module):
    """Shared BiLSTM encoder with three CRF heads (base, ner, general).

    Each head owns a CRF sized with the matching label count from ``data``;
    the encoder exposes one scoring method per head.
    """

    def __init__(self, data):
        """Build the three CRF layers, then the shared encoder.

        Side effect: ``label_alphabet_size``, ``label_alphabet_size_ner`` and
        ``label_alphabet_size_general`` on ``data`` are each raised by two
        (two more labels for the lower LSTM layer) before the encoder is
        built; every CRF keeps the respective original size.
        """
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu

        base_labels = data.label_alphabet_size
        data.label_alphabet_size = base_labels + 2
        self.crf = CRF(base_labels, self.gpu)

        ner_labels = data.label_alphabet_size_ner
        data.label_alphabet_size_ner = ner_labels + 2
        self.crf_ner = CRF(ner_labels, self.gpu)

        general_labels = data.label_alphabet_size_general
        data.label_alphabet_size_general = general_labels + 2
        self.crf_general = CRF(general_labels, self.gpu)

        self.lstm = BiLSTM(data)

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the base head."""
        emissions = self.lstm.get_output_score(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        loss = self.crf.neg_log_likelihood_loss(emissions, mask, batch_label)
        _, best_path = self.crf._viterbi_decode(emissions, mask)
        return loss, best_path

    def neg_log_likelihood_loss_ner(self, gaz_list, word_inputs,
                                    biword_inputs, word_seq_lengths,
                                    char_inputs, char_seq_lengths,
                                    char_seq_recover, batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the ner head."""
        emissions = self.lstm.get_output_score_ner(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        loss = self.crf_ner.neg_log_likelihood_loss(
            emissions, mask, batch_label)
        _, best_path = self.crf_ner._viterbi_decode(emissions, mask)
        return loss, best_path

    def neg_log_likelihood_loss_general(self, gaz_list, word_inputs,
                                        biword_inputs, word_seq_lengths,
                                        char_inputs, char_seq_lengths,
                                        char_seq_recover, batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the general head."""
        emissions = self.lstm.get_output_score_general(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        loss = self.crf_general.neg_log_likelihood_loss(
            emissions, mask, batch_label)
        _, best_path = self.crf_general._viterbi_decode(emissions, mask)
        return loss, best_path

    def forward(self, is_ner, gaz_list, word_inputs, biword_inputs,
                word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, mask):
        """Return the Viterbi tag sequence of the ner or the base head.

        A truthy ``is_ner`` selects the ner scoring method and CRF; otherwise
        the base pair is used.
        """
        if is_ner:
            score_fn = self.lstm.get_output_score_ner
            decoder = self.crf_ner
        else:
            score_fn = self.lstm.get_output_score
            decoder = self.crf
        emissions = score_fn(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        _, best_path = decoder._viterbi_decode(emissions, mask)
        return best_path
def _save_training_stats(stats_dir, iteration, tr_loss):
    """Pickle the training loss of ``iteration`` to ``<iteration>iters.p``."""
    stat_dict = {'iter': iteration, 'tr_loss': tr_loss}
    stat_path = os.path.join(stats_dir, "%diters.p" % iteration)
    with open(stat_path, 'wb') as stat_file:
        pickle.dump(stat_dict, stat_file, protocol=pickle.HIGHEST_PROTOCOL)


def train(args):
    """Train the BiLSTM network and write out stats and checkpoints.

    The training loss is evaluated and pickled before training and then
    every 1000 iterations; a checkpoint is written every 5000 iterations.
    Training stops once ``args.iteration + 1`` iterations have run.
    Validation-set evaluation is currently disabled, so only the training
    loss is tracked.

    Args:
        args: parsed arguments. The attributes read here are ``workspace``
            (root directory), ``model_name``, ``lr`` (learning rate),
            ``tr_dir_name`` (training feature sub-directory),
            ``iteration`` (number of training iterations) and ``dropout``.
    """
    print(args)
    workspace = args.workspace
    model_name = args.model_name
    lr = args.lr
    tr_dir_name = args.tr_dir_name
    iter_training = args.iteration
    dropout = args.dropout

    train_feat_dir = os.path.join(workspace, "packed_features", "spectrogram",
                                  "train", tr_dir_name)

    # Load data.
    t1 = time.time()
    tr_hdf5_path = os.path.join(train_feat_dir, "data.h5")
    (tr_x, tr_y) = pp_data.load_hdf5(tr_hdf5_path)
    print(tr_x.shape, tr_y.shape)
    print("Load data time: %s s" % (time.time() - t1,))

    batch_size = 500
    print("%d iterations / epoch" % (tr_x.shape[0] // batch_size))

    # Scale data.
    t1 = time.time()
    scaler_path = os.path.join(train_feat_dir, "scaler.p")
    with open(scaler_path, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)
    tr_x = pp_data.scale_on_3d(tr_x, scaler)
    tr_y = pp_data.scale_on_2d(tr_y, scaler)
    print("Scale data time: %s s" % (time.time() - t1,))

    # Build model.
    (_, n_concat, n_freq) = tr_x.shape
    with tf.Session() as sess:
        model = BiLSTM(sess, lr, batch_size, (n_concat, n_freq), n_freq,
                       dropouts=dropout, training=True)
        model.build()
        sess.run(tf.global_variables_initializer())
        merge_op = tf.summary.merge_all()
        # One Saver for the whole run: building a new one at every
        # checkpoint keeps adding save/restore ops to the graph.
        saver = tf.train.Saver()

        # Data generators.
        tr_gen = DataGenerator(batch_size=batch_size, type='train')
        eval_tr_gen = DataGenerator(batch_size=batch_size, type='test',
                                    te_max_iter=100)

        # Directories for saving models and training stats.
        model_dir = os.path.join(workspace, "models", model_name)
        pp_data.create_folder(model_dir)
        stats_dir = os.path.join(workspace, "training_stats", model_name)
        pp_data.create_folder(stats_dir)

        # Print and save the loss before training.
        # NOTE(review): ``eval`` is called with (sess, model, generator, x, y),
        # so it is presumably this module's evaluation helper shadowing the
        # builtin - confirm.
        iteration = 0
        tr_loss = eval(sess, model, eval_tr_gen, tr_x, tr_y)
        print("Iteration: %d, tr_loss: %f" % (iteration, tr_loss))
        _save_training_stats(stats_dir, iteration, tr_loss)

        # Train.
        t1 = time.time()
        for (batch_x, batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
            feed_dict = {model.x_noisy: batch_x, model.y_clean: batch_y}
            # Same fetches as before; the returned values are not needed.
            sess.run([model.optimizer, model.loss, merge_op],
                     feed_dict=feed_dict)
            iteration += 1

            # Validate and save training stats.
            if iteration % 1000 == 0:
                tr_loss = eval(sess, model, eval_tr_gen, tr_x, tr_y)
                print("Iteration: %d, tr_loss: %f" % (iteration, tr_loss))
                _save_training_stats(stats_dir, iteration, tr_loss)

            # Save model.
            if iteration % 5000 == 0:
                ckpt_file_path = os.path.join(model_dir, model_name)
                saver.save(sess, ckpt_file_path, write_meta_graph=True)
                print("Saved model to %s" % ckpt_file_path)

            if iteration == iter_training + 1:
                break

        print("Training time: %s s" % (time.time() - t1,))
def inference(args):
    """Enhance all test features and write the recovered wavs to disk.

    The most recent checkpoint found in the model directory is restored;
    no specific iteration is selected.

    Args:
        args: parsed arguments. The attributes read here are ``workspace``
            (root directory), ``n_concat`` (number of frames to concatenate,
            should equal the value used in the training stage),
            ``dir_name`` and ``tr_enh`` (feature sub-directories),
            ``model_name`` and ``visualize`` (plot spectrograms for debug).
    """
    print(args)
    workspace = args.workspace
    n_concat = args.n_concat
    dir_name = args.dir_name
    model_name = args.model_name
    tr_enh = args.tr_enh
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    scale = True

    n_freq = n_window // 2 + 1
    # Floor division keeps the pad width an integer on Python 3 as well.
    n_pad = (n_concat - 1) // 2

    # Load model.
    model_dir = os.path.join(workspace, "models", model_name)
    with tf.Session() as sess:
        model = BiLSTM(sess, 0.0, 1, (n_concat, n_freq), n_freq)
        model.build()
        saver = tf.train.Saver()
        ckpt = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess, ckpt)

        # Load scaler.
        # NOTE(review): the training directory name is hard-coded here.
        scaler_path = os.path.join(workspace, "packed_features", "spectrogram",
                                   "train", "REVERB_tr_cut", "scaler.p")
        with open(scaler_path, 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)

        # Load test data.
        feat_dir = os.path.join(workspace, "features", "spectrogram", tr_enh,
                                dir_name)
        names = os.listdir(feat_dir)

        for (cnt, na) in enumerate(names):
            # Load feature; the stored name replaces the file name.
            feat_path = os.path.join(feat_dir, na)
            with open(feat_path, 'rb') as feat_file:
                data = pickle.load(feat_file)
            [mixed_cmplx_x, speech_x, na] = data
            mixed_x = np.abs(mixed_cmplx_x)

            # Process data.
            mixed_x = pp_data.pad_with_border(mixed_x, n_pad)
            mixed_x = pp_data.log_sp(mixed_x)
            speech_x = pp_data.log_sp(speech_x)

            # Scale data.
            if scale:
                mixed_x = pp_data.scale_on_2d(mixed_x, scaler)
                speech_x = pp_data.scale_on_2d(speech_x, scaler)

            # Cut input spectrogram to 3D segments with n_concat.
            mixed_x_3d = pp_data.mat_2d_to_3d(mixed_x, agg_num=n_concat, hop=1)

            # Predict.
            pred = sess.run([model.enhanced_outputs],
                            feed_dict={model.x_noisy: mixed_x_3d})
            pred = np.reshape(pred, (-1, n_freq))
            print(cnt, na)

            # Inverse scale.
            if scale:
                mixed_x = pp_data.inverse_scale_on_2d(mixed_x, scaler)
                speech_x = pp_data.inverse_scale_on_2d(speech_x, scaler)
                pred = pp_data.inverse_scale_on_2d(pred, scaler)

            # Debug plot.
            if args.visualize:
                fig, axs = plt.subplots(3, 1, sharex=False)
                axs[0].matshow(mixed_x.T, origin='lower', aspect='auto',
                               cmap='jet')
                axs[1].matshow(speech_x.T, origin='lower', aspect='auto',
                               cmap='jet')
                axs[2].matshow(pred.T, origin='lower', aspect='auto',
                               cmap='jet')
                axs[1].set_title("Clean speech log spectrogram")
                axs[2].set_title("Enhanced speech log spectrogram")
                for j1 in range(3):
                    axs[j1].xaxis.tick_bottom()
                plt.tight_layout()
                plt.show()

            # Recover enhanced wav.
            pred_sp = np.exp(pred)
            s = recover_wav(pred_sp, mixed_cmplx_x, n_overlap, np.hamming)
            # Scaler to compensate the amplitude change after spectrogram
            # and IFFT.
            s *= np.sqrt((np.hamming(n_window) ** 2).sum())

            # Write out enhanced wav.
            out_path = os.path.join(workspace, "enh_wavs", "test", dir_name,
                                    "%s.enh.wav" % na)
            pp_data.create_folder(os.path.dirname(out_path))
            pp_data.write_audio(out_path, s, fs)