Exemplo n.º 1
0
class BiLSTM_CRF(nn.Module):
    """Sequence tagger that feeds BiLSTM output scores into a CRF layer."""

    def __init__(self, data):
        """Build the BiLSTM scorer and the CRF from the settings in ``data``.

        Side effect: ``data.label_alphabet_size`` is increased by two before
        the BiLSTM is constructed.
        """
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # The CRF receives the original label count; the LSTM below is built
        # after two extra labels (START and END) were added to the alphabet
        # size stored on ``data``.
        crf_label_count = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(crf_label_count, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, char_inputs, bichar_inputs,
                                char_seq_lengths, batch_label, mask):
        """Return ``(loss, tag_seq)``: the CRF loss and the Viterbi decoding."""
        lstm_scores = self.lstm.get_output_score(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        crf_loss = self.crf.neg_log_likelihood_loss(
            lstm_scores, mask, batch_label)
        # Only the decoded path is returned; the path scores are discarded.
        _, decoded_tags = self.crf._viterbi_decode(lstm_scores, mask)
        return crf_loss, decoded_tags

    def forward(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths,
                mask):
        """Return the Viterbi-decoded tag sequence for the batch."""
        lstm_scores = self.lstm.get_output_score(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        _, decoded_tags = self.crf._viterbi_decode(lstm_scores, mask)
        return decoded_tags

    def get_lstm_features(self, gaz_list, char_inputs, bichar_inputs,
                          char_seq_lengths):
        """Delegate to the wrapped BiLSTM's ``get_lstm_features``."""
        features = self.lstm.get_lstm_features(
            gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        return features
Exemplo n.º 2
0
class BiLSTM_CRF(nn.Module):
    """Sequence tagger that feeds BiLSTM output scores into a CRF layer."""

    def __init__(self, data):
        """Build the BiLSTM scorer and the CRF from the settings in ``data``.

        Side effect: ``data.label_alphabet_size`` is increased by two before
        the BiLSTM is constructed.
        """
        super(BiLSTM_CRF, self).__init__()
        print ("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # Add two more labels for the downstream LSTM; the CRF itself is
        # built with the original label size.
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        """Return ``(total_loss, tag_seq)`` for one batch.

        ``total_loss`` is the CRF negative log-likelihood of ``batch_label``
        and ``tag_seq`` is the Viterbi decoding of the same LSTM scores.
        """
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        # The path scores are not needed by callers, only the decoded tags.
        _, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask):
        """Return the Viterbi-decoded tag sequence for the batch."""
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        _, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        """Delegate to the wrapped BiLSTM's ``get_lstm_features``."""
        return self.lstm.get_lstm_features(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        
Exemplo n.º 3
0
 def __init__(self, data):
     """Build the BiLSTM scorer and the CRF from the settings in ``data``.

     Side effect: ``data.label_alphabet_size`` is increased by two before the
     BiLSTM is constructed.
     """
     super(BiLSTM_CRF, self).__init__()
     print("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     # Remember the original label count for the CRF, then register the two
     # extra labels (START and END) that the downstream LSTM is built with.
     crf_label_count = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(crf_label_count, self.gpu)
 def __init__(self, data):
     """Build the BiLSTM scorer together with softmax / cross-entropy helpers.

     No CRF layer is created by this constructor.
     """
     super(BiLSTM_CRF, self).__init__()
     print("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     self.lstm = BiLSTM(data)
     # NOTE(review): LogSoftmax is created without an explicit ``dim``.
     self.softmax = nn.LogSoftmax()
     self.loss_op = nn.CrossEntropyLoss()
     # Index of the "NEGATIVE" entry in the label alphabet.
     negative_index = data.label_alphabet.get_index("NEGATIVE")
     self.negid = negative_index
Exemplo n.º 5
0
 def __init__(self, data):
     """Create the BiLSTM and CRF sub-modules described by ``data``.

     Side effect: ``data.label_alphabet_size`` grows by two before the BiLSTM
     is constructed.
     """
     super(BiLSTM_CRF, self).__init__()
     print("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     # The downstream LSTM sees two more labels than the CRF, which keeps
     # the original label size.
     original_label_count = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(original_label_count, self.gpu)
    def __init__(self, config):
        """Copy the hyper-parameters from ``config`` and build the model.

        A ``BiLSTM`` is created and stored as ``self.model`` when
        ``config.model_bilstm`` is truthy.
        """
        super(Joint, self).__init__()
        self.config = config

        # Embedding and label settings.
        self.embed_num = config.embed_num
        self.embed_dim = config.embed_dim
        self.label_num = config.label_num
        self.paddingId = config.paddingId

        # Dropout rates.
        self.dropout_emb = config.dropout_emb
        self.dropout = config.dropout

        # LSTM dimensions.
        self.lstm_hiddens = config.lstm_hiddens
        self.lstm_layers = config.lstm_layers

        # Pre-trained embedding settings.
        self.pretrained_embed = config.pretrained_embed
        self.pretrained_weight = config.pretrained_weight

        # CNN settings; the filter sizes go through ``self._conv_filter``.
        self.wide_conv = config.wide_conv
        self.conv_filter_sizes = self._conv_filter(config.conv_filter_sizes)
        self.conv_filter_nums = config.conv_filter_nums
        self.use_cuda = config.use_cuda

        if self.config.model_bilstm:
            bilstm_kwargs = dict(
                embed_num=self.embed_num,
                embed_dim=self.embed_dim,
                label_num=self.label_num,
                paddingId=self.paddingId,
                dropout_emb=self.dropout_emb,
                dropout=self.dropout,
                lstm_hiddens=self.lstm_hiddens,
                lstm_layers=self.lstm_layers,
                pretrained_embed=self.pretrained_embed,
                pretrained_weight=self.pretrained_weight,
                use_cuda=self.use_cuda,
            )
            self.model = BiLSTM(**bilstm_kwargs)
Exemplo n.º 7
0
 def __init__(self, data):
     """Set up the BiLSTM scorer and the CRF layer from ``data``.

     Side effect: ``data.label_alphabet_size`` is incremented by two before
     the BiLSTM is constructed.
     """
     super(BiLSTM_CRF, self).__init__()
     print ("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     # Two more labels are added for the downstream LSTM; the CRF is sized
     # with the label count captured before that change.
     num_crf_labels = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(num_crf_labels, self.gpu)
class BiLSTM_CRF(nn.Module):
    """BiLSTM tagger trained with a per-position cross-entropy loss.

    Despite the class name, no CRF layer is built here: the loss returned by
    ``neg_log_likelihood_loss`` is ``nn.CrossEntropyLoss`` over the flattened
    LSTM scores.
    """

    def __init__(self, data):
        """Build the BiLSTM scorer and the loss helpers from ``data``."""
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        self.lstm = BiLSTM(data)
        # NOTE(review): LogSoftmax is created without an explicit ``dim`` and
        # is not used by the methods visible in this class.
        self.softmax = nn.LogSoftmax()
        self.loss_op = nn.CrossEntropyLoss()
        # Index of the "NEGATIVE" label; count_weight_loss averages this
        # class separately from all other classes.
        self.negid = data.label_alphabet.get_index("NEGATIVE")

    def count_weight_loss(self, output, targets):
        """Return a class-balanced log-loss plus a small L2-norm penalty.

        The log-probability of each gold class is averaged separately over
        the NEGATIVE targets and over all other targets, and the two means
        are added, so the NEGATIVE class cannot dominate the loss. A group
        with no samples contributes nothing instead of producing NaN.

        Args:
            output: 2-D score tensor indexed as ``[sample, class]``.
            targets: 1-D integer tensor holding the gold class per sample.

        Returns:
            Zero-dimensional loss tensor.
        """
        # Set to True to average over every sample at once instead of
        # averaging the NEGATIVE and non-NEGATIVE groups separately.
        ONLY_POSITIVE = False
        # ``output`` is indexed below as [sample, class], so the class axis
        # is dim 1 (which is also what the implicit default picks for 2-D).
        log_probs = Fun.log_softmax(output, dim=1)
        sample_index = torch.arange(targets.shape[0]).type(torch.LongTensor)
        gold_log_probs = log_probs[sample_index, targets]

        if ONLY_POSITIVE:
            classify_loss = -torch.mean(gold_log_probs)
        else:
            groups = (gold_log_probs[targets != self.negid],
                      gold_log_probs[targets == self.negid])
            # Skip empty groups: the mean of an empty tensor is NaN.
            classify_loss = sum(
                -torch.mean(group) for group in groups if group.numel() > 0)

        # Sum of the L2 norms of this module's trainable parameters. It is
        # accumulated out of place: an in-place add on a leaf tensor that
        # requires grad is rejected by autograd.
        l2_reg = sum(W.norm(2) for W in self.parameters() if W.requires_grad)

        loss = classify_loss + 1e-5 * l2_reg
        if torch.is_tensor(loss):
            loss = loss.squeeze()
        return loss

    def neg_log_likelihood_loss(self, gaz_list, batch_word, batch_entity,
                                batch_gloss, batch_label, mask):
        """Return ``(loss, predictions)`` for one batch.

        The LSTM scores are flattened to ``[-1, outs.shape[2]]`` and the
        labels to one dimension; ``loss`` is the cross-entropy between them
        and ``predictions`` is the arg-max class of every flattened position.
        """
        outs = self.lstm.get_output_score(gaz_list, batch_word, batch_entity,
                                          batch_gloss, batch_label, mask)
        outs = outs.view([-1, outs.shape[2]])
        batch_label = batch_label.view([-1])
        loss = self.loss_op(outs, batch_label).sum()
        return loss, torch.max(outs, 1)[1]
Exemplo n.º 9
0
    def __init__(self, vocab_size, emb_size, hidden_size, out_size):
        """Build the BiLSTM scorer and the CRF transition matrix.

        Args:
            vocab_size: size of the vocabulary.
            emb_size: dimensionality of the word embeddings.
            hidden_size: dimensionality of the hidden state.
            out_size: number of tag classes.
        """
        super(BiLSTM_CRF, self).__init__()
        self.bilstm = BiLSTM(vocab_size, emb_size, hidden_size, out_size)

        # The CRF part amounts to learning one extra transition matrix of
        # shape [out_size, out_size]; every entry starts at 1 / out_size.
        initial_transition = torch.ones(out_size, out_size) / out_size
        self.transition = nn.Parameter(initial_transition)
Exemplo n.º 10
0
    # The test-set frequency tables are no longer needed past this point.
    del test_word_freq, test_label_freq

    # Build the network selected by args.model. Both model classes are given
    # the same positional arguments.
    # NOTE(review): `net` is only bound for "bilstm_crf" and "bilstm"; any
    # other value reaches print(net) with `net` undefined.
    if args.model == "bilstm_crf":  
        net = BiLSTM_CRF(vocab.num_words,
                     config["word_dim"],
                     config["layers"],
                     config["word_hidden"],
                     vocab.num_labels,
                     config["dropout"],
        )  
    elif args.model == "bilstm":  
        net = BiLSTM(vocab.num_words,
                     config["word_dim"],
                     config["layers"],
                     config["word_hidden"],
                     vocab.num_labels,
                     config["dropout"],
        )
    print(net)

    # Initialise the optimizer.
    # NOTE(review): only 'adam' is handled in the visible code; for any other
    # value `optimizer` is not bound here.
    if config["optimizer"] == 'adam':
         print('Using Adam optimizer...', flush = True)
         optimizer = optim.Adam(net.parameters(), lr=config["lr"])
    
    # Single-GPU case: move the network to CUDA only when neither the
    # multi-GPU nor the distributed flag is set.
    if config.get("use_cuda", False) and not config.get("useMultiGPU", False) and not config.get("useDistGPU", False):
        net.cuda()
    
    # Multi-GPU case (the branch body lies outside this excerpt).
    elif config.get("useMultiGPU", False):
class BiLSTM_CRF(nn.Module):
    """One shared BiLSTM scorer with three CRF heads.

    The heads are ``crf`` (default), ``crf_ner`` and ``crf_general``; each is
    paired with its own ``get_output_score*`` method on the BiLSTM.
    """

    def __init__(self, data):
        """Build the three CRF heads, then the shared BiLSTM.

        Side effect: ``label_alphabet_size``, ``label_alphabet_size_ner`` and
        ``label_alphabet_size_general`` on ``data`` are each increased by two
        before the BiLSTM is constructed.
        """
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu

        # Every CRF is sized with the original label count of its task,
        # while the LSTM built afterwards sees two more labels per task.
        default_label_count = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.crf = CRF(default_label_count, self.gpu)

        ner_label_count = data.label_alphabet_size_ner
        data.label_alphabet_size_ner += 2
        self.crf_ner = CRF(ner_label_count, self.gpu)

        general_label_count = data.label_alphabet_size_general
        data.label_alphabet_size_general += 2
        self.crf_general = CRF(general_label_count, self.gpu)

        self.lstm = BiLSTM(data)

    def _loss_and_decode(self, crf, outs, mask, batch_label):
        """Return ``(loss, tag_seq)`` computed by ``crf`` on the scores ``outs``."""
        head_loss = crf.neg_log_likelihood_loss(outs, mask, batch_label)
        _, decoded_tags = crf._viterbi_decode(outs, mask)
        return head_loss, decoded_tags

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs,
                                word_seq_lengths, char_inputs,
                                char_seq_lengths, char_seq_recover,
                                batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the default CRF head."""
        lstm_scores = self.lstm.get_output_score(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        return self._loss_and_decode(self.crf, lstm_scores, mask, batch_label)

    def neg_log_likelihood_loss_ner(self, gaz_list, word_inputs, biword_inputs,
                                    word_seq_lengths, char_inputs,
                                    char_seq_lengths, char_seq_recover,
                                    batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the NER CRF head."""
        lstm_scores = self.lstm.get_output_score_ner(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        return self._loss_and_decode(
            self.crf_ner, lstm_scores, mask, batch_label)

    def neg_log_likelihood_loss_general(self, gaz_list, word_inputs,
                                        biword_inputs, word_seq_lengths,
                                        char_inputs, char_seq_lengths,
                                        char_seq_recover, batch_label, mask):
        """Return ``(total_loss, tag_seq)`` from the general CRF head."""
        lstm_scores = self.lstm.get_output_score_general(
            gaz_list, word_inputs, biword_inputs, word_seq_lengths,
            char_inputs, char_seq_lengths, char_seq_recover)
        return self._loss_and_decode(
            self.crf_general, lstm_scores, mask, batch_label)

    def forward(self, is_ner, gaz_list, word_inputs, biword_inputs,
                word_seq_lengths, char_inputs, char_seq_lengths,
                char_seq_recover, mask):
        """Decode tags with the NER head when ``is_ner`` is truthy.

        Otherwise the default head is used; the general head is never used
        for decoding here.
        """
        if is_ner:
            lstm_scores = self.lstm.get_output_score_ner(
                gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover)
            decoder = self.crf_ner
        else:
            lstm_scores = self.lstm.get_output_score(
                gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover)
            decoder = self.crf
        _, decoded_tags = decoder._viterbi_decode(lstm_scores, mask)
        return decoded_tags
Exemplo n.º 12
0
def _save_training_stats(stats_dir, iteration, tr_loss):
    """Pickle the training loss of ``iteration`` to ``<stats_dir>/<iteration>iters.p``."""
    stat_dict = {'iter': iteration,
                 'tr_loss': tr_loss, }
    stat_path = os.path.join(stats_dir, "%diters.p" % iteration)
    with open(stat_path, 'wb') as stat_file:
        pickle.dump(stat_dict, stat_file, protocol=pickle.HIGHEST_PROTOCOL)


def train(args):
    """Train the neural network. Write out model every several iterations.

    Training statistics are pickled every 1000 iterations and a checkpoint is
    saved every 5000 iterations. Validation data loading and evaluation are
    currently disabled, so only the training loss is reported.

    Args:
      args: parsed arguments providing
        workspace: str, path of workspace.
        model_name: str, name of the model / checkpoint sub-directory.
        lr: float, learning rate.
        tr_dir_name: str, name of the training feature directory.
        iteration: int, the loop stops once ``iteration + 1`` steps are done.
        dropout: dropout setting forwarded to the model.
    """
    print(args)
    workspace = args.workspace
    model_name = args.model_name
    lr = args.lr
    tr_dir_name = args.tr_dir_name
    iter_training = args.iteration
    dropout = args.dropout

    # Load data.
    t1 = time.time()
    tr_feature_dir = os.path.join(
        workspace, "packed_features", "spectrogram", "train", tr_dir_name)
    (tr_x, tr_y) = pp_data.load_hdf5(os.path.join(tr_feature_dir, "data.h5"))
    print(tr_x.shape, tr_y.shape)
    print("Load data time: %s s" % (time.time() - t1,))

    batch_size = 500
    print("%d iterations / epoch" % int(tr_x.shape[0] / batch_size))

    # Scale data.
    t1 = time.time()
    scaler_path = os.path.join(tr_feature_dir, "scaler.p")
    # NOTE: pickle executes arbitrary code on load; the scaler file must come
    # from a trusted source.
    with open(scaler_path, 'rb') as scaler_file:
        scaler = pickle.load(scaler_file)
    tr_x = pp_data.scale_on_3d(tr_x, scaler)
    tr_y = pp_data.scale_on_2d(tr_y, scaler)
    print("Scale data time: %s s" % (time.time() - t1,))

    # Debug plot, disabled by default.
    show_debug_plot = False
    if show_debug_plot:
        plt.matshow(tr_x[0 : 1000, 0, :].T, origin='lower', aspect='auto', cmap='jet')
        plt.show()

    # Build model.
    (_, n_concat, n_freq) = tr_x.shape

    with tf.Session() as sess:
        model = BiLSTM(sess, lr, batch_size, (n_concat, n_freq), n_freq, dropouts=dropout, training=True)
        model.build()
        sess.run(tf.global_variables_initializer())
        merge_op = tf.summary.merge_all()
        # Build the saver once: constructing a Saver inside the loop would
        # add a new set of save/restore ops to the graph at every checkpoint.
        saver = tf.train.Saver()

        # Data generators.
        tr_gen = DataGenerator(batch_size=batch_size, type='train')
        eval_tr_gen = DataGenerator(batch_size=batch_size, type='test', te_max_iter=100)

        # Directories for saving models and training stats.
        model_dir = os.path.join(workspace, "models", model_name)
        pp_data.create_folder(model_dir)

        stats_dir = os.path.join(workspace, "training_stats", model_name)
        pp_data.create_folder(stats_dir)

        # Print and store the loss before training. ``eval`` here is the
        # evaluation function defined elsewhere in this file, not the builtin.
        iteration = 0
        tr_loss = eval(sess, model, eval_tr_gen, tr_x, tr_y)
        print("Iteration: %d, tr_loss: %f" % (iteration, tr_loss))
        _save_training_stats(stats_dir, iteration, tr_loss)

        # Train.
        t1 = time.time()
        for (batch_x, batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
            feed_dict = {model.x_noisy: batch_x,
                         model.y_clean: batch_y}
            sess.run([model.optimizer, model.loss, merge_op],
                     feed_dict=feed_dict)

            iteration += 1

            # Evaluate and save training stats.
            if iteration % 1000 == 0:
                tr_loss = eval(sess, model, eval_tr_gen, tr_x, tr_y)
                print("Iteration: %d, tr_loss: %f" % (iteration, tr_loss))
                _save_training_stats(stats_dir, iteration, tr_loss)

            # Save model.
            if iteration % 5000 == 0:
                ckpt_file_path = os.path.join(model_dir, model_name)
                saver.save(sess, ckpt_file_path, write_meta_graph=True)
                print("Saved model to %s" % ckpt_file_path)

            if iteration == iter_training + 1:
                break

        print("Training time: %s s" % (time.time() - t1,))
Exemplo n.º 13
0
def inference(args):
    """Inference all test data, write out recovered wavs to disk.

    The model is restored from the latest checkpoint found in
    ``<workspace>/models/<model_name>``.

    Args:
      args: parsed arguments providing
        workspace: str, path of workspace.
        n_concat: int, number of frames to concatenate, should equal to
            n_concat in the training stage.
        dir_name: str, name of the feature directory to enhance.
        model_name: str, name of the model / checkpoint sub-directory.
        tr_enh: str, feature sub-directory that contains ``dir_name``.
        visualize: bool, plot enhanced spectrogram for debug.
    """
    print(args)
    workspace = args.workspace
    n_concat = args.n_concat
    dir_name = args.dir_name
    model_name = args.model_name

    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    scale = True

    tr_enh = args.tr_enh

    n_freq = int(n_window / 2 + 1)
    # Floor division keeps the padding an integer on Python 3 as well.
    n_pad = (n_concat - 1) // 2

    # Load model.
    model_dir = os.path.join(workspace, "models", model_name)
    with tf.Session() as sess:

        model = BiLSTM(sess, 0.0, 1, (n_concat, n_freq), n_freq)
        model.build()
        saver = tf.train.Saver()

        ckpt = tf.train.latest_checkpoint(model_dir)
        saver.restore(sess, ckpt)

        # Load scaler.
        # NOTE(review): the training directory name is hard-coded here.
        # NOTE: pickle executes arbitrary code on load; the scaler and feature
        # files must come from a trusted source.
        scaler_path = os.path.join(workspace, "packed_features", "spectrogram", "train", "REVERB_tr_cut", "scaler.p")
        with open(scaler_path, 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)

        # Load test data.
        feat_dir = os.path.join(workspace, "features", "spectrogram", tr_enh, dir_name)
        names = os.listdir(feat_dir)

        for (cnt, na) in enumerate(names):
            # Load feature. The name stored inside the file replaces the
            # directory entry name from here on.
            feat_path = os.path.join(feat_dir, na)
            with open(feat_path, 'rb') as feat_file:
                data = pickle.load(feat_file)
            [mixed_cmplx_x, speech_x, na] = data
            mixed_x = np.abs(mixed_cmplx_x)

            # Process data.
            mixed_x = pp_data.pad_with_border(mixed_x, n_pad)
            mixed_x = pp_data.log_sp(mixed_x)
            speech_x = pp_data.log_sp(speech_x)

            # Scale data.
            if scale:
                mixed_x = pp_data.scale_on_2d(mixed_x, scaler)
                speech_x = pp_data.scale_on_2d(speech_x, scaler)

            # Cut input spectrogram to 3D segments with n_concat.
            mixed_x_3d = pp_data.mat_2d_to_3d(mixed_x, agg_num=n_concat, hop=1)

            # Predict.
            pred = sess.run([model.enhanced_outputs], feed_dict={model.x_noisy: mixed_x_3d})
            pred = np.reshape(pred, (-1, n_freq))
            print(cnt, na)

            # Inverse scale.
            if scale:
                mixed_x = pp_data.inverse_scale_on_2d(mixed_x, scaler)
                speech_x = pp_data.inverse_scale_on_2d(speech_x, scaler)
                pred = pp_data.inverse_scale_on_2d(pred, scaler)

            # Debug plot.
            if args.visualize:
                fig, axs = plt.subplots(3,1, sharex=False)
                axs[0].matshow(mixed_x.T, origin='lower', aspect='auto', cmap='jet')
                axs[1].matshow(speech_x.T, origin='lower', aspect='auto', cmap='jet')
                axs[2].matshow(pred.T, origin='lower', aspect='auto', cmap='jet')
                axs[1].set_title("Clean speech log spectrogram")
                axs[2].set_title("Enhanced speech log spectrogram")
                for j1 in range(3):
                    axs[j1].xaxis.tick_bottom()
                plt.tight_layout()
                plt.show()

            # Recover enhanced wav.
            pred_sp = np.exp(pred)

            s = recover_wav(pred_sp, mixed_cmplx_x, n_overlap, np.hamming)
            # Compensate the amplitude change introduced by the spectrogram
            # and the IFFT.
            s *= np.sqrt((np.hamming(n_window)**2).sum())

            # Write out enhanced wav.
            out_path = os.path.join(workspace, "enh_wavs", "test", dir_name, "%s.enh.wav" % na)
            pp_data.create_folder(os.path.dirname(out_path))
            pp_data.write_audio(out_path, s, fs)