def train(epoch):
    global e, updates, total_loss, start_time, report_total, report_correct, total_loss_sgm, total_loss_ss
    e = epoch
    model.train()
    SDR_SUM = np.array([])
    SDRi_SUM = np.array([])

    if updates <= config.warmup:  # Still in warmup: the lr is handled inside the loop below.
        pass
    elif config.schedule and scheduler.get_lr()[0] > 5e-7:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])
        lera.log({
            'lr': [group['lr'] for group in optim.optimizer.param_groups][0],
        })

    if opt.model == 'gated':
        model.current_epoch = epoch

    train_data_gen = prepare_data('once', 'train')
    while True:
        if updates <= config.warmup:  # Inside warmup: ramp the learning rate up.
            tmp_lr = config.learning_rate * min(
                max(updates, 1) ** (-0.5),
                max(updates, 1) * (config.warmup ** (-1.5)))
            for param_group in optim.optimizer.param_groups:
                param_group['lr'] = tmp_lr
            scheduler.base_lrs = list(
                [group['lr'] for group in optim.optimizer.param_groups])
            if updates % 100 == 0:  # Log the warmup progress occasionally.
                print(updates)
                print("Warmup learning rate to %g" % tmp_lr)
                lera.log({
                    'lr': [group['lr'] for group in optim.optimizer.param_groups][0],
                })

        train_data = next(train_data_gen)
        if train_data == False:
            print('SDR_aver_epoch:', SDR_SUM.mean())
            print('SDRi_aver_epoch:', SDRi_SUM.mean())
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(train_data['mix_feas']))
        # raw_tgt = [spk.keys() for spk in train_data['multi_spk_fea_list']]
        # raw_tgt = [sorted(spk.keys()) for spk in train_data['multi_spk_fea_list']]
        raw_tgt = train_data['batch_order']
        feas_tgt = models.rank_feas(
            raw_tgt, train_data['multi_spk_fea_list'])  # Target spectrograms: aim_size, len, fre.

        padded_mixture, mixture_lengths, padded_source = train_data['tas_zip']
        padded_mixture = torch.from_numpy(padded_mixture).float()
        mixture_lengths = torch.from_numpy(mixture_lengths)
        padded_source = torch.from_numpy(padded_source).float()
        padded_mixture = padded_mixture.cuda().transpose(0, 1)
        mixture_lengths = mixture_lengths.cuda()
        padded_source = padded_source.cuda()

        # The index tensors below must all be LongTensors.
        tgt_max_len = config.MAX_MIX + 2  # with BOS and EOS
        tgt = Variable(
            torch.from_numpy(
                np.array(
                    [[0] + [dict_spk2idx[spk] for spk in spks] +
                     (tgt_max_len - len(spks) - 1) * [dict_spk2idx['<EOS>']]
                     for spks in raw_tgt],
                    dtype=int))).transpose(0, 1)  # Convert speakers to indices and wrap with BOS/EOS.
        # tgt = Variable(torch.from_numpy(np.array([[0, 1, 2, 102] for __ in range(config.batch_size)], dtype=int))).transpose(0, 1)
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor([
                len(one_spk) for one_spk in train_data['multi_spk_fea_list']
            ])).unsqueeze(0)

        if config.WFM:
            siz = src.size()  # bs,T,F
            assert len(siz) == 3
            # topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
            topk_max = 2  # largest possible number of speakers (topk)
            x_input_map_multi = torch.unsqueeze(src, 1).expand(
                siz[0], topk_max, siz[1],
                siz[2]).contiguous().view(-1, siz[1], siz[2])  # bs*topk,T,F
            feas_tgt_tmp = feas_tgt.view(siz[0], -1, siz[1], siz[2])
            feas_tgt_square = feas_tgt_tmp * feas_tgt_tmp
            feas_tgt_sum_square = torch.sum(
                feas_tgt_square, dim=1,
                keepdim=True).expand(siz[0], topk_max, siz[1], siz[2])
            WFM_mask = feas_tgt_square / (feas_tgt_sum_square + 1e-15)
            feas_tgt = x_input_map_multi.view(
                siz[0], -1, siz[1], siz[2]).data * WFM_mask  # bs,topk,T,F
            feas_tgt = feas_tgt.view(-1, siz[1], siz[2])  # bs*topk,T,F
            WFM_mask = WFM_mask.cuda()
            del x_input_map_multi
        elif config.PSM:
            siz = src.size()  # bs,T,F
            assert len(siz) == 3
            # topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
            topk_max = 2  # largest possible number of speakers (topk)
            x_input_map_multi = torch.unsqueeze(src, 1).expand(
                siz[0], topk_max, siz[1], siz[2]).contiguous()  # bs,topk,T,F
            feas_tgt_tmp = feas_tgt.view(siz[0], -1, siz[1], siz[2])
            IRM = feas_tgt_tmp / (x_input_map_multi + 1e-15)
            angle_tgt = models.rank_feas(
                raw_tgt, train_data['multi_spk_angle_list']).view(
                    siz[0], -1, siz[1], siz[2])
            angle_mix = Variable(
                torch.from_numpy(np.array(
                    train_data['mix_angle']))).unsqueeze(1).expand(
                        siz[0], topk_max, siz[1], siz[2]).contiguous()
            ang = np.cos(angle_mix - angle_tgt)
            ang = np.clip(ang, 0, None)
            feas_tgt = x_input_map_multi * IRM * ang  # bs,topk,T,F
            feas_tgt = feas_tgt.view(-1, siz[1], siz[2])  # bs*topk,T,F
            del x_input_map_multi
        elif config.frame_mask:
            siz = src.size()  # bs,T,F
            assert len(siz) == 3
            # topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
            topk_max = 2  # largest possible number of speakers (topk)
            x_input_map_multi = torch.unsqueeze(src, 1).expand(
                siz[0], topk_max, siz[1], siz[2]).contiguous()  # bs,topk,T,F
            feas_tgt_tmp = feas_tgt.view(siz[0], -1, siz[1], siz[2])
            feas_tgt_time = torch.sum(feas_tgt_tmp, 3).transpose(1, 2)  # bs,T,topk
            for v1 in feas_tgt_time:
                for v2 in v1:
                    if v2[0] > v2[1]:
                        v2[0] = 1
                        v2[1] = 0
                    else:
                        v2[0] = 0
                        v2[1] = 1
            frame_mask = feas_tgt_time.transpose(1, 2).unsqueeze(-1)  # bs,topk,T,1
            feas_tgt = x_input_map_multi * frame_mask
            feas_tgt = feas_tgt.view(-1, siz[1], siz[2])  # bs*topk,T,F

        if use_cuda:
            src = src.cuda().transpose(0, 1)
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()

        model.zero_grad()
        if config.use_center_loss:
            center_loss.zero_grad()

        # aim_list holds the flat indices of positions that carry a real speaker (not EOS).
        aim_list = (tgt[1:-1].transpose(0, 1).contiguous().view(-1) !=
                    dict_spk2idx['<EOS>']).nonzero().squeeze()
        aim_list = aim_list.data.cpu().numpy()

        outputs, pred, targets, multi_mask, dec_enc_attn_list = model(
            src, src_len, tgt, tgt_len, dict_spk2idx, None,
            mix_wav=padded_mixture
        )  # outputs are the hidden states before the final classification layer and can be used directly.
        print('mask size:', multi_mask.size())
        # writer.add_histogram('global gamma', gamma, updates)

        src = src.transpose(0, 1)
        # Expand the raw mixed features to topk_max channels.
        siz = src.size()
        assert len(siz) == 3
        topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk_max, siz[1], siz[2]).contiguous()  # .view(-1, siz[1], siz[2])
        # x_input_map_multi = x_input_map_multi[aim_list]
        multi_mask = multi_mask.transpose(0, 1)
        # if config.WFM:
        #     feas_tgt = x_input_map_multi.data * WFM_mask

        if config.use_tas:
            if 1 and len(opt.gpus) > 1:
                ss_loss, pmt_list, max_snr_idx, *__ = model.module.separation_tas_loss(
                    padded_mixture, multi_mask, padded_source, mixture_lengths)
            else:
                ss_loss, pmt_list, max_snr_idx, *__ = model.separation_tas_loss(
                    padded_mixture, multi_mask, padded_source, mixture_lengths)
            best_pmt = [
                list(pmt_list[int(mm)].data.cpu().numpy()) for mm in max_snr_idx
            ]
        else:
            if 1 and len(opt.gpus) > 1:  # Run SS first to obtain the permutation.
                ss_loss, best_pmt = model.module.separation_pit_loss(
                    x_input_map_multi, multi_mask, feas_tgt)
            else:
                ss_loss, best_pmt = model.separation_pit_loss(
                    x_input_map_multi, multi_mask, feas_tgt)
        print('loss for SS, this batch:', ss_loss.cpu().item())
        print('best perms for this batch:', best_pmt)
        writer.add_scalars('scalar/loss', {'ss_loss': ss_loss.cpu().item()},
                           updates)

        # Re-order the speaker prediction targets according to best_pmt.
        targets = targets.transpose(0, 1)  # bs, aim+1 (EOS included)
        # print('targets', targets)
        targets_old = targets
        for idx, (tar, per) in enumerate(zip(targets, best_pmt)):
            per.append(topk_max)  # Append a tail index per sample so the final EOS stays in place.
            targets_old[idx] = tar[per]
        targets = targets_old.transpose(0, 1)
        # print('targets', targets)

        if 1 and len(opt.gpus) > 1:
            sgm_loss, num_total, num_correct = model.module.compute_loss(
                outputs, targets, opt.memory)
        else:
            sgm_loss, num_total, num_correct = model.compute_loss(
                outputs, targets, opt.memory)
        print('loss for SGM, this batch:', sgm_loss.cpu().item())
        writer.add_scalars('scalar/loss', {'sgm_loss': sgm_loss.cpu().item()},
                           updates)

        if config.use_center_loss:
            cen_alpha = 0.01
            cen_loss = center_loss(outputs.view(-1, config.SPK_EMB_SIZE),
                                   targets.view(-1))
            print('loss for SGM center loss, this batch:', cen_loss.cpu().item())
            writer.add_scalars('scalar/loss',
                               {'center_loss': cen_loss.cpu().item()}, updates)

        if not config.use_tas:
            loss = sgm_loss + 5 * ss_loss
        else:
            loss = 50 * sgm_loss + ss_loss
        loss.backward()
        if config.use_center_loss:
            for c_param in center_loss.parameters():
                c_param.grad.data *= (0.01 / (cen_alpha * scheduler.get_lr()[0]))
        # print('total loss:', loss)
        total_loss_sgm += sgm_loss.cpu().item()
        total_loss_ss += ss_loss.cpu().item()
        lera.log({
            'sgm_loss': sgm_loss.cpu().item(),
            'ss_loss': ss_loss.cpu().item(),
            'loss:': loss.cpu().item(),
        })

        if updates > 10 and updates % config.eval_interval in [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
        ]:
            if not config.use_tas:
                predicted_maps = multi_mask * x_input_map_multi.view(
                    siz[0] * topk_max, siz[1], siz[2])
                # predicted_maps = Variable(feas_tgt)  # this would be the ground truth
                utils.bss_eval(config, predicted_maps,
                               train_data['multi_spk_fea_list'], raw_tgt,
                               train_data, dst='batch_output1')
                del predicted_maps, multi_mask, x_input_map_multi
                sdr_aver_batch, sdri_aver_batch = bss_test.cal('batch_output1/')
            else:
                utils.bss_eval_tas(config, multi_mask,
                                   train_data['multi_spk_fea_list'], raw_tgt,
                                   train_data, dst='batch_output1')
                del x_input_map_multi
                sdr_aver_batch, sdri_aver_batch = bss_test.cal('batch_output1/')
            lera.log({'SDR sample': sdr_aver_batch})
            lera.log({'SDRi sample': sdri_aver_batch})
            writer.add_scalars('scalar/loss', {
                'SDR_sample': sdr_aver_batch,
                'SDRi_sample': sdri_aver_batch
            }, updates)
            SDR_SUM = np.append(SDR_SUM, sdr_aver_batch)
            SDRi_SUM = np.append(SDRi_SUM, sdri_aver_batch)
            print('SDR_aver_now:', SDR_SUM.mean())
            print('SDRi_aver_now:', SDRi_SUM.mean())

        total_loss += loss.cpu().item()
        report_correct += num_correct.cpu().item()
        report_total += num_total.cpu().item()
        optim.step()
        updates += 1

        if updates % 30 == 0:
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss this batch: %6.3f, sgm loss: %6.6f, ss loss: %6.6f, label acc: %6.6f\n"
                % (time.time() - start_time, epoch, updates, loss / num_total,
                   total_loss_sgm / 30.0, total_loss_ss / 30.0,
                   report_correct / report_total))
            lera.log({'label_acc': report_correct / report_total})
            writer.add_scalars('scalar/loss',
                               {'label_acc': report_correct / report_total},
                               updates)
            total_loss_sgm, total_loss_ss = 0, 0

        # continue
        if 0 and updates % config.eval_interval == 0 and epoch > 3:
            # Train at least a few epochs before testing; earlier than that the model
            # has learned too little and the evaluation is unreliable.
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss: %6.5f\n"
                % (time.time() - start_time, epoch, updates,
                   total_loss / report_total))
            print('evaluating after %d updates...\r' % updates)
            original_bs = config.batch_size
            score = eval(epoch)  # batch_size becomes 1 during eval
            config.batch_size = original_bs
            for metric in config.metric:
                scores[metric].append(score[metric])
                lera.log({
                    'sgm_micro_f1': score[metric],
                })
                if metric == 'micro_f1' and score[metric] >= max(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
                if metric == 'hamming_loss' and score[metric] <= min(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
            model.train()
            total_loss = 0
            start_time = 0
            report_total = 0
            report_correct = 0

        if 1 and updates % config.save_interval == 1:
            save_model(log_path + 'TDAAv3_PIT_{}.pt'.format(updates))
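
# A note on the warmup branch above: it implements the inverse-square-root
# ("Noam") schedule, where the learning rate climbs linearly for the first
# config.warmup updates and then decays as 1/sqrt(step). A minimal,
# self-contained sketch of that schedule (base_lr and warmup are illustrative
# stand-ins for config.learning_rate and config.warmup):
def noam_lr(step, base_lr=1e-3, warmup=4000):
    """Learning rate for a given update step under the warmup schedule."""
    step = max(step, 1)  # guard against step 0, as the loop does with max(updates, 1)
    return base_lr * min(step ** (-0.5), step * warmup ** (-1.5))

# For example, noam_lr(1) is tiny, the rate peaks at base_lr / sqrt(warmup)
# when step == warmup, and it halves again by step == 4 * warmup.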
def train(epoch):
    global e, updates, total_loss, start_time, report_total, report_correct, total_loss_sgm, total_loss_ss
    e = epoch
    model.train()
    SDR_SUM = np.array([])
    SDRi_SUM = np.array([])

    if config.schedule and scheduler.get_lr()[0] > 5e-5:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])
        lera.log({
            'lr': scheduler.get_lr()[0],
        })

    if opt.model == 'gated':
        model.current_epoch = epoch

    train_data_gen = prepare_data('once', 'train')
    while True:
        print('\n')
        train_data = next(train_data_gen)
        if train_data == False:
            print('SDR_aver_epoch:', SDR_SUM.mean())
            print('SDRi_aver_epoch:', SDRi_SUM.mean())
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(train_data['mix_feas']))
        # raw_tgt = [spk.keys() for spk in train_data['multi_spk_fea_list']]
        raw_tgt = [
            sorted(spk.keys()) for spk in train_data['multi_spk_fea_list']
        ]
        feas_tgt = models.rank_feas(
            raw_tgt, train_data['multi_spk_fea_list'])  # Target spectrograms: aim_size, len, fre.

        # The index tensors below must all be LongTensors.
        tgt_max_len = config.MAX_MIX + 2  # with BOS and EOS
        tgt = Variable(
            torch.from_numpy(
                np.array(
                    [[0] + [dict_spk2idx[spk] for spk in spks] +
                     (tgt_max_len - len(spks) - 1) * [dict_spk2idx['<EOS>']]
                     for spks in raw_tgt],
                    dtype=int))).transpose(0, 1)  # Convert speakers to indices and wrap with BOS/EOS.
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor([
                len(one_spk) for one_spk in train_data['multi_spk_fea_list']
            ])).unsqueeze(0)

        if use_cuda:
            src = src.cuda().transpose(0, 1)
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()

        model.zero_grad()

        # aim_list holds the flat indices of positions that carry a real speaker (not EOS).
        aim_list = (tgt[1:-1].transpose(0, 1).contiguous().view(-1) !=
                    dict_spk2idx['<EOS>']).nonzero().squeeze()
        aim_list = aim_list.data.cpu().numpy()

        outputs, targets, multi_mask, gamma = model(
            src, src_len, tgt, tgt_len, dict_spk2idx
        )  # outputs are the hidden states before the final classification layer and can be used directly.
        # print('mask size:', multi_mask.size())
        writer.add_histogram('global gamma', gamma, updates)

        if 1 and len(opt.gpus) > 1:
            sgm_loss, num_total, num_correct = model.module.compute_loss(
                outputs, targets, opt.memory)
        else:
            sgm_loss, num_total, num_correct = model.compute_loss(
                outputs, targets, opt.memory)
        print('loss for SGM, this batch:', sgm_loss.cpu().item())
        writer.add_scalars('scalar/loss', {'sgm_loss': sgm_loss.cpu().item()},
                           updates)

        src = src.transpose(0, 1)
        # Expand the raw mixed features to topk_max channels.
        siz = src.size()
        assert len(siz) == 3
        topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk_max, siz[1],
            siz[2]).contiguous().view(-1, siz[1], siz[2])
        x_input_map_multi = x_input_map_multi[aim_list]
        multi_mask = multi_mask.transpose(0, 1)

        if 1 and len(opt.gpus) > 1:
            ss_loss = model.module.separation_loss(x_input_map_multi,
                                                   multi_mask, feas_tgt)
        else:
            ss_loss = model.separation_loss(x_input_map_multi, multi_mask,
                                            feas_tgt)
        print('loss for SS, this batch:', ss_loss.cpu().item())
        writer.add_scalars('scalar/loss', {'ss_loss': ss_loss.cpu().item()},
                           updates)

        loss = sgm_loss + 5 * ss_loss
        loss.backward()
        # print('total loss:', loss)
        total_loss_sgm += sgm_loss.cpu().item()
        total_loss_ss += ss_loss.cpu().item()
        lera.log({
            'sgm_loss': sgm_loss.cpu().item(),
            'ss_loss': ss_loss.cpu().item(),
            'loss:': loss.cpu().item(),
        })

        if updates > 10 and updates % config.eval_interval in [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
        ]:
            predicted_maps = multi_mask * x_input_map_multi
            # predicted_maps = Variable(feas_tgt)
            utils.bss_eval(config, predicted_maps,
                           train_data['multi_spk_fea_list'], raw_tgt,
                           train_data, dst='batch_output')
            del predicted_maps, multi_mask, x_input_map_multi
            sdr_aver_batch, sdri_aver_batch = bss_test.cal('batch_output/')
            lera.log({'SDR sample': sdr_aver_batch})
            lera.log({'SDRi sample': sdri_aver_batch})
            writer.add_scalars('scalar/loss', {
                'SDR_sample': sdr_aver_batch,
                'SDRi_sample': sdri_aver_batch
            }, updates)
            SDR_SUM = np.append(SDR_SUM, sdr_aver_batch)
            SDRi_SUM = np.append(SDRi_SUM, sdri_aver_batch)
            print('SDR_aver_now:', SDR_SUM.mean())
            print('SDRi_aver_now:', SDRi_SUM.mean())

        total_loss += loss.cpu().item()
        report_correct += num_correct.cpu().item()
        report_total += num_total.cpu().item()
        optim.step()
        updates += 1

        if updates % 30 == 0:
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss this batch: %6.3f, sgm loss: %6.6f, ss loss: %6.6f, label acc: %6.6f\n"
                % (time.time() - start_time, epoch, updates, loss / num_total,
                   total_loss_sgm / 30.0, total_loss_ss / 30.0,
                   report_correct / report_total))
            lera.log({'label_acc': report_correct / report_total})
            writer.add_scalars('scalar/loss',
                               {'label_acc': report_correct / report_total},
                               updates)
            total_loss_sgm, total_loss_ss = 0, 0

        # continue
        if 0 and updates % config.eval_interval == 0 and epoch > 3:
            # Train at least a few epochs before testing; earlier than that the model
            # has learned too little and the evaluation is unreliable.
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss: %6.5f\n"
                % (time.time() - start_time, epoch, updates,
                   total_loss / report_total))
            print('evaluating after %d updates...\r' % updates)
            original_bs = config.batch_size
            score = eval(epoch)  # batch_size becomes 1 during eval
            print('Original bs:', original_bs)
            config.batch_size = original_bs
            print('Now bs:', config.batch_size)
            for metric in config.metric:
                scores[metric].append(score[metric])
                lera.log({
                    'sgm_micro_f1': score[metric],
                })
                if metric == 'micro_f1' and score[metric] >= max(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
                if metric == 'hamming_loss' and score[metric] <= min(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
            model.train()
            total_loss = 0
            start_time = 0
            report_total = 0
            report_correct = 0

        if updates % config.save_interval == 1:
            save_model(log_path + 'TDAAv3_{}.pt'.format(updates))
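
# For orientation: separation_loss above compares masked copies of the mixture
# spectrogram against the per-speaker targets. A minimal sketch of that idea
# under a plain MSE objective (the real model.separation_loss may weight or
# normalize differently; simple_separation_loss is a hypothetical helper):
import torch
import torch.nn.functional as F

def simple_separation_loss(mix_spec, masks, tgt_spec):
    """mix_spec, masks, tgt_spec: (bs*topk, T, F); masks lie in [0, 1]."""
    predicted = masks * mix_spec  # apply the estimated T-F masks
    return F.mse_loss(predicted, tgt_spec)

# Usage with shapes matching the loop above (bs=2, topk=2, T=10, F=8):
# loss = simple_separation_loss(torch.rand(4, 10, 8), torch.rand(4, 10, 8),
#                               torch.rand(4, 10, 8))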
def eval(epoch):
    model.eval()
    reference, candidate, source, alignments = [], [], [], []
    valid_mode = 'test'
    print('Valid or Test:', valid_mode)
    eval_data_gen = prepare_data('once', valid_mode, 2, 2)
    # for raw_src, src, src_len, raw_tgt, tgt, tgt_len in validloader:
    SDR_SUM = np.array([])
    batch_idx = 0
    global best_SDR
    while True:  # for ___ in range(100):
        print('-' * 30)
        eval_data = next(eval_data_gen)
        if eval_data == False:
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(eval_data['mix_feas']))
        raw_tgt = [
            sorted(spk.keys()) for spk in eval_data['multi_spk_fea_list']
        ]
        top_k = len(raw_tgt[0])
        # The index tensors below must all be LongTensors.
        # tgt = Variable(torch.from_numpy(np.array([[0] + [dict_spk2idx[spk] for spk in spks] + [dict_spk2idx['<EOS>']] for spks in raw_tgt], dtype=int))).transpose(0, 1)
        tgt = Variable(torch.ones(
            top_k + 2, config.batch_size))  # A dummy tgt; the speaker names do not matter at test time.
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            len(eval_data['multi_spk_fea_list'][0])).unsqueeze(0)
        feas_tgt = models.rank_feas(
            raw_tgt, eval_data['multi_spk_fea_list'])  # target spectrograms

        if config.WFM:
            tmp_size = feas_tgt.size()
            assert len(tmp_size) == 4
            feas_tgt_sum = torch.sum(feas_tgt, dim=1, keepdim=True)
            feas_tgt_sum_square = (feas_tgt_sum * feas_tgt_sum).expand(tmp_size)
            feas_tgt_square = feas_tgt * feas_tgt
            WFM_mask = feas_tgt_square / feas_tgt_sum_square

        if use_cuda:
            src = src.cuda()
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()
            if config.WFM:
                WFM_mask = WFM_mask.cuda()

        if len(opt.gpus) > 1:
            samples, alignment = model.module.sample(src, src_len)
        else:
            try:
                samples, alignment, hiddens, predicted_masks = model.beam_sample(
                    src, src_len, dict_spk2idx, tgt, beam_size=config.beam_size)
            except Exception as info:
                print('**************Error occurs here************:', info)
                continue

        if config.top1:
            predicted_masks = torch.cat([predicted_masks, 1 - predicted_masks], 1)

        # Expand the raw mixed features to topk channels.
        siz = src.size()
        assert len(siz) == 3
        topk = feas_tgt.size()[1]
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk, siz[1], siz[2])
        if config.WFM:
            feas_tgt = x_input_map_multi.data * WFM_mask

        ss_loss = model.separation_loss(x_input_map_multi, predicted_masks,
                                        feas_tgt)
        print('loss for ss, this batch:', ss_loss.item())
        del ss_loss, hiddens

        if batch_idx <= (500 // config.batch_size):  # only the first batches count toward the SDR
            predicted_maps = predicted_masks * x_input_map_multi
            utils.bss_eval(config, predicted_maps,
                           eval_data['multi_spk_fea_list'], raw_tgt, eval_data,
                           dst='batch_output1t2')
            del predicted_maps, predicted_masks, x_input_map_multi
            SDR_SUM = np.append(SDR_SUM, bss_test.cal('batch_output1t2/'))
            print('SDR_aver_now:', SDR_SUM.mean())
            # input('Press any key to continue......')
        elif batch_idx == (500 // config.batch_size) + 1 and SDR_SUM.mean() > best_SDR:
            # only record the best SDR once.
            print('Best SDR from {}---->{}'.format(best_SDR, SDR_SUM.mean()))
            best_SDR = SDR_SUM.mean()
            # save_model(log_path + 'checkpoint_bestSDR{}.pt'.format(best_SDR))

        candidate += [
            convertToLabels(dict_idx2spk, s, dict_spk2idx['<EOS>'])
            for s in samples
        ]
        # source += raw_src
        reference += raw_tgt
        print('samples:', samples)
        print('can:{}, \nref:{}'.format(candidate[-1 * config.batch_size:],
                                        reference[-1 * config.batch_size:]))
        alignments += [align for align in alignment]
        batch_idx += 1
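
# The config.WFM branch above builds a Wiener-filter-like mask from the clean
# sources: each speaker's mask is its squared magnitude over the summed squared
# magnitudes of all speakers. A standalone sketch (wfm_mask is a hypothetical
# helper; eps mirrors the 1e-15 guard used in the training code):
import torch

def wfm_mask(feas_tgt, eps=1e-15):
    """feas_tgt: (bs, topk, T, F) clean-source magnitudes -> masks, same shape."""
    power = feas_tgt * feas_tgt
    denom = power.sum(dim=1, keepdim=True) + eps  # total power per T-F bin
    return power / denom  # per-bin masks that sum to ~1 across speakers

# mask = wfm_mask(torch.rand(2, 2, 10, 8)); mask.sum(1) is ~1 everywhere.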
def train(epoch):
    global e, updates, total_loss, start_time, report_total, total_loss_sgm, total_loss_ss
    if config.MLMSE:
        global Var
    e = epoch
    model.train()
    SDR_SUM = np.array([])

    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])
        if config.is_dis:
            scheduler_dis.step()
        lera.log({
            'lr': scheduler.get_lr()[0],
        })

    if opt.model == 'gated':
        model.current_epoch = epoch

    train_data_gen = prepare_data('once', 'train')
    # for raw_src, src, src_len, raw_tgt, tgt, tgt_len in trainloader:
    while True:
        try:
            train_data = next(train_data_gen)
            if train_data == False:
                print('SDR_aver_epoch:', SDR_SUM.mean())
                break  # The generator for this epoch is exhausted; move on to the next epoch.

            src = Variable(torch.from_numpy(train_data['mix_feas']))
            # raw_tgt = [spk.keys() for spk in train_data['multi_spk_fea_list']]
            raw_tgt = [
                sorted(spk.keys())
                for spk in train_data['multi_spk_fea_list']
            ]
            feas_tgt = models.rank_feas(
                raw_tgt,
                train_data['multi_spk_fea_list'])  # Target spectrograms: aim_size, len, fre.

            # The index tensors below must all be LongTensors.
            tgt_max_len = config.MAX_MIX + 2  # with BOS and EOS
            tgt = Variable(
                torch.from_numpy(
                    np.array(
                        [[0] + [dict_spk2idx[spk] for spk in spks] +
                         (tgt_max_len - len(spks) - 1) * [dict_spk2idx['<EOS>']]
                         for spks in raw_tgt],
                        dtype=int))).transpose(0, 1)  # Convert speakers to indices and wrap with BOS/EOS.
            src_len = Variable(
                torch.LongTensor(config.batch_size).zero_() +
                mix_speech_len).unsqueeze(0)
            tgt_len = Variable(
                torch.LongTensor([
                    len(one_spk)
                    for one_spk in train_data['multi_spk_fea_list']
                ])).unsqueeze(0)

            if use_cuda:
                src = src.cuda().transpose(0, 1)
                tgt = tgt.cuda()
                src_len = src_len.cuda()
                tgt_len = tgt_len.cuda()
                feas_tgt = feas_tgt.cuda()

            model.zero_grad()
            # optim.optimizer.zero_grad()

            # aim_list holds the flat indices of positions that carry a real speaker (not EOS).
            aim_list = (tgt[1:-1].transpose(0, 1).contiguous().view(-1) !=
                        dict_spk2idx['<EOS>']).nonzero().squeeze()
            aim_list = aim_list.data.cpu().numpy()

            outputs, targets, multi_mask = model(
                src, src_len, tgt, tgt_len, dict_spk2idx
            )  # outputs are the hidden states before the final classification layer and can be used directly.
            print('mask size:', multi_mask.size())

            if 1 and len(opt.gpus) > 1:
                sgm_loss, num_total, num_correct = model.module.compute_loss(
                    outputs, targets, opt.memory)
            else:
                sgm_loss, num_total, num_correct = model.compute_loss(
                    outputs, targets, opt.memory)
            print('loss for SGM, this batch:', sgm_loss.item() / num_total)

            src = src.transpose(0, 1)
            # Expand the raw mixed features to topk_max channels.
            siz = src.size()
            assert len(siz) == 3
            topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
            x_input_map_multi = torch.unsqueeze(src, 1).expand(
                siz[0], topk_max, siz[1],
                siz[2]).contiguous().view(-1, siz[1], siz[2])
            x_input_map_multi = x_input_map_multi[aim_list]
            multi_mask = multi_mask.transpose(0, 1)

            if 1 and len(opt.gpus) > 1:
                if config.MLMSE:
                    Var = model.module.update_var(x_input_map_multi,
                                                  multi_mask, feas_tgt)
                    lera.log_image(u'Var weight',
                                   Var.data.cpu().numpy().reshape(
                                       config.speech_fre, config.speech_fre,
                                       1).repeat(3, 2),
                                   clip=(-1, 1))
                    ss_loss = model.module.separation_loss(
                        x_input_map_multi, multi_mask, feas_tgt, Var)
                else:
                    ss_loss = model.module.separation_loss(
                        x_input_map_multi, multi_mask, feas_tgt)
            else:
                ss_loss = model.separation_loss(x_input_map_multi, multi_mask,
                                                feas_tgt)

            loss = sgm_loss + 5 * ss_loss

            # discriminator loss
            if config.is_dis:
                dis_loss = models.loss.dis_loss(config, topk_max, model_dis,
                                                x_input_map_multi, multi_mask,
                                                feas_tgt, func_dis)
                loss = loss + dis_loss
                # print('dis_para', next(model_dis.parameters())[0])
                # print('ss_para', next(model.parameters())[0])

            loss.backward()
            # print('total loss:', loss)
            total_loss_sgm += sgm_loss.item()
            total_loss_ss += ss_loss.item()
            lera.log({
                'sgm_loss': sgm_loss.item(),
                'ss_loss': ss_loss.item(),
                'loss:': loss.item(),
            })

            if (updates % config.eval_interval) in [
                    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
            ]:
                predicted_maps = multi_mask * x_input_map_multi
                # predicted_maps = Variable(feas_tgt)
                utils.bss_eval(config, predicted_maps,
                               train_data['multi_spk_fea_list'], raw_tgt,
                               train_data, dst='batch_outputjaa')
                del predicted_maps, multi_mask, x_input_map_multi
                # input('wait to continue......')
                sdr_aver_batch = bss_test.cal('batch_outputjaa/')
                lera.log({'SDR sample': sdr_aver_batch})
                SDR_SUM = np.append(SDR_SUM, sdr_aver_batch)
                print('SDR_aver_now:', SDR_SUM.mean())

            total_loss += loss.item()
            report_total += num_total
            optim.step()
            if config.is_dis:
                optim_dis.step()
            updates += 1

            if updates % 30 == 0:
                logging(
                    "time: %6.3f, epoch: %3d, updates: %8d, train loss this batch: %6.3f, sgm loss: %6.6f, ss loss: %6.6f\n"
                    % (time.time() - start_time, epoch, updates,
                       loss / num_total, total_loss_sgm / 30.0,
                       total_loss_ss / 30.0))
                total_loss_sgm, total_loss_ss = 0, 0

            # continue
            if 0 or updates % config.eval_interval == 0 and epoch > 1:
                logging(
                    "time: %6.3f, epoch: %3d, updates: %8d, train loss: %6.5f\n"
                    % (time.time() - start_time, epoch, updates,
                       total_loss / report_total))
                print('evaluating after %d updates...\r' % updates)
                score = eval(epoch)
                for metric in config.metric:
                    scores[metric].append(score[metric])
                    lera.log({
                        'sgm_micro_f1': score[metric],
                    })
                    if metric == 'micro_f1' and score[metric] >= max(scores[metric]):
                        save_model(log_path + 'best_' + metric + '_checkpoint.pt')
                    if metric == 'hamming_loss' and score[metric] <= min(scores[metric]):
                        save_model(log_path + 'best_' + metric + '_checkpoint.pt')
                model.train()
                total_loss = 0
                start_time = 0
                report_total = 0
        except RuntimeError as eeee:
            print('Errors here eeee: ', eeee)
            continue
        except Exception as dddd:
            print('\n\n\nRare errors: ', dddd)
            continue
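
# For orientation: aim_list above flattens tgt (with its BOS/EOS rows stripped)
# and keeps the positions holding a real speaker label, so the expanded mixture
# channels can be narrowed to the valid ones. A tiny self-contained example of
# that indexing (eos_idx stands in for dict_spk2idx['<EOS>']):
import torch

eos_idx = 102
# tgt body transposed to (bs, max_spk): two samples with 2 and 1 speakers.
tgt_body = torch.tensor([[5, 7], [9, eos_idx]])
aim_list = (tgt_body.contiguous().view(-1) != eos_idx).nonzero().squeeze()
print(aim_list.tolist())  # [0, 1, 2]: three valid speaker slots out of four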
def eval(epoch):
    model.eval()
    reference, candidate, source, alignments = [], [], [], []
    e = epoch
    test_or_valid = 'test'
    print('Test or valid:', test_or_valid)
    eval_data_gen = prepare_data('once', test_or_valid, config.MIN_MIX,
                                 config.MAX_MIX)
    # for raw_src, src, src_len, raw_tgt, tgt, tgt_len in validloader:
    SDR_SUM = np.array([])
    SDRi_SUM = np.array([])
    batch_idx = 0
    global best_SDR, Var
    while True:  # for ___ in range(2):
        print('-' * 30)
        eval_data = next(eval_data_gen)
        if eval_data == False:
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(eval_data['mix_feas']))
        raw_tgt = [
            sorted(spk.keys()) for spk in eval_data['multi_spk_fea_list']
        ]
        top_k = len(raw_tgt[0])
        # The index tensors below must all be LongTensors.
        # tgt = Variable(torch.from_numpy(np.array([[0] + [dict_spk2idx[spk] for spk in spks] + [dict_spk2idx['<EOS>']] for spks in raw_tgt], dtype=int))).transpose(0, 1)
        tgt = Variable(torch.ones(
            top_k + 2, config.batch_size))  # A dummy tgt; the speaker names do not matter at test time.
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            len(eval_data['multi_spk_fea_list'][0])).unsqueeze(0)
        feas_tgt = models.rank_feas(
            raw_tgt, eval_data['multi_spk_fea_list'])  # target spectrograms

        if config.WFM:
            tmp_size = feas_tgt.size()
            assert len(tmp_size) == 4
            feas_tgt_square = feas_tgt * feas_tgt
            feas_tgt_square_sum = torch.sum(
                feas_tgt_square, dim=1, keepdim=True).expand(tmp_size)
            WFM_mask = feas_tgt_square / (feas_tgt_square_sum + 1e-10)

        if use_cuda:
            src = src.cuda().transpose(0, 1)
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()
            if config.WFM:
                WFM_mask = WFM_mask.cuda()

        try:
            if 1 and len(opt.gpus) > 1:
                # samples, alignment = model.module.sample(src, src_len)
                samples, alignment, hiddens, predicted_masks = model.module.beam_sample(
                    src, src_len, dict_spk2idx, tgt, beam_size=config.beam_size)
            else:
                samples, alignment, hiddens, predicted_masks = model.beam_sample(
                    src, src_len, dict_spk2idx, tgt, beam_size=config.beam_size)
        except Exception as info:
            print('**************Error eval occurs here************:', info)
            continue

        if len(samples[0]) != 3:
            print('Wrong num of mixtures, passed.')
            continue

        if config.top1:
            predicted_masks = torch.cat([predicted_masks, 1 - predicted_masks], 1)

        # Expand the raw mixed features to topk channels.
        src = src.transpose(0, 1)
        siz = src.size()
        assert len(siz) == 3
        topk = feas_tgt.size()[1]
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk, siz[1], siz[2])
        if config.WFM:
            feas_tgt = x_input_map_multi.data * WFM_mask

        if 1 and len(opt.gpus) > 1:
            ss_loss = model.module.separation_loss(x_input_map_multi,
                                                   predicted_masks, feas_tgt,
                                                   None)
        else:
            ss_loss = model.separation_loss(x_input_map_multi, predicted_masks,
                                            feas_tgt, None)
        print('loss for ss, this batch:', ss_loss.item())
        lera.log({
            'ss_loss_' + test_or_valid: ss_loss.item(),
        })
        del ss_loss, hiddens

        if 0 and config.reID:
            print('#' * 30 + 'ReID part ' + '#' * 30)
            predict_multi_map = predicted_masks * x_input_map_multi
            predict_multi_map = predict_multi_map.view(
                -1, mix_speech_len, speech_fre).transpose(0, 1)
            tgt_reID = Variable(torch.ones(
                3, top_k * config.batch_size))  # A dummy tgt; the names do not matter at test time.
            src_len_reID = Variable(
                torch.LongTensor(topk * config.batch_size).zero_() +
                mix_speech_len).unsqueeze(0).cuda()
            try:
                if 1 and len(opt.gpus) > 1:
                    # samples, alignment = model.module.sample(src, src_len)
                    samples, alignment, hiddens, predicted_masks = model.module.beam_sample(
                        predict_multi_map, src_len_reID, dict_spk2idx,
                        tgt_reID, beam_size=config.beam_size)
                else:
                    samples, alignment, hiddens, predicted_masks = model.beam_sample(
                        predict_multi_map, src_len_reID, dict_spk2idx,
                        tgt_reID, beam_size=config.beam_size)
            except Exception as info:
                print('**************Error eval occurs here************:', info)
            # outputs_reID, targets_reID, multi_mask_reID = model(predict_multi_map, src_len_reID, tgt_reID, tgt_len_reID)

            if batch_idx <= (500 // config.batch_size):  # only the first batches count toward the SDR
                # predicted_maps = predicted_masks * x_input_map_multi
                predicted_maps = predicted_masks * predict_multi_map.transpose(
                    0, 1).unsqueeze(1)
                predicted_maps = predicted_maps.transpose(0, 1)
                # predicted_maps = Variable(feas_tgt)
                utils.bss_eval(config, predicted_maps,
                               eval_data['multi_spk_fea_list'], raw_tgt,
                               eval_data, dst='batch_output23jo')
                del predicted_maps, predicted_masks, x_input_map_multi, predict_multi_map
                SDR, SDRi = bss_test.cal('batch_output23jo/')
                SDR_SUM = np.append(SDR_SUM, SDR)
                SDRi_SUM = np.append(SDRi_SUM, SDRi)
                print('SDR_aver_now:', SDR_SUM.mean())
                print('SDRi_aver_now:', SDRi_SUM.mean())
                lera.log({'SDR sample': SDR_SUM.mean()})
                lera.log({'SDRi sample': SDRi_SUM.mean()})
            elif batch_idx == (500 // config.batch_size) + 1 and SDR_SUM.mean() > best_SDR:
                # only record the best SDR once.
                print('Best SDR from {}---->{}'.format(best_SDR, SDR_SUM.mean()))
                best_SDR = SDR_SUM.mean()
                # save_model(log_path + 'checkpoint_bestSDR{}.pt'.format(best_SDR))
            print('#' * 30 + 'ReID part ' + '#' * 30)
        elif batch_idx <= (5000 // config.batch_size):  # only the first batches count toward the SDR
            predicted_maps = predicted_masks * x_input_map_multi
            # predicted_maps = Variable(feas_tgt)
            utils.bss_eval(config, predicted_maps,
                           eval_data['multi_spk_fea_list'], raw_tgt, eval_data,
                           dst='batch_output23jo')
            del predicted_maps, predicted_masks, x_input_map_multi
            SDR, SDRi = bss_test.cal('batch_output23jo/')
            SDR_SUM = np.append(SDR_SUM, SDR)
            SDRi_SUM = np.append(SDRi_SUM, SDRi)
            print('SDR_aver_now:', SDR_SUM.mean())
            print('SDRi_aver_now:', SDRi_SUM.mean())
            lera.log({'SDR sample': SDR_SUM.mean()})
            lera.log({'SDRi sample': SDRi_SUM.mean()})
        elif batch_idx == (5000 // config.batch_size) + 1 and SDR_SUM.mean() > best_SDR:
            # only record the best SDR once.
            print('Best SDR from {}---->{}'.format(best_SDR, SDR_SUM.mean()))
            best_SDR = SDR_SUM.mean()
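
# bss_test.cal above reads the separated wavs back from disk and reports SDR and
# SDRi. A sketch of those two metrics with mir_eval's reference implementation
# (an assumption: the repo's bss_test may compute them differently; SDRi is
# taken here as the gain over using the raw mixture as every estimate):
import numpy as np
import mir_eval

def sdr_and_sdri(reference, estimate, mixture):
    """reference/estimate: (nsrc, nsamples) arrays; mixture: (nsamples,)."""
    sdr, _, _, _ = mir_eval.separation.bss_eval_sources(reference, estimate)
    mix_stack = np.stack([mixture] * reference.shape[0])
    sdr0, _, _, _ = mir_eval.separation.bss_eval_sources(reference, mix_stack)
    return sdr.mean(), (sdr - sdr0).mean()  # SDR, SDR improvement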
def train_recu(epoch):
    global e, updates, total_loss, start_time, report_total, report_correct, total_loss_sgm, total_loss_ss
    e = epoch
    model.train()
    SDR_SUM = np.array([])
    SDRi_SUM = np.array([])

    if updates <= config.warmup:  # Still in warmup: the lr is handled inside the loop below.
        pass
    elif config.schedule and scheduler.get_lr()[0] > 5e-7:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_lr()[0])
        lera.log({
            'lr': [group['lr'] for group in optim.optimizer.param_groups][0],
        })

    if opt.model == 'gated':
        model.current_epoch = epoch

    train_data_gen = prepare_data('once', 'train')
    while True:
        if updates <= config.warmup:  # Inside warmup: ramp the learning rate up.
            tmp_lr = config.learning_rate * min(
                max(updates, 1) ** (-0.5),
                max(updates, 1) * (config.warmup ** (-1.5)))
            for param_group in optim.optimizer.param_groups:
                param_group['lr'] = tmp_lr
            scheduler.base_lrs = list(
                [group['lr'] for group in optim.optimizer.param_groups])
            if updates % 100 == 0:  # Log the warmup progress occasionally.
                print(updates)
                print("Warmup learning rate to %g" % tmp_lr)
                lera.log({
                    'lr': [group['lr'] for group in optim.optimizer.param_groups][0],
                })

        train_data = next(train_data_gen)
        if train_data == False:
            print('SDR_aver_epoch:', SDR_SUM.mean())
            print('SDRi_aver_epoch:', SDRi_SUM.mean())
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(train_data['mix_feas']))
        # raw_tgt = [spk.keys() for spk in train_data['multi_spk_fea_list']]
        # raw_tgt = [sorted(spk.keys()) for spk in train_data['multi_spk_fea_list']]
        raw_tgt = train_data['batch_order']
        feas_tgt = models.rank_feas(
            raw_tgt, train_data['multi_spk_fea_list'])  # Target spectrograms: aim_size, len, fre.

        if 0 and config.WFM:  # disabled branch
            tmp_size = feas_tgt.size()
            assert len(tmp_size) == 3
            feas_tgt_square = feas_tgt * feas_tgt
            feas_tgt_sum_square = torch.sum(
                feas_tgt_square, dim=0, keepdim=True).expand(tmp_size)
            WFM_mask = feas_tgt_square / (feas_tgt_sum_square + 1e-15)
            WFM_mask = WFM_mask.cuda()
            feas_tgt = x_input_map_multi.data * WFM_mask

        # The index tensors below must all be LongTensors.
        src_original = src.transpose(0, 1)  # to T,bs,F
        multi_mask_all = None
        for len_idx in range(config.MIN_MIX + 2, 2, -1):  # Separate the speakers one at a time.
            # len_idx = 3
            tgt_max_len = len_idx  # 4, 3, 2 with BOS and EOS
            tgt = Variable(
                torch.from_numpy(
                    np.array(
                        [[0] + [
                            dict_spk2idx[spk]
                            for spk in spks[-1 * (tgt_max_len - 2):]
                        ] + 1 * [dict_spk2idx['<EOS>']] for spks in raw_tgt],
                        dtype=int))).transpose(
                            0, 1)  # Convert speakers to indices and wrap with BOS/EOS; shape 4,bs.
            src_len = Variable(
                torch.LongTensor(config.batch_size).zero_() +
                mix_speech_len).unsqueeze(0)
            tgt_len = Variable(
                torch.LongTensor([
                    tgt_max_len - 2
                    for one_spk in train_data['multi_spk_fea_list']
                ])).unsqueeze(0)

            if use_cuda:
                src = src.cuda().transpose(0, 1)  # to T,bs,fre
                src_original = src_original.cuda()  # to T,bs,fre
                tgt = tgt.cuda()
                src_len = src_len.cuda()
                tgt_len = tgt_len.cuda()
                feas_tgt = feas_tgt.cuda()

            model.zero_grad()
            outputs, targets, multi_mask, gamma = model(
                src, src_len, tgt, tgt_len, dict_spk2idx, src_original
            )  # outputs are the hidden states before the final classification layer and can be used directly.
            print('mask size:', multi_mask.size())
            # writer.add_histogram('global gamma', gamma, updates)

            if 1 and len(opt.gpus) > 1:
                sgm_loss, num_total, num_correct = model.module.compute_loss(
                    outputs, targets, opt.memory)
            else:
                sgm_loss, num_total, num_correct = model.compute_loss(
                    outputs, targets, opt.memory)
            print('loss for SGM, this batch:', sgm_loss.cpu().item())
            writer.add_scalars(
                'scalar/loss',
                {'sgm_loss' + str(len_idx): sgm_loss.cpu().item()}, updates)

            src = src_original.transpose(0, 1)  # Make sure separation uses the original mixture.
            # Expand the raw mixed features to topk_max channels.
            siz = src.size()  # bs,T,F
            assert len(siz) == 3
            # topk_max = config.MAX_MIX  # largest possible number of speakers (topk)
            topk_max = len_idx - 2  # largest possible number of speakers (topk)
            x_input_map_multi = torch.unsqueeze(src, 1).expand(
                siz[0], topk_max, siz[1],
                siz[2]).contiguous().view(-1, siz[1], siz[2])  # bs*topk,T,F
            # x_input_map_multi = x_input_map_multi[aim_list]
            multi_mask = multi_mask.transpose(0, 1)

            if len_idx == 4:
                aim_feas = list(range(0, 2 * config.batch_size, 2))  # Take the first speaker of each sample.
                multi_mask_all = multi_mask  # bs*topk,T,F
                src = src * (1 - multi_mask[aim_feas])  # batch-first; bs,T,F
                # src = src.transpose(0, 1) * (1 - multi_mask[aim_feas])  # batch-first
                src = src.detach()  # The second pass works on the residual left by the first one.
            elif len_idx == 3:
                aim_feas = list(range(1, 2 * config.batch_size, 2))  # Take the second speaker of each sample.
                multi_mask_all[aim_feas] = multi_mask
                feas_tgt = feas_tgt[aim_feas]

            if 1 and len(opt.gpus) > 1:
                ss_loss = model.module.separation_loss(x_input_map_multi,
                                                       multi_mask, feas_tgt)
            else:
                ss_loss = model.separation_loss(x_input_map_multi, multi_mask,
                                                feas_tgt)
            print('loss for SS, this batch:', ss_loss.cpu().item())
            writer.add_scalars(
                'scalar/loss',
                {'ss_loss' + str(len_idx): ss_loss.cpu().item()}, updates)

            loss = sgm_loss + 5 * ss_loss
            loss.backward()
            optim.step()
            lera.log({
                'sgm_loss' + str(len_idx): sgm_loss.cpu().item(),
                'ss_loss' + str(len_idx): ss_loss.cpu().item(),
                'loss:' + str(len_idx): loss.cpu().item(),
            })
            total_loss_sgm += sgm_loss.cpu().item()
            total_loss_ss += ss_loss.cpu().item()

        multi_mask = multi_mask_all
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], 2, siz[1], siz[2]).contiguous().view(-1, siz[1], siz[2])

        if updates > 10 and updates % config.eval_interval in [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
        ]:
            predicted_maps = multi_mask * x_input_map_multi
            # predicted_maps = Variable(feas_tgt)
            utils.bss_eval(config, predicted_maps,
                           train_data['multi_spk_fea_list'], raw_tgt,
                           train_data, dst='batch_output')
            del predicted_maps, multi_mask, x_input_map_multi
            sdr_aver_batch, sdri_aver_batch = bss_test.cal('batch_output/')
            lera.log({'SDR sample': sdr_aver_batch})
            lera.log({'SDRi sample': sdri_aver_batch})
            writer.add_scalars('scalar/loss', {
                'SDR_sample': sdr_aver_batch,
                'SDRi_sample': sdri_aver_batch
            }, updates)
            SDR_SUM = np.append(SDR_SUM, sdr_aver_batch)
            SDRi_SUM = np.append(SDRi_SUM, sdri_aver_batch)
            print('SDR_aver_now:', SDR_SUM.mean())
            print('SDRi_aver_now:', SDRi_SUM.mean())

        total_loss += loss.cpu().item()
        report_correct += num_correct.cpu().item()
        report_total += num_total.cpu().item()
        updates += 1

        if updates % 30 == 0:
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss this batch: %6.3f, sgm loss: %6.6f, ss loss: %6.6f, label acc: %6.6f\n"
                % (time.time() - start_time, epoch, updates, loss / num_total,
                   total_loss_sgm / 30.0, total_loss_ss / 30.0,
                   report_correct / report_total))
            lera.log({'label_acc': report_correct / report_total})
            writer.add_scalars('scalar/loss',
                               {'label_acc': report_correct / report_total},
                               updates)
            total_loss_sgm, total_loss_ss = 0, 0

        # continue
        if 0 and updates % config.eval_interval == 0 and epoch > 3:
            # Train at least a few epochs before testing; earlier than that the model
            # has learned too little and the evaluation is unreliable.
            logging(
                "time: %6.3f, epoch: %3d, updates: %8d, train loss: %6.5f\n"
                % (time.time() - start_time, epoch, updates,
                   total_loss / report_total))
            print('evaluating after %d updates...\r' % updates)
            original_bs = config.batch_size
            score = eval(epoch)  # batch_size becomes 1 during eval
            config.batch_size = original_bs
            for metric in config.metric:
                scores[metric].append(score[metric])
                lera.log({
                    'sgm_micro_f1': score[metric],
                })
                if metric == 'micro_f1' and score[metric] >= max(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
                if metric == 'hamming_loss' and score[metric] <= min(scores[metric]):
                    save_model(log_path + 'best_' + metric + '_checkpoint.pt')
            model.train()
            total_loss = 0
            start_time = 0
            report_total = 0
            report_correct = 0

        if 1 and updates % config.save_interval == 1:
            save_model(log_path + 'TDAAv3_{}.pt'.format(updates))
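
# The idea behind train_recu above: peel speakers off one at a time by predicting
# a mask, extracting the first speaker, and feeding the residual
# src * (1 - mask) back in for the next pass. A minimal sketch of that
# residual-masking recursion, with a stand-in predict_mask in place of the real
# sequence-to-sequence model:
import torch

def recursive_separate(mix, predict_mask, num_spk=2):
    """mix: (T, F) mixture spectrogram -> list of per-speaker estimates."""
    residual, outputs = mix, []
    for _ in range(num_spk):
        mask = predict_mask(residual)  # (T, F) mask in [0, 1]
        outputs.append(mask * residual)  # current speaker's spectrogram
        residual = (residual * (1 - mask)).detach()  # next pass sees the detached remainder
    return outputs

# e.g. recursive_separate(torch.rand(10, 8), lambda x: torch.full_like(x, 0.5))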
def eval(epoch):
    model.eval()
    reference, candidate, source, alignments = [], [], [], []
    e = epoch
    test_or_valid = 'valid'
    print('Test or valid:', test_or_valid)
    eval_data_gen = prepare_data('once', test_or_valid, config.MIN_MIX,
                                 config.MAX_MIX)
    # for raw_src, src, src_len, raw_tgt, tgt, tgt_len in validloader:
    SDR_SUM = np.array([])
    batch_idx = 0
    global best_SDR, Var
    while True:  # for ___ in range(2):
        print('-' * 30)
        eval_data = next(eval_data_gen)
        if eval_data == False:
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(eval_data['mix_feas']))
        raw_tgt = [
            sorted(spk.keys()) for spk in eval_data['multi_spk_fea_list']
        ]
        top_k = len(raw_tgt[0])
        # The index tensors below must all be LongTensors.
        # tgt = Variable(torch.from_numpy(np.array([[0] + [dict_spk2idx[spk] for spk in spks] + [dict_spk2idx['<EOS>']] for spks in raw_tgt], dtype=int))).transpose(0, 1)
        tgt = Variable(torch.ones(
            top_k + 2, config.batch_size))  # A dummy tgt; the speaker names do not matter at test time.
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            len(eval_data['multi_spk_fea_list'][0])).unsqueeze(0)
        feas_tgt = models.rank_feas(
            raw_tgt, eval_data['multi_spk_fea_list'])  # target spectrograms

        if config.WFM:
            tmp_size = feas_tgt.size()
            assert len(tmp_size) == 4
            feas_tgt_sum = torch.sum(feas_tgt, dim=1, keepdim=True)
            feas_tgt_sum_square = (feas_tgt_sum * feas_tgt_sum).expand(tmp_size)
            feas_tgt_square = feas_tgt * feas_tgt
            WFM_mask = feas_tgt_square / feas_tgt_sum_square

        if use_cuda:
            src = src.cuda().transpose(0, 1)
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()
            if config.WFM:
                WFM_mask = WFM_mask.cuda()

        if config.buffer_size or config.buffer_shift:
            # First convert the utterance into realtime (streaming) batches.
            assert src.size()[1] == 1
            left_padding = Variable(
                torch.zeros(config.buffer_size, src.size()[1],
                            src.size()[-1]).cuda())
            src = torch.cat((left_padding, src), dim=0)
            split_idx = 0
            src_new = Variable(
                torch.zeros(config.buffer_size + config.buffer_shift,
                            mix_speech_len // config.buffer_shift + 1,
                            src.size()[-1]).cuda())
            batch_counter = 0
            while True:
                print('split_idx at:', split_idx)
                split_len = config.buffer_size + config.buffer_shift  # the length of every split
                if split_idx + split_len > src.size()[0]:  # past the right end
                    print('Need to add right padding with len:',
                          (split_idx + split_len) - src.size()[0])
                    right_padding = Variable(
                        torch.zeros((split_idx + split_len) - src.size()[0],
                                    src.size()[1], src.size()[-1]).cuda())
                    src = torch.cat((src, right_padding), dim=0)
                    src_split = src[split_idx:(split_idx + split_len)]
                    src_new[:, batch_counter] = src_split
                    break
                src_split = src[split_idx:(split_idx + split_len)]
                src_new[:, batch_counter] = src_split
                split_idx += config.buffer_shift
                batch_counter += 1
            assert batch_counter + 1 == src_new.size()[1]
            src_len[0] = config.buffer_shift + config.buffer_size
            src_len = src_len.expand(1, src_new.size()[1])

        try:
            if 1 and len(opt.gpus) > 1:
                # samples, alignment = model.module.sample(src, src_len)
                samples, alignment, hiddens, predicted_masks = model.module.beam_sample(
                    src_new, src_len, dict_spk2idx, tgt,
                    beam_size=config.beam_size)
            else:
                samples, alignment, hiddens, predicted_masks = model.beam_sample(
                    src_new, src_len, dict_spk2idx, tgt,
                    beam_size=config.beam_size)
        except Exception as info:
            print('**************Error occurs here************:', info)
            continue

        if config.top1:
            predicted_masks = torch.cat([predicted_masks, 1 - predicted_masks], 1)

        if config.buffer_size and config.buffer_shift:
            # Then recover the whole maps from the streaming windows.
            # masks: [num_splits, topk, buffer_size + buffer_shift, fre]
            masks_recover = Variable(
                torch.zeros(1, predicted_masks.size(1), mix_speech_len,
                            speech_fre).cuda())
            recover_idx = 0
            for batch_counter in range(predicted_masks.size(0)):
                if not batch_counter == predicted_masks.size(0) - 1:
                    masks_recover[:, :, recover_idx:recover_idx + config.buffer_shift] = \
                        predicted_masks[batch_counter, :, -1 * config.buffer_shift:]
                else:  # the last shift
                    assert mix_speech_len - recover_idx == \
                        config.buffer_shift - right_padding.size(0)
                    masks_recover[:, :, recover_idx:] = predicted_masks[
                        batch_counter, :,
                        -1 * config.buffer_shift:(-1 * right_padding.size(0))]
                recover_idx += config.buffer_shift
            predicted_masks = masks_recover
            src = Variable(torch.from_numpy(
                eval_data['mix_feas'])).transpose(0, 1).cuda()

        # Expand the raw mixed features to topk channels.
        src = src.transpose(0, 1)
        siz = src.size()
        assert len(siz) == 3
        topk = feas_tgt.size()[1]
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk, siz[1], siz[2])
        if config.WFM:
            feas_tgt = x_input_map_multi.data * WFM_mask

        '''
        if 1 and len(opt.gpus) > 1:
            ss_loss = model.module.separation_loss(x_input_map_multi, predicted_masks, feas_tgt, Var)
        else:
            ss_loss = model.separation_loss(x_input_map_multi, predicted_masks, feas_tgt, None)
        print('loss for ss, this batch:', ss_loss.item())
        lera.log({
            'ss_loss_' + test_or_valid: ss_loss.item(),
        })
        del ss_loss, hiddens
        # '''

        if batch_idx <= (500 // config.batch_size):  # only the first batches count toward the SDR
            # x_input_map_multi = x_input_map_multi[:, :, :config.buffer_shift]
            predicted_maps = predicted_masks * x_input_map_multi
            # predicted_maps = Variable(feas_tgt)
            utils.bss_eval(config, predicted_maps,
                           eval_data['multi_spk_fea_list'], raw_tgt, eval_data,
                           dst='batch_outputwaddd')
            del predicted_maps, predicted_masks, x_input_map_multi
            SDR_SUM = np.append(SDR_SUM, bss_test.cal('batch_outputwaddd/'))
            print('SDR_aver_now:', SDR_SUM.mean())
            lera.log({'SDR sample': SDR_SUM.mean()})
            # input('Press any key to continue......')
        elif batch_idx == (500 // config.batch_size) + 1 and SDR_SUM.mean() > best_SDR:
            # only record the best SDR once.
            print('Best SDR from {}---->{}'.format(best_SDR, SDR_SUM.mean()))
            best_SDR = SDR_SUM.mean()
            # save_model(log_path + 'checkpoint_bestSDR{}.pt'.format(best_SDR))

        candidate += [
            convertToLabels(dict_idx2spk, s, dict_spk2idx['<EOS>'])
            for s in samples
        ]
        # source += raw_src
        reference += raw_tgt
        print('samples:', samples)
        print('can:{}, \nref:{}'.format(candidate[-1 * config.batch_size:],
                                        reference[-1 * config.batch_size:]))
        alignments += [align for align in alignment]
        batch_idx += 1
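
# The buffer branch above simulates streaming: the utterance is left-padded with
# buffer_size zero frames, cut into windows of buffer_size + buffer_shift frames
# every buffer_shift frames (right-padding the tail), and the last buffer_shift
# frames of each window's mask are stitched back together. A compact sketch of
# the chunking half (make_buffers is a hypothetical helper):
import torch

def make_buffers(src, buffer_size, buffer_shift):
    """src: (T, F) -> (num_chunks, buffer_size + buffer_shift, F)."""
    T, F = src.shape
    left = torch.zeros(buffer_size, F)
    pad_len = (-T) % buffer_shift  # right padding so the chunks tile T exactly
    right = torch.zeros(pad_len, F)
    padded = torch.cat([left, src, right], dim=0)
    chunks = [padded[i:i + buffer_size + buffer_shift]
              for i in range(0, T + pad_len, buffer_shift)]
    return torch.stack(chunks)

# Each chunk carries buffer_size frames of history plus buffer_shift new frames,
# matching src_len[0] = config.buffer_shift + config.buffer_size in the loop above.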
def eval(epoch):
    model.eval()
    reference, candidate, source, alignments = [], [], [], []
    e = epoch
    test_or_valid = 'valid'
    print('Test or valid:', test_or_valid)
    eval_data_gen = prepare_data('once', test_or_valid, config.MIN_MIX,
                                 config.MAX_MIX)
    # for raw_src, src, src_len, raw_tgt, tgt, tgt_len in validloader:
    SDR_SUM = np.array([])
    batch_idx = 0
    global best_SDR
    while True:  # for ___ in range(2):
        print('-' * 30)
        eval_data = next(eval_data_gen)
        if eval_data == False:
            break  # The generator for this epoch is exhausted; move on to the next epoch.

        src = Variable(torch.from_numpy(eval_data['mix_feas']))
        raw_tgt = [
            sorted(spk.keys()) for spk in eval_data['multi_spk_fea_list']
        ]
        top_k = len(raw_tgt[0])
        # The index tensors below must all be LongTensors.
        # tgt = Variable(torch.from_numpy(np.array([[0] + [dict_spk2idx[spk] for spk in spks] + [dict_spk2idx['<EOS>']] for spks in raw_tgt], dtype=int))).transpose(0, 1)
        tgt = Variable(torch.ones(
            top_k + 2, config.batch_size))  # A dummy tgt; the speaker names do not matter at test time.
        src_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            mix_speech_len).unsqueeze(0)
        tgt_len = Variable(
            torch.LongTensor(config.batch_size).zero_() +
            len(eval_data['multi_spk_fea_list'][0])).unsqueeze(0)
        feas_tgt = models.rank_feas(
            raw_tgt, eval_data['multi_spk_fea_list'])  # target spectrograms
        relitu(mix_speech_len, speech_fre, feas_tgt.numpy()[0, 0].transpose())
        relitu(mix_speech_len, speech_fre, feas_tgt.numpy()[0, 1].transpose())
        # 1/0

        if config.WFM:
            tmp_size = feas_tgt.size()
            assert len(tmp_size) == 4
            feas_tgt_sum = torch.sum(feas_tgt, dim=1, keepdim=True)
            feas_tgt_sum_square = (feas_tgt_sum * feas_tgt_sum).expand(tmp_size)
            feas_tgt_square = feas_tgt * feas_tgt
            WFM_mask = feas_tgt_square / feas_tgt_sum_square

        if use_cuda:
            src = src.cuda()
            tgt = tgt.cuda()
            src_len = src_len.cuda()
            tgt_len = tgt_len.cuda()
            feas_tgt = feas_tgt.cuda()
            if config.WFM:
                WFM_mask = WFM_mask.cuda()

        if len(opt.gpus) > 1:
            samples, alignment = model.module.sample(src, src_len)
        else:
            samples, alignment, hiddens, predicted_masks = model.beam_sample(
                src, src_len, dict_spk2idx, tgt, beam_size=config.beam_size)
            # try:
            #     samples, alignment, hiddens, predicted_masks = model.beam_sample(src, src_len, dict_spk2idx, tgt, beam_size=config.beam_size)
            # except Exception as info:
            #     print('**************Error occurs here************:', info)
            #     continue

        if config.top1:
            predicted_masks = torch.cat([predicted_masks, 1 - predicted_masks], 1)

        # Expand the raw mixed features to topk channels.
        siz = src.size()
        assert len(siz) == 3
        topk = feas_tgt.size()[1]
        x_input_map_multi = torch.unsqueeze(src, 1).expand(
            siz[0], topk, siz[1], siz[2])
        if config.WFM:
            feas_tgt = x_input_map_multi.data * WFM_mask

        ss_loss = model.separation_loss(x_input_map_multi, predicted_masks,
                                        feas_tgt)
        print('loss for ss, this batch:', ss_loss.item())
        del ss_loss, hiddens

        if batch_idx <= (3000 // config.batch_size):  # only the first batches count toward the SDR
            predicted_maps = predicted_masks * x_input_map_multi
            # predicted_maps = Variable(feas_tgt)
            utils.bss_eval(config, predicted_maps,
                           eval_data['multi_spk_fea_list'], raw_tgt, eval_data,
                           dst='batch_output23jo')
            del predicted_maps, predicted_masks, x_input_map_multi
            SDR_SUM = np.append(SDR_SUM, bss_test.cal('batch_output23jo/'))
            print('SDR_aver_now:', SDR_SUM.mean())
            # 1/0
            input('Press any key to continue......')
            continue
        elif batch_idx == (3000 // config.batch_size) + 1 and SDR_SUM.mean() > best_SDR:
            # only record the best SDR once (threshold matches the branch above).
            print('Best SDR from {}---->{}'.format(best_SDR, SDR_SUM.mean()))
            best_SDR = SDR_SUM.mean()
            # save_model(log_path + 'checkpoint_bestSDR{}.pt'.format(best_SDR))

        candidate += [
            convertToLabels(dict_idx2spk, s, dict_spk2idx['<EOS>'])
            for s in samples
        ]
        # source += raw_src
        reference += raw_tgt
        print('samples:', samples)
        print('can:{}, \nref:{}'.format(candidate[-1 * config.batch_size:],
                                        reference[-1 * config.batch_size:]))
        alignments += [align for align in alignment]
        batch_idx += 1

    if opt.unk:
        cands = []
        for s, c, align in zip(source, candidate, alignments):
            cand = []
            for word, idx in zip(c, align):
                if word == dict.UNK_WORD and idx < len(s):
                    try:
                        cand.append(s[idx])
                    except:
                        cand.append(word)
                        print("%d %d\n" % (len(s), idx))
                else:
                    cand.append(word)
            cands.append(cand)
        candidate = cands

    score = {}
    result = utils.eval_metrics(reference, candidate, dict_spk2idx, log_path)
    logging_csv([
        e, updates, result['hamming_loss'], result['micro_f1'],
        result['micro_precision'], result['micro_recall']
    ])
    print('hamming_loss: %.8f | micro_f1: %.4f' %
          (result['hamming_loss'], result['micro_f1']))
    score['hamming_loss'] = result['hamming_loss']
    score['micro_f1'] = result['micro_f1']
    return score
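
# The opt.unk branch above replaces a generated UNK token with the source item
# its attention weight points at. A toy version of that copy mechanism
# (replace_unks is a hypothetical helper; '<unk>' stands in for dict.UNK_WORD):
UNK_WORD = '<unk>'

def replace_unks(source_tokens, output_tokens, attn_argmax):
    """attn_argmax[i]: source index that output position i attends to most."""
    fixed = []
    for word, idx in zip(output_tokens, attn_argmax):
        if word == UNK_WORD and idx < len(source_tokens):
            fixed.append(source_tokens[idx])  # copy the attended source token
        else:
            fixed.append(word)
    return fixed

# replace_unks(['spk1', 'spk2'], ['spk1', '<unk>'], [0, 1]) -> ['spk1', 'spk2']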