def test():
    """Run the trained U-Net vocal-separation model on one song excerpt.

    Loads a fixed checkpoint, takes a 128-frame slice of the mixture
    spectrogram, predicts a vocal soft mask, and writes the separated
    vocal (and the reconstructed mixture, for reference) as WAV files.
    All paths, the sample rate (8192 Hz) and STFT parameters
    (n_fft=1024, hop=768) are hard-coded to match training.
    """
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')().eval()
    # model.cuda()
    # Checkpoint was saved on GPU or CPU; t.load with default map works here.
    model.load_state_dict( t.load('G:/Unet_svs/check/epoch_219__0724_16_57_35.pth'))
    # Load the mixture at the model's sample rate and split magnitude/phase.
    mix_wav, _ = load("C:/Users/lenovo/Music/c.mp3", sr=8192)
    mix_wav_mag, mix_wav_phase = magphase( stft(mix_wav, n_fft=1024, hop_length=768))
    # Take a 128-frame window starting at frame 700 (the model input width).
    START = 700
    END = START + 128
    mix_wav_mag = mix_wav_mag[:, START:END]
    mix_wav_phase = mix_wav_phase[:, START:END]
    print(mix_wav_mag.shape)
    # Drop the DC bin: n_fft=1024 gives 513 frequency bins, the net takes 512.
    gg = mix_wav_mag[1:]
    gg = t.from_numpy(gg)
    # Add batch and channel dims in place: (512,128) -> (1,1,512,128).
    gg.unsqueeze_(0)
    gg.unsqueeze_(0)
    vis.img('a', gg)
    print(gg.shape)
    with t.no_grad():
        gg = Variable(gg)
        # The model outputs a soft mask; apply it to the input magnitude.
        score = model(gg)
    predict = gg.data * score.data
    print(predict.shape)
    target_pred_mag = predict.view(512, 128).cpu().numpy()
    # Re-insert a zero DC row so the spectrogram is 513 bins again for istft.
    target_pred_mag = np.vstack((np.zeros((128)), target_pred_mag))
    vis.img('b', t.from_numpy(target_pred_mag))
    print(target_pred_mag.shape)
    # Reconstruct the vocal with the mixture's phase (standard mask-based
    # separation: predicted magnitude * original phase).
    write_wav(
        f'C:/Users/lenovo/Music/pred_vocal.wav',
        istft(
            target_pred_mag * mix_wav_phase
            # (mix_wav_mag * target_pred_mag) * mix_wav_phase
            , win_length=1024, hop_length=768),
        8192, norm=True)
    # Also write the (sliced) mixture back out for an A/B listening test.
    write_wav(f'C:/Users/lenovo/Music/pred_mix.wav',
              istft(mix_wav_mag * mix_wav_phase, win_length=1024, hop_length=768),
              8192, norm=True)
def train(**kwargs):
    """Train the DCGAN (NetG generator + NetD discriminator).

    kwargs override fields of the global ``opt`` config. Side effects:
    plots losses/images to visdom (if ``opt.vis``), saves model
    checkpoints and sample image grids every ``opt.save_freq`` epochs.
    """
    # step1: configure
    opt.parse(**kwargs)
    if opt.vis:
        vis = Visualizer(opt.env)

    # step2: data
    normalize = T.Normalize(mean = [0.5,0.5,0.5], std = [0.5,0.5,0.5] )
    transforms = T.Compose( [
        T.Resize(opt.image_size),
        T.CenterCrop(opt.image_size),
        T.ToTensor(),
        normalize
        ])
    # For this model the transform is identical for train and test.
    dataset = tv.datasets.ImageFolder(opt.data_path,transform=transforms)
    dataloader = DataLoader(dataset,
                            batch_size = opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            drop_last=True)  # images used to train NetD

    true_labels = Variable(t.ones(opt.batch_size))
    fake_labels = Variable(t.zeros(opt.batch_size))
    fix_noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))  # fixed noise, used to monitor NetG's progress
    noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))  # random noise, resampled each step for training NetG/NetD

    # step3: model
    netg, netd = NetG(opt), NetD(opt)
    map_location = lambda storage, loc:storage
    if opt.netg_path:
        netg.load(opt.netg_path)
    if opt.netd_path:
        netd.load(opt.netd_path)

    # step4: criterion and optimizer
    optimizer_g = t.optim.Adam(params=netg.parameters(), lr = opt.lrg, betas=(opt.beta1,0.999))
    optimizer_d = t.optim.Adam(params=netd.parameters(), lr = opt.lrd, betas=(opt.beta1,0.999))
    criterion = t.nn.BCELoss()

    # step: meters
    errord_meter = meter.AverageValueMeter()
    errorg_meter = meter.AverageValueMeter()

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        criterion.cuda()
        true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
        fix_noises, noises = fix_noises.cuda(), noises.cuda()

    # step5: train
    for epoch in range(opt.max_epoch):
        ## step5.1 train
        for ii,(data, _) in tqdm(enumerate(dataloader)):
            real_img = Variable(data)
            if opt.use_gpu:
                real_img = real_img.cuda()
            if (ii+1) % opt.d_every ==0:
                # --- train the discriminator ---
                optimizer_d.zero_grad()
                # real images -> label 1
                output = netd(real_img)
                error_d_real = criterion(output, true_labels)
                error_d_real.backward()
                # fake images -> label 0; resample the noise in place
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                # (Understood in chapter 8) Why detach() here: it is not to keep
                # gradients out of netg's parameters — optimizer_d only updates
                # netd's params anyway. It is so that the graph below treats
                # fake_img as a constant and never computes gradients through
                # the generator, which saves memory (the run also works without
                # it, just wastefully). Same intent as requires_grad=False, but
                # that flag only applies to leaf tensors; for a non-leaf tensor
                # the way to stop backprop is detach().
                fake_img = netg(noises).detach()
                fake_output = netd(fake_img)
                error_d_fake = criterion(fake_output, fake_labels)
                error_d_fake.backward()
                optimizer_d.step()
                error_d = error_d_real+error_d_fake
                errord_meter.add(error_d.data)

            if (ii+1) % opt.g_every == 0:
                # --- train the generator: make netd call fakes "real" ---
                optimizer_g.zero_grad()
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises)
                output = netd(fake_img)
                error_g = criterion(output, true_labels)
                error_g.backward()
                optimizer_g.step()
                errorg_meter.add(error_g.data)

            ## step5.2 validate and visualize on batch_size
            # Note: losses are plotted every few batches, not once per epoch.
            if (ii+1) % opt.print_freq == 0 and opt.vis:
                # Drop into the debugger when the debug marker file exists.
                if os.path.exists(opt.debug_file):
                    # import ipdb
                    ipdb.set_trace()
                fix_fake_imgs = netg(fix_noises)
                # batch_size*nz*1*1 --> batch_size(256)*3*96*96
                # Using the fixed noise acts as a sanity check of the generator.
                # Images were normalized to [-1,1]; *0.5+0.5 maps back to [0,1].
                vis.img('fix_fake',fix_fake_imgs.data[:64]*0.5+0.5)
                vis.img('real', real_img.data[:64]*0.5+0.5)
                vis.plot(win = 'errord',y= errord_meter.value()[0])
                vis.plot(win = 'errorg',y= errorg_meter.value()[0])

        ## step5.3 validate and save model on epoch
        # Checkpoints are saved every few epochs. Strictly speaking validation
        # should also happen every epoch or every few epochs; it differs from
        # the rest of this chapter, but there is no validation metric here.
        if (epoch+1)%opt.save_freq == 0:
            netg.save(opt.model_save_path,'netg_%s' %epoch)
            netd.save(opt.model_save_path,'netd_%s' %epoch)
            fix_fake_imgs = val(netg,fix_noises)
            tv.utils.save_image(fix_fake_imgs,'%s/%s.png' % (opt.img_save_path, epoch),normalize=True, range=(-1,1))
            # Per the author: the dataset is small, so the meters are reset only
            # every save_freq epochs rather than every epoch. TODO: try
            # resetting every epoch and compare how the loss curves change.
            errord_meter.reset()
            errorg_meter.reset()
"""