import os
import time
import cPickle

import numpy as np
import torch
import torch.optim as optim

# Project-specific helpers (pp_data, DNN, dft, hamming_window, transform,
# inv_transform) are assumed to come from the repository's own modules and are
# not shown in this excerpt.


def forward(model, x, mean, std, cuda, volatile=False):
    """Enhance the centre frame: window, rDFT, DNN on magnitude, inverse rDFT."""
    stack_num = x.shape[1]
    x = move_data_to_gpu(x, cuda, volatile)

    # Apply the Hamming window to each stacked time-domain frame.
    x = x * hamming_window

    # Real DFT, then magnitude and phase (cos / sin) of the windowed frames.
    (x_real, x_imag) = dft.rdft(x)
    x_mag = torch.sqrt(x_real ** 2 + x_imag ** 2)
    cos = x_real / x_mag
    sin = x_imag / x_mag

    # Enhance the magnitude spectrogram with the DNN (in scaled feature space).
    x = transform(x_mag, type='torch')
    x = pp_data.scale(x, mean, std)
    output = model(x)
    output = pp_data.inv_scale(output, mean, std)
    output = inv_transform(output, type='torch')

    # Recombine the enhanced magnitude with the phase of the centre frame,
    # return to the time domain, and undo the analysis window.
    y_real = output * cos[:, stack_num // 2, :]
    y_imag = output * sin[:, stack_num // 2, :]
    s = dft.irdft(y_real, y_imag)
    s /= hamming_window
    return s
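# move_data_to_gpu is referenced above but not defined in this excerpt. Below is
# a minimal sketch, assuming the old (pre-0.4) PyTorch Variable API in which
# `volatile=True` disabled gradient tracking at inference time; the real helper
# in the repository may differ.
def move_data_to_gpu(x, cuda, volatile=False):
    if isinstance(x, np.ndarray):
        x = torch.Tensor(x)        # numpy array -> float32 tensor
    if cuda:
        x = x.cuda()               # move to GPU when requested
    return torch.autograd.Variable(x, volatile=volatile)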
def evaluate(model, data_loader, mean_, std_, cuda):
    iter = 0
    output_all = []
    target_all = []
    max_iter = 200

    for (batch_x, batch_y) in data_loader.generate():
        output = forward(model, batch_x, mean_, std_, cuda, volatile=True)
        output_all.append(output)

        batch_y = np.abs(batch_y)
        batch_y = move_data_to_gpu(batch_y, cuda)
        batch_y = transform(batch_y, type='torch')
        batch_y = pp_data.scale(batch_y, mean_, std_)
        target_all.append(batch_y)

        iter += 1
        if iter == max_iter:
            break

    output_all = torch.cat(output_all, dim=0)
    target_all = torch.cat(target_all, dim=0)
    loss = mse_loss(output_all, target_all)
    return loss
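# mse_loss is referenced above but not defined in this excerpt. A plain
# mean-squared error over all elements is a minimal, compatible sketch; the
# repository's own loss may differ.
def mse_loss(output, target):
    return torch.mean((output - target) ** 2)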
def forward(model, x, mean, std, cuda, volatile=False):
    x = np.abs(x)
    x = move_data_to_gpu(x, cuda, volatile)
    x = transform(x, type='torch')
    x = pp_data.scale(x, mean, std)
    output = model(x)
    return output
def forward(model, x, mean, std, dft, cuda, volatile=False):
    x = np.abs(x)
    x = move_data_to_gpu(x, cuda, volatile)
    # (x_real, x_imag) = dft.rdft(x)
    # x = torch.sqrt(x_real ** 2 + x_imag ** 2)
    x = transform(x, type='torch')
    x = pp_data.scale(x, mean, std)
    output = model(x)
    output = pp_data.inv_scale(output, mean, std)
    output = inv_transform(output, type='torch')
    return output
def train(args):
    workspace = args.workspace
    audio_type = args.audio_type
    stack_num = args.stack_num
    hop_frames = args.hop_frames
    filename = args.filename
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    # Data loaders.
    hdf5_file = os.path.join(workspace, "features", "cmplx_spectrogram.h5")
    data_type = 'train'
    t1 = time.time()
    batch_size = 500
    data_loader = pp_data.DataLoader(hdf5_file, data_type, audio_type, stack_num,
                                     hop_frames, center_only=True, batch_size=batch_size)
    eval_tr_data_loader = pp_data.DataLoader(hdf5_file, 'train', audio_type, stack_num,
                                             hop_frames, center_only=True, batch_size=batch_size)
    eval_te_data_loader = pp_data.DataLoader(hdf5_file, 'test', audio_type, stack_num,
                                             hop_frames, center_only=True, batch_size=batch_size)
    print("Load time: %s" % (time.time() - t1))

    # Load scalar.
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    (mean_, std_) = cPickle.load(open(scalar_path, 'rb'))
    mean_ = move_data_to_gpu(mean_, cuda)
    std_ = move_data_to_gpu(std_, cuda)

    # Model.
    n_freq = 257
    model = DNN(stack_num, n_freq)
    if cuda:
        model.cuda()

    # Optimizer.
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    # Train.
    iter = 0
    model_dir = os.path.join(workspace, "models", filename, audio_type)
    pp_data.create_folder(model_dir)
    t_train = time.time()

    for (batch_x, batch_y) in data_loader.generate():
        output = forward(model, batch_x, mean_, std_, cuda)

        # Prepare the target in the same scaled feature space as the output.
        batch_y = np.abs(batch_y)
        batch_y = move_data_to_gpu(batch_y, cuda)
        batch_y = transform(batch_y, type='torch')
        batch_y = pp_data.scale(batch_y, mean_, std_)

        loss = mse_loss(output, batch_y)

        # Debug plots of input, target, and output spectrograms.
        # if iter % 1000 == 0:
        #     # output = pp_data.inv_scale(output, mean_, std_)
        #     # output = inv_transform(output, type='torch')
        #     # batch_y = pp_data.inv_scale(batch_y, mean_, std_)
        #     fig, axs = plt.subplots(3, 1, sharex=True)
        #     axs[0].matshow(np.log(np.abs(batch_x[:, 0, :])).T, origin='lower', aspect='auto', cmap='jet')
        #     axs[1].matshow(batch_y.data.cpu().numpy().T, origin='lower', aspect='auto', cmap='jet')
        #     axs[2].matshow(output.data.cpu().numpy().T, origin='lower', aspect='auto', cmap='jet')
        #     plt.show()

        # Backward.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        iter += 1

        # Evaluate.
        if iter % 500 == 0:
            t_eval = time.time()
            tr_loss = evaluate(model, eval_tr_data_loader, mean_, std_, cuda)
            te_loss = evaluate(model, eval_te_data_loader, mean_, std_, cuda)
            print("Iter: %d, train err: %f, test err: %f, train time: %s, eval time: %s" %
                  (iter, tr_loss, te_loss, time.time() - t_train, time.time() - t_eval))
            t_train = time.time()

        # Save model.
        if iter % 5000 == 0:
            save_out_dict = {'iter': iter,
                             'state_dict': model.state_dict(),
                             'optimizer': optimizer.state_dict(),
                             'te_loss': loss,  # note: this is the latest training-batch loss
                             }
            save_out_path = os.path.join(model_dir, "md_%d_iters.tar" % iter)
            torch.save(save_out_dict, save_out_path)
            print("Save model to %s" % save_out_path)
            t1 = time.time()
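# Example of how train() might be wired up from the command line. The argument
# names mirror the attributes read from `args` inside train(); the defaults
# below are illustrative assumptions, not values taken from the original code.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--workspace', type=str, required=True)
    parser.add_argument('--audio_type', type=str, required=True)
    parser.add_argument('--stack_num', type=int, default=7)
    parser.add_argument('--hop_frames', type=int, default=1)
    parser.add_argument('--filename', type=str, required=True)
    parser.add_argument('--use_cuda', action='store_true')
    args = parser.parse_args()

    train(args)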