def generate(args):
    """Generate dance skeleton frames from a pickled audio tensor.

    Loads Generator weights from ``args.model``, reads the audio tensor from
    the pickle file ``args.wav``, runs the generator over the whole clip and
    writes the rendered skeleton images (plus a copy of the matching .wav)
    under ``args.output``.

    NOTE(review): ``pickle.load`` is unsafe on untrusted files — confirm the
    input is trusted.
    """
    file_path = args.model
    output_dir = args.output

    # Create output dirs; tolerate pre-existing ones (exist_ok replaces the
    # old blanket `except OSError: pass`, which also hid permission errors).
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(os.path.join(output_dir, "audio"), exist_ok=True)

    # Read the pickled audio tensor.
    with open(args.wav, 'rb') as fo:
        data_ori = pickle.load(fo)
    # Scale normalized float audio to the int16 range, keeping float dtype
    # for the model input.
    data = (data_ori * 32768).int().float()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    generator = Generator(batch=data.shape[0])
    generator.eval()
    generator.load_state_dict(torch.load(file_path, map_location='cpu'))
    generator.to(device)

    # BUG FIX: the input must live on the same device as the model — the
    # original left it on the CPU (crash on CUDA machines) and created a
    # throwaway `TensorNew` whose `.to(device)` result was discarded.
    # Deprecated `Variable` wrapper removed.
    audio = data.transpose(1, 0).to(device)  # presumably (seq, batch, samples) — TODO confirm

    with torch.no_grad():
        fake = generator(audio)
    fake = fake.contiguous().cpu().numpy()  # (batch, seq, 36)

    for i in range(fake.shape[0]):
        fake_coors = fake[i, :, :].reshape([-1, 18, 2])
        # Map normalized [-1, 1] coordinates back to 640x360 pixel space.
        fake_coors[:, :, 0] = (fake_coors[:, :, 0] + 1) * 320
        fake_coors[:, :, 1] = (fake_coors[:, :, 1] + 1) * 180
        fake_coors = fake_coors.astype(int)
        save_batch_images_combine(fake_coors, batch_num=i,
                                  save_dir_start=output_dir)

    # Copy the source audio (same basename as the .pkl, .wav extension)
    # next to the rendered frames.
    copyfile(src=args.wav[:-3] + 'wav',
             dst=os.path.join(output_dir, "audio/0.wav"))
# --- evaluation script: CLI options, model load, dataset, eval loop start ---
# (`parser` is constructed earlier in the file.)
parser.add_argument("--count", type=int, default=50)
parser.add_argument(
    "--output",
    # BUG FIX: the default was "'/mnt/external4/output_demo'" — the literal
    # contained stray single quotes, so a directory literally named
    # '/mnt/external4/output_demo' (quotes included) was created on disk.
    default="/mnt/external4/output_demo",
    metavar="FILE",
    help="path to output",
    type=str,
)
args = parser.parse_args()
file_path = args.model
counter = args.count
output_dir = args.output

Tensor = torch.cuda.FloatTensor

generator = Generator(1)
generator.eval()
generator.load_state_dict(torch.load(file_path))
generator.cuda()

data = DanceDataset("diff")
dataloader = torch.utils.data.DataLoader(data,
                                         batch_size=1,
                                         shuffle=False,
                                         num_workers=8,
                                         pin_memory=False)
criterion_pixelwise = torch.nn.L1Loss()

count = 0
total_loss = 0.0
# Blank white canvas (360x640 RGB) for rendering.
img_orig = np.ones((360, 640, 3), np.uint8) * 255
for i, (x, target) in enumerate(dataloader):
    audio_out = x.view(-1)  # 80000 samples per clip
# --- training entry: output dir, dataset, models, optimizers, then train() ---
# `opt` is the parsed command-line options object (defined earlier in the file).
try:
    os.makedirs(opt.out)
except OSError:
    # Best effort: output directory likely already exists.
    pass

# init dataset
data = DanceDataset(opt)
dataloader = torch.utils.data.DataLoader(data,
                                         batch_size=opt.batch_size,
                                         shuffle=True,
                                         num_workers=16,
                                         pin_memory=False,
                                         drop_last=True)

# init model
generator = Generator(opt.batch_size)
frame_discriminator = HCN()
# NOTE(review): this rebinds the class name `seq_discriminator` to an
# instance — the class is unreachable from here on.
seq_discriminator = seq_discriminator(opt.batch_size)

optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_g)
optimizer_D1 = torch.optim.Adam(frame_discriminator.parameters(),
                                lr=opt.lr_d_frame)
optimizer_D2 = torch.optim.Adam(seq_discriminator.parameters(),
                                lr=opt.lr_d_seq)

generator.cuda()
frame_discriminator.cuda()
seq_discriminator.cuda()

print("data ok")
train(generator, frame_discriminator, seq_discriminator, opt)
import datetime
import sys

from matplotlib import pyplot as plt
from tensorboardX import SummaryWriter
import cv2

# --- training-script globals ---
Tensor = torch.cuda.FloatTensor
batch_size = 100
log_dir = "local_GCN_perceptual_D_Feature_girl"  # tensorboard run name
weight = 200  # presumably a loss-term weight — TODO confirm against train()
gap = 1

writer = SummaryWriter(
    log_dir='/home/xuanchi/self_attention_model/log/{}'.format(log_dir))

generator = Generator(batch_size)
frame_discriminator = HCN()
# NOTE(review): rebinds the class name `seq_discriminator` to an instance.
seq_discriminator = seq_discriminator(batch_size)
# output
optimizer_G = torch.optim.Adam(generator.parameters(), lr=0.0003)
optimizer_D1 = torch.optim.Adam(frame_discriminator.parameters(), lr=0.0003)
optimizer_D2 = torch.optim.Adam(seq_discriminator.parameters(), lr=0.0005)

generator.cuda()
frame_discriminator.cuda()
seq_discriminator.cuda()

from net.st_gcn_perceptual import Model


# ST-GCN-based loss module; class body continues past this chunk.
class GCNLoss(nn.Module):
def generate_old(args):
    """Evaluate a trained Generator on a pickled dataset and dump results.

    For up to ``args.count`` clips: writes the clip's audio as a 16 kHz wav,
    renders real vs. generated skeletons via ``save_2_batch_images`` and
    accumulates the pixel-wise L1 loss, printing the average at the end.
    """
    file_path = args.model
    counter = args.count
    output_dir = args.output

    # Create output dirs; tolerate pre-existing ones (exist_ok replaces the
    # old blanket `except OSError: pass`, which also hid permission errors).
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(os.path.join(output_dir, "audio"), exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    generator = Generator(1)
    generator.eval()
    generator.load_state_dict(torch.load(file_path, map_location='cpu'))
    generator.to(device)

    # Read the pickled dataset.  NOTE(review): pickle.load is unsafe on
    # untrusted input.
    with open(args.wav, 'rb') as fo:
        data = pickle.load(fo)
    print(data.size())

    dataloader = torch.utils.data.DataLoader(data,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=1,
                                             pin_memory=False)
    criterion_pixelwise = torch.nn.L1Loss()

    count = 0
    total_loss = 0.0
    for i, (x, target) in enumerate(dataloader):
        audio_out = x.view(-1)  # 80000 samples per clip
        scaled = np.int16(audio_out)

        # BUG FIX: inputs must be on the model's device.  The original built
        # a throwaway `TensorNew = torch.FloatTensor()` (its `.to(device)`
        # result was discarded) and passed that tensor *instance* to
        # `x.type(...)`, which expects a dtype/type — and left everything on
        # the CPU, crashing on CUDA hosts.  Deprecated `Variable` removed.
        audio = x.float().to(device).transpose(1, 0)  # (50, 1, 1600)
        pose = target.float().to(device).view(1, 50, 36)  # 18 joints x 2, flattened

        with torch.no_grad():
            fake = generator(audio)
            loss_pixel = criterion_pixelwise(fake, pose)
        total_loss += loss_pixel.item()

        fake = fake.contiguous().cpu().numpy().reshape([50, 36])  # (50, 36)

        if count <= counter:
            write(output_dir + "/audio/{}.wav".format(i), 16000, scaled)

            real_coors = pose.cpu().numpy().reshape([-1, 18, 2])
            fake_coors = fake.reshape([-1, 18, 2])
            # Map normalized [-1, 1] coordinates back to 640x360 pixel space.
            real_coors[:, :, 0] = (real_coors[:, :, 0] + 1) * 320
            real_coors[:, :, 1] = (real_coors[:, :, 1] + 1) * 180
            fake_coors[:, :, 0] = (fake_coors[:, :, 0] + 1) * 320
            fake_coors[:, :, 1] = (fake_coors[:, :, 1] + 1) * 180
            save_2_batch_images(real_coors.astype(int),
                                fake_coors.astype(int),
                                batch_num=count,
                                save_dir_start=output_dir)
        count += 1

    # BUG FIX: guard against an empty dataloader (ZeroDivisionError).
    final_loss = total_loss / count if count else 0.0
    print("final_loss:", final_loss)