# Example #1
def generate(args):
    """Generate dance-pose frames from a pickled audio tensor with a trained Generator.

    Loads the audio tensor from ``args.wav`` (a pickle file), runs the
    generator over it, denormalizes the predicted joint coordinates from
    [-1, 1] to pixel space, renders each batch with
    ``save_batch_images_combine``, and copies the source .wav file into the
    output's ``audio/`` directory.

    Parameters (via ``args``):
        model:  path to the saved generator state_dict.
        output: output directory (created if missing).
        wav:    path to the pickled audio tensor; a sibling ``.wav`` file
                with the same stem is assumed to exist.
    """
    file_path = args.model
    output_dir = args.output
    # exist_ok=True replaces the old try/except OSError idiom, which also
    # silently swallowed real errors such as permission failures.
    os.makedirs(output_dir, exist_ok=True)

    audio_path = os.path.join(output_dir, "audio")
    os.makedirs(audio_path, exist_ok=True)

    # Load the pickled audio tensor.
    # NOTE: pickle.load is unsafe on untrusted input.
    with open(args.wav, 'rb') as fo:
        data_ori = pickle.load(fo)
    # Scale normalized audio [-1, 1] into the int16 range, then back to float.
    data = (data_ori * 32768).int().float()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    generator = Generator(batch=data.shape[0])
    # Load the weights first, then switch to eval mode and move to the device.
    generator.load_state_dict(torch.load(file_path, map_location='cpu'))
    generator.eval()
    generator.to(device)

    # (seq, batch, samples) layout expected by the generator, e.g. 50,1,1600
    # -- TODO confirm against Generator's forward().
    audio = Variable(data.transpose(1, 0))

    # Generator forward pass; result shape e.g. (batch, 50, 36).
    fake = generator(audio)
    fake = fake.contiguous().cpu().detach().numpy()

    for i in range(fake.shape[0]):
        # Reshape flat 36-dim pose vectors into 18 (x, y) joints per frame.
        fake_coors = fake[i, :, :].reshape([-1, 18, 2])
        # Denormalize from [-1, 1] to pixel coordinates (640x360 frame).
        fake_coors[:, :, 0] = (fake_coors[:, :, 0] + 1) * 320
        fake_coors[:, :, 1] = (fake_coors[:, :, 1] + 1) * 180
        fake_coors = fake_coors.astype(int)

        save_batch_images_combine(fake_coors,
                                  batch_num=i,
                                  save_dir_start=output_dir)

    # Assumes args.wav ends in a 3-char extension (e.g. ".pkl") and that a
    # sibling ".wav" file exists next to it -- TODO confirm with callers.
    copyfile(src=args.wav[:-3] + 'wav',
             dst=os.path.join(output_dir, "audio/0.wav"))
# Script-level evaluation driver (continuation of an argparse setup defined
# above this chunk).
parser.add_argument("--count", type=int, default=50)
parser.add_argument(
    "--output",
    # NOTE(review): the default embeds literal single quotes inside the
    # string -- it resolves to "'/mnt/external4/output_demo'"; confirm
    # this is intended and not a quoting mistake.
    default="'/mnt/external4/output_demo'",
    metavar="FILE",
    help="path to output",
    type=str,
)
args = parser.parse_args()

file_path = args.model
counter = args.count
output_dir = args.output

# Alias for the GPU float tensor type; assumes CUDA is available.
Tensor = torch.cuda.FloatTensor
generator = Generator(1)
generator.eval()
generator.load_state_dict(torch.load(file_path))
generator.cuda()
data = DanceDataset("diff")
dataloader = torch.utils.data.DataLoader(data,
                                         batch_size=1,
                                         shuffle=False,
                                         num_workers=8,
                                         pin_memory=False)
criterion_pixelwise = torch.nn.L1Loss()
count = 0
total_loss = 0.0
# Blank white canvas (H=360, W=640, 3 channels) -- presumably a base image
# for pose rendering; verify against the (truncated) loop body below.
img_orig = np.ones((360, 640, 3), np.uint8) * 255
for i, (x, target) in enumerate(dataloader):
    audio_out = x.view(-1)  # flattened raw audio, e.g. 80000 samples
# NOTE(review): the loop body is truncated at this point in the source chunk.
# Example #3
    # Ensure the output directory exists; OSError (e.g. "already exists")
    # is deliberately ignored.
    try:
        os.makedirs(opt.out)
    except OSError:
        pass

    # Init dataset and its loader; drop_last keeps every batch full-sized.
    data = DanceDataset(opt)
    dataloader = torch.utils.data.DataLoader(data,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=16,
                                             pin_memory=False,
                                             drop_last=True)

    # Init models: one generator plus two discriminators
    # (frame-level HCN and a sequence-level discriminator).
    generator = Generator(opt.batch_size)
    frame_discriminator = HCN()
    # NOTE(review): this rebinds `seq_discriminator` from the class to an
    # instance, shadowing the class name for the rest of this scope --
    # confirm this is intended.
    seq_discriminator = seq_discriminator(opt.batch_size)

    # One Adam optimizer per model, with per-model learning rates from opt.
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_g)
    optimizer_D1 = torch.optim.Adam(frame_discriminator.parameters(),
                                    lr=opt.lr_d_frame)
    optimizer_D2 = torch.optim.Adam(seq_discriminator.parameters(),
                                    lr=opt.lr_d_seq)

    # Move all three models to the GPU (assumes CUDA is available).
    generator.cuda()
    frame_discriminator.cuda()
    seq_discriminator.cuda()
    print("data ok")

    train(generator, frame_discriminator, seq_discriminator, opt)
import datetime
import sys
from matplotlib import pyplot as plt
from tensorboardX import SummaryWriter
import cv2

# Training run configuration / hyper-parameters.
Tensor = torch.cuda.FloatTensor  # GPU float tensor alias; assumes CUDA
batch_size = 100
log_dir = "local_GCN_perceptual_D_Feature_girl"
weight = 200
gap = 1

# TensorBoard writer; logs go under a hard-coded user directory.
writer = SummaryWriter(
    log_dir='/home/xuanchi/self_attention_model/log/{}'.format(log_dir))

# One generator plus two discriminators (frame-level HCN, sequence-level).
generator = Generator(batch_size)
frame_discriminator = HCN()
# NOTE(review): this rebinds `seq_discriminator` from the class to an
# instance, shadowing the class name for the rest of the module -- confirm
# this is intended.
seq_discriminator = seq_discriminator(batch_size)  #output

# One Adam optimizer per model.
optimizer_G = torch.optim.Adam(generator.parameters(), lr=0.0003)
optimizer_D1 = torch.optim.Adam(frame_discriminator.parameters(), lr=0.0003)
optimizer_D2 = torch.optim.Adam(seq_discriminator.parameters(), lr=0.0005)

# Move models to the GPU.
generator.cuda()
frame_discriminator.cuda()
seq_discriminator.cuda()

from net.st_gcn_perceptual import Model


class GCNLoss(nn.Module):
# Example #5
def generate_old(args):
    """Evaluate a trained Generator against a pickled (audio, pose) dataset.

    For each batch: writes the raw audio to ``audio/<i>.wav``, computes the
    pixel-wise L1 loss between the predicted and ground-truth poses,
    denormalizes both from [-1, 1] to pixel coordinates, and saves
    side-by-side comparison images for the first ``args.count`` batches.
    Prints the mean L1 loss over the dataset at the end.

    Parameters (via ``args``):
        model:  path to the saved generator state_dict.
        count:  number of batches to render images for.
        output: output directory (created if missing).
        wav:    path to the pickled dataset.
    """
    file_path = args.model
    counter = args.count

    output_dir = args.output
    # exist_ok=True replaces the old try/except OSError idiom, which also
    # silently swallowed real errors such as permission failures.
    os.makedirs(output_dir, exist_ok=True)

    audio_path = os.path.join(output_dir, "audio")
    os.makedirs(audio_path, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    generator = Generator(1)
    # Load the weights first, then switch to eval mode and move to the device.
    generator.load_state_dict(torch.load(file_path, map_location='cpu'))
    generator.eval()
    generator.to(device)

    # Load the pickled dataset.
    # NOTE: pickle.load is unsafe on untrusted input.
    with open(args.wav, 'rb') as fo:
        data = pickle.load(fo)
    print(data.size())
    dataloader = torch.utils.data.DataLoader(
        data,
        batch_size=1,
        shuffle=False,
        num_workers=1,
        pin_memory=False)

    criterion_pixelwise = torch.nn.L1Loss()
    count = 0
    total_loss = 0.0
    for i, (x, target) in enumerate(dataloader):
        audio_out = x.view(-1)  # flattened raw audio, e.g. 80000 samples
        scaled = np.int16(audio_out)

        # BUG FIX: the original called x.type(TensorNew) with a tensor
        # *instance* (torch.FloatTensor()), which is not a valid dtype
        # argument, and never moved the inputs to the generator's device,
        # so it would fail whenever CUDA was available.
        audio = Variable(x.float().to(device).transpose(1, 0))  # 50,1,1600
        pose = Variable(target.float().to(device))  # 1,50,18,2
        pose = pose.view(1, 50, 36)

        # Generator forward pass + pixel-wise L1 loss against ground truth.
        fake = generator(audio)
        loss_pixel = criterion_pixelwise(fake, pose)
        total_loss += loss_pixel.item()

        fake = fake.contiguous().cpu().detach().numpy()  # 1,50,36
        fake = fake.reshape([50, 36])

        # Only render images for the first `counter` batches.
        if count <= counter:
            write(output_dir + "/audio/{}.wav".format(i), 16000, scaled)
            # Reshape flat 36-dim pose vectors into 18 (x, y) joints.
            real_coors = pose.cpu().numpy().reshape([-1, 18, 2])
            fake_coors = fake.reshape([-1, 18, 2])
            # Denormalize from [-1, 1] to pixel coordinates (640x360 frame).
            real_coors[:, :, 0] = (real_coors[:, :, 0] + 1) * 320
            real_coors[:, :, 1] = (real_coors[:, :, 1] + 1) * 180
            real_coors = real_coors.astype(int)

            fake_coors[:, :, 0] = (fake_coors[:, :, 0] + 1) * 320
            fake_coors[:, :, 1] = (fake_coors[:, :, 1] + 1) * 180
            fake_coors = fake_coors.astype(int)

            save_2_batch_images(real_coors,
                                fake_coors,
                                batch_num=count,
                                save_dir_start=output_dir)
        count += 1

    # Guard against an empty dataloader (ZeroDivisionError in the original).
    final_loss = total_loss / count if count else 0.0
    print("final_loss:", final_loss)