Example #1
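All five examples share the same module-level imports, which are omitted on this page. The standard-library and third-party imports are clear from usage; the project-local modules (dataset, transformer_net, utils, Vgg16, warp, weighted_mse, center_crop, flow_to_color) belong to the source repository, so the import paths sketched below are assumptions:

import os
import time

import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm

# Project-local modules; the exact paths depend on the source repository.
import dataset
import transformer_net
import utils
from transformer_net import TransformerNet
from vgg import Vgg16                               # assumed location of the VGG16 wrapper
from ops import warp, weighted_mse, center_crop     # assumed helper module
from flow_viz import flow_to_color                  # assumed flow visualization helper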
def train_ofb(args):
    train_dataset = dataset.DAVISDataset(args.dataset, use_flow=True)
    train_loader = DataLoader(train_dataset, batch_size=1)

    transformer = transformer_net.TransformerNet(args.pad_type)
    transformer.train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    vgg.eval()

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        mse_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel OFB loss weight: %f" % args.time_strength)

    style = utils.preprocess_batch(style)
    if args.cuda: style = style.cuda()
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    train_loader.dataset.reset()
    agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.
    iters = 0
    anormaly = False
    elapsed_time = 0
    for batch_id, (x, flow, conf) in enumerate(tqdm(train_loader)):
        x, flow, conf = x[0], flow[0], conf[0]
        iters += 1

        optimizer.zero_grad()
        x = utils.preprocess_batch(x)  # (N, 3, 256, 256)
        if args.cuda:
            x = x.cuda()
            flow = flow.cuda()
            conf = conf.cuda()
        y = transformer(x)  # (N, 3, 256, 256)

        begin_time = time.time()
        warped_y, warped_y_mask = warp(y[1:], flow)
        warped_y = warped_y.detach()
        warped_y_mask *= conf
        pixel_ofb_loss = args.time_strength * weighted_mse(
            y[:-1], warped_y, warped_y_mask)
        pixel_ofb_loss.backward()
        elapsed_time += time.time() - begin_time
        if batch_id > 1000: break
    print(elapsed_time / float(batch_id + 1))
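The helpers warp and weighted_mse are not defined in these snippets. A minimal sketch of what they might look like, assuming flow is a dense (N, 2, H, W) displacement field in pixels and the returned mask marks samples that land inside the frame (names and conventions here are assumptions, not the repository's actual implementation):

import torch
import torch.nn.functional as F

def warp(frames, flow):
    # frames: (N, C, H, W); flow: (N, 2, H, W) holding (dx, dy) in pixels.
    n, _, h, w = frames.shape
    ys = torch.arange(h, device=frames.device).view(1, h, 1).expand(1, h, w)
    xs = torch.arange(w, device=frames.device).view(1, 1, w).expand(1, h, w)
    base = torch.cat((xs, ys), dim=0).float()        # (2, H, W) pixel coordinates
    coords = base.unsqueeze(0) + flow                # absolute sampling positions
    gx = 2.0 * coords[:, 0] / max(w - 1, 1) - 1.0    # normalize to [-1, 1]
    gy = 2.0 * coords[:, 1] / max(h - 1, 1) - 1.0
    grid = torch.stack((gx, gy), dim=-1)             # (N, H, W, 2)
    warped = F.grid_sample(frames, grid, align_corners=True)
    # 1 where the sampling position falls inside the image, 0 otherwise.
    mask = (grid.abs() <= 1).all(dim=-1).float().unsqueeze(1)
    return warped, mask

def weighted_mse(a, b, weight):
    # Squared error weighted per pixel; the normalization convention is a guess.
    return ((a - b) ** 2 * weight).mean()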
Example #2
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        kwargs = {'num_workers': 0, 'pin_memory': False}
    else:
        kwargs = {}

    if args.model_type == "rnn":
        transformer = transformer_net.TransformerRNN(args.pad_type)
        seq_size = 4
    else:
        transformer = transformer_net.TransformerNet(args.pad_type)
        seq_size = 2

    train_dataset = dataset.DAVISDataset(args.dataset,
                                         seq_size=seq_size,
                                         use_flow=args.flow)
    train_loader = DataLoader(train_dataset, batch_size=1, **kwargs)

    model_path = args.init_model
    print("=> Load from model file %s" % model_path)
    transformer.load_state_dict(torch.load(model_path))
    transformer.train()
    if args.model_type == "rnn":
        # The RNN variant takes a 6-channel input, so conv1 is rebuilt (and
        # re-initialized) after the pretrained 3-channel weights are loaded.
        transformer.conv1 = transformer_net.ConvLayer(6,
                                                      32,
                                                      kernel_size=9,
                                                      stride=1,
                                                      pad_type=args.pad_type)
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    l1_loss = torch.nn.SmoothL1Loss()

    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model, "vgg16.weight")))
    vgg.eval()

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        mse_loss.cuda()
        l1_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel OFB loss weight: %f" % args.time_strength)

    style = utils.preprocess_batch(style)
    if args.cuda: style = style.cuda()
    utils.tensor_save_bgrimage(
        style[0].detach(), os.path.join(args.save_model_dir,
                                        'train_style.jpg'), args.cuda)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    for e in range(args.epochs):
        train_loader.dataset.reset()
        transformer.train()
        if args.cuda:
            transformer.cuda()
        agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.
        iters = 0
        anormaly = False
        for batch_id, (x, flow, conf) in enumerate(train_loader):
            x, flow, conf = x[0], flow[0], conf[0]
            iters += 1

            optimizer.zero_grad()
            x = utils.preprocess_batch(x)  # (N, 3, 256, 256)
            if args.cuda:
                x = x.cuda()
                flow = flow.cuda()
                conf = conf.cuda()
            y = transformer(x)  # (N, 3, 256, 256)

            xc = center_crop(x.detach(), y.size(2), y.size(3))

            vgg_y = utils.subtract_imagenet_mean_batch(y)
            vgg_x = utils.subtract_imagenet_mean_batch(xc)

            features_y = vgg(vgg_y)
            features_xc = vgg(vgg_x)

            # content target: VGG features of the cropped input
            f_xc_c = features_xc[2].detach()
            # content: VGG features of the stylized output
            f_c = features_y[2]

            content_loss = args.content_weight * mse_loss(f_c, f_xc_c)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(
                        gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]

            warped_y, warped_y_mask = warp(y[1:], flow)
            warped_y = warped_y.detach()
            warped_y_mask *= conf
            pixel_ofb_loss = args.time_strength * weighted_mse(
                y[:-1], warped_y, warped_y_mask)

            total_loss = content_loss + style_loss + pixel_ofb_loss

            total_loss.backward()
            optimizer.step()

            if (batch_id + 1) % 100 == 0:
                prefix = args.save_model_dir + "/"
                idx = (batch_id + 1) // 100
                flow_image = flow_to_color(
                    flow[0].detach().cpu().numpy().transpose(1, 2, 0))
                utils.save_image(prefix + "forward_flow_%d.png" % idx,
                                 flow_image)
                warped_x, warped_x_mask = warp(x[1:], flow)
                warped_x = warped_x.detach()
                warped_x_mask *= conf
                for i in range(2):
                    utils.tensor_save_bgrimage(
                        y.data[i], prefix + "out_%d-%d.png" % (idx, i),
                        args.cuda)
                    utils.tensor_save_bgrimage(
                        x.data[i], prefix + "in_%d-%d.png" % (idx, i),
                        args.cuda)
                    if i < warped_y.shape[0]:
                        utils.tensor_save_bgrimage(
                            warped_y.data[i],
                            prefix + "wout_%d-%d.png" % (idx, i), args.cuda)
                        utils.tensor_save_bgrimage(
                            warped_x.data[i],
                            prefix + "win_%d-%d.png" % (idx, i), args.cuda)
                        utils.tensor_save_image(
                            prefix + "conf_%d-%d.png" % (idx, i),
                            warped_x_mask.data[i])

            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data
            agg_pixelofb_loss += pixel_ofb_loss.data

            agg_total = agg_content_loss + agg_style_loss + agg_pixelofb_loss
            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\tpixel ofb: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                agg_pixelofb_loss / iters, agg_total / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = agg_pixelofb_loss = 0.0
            iters = 0

        # save model
        transformer.eval()
        transformer.cpu()
        save_model_filename = "epoch_" + str(e) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)
Example #3
def train_fdb(args):
    transformer = transformer_net.TransformerNet(args.pad_type)
    train_dataset = dataset.DAVISDataset(args.dataset,
                                         seq_size=2,
                                         use_flow=args.flow)
    train_loader = DataLoader(train_dataset, batch_size=1)

    transformer.train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    vgg.eval()

    if args.cuda:
        transformer.cuda()
        vgg.cuda()
        mse_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))

    style = utils.preprocess_batch(style)
    if args.cuda: style = style.cuda()
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    train_loader.dataset.reset()
    agg_content_loss = agg_style_loss = agg_pixelfdb_loss = agg_featurefdb_loss = 0.
    iters = 0
    elapsed_time = 0
    for batch_id, (x, flow, conf) in enumerate(tqdm(train_loader)):
        x = x[0]
        iters += 1

        optimizer.zero_grad()
        x = utils.preprocess_batch(x)  # (N, 3, 256, 256)
        if args.cuda: x = x.cuda()
        y = transformer(x)  # (N, 3, 256, 256)

        xc = center_crop(x.detach(), y.shape[2], y.shape[3])

        y = utils.subtract_imagenet_mean_batch(y)
        xc = utils.subtract_imagenet_mean_batch(xc)

        features_y = vgg(y)
        features_xc = vgg(xc)

        # FDB: time the frame-difference loss computation and the pixel-FDB
        # backward pass; feature_fdb_loss is computed but not backpropagated here.
        begin_time = time.time()
        pixel_fdb_loss = mse_loss(y[1:] - y[:-1], xc[1:] - xc[:-1])
        # temporal content: differences of the 16th-layer VGG features
        feature_fdb_loss = mse_loss(features_y[2][1:] - features_y[2][:-1],
                                    features_xc[2][1:] - features_xc[2][:-1])
        pixel_fdb_loss.backward()
        elapsed_time += time.time() - begin_time

        if batch_id > 1000: break
    print(elapsed_time / float(batch_id + 1))
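center_crop aligns the input with the transformer output, whose spatial size can be slightly smaller depending on args.pad_type. A minimal sketch, assuming it crops a (N, C, H, W) tensor symmetrically to the requested height and width:

def center_crop(t, target_h, target_w):
    # t: (N, C, H, W); return the central target_h x target_w region.
    _, _, h, w = t.shape
    top = (h - target_h) // 2
    left = (w - target_w) // 2
    return t[:, :, top:top + target_h, left:left + target_w]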
Example #4
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    kwargs = {'num_workers': 0, 'pin_memory': False}

    transform = transforms.Compose([
        transforms.Resize((args.image_size, args.image_size)),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = dataset.CustomImageDataset(args.dataset,
                                               transform=transform,
                                               img_size=args.image_size)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              **kwargs)

    transformer = TransformerNet(args.pad_type)
    transformer = transformer.train()
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    #print(transformer)
    vgg = Vgg16()
    vgg.load_state_dict(
        torch.load(os.path.join(args.vgg_model_dir, "vgg16.weight")))
    vgg.eval()

    transformer = transformer.cuda()
    vgg = vgg.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))

    #(1, H, W, C)
    style = utils.preprocess_batch(style).cuda()
    utils.tensor_save_bgrimage(
        style[0].detach(), os.path.join(args.save_model_dir,
                                        'train_style.jpg'), True)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    for e in range(args.epochs):
        train_loader.dataset.reset()
        agg_content_loss = 0.
        agg_style_loss = 0.
        iters = 0
        for batch_id, (x, _) in enumerate(train_loader):
            if x.size(0) != args.batch_size:
                print("=> Skip incomplete batch")
                continue
            iters += 1

            optimizer.zero_grad()
            x = utils.preprocess_batch(x).cuda()
            y = transformer(x)

            if (batch_id + 1) % 1000 == 0:
                idx = (batch_id + 1) // 1000
                utils.tensor_save_bgrimage(
                    y.data[0],
                    os.path.join(args.save_model_dir, "out_%d.png" % idx),
                    True)
                utils.tensor_save_bgrimage(
                    x.data[0],
                    os.path.join(args.save_model_dir, "in_%d.png" % idx), True)

            y = utils.subtract_imagenet_mean_batch(y)
            x = utils.subtract_imagenet_mean_batch(x)

            features_y = vgg(y)
            features_x = vgg(center_crop(x, y.size(2), y.size(3)))

            #content target
            f_x = features_x[2].detach()
            # content
            f_y = features_y[2]

            content_loss = args.content_weight * mse_loss(f_y, f_x)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(
                        gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]

            total_loss = content_loss + style_loss

            total_loss.backward()
            optimizer.step()
            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data

            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                (agg_content_loss + agg_style_loss) / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = 0.0
            iters = 0

        # save model
        save_model_filename = "epoch_" + str(e) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)
Example #5
def train(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    kwargs = {'num_workers': 0, 'pin_memory': False}

    if args.model_type == "rnn":
        transformer = transformer_net.TransformerRNN(args.pad_type)
        seq_size = 4
    else:
        transformer = transformer_net.TransformerNet(args.pad_type)
        seq_size = 2

    train_dataset = dataset.DAVISDataset(args.dataset,
                                         "train",
                                         seq_size=seq_size,
                                         interval=args.interval,
                                         no_flow=True)
    train_loader = DataLoader(train_dataset,
                              batch_size=1,
                              shuffle=True,
                              **kwargs)

    model_path = args.init_model
    print("=> Load from model file %s" % model_path)
    transformer.load_state_dict(torch.load(model_path))
    transformer.train()
    if args.model_type == "rnn":
        transformer.conv1 = transformer_net.ConvLayer(6,
                                                      32,
                                                      kernel_size=9,
                                                      stride=1,
                                                      pad_type=args.pad_type)
    optimizer = torch.optim.Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()
    l1_loss = torch.nn.L1Loss()

    vgg = Vgg16()
    vgg.load_state_dict(torch.load(args.vgg_model))
    vgg.eval()

    transformer.cuda()
    vgg.cuda()
    mse_loss.cuda()

    style = utils.tensor_load_resize(args.style_image, args.style_size)
    style = style.unsqueeze(0)
    print("=> Style image size: " + str(style.size()))
    print("=> Pixel FDB loss weight: %f" % args.time_strength1)
    print("=> Feature FDB loss weight: %f" % args.time_strength2)

    style = utils.preprocess_batch(style).cuda()
    utils.tensor_save_bgrimage(
        style[0].detach(), os.path.join(args.save_model_dir,
                                        'train_style.jpg'), True)
    style = utils.subtract_imagenet_mean_batch(style)
    features_style = vgg(style)
    gram_style = [utils.gram_matrix(y).detach() for y in features_style]

    for e in range(args.epochs):
        agg_content_loss = agg_style_loss = agg_pixelfdb_loss = agg_featurefdb_loss = 0.
        iters = 0
        for batch_id, (x, flow, occ, _) in enumerate(train_loader):
            x = x[0]
            iters += 1

            optimizer.zero_grad()
            x = utils.preprocess_batch(x).cuda()
            y = transformer(x)  # (N, 3, 256, 256)

            if (batch_id + 1) % 100 == 0:
                idx = (batch_id + 1) // 100
                for i in range(args.batch_size):
                    utils.tensor_save_bgrimage(
                        y.data[i],
                        os.path.join(args.save_model_dir,
                                     "out_%02d_%02d.png" % (idx, i)), True)
                    utils.tensor_save_bgrimage(
                        x.data[i],
                        os.path.join(args.save_model_dir,
                                     "in_%02d-%02d.png" % (idx, i)), True)

            #xc = center_crop(x.detach(), y.shape[2], y.shape[3])

            y = utils.subtract_imagenet_mean_batch(y)
            x = utils.subtract_imagenet_mean_batch(x)

            features_y = vgg(y)
            features_xc = vgg(x)

            #content target
            f_xc_c = features_xc[2].detach()
            # content
            f_c = features_y[2]

            content_loss = args.content_weight * mse_loss(f_c, f_xc_c)

            style_loss = 0.
            for m in range(len(features_y)):
                gram_s = gram_style[m]
                gram_y = utils.gram_matrix(features_y[m])
                batch_style_loss = 0
                for n in range(gram_y.shape[0]):
                    batch_style_loss += args.style_weight * mse_loss(
                        gram_y[n], gram_s[0])
                style_loss += batch_style_loss / gram_y.shape[0]

            # FDB
            pixel_fdb_loss = args.time_strength1 * mse_loss(
                y[1:] - y[:-1], x[1:] - x[:-1])
            # temporal content: 16th
            feature_fdb_loss = args.time_strength2 * l1_loss(
                features_y[2][1:] - features_y[2][:-1],
                features_xc[2][1:] - features_xc[2][:-1])

            total_loss = content_loss + style_loss + pixel_fdb_loss + feature_fdb_loss

            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.data
            agg_style_loss += style_loss.data
            agg_pixelfdb_loss += pixel_fdb_loss.data
            agg_featurefdb_loss += feature_fdb_loss.data

            agg_total = agg_content_loss + agg_style_loss + agg_pixelfdb_loss + agg_featurefdb_loss
            mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\tpixel fdb: {:.6f}\tfeature fdb: {:.6f}\ttotal: {:.6f}".format(
                time.ctime(), e + 1, batch_id + 1, len(train_loader),
                agg_content_loss / iters, agg_style_loss / iters,
                agg_pixelfdb_loss / iters, agg_featurefdb_loss / iters,
                agg_total / iters)
            print(mesg)
            agg_content_loss = agg_style_loss = agg_pixelfdb_loss = agg_featurefdb_loss = 0.0
            iters = 0

        # save model
        save_model_filename = "epoch_" + str(e) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
        save_model_path = os.path.join(args.save_model_dir,
                                       save_model_filename)
        torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)