Example #1
    def __init__(self, layer, param_name="weight", dim=0, power=2):
        super(WeightNormWrapper, self).__init__()

        self.param_name = param_name
        self.dim = dim
        self.power = power
        self.layer = layer

        w_v = param_name + "_v"
        w_g = param_name + "_g"

        # We could also use numpy to compute this; after all, it runs only
        # once at initialization.
        original_weight = getattr(layer, param_name)
        self.add_parameter(
            w_v,
            self.create_parameter(shape=original_weight.shape,
                                  dtype=original_weight.dtype))
        with dg.no_grad():
            F.assign(original_weight, getattr(self, w_v))
        delattr(layer, param_name)
        temp = norm_except(getattr(self, w_v), self.dim, self.power)
        self.add_parameter(
            w_g, self.create_parameter(shape=temp.shape, dtype=temp.dtype))
        with dg.no_grad():
            F.assign(temp, getattr(self, w_g))

        # also set this when setting up
        setattr(
            self.layer, self.param_name,
            compute_weight(getattr(self, w_v), getattr(self, w_g), self.dim,
                           self.power))

        self.weight_norm_applied = True
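A note on the pattern above: the wrapper re-parameterizes the layer's weight as w = g * v / ||v||, storing the norm in weight_g and the direction in weight_v, and reassembling weight on the wrapped layer. A minimal usage sketch (hedged: it assumes the surrounding module exposes WeightNormWrapper together with the norm_except/compute_weight helpers, and that the wrapper's forward simply delegates to the inner layer):

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    linear = dg.Linear(16, 32)            # inner layer with a "weight" parameter
    wrapped = WeightNormWrapper(linear)   # splits weight into weight_v / weight_g
    x = dg.to_variable(np.random.randn(4, 16).astype("float32"))
    y = wrapped(x)                        # forward recomputes weight from (weight_v, weight_g)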
Example #2
def synthesize(args, config, model, vocoder, sentence, monotonic_layers):
    print("[synthesize] {}".format(sentence))
    text = en.text_to_sequence(sentence, p=1.0)
    text = np.expand_dims(np.array(text, dtype="int64"), 0)
    lengths = np.array([text.size], dtype=np.int64)
    text_seqs = dg.to_variable(text)
    text_lengths = dg.to_variable(lengths)

    decoder_layers = config["decoder_layers"]
    force_monotonic_attention = [False] * decoder_layers
    for i in monotonic_layers:
        force_monotonic_attention[i] = True

    with dg.no_grad():
        outputs = model(text_seqs,
                        text_lengths,
                        speakers=None,
                        force_monotonic_attention=force_monotonic_attention,
                        window=(config["backward_step"],
                                config["forward_step"]))
        decoded, refined, attentions = outputs
        if args.vocoder == "griffin-lim":
            wav_np = vocoder(refined.numpy()[0].T)
        else:
            wav = vocoder(F.transpose(refined, (0, 2, 1)))
            wav_np = wav.numpy()[0]
    return wav_np
Example #3
File: demo.py Project: leeacord/Contrib
def make_animation(source_image,
                   driving_video,
                   generator,
                   kp_detector,
                   relative=True,
                   adapt_movement_scale=True):
    with dygraph.no_grad():
        predictions = []
        source = dygraph.to_variable(
            np.transpose(source_image[np.newaxis],
                         (0, 3, 1, 2)).astype(np.float32))
        driving = dygraph.to_variable(
            np.transpose(np.array(driving_video)[np.newaxis],
                         (0, 4, 1, 2, 3)).astype(np.float32))
        kp_source = kp_detector(source)
        kp_driving_initial = kp_detector(driving[:, :, 0])

        for frame_idx in tqdm(range(driving.shape[2])):
            driving_frame = driving[:, :, frame_idx]
            kp_driving = kp_detector(driving_frame)
            kp_norm = normalize_kp(kp_source=kp_source,
                                   kp_driving=kp_driving,
                                   kp_driving_initial=kp_driving_initial,
                                   use_relative_movement=relative,
                                   use_relative_jacobian=relative,
                                   adapt_movement_scale=adapt_movement_scale)
            out = generator(source, kp_source=kp_source, kp_driving=kp_norm)

            predictions.append(
                np.transpose(out['prediction'].numpy(), [0, 2, 3, 1])[0])
    return predictions
Example #4
    def __call__(self, oriImg):
        h, w, _ = oriImg.shape

        scale_search = [0.5, 1.0, 1.5, 2.0]
        # scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        multiplier = [x * boxsize / h for x in scale_search]
        avg_output = np.zeros((22, h, w))

        for m in range(len(multiplier)):
            scale = multiplier[m]
            imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
            imageToTest_padded, pad = padRightDownCorner(imageToTest, stride, padValue)
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256. - 0.5
            im = np.ascontiguousarray(im)

            data = dg.to_variable(im)
            with dg.no_grad():
                output = self.hand_model(data)[-1]
            heatmap = output.numpy()[0].transpose((1, 2, 0)) # [h, w, c]
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (w, h), interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap.transpose((2, 0, 1)) # [c, h, w]
            
            avg_output += heatmap / len(multiplier)
        
        return self.postprocessor(avg_output)
Example #5
    def apply(module: dg.Layer, name, dim):
        for k, hook in module._forward_pre_hooks.items():
            if isinstance(hook, WeightNorm) and hook.name == name:
                raise RuntimeError("Cannot register two weight_norm hooks on "
                                   "the same parameter {}".format(name))

        if dim is None:
            dim = -1

        fn = WeightNorm(name, dim)

        # remove w from parameter list
        w = getattr(module, name)
        del module._parameters[name]

        # add g and v as new parameters and express w as g/||v|| * v
        g_var = norm_except_dim(w, dim)
        v = module.create_parameter(w.shape, dtype=w.dtype)
        module.add_parameter(name + "_v", v)
        g = module.create_parameter(g_var.shape, dtype=g_var.dtype)
        module.add_parameter(name + "_g", g)
        with dg.no_grad():
            F.assign(w, v)
            F.assign(g_var, g)
        setattr(module, name, fn.compute_weight(module))

        # recompute weight before every forward()
        module.register_forward_pre_hook(fn)
        return fn
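For comparison with Example #1, this hook-based variant recomputes the weight in a forward pre-hook instead of a wrapper layer. A hedged usage sketch (assuming the module exposes WeightNorm with this apply and the remove shown in Example #12):

import paddle.fluid.dygraph as dg

with dg.guard():
    conv = dg.Conv2D(num_channels=3, num_filters=8, filter_size=3)
    fn = WeightNorm.apply(conv, "weight", dim=0)  # registers the pre-forward hook
    # ... train with the (weight_g, weight_v) re-parameterization ...
    fn.remove(conv)                               # folds w back into a single parameter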
Example #6
def get_inception_mean_cov(data_loader,
                           key_real,
                           key_fake,
                           generator,
                           sample_size,
                           preprocess,
                           is_video=False,
                           few_shot_video=False):
    """
    Load mean and covariance from saved npy file if exists. Otherwise,
    compute the mean and covariance.
    """
    print("Extract mean and covariance.")
    if is_video:
        with dg.no_grad():
            y = get_video_activations(data_loader, key_real, key_fake,
                                      generator, sample_size, preprocess,
                                      few_shot_video)
    else:
        y = get_activations(data_loader, key_real, key_fake, generator,
                            sample_size, preprocess)

    m = np.mean(y, axis=0)
    s = np.cov(y, rowvar=False)

    return m, s
Example #7
def evaluate(model, criterion, dataset, visualizer, output_dir, args):
    with dg.no_grad():
        model.eval()

        metric_logger = utils.MetricLogger(args, delimiter=" ")
        metric_logger.add_meter(
            "class_error", utils.SmoothedValue(window_size=1,
                                               fmt="{value:.2f}"))
        header = "Test"
        print_freq = 10
        visualize_freq = 100 * print_freq
        count = 0

        for samples, targets in metric_logger.log_every(
                dataset, print_freq, header):
            outputs = model(samples)
            loss_dict = criterion(outputs, targets)
            weight_dict = criterion.weight_dict
            losses = sum(loss_dict[k] * weight_dict[k]
                         for k in loss_dict.keys() if k in weight_dict)

            losses = losses / args.batch_size

            metric_logger.update(loss=losses.numpy(), **loss_dict)
            metric_logger.update(class_error=loss_dict["class_error"])

            count += 1
            if count % visualize_freq == 0:
                visualizer.plot_results(samples, outputs, targets)

    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
Example #8
def power_iteration(W, u_, update=True, eps=1e-12):
    # Lists holding singular vectors and values
    us, vs, svs = [], [], []
    for i, u in enumerate(u_):
        # Run one step of the power iteration
        # with torch.no_grad():
        with dg.no_grad():
            v = fluid.layers.matmul(u, W)
            # v = torch.matmul(u, W)

            # Run Gram-Schmidt to subtract components of all other singular vectors
            v = fluid.layers.l2_normalize(gram_schmidt(v, vs), eps=eps)
            # Add to the list
            vs += [v]

            # Update the other singular vector
            u = fluid.layers.matmul(v, W.t())
            # u = torch.matmul(v, W.t())

            # Run Gram-Schmidt to subtract components of all other singular vectors
            u = fluid.layers.l2_normalize(gram_schmidt(u, us), eps=eps)
            # Add to the list
            us += [u]
            if update:
                u_[i][:] = u
        # Compute this singular value and add it to the list
        # NOTE: torch.squeeze corresponds to fluid.layers.squeeze(input, axes, name=None)
        svs += [
            fluid.layers.squeeze(
                fluid.layers.matmul(fluid.layers.matmul(v, W.t()), u.t()))
        ]
        # svs += [torch.squeeze(torch.matmul(torch.matmul(v, W.t()), u.t()))]

        #svs += [torch.sum(F.linear(u, W.transpose(0, 1)) * v)]
    return svs, us, vs
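The loop above estimates singular values by power iteration: v <- u W / ||u W||, u <- v W^T / ||v W^T||, and sigma ~= v W^T u^T once the vectors settle. A self-contained NumPy sketch for a single singular value (illustrative only; the function above additionally Gram-Schmidts against previously found vectors):

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(8, 8).astype("float32")
u = rng.randn(1, 8).astype("float32")
for _ in range(50):
    v = u @ W
    v /= np.linalg.norm(v) + 1e-12
    u = v @ W.T
    u /= np.linalg.norm(u) + 1e-12
sigma = (v @ W.T @ u.T).item()
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # both ~ the largest singular value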
Example #9
    def gen_frames(self, data, use_model_average=False):
        net_G_output = None
        data_prev = None
        net_G = self.net_G

        # Iterate through the length of sequence.
        all_info = {'inputs': [], 'outputs': []}
        for t in range(self.sequence_length):
            # Get the data at the current time frame.
            data_t = self.get_data_t(data, net_G_output, data_prev, t)
            data_prev = data_t

            # Generator forward.
            with dg.no_grad():
                net_G_output = net_G(data_t)

            # Do any postprocessing if necessary
            data_t, net_G_output = self.post_process(data_t, net_G_output)

            if t == 0:
                # Get the output at beginning of sequence for visualization.
                first_net_G_output = net_G_output

            all_info['inputs'].append(data_t)
            all_info['outputs'].append(net_G_output)

        return first_net_G_output, net_G_output, all_info
Example #10
def std_gen_interpolate(batch_size=8, seed=None, out_path='data/out',
                        levels=None, interpolate_mode=0):
    default_levels = ("y;z0;z11;z12;z21;z22;z31;z32;z41;z42;z51;z52;z61;z62")
    if levels is None:
        levels = default_levels
    default_levels = default_levels.split(';')

    img_save_dir = os.path.join('/tmp', out_path+'.dir')
    os.system(f'rm -rf {img_save_dir}')
    os.system(f'mkdir {img_save_dir} -p')

    with dg.no_grad():
        model_cache.train_mode = False
        model_cache.initialized = False
        if seed is not None:
            rds.rng = np.random.RandomState(seed)
        elif rds.rng is None:
            rds.rng = np.random
        G = model_cache.G
        x_np = rds.rng.randn(batch_size,140).astype('float32')
        y_np = rds.rng.randint(0,1000,size=[batch_size]).astype('int64')
        x = dg.to_variable(x_np)
        y_cls = dg.to_variable(y_np)
        y_hot = layers.one_hot(layers.unsqueeze(y_cls,[1]), depth=1000)
        y_embed = G.embed_y(y_hot)
        x = layers.concat([x, x[:1]], 0)
        y_embed = layers.concat([y_embed, y_embed[:1]], 0)
        levels = levels.split(';')
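        # NOTE: the per-level variables below are created dynamically through
        # the locals() dict and read back the same way in the interpolation
        # loop; this relies on CPython-specific locals() behavior.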
        for level in default_levels:
            if len(level) == 1:
                locals()[level] = y_embed
                locals()['_'+level] = y_embed[:1]
            if len(level) >= 2:
                idx = int(level[1])*20
                locals()[level] = x[:,idx:idx+20]
                locals()['_'+level] = x[:1,idx:idx+20]
        imgs = []
        for i in range(batch_size):
            for j in range(40):
                alpha = j / 40
                if interpolate_mode == 1:
                    alpha = alpha**2 * (3 - 2 * alpha)
                for level in levels:
                    locals()['_'+level] = (1 - alpha) *  locals()[level][i:i+1] + alpha * locals()[level][i+1:i+2]
                inputs = []
                for level in default_levels[1:]:
                    inputs.append(locals()['_'+level])
                img_pd = G(inputs, locals()['_'+default_levels[0]], True)
                img = np.uint8(img_pd.numpy().clip(0,1)*255)[0].transpose([1,2,0])
                imgs.append(Image.fromarray(img))
                stdout.write(f'{i*40+j+1}/{40*batch_size}\r')
                stdout.flush()
        print('')
        for i, img in enumerate(imgs):
            img.save(os.path.join(img_save_dir, str(i).zfill(5)+'.png'))
        imgs[0].save(out_path+'.gif', save_all=True, append_images=imgs[1:], duration=40, loop=0)
        out_path = out_path + '.mp4'
        os.system(f'ffmpeg -r 40 -i {img_save_dir}/%05d.png -hide_banner -loglevel warning -nostats -c:v libx264 -crf 23 -y {out_path}')
        os.system(f'rm -rf {img_save_dir}')
Example #11
    def save_image(self, path, data):
        self.net_G.eval()

        self.net_G_output = None
        with dg.no_grad():
            first_net_G_output, last_net_G_output, _ = self.gen_frames(data)

        def get_images(data,
                       net_G_output,
                       return_first_frame=True,
                       for_model_average=False):
            frame_idx = 0 if return_first_frame else -1
            warped_idx = 0 if return_first_frame else 1
            vis_images = []

            vis_images += [
                tensor2im(data['ref_image'][:, frame_idx]),
                self.visualize_label(data['tgt_label'][:, frame_idx]),
                tensor2im(data['tgt_image'][:, frame_idx])
            ]
            vis_images += [
                tensor2im(net_G_output['fake_images']),
                tensor2im(net_G_output['fake_raw_images'])
            ]
            vis_images += [
                # tensor2im(net_G_output['warped_images'][warped_idx]),
                # tensor2flow(net_G_output['fake_flow_maps'][warped_idx]),
                # tensor2flow(self.gt_flow[warped_idx]),
                # tensor2im(net_G_output['fake_occlusion_masks'][warped_idx])
            ]
            return vis_images

        vis_images_first = get_images(data, first_net_G_output)
        if self.sequence_length > 1:
            vis_images_last = get_images(data,
                                         last_net_G_output,
                                         return_first_frame=False)

            # If generating a video, the first row of each batch will be
            # the first generated frame and the flow/mask for warping the
            # reference image, and the second row will be the last generated
            # frame and the flow/mask for warping the previous frame.
            vis_images = [[
                np.vstack((im_first, im_last))
                for im_first, im_last in zip(imgs_first, imgs_last)
            ]
                          for imgs_first, imgs_last in zip(
                              vis_images_first, vis_images_last)
                          if imgs_first is not None]

        else:
            vis_images = vis_images_first

        image_grid = np.hstack(
            [np.vstack(im) for im in vis_images if im is not None])
        print("Save output images to {}".format(path))
        os.makedirs(os.path.dirname(path), exist_ok=True)
        imageio.imwrite(path, image_grid)
Example #12
 def remove(self, module):
     w_var = self.compute_weight(module)
     delattr(module, self.name)
     del module._parameters[self.name + '_g']
     del module._parameters[self.name + '_v']
     w = module.create_parameter(w_var.shape, dtype=w_var.dtype)
     module.add_parameter(self.name, w)
     with dg.no_grad():
         F.assign(w_var, w)
Example #13
    def test_single(self,
                    data,
                    output_dir=None,
                    inference_args=None,
                    return_fake_image=True):
        # if getattr(inference_args, 'finetune', False):
        #     if not getattr(self, 'has_fine_tuned', False):
        #         self.finetune(data, inference_args)

        net_G = self.net_G
        net_G.eval()

        data_t = self.get_data_t(data, self.net_G_output, self.data_prev, 0)
        if self.is_inference or self.sequence_length > 1:
            self.data_prev = data_t

        # Generator forward.
        with dg.no_grad():
            self.net_G_output = net_G(data_t)

        if output_dir is None:
            return self.net_G_output

        save_fake_only = getattr(inference_args, 'save_fake_only', False)
        if save_fake_only:
            ys, ye, xs, xe = get_face_bbox_for_output(None,
                                                      data_t['label'][0:1],
                                                      crop_smaller=0)
            image_grid = tensor2im(self.net_G_output['fake_images'])[0]
            h, w, _ = image_grid.shape
            face_mask = Image.open(
                '/home/aistudio/vid2vid/test/images/face.png').resize(
                    (ye - ys, xe - xs))
            mask = np.zeros((h, w, 3)).astype("uint8")
            mask[ys:ye, xs:xe, :] = np.array(face_mask)[:, :, :3]
            image_grid[mask != 0] = 0
            image_grid += mask
            # image_grid = tensor2im(data_t['label'][:, 3:])[0]
        else:
            vis_images = self.get_test_output_images(data)
            image_grid = np.hstack(
                [np.vstack(im) for im in vis_images if im is not None])

        if 'img_name' in data:
            save_name = data['img_name'].split('.')[0] + '.jpg'
        else:
            save_name = "%04d.jpg" % self.t
        output_filename = os.path.join(output_dir, save_name)
        os.makedirs(output_dir, exist_ok=True)
        imageio.imwrite(output_filename, image_grid)
        self.t += 1

        if return_fake_image:
            return image_grid
        else:
            return self.net_G_output, image_grid
Example #14
    def forward(self, ten_first, ten_second):
        h, w = ten_first.shape[2:]

        r_h, r_w = int(math.floor(math.ceil(h / 32.0) * 32.0)), int(math.floor(math.ceil(w / 32.0) * 32.0))
        ten_first = L.image_resize(ten_first, (r_h, r_w))
        ten_second = L.image_resize(ten_second, (r_h, r_w))
        with dg.no_grad():
            flow = self.network(ten_first, ten_second)
        flow = L.image_resize(flow, (h, w))
        flow[:, 0, :, :] *= float(w) / float(r_w)
        flow[:, 1, :, :] *= float(h) / float(r_h)

        return flow
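The two in-place scalings matter because flow vectors are measured in pixels: resizing the flow map changes its resolution but not its values, so the x/y components must also be rescaled by w/r_w and h/r_h. A tiny NumPy illustration:

import numpy as np

flow_at_640 = np.full((1, 2, 4, 4), 32.0)  # 32-pixel displacements estimated at width 640
flow_at_320 = flow_at_640.copy()
flow_at_320[:, 0] *= 320.0 / 640.0         # x-component scaled by w / r_w
flow_at_320[:, 1] *= 320.0 / 640.0         # y-component scaled by h / r_h (square frames here)
print(flow_at_320[0, 0, 0, 0])             # 16.0: half the resolution, half the displacement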
Example #15
    def forward(self, x, mask_in=None):
        assert len(x.shape) == 4

        if mask_in is not None or self.last_size != tuple(x.shape):
            self.last_size = tuple(x.shape)

            with dg.no_grad():
                if self.weight_maskUpdater.dtype != x.dtype:
                    self.weight_maskUpdater = self.weight_maskUpdater.astype(
                        x.dtype)

                if mask_in is None:
                    # If mask is not provided, create a mask.
                    if self.multi_channel:
                        mask = L.ones(x.shape, dtype=x.dtype)
                    else:
                        mask = L.ones((1, 1, x.shape[2], x.shape[3]),
                                      dtype=x.dtype)
                else:
                    mask = mask_in

                self.update_mask = nn.functional.conv2d(
                    mask,
                    self.weight_maskUpdater,
                    bias=None,
                    stride=self.stride,
                    padding=self.padding,
                    dilation=self.dilation,
                    groups=1)
                # For mixed precision training, eps from 1e-8 ~ 1e-6
                eps = 1e-6
                self.mask_ratio = self.slide_winsize / (self.update_mask + eps)
                self.update_mask = L.clamp(self.update_mask, 0, 1)
                self.mask_ratio = self.mask_ratio * self.update_mask

        raw_out = super(PartialConv2D,
                        self).forward(x * mask if mask_in is not None else x)

        if self.bias is not None:
            bias_view = L.reshape(self.bias, (1, self.out_channels, 1, 1))
            output = (raw_out - bias_view) * self.mask_ratio + bias_view
            output = output * self.update_mask
        else:
            output = raw_out * self.mask_ratio

        if self.return_mask:
            return output, self.update_mask
        else:
            return output
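The core of the partial convolution is the mask ratio slide_winsize / sum(mask): windows that see only a few valid pixels get their raw output scaled up so they are not systematically dimmed. The idea for a single 3x3 window, in NumPy:

import numpy as np

kernel = np.ones((3, 3))
mask = np.array([[1, 1, 0],
                 [1, 1, 0],
                 [0, 0, 0]], dtype=float)
slide_winsize = kernel.size           # 9 positions in the window
update_mask = (mask * kernel).sum()   # 4 of them are valid
mask_ratio = slide_winsize / (update_mask + 1e-6)
print(mask_ratio)                     # ~2.25: the raw output is scaled up accordingly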
Example #16
    def forward(self, sen_q, seg_q, sen_k, seg_k):
        """
        Input:
            im_q: a batch of query images
            im_k: a batch of key images
        Output:
            logits, targets
        """

        # compute query features
        q = self.encoder_q(sen_q, seg_q)  # queries: N
        q = norm(q, dim=1)

        # compute key features
        with D.no_grad():  # no gradient to keys
            self._momentum_update_key_encoder()  # update the key encoder

            # shuffle for making use of BN
            #sen_k, idx_unshuffle = self._batch_shuffle_ddp(sen_k)

            k = self.encoder_k(sen_k, seg_k)  # keys: NxC
            k = norm(k, dim=1)

            # undo shuffle
            #k = self._batch_unshuffle_ddp(k, idx_unshuffle)


        # positive logits: Nx1
        l_pos = L.unsqueeze(L.reduce_sum(L.elementwise_mul(q, k), dim=1), axes=[-1])
        # negative logits: NxK
        l_neg = L.matmul(q, self.queue.detach())
        # logits: Nx(1+K)
        logits = L.concat([l_pos, l_neg], axis=-1)
        # apply temperature
        logits /= self.T

        # labels: positive key indicators
        labels = L.zeros([logits.shape[0]], dtype='int64')
        
        self._dequeue_and_enqueue(k)

        if labels is not None:
            if len(labels.shape) == 1:
                labels = L.reshape(labels, [-1, 1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)

        return loss
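The logits layout is the standard InfoNCE construction: column 0 of every row holds the positive similarity q . k, the remaining K columns hold similarities against the queue, and the label is therefore 0 for every sample. A NumPy sketch with assumed toy shapes (q, k of [N, C], queue of [C, K]):

import numpy as np

N, C, K, T = 4, 8, 16, 0.07
rng = np.random.RandomState(0)
q = rng.randn(N, C); q /= np.linalg.norm(q, axis=1, keepdims=True)
k = rng.randn(N, C); k /= np.linalg.norm(k, axis=1, keepdims=True)
queue = rng.randn(C, K)
l_pos = (q * k).sum(axis=1, keepdims=True)           # [N, 1] positive logits
l_neg = q @ queue                                    # [N, K] negative logits
logits = np.concatenate([l_pos, l_neg], axis=1) / T  # [N, 1+K], temperature-scaled
labels = np.zeros(N, dtype="int64")                  # the positive key sits at index 0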
Example #17
 def loss_cardinality(self, outputs, targets, indices, num_boxes):
     """
     Compute the cardinality error, ie the absolute error in the number of predicted non-empty boxes
     This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients
     """
     with dg.no_grad():
         pred_logits = outputs[
             "pred_logits"]  # [bs, num_queries, num_classes]
         tgt_lengths = dg.to_variable([len(v["labels"])
                                       for v in targets]).astype("float32")
         # Count the number of predictions that are NOT "no-object" (which is the last class)
         card_pred = L.reduce_sum(
             (L.argmax(pred_logits, -1) !=
              pred_logits.shape[-1] - 1).astype("float32"))
         card_err = F.loss.l1_loss(card_pred, tgt_lengths)
         losses = {"cardinality_error": card_err}
         return losses
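A toy check of what this measures (hedged, simplified to a single image): with num_classes = 3, class index 2 is "no-object", and the error is the absolute difference between the number of non-"no-object" predictions and the number of ground-truth boxes.

import numpy as np

pred_logits = np.array([[[0.1, 0.2, 0.9],    # argmax 2 -> no-object
                         [0.9, 0.1, 0.2],    # argmax 0 -> predicted box
                         [0.1, 0.8, 0.3]]])  # argmax 1 -> predicted box
card_pred = (pred_logits.argmax(-1) != pred_logits.shape[-1] - 1).sum()  # 2
tgt_length = 3                               # ground truth has 3 boxes
print(abs(card_pred - tgt_length))           # cardinality error = 1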
Example #18
 def W_(self):
     W_mat = self.weight.view(self.weight.size(0), -1)
     if self.transpose:
         W_mat = W_mat.t()
     # Apply num_itrs power iterations
     for _ in range(self.num_itrs):
         svs, us, vs = power_iteration(W_mat,
                                       self.u,
                                       update=self.training,
                                       eps=self.eps)
     # Update the svs
     if self.training:
         # Make sure to do this in a no_grad() context or you'll get memory leaks!
         with dg.no_grad():
             for i, sv in enumerate(svs):
                 self.sv[i][:] = sv
     return self.weight / svs[0]
Example #19
def std_gen(batch_size=8, seed=None):
    with dg.no_grad():
        model_cache.train_mode = False
        model_cache.initialized = False
        if seed is not None:
            rds.rng = np.random.RandomState(seed)
        elif rds.rng is None:
            rds.rng = np.random
        G = model_cache.G
        x_np = rds.rng.randn(batch_size,140).astype('float32')
        y_np = rds.rng.randint(0,1000,size=[batch_size]).astype('int64')
        x = dg.to_variable(x_np)
        y = dg.to_variable(y_np)
        y_hot = layers.one_hot(layers.unsqueeze(y,[1]), depth=1000)
        img_pd = G(x, y_hot)
        img = np.uint8(img_pd.numpy().clip(0,1)*255)
        imgs = []
        for i in range(len(img)):
            imgs += [Image.fromarray(img[i].transpose([1,2,0]))]
        return imgs
Example #20
def compute_fid(fid_path,
                data_loader,
                net_G,
                key_real='tgt_image',
                key_fake='fake_images',
                sample_size=None,
                preprocess=None,
                is_video=False,
                few_shot_video=False):
    """
    Compute the fid score.

    Args:
        fid_path (str): Location of the numpy file to store or to load the statistics.
        data_loader (obj): The data loader object.
        net_G (obj): The generator network.
        key_real (str): Dictionary key for the real data.
        key_fake (str): Dictionary key for the fake data.
        sample_size (int or tuple): How many samples to use.
        preprocess (func): The preprocess function applied to the data.
        is_video (bool): Whether we are handling video sequences.
        few_shot_video (bool): If True, uses few-shot video synthesis.
    """
    print("Computing FID.")
    with dg.no_grad():
        # Get the fake mean and covariance.
        fake_mean, fake_cov = load_or_compute_stats(fid_path, data_loader,
                                                    key_real, key_fake, net_G,
                                                    sample_size, preprocess,
                                                    is_video, few_shot_video)

        # Get the ground truth mean and covariance.
        mean_cov_path = os.path.join(os.path.dirname(fid_path),
                                     'real_mean_cov.npz')
        real_mean, real_cov = load_or_compute_stats(mean_cov_path, data_loader,
                                                    key_real, key_fake, None,
                                                    sample_size, preprocess,
                                                    is_video, few_shot_video)

    fid = calculate_frechet_distance(real_mean, real_cov, fake_mean, fake_cov)
    return fid
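For reference, the final call computes the Frechet distance between two Gaussians, FID = ||mu_r - mu_f||^2 + Tr(S_r + S_f - 2 (S_r S_f)^(1/2)). A hedged NumPy/SciPy sketch of that formula (calculate_frechet_distance itself is defined elsewhere in the repo):

import numpy as np
from scipy import linalg

def frechet_distance(mu1, cov1, mu2, cov2):
    diff = mu1 - mu2
    covmean, _ = linalg.sqrtm(cov1 @ cov2, disp=False)
    if np.iscomplexobj(covmean):  # sqrtm can return tiny imaginary parts from numerical noise
        covmean = covmean.real
    return diff @ diff + np.trace(cov1 + cov2 - 2.0 * covmean)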
Example #21
def std_enc_with_D(path='miku.png',
                   steps=2000,
                   lr=4e-3,
                   levels=[0, 3],
                   weights=[100, 1]):
    model_cache.train_mode = False
    model_cache.initialized = False
    img = Image.open(path)
    w, h = img.size
    min_size = min(w, h)
    x0 = (w - min_size) // 2
    y0 = (h - min_size) // 2
    x1 = x0 + min_size
    y1 = y0 + min_size
    img = img.crop([x0, y0, x1, y1]).convert('RGB')
    img = _img = img.resize([256, 256], Image.BILINEAR)
    img = np.asarray(img) / 255.0
    img = dg.to_variable(img.transpose(2, 0, 1).astype('float32')[None, ...])
    m_latent = Latents()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=lr, parameter_list=m_latent.parameters())
    for i in range(steps):
        z, class_emb = m_latent()
        out = model_cache.G(z, class_emb, input_class_emb=True)
        with dg.no_grad():
            _, real_features = model_cache.D(img)
            real_features = [img] + real_features
        _, fake_features = model_cache.D(out)
        fake_features = [out] + fake_features
        loss = 0
        for idx, weight in zip(levels, weights):
            r, f = real_features[idx], fake_features[idx]
            loss = loss + weight * layers.mean((f - r)**2)
        loss.backward()
        optimizer.minimize(loss)
        optimizer.clear_gradients()
        stdout.write(f'loss: {loss.numpy().mean()}  {i+1}/{steps}\r')
        stdout.flush()
    print('')
    out = np.uint8(out.numpy()[0].transpose(1, 2, 0).clip(0, 1) * 255)
    return Image.fromarray(out), _img
Example #22
    def step(self, data):
        # Whether to reuse generator output for both gen_update and dis_update.
        # It saves time but consumes a bit more memory.
        reuse_gen_output = getattr(self.cfg.trainer, 'reuse_gen_output', False)

        past_frames = [None, None]
        net_G_output = None
        data_prev = None
        for t in range(self.sequence_length):
            data_t = self.get_data_t(data, net_G_output, data_prev, t)
            data_prev = data_t

            # Discriminator update.
            if reuse_gen_output:
                net_G_output = self.net_G(data_t)
            else:
                with dg.no_grad():
                    net_G_output = self.net_G(data_t)
            data_t, net_G_output = self.post_process(data_t, net_G_output)

            # Get losses and update D if image generated by network in training.
            if 'fake_images_source' not in net_G_output:
                net_G_output['fake_images_source'] = 'in_training'
            if net_G_output['fake_images_source'] != 'pretrained':
                net_D_output, _ = self.net_D(data_t, detach(net_G_output),
                                             past_frames)
                self.get_dis_losses(net_D_output)

            # Generator update.
            if not reuse_gen_output:
                net_G_output = self.net_G(data_t)
                data_t, net_G_output = self.post_process(data_t, net_G_output)

            # Get losses and update G if image generated by network in training.
            if 'fake_images_source' not in net_G_output:
                net_G_output['fake_images_source'] = 'in_training'
            if net_G_output['fake_images_source'] != 'pretrained':
                net_D_output, past_frames = self.net_D(data_t, net_G_output,
                                                       past_frames)
                self.get_gen_losses(data_t, net_G_output, net_D_output)
Example #23
def renorm_gen_interpolate(batch_size=8, seed=None, out_path='data/out.gif'):
    with dg.no_grad():
        model_cache.train_mode = True
        model_cache.initialized = True
        if seed is not None:
            rds.rng = np.random.RandomState(seed)
        elif rds.rng is None:
            rds.rng = np.random
        G = model_cache.G
        x_np = rds.rng.randn(batch_size, 140).astype('float32')
        y_np = rds.rng.randint(0, 1000, size=[batch_size]).astype('int64')
        x = dg.to_variable(x_np)
        y = dg.to_variable(y_np)
        y_hot = layers.one_hot(layers.unsqueeze(y, [1]), depth=1000)
        y_embed = G.embed_y(y_hot)
        G(x, y_embed, True)
        model_cache.train_mode = False
        model_cache.initialized = True
        x = layers.concat([x, x[:1]], 0)
        y_embed = layers.concat([y_embed, y_embed[:1]], 0)
        imgs = []
        for i in range(batch_size):
            for j in range(40):
                alpha = j / (40 - 1)
                _x = (1 - alpha) * x[i:i + 1] + alpha * x[i + 1:i + 2]
                _y_embed = (1 - alpha
                            ) * y_embed[i:i + 1] + alpha * y_embed[i + 1:i + 2]
                img_pd = G(_x, _y_embed, True)
                img = np.uint8(img_pd.numpy().clip(0, 1) * 255)[0].transpose(
                    [1, 2, 0])
                imgs.append(Image.fromarray(img))
                stdout.write(f'{i*40+j+1}/{40*batch_size}\r')
                stdout.flush()
        print('')
        imgs[0].save(out_path,
                     save_all=True,
                     append_images=imgs[1:],
                     duration=40,
                     loop=0)
        return Image.open(out_path)
Example #24
    def forward(self, outputs, targets):
        """
        Performs the matching

        Params:
            outputs: This is a dict contains at least these entries:
                "pred_logits": Tensor of dim[batch_size, num_queries, num_classes] with the classification logits
                "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicated box coordinates
            
            targets: This is a list of targets (len(targets) == batch_size), where each target is a dict containing:
                "labels": Tensor of dim[num_target_boxes] (where num_target_boxes is the number of ground-truth)
                          objects in the target) containing the class labels
                "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordiantes
        
        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        with dg.no_grad():
            bs, num_queries, num_classes = outputs["pred_logits"].shape

            # We flatten to compute the cost matrices in a batch
            out_prob = L.reshape(
                outputs["pred_logits"],
                [-1, num_classes])  # [batch_size * num_queries, num_classes]
            out_prob = L.softmax(
                out_prob, axis=-1)  # [batch_size * num_queries, num_classes]
            out_bbox = L.reshape(outputs["pred_boxes"],
                                 [-1, 4])  # [batch_size * num_queries, 4]

            # Also concat the target labels and boxes
            tgt_ids = L.concat([v["labels"] for v in targets]).astype(
                "int64")  # [batch_size * num_target_boxes_i]
            tgt_bbox = L.concat([v["boxes"] for v in targets]).astype(
                "float32")  # [batch_size * num_target_boxes_i]

            # Compute the classification cost. Contrary to the loss, we don't use the NLL,
            # but approximate it as 1 - proba[target class].
            # The 1 is a constant that doesn't change the matching, so it can be omitted.
            cost_class = -out_prob.numpy()[:, tgt_ids.numpy(
            )]  # [batch_size * num_queries, num_all_target_boxes]
            cost_class = dg.to_variable(cost_class)

            # Compute the L1 cost between boxes
            num_all_target_boxes = tgt_bbox.shape[0]
            expanded_out_bbox = L.expand(
                L.unsqueeze(out_bbox, [1]),
                [1, num_all_target_boxes, 1
                 ])  # [batch_size * num_queries, num_all_target_boxes, 4]
            expanded_tgt_bbox = L.expand(
                L.unsqueeze(tgt_bbox, [0]),
                [bs * num_queries, 1, 1
                 ])  # [batch_size * num_queries, num_all_target_boxes, 4]
            cost_bbox = F.loss.l1_loss(
                expanded_out_bbox, expanded_tgt_bbox, reduction='none'
            )  # [batch_size * num_queries, num_all_target_boxes, 4]
            cost_bbox = L.reduce_mean(
                cost_bbox,
                -1)  # [batch_size * num_queries, num_all_target_boxes]

            # Compute the giou cost between boxes
            cost_giou = -generalied_box_iou(box_cxcywh_to_xyxy(out_bbox),
                                            box_cxcywh_to_xyxy(tgt_bbox))

            # Final cost matrix
            C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
            C = L.reshape(
                C, [bs, num_queries, -1
                    ])  # [batch_size, num_queries, num_all_target_boxes]

            sizes = [len(v["boxes"]) for v in targets]

            indices = [
                linear_sum_assignment(c[i].numpy())
                for i, c in enumerate(L.split(C, sizes, dim=-1))
            ]

            return [(dg.to_variable(i.astype("int64")),
                     dg.to_variable(j.astype("int64"))) for i, j in indices]
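The assignment at the end is the Hungarian algorithm applied per image: for each cost matrix of shape [num_queries, num_target_boxes_i], linear_sum_assignment returns the minimum-cost one-to-one matching. A toy example with 3 queries and 2 boxes:

import numpy as np
from scipy.optimize import linear_sum_assignment

C = np.array([[0.9, 0.1],   # query 0 matches box 1 cheaply
              [0.2, 0.8],   # query 1 matches box 0 cheaply
              [0.5, 0.5]])
row_ind, col_ind = linear_sum_assignment(C)
print(row_ind, col_ind)     # [0 1], [1 0]; query 2 is left unmatched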
Example #25
 def __call__(self, mel):
     with dg.no_grad():
         self.model.eval()
         audio = self.model.synthesize(mel)
     self.model.train()
     return audio
Example #26
 def apply_optimize(self, loss, startup_program, params_grads):
     super(AdamW, self).apply_optimize(loss, startup_program, params_grads)
     for p, g in params_grads:
         if not self.pat.match(p.name):
             with D.no_grad():
                 L.assign(p * (1. - self.wd * self.current_step_lr()), p)
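The assign implements decoupled weight decay: after the Adam step, every parameter not excluded by the name pattern is shrunk multiplicatively, w <- w * (1 - lr * wd), rather than having the decay folded into the gradient. A one-line numeric illustration:

w, lr, wd = 1.0, 1e-3, 0.01
w = w * (1.0 - wd * lr)  # mirrors L.assign(p * (1. - self.wd * self.current_step_lr()), p)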