# Example 1: TextureModule
# (These __init__ snippets assume the enclosing modules' imports and helpers:
# math, torch.nn as nn, ROIAlign, UNetDown, UNetUp, DualUNetUp, ResidualBlock,
# get_norm_layer, and pix2pix_modules.)
    def __init__(
        self,
        texture_channels=3,
        cloth_channels=19,
        num_roi=12,
        norm_type="batch",
        dropout=0.5,
        unet_type="pix2pix",
        img_size=128,
    ):
        super(TextureModule, self).__init__()
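        # ROIAlign (as in torchvision.ops.RoIAlign) crops each region of
        # interest and resamples it to a fixed 128x128 grid; spatial_scale=1
        # means the ROI boxes are given in input-pixel coordinates.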
        self.roi_align = ROIAlign(output_size=(128, 128),
                                  spatial_scale=1,
                                  sampling_ratio=1)

        self.num_roi = num_roi
        channels = texture_channels * num_roi
        self.encode = UNetDown(channels, channels)

        # UNET

        if unet_type == "pix2pix":
            # fast integer log2 of img_size: math.frexp(128) returns (0.5, 8),
            # so the exponent minus one gives num_downs=7. Assumes img_size is
            # a power of two.
            num_downs = math.frexp(img_size)[1] - 1
            use_dropout = dropout is not None
            norm_layer = get_norm_layer(norm_type=norm_type)
            self.unet = pix2pix_modules.UnetGenerator(
                channels + cloth_channels,
                texture_channels,
                num_downs,
                norm_layer=norm_layer,
                use_dropout=use_dropout,
            )
        else:
            self.unet = nn.Sequential(
                UNetDown(channels + cloth_channels, 64, normalize=False),
                UNetDown(64, 128),
                UNetDown(128, 256),
                UNetDown(256, 512, dropout=dropout),
                UNetDown(512, 1024, dropout=dropout),
                UNetDown(1024, 1024, normalize=False, dropout=dropout),
                UNetUp(1024, 1024, dropout=dropout),
                UNetUp(2 * 1024, 512, dropout=dropout),
                UNetUp(2 * 512, 256),
                UNetUp(2 * 256, 128),
                UNetUp(2 * 128, 64),
                # upsample and pad
                nn.Upsample(scale_factor=2),
                nn.ZeroPad2d((1, 0, 1, 0)),
                nn.Conv2d(128, texture_channels, 4, padding=1),
                nn.Tanh(),
            )
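
# -----------------------------------------------------------------------------
# Hedged sketch: UNetDown and UNetUp are not defined in this listing. The blocks
# below are a minimal reconstruction in the style of common pix2pix
# implementations, consistent with the channel arithmetic above (e.g.
# UNetUp(2 * 1024, 512) after a concatenated skip). The real definitions may
# differ in norm choice, activation, or upsampling method.
# -----------------------------------------------------------------------------
import torch
import torch.nn as nn


class UNetDown(nn.Module):
    """Halves spatial resolution with a stride-2 4x4 convolution."""

    def __init__(self, in_size, out_size, normalize=True, dropout=0.0):
        super(UNetDown, self).__init__()
        layers = [nn.Conv2d(in_size, out_size, 4, stride=2, padding=1, bias=False)]
        if normalize:
            layers.append(nn.InstanceNorm2d(out_size))
        layers.append(nn.LeakyReLU(0.2))
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)


class UNetUp(nn.Module):
    """Doubles spatial resolution; optionally concatenates a skip connection
    (the modules in this listing use both the with- and without-skip forms)."""

    def __init__(self, in_size, out_size, dropout=0.0):
        super(UNetUp, self).__init__()
        layers = [
            nn.ConvTranspose2d(in_size, out_size, 4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(out_size),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip_input=None):
        x = self.model(x)
        if skip_input is not None:
            x = torch.cat((x, skip_input), 1)
        return x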

# Example 2: MaskTryOnModule
    def __init__(self, output_w, output_h, dropout=0.5, channels=3):
        super(MaskTryOnModule, self).__init__()

        self.face_down1 = UNetDown(channels, 64, normalize=False)
        self.face_down2 = UNetDown(64, 128)
        self.face_down3 = UNetDown(128, 256)
        self.face_down4 = UNetDown(256, 512)
        self.face_down5 = UNetDown(512, 1024, dropout=dropout)
        self.face_down6 = UNetDown(1024, 1024, normalize=False, dropout=dropout)
        # the five UNetUp's below are used WITHOUT concatenation,
        # hence their input sizes do not double
        self.face_up1 = UNetUp(1024, 1024)
        self.face_up2 = UNetUp(1024, 512)
        self.face_up3 = UNetUp(512, 256)
        self.face_up4 = UNetUp(256, 128)
        self.face_up5 = UNetUp(128, 64)

        self.upsample_and_pad = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(3 * 64, channels, 4, padding=1),
            nn.Tanh(),
        )
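
# Shape walkthrough for MaskTryOnModule (hedged, assuming a 128x128 input and
# the stride-2 blocks sketched above): the six UNetDown's map
# 128 -> 64 -> 32 -> 16 -> 8 -> 4 -> 2, the five UNetUp's map
# 2 -> 4 -> 8 -> 16 -> 32 -> 64, and upsample_and_pad restores 128x128:
# Upsample x2 gives 128, ZeroPad2d((1, 0, 1, 0)) gives 129, and the 4x4 conv
# with padding=1 gives (129 + 2 - 4) + 1 = 128.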

# Example 3: WarpModule
    def __init__(self, body_channels=3, cloth_channels=19, dropout=0.5):
        super(WarpModule, self).__init__()

        ######################
        # Body pre-encoding  #  (top left of SwapNet diagram)
        ######################
        self.body_down1 = UNetDown(body_channels, 64, normalize=False)
        self.body_down2 = UNetDown(64, 128)
        self.body_down3 = UNetDown(128, 256)
        self.body_down4 = UNetDown(256, 512, dropout=dropout)

        ######################
        # Cloth pre-encoding #  (bottom left of SwapNet diagram)
        ######################
        self.cloth_down1 = UNetDown(cloth_channels, 64, normalize=False)
        self.cloth_down2 = UNetDown(64, 128)
        self.cloth_down3 = UNetDown(128, 256)
        self.cloth_down4 = UNetDown(256, 512)
        self.cloth_down5 = UNetDown(512, 1024, dropout=dropout)
        self.cloth_down6 = UNetDown(1024, 1024, normalize=False, dropout=dropout)
        # the two UNetUp's below are used WITHOUT concatenation,
        # hence their input sizes do not double
        self.cloth_up1 = UNetUp(1024, 1024)
        self.cloth_up2 = UNetUp(1024, 512)

        ######################
        #      Resblocks     #  (middle of SwapNet diagram)
        ######################
        self.resblocks = nn.Sequential(
            # Dropout placement inside the resblocks is a guess; SwapNet doesn't specify it.
            ResidualBlock(1024, dropout=dropout),
            ResidualBlock(1024, dropout=dropout),
            ResidualBlock(1024, dropout=dropout),
            ResidualBlock(1024, dropout=dropout),
        )

        ######################
        #    Dual Decoding   #  (right of SwapNet diagram, maybe)
        ######################
        # The SwapNet diagram only labels a "cloth" decoder, so it's unclear whether
        # the authors actually do the dual decoding done here. It's used anyway
        # because it makes more sense for a two-stream encoder; the idea is adapted
        # from "Multi-view Image Generation from a Single-View".
        # ---------------------
        # input: encoded features (1024 = body_d4 (512) cat cloth encoding (512))
        self.dual_up1 = DualUNetUp(1024, 256)
        # input dual_up1 (256) & cat body_d3 (256) cloth_d3 (256)
        self.dual_up2 = DualUNetUp(3 * 256, 128)
        # input dual_up2 (128) & cat body_d2 (128) cloth_d2 (128)
        self.dual_up3 = DualUNetUp(3 * 128, 64)

        # The dual decoder stops one level short of down1, so the U-Net output is
        # half the size of the original image; upsample by 2 (and pad) to restore it.
        self.upsample_and_pad = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(3 * 64, cloth_channels, 4, padding=1),
            nn.Tanh(),
        )
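
# -----------------------------------------------------------------------------
# Hedged sketch: ResidualBlock and DualUNetUp are also undefined in this
# listing. The hypothetical reconstructions below match WarpModule's channel
# arithmetic: DualUNetUp(3 * 256, 128) implies each stage concatenates its
# output with one body skip and one cloth skip of the same width. The actual
# definitions may differ.
# -----------------------------------------------------------------------------
import torch
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Standard two-conv residual block with optional dropout in between."""

    def __init__(self, channels, dropout=0.0):
        super(ResidualBlock, self).__init__()
        layers = [
            nn.Conv2d(channels, channels, 3, padding=1, bias=False),
            nn.InstanceNorm2d(channels),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout(dropout))
        layers += [
            nn.Conv2d(channels, channels, 3, padding=1, bias=False),
            nn.InstanceNorm2d(channels),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return x + self.model(x)


class DualUNetUp(nn.Module):
    """Upsamples, then concatenates the matching body and cloth encoder
    features, which is why the next stage sees 3 * out_size channels."""

    def __init__(self, in_size, out_size, dropout=0.0):
        super(DualUNetUp, self).__init__()
        layers = [
            nn.ConvTranspose2d(in_size, out_size, 4, stride=2, padding=1, bias=False),
            nn.InstanceNorm2d(out_size),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers.append(nn.Dropout(dropout))
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip_body, skip_cloth):
        x = self.model(x)
        return torch.cat((x, skip_body, skip_cloth), 1)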