    def __init__(self, name_scope, indim, latentdim, half=False):
        # tf.Module exposes `name_scope` as a read-only property, so pass the
        # scope to Keras via `name` instead of assigning self.name_scope
        super(VAE, self).__init__(name=name_scope)
        self.half = half
        if self.half is False:
            self.encoder = tf.keras.Sequential([
                tf.keras.layers.Dense(latentdim * 2),
                tf.keras.layers.Activation('elu'),
                tf.keras.layers.Dense(latentdim * 2),
                tf.keras.layers.Activation('elu'),
                tf.keras.layers.Dense(latentdim * 2),
            ], name="encoder")
            self.mean = tf.keras.layers.Dense(latentdim, name="mean")
            self.logvar = tf.keras.layers.Dense(latentdim, name="logvar")
            self.bikld = BiKLD()

        dec_out = indim
        self.decoder = tf.keras.Sequential([
            tf.keras.layers.Dense(latentdim * 2),
            tf.keras.layers.Activation('elu'),
            tf.keras.layers.Dense(latentdim * 2),
            tf.keras.layers.Activation('elu'),
            tf.keras.layers.Dense(dec_out),
        ], name="decoder")

        self.sampler = reparameterize()
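A minimal sketch of how the pieces above could fit in a forward pass, assuming half=False. The call conventions of reparameterize and BiKLD are not shown in this snippet, so the sampler and KL signatures below are assumptions, not the codebase's actual API.

    def call(self, x):
        # hypothetical forward pass for the TF VAE sketched above
        h = self.encoder(x)                          # shared hidden features
        mean, logvar = self.mean(h), self.logvar(h)  # posterior parameters
        z = self.sampler(mean, logvar)               # reparameterized sample
        recon = self.decoder(z)                      # reconstruction of x
        # KL(q(z|x) || N(0, I)) via the KL helper (assumed argument layout)
        kl = self.bikld([mean, logvar],
                        [tf.zeros_like(mean), tf.zeros_like(logvar)])
        return recon, kl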
Example #2
    def __init__(self, indim, latentdim, half=False):
        super(VAE, self).__init__()

        self.half = half
        if self.half is False:
            # encoder trunk: indim -> 2*latentdim hidden features
            self.encoder = nn.Sequential(
                nn.Linear(indim, latentdim * 2), nn.ELU(inplace=True),
                nn.Linear(latentdim * 2, latentdim * 2), nn.ELU(inplace=True),
                nn.Linear(latentdim * 2, latentdim * 2))
            # heads for the posterior parameters of q(z|x)
            self.mean = nn.Linear(latentdim * 2, latentdim)
            self.logvar = nn.Linear(latentdim * 2, latentdim)
            self.bikld = BiKLD()

        # decoder: latent sample -> reconstruction in input space
        dec_out = indim
        self.decoder = nn.Sequential(nn.Linear(latentdim, latentdim * 2),
                                     nn.ELU(inplace=True),
                                     nn.Linear(latentdim * 2, latentdim * 2),
                                     nn.ELU(inplace=True),
                                     nn.Linear(latentdim * 2, dec_out))

        self.sampler = reparameterize()
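Both variants delegate sampling to reparameterize(). A minimal PyTorch stand-in is sketched below; the (mean, logvar) interface is an assumption, since the real module's code is not part of this snippet.

import torch
import torch.nn as nn

class reparameterize(nn.Module):
    # z = mean + eps * sigma with eps ~ N(0, I): gradients flow through
    # mean and logvar while the randomness stays isolated in eps
    def forward(self, mean, logvar):
        std = torch.exp(0.5 * logvar)  # logvar -> standard deviation
        eps = torch.randn_like(std)    # fresh Gaussian noise per call
        return mean + eps * std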
Example #3
    def __init__(self,
                 hiddim=160,
                 latentdim=12,
                 word_size=[-1, 16, 16],
                 pos_size=[4, 1, 1],
                 nres=4,
                 nlayers=1,
                 nonlinear='elu',
                 dictionary=None,
                 op=['PROD', 'CAT'],
                 lmap_size=0,
                 downsample=2,
                 gpu_ids=None,
                 multigpu_full=False,
                 lambdakl=-1,
                 bg_bias=False,
                 normalize='instance_norm',
                 loss=None,
                 debug_mode=True,
                 batch_size=None):
        super(PNPNet, self).__init__()
        ## basic settings
        # copy first: word_size is a mutable default argument, so mutating it
        # in place would silently change the default for every later call
        word_size = list(word_size)
        word_size[0] = latentdim
        self.word_size = word_size
        self.latentdim = latentdim
        self.hiddim = hiddim
        self.downsample = downsample  # -> downsample times
        self.ds = 2**self.downsample  # -> downsample ratio
        self.nres = nres
        self.nlayers = nlayers
        self.lmap_size = lmap_size
        self.im_size = lmap_size * self.ds
        self.multigpu_full = multigpu_full
        self.bg_bias = bg_bias
        self.normalize = normalize
        self.debug_mode = debug_mode
        self.batch_size = batch_size
        # expected image input (NHWC): [batch, im_size, im_size, 3]
        # dictionary of visual concepts
        self.dictionary = dictionary

        ########## modules ##########
        # proposal networks
        self.reader = Reader(indim=3,
                             hiddim=hiddim,
                             outdim=hiddim,
                             ds_times=self.downsample,
                             normalize=normalize,
                             nlayers=nlayers)
        # 3x3 conv heads (TF ports of nn.Conv2d(hiddim, latentdim, 3, 1, 1))
        # producing the latent mean and log-variance maps
        self.h_mean = h_mean(latentdim)
        self.h_var = h_var(latentdim)

        # visual words
        self.vis_dist = ConceptMapper(word_size)
        self.pos_dist = ConceptMapper(pos_size)

        # neural modules
        self.combine_vis = Combine_Vis(hiddim_v=latentdim, op=op[0])
        self.combine_pos = Combine_Pos(hiddim_p=pos_size[0], op=op[0])

        self.describe_vis = Describe_Vis(hiddim_v=latentdim, op=op[1])
        self.describe_pos = Describe_Pos(hiddim_p=pos_size[0], op=op[1])

        self.transform = Transform(matrix='default')

        # small VAEs for learning bounding boxes and offsets
        # box_vae input: [H, W]
        self.box_vae = VAE("box_vae", indim=2, latentdim=pos_size[0])
        # offset_vae input: [x0, y0, x1, y1] + condition: [H0, W0, H1, W1, im_H, im_W, (im_H-H0), (im_W-W0), (im_H-H1), (im_W-W1)]
        self.offset_vae = VAE("offset_vae", indim=4, latentdim=pos_size[0])

        self.renderer = DistributionRender(hiddim=latentdim)

        # pixel writer
        self.writer = Writer(indim=latentdim,
                             hiddim=hiddim,
                             outdim=3,
                             ds_times=self.downsample,
                             normalize=normalize,
                             nlayers=nlayers)
        ## loss functions & sampler
        self.sampler = reparameterize()
        if lambdakl > 0:
            from lib.LambdaBiKLD import BiKLD
            self.bikld = BiKLD(lambda_t=lambdakl, k=None)
        else:
            from lib.BiKLD import BiKLD
            self.bikld = BiKLD()

        # NOTE: the PyTorch original sums losses (size_average=False); pass
        # reduction=tf.losses.Reduction.SUM to these constructors to match
        if loss == 'l1':
            self.pixelrecon_criterion = tf.losses.MeanAbsoluteError()
        elif loss == 'l2':
            self.pixelrecon_criterion = tf.losses.MeanSquaredError()

        self.pos_criterion = tf.losses.MeanSquaredError()

        ## biases (bias_mean / bias_var from the PyTorch version are not ported here)
        # latent canvas shape in NHWC layout
        self.latent_canvas_size = [
            1, self.lmap_size, self.lmap_size, self.latentdim
        ]
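For scale, a hypothetical instantiation of the TF variant; my_dictionary and the concrete sizes are placeholders, not values from the source.

# lmap_size fixes the latent canvas; the image side then follows as
# im_size = lmap_size * 2**downsample
net = PNPNet(hiddim=160, latentdim=12, lmap_size=16, downsample=2,
             dictionary=my_dictionary, loss='l2', batch_size=32)
assert net.im_size == 64  # 16 * 2**2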
Example #4
    def __init__(self,
                 hiddim=160,
                 latentdim=12,
                 word_size=[-1, 16, 16],
                 pos_size=[4, 1, 1],
                 nres=4,
                 nlayers=1,
                 nonlinear='elu',
                 dictionary=None,
                 op=['PROD', 'CAT'],
                 lmap_size=0,
                 downsample=2,
                 gpu_ids=None,
                 multigpu_full=False,
                 lambdakl=-1,
                 bg_bias=False,
                 normalize='instance_norm',
                 loss=None,
                 debug_mode=True):
        super(PNPNet, self).__init__()
        ## basic settings
        # copy first: word_size is a mutable default argument, so mutating it
        # in place would silently change the default for every later call
        word_size = list(word_size)
        word_size[0] = latentdim
        self.word_size = word_size
        self.latentdim = latentdim
        self.hiddim = hiddim
        self.downsample = downsample  # -> downsample times
        self.ds = 2**self.downsample  # -> downsample ratio
        self.nres = nres
        self.nlayers = nlayers
        self.lmap_size = lmap_size
        self.im_size = lmap_size * self.ds
        self.multigpu_full = multigpu_full
        self.bg_bias = bg_bias
        self.normalize = normalize
        self.debug_mode = debug_mode

        self.color = [
            'gray', 'green', 'blue', 'purple', 'yellow', 'cyan', 'brown', 'red'
        ]
        self.size = ['large', 'small']
        self.shape = ['sphere', 'cube', 'cylinder']
        self.material = ['rubber', 'metal']
        # ground-truth attribute order is shape, material, color, size;
        # dictionaries here are stored as (color, size, shape, material)
        self.dictionaries = [self.color, self.size, self.shape, self.material]
        # self.order = [2, 3, 0, 1] would map this storage order back to gt order
        self.dimension = sum(len(l) for l in self.dictionaries)
        # dictionary of visual concepts
        self.dictionary = dictionary

        ########## modules ##########
        # proposal networks
        self.reader = Reader(indim=3,
                             hiddim=hiddim,
                             outdim=hiddim,
                             ds_times=self.downsample,
                             normalize=normalize,
                             nlayers=nlayers)
        self.h_mean = nn.Conv2d(hiddim, latentdim, 3, 1, 1)
        self.h_var = nn.Conv2d(hiddim, latentdim, 3, 1, 1)

        # pixel writer
        self.writer = Writer(indim=latentdim,
                             hiddim=hiddim,
                             outdim=3,
                             ds_times=self.downsample,
                             normalize=normalize,
                             nlayers=nlayers)

        # visual words (vis_dist is disabled in this variant)
        # self.vis_dist = ConceptMapper(word_size, len(dictionary))
        self.pos_dist = ConceptMapper(pos_size, len(dictionary))

        self.renderer = DistributionRender(hiddim=latentdim)

        # neural modules
        self.combine = Combine(hiddim_v=latentdim,
                               hiddim_p=pos_size[0],
                               op=op[0])
        self.describe = Describe(hiddim_v=latentdim,
                                 hiddim_p=pos_size[0],
                                 op=op[1])
        self.transform = Transform(matrix='default')

        # small VAEs for learning bounding boxes and offsets
        # box_vae input: [H, W]
        self.box_vae = VAE(indim=2, latentdim=pos_size[0])
        # offset_vae input: [x0, y0, x1, y1] + condition: [H0, W0, H1, W1, im_H, im_W, (im_H-H0), (im_W-W0), (im_H-H1), (im_W-W1)]
        self.offset_vae = VAE(indim=4, latentdim=pos_size[0])

        ## loss functions & sampler
        self.sampler = reparameterize()
        if lambdakl > 0:
            from lib.LambdaBiKLD import BiKLD
            self.bikld = BiKLD(lambda_t=lambdakl, k=None)
        else:
            from lib.BiKLD import BiKLD
            self.bikld = BiKLD()

        # size_average=False in the original corresponds to reduction='sum';
        # setting the deprecated attribute after construction is ignored by
        # current PyTorch, so pass it through the constructor instead
        if loss == 'l1':
            self.pixelrecon_criterion = nn.L1Loss(reduction='sum')
        elif loss == 'l2':
            self.pixelrecon_criterion = nn.MSELoss(reduction='sum')

        self.pos_criterion = nn.MSELoss(reduction='sum')

        ## biases
        self.bias_mean = nn.Linear(1,
                                   self.latentdim * self.lmap_size *
                                   self.lmap_size,
                                   bias=False)
        self.bias_var = nn.Linear(1,
                                  self.latentdim * self.lmap_size *
                                  self.lmap_size,
                                  bias=False)
        self.latent_canvas_size = torch.Size(
            [1, self.latentdim, self.lmap_size, self.lmap_size])
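Both constructors fall back to lib.BiKLD when lambdakl <= 0. Its implementation is not shown here, so the sketch below is an assumption: a KL divergence between two diagonal Gaussians, each given as a (mean, logvar) pair, summed over all dimensions.

import torch
import torch.nn as nn

class BiKLD(nn.Module):
    # KL( N(q_mean, exp(q_logvar)) || N(p_mean, exp(p_logvar)) ), summed;
    # a plausible stand-in for lib.BiKLD, not the library's actual code
    def forward(self, q, p):
        (q_mean, q_logvar), (p_mean, p_logvar) = q, p
        kl = 0.5 * (p_logvar - q_logvar
                    + (q_logvar.exp() + (q_mean - p_mean).pow(2)) / p_logvar.exp()
                    - 1.0)
        return kl.sum()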