示例#1
0
    def __init__(self,
                 input_dim,
                 attributes_dim=0,
                 output_dim=None,
                 hidden_dim=512,
                 pooling='avg',
                 mlp_normalization='none'):
        """Create the two MLPs (``net1`` and ``net2``) owned by this layer.

        Args:
            input_dim: Base feature width. ``net1`` takes an input of width
                ``3 * input_dim + 2 * attributes_dim``.
            attributes_dim: Extra width that is counted twice in the
                ``net1`` input size. Defaults to 0.
            output_dim: Output feature width. When ``None`` it falls back
                to ``input_dim``.
            hidden_dim: Hidden width of ``net1`` and input/hidden width of
                ``net2``.
            pooling: Pooling mode, must be ``'sum'`` or ``'avg'``. It is
                only validated and stored here.
            mlp_normalization: Passed to ``build_mlp`` as ``batch_norm``
                for both MLPs.
        """
        super(GraphTripleConv, self).__init__()

        resolved_output_dim = input_dim if output_dim is None else output_dim
        self.input_dim = input_dim
        self.output_dim = resolved_output_dim
        self.hidden_dim = hidden_dim

        assert pooling in ('sum', 'avg'), 'Invalid pooling "%s"' % pooling
        self.pooling = pooling

        # net1: (3 * input + 2 * attributes) -> hidden -> (2 * hidden + output).
        net1_in_width = 3 * input_dim + 2 * attributes_dim
        net1_out_width = 2 * hidden_dim + resolved_output_dim
        candidate_widths = (net1_in_width, hidden_dim, net1_out_width)
        # Drop any None entries before handing the sizes to build_mlp.
        net1_widths = [width for width in candidate_widths if width is not None]
        self.net1 = build_mlp(net1_widths, batch_norm=mlp_normalization)
        self.net1.apply(_init_weights)

        # net2: hidden -> hidden -> output.
        net2_widths = [hidden_dim, hidden_dim, resolved_output_dim]
        self.net2 = build_mlp(net2_widths, batch_norm=mlp_normalization)
        self.net2.apply(_init_weights)
    def __init__(self,
                 vocab,
                 image_size=(64, 64),
                 embedding_dim=128,
                 gconv_dim=128,
                 gconv_hidden_dim=512,
                 gconv_pooling='avg',
                 gconv_num_layers=5,
                 mask_size=32,
                 mlp_normalization='none',
                 appearance_normalization='',
                 activation='',
                 n_downsample_global=4,
                 box_dim=128,
                 use_attributes=False,
                 box_noise_dim=64,
                 mask_noise_dim=64,
                 pool_size=100,
                 rep_size=32):
        """Assemble every sub-network of the model.

        Args:
            vocab: Mapping that must provide ``'object_idx_to_name'`` and
                ``'pred_idx_to_name'`` (their lengths size the embedding
                tables) and, when ``use_attributes`` is true,
                ``'num_attributes'``. It is also forwarded to
                ``AppearanceEncoder``.
            image_size: Stored as ``self.image_size``.
            embedding_dim: Width of the object and predicate embeddings,
                and input width of the first graph layer.
            gconv_dim: Output width of the first graph layer and input
                width of the remaining graph layers.
            gconv_hidden_dim: Hidden width of the graph layers and of the
                box MLP.
            gconv_pooling: Pooling mode forwarded to the graph layers.
            gconv_num_layers: Number of graph layers. ``0`` uses a single
                ``nn.Linear`` instead; values above 1 add a
                ``GraphTripleConvNet`` with ``gconv_num_layers - 1``
                layers. Must not be negative.
            mask_size: Forwarded to ``mask_net``.
            mlp_normalization: Forwarded to ``build_mlp`` (as
                ``batch_norm``) and to the graph layers.
            appearance_normalization: Forwarded to ``AppearanceEncoder``
                as ``normalization``.
            activation: Forwarded to ``AppearanceEncoder``.
            n_downsample_global: Forwarded to ``define_G``.
            box_dim: Input width of the box MLP.
            use_attributes: When true, ``vocab['num_attributes']`` is used
                as the attribute width of the first graph layer;
                otherwise that width is 0.
            box_noise_dim: Stored as ``self.box_noise_dim``.
            mask_noise_dim: Stored as ``self.mask_noise_dim`` and added to
                ``gconv_dim`` to form ``self.g_mask_dim``.
            pool_size: Forwarded to ``VectorPool``.
            rep_size: Output width of the representation MLP; also added
                to the object count to size the generator input.

        Raises:
            ValueError: If ``gconv_num_layers`` is negative. Without this
                check ``self.gconv`` would silently never be created.
        """
        super(Model, self).__init__()

        # Fail fast: a negative count matches neither branch below and
        # would leave the module without a ``gconv`` attribute.
        if gconv_num_layers < 0:
            raise ValueError(
                'gconv_num_layers must be >= 0, got %r' % (gconv_num_layers,))

        self.vocab = vocab
        self.image_size = image_size
        self.use_attributes = use_attributes
        self.box_noise_dim = box_noise_dim
        self.mask_noise_dim = mask_noise_dim
        self.object_size = 64
        self.fake_pool = VectorPool(pool_size)

        # The object count is taken from 'object_idx_to_name'
        # (an earlier revision used 'object_to_idx').
        self.num_objs = len(vocab['object_idx_to_name'])
        self.num_preds = len(vocab['pred_idx_to_name'])
        self.obj_embeddings = nn.Embedding(self.num_objs, embedding_dim)
        self.pred_embeddings = nn.Embedding(self.num_preds, embedding_dim)

        attributes_dim = vocab['num_attributes'] if use_attributes else 0

        # First graph layer: plain linear projection when no graph
        # convolution is requested, otherwise a single GraphTripleConv.
        if gconv_num_layers == 0:
            self.gconv = nn.Linear(embedding_dim, gconv_dim)
        else:
            self.gconv = GraphTripleConv(
                input_dim=embedding_dim,
                attributes_dim=attributes_dim,
                output_dim=gconv_dim,
                hidden_dim=gconv_hidden_dim,
                pooling=gconv_pooling,
                mlp_normalization=mlp_normalization,
            )

        # Remaining graph layers, only present when more than one is asked for.
        self.gconv_net = None
        if gconv_num_layers > 1:
            self.gconv_net = GraphTripleConvNet(
                input_dim=gconv_dim,
                hidden_dim=gconv_hidden_dim,
                pooling=gconv_pooling,
                num_layers=gconv_num_layers - 1,
                mlp_normalization=mlp_normalization,
            )

        # Box MLP: box_dim -> gconv_hidden_dim -> 4.
        box_net_dim = 4
        self.box_dim = box_dim
        box_net_layers = [self.box_dim, gconv_hidden_dim, box_net_dim]
        self.box_net = build_mlp(box_net_layers, batch_norm=mlp_normalization)

        # Mask network input is the graph feature width plus the mask noise.
        self.g_mask_dim = gconv_dim + mask_noise_dim
        self.mask_net = mask_net(self.g_mask_dim, mask_size)

        # Representation MLP: g_mask_dim -> 64 -> rep_size.
        self.repr_input = self.g_mask_dim
        rep_hidden_size = 64
        repr_layers = [self.repr_input, rep_hidden_size, rep_size]
        self.repr_net = build_mlp(repr_layers, batch_norm=mlp_normalization)

        appearance_encoder_kwargs = {
            'vocab': vocab,
            'arch': 'C4-64-2,C4-128-2,C4-256-2',
            'normalization': appearance_normalization,
            'activation': activation,
            'padding': 'valid',
            'vecs_size': self.g_mask_dim,
        }
        self.image_encoder = AppearanceEncoder(**appearance_encoder_kwargs)

        # Generator input channel count: object count plus rep_size.
        netG_input_nc = self.num_objs + rep_size
        output_nc = 3
        ngf = 64
        n_blocks_global = 9
        norm = 'instance'
        self.layout_to_image = define_G(netG_input_nc, output_nc, ngf,
                                        n_downsample_global, n_blocks_global,
                                        norm)