Example #1
    def __init__(self, cfg):
        super().__init__()
        self.generator = DataParallel(GeneratorFactory.create_instance(cfg),
                                      device_ids=[0]).cuda()
        self.use_history = cfg.use_history
        if self.use_history:
            self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim,
                                              cfg.hidden_dim,
                                              batch_first=False),
                                       dim=1,
                                       device_ids=[0]).cuda()

        self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim),
                                          device_ids=[0]).cuda()
        self.use_image_encoder = cfg.use_fg
        if self.use_image_encoder:
            self.image_encoder = DataParallel(ImageEncoder(cfg),
                                              device_ids=[0]).cuda()

        self.condition_encoder = DataParallel(ConditionEncoder(cfg),
                                              device_ids=[0]).cuda()

        self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg),
                                                device_ids=[0]).cuda()

        self.cfg = cfg
        self.results_path = cfg.results_path
        if not os.path.exists(cfg.results_path):
            os.mkdir(cfg.results_path)
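These examples all read hyper-parameters and flags off a cfg configuration object that is not shown. As a minimal stand-in for experimenting with Example #1, something like the namespace below would satisfy the attributes referenced above; every field name is taken from the code, but the values are placeholder assumptions and the real configuration ships with the project.

from types import SimpleNamespace

# Placeholder config; values are illustrative assumptions only.
cfg = SimpleNamespace(
    use_history=True,          # enables the GRU branch
    use_fg=True,               # enables the image encoder branch
    input_dim=512,
    hidden_dim=512,
    results_path='./results',
)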
Example #2
    def __init__(self, cfg):
        """A recurrent GAN model, each time step an generated image
        (x'_{t-1}) and the current question q_{t} are fed to the RNN
        to produce the conditioning vector for the GAN.
        The following equations describe this model:

            - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1})
            - x^{~}_{t} = G(z | c_{t})
        """
        self.generator = DataParallel(
            GeneratorFactory.create_instance(cfg),
            device_ids=[0]).cuda()

        self.rnn = nn.DataParallel(
            nn.GRU(cfg.input_dim, cfg.hidden_dim,
                   batch_first=False),
            dim=1,
            device_ids=[0]).cuda()

        self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim),
                                          device_ids=[0]).cuda()

        self.image_encoder = DataParallel(ImageEncoder(cfg),
                                          device_ids=[0]).cuda()

        self.condition_encoder = DataParallel(ConditionEncoder(cfg),
                                              device_ids=[0]).cuda()

        self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg),
                                                device_ids=[0]).cuda()

        self.cfg = cfg
        self.results_path = cfg.results_path
        if not os.path.exists(cfg.results_path):
            os.mkdir(cfg.results_path)
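The docstring above defines the recurrence c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1}) followed by x^{~}_{t} = G(z | c_{t}), but none of the listed constructors show the step itself. The method below is a hypothetical sketch of how the modules instantiated in __init__ could be composed for one time step; the method name, the noise_dim field, and the exact encoder call signatures are assumptions rather than part of the original code.

    def generate_step(self, question_tokens, prev_image, hidden):
        # Hypothetical single time step; all signatures below are assumed.
        q_t = self.sentence_encoder(question_tokens)         # encode q_{t}
        img_feat = self.image_encoder(prev_image)            # encode x^{~}_{t-1}
        fused = self.condition_encoder(q_t, img_feat)        # fuse question + image
        out, hidden = self.rnn(fused.unsqueeze(0), hidden)   # advance h_{t-1} -> h_{t}
        c_t = self.layer_norm(out.squeeze(0))                # conditioning vector c_{t}
        z = torch.randn(c_t.size(0), self.cfg.noise_dim,
                        device=c_t.device)                   # latent noise
        fake_image = self.generator(z, c_t)                  # x^{~}_{t} = G(z | c_{t})
        return fake_image, hidden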
Example #3
    def __init__(self, cfg):
        """A recurrent GAN model, each time step an generated image
        (x'_{t-1}) and the current question q_{t} are fed to the RNN
        to produce the conditioning vector for the GAN.
        The following equations describe this model:

            - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1})
            - x^{~}_{t} = G(z | c_{t})
        """
        super(InferenceDrawer, self).__init__()
        self.generator = DataParallel(GeneratorFactory.create_instance(cfg),
                                      device_ids=[0]).cuda()

        self.generator.eval()

        self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim,
                                          cfg.hidden_dim,
                                          batch_first=False),
                                   dim=1,
                                   device_ids=[0]).cuda()
        self.rnn.eval()

        self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim),
                                          device_ids=[0]).cuda()
        self.layer_norm.eval()

        self.image_encoder = DataParallel(ImageEncoder(cfg),
                                          device_ids=[0]).cuda()
        self.image_encoder.eval()

        self.condition_encoder = DataParallel(ConditionEncoder(cfg),
                                              device_ids=[0]).cuda()
        self.condition_encoder.eval()

        self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg),
                                                device_ids=[0]).cuda()
        self.sentence_encoder.eval()

        #############################Utterance Generation Module################
        # 1. Drawer Image Encoder
        self.drawer_img_encoder = DataParallel(
            DrawerImageEncoder(cfg=cfg)).cuda()
        # self.img_encoder = TellerImageEncoder(network=cfg.teller_img_encoder_net)
        self.drawer_img_encoder.eval()
        # 2. Dialog Encoder
        self.dialog_encoder = DataParallel(TellerDialogEncoder(cfg=cfg)).cuda()
        self.dialog_encoder.eval()
        # 3. Utterance Decoder
        self.utterance_decoder = DataParallel(
            DrawerDialogDecoder(cfg=cfg)).cuda()
        self.utterance_decoder.eval()
        ########################################################################

        self.cfg = cfg
        self.results_path = cfg.results_path
        if not os.path.exists(cfg.results_path):
            os.mkdir(cfg.results_path)
        self.unorm = UnNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
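Calling eval() on every sub-module, as Example #3 does, only switches layers such as BatchNorm and Dropout to inference behaviour; it does not disable gradient tracking. Code that uses this class for generation would therefore normally also run under torch.no_grad(), roughly as below (the drawer's forward interface is not shown above, so the call is only indicated).

drawer = InferenceDrawer(cfg)
with torch.no_grad():        # skip autograd bookkeeping during generation
    # ... run the drawer's generation loop here; its forward API is not listed above
    pass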
Example #4
    def __init__(self, cfg):
        """A recurrent GAN model, each time step a generated image
        (x'_{t-1}) and the current question q_{t} are fed to the RNN
        to produce the conditioning vector for the GAN.
        The following equations describe this model:

            - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1})
            - x^{~}_{t} = G(z | c_{t})
        """
        super(RecurrentGAN, self).__init__()

        # region Models-Instantiation

        self.generator = DataParallel(
            GeneratorFactory.create_instance(cfg)).cuda()

        self.discriminator = DataParallel(
            DiscriminatorFactory.create_instance(cfg)).cuda()

        self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim,
                                          cfg.hidden_dim,
                                          batch_first=False), dim=1).cuda()

        self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim)).cuda()

        self.image_encoder = DataParallel(ImageEncoder(cfg)).cuda()

        self.condition_encoder = DataParallel(ConditionEncoder(cfg)).cuda()

        self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg)).cuda()

        # endregion

        # region Optimizers

        self.generator_optimizer = OPTIM[cfg.generator_optimizer](
            self.generator.parameters(),
            cfg.generator_lr,
            cfg.generator_beta1,
            cfg.generator_beta2,
            cfg.generator_weight_decay)

        self.discriminator_optimizer = OPTIM[cfg.discriminator_optimizer](
            self.discriminator.parameters(),
            cfg.discriminator_lr,
            cfg.discriminator_beta1,
            cfg.discriminator_beta2,
            cfg.discriminator_weight_decay)

        self.rnn_optimizer = OPTIM[cfg.rnn_optimizer](
            self.rnn.parameters(),
            cfg.rnn_lr)

        self.sentence_encoder_optimizer = OPTIM[cfg.gru_optimizer](
            self.sentence_encoder.parameters(),
            cfg.gru_lr)

        self.use_image_encoder = cfg.use_fg
        feature_encoding_params = list(self.condition_encoder.parameters())
        if self.use_image_encoder:
            feature_encoding_params += list(self.image_encoder.parameters())

        self.feature_encoders_optimizer = OPTIM['adam'](
            feature_encoding_params,
            cfg.feature_encoder_lr
        )

        # endregion

        # region Criterion

        self.criterion = LOSSES[cfg.criterion]()
        self.aux_criterion = DataParallel(torch.nn.BCELoss()).cuda()

        # endregion

        self.cfg = cfg
        self.logger = Logger(cfg.log_path, cfg.exp_name)
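Examples #4 and #5 build their optimizers and criteria through OPTIM and LOSSES lookup tables that are not included in the listings. The fragment below is only a plausible sketch of registries that would satisfy the calls above; the entry names and signatures are assumptions, not the project's actual definitions.

import torch

# Hypothetical registries mapping config strings to constructors.
OPTIM = {
    # matches OPTIM['adam'](params, lr) as well as the five-argument calls above
    'adam': lambda params, lr, beta1=0.9, beta2=0.999, weight_decay=0.0:
        torch.optim.Adam(params, lr=lr, betas=(beta1, beta2),
                         weight_decay=weight_decay),
    'rmsprop': lambda params, lr, *unused: torch.optim.RMSprop(params, lr=lr),
}

LOSSES = {
    # each entry is instantiated with no arguments: LOSSES[cfg.criterion]()
    'bce': torch.nn.BCELoss,
    'hinge': torch.nn.HingeEmbeddingLoss,
}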
Example #5
    def __init__(self, cfg):
        """A recurrent GAN model, each time step a generated image
        (x'_{t-1}) and the current question q_{t} are fed to the RNN
        to produce the conditioning vector for the GAN.
        The following equations describe this model:

            - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1})
            - x^{~}_{t} = G(z | c_{t})
        """
        super(RecurrentGAN_Mingyang, self).__init__()

        # region Models-Instantiation

        ###############################Original DataParallel###################
        self.generator = DataParallel(
            GeneratorFactory.create_instance(cfg)).cuda()

        self.discriminator = DataParallel(
            DiscriminatorFactory.create_instance(cfg)).cuda()

        self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim,
                                          cfg.hidden_dim,
                                          batch_first=False),
                                   dim=1).cuda()
        # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim,
        #                                           cfg.hidden_dim,
        #                                           batch_first=False),
        #                                    dim=1).cuda()

        self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim)).cuda()

        self.image_encoder = DataParallel(ImageEncoder(cfg)).cuda()

        self.condition_encoder = DataParallel(ConditionEncoder(cfg)).cuda()

        self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg)).cuda()
        #######################################################################
        # self.generator = GeneratorFactory.create_instance(cfg).cuda()

        # self.discriminator = DiscriminatorFactory.create_instance(cfg).cuda()

        # self.rnn = nn.GRU(cfg.input_dim,cfg.hidden_dim,batch_first=False).cuda()
        # # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim,
        # #                                           cfg.hidden_dim,
        # #                                           batch_first=False),
        # #                                    dim=1).cuda()

        # self.layer_norm = nn.LayerNorm(cfg.hidden_dim).cuda()

        # self.image_encoder = ImageEncoder(cfg).cuda()

        # self.condition_encoder = ConditionEncoder(cfg).cuda()

        # self.sentence_encoder = SentenceEncoder(cfg).cuda()

        # endregion

        # region Optimizers

        self.generator_optimizer = OPTIM[cfg.generator_optimizer](
            self.generator.parameters(), cfg.generator_lr, cfg.generator_beta1,
            cfg.generator_beta2, cfg.generator_weight_decay)

        self.discriminator_optimizer = OPTIM[cfg.discriminator_optimizer](
            self.discriminator.parameters(), cfg.discriminator_lr,
            cfg.discriminator_beta1, cfg.discriminator_beta2,
            cfg.discriminator_weight_decay)

        self.rnn_optimizer = OPTIM[cfg.rnn_optimizer](self.rnn.parameters(),
                                                      cfg.rnn_lr)

        self.sentence_encoder_optimizer = OPTIM[cfg.gru_optimizer](
            self.sentence_encoder.parameters(), cfg.gru_lr)

        self.use_image_encoder = cfg.use_fg
        feature_encoding_params = list(self.condition_encoder.parameters())
        if self.use_image_encoder:
            feature_encoding_params += list(self.image_encoder.parameters())

        self.feature_encoders_optimizer = OPTIM['adam'](
            feature_encoding_params, cfg.feature_encoder_lr)

        # endregion

        # region Criterion

        self.criterion = LOSSES[cfg.criterion]()
        self.aux_criterion = DataParallel(torch.nn.BCELoss()).cuda()

        # Added by Mingyang for segmentation loss
        if cfg.balanced_seg:
            label_weights = np.array([
                3.02674201e-01, 1.91545454e-03, 2.90009221e-04, 7.50949673e-04,
                1.08670452e-03, 1.11353785e-01, 4.00971053e-04, 1.06240113e-02,
                1.59590824e-01, 5.38960105e-02, 3.36431602e-02, 3.99029734e-02,
                1.88888847e-02, 2.06441476e-03, 6.33775290e-02, 5.81920411e-03,
                3.79528817e-03, 7.87975754e-02, 2.73547355e-03, 1.08308135e-01,
                0.00000000e+00, 8.44408475e-05
            ])
            # invert the class frequencies to obtain per-class weights
            label_weights = 1 / label_weights
            # class 20 has zero frequency, so zero out its weight after inversion
            label_weights[20] = 0
            label_weights = label_weights / np.min(label_weights[:20])
            # convert the numpy array to a float tensor
            label_weights = torch.from_numpy(label_weights)
            label_weights = label_weights.type(torch.FloatTensor)
            self.seg_criterion = DataParallel(
                torch.nn.CrossEntropyLoss(weight=label_weights)).cuda()
        else:
            self.seg_criterion = DataParallel(
                torch.nn.CrossEntropyLoss()).cuda()

        # endregion

        self.cfg = cfg
        self.logger = Logger(cfg.log_path, cfg.exp_name)

        # un-normalization for converting generated tensors back to viewable images
        self.unorm = UnNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
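Examples #3 and #5 also rely on an UnNormalize(mean, std) helper for turning generated tensors back into viewable images. Its definition is not part of the listings; a minimal sketch, assuming it simply reverses a torchvision-style Normalize in place, could look like this.

class UnNormalize:
    """Reverses (x - mean) / std channel-wise so a tensor can be visualized."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # tensor: (C, H, W); modified in place, mirroring torchvision's Normalize
        for channel, m, s in zip(tensor, self.mean, self.std):
            channel.mul_(s).add_(m)
        return tensor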