def __init__(self, cfg):
    super().__init__()
    self.generator = DataParallel(
        GeneratorFactory.create_instance(cfg), device_ids=[0]).cuda()

    self.use_history = cfg.use_history
    if self.use_history:
        self.rnn = nn.DataParallel(
            nn.GRU(cfg.input_dim, cfg.hidden_dim, batch_first=False),
            dim=1, device_ids=[0]).cuda()
        self.layer_norm = nn.DataParallel(
            nn.LayerNorm(cfg.hidden_dim), device_ids=[0]).cuda()

    self.use_image_encoder = cfg.use_fg
    if self.use_image_encoder:
        self.image_encoder = DataParallel(
            ImageEncoder(cfg), device_ids=[0]).cuda()

    self.condition_encoder = DataParallel(
        ConditionEncoder(cfg), device_ids=[0]).cuda()
    self.sentence_encoder = nn.DataParallel(
        SentenceEncoder(cfg), device_ids=[0]).cuda()

    self.cfg = cfg
    self.results_path = cfg.results_path
    if not os.path.exists(cfg.results_path):
        os.mkdir(cfg.results_path)
def __init__(self, cfg): """A recurrent GAN model, each time step an generated image (x'_{t-1}) and the current question q_{t} are fed to the RNN to produce the conditioning vector for the GAN. The following equations describe this model: - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1}) - x^{~}_{t} = G(z | c_{t}) """ self.generator = DataParallel( GeneratorFactory.create_instance(cfg), device_ids=[0]).cuda() self.rnn = nn.DataParallel( nn.GRU(cfg.input_dim, cfg.hidden_dim, batch_first=False), dim=1, device_ids=[0]).cuda() self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim), device_ids=[0]).cuda() self.image_encoder = DataParallel(ImageEncoder(cfg), device_ids=[0]).cuda() self.condition_encoder = DataParallel(ConditionEncoder(cfg), device_ids=[0]).cuda() self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg), device_ids=[0]).cuda() self.cfg = cfg self.results_path = cfg.results_path if not os.path.exists(cfg.results_path): os.mkdir(cfg.results_path)
def __init__(self, cfg): """A recurrent GAN model, each time step an generated image (x'_{t-1}) and the current question q_{t} are fed to the RNN to produce the conditioning vector for the GAN. The following equations describe this model: - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1}) - x^{~}_{t} = G(z | c_{t}) """ super(InferenceDrawer, self).__init__() self.generator = DataParallel(GeneratorFactory.create_instance(cfg), device_ids=[0]).cuda() self.generator.eval() self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim, cfg.hidden_dim, batch_first=False), dim=1, device_ids=[0]).cuda() self.rnn.eval() self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim), device_ids=[0]).cuda() self.layer_norm.eval() self.image_encoder = DataParallel(ImageEncoder(cfg), device_ids=[0]).cuda() self.image_encoder.eval() self.condition_encoder = DataParallel(ConditionEncoder(cfg), device_ids=[0]).cuda() self.condition_encoder.eval() self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg), device_ids=[0]).cuda() self.sentence_encoder.eval() #############################Utterance Generation Module################ # Drawer Img Encoder self.drawer_img_encoder = DataParallel( DrawerImageEncoder(cfg=cfg)).cuda() #self.img_encoder = TellerImageEncoder(network=cfg.teller_img_encoder_net) self.drawer_img_encoder.eval() # 2. Dialog Encoder self.dialog_encoder = DataParallel(TellerDialogEncoder(cfg=cfg)).cuda() self.dialog_encoder.eval() # 2. Caption Decoder self.utterance_decoder = DataParallel( DrawerDialogDecoder(cfg=cfg)).cuda() self.utterance_decoder.eval() ######################################################################## self.cfg = cfg self.results_path = cfg.results_path if not os.path.exists(cfg.results_path): os.mkdir(cfg.results_path) self.unorm = UnNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
def __init__(self, cfg): """A recurrent GAN model, each time step a generated image (x'_{t-1}) and the current question q_{t} are fed to the RNN to produce the conditioning vector for the GAN. The following equations describe this model: - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1}) - x^{~}_{t} = G(z | c_{t}) """ super(RecurrentGAN, self).__init__() # region Models-Instantiation self.generator = DataParallel( GeneratorFactory.create_instance(cfg)).cuda() self.discriminator = DataParallel( DiscriminatorFactory.create_instance(cfg)).cuda() self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim, cfg.hidden_dim, batch_first=False), dim=1).cuda() self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim)).cuda() self.image_encoder = DataParallel(ImageEncoder(cfg)).cuda() self.condition_encoder = DataParallel(ConditionEncoder(cfg)).cuda() self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg)).cuda() # endregion # region Optimizers self.generator_optimizer = OPTIM[cfg.generator_optimizer]( self.generator.parameters(), cfg.generator_lr, cfg.generator_beta1, cfg.generator_beta2, cfg.generator_weight_decay) self.discriminator_optimizer = OPTIM[cfg.discriminator_optimizer]( self.discriminator.parameters(), cfg.discriminator_lr, cfg.discriminator_beta1, cfg.discriminator_beta2, cfg.discriminator_weight_decay) self.rnn_optimizer = OPTIM[cfg.rnn_optimizer]( self.rnn.parameters(), cfg.rnn_lr) self.sentence_encoder_optimizer = OPTIM[cfg.gru_optimizer]( self.sentence_encoder.parameters(), cfg.gru_lr) self.use_image_encoder = cfg.use_fg feature_encoding_params = list(self.condition_encoder.parameters()) if self.use_image_encoder: feature_encoding_params += list(self.image_encoder.parameters()) self.feature_encoders_optimizer = OPTIM['adam']( feature_encoding_params, cfg.feature_encoder_lr ) # endregion # region Criterion self.criterion = LOSSES[cfg.criterion]() self.aux_criterion = DataParallel(torch.nn.BCELoss()).cuda() # endregion self.cfg = cfg self.logger = Logger(cfg.log_path, cfg.exp_name)
def __init__(self, cfg): """A recurrent GAN model, each time step a generated image (x'_{t-1}) and the current question q_{t} are fed to the RNN to produce the conditioning vector for the GAN. The following equations describe this model: - c_{t} = RNN(h_{t-1}, q_{t}, x^{~}_{t-1}) - x^{~}_{t} = G(z | c_{t}) """ super(RecurrentGAN_Mingyang, self).__init__() # region Models-Instantiation ###############################Original DataParallel################### self.generator = DataParallel( GeneratorFactory.create_instance(cfg)).cuda() self.discriminator = DataParallel( DiscriminatorFactory.create_instance(cfg)).cuda() self.rnn = nn.DataParallel(nn.GRU(cfg.input_dim, cfg.hidden_dim, batch_first=False), dim=1).cuda() # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim, # cfg.hidden_dim, # batch_first=False), dim=1).cuda() self.layer_norm = nn.DataParallel(nn.LayerNorm(cfg.hidden_dim)).cuda() self.image_encoder = DataParallel(ImageEncoder(cfg)).cuda() self.condition_encoder = DataParallel(ConditionEncoder(cfg)).cuda() self.sentence_encoder = nn.DataParallel(SentenceEncoder(cfg)).cuda() ####################################################################### # self.generator = GeneratorFactory.create_instance(cfg).cuda() # self.discriminator = DiscriminatorFactory.create_instance(cfg).cuda() # self.rnn = nn.GRU(cfg.input_dim,cfg.hidden_dim,batch_first=False).cuda() # # self.rnn = DistributedDataParallel(nn.GRU(cfg.input_dim, # # cfg.hidden_dim, # # batch_first=False), dim=1).cuda() # self.layer_norm = nn.LayerNorm(cfg.hidden_dim).cuda() # self.image_encoder = =ImageEncoder(cfg).cuda() # self.condition_encoder = ConditionEncoder(cfg).cuda() # self.sentence_encoder = SentenceEncoder(cfg).cuda() # endregion # region Optimizers self.generator_optimizer = OPTIM[cfg.generator_optimizer]( self.generator.parameters(), cfg.generator_lr, cfg.generator_beta1, cfg.generator_beta2, cfg.generator_weight_decay) self.discriminator_optimizer = OPTIM[cfg.discriminator_optimizer]( self.discriminator.parameters(), cfg.discriminator_lr, cfg.discriminator_beta1, cfg.discriminator_beta2, cfg.discriminator_weight_decay) self.rnn_optimizer = OPTIM[cfg.rnn_optimizer](self.rnn.parameters(), cfg.rnn_lr) self.sentence_encoder_optimizer = OPTIM[cfg.gru_optimizer]( self.sentence_encoder.parameters(), cfg.gru_lr) self.use_image_encoder = cfg.use_fg feature_encoding_params = list(self.condition_encoder.parameters()) if self.use_image_encoder: feature_encoding_params += list(self.image_encoder.parameters()) self.feature_encoders_optimizer = OPTIM['adam']( feature_encoding_params, cfg.feature_encoder_lr) # endregion # region Criterion self.criterion = LOSSES[cfg.criterion]() self.aux_criterion = DataParallel(torch.nn.BCELoss()).cuda() #Added by Mingyang for segmentation loss if cfg.balanced_seg: label_weights = np.array([ 3.02674201e-01, 1.91545454e-03, 2.90009221e-04, 7.50949673e-04, 1.08670452e-03, 1.11353785e-01, 4.00971053e-04, 1.06240113e-02, 1.59590824e-01, 5.38960105e-02, 3.36431602e-02, 3.99029734e-02, 1.88888847e-02, 2.06441476e-03, 6.33775290e-02, 5.81920411e-03, 3.79528817e-03, 7.87975754e-02, 2.73547355e-03, 1.08308135e-01, 0.00000000e+00, 8.44408475e-05 ]) #reverse the loss label_weights = 1 / label_weights label_weights[20] = 0 label_weights = label_weights / np.min(label_weights[:20]) #convert numpy to tensor label_weights = torch.from_numpy(label_weights) label_weights = label_weights.type(torch.FloatTensor) self.seg_criterion = DataParallel( torch.nn.CrossEntropyLoss(weight=label_weights)).cuda() else: 
self.seg_criterion = DataParallel( torch.nn.CrossEntropyLoss()).cuda() # endregion self.cfg = cfg self.logger = Logger(cfg.log_path, cfg.exp_name) # define unorm self.unorm = UnNormalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
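# UnNormalize is referenced above but not defined in this section. A common
# implementation (assumed here, not necessarily the project's exact one) simply
# reverses torchvision's transforms.Normalize so that generated tensors in the
# [-1, 1] range can be mapped back toward [0, 1] for saving or visualization.
class UnNormalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # tensor: (C, H, W) normalized as (x - mean) / std; invert channel-wise in place
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor

# Usage sketch: self.unorm(fake_image[0]) undoes Normalize(mean=(0.5,)*3, std=(0.5,)*3).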