def assign_network_at_scale_begin(self, scale):
    self.generator = self.generator_pyramid[scale]
    self.discriminator = self.discriminator_pyramid[scale]
    self.noise_optimal = self.noise_optimal_pyramid[scale]
    params_d = list(self.discriminator.parameters())
    params_g = list(self.generator.parameters())
    self.optimizer_d = torch.optim.Adam(params_d, self.lr, (self.beta1, self.beta2),
                                        weight_decay=self.weight_decay)
    self.optimizer_g = torch.optim.Adam(params_g, self.lr, (self.beta1, self.beta2),
                                        weight_decay=self.weight_decay)
    # --- init weights: re-initialize on every 4th scale, otherwise warm-start
    # from the networks of the previous pyramid scale
    if not scale % 4:
        print("Init weight G & D")
        self.apply(weights_init(self.config['init']))
        self.discriminator.apply(weights_init('gaussian'))
    else:
        print("Copy weight from previous pyramid G & D")
        self.generator.load_state_dict(
            self.generator_pyramid[scale - 1].state_dict())
        self.discriminator.load_state_dict(
            self.discriminator_pyramid[scale - 1].state_dict())

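# Nearly every snippet in this file initializes weights through a `weights_init`
# factory that returns a per-module function for Module.apply. A minimal sketch,
# modeled on the public MUNIT utils; the exact helper in each of these projects
# may differ.
import math

import torch.nn.init as init


def weights_init(init_type='gaussian'):
    def init_fun(m):
        classname = m.__class__.__name__
        if (classname.startswith('Conv') or classname.startswith('Linear')) \
                and hasattr(m, 'weight'):
            if init_type == 'gaussian':
                init.normal_(m.weight.data, 0.0, 0.02)
            elif init_type == 'xavier':
                init.xavier_normal_(m.weight.data, gain=math.sqrt(2))
            elif init_type == 'kaiming':
                init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
            elif init_type == 'orthogonal':
                init.orthogonal_(m.weight.data, gain=math.sqrt(2))
            elif init_type != 'default':
                raise ValueError("Unsupported initialization: {}".format(init_type))
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias.data, 0.0)
    return init_fun
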
def __init__(self, hps, use_global=False):
    super(V2VModel, self).__init__()
    # Model config
    self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    self.style_dim = hps.gen_style_dim
    # Initialization
    self.gen_a = Generator(hps.input_dim_a, hps, use_global)  # auto-encoder for domain a
    self.gen_b = Generator(hps.input_dim_b, hps, use_global)  # auto-encoder for domain b
    self.dis_a = Discriminator(hps.input_dim_a, hps)  # discriminator for domain a
    self.dis_b = Discriminator(hps.input_dim_b, hps)  # discriminator for domain b
    # Setup the optimizers
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_optimizer = optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=hps.lr, betas=(hps.beta1, hps.beta2),
                                    weight_decay=hps.weight_decay)
    self.gen_optimizer = optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=hps.lr, betas=(hps.beta1, hps.beta2),
                                    weight_decay=hps.weight_decay)
    self.dis_scheduler = get_scheduler(self.dis_optimizer, hps)
    self.gen_scheduler = get_scheduler(self.gen_optimizer, hps)
    if hps.g_comp > 0:
        self.temp_loss = TemporalLoss()
    # Network weight initialization
    self.apply(weights_init(hps.init))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))

def __init__(self, hyperparameters):
    super(UNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.gen_a = VAEGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = VAEGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))
    # Load VGG model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

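# The trainers above and below also share a `get_scheduler` helper. A plausible
# sketch following the MUNIT utils (assumed: a dict-style config with
# 'lr_policy', 'step_size', and 'gamma' keys; projects passing attribute-style
# configs would adapt the lookups accordingly).
from torch.optim import lr_scheduler


def get_scheduler(optimizer, hyperparameters, iterations=-1):
    if 'lr_policy' not in hyperparameters or hyperparameters['lr_policy'] == 'constant':
        return None  # keep the learning rate constant
    if hyperparameters['lr_policy'] == 'step':
        return lr_scheduler.StepLR(optimizer,
                                   step_size=hyperparameters['step_size'],
                                   gamma=hyperparameters['gamma'],
                                   last_epoch=iterations)
    raise NotImplementedError(
        'learning rate policy [%s] is not implemented' % hyperparameters['lr_policy'])
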
def __init__(self, input_size, hidden_size, num_class, with_dropout=False):
    super(MLPClassifier, self).__init__()
    self.h1_weights = nn.Linear(input_size, hidden_size)
    self.h2_weights = nn.Linear(hidden_size, num_class)
    self.with_dropout = with_dropout
    utils.weights_init(self)

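# Snippets such as the one above call utils.weights_init(model) directly on a
# module instead of going through a factory. A hypothetical in-place variant of
# that helper (the name and the 0.02 std are illustrative assumptions, not the
# projects' actual code):
import torch.nn as nn


def weights_init_inplace(model):
    # Walk every submodule and re-initialize linear/conv weights in place.
    for m in model.modules():
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            nn.init.normal_(m.weight, 0.0, 0.02)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
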
def __init__(self, param):
    super(Trainer, self).__init__()
    lr_d = param['lr_d']
    # Initiate the networks
    self.generator = get_generator(param)
    self.discriminator_bg = get_discriminator(param)
    self.discriminator_rf = get_discriminator(param)
    # ########################################################################
    # FLOPs/params profiling (disabled):
    # from thop import profile
    # from thop import clever_format
    # input_i = torch.randn(1, 3, 224, 224)
    # macs, params = profile(self.discriminator_bg, inputs=(input_i, ))
    # print('========================')
    # print('MACs: ', macs)
    # print('PARAMs: ', params)
    # print('------------------------')
    # macs, params = clever_format([macs, params], "%.3f")
    # print('Clever MACs: ', macs)
    # print('Clever PARAMs: ', params)
    # print('========================')
    # ########################################################################
    # Setup the optimizers
    beta1 = param['beta1']
    beta2 = param['beta2']
    dis_params = list(self.discriminator_bg.parameters()) + list(
        self.discriminator_rf.parameters())
    self.dis_opt = torch.optim.Adam(dis_params,
                                    lr=lr_d,
                                    betas=(beta1, beta2),
                                    weight_decay=param['weight_decay'])
    # SGD for the generator, with a 10x learning rate on the '10x' group
    self.gen_opt = torch.optim.SGD(params=[{
        'params': self.get_params(self.generator, key='1x'),
        'lr': param.lr_g
    }, {
        'params': self.get_params(self.generator, key='10x'),
        'lr': 10 * param.lr_g
    }], momentum=param.momentum)
    self.dis_scheduler = get_scheduler(self.dis_opt, param)
    self.gen_scheduler = get_scheduler(self.gen_opt, param)
    # self.dis_scheduler = None
    # self.gen_scheduler = None
    # Network weight initialization
    # self.apply(weights_init(param['init']))
    self.discriminator_bg.apply(weights_init('gaussian'))
    self.discriminator_rf.apply(weights_init('gaussian'))
    self.perceptual_criterion = PerceptualLoss()
    self.retina_criterion = RetinaLoss()
    self.semantic_criterion = nn.CrossEntropyLoss(ignore_index=255)
    self.best_result = 0

def __init__(self, config):
    """Initialize the model."""
    super(RNNLM_Model, self).__init__()
    self.config = config
    ### YOUR CODE HERE
    ### Define the Embedding layer. Hint: check nn.Embedding
    self.embedding = nn.Embedding(config.vocab_size, config.embed_size)
    ### Define the H, I, b1 in HW4. Hint: check nn.Parameter
    self.H = nn.Parameter(
        torch.randn((config.hidden_size, config.hidden_size)))
    self.I = nn.Parameter(
        torch.randn((config.embed_size, config.hidden_size)))
    self.b1 = nn.Parameter(torch.zeros((1, config.hidden_size)))
    ### Define the projection layer, U, b2 in HW4
    self.smax = nn.Softmax(dim=0)
    self.U = nn.Parameter(
        torch.randn((config.hidden_size, config.vocab_size)))
    self.b2 = nn.Parameter(torch.zeros((1, config.vocab_size)))
    ## Define the input dropout and output dropout.
    self.input_drop = nn.Dropout(p=config.dropout)
    self.output_drop = nn.Dropout(p=config.dropout)
    ### END YOUR CODE
    ## Initialize the weights.
    weights_init(self)

def __init__(self, hyperparameters):
    super(MUNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    self.gen_a = AdaInGenerator(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = AdaInGenerator(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = ImageDiscriminator(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = ImageDiscriminator(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    display_size = int(hyperparameters['display_size'])
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, hyperparameters):
    super(MUNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.gen_a = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    self.criterionGAN = GANLoss(hyperparameters['dis']['gan_type']).cuda()
    self.featureLoss = nn.MSELoss(reduction='mean')
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))

def setup(model, opt):
    if opt.criterion == "l1":
        criterion = nn.L1Loss().cuda()
    elif opt.criterion == "mse":
        criterion = nn.MSELoss().cuda()
    elif opt.criterion == "crossentropy":
        criterion = nn.CrossEntropyLoss().cuda()
    elif opt.criterion == "hingeEmbedding":
        criterion = nn.HingeEmbeddingLoss().cuda()
    elif opt.criterion == "tripletmargin":
        criterion = nn.TripletMarginLoss(margin=opt.margin,
                                         swap=opt.anchorswap).cuda()

    parameters = filter(lambda p: p.requires_grad, model.parameters())
    if opt.optimType == 'sgd':
        optimizer = optim.SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                              nesterov=opt.nesterov, weight_decay=opt.weightDecay)
    elif opt.optimType == 'adam':
        optimizer = optim.Adam(parameters, lr=opt.maxlr,
                               weight_decay=opt.weightDecay)

    if opt.weight_init:
        utils.weights_init(model, opt)

    return model, criterion, optimizer

def __init__(self, hyperparameters, resume_epoch=-1, snapshot_dir=None):
    super(UNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks.
    self.gen = VAEGen(
        hyperparameters['input_dim'] + hyperparameters['n_datasets'],
        hyperparameters['gen'],
        hyperparameters['n_datasets'])  # Auto-encoder for domain a.
    self.dis = MsImageDis(
        hyperparameters['input_dim'] + hyperparameters['n_datasets'],
        hyperparameters['dis'])  # Discriminator for domain a.
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.sup = UNet(input_channels=hyperparameters['input_dim'],
                    num_classes=2).cuda()
    # Setup the optimizers.
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis.parameters())
    gen_params = list(self.gen.parameters()) + list(self.sup.parameters())
    self.dis_opt = torch.optim.Adam(
        [p for p in dis_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam(
        [p for p in gen_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization.
    self.apply(weights_init(hyperparameters['init']))
    self.dis.apply(weights_init('gaussian'))
    # Presetting one-hot encoding vectors.
    self.one_hot_img = torch.zeros(hyperparameters['n_datasets'],
                                   hyperparameters['batch_size'],
                                   hyperparameters['n_datasets'],
                                   256, 256).cuda()
    self.one_hot_h = torch.zeros(hyperparameters['n_datasets'],
                                 hyperparameters['batch_size'],
                                 hyperparameters['n_datasets'],
                                 64, 64).cuda()
    for i in range(hyperparameters['n_datasets']):
        self.one_hot_img[i, :, i, :, :].fill_(1)
        self.one_hot_h[i, :, i, :, :].fill_(1)
    if resume_epoch != -1:
        self.resume(snapshot_dir, hyperparameters)

def __init__(self, hyperparameters):
    super(MUNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.is_ganilla_gen = hyperparameters['gen']['ganilla_gen']
    if not self.is_ganilla_gen:
        self.gen_a = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
        self.gen_b = AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    else:
        self.gen_a = AdaINGanilla(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a with ganilla architecture
        self.gen_b = AdaINGanilla(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b with ganilla architecture
    print(self.gen_a)
    if hyperparameters['dis']['dis_type'] == 'patch':
        if hyperparameters['dis']['use_patch_gan']:
            self.dis_a = PatchDis(hyperparameters['input_dim_a'], hyperparameters['dis'])
            self.dis_b = PatchDis(hyperparameters['input_dim_b'], hyperparameters['dis'])
        else:
            self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
            self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
        print(self.dis_a)
    else:
        self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
        self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))
    # Load VGG model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        self.VggExtract = VggExtract(self.vgg)
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, config):
    self.config = config

    # Create dataloader
    source_loader, target_loader, nclasses = datasets.form_visda_datasets(
        config=config, ignore_anomaly=False)
    self.source_loader = source_loader
    self.target_loader = target_loader
    self.nclasses = nclasses

    # Create model
    self.netF, self.nemb = models.form_models(config)
    print(self.netF)
    self.netC = models.Classifier(self.nemb, self.nclasses, nlayers=1)
    utils.weights_init(self.netC)
    print(self.netC)
    if self.config.exp == 'openset':
        self.ano_class_id = self.source_loader.dataset.class_to_idx[
            self.config.anomaly_class]
    self.netF = torch.nn.DataParallel(self.netF).cuda()
    self.netC = torch.nn.DataParallel(self.netC).cuda()

    # Create optimizer
    self.optimizerF = optim.SGD(self.netF.parameters(), lr=self.config.lr,
                                momentum=config.momentum, weight_decay=0.0005)
    self.optimizerC = optim.SGD(self.netC.parameters(), lr=self.config.lrC,
                                momentum=config.momentum, weight_decay=0.0005)
    self.lr_scheduler_F = optim.lr_scheduler.StepLR(self.optimizerF,
                                                    step_size=7000, gamma=0.1)
    self.lr_scheduler_C = optim.lr_scheduler.StepLR(self.optimizerC,
                                                    step_size=7000, gamma=0.1)

    # Restore checkpoint if one exists
    print('Restoring checkpoint ...')
    try:
        ckpt_data = torch.load(
            os.path.join(config.logdir, 'checkpoint.pth'))
        self.start_iter = ckpt_data['iter']
        self.netF.load_state_dict(ckpt_data['F_dict'])
        self.netC.load_state_dict(ckpt_data['C_dict'])
    except Exception:
        # If loading failed, begin from scratch
        print('Checkpoint not found. Training from scratch ...')
        self.start_iter = 0

    # Other vars
    self.criterion = nn.CrossEntropyLoss().cuda()

def __init__(self, hyperparameters):
    super(IPMNet_Trainer, self).__init__()
    lr = hyperparameters['lr']
    vgg_weight_file = hyperparameters['vgg_weight_file']
    # Initiate the networks
    self.gen_a = AdaINGen(
        hyperparameters['input_dim_a'],
        hyperparameters['gen'])  # auto-encoder for domain a
    # auto-encoder for domain b, shared with domain a;
    # was: AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])
    self.gen_b = self.gen_a
    self.dis_a = MsImageDis(
        hyperparameters['input_dim_a'],
        hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(
        hyperparameters['input_dim_b'],
        hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(
        self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(
        self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam(
        [p for p in dis_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam(
        [p for p in gen_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init(hyperparameters['init']))
    self.dis_b.apply(weights_init(hyperparameters['init']))
    # Load VGGFace model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_resnet50(vgg_weight_file)
        self.vgg.eval()
        self.vgg.fc.reset_parameters()
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, n_filters, image_size, spec_norm=True):
    super(ReadoutDiscriminator, self).__init__()
    # number of features in the top level (before latents)
    self.n_features = n_filters * 8 * (image_size // 16) ** 2
    if spec_norm:
        self.readout = nn.utils.spectral_norm(nn.Linear(self.n_features + 1, 1))
    else:
        self.readout = nn.Linear(self.n_features + 1, 1)
    weights_init(self)

def __init__(self, hyperparameters):
    super(UNIT_Trainer, self).__init__()
    lr = hyperparameters["lr"]
    # Initiate the networks
    self.gen_a = VAEGen(
        hyperparameters["input_dim_a"],
        hyperparameters["gen"])  # auto-encoder for domain a
    self.gen_b = VAEGen(
        hyperparameters["input_dim_b"],
        hyperparameters["gen"])  # auto-encoder for domain b
    self.dis_a = MsImageDis(
        hyperparameters["input_dim_a"],
        hyperparameters["dis"])  # discriminator for domain a
    self.dis_b = MsImageDis(
        hyperparameters["input_dim_b"],
        hyperparameters["dis"])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    # Setup the optimizers
    beta1 = hyperparameters["beta1"]
    beta2 = hyperparameters["beta2"]
    dis_params = list(self.dis_a.parameters()) + list(
        self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(
        self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam(
        [p for p in dis_params if p.requires_grad],
        lr=lr,
        betas=(beta1, beta2),
        weight_decay=hyperparameters["weight_decay"],
    )
    self.gen_opt = torch.optim.Adam(
        [p for p in gen_params if p.requires_grad],
        lr=lr,
        betas=(beta1, beta2),
        weight_decay=hyperparameters["weight_decay"],
    )
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters["init"]))
    self.dis_a.apply(weights_init("gaussian"))
    self.dis_b.apply(weights_init("gaussian"))
    # Load VGG model if needed
    if "vgg_w" in hyperparameters.keys() and hyperparameters["vgg_w"] > 0:
        self.vgg = load_vgg16(hyperparameters["vgg_model_path"] + "/models")
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

def setup(disp_model, pose_model, opt):
    parameters = chain(disp_model.parameters(), pose_model.parameters())
    if opt.optimType == 'sgd':
        optimizer = optim.SGD(parameters, lr=opt.lr, momentum=opt.momentum,
                              nesterov=opt.nesterov, weight_decay=opt.weightDecay)
    elif opt.optimType == 'adam':
        optimizer = optim.Adam(parameters, lr=opt.maxlr,
                               weight_decay=opt.weightDecay)

    if opt.weight_init:
        utils.weights_init(disp_model, opt)
        utils.weights_init(pose_model, opt)

    return disp_model, pose_model, optimizer

def __init__(self, hyperparameters):
    super(Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.trait_dim = hyperparameters['gen']['trait_dim']
    # auto-encoder for domain a
    self.gen_a = VAEGen(hyperparameters['input_dim'],
                        hyperparameters['basis_encoder_dims'],
                        hyperparameters['trait_encoder_dims'],
                        hyperparameters['decoder_dims'],
                        self.trait_dim)
    # auto-encoder for domain b
    self.gen_b = VAEGen(hyperparameters['input_dim'],
                        hyperparameters['basis_encoder_dims'],
                        hyperparameters['trait_encoder_dims'],
                        hyperparameters['decoder_dims'],
                        self.trait_dim)
    # discriminator for domain a
    self.dis_a = Discriminator(hyperparameters['input_dim'],
                               hyperparameters['dis_dims'], 1)
    # discriminator for domain b
    self.dis_b = Discriminator(hyperparameters['input_dim'],
                               hyperparameters['dis_dims'], 1)
    # fix the noise used in sampling
    self.trait_a = torch.randn(8, self.trait_dim, 1, 1)
    self.trait_b = torch.randn(8, self.trait_dim, 1, 1)
    # Setup the optimizers
    dis_params = list(self.dis_a.parameters()) + \
        list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + \
        list(self.gen_b.parameters())
    for _p in gen_params:
        print(_p.data.shape)
    self.dis_opt = torch.optim.Adam(
        [p for p in dis_params if p.requires_grad],
        lr=lr, weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam(
        [p for p in gen_params if p.requires_grad],
        lr=lr, weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.gen_a.apply(weights_init('gaussian'))
    self.gen_b.apply(weights_init('gaussian'))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))

def setup(model, opt):
    if opt.criterion == "nllLoss":
        criterion = nn.NLLLoss().cuda()

    if opt.optimType == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=opt.lr,
                              momentum=opt.momentum, nesterov=opt.nesterov,
                              weight_decay=opt.weightDecay)
    elif opt.optimType == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=opt.maxlr,
                               weight_decay=opt.weightDecay)

    if opt.weight_init:
        utils.weights_init(model, opt)

    return model, criterion, optimizer

def __init__(self, in_channels, num_feat=64, num_repeat=6):
    super().__init__()
    layers = []
    layers.append(
        nn.Conv2d(in_channels, num_feat, kernel_size=4, stride=2, padding=1))
    layers.append(nn.LeakyReLU(0.02, inplace=True))
    curr_dim = num_feat
    # each block halves the spatial size and doubles the channels
    for _ in range(1, num_repeat):
        layers.append(
            nn.Conv2d(curr_dim, curr_dim * 2, kernel_size=4, stride=2, padding=1))
        layers.append(nn.LeakyReLU(0.02, inplace=True))
        curr_dim = curr_dim * 2
    self.main = nn.Sequential(*layers)
    self.conv1 = nn.Conv2d(curr_dim, 1, kernel_size=3, stride=1, padding=1)
    self.apply(weights_init())

def __init__(self, image_size, in_channels, num_feat=64, num_repeat=5, gamma=10):
    super().__init__()
    layers = []
    self.gamma = gamma
    layers.append(
        nn.Conv2d(in_channels, num_feat, kernel_size=4, stride=2, padding=1))
    layers.append(nn.LeakyReLU(0.02, inplace=True))
    curr_dim = num_feat
    for _ in range(1, num_repeat):
        layers.append(
            nn.Conv2d(curr_dim, curr_dim * 2, kernel_size=4, stride=2, padding=1))
        layers.append(nn.LeakyReLU(0.02, inplace=True))
        curr_dim = curr_dim * 2
    # spatial size after num_repeat stride-2 convolutions
    in_feat = image_size // 2 ** num_repeat
    self.main = nn.Sequential(*layers)
    self.linear = nn.Linear(curr_dim * in_feat ** 2, 1024)
    self.apply(weights_init())

def __init__(self, in_channels, num_feat, num_res):
    super().__init__()
    layers = []
    layers.append(
        nn.Conv2d(in_channels, num_feat, kernel_size=7, stride=1, padding=3,
                  bias=False))
    layers.append(nn.ReLU(inplace=True))
    curr_dim = num_feat
    # downsampling
    for _ in range(2):
        layers.append(
            nn.Conv2d(curr_dim, curr_dim * 2, kernel_size=4, stride=2,
                      padding=1, bias=False))
        layers.append(nn.BatchNorm2d(curr_dim * 2))
        layers.append(nn.ReLU(inplace=True))
        curr_dim = curr_dim * 2
    # bottleneck residual blocks
    for _ in range(num_res):
        layers.append(ResidualBlock(curr_dim, curr_dim))
    # upsampling
    for _ in range(2):
        layers.append(
            nn.ConvTranspose2d(curr_dim, curr_dim // 2, kernel_size=4, stride=2,
                               padding=1, bias=False))
        layers.append(nn.BatchNorm2d(curr_dim // 2))
        layers.append(nn.ReLU(inplace=True))
        curr_dim = curr_dim // 2
    layers.append(
        nn.Conv2d(curr_dim, curr_dim, kernel_size=3, stride=1, padding=1,
                  bias=False))
    layers.append(nn.BatchNorm2d(curr_dim))
    layers.append(nn.ReLU(inplace=True))
    layers.append(
        nn.Conv2d(curr_dim, in_channels, kernel_size=7, stride=1, padding=3,
                  bias=False))
    layers.append(nn.Tanh())
    self.main = nn.Sequential(*layers)
    self.apply(weights_init())

def __init__(self, input_dim, params, fp16):
    super(MsImageDis, self).__init__()
    self.n_layer = params['n_layer']        # number of layers in D
    self.gan_type = params['gan_type']      # GAN loss [lsgan/nsgan], defaults to lsgan
    self.dim = params['dim']                # number of filters in the last layer
    self.norm = params['norm']              # normalization type
    self.activ = params['activ']            # activation function
    self.num_scales = params['num_scales']  # number of image scales, defaults to 3
    self.pad_type = params['pad_type']      # padding type
    self.LAMBDA = params['LAMBDA']          # regularization hyperparameter
    self.non_local = params['non_local']    # self attention
    self.n_res = params['n_res']            # number of skip-connection layers
    self.input_dim = input_dim              # number of image channels, defaults to 3
    self.fp16 = fp16
    # downsampling between scales
    self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1],
                                   count_include_pad=False)
    # build self.cnns
    if not self.gan_type == 'wgan':
        self.cnns = nn.ModuleList()
        for _ in range(self.num_scales):
            Dis = self._make_net()
            Dis.apply(weights_init('gaussian'))
            self.cnns.append(Dis)
        # num_scales (default 3) CNNs are defined, each giving its own output
    else:
        self.cnn = self.one_cnn()

def __init__(self, opt=None):
    super(LSTM_Model, self).__init__()
    self.input_time_window = 4
    self.output_time_horizon = 4
    self.temporal_stride = 1
    self.temporal_frames = 2
    self.time_steps = (self.input_time_window - self.temporal_frames + 1) \
        // self.temporal_stride
    # print("time steps ", self.time_steps)
    self.tau = 5
    self.hidden_size = 64
    self.lstm_layers = 4
    self.input_shape = (2, 2, 32, 32)
    self.output_shape = (2, 4, 32, 32)
    self.encoder = E3DLSTM(self.input_shape, self.hidden_size,
                           self.lstm_layers, self.tau)
    self.decoder = nn.Conv3d(self.hidden_size * self.time_steps,
                             self.output_shape[0], [1, 5, 5],
                             padding=(1, 2, 2))
    # self.to(self.device)
    params = self.parameters(recurse=True)
    # TODO learning rate scheduler
    # Weight decay stands for L2 regularization
    self.optimizer = torch.optim.Adam(params, lr=1e-3, weight_decay=0)
    self.apply(weights_init())

def __init__(self, hyperparameters, multi_gpus=False):
    super(HiSD_Trainer, self).__init__()
    # Initiate the networks
    self.multi_gpus = multi_gpus
    self.models = HiSD(hyperparameters)
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    self.dis_opt = torch.optim.Adam(self.models.dis.parameters(),
                                    lr=hyperparameters['lr_dis'],
                                    betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam(
        [{'params': self.models.gen.encoder.parameters()},
         {'params': self.models.gen.translators.parameters()},
         {'params': self.models.gen.extractors.parameters()},
         {'params': self.models.gen.decoder.parameters()},
         # Different LR for mappers.
         {'params': self.models.gen.mappers.parameters(),
          'lr': hyperparameters['lr_gen_mappers']}],
        lr=hyperparameters['lr_gen_others'],
        betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.apply(weights_init(hyperparameters['init']))
    # For the historical-average version of the generator
    self.models.gen_test = copy.deepcopy(self.models.gen)

def __init__(self, n_latents, n_filters, n_img_channels, image_size=32,
             hidden_dim=100, hard_norm=False, dropout=0):
    super(Discriminator, self).__init__()
    self.linear01 = nn.Linear(n_img_channels * image_size ** 2 +
                              n_filters * (image_size // 2) ** 2 + 2, hidden_dim)
    self.linear12 = nn.Linear(n_filters * (image_size // 2) ** 2 +
                              n_filters * 2 * (image_size // 4) ** 2 + 2, hidden_dim)
    self.linear23 = nn.Linear(n_filters * 2 * (image_size // 4) ** 2 +
                              n_filters * 4 * (image_size // 8) ** 2 + 2, hidden_dim)
    self.linear34 = nn.Linear(n_filters * 4 * (image_size // 8) ** 2 +
                              n_filters * 8 * (image_size // 16) ** 2 + 2, hidden_dim)
    self.linear45 = nn.Linear(n_filters * 8 * (image_size // 16) ** 2 +
                              n_latents + 2, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim * 5, 1)
    self.relu = nn.LeakyReLU()
    self.normalizer = NormalizationLayer() if hard_norm else null()
    self.dropout = nn.Dropout(dropout)
    self.sigma2 = 0.01
    weights_init(self)

def __init__(self, hyperparameters):
    super(UNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.gen_a = VAEGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = VAEGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    if not hyperparameters['origin']:
        self.dis_a = MultiscaleDiscriminator(hyperparameters['input_dim_a'],  # discriminator for a
                                             ndf=64,
                                             n_layers=3,
                                             norm_layer=nn.InstanceNorm2d,
                                             use_sigmoid=False,
                                             num_D=2,
                                             getIntermFeat=True)
        self.dis_b = MultiscaleDiscriminator(hyperparameters['input_dim_b'],  # discriminator for b
                                             ndf=64,
                                             n_layers=3,
                                             norm_layer=nn.InstanceNorm2d,
                                             use_sigmoid=False,
                                             num_D=2,
                                             getIntermFeat=True)
        self.criterionGAN = GANLoss(use_lsgan=True, tensor=torch.cuda.FloatTensor)
    else:
        self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])
        self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_a.apply(weights_init('gaussian'))
    self.dis_b.apply(weights_init('gaussian'))
    # Load VGG model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, hyperparameters):
    super(aclgan_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.gen_AB = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain A
    self.gen_BA = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain B
    self.dis_A = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain A
    self.dis_B = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain B
    self.dis_2 = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator 2
    # self.dis_2B = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator 2 for domain B
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    self.z_1 = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.z_2 = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    self.z_3 = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_A.parameters()) + \
        list(self.dis_B.parameters()) + list(self.dis_2.parameters())
    gen_params = list(self.gen_AB.parameters()) + list(self.gen_BA.parameters())
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    self.alpha = hyperparameters['alpha']
    self.focus_lam = hyperparameters['focus_loss']
    # Network weight initialization
    self.apply(weights_init(hyperparameters['init']))
    self.dis_A.apply(weights_init('gaussian'))
    self.dis_B.apply(weights_init('gaussian'))
    self.dis_2.apply(weights_init('gaussian'))
    # Load VGG model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, hyperparameters):
    super(MUNIT_Trainer, self).__init__()  # super() calls the parent-class method
    lr = hyperparameters['lr']
    # Initiate the networks; worth studying how the generators and
    # discriminators are actually built
    self.gen_a = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
    self.gen_b = AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
    self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
    # https://blog.csdn.net/liuxiao214/article/details/81037416
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    # fix the noise used in sampling
    display_size = int(hyperparameters['display_size'])
    # s_a and s_b are two different fixed style codes
    self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()  # 16*8*1*1
    self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    # parameters of the two discriminators
    dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
    # parameters of the two generators
    gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
    # optimizers
    self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                    lr=lr, betas=(beta1, beta2),
                                    weight_decay=hyperparameters['weight_decay'])
    # learning-rate schedulers
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    # on Module.apply, see https://zhuanlan.zhihu.com/p/42756654
    self.apply(weights_init(hyperparameters['init']))  # initialize this module
    self.dis_a.apply(weights_init('gaussian'))  # initialize dis_a (a module instance)
    self.dis_b.apply(weights_init('gaussian'))
    # Load VGG model if needed
    if 'vgg_w' in hyperparameters.keys() and hyperparameters['vgg_w'] > 0:
        self.vgg = load_vgg16(hyperparameters['vgg_model_path'] + '/models')
        self.vgg.eval()
        for param in self.vgg.parameters():
            param.requires_grad = False

def __init__(self, n_latents, n_filters, n_img_channels, image_size=32,
             bn=True, noise_before=False, hard_norm=False, spec_norm=True,
             derelu=True):
    super(Inference, self).__init__()
    self.n_latents = n_latents
    self.n_filters = n_filters
    self.n_img_channels = n_img_channels
    p1 = 1 if noise_before else 0
    self.noise_before = noise_before
    self.inference_5from4_conv = BasicBlock(n_filters * 8 + p1, n_latents,
                                            image_size // 16, 1, 0,
                                            spec_norm=spec_norm, bn=bn,
                                            derelu=derelu)
    self.inference_4from3_conv = BasicBlock(n_filters * 4 + p1, n_filters * 8,
                                            4, 2, 1,
                                            spec_norm=spec_norm, bn=bn,
                                            derelu=derelu)
    self.inference_3from2_conv = BasicBlock(n_filters * 2 + p1, n_filters * 4,
                                            4, 2, 1,
                                            spec_norm=spec_norm, bn=bn,
                                            derelu=derelu)
    self.inference_2from1_conv = BasicBlock(n_filters + p1, n_filters * 2,
                                            4, 2, 1,
                                            spec_norm=spec_norm, bn=bn,
                                            derelu=derelu)
    self.inference_1from0_conv = BasicBlock(n_img_channels + p1, n_filters,
                                            4, 2, 1,
                                            derelu=False, bn=bn,
                                            spec_norm=spec_norm)
    self.normalizer = NormalizationLayer() if hard_norm else null()
    self.listed_modules = [self.inference_1from0_conv,
                           self.inference_2from1_conv,
                           self.inference_3from2_conv,
                           self.inference_4from3_conv,
                           self.inference_5from4_conv]
    self.intermediate_state_dict = OrderedDict([('Input', None),
                                                ('Layer1', None),
                                                ('Layer2', None),
                                                ('Layer3', None),
                                                ('Layer4', None),
                                                ('Layer5', None)])
    self.activations = OrderedDict([('Layer1', nn.ReLU()),
                                    ('Layer2', nn.ReLU()),
                                    ('Layer3', nn.ReLU()),
                                    ('Layer4', nn.ReLU()),
                                    ('Layer5', null())])
    self.layer_names = list(self.intermediate_state_dict.keys())
    weights_init(self)
    # noise applied after each conv
    self.sigma2 = 0.01

def __init__(self, hyperparameters):
    super(MUNIT_Trainer, self).__init__()
    lr = hyperparameters['lr']
    # Initiate the networks
    self.gen_b = AdaINGen(
        hyperparameters['input_dim_b'],
        hyperparameters['gen'])  # auto-encoder for domain b
    self.dis_b = MsImageDis(
        hyperparameters['input_dim_b'],
        hyperparameters['new_size'],
        hyperparameters['dis'])  # discriminator for domain b
    self.instancenorm = nn.InstanceNorm2d(512, affine=False)
    self.style_dim = hyperparameters['gen']['style_dim']
    self.reg_param = hyperparameters['reg_param']
    self.beta_step = hyperparameters['beta_step']
    self.target_kl = hyperparameters['target_kl']
    self.gan_type = hyperparameters['gan_type']
    # Setup the optimizers
    beta1 = hyperparameters['beta1']
    beta2 = hyperparameters['beta2']
    dis_params = list(self.dis_b.parameters())
    gen_params = list(self.gen_b.parameters())
    self.dis_opt = torch.optim.Adam(
        [p for p in dis_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.gen_opt = torch.optim.Adam(
        [p for p in gen_params if p.requires_grad],
        lr=lr, betas=(beta1, beta2),
        weight_decay=hyperparameters['weight_decay'])
    self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters)
    self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters)
    # Network weight initialization
    self.gen_b.apply(weights_init(hyperparameters['init']))
    self.dis_b.apply(weights_init('gaussian'))
    # SSIM loss
    self.ssim_loss = pytorch_ssim.SSIM()