def __init__(self, models, lr, args, momentum=0, dampening=0, alpha=0.99,
             eps=1e-8, bn=False, weight_decay=0, nesterov=False):
    gSGD_RMSprop_method.__init__(self, args=args)
    self.models = models
    self.lr_0 = self.lr_t = lr
    self.params = list(models.parameters())
    self.args = args
    self.alpha = alpha
    self.eps = eps
    # Running average of squared gradients, kept separately for conv and
    # fully connected layers.
    self.square_avg = {'cnn': [], 'mlp': []}
    self.bn = bn
    if not self.bn:
        assert self.args.rmsprop_method == 'noweight'
    self.ec_layer = self.cal_ec_layer()
    self.mask = self.cal_mask(models)
    self.cal_internal_optim()
    self.internal_optim.cal_ec_layer(self.ec_layer)
    self.init_square_avg()
def __init__(self, models, lr, args, betas=(0.9, 0.99), eps=1e-8, bn=False,
             weight_decay=0, nesterov=False):
    gSGD_Adam_method.__init__(self, args=args)
    self.models = models
    self.lr_0 = self.lr_t = lr
    self.params = list(models.parameters())
    self.args = args
    self.betas = betas
    self.beta1, self.beta2 = self.betas
    self.eps = eps
    # First and second moment estimates, kept separately for conv and
    # fully connected layers.
    self.exp_avg = {'cnn': [], 'mlp': []}
    self.exp_avg_sq = {'cnn': [], 'mlp': []}
    self.bn = bn
    if not self.bn:
        assert self.args.adam_method == 'noweight'
    self.ec_layer = self.cal_ec_layer()
    self.mask = self.cal_mask()
    self.cal_internal_optim()
    self.internal_optim.cal_ec_layer(self.ec_layer)
    self.init_exp_avg()
    self.init_exp_avg_sq()
    self.itr_num = 0
def __init__(self, models, lr, args, momentum=0.0, bn=False, weight_decay=0,
             nesterov=False):
    gSGD_SGD_method.__init__(self, args=args)
    self.models = models
    self.lr_0 = self.lr_t = lr
    self.params = list(models.parameters())
    self.args = args
    self.momentum = momentum
    self.exp_avg = {'cnn': [], 'mlp': []}
    self.exp_avg_sq = {'cnn': [], 'mlp': []}
    self.bn = bn
    self.weight_decay = weight_decay
    if not self.bn:
        assert self.args.adam_method == 'noweight'
    self.ec_layer = self.cal_ec_layer()
    self.mask = self.cal_mask()
    self.cal_internal_optim()
    self.internal_optim.cal_ec_layer(self.ec_layer)
    self.init_exp_avg()
    self.itr_num = 0
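# The three constructors above set up per-layer optimizer state
# (square_avg for RMSprop; exp_avg / exp_avg_sq for Adam). As a reference
# for what that state tracks, here is a minimal, self-contained sketch of
# the standard RMSprop and Adam update rules; the function and argument
# names below are illustrative and not part of the classes above.
import torch


def rmsprop_step(p, grad, square_avg, lr=1e-2, alpha=0.99, eps=1e-8):
    # square_avg <- alpha * square_avg + (1 - alpha) * grad^2
    square_avg.mul_(alpha).addcmul_(grad, grad, value=1 - alpha)
    p.add_(grad / (square_avg.sqrt() + eps), alpha=-lr)


def adam_step(p, grad, exp_avg, exp_avg_sq, t, lr=1e-3, betas=(0.9, 0.99), eps=1e-8):
    beta1, beta2 = betas
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)               # first moment
    exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)  # second moment
    # Bias-corrected moments (t is the 1-based step count), then the update.
    m_hat = exp_avg / (1 - beta1 ** t)
    v_hat = exp_avg_sq / (1 - beta2 ** t)
    p.add_(m_hat / (v_hat.sqrt() + eps), alpha=-lr)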
def children_and_parameters(m: nn.Module):
    "Return the children of `m` and its direct parameters not registered in modules."
    children = list(m.children())
    children_p = sum([[id(p) for p in c.parameters()] for c in m.children()], [])
    for p in m.parameters():
        if id(p) not in children_p:
            children.append(ParameterModule(p))
    return children
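# `ParameterModule` is used above but not defined in this snippet. A minimal
# sketch of it, matching its use here (a module that just holds a lone
# nn.Parameter), plus a small usage example:
import torch
from torch import nn


class ParameterModule(nn.Module):
    "Register a lone parameter `p` in a module."
    def __init__(self, p: nn.Parameter):
        super().__init__()
        self.val = p

    def forward(self, x):
        return x


class Scaled(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(4, 4)
        self.scale = nn.Parameter(torch.ones(1))  # direct parameter, not in a child


# Returns [Linear(4, 4), ParameterModule(scale)], so the stray `scale`
# parameter is no longer invisible to child-based module walks.
print(children_and_parameters(Scaled()))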
def __init__(self, models, lr, args, momentum=0, dampening=0, weight_decay=0,
             nesterov=False):
    self.models = models
    self.lr_0 = self.lr_t = lr
    self.params = list(models.parameters())
    self.args = args
    self.mask, self.s_mask_idx, self.s_mask_idx_shape = self.cal_mask(models)
    self.cal_internal_optim()
def __init__(self, models, lr, args, momentum=0, dampening=0, weight_decay=0,
             nesterov=False):
    gSGD_method.__init__(self, args=args)
    self.models = models
    self.lr_0 = self.lr_t = lr
    self.params = list(models.parameters())
    self.args = args
    self.ec_layer = self.cal_ec_layer()
    self.mask = self.cal_mask(models)
    self.cal_internal_optim()
    self.internal_optim.cal_ec_layer(self.ec_layer)
        idx = int(generated_caption[i][0])
        caption.append(vocabulary.itos[idx])
    return caption


if __name__ == "__main__":
    train_dataloader = torch.load("train_dataloader.pt")
    test_dataloader = torch.load("test_dataloader.pt")
    train_dataset = torch.load("train_dataset.pt")
    test_dataset = torch.load("test_dataset.pt")

    # Hyper-parameters
    embed_size = 256
    hidden_size = 256
    vocab_size = len(train_dataset.vocab)
    num_layers = 1
    learning_rate = 3e-4

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Initialize the model, loss, and optimizer; padding tokens are
    # excluded from the loss.
    model = ImageToCaption(embed_size, hidden_size, vocab_size, num_layers).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=train_dataset.vocab.stoi["<PAD>"])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    step = load_checkpoint(torch.load("saved_checkpoint.pt"), model, optimizer)
    print_caption(model, device, train_dataset)
totalSample_train = len(batchHelper_train._label)

rHC_val = RandomHandClass.RandomHandClass()
rHC_val.readAllDatabase("./ClassImage_val")
batchHelper_val = BatchHelper.BatchHelp(rHC_val.ImageAll, rHC_val.labelAll)
totalSample_val = len(batchHelper_val._label)

rHC_test = RandomHandClass.RandomHandClass()
rHC_test.readAllDatabase("./ClassImage_test")
batchHelper_test = BatchHelper.BatchHelp(rHC_test.ImageAll, rHC_test.labelAll)
totalSample_test = len(batchHelper_test._label)

criterion = nn.NLLLoss()
optimizer = optim.Adam(models.parameters(), lr=0.0001)  # apply the optimizer to the model

ACC = 0
ACC_Max_val = 0
Average_ACC_val = 0
ACC_Max_test = 0
Average_ACC_test = 0

# Create the model save folder if it does not exist.
if not os.path.exists("./{}".format(FolderSaveModel)):
    os.mkdir("./{}".format(FolderSaveModel))

print("iterations per epoch: {}".format(int(totalSample_train / batch_size)))

for y in range(num_epochs):
    models.train()
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet50(pretrained=True)

# Freeze the pretrained backbone so only the new head is trained.
for param in model.parameters():
    param.requires_grad = False

# Replacement classification head. The original snippet referenced an
# undefined `classifier`; this is one plausible definition (the output size
# is an example). LogSoftmax pairs with the NLLLoss criterion below.
classifier = nn.Sequential(nn.Linear(2048, 512),
                           nn.ReLU(),
                           nn.Dropout(0.2),
                           nn.Linear(512, 102),
                           nn.LogSoftmax(dim=1))
model.fc = classifier

criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
model.to(device)

epochs = 1
steps = 0
running_loss = 0
print_every = 5

for epoch in range(epochs):
    for images, labels in trainloader:
        steps += 1
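        # The snippet truncates here. A hedged sketch of how this
        # transfer-learning step typically continues (assumes `trainloader`
        # yields (images, labels) batches and the LogSoftmax head above):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        logps = model(images)            # log-probabilities from the new head
        loss = criterion(logps, labels)  # NLLLoss expects log-probabilities
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if steps % print_every == 0:
            print("Epoch {}/{}.. train loss: {:.3f}".format(
                epoch + 1, epochs, running_loss / print_every))
            running_loss = 0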
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# (weight_decay=args.weight_decay and momentum=args.momentum were left disabled.)
print(model)
if args.model == 'SimpleNet':
    print(model)

params_to_optimize = []
backprop_depth = 5
# List of modules in the network.
mods = list(model.features.children()) + list(model.classifier.children())
# Extract parameters from the last `backprop_depth` modules in the network
# and collect them in the params_to_optimize list.
for m in mods[::-1][:backprop_depth]:
    params_to_optimize.extend(list(m.parameters()))
optimizer = torch.optim.Adam(params=params_to_optimize, lr=args.lr)


def train(epoch):
    '''Train the model for one epoch.'''
    # Some models use slightly different forward passes at train and test
    # time (e.g., any model with Dropout). This puts the model in train mode
    # (as opposed to eval mode) so it knows which one to use.
    model.train()
    # train loop
def set_grad(m, b):
    "Set requires_grad=b on `m`'s parameters, skipping Linear and BatchNorm2d layers."
    if isinstance(m, (nn.Linear, nn.BatchNorm2d)):
        return
    if hasattr(m, "weight"):
        for p in m.parameters():
            p.requires_grad_(b)
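# `set_grad` is shaped for use with nn.Module.apply, which calls it on every
# submodule. A small usage sketch (the ResNet here is only an example): this
# freezes the convolutional body while leaving Linear and BatchNorm2d layers
# trainable, a common fine-tuning recipe.
from torchvision import models as tv_models

net = tv_models.resnet18(pretrained=False)
net.apply(lambda m: set_grad(m, False))
trainable = {n for n, p in net.named_parameters() if p.requires_grad}
# `trainable` now holds only the fc and BatchNorm parameter names.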
def freez(self, model, freez=True):
    # Freeze every parameter of `model`; a no-op when freez is False.
    if freez:
        for para in model.parameters():
            para.requires_grad = False
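# Note the asymmetry above: freez(model, False) does not unfreeze anything.
# An illustrative standalone helper that handles both directions (the name
# and function are not from the original source):
def set_requires_grad(model, requires_grad: bool):
    for p in model.parameters():
        p.requires_grad = requires_grad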
totalSample_train = len(batchHelper_train._label)

rHC_val = RandomHandClass.RandomHandClass()
rHC_val.readAllDatabase("./ClassImage_val")
batchHelper_val = BatchHelper.BatchHelp(rHC_val.ImageAll, rHC_val.labelAll)
totalSample_val = len(batchHelper_val._label)

rHC_test = RandomHandClass.RandomHandClass()
rHC_test.readAllDatabase("./ClassImage_test")
batchHelper_test = BatchHelper.BatchHelp(rHC_test.ImageAll, rHC_test.labelAll)
totalSample_test = len(batchHelper_test._label)

criterion = nn.NLLLoss()
optimizer = optim.Adam(models.parameters(), lr=0.0001)  # apply the optimizer to the model

ACC = 0
ACC_Max_val = 0
Average_ACC_val = 0
ACC_Max_test = 0
Average_ACC_test = 0

print("iterations per epoch: {}".format(int(totalSample_train / batch_size)))

for y in range(num_epochs):
    models.train()
    ACC = 0
    batchHelper_train.resetIndex()
    while batchHelper_train._epochs_completed == 0:
        input_image, labelImage = batchHelper_train.next_batch(batch_size, True)
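        # The loop truncates here in the source. A hedged sketch of a typical
        # continuation (tensor conversion plus the NLLLoss/Adam step; that
        # next_batch returns numpy arrays, and the forward pass of `models`,
        # are assumptions):
        input_image = torch.from_numpy(input_image).float()
        labelImage = torch.from_numpy(labelImage).long()
        optimizer.zero_grad()
        output = models(input_image)          # expected to emit log-probabilities
        loss = criterion(output, labelImage)  # NLLLoss
        loss.backward()
        optimizer.step()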