def __init__(self, model, train_data, val_data, criterion, optimizer, max_epochs, device, path, cut_output=False, recurrent_model=False, patience=5, grid_mask=None, is_reconstruction=False, lilw=False):
    """Store training configuration and wire up early stopping and optional GridMask."""
    # Core training components.
    self.model = model
    self.criterion = criterion
    self.optimizer = optimizer
    # Data loaders.
    self.train_data = train_data
    self.val_data = val_data
    # Run configuration.
    self.max_epochs = max_epochs
    self.device = device
    self.path = path
    self.cut_output = cut_output
    self.recurrent_model = recurrent_model
    self.is_reconstruction = is_reconstruction
    self.lilw = lilw
    # Early stopping monitors validation loss and checkpoints to `path`.
    self.earlyStop = EarlyStop(patience, self.path)
    # GridMask augmentation is optional: enabled only when a config dict is given.
    self.grid = None
    if grid_mask is not None:
        self.grid = GridMask(grid_mask['d1'], grid_mask['d2'], device,
                             grid_mask['ratio'], grid_mask['max_prob'],
                             grid_mask['max_epochs'])
# NOTE(review): local cv2/PIL preview snippets (imshow / FLIP_LEFT_RIGHT) were
# removed here; re-add them locally when eyeballing augmentations.
import albumentations as A

# Training-time augmentation pipeline: upscale slightly above the target size,
# random-crop back down to 300x300, then flip / channel-shuffle / GridMask.
# Previously tried-and-disabled steps (HueSaturationValue, RandomGridShuffle,
# Normalize, ToTensor) were pruned for clarity.
_augmentation_steps = [
    A.Resize(335, 335),  # (h, w): give the random crop some headroom
    A.RandomSizedCrop(min_max_height=(290, 335), height=300, width=300, p=1),
    A.HorizontalFlip(p=0.5),
    A.ChannelShuffle(always_apply=False, p=0.5),
    GridMask(num_grid=(3, 7), rotate=90, p=1),  # structured occlusion
]
train_transforms = A.Compose(_augmentation_steps)
# Command-line flags for mixed-precision training and LR scheduling.
parser.add_argument('--amp', action='store_true', help='train with amp')
parser.add_argument('--scheduler', action='store_true', help='train with scheduler')
args = parser.parse_args()

# Prefer GPU when available; everything downstream moves tensors to `device`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch

# Data
print('==> Preparing data..')
# Training augmentations: shift/scale/rotate, then exactly one GridMask
# variant (p=1 on the OneOf means a variant is always applied).
transform_train = albumentations.Compose([
    # Rotate(limit=10),
    # Resize(224, 224),
    ShiftScaleRotate(rotate_limit=15),
    albumentations.OneOf([
        GridMask(num_grid=3, rotate=15),
        GridMask(num_grid=(3,7)),
        GridMask(num_grid=3, mode=2)
    ], p=1),
    ToTensor()
])
# Test-time pipeline: tensor conversion only (no augmentation).
transform_test = albumentations.Compose([
    # Resize(224, 224),
    ToTensor()
])
# NOTE(review): this constructor call is truncated in this chunk — its
# arguments continue beyond the visible source.
train_dataset = BengaliImageDataset(
# NOTE(review): import order is kept exactly as found — the two star-imports
# (`dataset_cutmix`, `efficientnet`) can shadow earlier names, so reordering
# them is not safe without seeing those modules.
import argparse
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from dataset_cutmix import *
from efficientnet_pytorch import EfficientNet
from sklearn.metrics import recall_score
from timeit import default_timer as timer
from efficientnet import *
from gridmask import GridMask
from apex import amp
import apex

# Module-level GridMask augmenter shared by the training loop.
grid = GridMask(64, 128, rotate=15, ratio=0.6, mode=1, prob=1.)


def time_to_str(t, mode='min'):
    """Format an elapsed time in seconds as a short human-readable string.

    Args:
        t: elapsed time in seconds (int or float; fractions are discarded).
        mode: 'min' -> "HH hr MM min", 'sec' -> "MM min SS sec".

    Returns:
        The formatted string.

    Raises:
        ValueError: if `mode` is neither 'min' nor 'sec' (previously this
            silently returned None).
    """
    if mode == 'min':
        # BUGFIX: was `int(t) / 60` (true division -> float); %d truncated
        # the fraction anyway, so // makes the intent explicit.
        total_minutes = int(t) // 60
        hr = total_minutes // 60
        minutes = total_minutes % 60  # renamed: `min` shadowed the builtin
        return '%2d hr %02d min' % (hr, minutes)
    elif mode == 'sec':
        total_seconds = int(t)
        minutes = total_seconds // 60
        sec = total_seconds % 60
        return '%2d min %02d sec' % (minutes, sec)
    else:
        raise ValueError("mode must be 'min' or 'sec', got %r" % (mode,))
# Build per-parameter LR groups: 'features.*' layers could use a scaled
# learning rate (scale is currently 1, i.e. identical to the rest).
params_dict = dict(net.named_parameters())
for key, value in params_dict.items():
    if key.startswith('features'):
        params += [{'params': [value], 'lr': learning_rate * 1}]
    else:
        params += [{'params': [value], 'lr': learning_rate}]

# Optimizer: SGD with momentum (translated comment; an Adam variant was
# tried and disabled below).
# optimizer = optim.Adam(params, lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
optimizer = torch.optim.SGD(params, lr=learning_rate, momentum=0.9, weight_decay=5e-4)

# GridMask augmentation used during training.
grid = GridMask(d1=96, d2=224, rotate=360, ratio=0.6, mode=1, prob=0.8)

# train
for epoch in range(epochs):
    total_loss = 0
    net.train()
    # Step LR schedule: 1e-3 -> 1e-4 -> 1e-5.
    # BUGFIX: the original used two independent `if` statements
    # (`if epoch < 8: ...` then `if 8 < epoch < 11: ... else: ...`), so the
    # second `else` clobbered the first branch and forced lr = 1e-5 for
    # epochs 0-8. An elif chain restores the intended decay. NOTE(review):
    # epoch 8 now gets 1e-4; the buggy original gave it 1e-5 — confirm the
    # intended boundary.
    if epoch < 8:
        learning_rate = 0.001
    elif epoch < 11:
        learning_rate = 0.0001
    else:
        learning_rate = 0.00001
    for param_group in optimizer.param_groups:
        param_group['lr'] = learning_rate
class Trainer():
    """Drives training and validation of a model with early stopping.

    Supports optional GridMask input augmentation (non-recurrent models
    only), recurrent models that need explicit hidden states, and a
    reconstruction mode with forward/backward state pairs.
    """

    def __init__(self, model, train_data, val_data, criterion, optimizer, max_epochs, device, path, cut_output=False, recurrent_model=False, patience=5, grid_mask=None, is_reconstruction=False, lilw=False):
        """Store configuration and wire up early stopping / optional GridMask.

        Args:
            model: the network to train.
            train_data / val_data: iterables yielding (x, y, removed) batches.
            criterion: loss callable; called as criterion(output, y, removed),
                or criterion(output_slice, y) when cut_output is set.
            optimizer: torch optimizer over model parameters.
            max_epochs: maximum number of training epochs.
            device: torch device string/object batches are moved to.
            path: checkpoint path used by EarlyStop and load_model.
            cut_output: if True (non-recurrent only), score output[:, :, 0, :, :].
            recurrent_model: model expects hidden states as extra inputs.
            patience: early-stopping patience in epochs.
            grid_mask: optional dict configuring GridMask augmentation.
            is_reconstruction: use forward+backward state pairs.
            lilw: pass the original input as `original_x` to the model.
        """
        self.model = model
        self.train_data = train_data
        self.val_data = val_data
        self.criterion = criterion
        self.max_epochs = max_epochs
        self.device = device
        self.optimizer = optimizer
        self.cut_output = cut_output
        self.path = path
        self.grid = None
        self.lilw = lilw
        self.is_reconstruction = is_reconstruction
        self.recurrent_model = recurrent_model
        self.earlyStop = EarlyStop(patience, self.path)
        if grid_mask is not None:  # was `== None` style; `is not None` is idiomatic
            self.grid = GridMask(grid_mask['d1'], grid_mask['d2'], device,
                                 grid_mask['ratio'], grid_mask['max_prob'],
                                 grid_mask['max_epochs'])

    def train_evaluate(self):
        """Run the full train/validate loop.

        Returns:
            (train_losses, val_losses): per-epoch average losses. The loop
            stops early when EarlyStop signals its stop condition.
        """
        train_losses = []
        val_losses = []
        if self.grid is not None:
            # self.grid.set_prob(epoch)
            print(self.grid.get_prob())
        for epoch in range(self.max_epochs):
            self.train(train_losses)
            print('Train - Epoch %d, Epoch Loss: %f' % (epoch, train_losses[epoch]))
            self.evaluate(val_losses)
            print('Val Avg. Loss: %f' % (val_losses[epoch]))
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # EarlyStop also handles checkpointing the best model to self.path.
            if self.earlyStop.check_stop_condition(epoch, self.model, self.optimizer, val_losses[epoch]):
                break
        return train_losses, val_losses

    def train(self, train_losses):
        """Run one training epoch and append its average loss to train_losses."""
        # BUGFIX: the result of model.train() was pointlessly bound to an
        # unused local (`train_loss`).
        self.model.train()
        epoch_train_loss = 0.0
        for x, y, removed in self.train_data:
            x, y, removed = x.to(self.device), y.to(self.device), removed.to(self.device)
            # GridMask-augmented input; only the non-recurrent path consumes
            # it (recurrent branches deliberately receive the raw x).
            x_in = x
            if self.grid is not None:  # BUGFIX: was `self.grid == None` identity-style compare
                x_in = self.grid(x)
            self.optimizer.zero_grad()
            if self.recurrent_model:
                if self.is_reconstruction:
                    # Forward and backward hidden states sized from the
                    # flattened spatial dims (lat * lon).
                    states_fwd = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                    states_bckwd = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                    if self.lilw:
                        output = self.model(x, states_fwd, states_bckwd, removed, original_x=x)
                    else:
                        output = self.model(x, states_fwd, states_bckwd, removed)
                else:
                    states = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                    if self.lilw:
                        output = self.model(x, states, original_x=x)
                    else:
                        output = self.model(x, states)
            else:
                output = self.model(x_in)
            # batch : channel : time-steps : lat : lon
            if self.cut_output and not self.recurrent_model:
                loss = self.criterion(output[:, :, 0, :, :], y)
            else:
                loss = self.criterion(output, y, removed)
            loss.backward()
            self.optimizer.step()
            epoch_train_loss += loss.detach().item()
        avg_epoch_loss = epoch_train_loss / len(self.train_data)
        train_losses.append(avg_epoch_loss)

    def evaluate(self, val_losses):
        """Run one validation pass and append its average loss to val_losses."""
        epoch_val_loss = 0.0
        self.model.eval()
        with torch.no_grad():
            for x, y, removed in self.val_data:
                x, y, removed = x.to(self.device), y.to(self.device), removed.to(self.device)
                if self.recurrent_model:
                    if self.is_reconstruction:
                        states_fwd = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                        states_bckwd = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                        output = self.model(x, states_fwd, states_bckwd, removed)
                    else:
                        states = self.init_hidden(x.size()[0], x.size()[3] * x.size()[4])
                        output = self.model(x, states)
                else:
                    output = self.model(x)
                if self.cut_output and not self.recurrent_model:
                    loss = self.criterion(output[:, :, 0, :, :], y)
                else:
                    loss = self.criterion(output, y, removed)
                epoch_val_loss += loss.detach().item()
        avg_val_loss = epoch_val_loss / len(self.val_data)
        val_losses.append(avg_val_loss)

    def load_model(self):
        """Restore model/optimizer state from the checkpoint at self.path.

        Returns:
            (model, optimizer, epoch, loss) from the saved checkpoint.
        """
        checkpoint = torch.load(self.path)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']
        return self.model, self.optimizer, epoch, loss

    def init_hidden(self, batch_size, hidden_size):
        """Return a zeroed (h, c)-style hidden-state pair on self.device.

        NOTE(review): both elements are the SAME tensor object; safe while
        the states stay zero-initialized and are not mutated in place —
        confirm the model never writes into them.
        """
        h = torch.zeros(batch_size, hidden_size, device=self.device)
        return (h, h)