def __getitem__(self, index):
    """Return the (image_tensor, label) pair at ``index``.

    Training samples get random crop + horizontal-flip augmentation; test
    samples get a deterministic center crop. Both paths share the
    grayscale handling, resize, tensor conversion and normalization
    (previously duplicated in each branch).
    """
    if self.is_train:
        img, target = self.train_img[index], self.train_label[index]
        # Random augmentation for training.
        crop_steps = [
            transforms.RandomCrop(INPUT_SIZE),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        img, target = self.test_img[index], self.test_label[index]
        # Deterministic center crop for evaluation.
        crop_steps = [transforms.CenterCrop(INPUT_SIZE)]

    if len(img.shape) == 2:
        # Grayscale -> 3-channel by replicating the single channel.
        img = np.stack([img] * 3, 2)
    img = Image.fromarray(img, mode="RGB")
    img = transforms.Resize((256, 256), Image.BILINEAR)(img)
    for step in crop_steps:
        img = step(img)
    img = transforms.ToTensor()(img)
    # ImageNet normalization statistics.
    img = transforms.Normalize([0.485, 0.456, 0.406],
                               [0.229, 0.224, 0.225])(img)
    return img, target
def __init__(self, root: str, annotation: str, targetHeight: int, targetWidth: int, numClass: int):
    """Load COCO annotations and build the train-time transform pipeline.

    Also writes the class-name list to ``classes.txt`` and builds
    ``newIndex``, a remap of sparse COCO category ids to dense 0..N-1.
    """
    self.root = root
    self.coco = COCO(annotation)
    self.ids = list(self.coco.imgs.keys())
    self.targetHeight = targetHeight
    self.targetWidth = targetWidth
    self.numClass = numClass

    # Random flip/crop (applied in random order), resize, mild jitter,
    # then normalization.
    random_steps = T.RandomOrder(
        [T.RandomHorizontalFlip(), T.RandomSizeCrop(numClass)])
    self.transforms = T.Compose([
        random_steps,
        T.Resize((targetHeight, targetWidth)),
        T.ColorJitter(brightness=.2, contrast=.1, saturation=.1, hue=0),
        T.Normalize(),
    ])

    # Dense re-indexing of category ids; class names in the same order.
    cats = self.coco.cats
    self.newIndex = {cat_id: pos for pos, cat_id in enumerate(cats)}
    classes = [meta['name'] for meta in cats.values()]

    # Persist the class names for later inspection.
    with open('classes.txt', 'w') as f:
        f.write(str(classes))
def __init__(self, annFile: str, imageDir: str, targetHeight: int, targetWidth: int, numClass: int, train: bool = True):
    """Parse a CSV-style defect-annotation file and set up transforms.

    Each annotation row is ``<image name>,tlx,tly,w,h,class,...`` with a
    variable number of 5-value box groups. Boxes are stored as integer
    ``[cx, cy, w, h, class_id]`` (center coordinates). A side effect
    writes the final label table to ``table.txt``.
    """
    self.annotations = {}
    # Fixed label ids for the five known defect classes. The keys are the
    # raw (Mandarin) labels as they appear in the annotation file — they
    # are runtime data and must not be translated.
    self.table = {
        '不良-機械傷害': 0,
        '不良-著色不佳': 1,
        '不良-炭疽病': 2,
        '不良-乳汁吸附': 3,
        '不良-黑斑病': 4
    }
    self.imageDir = imageDir
    self.numClass = numClass
    # utf-8-sig strips a possible BOM from Excel-exported CSVs.
    with open(annFile, 'r', encoding='utf-8-sig') as f:
        for line in f.readlines():
            arr = line.rstrip().split(',')
            ans = []
            # Box groups start at column 1 and come in groups of five.
            for idx in range(1, len(arr), 5):
                tlx, tly, w, h, c = arr[idx:idx + 5]
                if tlx:  # skip empty (padding) groups
                    tlx, tly, w, h = list(map(float, (tlx, tly, w, h)))
                    # Unknown class labels get the next free id.
                    if c not in self.table:
                        self.table[c] = len(self.table)
                    # Convert top-left corner to box center.
                    cx = tlx + w / 2
                    cy = tly + h / 2
                    c = self.table[c]
                    ans.append(list(map(int, (cx, cy, w, h, c))))
            self.annotations[arr[0]] = ans
    self.names = list(self.annotations)
    # Persist the (possibly extended) label table for later inspection.
    with open('table.txt', 'w') as f:
        f.write(str(self.table))
    print(self.table)
    if train:
        self.transforms = T.Compose([
            T.RandomOrder([
                T.RandomHorizontalFlip(),
                T.RandomVerticalFlip(),
                T.RandomSizeCrop(numClass)
            ]),
            T.Resize((targetHeight, targetWidth)),
            T.ColorJitter(brightness=.2, contrast=0, saturation=0, hue=0),
            T.Normalize()
        ])
    else:
        # Evaluation: deterministic resize + normalize only.
        self.transforms = T.Compose(
            [T.Resize((targetHeight, targetWidth)), T.Normalize()])
def get_transform(train):
    """Build the image pipeline: PIL -> tensor, plus a p=0.5 horizontal
    flip (image and ground-truth) when training."""
    pipeline = [T.ToTensor()]
    if train:
        pipeline += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(pipeline)
def get_transform(train=False):
    """Compose detection transforms; training prepends random flip and
    random crop before tensor conversion and Faster R-CNN resizing."""
    steps = []
    if train:
        steps += [custom_T.RandomHorizontalFlip(), custom_T.RandomCrop()]
    steps += [custom_T.ToTensor(), custom_T.FasterRCNNResizer()]
    return custom_T.Compose(steps)
def __init__(self, root=None, dataloader=default_loader): self.transform1 = transforms.Compose([ transforms.RandomRotation(30), transforms.Resize([256, 256]), transforms.RandomCrop(INPUT_SIZE), transforms.RandomHorizontalFlip(), transforms.ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9, 1.1)), transforms.ToTensor(), transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), transforms.RandomErasing(probability=0.5, sh=0.05) ]) # 增强方法2: 关注更小的区域 self.transform2 = transforms.Compose([ transforms.RandomRotation(30), transforms.Resize([336, 336]), transforms.RandomCrop(INPUT_SIZE), transforms.RandomHorizontalFlip(), transforms.ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9, 1.1)), transforms.ToTensor(), transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), transforms.RandomErasing(probability=0.5, sh=0.05) ]) self.dataloader = dataloader self.root = root with open(os.path.join(self.root, TRAIN_DATASET), 'r') as fid: self.imglist = fid.readlines() self.labels = [] for line in self.imglist: image_path, label = line.strip().split() self.labels.append(int(label)) self.labels = np.array(self.labels) self.labels = torch.LongTensor(self.labels)
def get_transforms(args):
    """Random crop + flip, an optional color-space conversion chosen by
    ``args.color_space`` ('lab' or 'rgb'), then tensor conversion."""
    if args.color_space == 'lab':
        color_step = [custom_transforms.toLAB()]
    elif args.color_space == 'rgb':
        color_step = [custom_transforms.toRGB('RGB')]
    else:
        color_step = []
    # Color conversion sits between the flip and the tensor conversion,
    # matching the original insert position.
    steps = [
        custom_transforms.RandomSizedCrop(args.image_size, args.resize_min,
                                          args.resize_max),
        custom_transforms.RandomHorizontalFlip(),
    ] + color_step + [custom_transforms.toTensor()]
    return custom_transforms.Compose(steps)
def __init__(self, root: str, targetHeight: int, targetWidth: int, numClass: int, train: bool = True):
    """
    :param root: should contain .jpg files and corresponding .txt files
    :param targetHeight: desired height for model input
    :param targetWidth: desired width for model input
    :param numClass: number of classes in the given dataset
    :param train: when True, random augmentation is used; otherwise only
        resize + normalize
    """
    # Map each image path to the annotation file with the same base name.
    self.cache = {}
    imagePaths = glob(os.path.join(root, '*.jpg'))
    for path in imagePaths:
        # Fix: derive the base name with os.path instead of splitting on
        # '/' — the old code broke on Windows path separators and on
        # names containing '.jpg' in the middle.
        name = os.path.splitext(os.path.basename(path))[0]
        self.cache[path] = os.path.join(root, f'{name}.txt')
    self.paths = list(self.cache.keys())
    self.targetHeight = targetHeight
    self.targetWidth = targetWidth
    self.numClass = numClass
    if train:
        # Random flips/crop (in random order), resize, mild color jitter,
        # then normalization.
        self.transforms = T.Compose([
            T.RandomOrder([
                T.RandomHorizontalFlip(),
                T.RandomVerticalFlip(),
                T.RandomSizeCrop(numClass)
            ]),
            T.Resize((targetHeight, targetWidth)),
            T.ColorJitter(brightness=.2, contrast=.1, saturation=.1, hue=0),
            T.Normalize()
        ])
    else:
        self.transforms = T.Compose(
            [T.Resize((targetHeight, targetWidth)), T.Normalize()])
def get_transform(train=False, yolo=False, aug=None):
    """Compose detection transforms.

    :param train: adds random flip + random crop
    :param yolo: pads to square and resizes (YOLO-style input)
    :param aug: optional augmentation, one of 'dirty_camera_lens', 'gan',
        or None
    :raises ValueError: if ``aug`` is not one of the accepted values
    """
    # Fix: validate with an explicit raise instead of `assert`, which is
    # silently stripped when Python runs with -O.
    if aug not in ('dirty_camera_lens', 'gan', None):
        raise ValueError("Aug parameter not valid")
    transforms = []
    if yolo:
        transforms.append(custom_T.PadToSquare())
        transforms.append(custom_T.Resize(img_size=None))
    if train:
        transforms.append(custom_T.RandomHorizontalFlip())
        transforms.append(custom_T.RandomCrop())
    if aug == 'dirty_camera_lens':
        print("Augmentation: Dirty Camera Lens")
        transforms.append(custom_T.DirtyCameraLens())
    transforms.append(custom_T.ToTensor())
    # transforms.append(custom_T.FasterRCNNResizer())
    return custom_T.Compose(transforms)
def __init__(self, root=None, dataloader=default_loader):
    """Eagerly load every training image listed in TRAIN_DATASET.

    Each line of the list file is "<image path> <integer label>".
    """
    # Standard ImageNet-style train-time augmentation.
    self.transform = transforms.Compose([
        transforms.Resize([256, 256]),
        transforms.RandomCrop(INPUT_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406),
                             std=(0.229, 0.224, 0.225))
    ])
    self.dataloader = dataloader
    self.root = root
    self.imgs = []
    self.labels = []
    listing = os.path.join(self.root, TRAIN_DATASET)
    with open(listing, 'r') as fid:
        for entry in fid.readlines():
            path, raw_label = entry.strip().split()
            self.imgs.append(self.dataloader(path))
            self.labels.append(int(raw_label))
def main():
    """Train AFENet on PASCAL VOC 2012 (21 classes) and validate each epoch."""
    net = AFENet(classes=21, pretrained_model_path=args['pretrained_model_path']).cuda()
    # Pretrained backbone layers (base LR) vs. newly added heads (10x LR below).
    net_ori = [net.layer0, net.layer1, net.layer2, net.layer3, net.layer4]
    net_new = [
        net.ppm, net.cls, net.aux, net.ppm_reduce, net.aff1, net.aff2, net.aff3
    ]
    # ImageNet mean/std scaled to the 0-255 pixel range used by the
    # project-local transform module.
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    train_transform = transform.Compose([
        transform.RandScale([0.75, 2.0]),
        transform.RandomHorizontalFlip(),
        transform.Crop([480, 480], crop_type='rand', padding=mean,
                       ignore_label=255),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])
    train_data = voc2012.VOC2012(split='train',
                                 data_root=args['dataset_root'],
                                 data_list=args['train_list'],
                                 transform=train_transform)
    train_loader = DataLoader(train_data,
                              batch_size=args['train_batch_size'],
                              shuffle=True,
                              num_workers=8,
                              drop_last=True)
    # Validation uses a deterministic center crop.
    val_transform = transform.Compose([
        transform.Crop([480, 480], crop_type='center', padding=mean,
                       ignore_label=255),
        transform.ToTensor(),
        transform.Normalize(mean=mean, std=std)
    ])
    val_data = voc2012.VOC2012(split='val',
                               data_root=args['dataset_root'],
                               data_list=args['val_list'],
                               transform=val_transform)
    val_loader = DataLoader(val_data,
                            batch_size=args['val_batch_size'],
                            shuffle=False,
                            num_workers=8)
    if len(args['snapshot']) == 0:
        # Fresh run: start at epoch 1 with an empty best-record.
        curr_epoch = 1
        args['best_record'] = {
            'epoch': 0,
            'val_loss': 1e10,
            'acc': 0,
            'acc_cls': 0,
            'mean_iu': 0,
            'fwavacc': 0
        }
    else:
        # Resume: the snapshot filename encodes epoch and metrics at fixed
        # underscore-separated positions.
        print('training resumes from ' + args['snapshot'])
        net.load_state_dict(
            torch.load(os.path.join(args['model_save_path'],
                                    args['snapshot'])))
        split_snapshot = args['snapshot'].split('_')
        curr_epoch = int(split_snapshot[1]) + 1
        args['best_record'] = {
            'epoch': int(split_snapshot[1]),
            'val_loss': float(split_snapshot[3]),
            'acc': float(split_snapshot[5]),
            'acc_cls': float(split_snapshot[7]),
            'mean_iu': float(split_snapshot[9]),
            'fwavacc': float(split_snapshot[11])
        }
    # Per-module parameter groups: pretrained trunk at base LR, new heads
    # at 10x the base LR.
    params_list = []
    for module in net_ori:
        params_list.append(dict(params=module.parameters(), lr=args['lr']))
    for module in net_new:
        params_list.append(dict(params=module.parameters(), lr=args['lr'] * 10))
    args['index_split'] = 5
    criterion = torch.nn.CrossEntropyLoss(ignore_index=255)
    optimizer = torch.optim.SGD(params_list,
                                lr=args['lr'],
                                momentum=args['momentum'],
                                weight_decay=args['weight_decay'])
    if len(args['snapshot']) > 0:
        # Restore the optimizer state saved next to the model snapshot.
        optimizer.load_state_dict(
            torch.load(
                os.path.join(args['model_save_path'],
                             'opt_' + args['snapshot'])))
    check_makedirs(args['model_save_path'])
    all_iter = args['epoch_num'] * len(train_loader)
    for epoch in range(curr_epoch, args['epoch_num'] + 1):
        train(train_loader, net, optimizer, epoch, all_iter)
        validate(val_loader, net, criterion, optimizer, epoch)
def build_model(self):
    """Build dataloaders, U-GAT-IT generators/discriminators, losses and
    the two Adam optimizers (PaddlePaddle fluid API)."""
    """ DataLoader """
    # Padding scales with img_size (30px at the reference 256 size) so the
    # random crop samples shifted regions of the resized image.
    pad = int(30 * self.img_size // 256)
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + pad, self.img_size + pad)),
        transforms.RandomCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    # Unpaired domains A and B, each with its own train/test folder.
    self.trainA = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainA'), train_transform)
    self.trainB = ImageFolder(
        os.path.join('dataset', self.dataset, 'trainB'), train_transform)
    self.testA = ImageFolder(
        os.path.join('dataset', self.dataset, 'testA'), test_transform)
    self.testB = ImageFolder(
        os.path.join('dataset', self.dataset, 'testB'), test_transform)
    self.trainA_loader = DataLoader(self.trainA,
                                    batch_size=self.batch_size,
                                    shuffle=True)
    self.trainB_loader = DataLoader(self.trainB,
                                    batch_size=self.batch_size,
                                    shuffle=True)
    self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
    self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
    """ Define Generator, Discriminator """
    self.genA2B = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3,
                                  output_nc=3,
                                  ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    # Global (7-layer) and local (5-layer) discriminators per domain.
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    """ Define Loss """
    self.L1_loss = loss.L1Loss()
    self.MSE_loss = loss.MSELoss()
    self.BCE_loss = loss.BCEWithLogitsLoss()
    """ Trainer """
    def get_params(block):
        # Collect trainable parameters, skipping instance-norm parameters
        # and spectral-norm auxiliary vectors (weight_u / weight_v).
        out = []
        for name, param in block.named_parameters():
            if 'instancenorm' in name or 'weight_u' in name or 'weight_v' in name:
                continue
            out.append(param)
        return out
    genA2B_parameters = get_params(self.genA2B)
    genB2A_parameters = get_params(self.genB2A)
    disGA_parameters = get_params(self.disGA)
    disGB_parameters = get_params(self.disGB)
    disLA_parameters = get_params(self.disLA)
    disLB_parameters = get_params(self.disLB)
    # One optimizer for both generators, one for all four discriminators.
    G_parameters = genA2B_parameters + genB2A_parameters
    D_parameters = disGA_parameters + disGB_parameters + disLA_parameters + disLB_parameters
    self.G_optim = fluid.optimizer.Adam(
        parameter_list=G_parameters,
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        regularization=fluid.regularizer.L2Decay(self.weight_decay))
    self.D_optim = fluid.optimizer.Adam(
        parameter_list=D_parameters,
        learning_rate=self.lr,
        beta1=0.5,
        beta2=0.999,
        regularization=fluid.regularizer.L2Decay(self.weight_decay))
    """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
#dataset prepare #--------------------------------- print('Loading dataset...') cache_size = 256 if args.image_size == 448: cache_size = 256 * 2 if args.image_size == 352: cache_size = 402 transform_train = transforms.Compose([ transforms.Resize((cache_size,cache_size)), #transforms.Resize((args.image_size,args.image_size)), #transforms.RandomRotation(10), transforms.RandomCrop(args.image_size), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225]), ]) transform_test = transforms.Compose([ transforms.Resize((cache_size,cache_size)), transforms.CenterCrop(args.image_size), transforms.ToTensor(), transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225]), ]) print("Dataset Initializing...") trainset = SRDataset.SRDataset(max_person=args.max_person,image_dir=args.images_root, \ images_list=args.train_file_pre + '_images.txt',bboxes_list=args.train_file_pre + '_bbox.json', \
def get_transform(train=False):
    """PIL -> tensor conversion, plus a p=0.5 horizontal flip in training."""
    steps = [transforms.ToTensor()]
    if train:
        steps += [transforms.RandomHorizontalFlip(0.5)]
    return transforms.Compose(steps)
def get_transform(train):
    """Tensor conversion; training adds a p=0.5 random horizontal flip."""
    ops = [T.ToTensor()] + ([T.RandomHorizontalFlip(0.5)] if train else [])
    return T.Compose(ops)
if __name__ == "__main__":
    # Experiment configuration — dataset paths/names left blank; fill in
    # before running.
    dataset_root_path_1 = ""
    dataset_root_path_2 = ""
    split = "train"
    percentage = 15
    NUM_WORKERS = 0
    BATCH_SIZE = 4
    TGT_IMAGES_IN_BATCH = 1
    DEVICE = "cuda"
    DATASET_NAME_1 = ""
    DATASET_NAME_2 = ""
    transforms = None
    if split == "train":
        # Training: flip + crop before tensor conversion and resizing.
        transforms = custom_T.Compose([
            custom_T.RandomHorizontalFlip(),
            custom_T.RandomCrop(),
            custom_T.ToTensor(),
            custom_T.FasterRCNNResizer()
        ])
    elif split == "val" or split == "test":
        # Evaluation: deterministic tensor conversion + resize only.
        transforms = custom_T.Compose(
            [custom_T.ToTensor(), custom_T.FasterRCNNResizer()])
    dataset_1 = CustomYoloAnnotatedDataset(dataset_root_path_1,
                                           transforms=transforms,
                                           dataset_name=DATASET_NAME_1,
                                           percentage=percentage,
                                           split=split)
    dataset_2 = CustomYoloAnnotatedDataset(dataset_root_path_2,
def main():
    """Create the model and start the training."""
    # OpenRooms -> NYUv2 label remap table; the final "- 1" in the lambda
    # shifts labels to 0-based ids (255 maps to 254 — presumably handled by
    # the ignore_index below; TODO confirm).
    or_nyu_dict = {
        0: 255, 1: 16, 2: 40, 3: 39, 4: 7, 5: 14, 6: 39, 7: 12, 8: 38,
        9: 40, 10: 10, 11: 6, 12: 40, 13: 39, 14: 39, 15: 40, 16: 18,
        17: 40, 18: 4, 19: 40, 20: 40, 21: 5, 22: 40, 23: 40, 24: 30,
        25: 36, 26: 38, 27: 40, 28: 3, 29: 40, 30: 40, 31: 9, 32: 38,
        33: 40, 34: 40, 35: 40, 36: 34, 37: 37, 38: 40, 39: 40, 40: 39,
        41: 8, 42: 3, 43: 1, 44: 2, 45: 22
    }
    or_nyu_map = lambda x: or_nyu_dict.get(x, x) - 1
    or_nyu_map = np.vectorize(or_nyu_map)
    device = torch.device("cuda" if not args.cpu else "cpu")
    # Source and target input sizes come in as "W,H" strings.
    w, h = map(int, args.input_size.split(','))
    input_size = (w, h)
    w, h = map(int, args.input_size_target.split(','))
    input_size_target = (w, h)
    cudnn.enabled = True
    args.or_nyu_map = or_nyu_map
    # Create network
    if args.model == 'DeepLab':
        model = DeeplabMulti(num_classes=args.num_classes)
        if args.restore_from[:4] == 'http':
            saved_state_dict = model_zoo.load_url(args.restore_from)
        elif args.restore_from == "":
            # NOTE(review): leaves saved_state_dict = None, which the loop
            # below would crash on — presumably restore_from is always set;
            # confirm.
            saved_state_dict = None
        else:
            saved_state_dict = torch.load(args.restore_from)
        # Copy pretrained weights, dropping the stored prefix and skipping
        # the classifier (layer5) when the class count differs (40 classes).
        new_params = model.state_dict().copy()
        for i in saved_state_dict:
            # Scale.layer5.conv2d_list.3.weight
            i_parts = i.split('.')
            # print i_parts
            if not args.num_classes == 40 or not i_parts[1] == 'layer5':
                new_params['.'.join(i_parts[1:])] = saved_state_dict[i]
                # print i_parts
        model.load_state_dict(new_params)
    model.train()
    model.to(device)
    cudnn.benchmark = True
    if args.mode != "baseline" and args.mode != "baseline_tar":
        # init D — two discriminators, one per DeepLab prediction head.
        model_D1 = FCDiscriminator(num_classes=args.num_classes).to(device)
        model_D2 = FCDiscriminator(num_classes=args.num_classes).to(device)
        model_D1.train()
        model_D1.to(device)
        model_D2.train()
        model_D2.to(device)
    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)
    # Augmentation ranges and 0-255-scaled ImageNet mean/std.
    scale_min = 0.5
    scale_max = 2.0
    rotate_min = -10
    rotate_max = 10
    ignore_label = 255
    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]
    args.width = w
    args.height = h
    train_transform = transforms.Compose([
        # et.ExtResize( 512 ),
        transforms.RandScale([scale_min, scale_max]),
        transforms.RandRotate([rotate_min, rotate_max],
                              padding=IMG_MEAN_RGB,
                              ignore_label=ignore_label),
        transforms.RandomGaussianBlur(),
        transforms.RandomHorizontalFlip(),
        transforms.Crop([args.height + 1, args.width + 1],
                        crop_type='rand',
                        padding=IMG_MEAN_RGB,
                        ignore_label=ignore_label),
        #et.ExtRandomCrop(size=(opts.crop_size, opts.crop_size)),
        #et.ExtColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
        #et.ExtRandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMG_MEAN, std=[1, 1, 1]),
    ])
    val_transform = transforms.Compose([
        # et.ExtResize( 512 ),
        transforms.Crop([args.height + 1, args.width + 1],
                        crop_type='center',
                        padding=IMG_MEAN_RGB,
                        ignore_label=ignore_label),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMG_MEAN, std=[1, 1, 1]),
    ])
    # Source domain: OpenRooms (synthetic), unless running the
    # target-only baseline, which trains on labelled NYU directly.
    if args.mode != "baseline_tar":
        src_train_dst = OpenRoomsSegmentation(root=args.data_dir,
                                              opt=args,
                                              split='train',
                                              transform=train_transform,
                                              imWidth=args.width,
                                              imHeight=args.height,
                                              remap_labels=args.or_nyu_map)
    else:
        src_train_dst = NYU_Labelled(root=args.data_dir_target,
                                     opt=args,
                                     split='train',
                                     transform=train_transform,
                                     imWidth=args.width,
                                     imHeight=args.height,
                                     phase="TRAIN",
                                     randomize=True)
    tar_train_dst = NYU(root=args.data_dir_target,
                        opt=args,
                        split='train',
                        transform=train_transform,
                        imWidth=args.width,
                        imHeight=args.height,
                        phase="TRAIN",
                        randomize=True,
                        mode=args.mode)
    tar_val_dst = NYU(root=args.data_dir,
                      opt=args,
                      split='val',
                      transform=val_transform,
                      imWidth=args.width,
                      imHeight=args.height,
                      phase="TRAIN",
                      randomize=False)
    trainloader = data.DataLoader(src_train_dst,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  pin_memory=True)
    trainloader_iter = enumerate(trainloader)
    targetloader = data.DataLoader(tar_train_dst,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   pin_memory=True)
    targetloader_iter = enumerate(targetloader)
    # implement model.optim_parameters(args) to handle different models' lr setting
    optimizer = optim.SGD(model.optim_parameters(args),
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    optimizer.zero_grad()
    if args.mode != "baseline" and args.mode != "baseline_tar":
        optimizer_D1 = optim.Adam(model_D1.parameters(),
                                  lr=args.learning_rate_D,
                                  betas=(0.9, 0.99))
        optimizer_D1.zero_grad()
        optimizer_D2 = optim.Adam(model_D2.parameters(),
                                  lr=args.learning_rate_D,
                                  betas=(0.9, 0.99))
        optimizer_D2.zero_grad()
    # Adversarial loss flavour: BCE (vanilla GAN) or MSE (least-squares).
    if args.gan == 'Vanilla':
        bce_loss = torch.nn.BCEWithLogitsLoss()
    elif args.gan == 'LS':
        bce_loss = torch.nn.MSELoss()
    seg_loss = torch.nn.CrossEntropyLoss(ignore_index=255)
    # Upsample predictions back to (H+1, W+1) — matches the +1 crop above.
    interp = nn.Upsample(size=(input_size[1] + 1, input_size[0] + 1),
                         mode='bilinear',
                         align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1] + 1,
                                      input_size_target[0] + 1),
                                mode='bilinear',
                                align_corners=True)
    # labels for adversarial training
    source_label = 0
    target_label = 1
    # set up tensor board
    if args.tensorboard:
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)
        writer = SummaryWriter(args.log_dir)
    for i_iter in range(args.num_steps):
        # Per-iteration loss accumulators (averaged over iter_size).
        loss_seg_value1 = 0
        loss_seg_value1_tar = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0
        loss_seg_value2 = 0
        loss_seg_value2_tar = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0
        optimizer.zero_grad()
        adjust_learning_rate(optimizer, i_iter)
        if args.mode != "baseline" and args.mode != "baseline_tar":
            optimizer_D1.zero_grad()
            optimizer_D2.zero_grad()
            adjust_learning_rate_D(optimizer_D1, i_iter)
            adjust_learning_rate_D(optimizer_D2, i_iter)
        # Samples kept for periodic tensorboard image dumps.
        sample_src = None
        sample_tar = None
        sample_res_src = None
        sample_res_tar = None
        sample_gt_src = None
        sample_gt_tar = None
        for sub_i in range(args.iter_size):
            # train G
            if args.mode != "baseline" and args.mode != "baseline_tar":
                # don't accumulate grads in D
                for param in model_D1.parameters():
                    param.requires_grad = False
                for param in model_D2.parameters():
                    param.requires_grad = False
            # train with source — restart the loader when exhausted.
            try:
                _, batch = trainloader_iter.__next__()
            except:
                trainloader_iter = enumerate(trainloader)
                _, batch = trainloader_iter.__next__()
            images, labels, _ = batch
            sample_src = images.clone()
            sample_gt_src = labels.clone()
            images = images.to(device)
            labels = labels.long().to(device)
            pred1, pred2 = model(images)
            pred1 = interp(pred1)
            pred2 = interp(pred2)
            sample_pred_src = pred2.detach().cpu()
            # Main head (pred2) plus auxiliary head (pred1) weighted by
            # lambda_seg.
            loss_seg1 = seg_loss(pred1, labels)
            loss_seg2 = seg_loss(pred2, labels)
            loss = loss_seg2 + args.lambda_seg * loss_seg1
            # proper normalization
            loss = loss / args.iter_size
            loss.backward()
            loss_seg_value1 += loss_seg1.item() / args.iter_size
            loss_seg_value2 += loss_seg2.item() / args.iter_size
            # train with target
            try:
                _, batch = targetloader_iter.__next__()
            except:
                targetloader_iter = enumerate(targetloader)
                _, batch = targetloader_iter.__next__()
            images, tar_labels, _, labelled = batch
            n_labelled = labelled.sum().detach().item()
            batch_size = images.shape[0]
            sample_tar = images.clone()
            sample_gt_tar = tar_labels.clone()
            images = images.to(device)
            pred_target1, pred_target2 = model(images)
            pred_target1 = interp_target(pred_target1)
            pred_target2 = interp_target(pred_target2)
            #print("N_labelled {}".format(n_labelled))
            # Semi-supervised DA: supervised loss on the labelled subset of
            # the target batch.
            if args.mode == "sda" and n_labelled != 0:
                labelled = labelled.to(device) == 1
                tar_labels = tar_labels.to(device)
                loss_seg1_tar = seg_loss(pred_target1[labelled],
                                         tar_labels[labelled])
                loss_seg2_tar = seg_loss(pred_target2[labelled],
                                         tar_labels[labelled])
                loss_tar_labelled = loss_seg2_tar + args.lambda_seg * loss_seg1_tar
                loss_tar_labelled = loss_tar_labelled / args.iter_size
                loss_seg_value1_tar += loss_seg1_tar.item() / args.iter_size
                loss_seg_value2_tar += loss_seg2_tar.item() / args.iter_size
            else:
                loss_tar_labelled = torch.zeros(
                    1, requires_grad=True).float().to(device)
            # proper normalization
            sample_pred_tar = pred_target2.detach().cpu()
            if args.mode != "baseline" and args.mode != "baseline_tar":
                # Adversarial loss: push target predictions to look like
                # source predictions (filled with source_label).
                D_out1 = model_D1(F.softmax(pred_target1))
                D_out2 = model_D2(F.softmax(pred_target2))
                loss_adv_target1 = bce_loss(
                    D_out1,
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label).to(device))
                loss_adv_target2 = bce_loss(
                    D_out2,
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label).to(device))
                loss = args.lambda_adv_target1 * loss_adv_target1 + args.lambda_adv_target2 * loss_adv_target2
                loss = loss / args.iter_size + loss_tar_labelled
                #loss = loss_tar_labelled
                loss.backward()
                loss_adv_target_value1 += loss_adv_target1.item(
                ) / args.iter_size
                loss_adv_target_value2 += loss_adv_target2.item(
                ) / args.iter_size
                # train D
                # bring back requires_grad
                for param in model_D1.parameters():
                    param.requires_grad = True
                for param in model_D2.parameters():
                    param.requires_grad = True
                # train with source — detached predictions as "real".
                pred1 = pred1.detach()
                pred2 = pred2.detach()
                D_out1 = model_D1(F.softmax(pred1))
                D_out2 = model_D2(F.softmax(pred2))
                loss_D1 = bce_loss(
                    D_out1,
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(source_label).to(device))
                loss_D2 = bce_loss(
                    D_out2,
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(source_label).to(device))
                loss_D1 = loss_D1 / args.iter_size / 2
                loss_D2 = loss_D2 / args.iter_size / 2
                loss_D1.backward()
                loss_D2.backward()
                loss_D_value1 += loss_D1.item()
                loss_D_value2 += loss_D2.item()
                # train with target — detached predictions as "fake".
                pred_target1 = pred_target1.detach()
                pred_target2 = pred_target2.detach()
                D_out1 = model_D1(F.softmax(pred_target1))
                D_out2 = model_D2(F.softmax(pred_target2))
                loss_D1 = bce_loss(
                    D_out1,
                    torch.FloatTensor(
                        D_out1.data.size()).fill_(target_label).to(device))
                loss_D2 = bce_loss(
                    D_out2,
                    torch.FloatTensor(
                        D_out2.data.size()).fill_(target_label).to(device))
                loss_D1 = loss_D1 / args.iter_size / 2
                loss_D2 = loss_D2 / args.iter_size / 2
                loss_D1.backward()
                loss_D2.backward()
                loss_D_value1 += loss_D1.item()
                loss_D_value2 += loss_D2.item()
        optimizer.step()
        if args.mode != "baseline" and args.mode != "baseline_tar":
            optimizer_D1.step()
            optimizer_D2.step()
        if args.tensorboard:
            scalar_info = {
                'loss_seg1': loss_seg_value1,
                'loss_seg2': loss_seg_value2,
                'loss_adv_target1': loss_adv_target_value1,
                'loss_adv_target2': loss_adv_target_value2,
                'loss_D1': loss_D_value1,
                'loss_D2': loss_D_value2,
                'loss_seg1_tar': loss_seg_value1_tar,
                'loss_seg2_tar': loss_seg_value2_tar,
            }
            if i_iter % 10 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)
            # Every 1000 iterations dump image/label/prediction grids for
            # both domains (BGR -> RGB reorder plus mean re-addition).
            if i_iter % 1000 == 0:
                img = sample_src.cpu()[:, [2, 1, 0], :, :] + torch.from_numpy(
                    np.array(IMG_MEAN_RGB).reshape(1, 3, 1, 1)).float()
                img = img.type(torch.uint8)
                writer.add_images("Src/Images", img, i_iter)
                label = tar_train_dst.decode_target(sample_gt_src).transpose(
                    0, 3, 1, 2)
                writer.add_images("Src/Labels", label, i_iter)
                preds = sample_pred_src.permute(0, 2, 3, 1).cpu().numpy()
                preds = np.asarray(np.argmax(preds, axis=3), dtype=np.uint8)
                preds = tar_train_dst.decode_target(preds).transpose(
                    0, 3, 1, 2)
                writer.add_images("Src/Preds", preds, i_iter)
                tar_img = sample_tar.cpu()[:, [2, 1, 0], :, :] + torch.from_numpy(
                    np.array(IMG_MEAN_RGB).reshape(1, 3, 1, 1)).float()
                tar_img = tar_img.type(torch.uint8)
                writer.add_images("Tar/Images", tar_img, i_iter)
                tar_label = tar_train_dst.decode_target(
                    sample_gt_tar).transpose(0, 3, 1, 2)
                writer.add_images("Tar/Labels", tar_label, i_iter)
                tar_preds = sample_pred_tar.permute(0, 2, 3, 1).cpu().numpy()
                tar_preds = np.asarray(np.argmax(tar_preds, axis=3),
                                       dtype=np.uint8)
                tar_preds = tar_train_dst.decode_target(tar_preds).transpose(
                    0, 3, 1, 2)
                writer.add_images("Tar/Preds", tar_preds, i_iter)
        print('exp = {}'.format(args.snapshot_dir))
        print(
            'iter = {0:8d}/{1:8d}, loss_seg1 = {2:.3f} loss_seg2 = {3:.3f} loss_adv1 = {4:.3f}, loss_adv2 = {5:.3f} loss_D1 = {6:.3f} loss_D2 = {7:.3f} loss_seg1_tar={8:.3f} loss_seg2_tar={9:.3f}'
            .format(i_iter, args.num_steps, loss_seg_value1, loss_seg_value2,
                    loss_adv_target_value1, loss_adv_target_value2,
                    loss_D_value1, loss_D_value2, loss_seg_value1_tar,
                    loss_seg_value2_tar))
        # Final save-and-stop point.
        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir,
                         'OR_' + str(args.num_steps_stop) + '.pth'))
            if args.mode != "baseline" and args.mode != "baseline_tar":
                torch.save(
                    model_D1.state_dict(),
                    osp.join(args.snapshot_dir,
                             'OR_' + str(args.num_steps_stop) + '_D1.pth'))
                torch.save(
                    model_D2.state_dict(),
                    osp.join(args.snapshot_dir,
                             'OR_' + str(args.num_steps_stop) + '_D2.pth'))
            break
        # Periodic snapshot.
        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(
                model.state_dict(),
                osp.join(args.snapshot_dir, 'OR_' + str(i_iter) + '.pth'))
            if args.mode != "baseline" and args.mode != "baseline_tar":
                torch.save(
                    model_D1.state_dict(),
                    osp.join(args.snapshot_dir,
                             'OR_' + str(i_iter) + '_D1.pth'))
                torch.save(
                    model_D2.state_dict(),
                    osp.join(args.snapshot_dir,
                             'OR_' + str(i_iter) + '_D2.pth'))
    if args.tensorboard:
        writer.close()
def build_transform(deterministic, color=True, daug=0):
    """Build the training transform pipeline.

    :param deterministic: when True, return an empty (identity) pipeline.
        NOTE(review): the daug chain is indented under ``not deterministic``
        here — consistent with the parameter name, but confirm against the
        original layout.
    :param color: unused in this function — presumably consumed elsewhere;
        kept for interface compatibility. TODO confirm.
    :param daug: augmentation level 0-7 selecting progressively stronger
        RandomAffine settings (0 = horizontal flip only).
    """
    transforms = []
    if not deterministic:
        transforms = [csl_tf.RandomHorizontalFlip(0.5)]
        if daug == 1:
            # Mild: 3 deg rotation, small translate/scale.
            transforms += [
                csl_tf.RandomAffine(
                    3,
                    translate=[0.025, 0.025],
                    scale=[0.975, 1.025],
                    shear=0,
                    keep_aspect=False,
                )
            ]
        elif daug == 2:
            transforms += [
                csl_tf.RandomAffine(
                    3,
                    translate=[0.035, 0.035],
                    scale=[0.970, 1.030],
                    shear=2,
                    keep_aspect=False,
                )
            ]
        elif daug == 3:
            transforms += [
                csl_tf.RandomAffine(
                    20,
                    translate=[0.035, 0.035],
                    scale=[0.970, 1.030],
                    shear=0,
                    keep_aspect=False,
                )
            ]
        elif daug == 4:
            transforms += [
                csl_tf.RandomAffine(
                    45,
                    translate=[0.035, 0.035],
                    scale=[0.940, 1.030],
                    shear=5,
                    keep_aspect=False,
                )
            ]
        elif daug == 5:
            transforms += [
                csl_tf.RandomAffine(
                    60,
                    translate=[0.035, 0.035],
                    scale=[0.940, 1.030],
                    shear=5,
                    keep_aspect=False,
                )
            ]
        elif daug == 6:
            # CVPR landmark training
            transforms += [
                csl_tf.RandomAffine(
                    30,
                    translate=[0.04, 0.04],
                    scale=[0.940, 1.050],
                    shear=5,
                    keep_aspect=False,
                )
            ]
        elif daug == 7:
            # Translate/scale/shear only — no rotation.
            transforms += [
                csl_tf.RandomAffine(
                    0,
                    translate=[0.04, 0.04],
                    scale=[0.940, 1.050],
                    shear=5,
                    keep_aspect=False,
                )
            ]
    return tf.Compose(transforms)
import os
from data_provider import VOCSegmentation
from experiment_builder import ExperimentBuilder
from utils.arg_extractor import get_args
import utils.transforms as trans
from model.deeplab import DeepLab
import matplotlib.pyplot as plt
from tools import prediction
from utils.metrics import Evaluator

# Parse CLI arguments and seed numpy and torch for reproducibility.
args = get_args()
rng = np.random.RandomState(seed=args.seed)
torch.manual_seed(seed=args.seed)

# Training pipeline: flip, random scale, random crop, then normalization.
# NOTE(review): Normalize runs before ToTensor here — utils.transforms is
# project-local, so presumably its Normalize operates on PIL/ndarray
# samples; verify against utils/transforms.py.
transform_train = trans.Compose([
    trans.RandomHorizontalFlip(),
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.RandomScale((0.5, 2.0)),
    #trans.FixScale(args.crop_size),
    trans.RandomCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])

# Validation: deterministic scale + center crop, same normalization.
transform_val = trans.Compose([
    #trans.FixScale((args.crop_size,args.crop_size)),
    trans.FixScale(args.crop_size),
    trans.CenterCrop(args.crop_size),
    trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    trans.ToTensor(),
])
def generate_dataloader(model, eval_type, args):
    """Extract (or load cached) RoI and union-box features with ``model``,
    then wrap them in an SRDataset dataloader for the given split.

    :param model: feature extractor called as model(img, bboxes)
    :param eval_type: 'train', 'valid' or anything else (treated as test)
    :param args: experiment namespace (paths, sizes, cuda flags, ...)
    :returns: (dataloader, class_weight, class_count); the weights are only
        populated for the training split
    """
    # Pre-crop cache size scales with the model input size.
    cache_size = 256
    if args.image_size == 448:
        cache_size = 256 * 2
    if args.image_size == 352:
        cache_size = 402
    transform_train = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.RandomCrop(args.image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    transform_test = transforms.Compose([
        transforms.Resize((cache_size, cache_size)),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    if eval_type == 'train':
        data = ImageDataset(max_person=args.max_person + 1,
                            image_dir=args.images_root,
                            images_list=args.train_file_pre + '_images.txt',
                            bboxes_list=args.train_file_pre + '_bbox.json',
                            image_size=args.image_size,
                            input_transform=transform_train)
    elif eval_type == 'valid':
        data = ImageDataset(max_person=args.max_person + 1, image_dir=args.images_root, \
            images_list=args.valid_file_pre + '_images.txt', \
            bboxes_list=args.valid_file_pre + '_bbox.json', \
            image_size=args.image_size, \
            input_transform=transform_test)
    else:
        data = ImageDataset(max_person=args.max_person + 1, image_dir=args.images_root, \
            images_list=args.test_file_pre + '_images.txt', \
            bboxes_list=args.test_file_pre + '_bbox.json', \
            image_size=args.image_size, \
            input_transform=transform_test)
    # NOTE(review): worker_init_fn=np.random.seed(...) CALLS seed here and
    # passes None as the init fn — presumably intended to seed workers;
    # confirm before relying on per-worker determinism.
    loader = torch.utils.data.DataLoader(data,
                                         batch_size=args.test_batch_size,
                                         shuffle=False,
                                         num_workers=0,
                                         worker_init_fn=np.random.seed(
                                             args.manualSeed))
    model.eval()
    batch_time = AverageMeter.AverageMeter()
    end_time = time.time()
    # Cached feature files: one for RoI features, one for union-box features.
    union_filename = './para_data/' + args.dataset + '_' + eval_type + '_union' + '.npy'
    feat_filename = './para_data/' + args.dataset + '_' + eval_type + '_ssl.npy'
    ssl_model = load_ssl_model()
    ssl_model.eval()
    if args.cuda:
        ssl_model.cuda()
        ssl_model = torch.nn.DataParallel(ssl_model)
    if os.path.exists(feat_filename) and os.path.exists(
            union_filename) and not args.regenerate_roifeat:
        # Fast path: load previously extracted features from disk.
        all_feat = np.load(feat_filename)
        logger.info('loadding RoI feature npy from {} successfully'.format(
            feat_filename))
        all_union_feat = np.load(union_filename,
                                 mmap_mode='r')  # [B, N, N, 2048]
        logger.info(
            'loading union npy from {} successfully'.format(union_filename))
    else:
        # Slow path: run the extractor over every batch and cache results.
        all_feat, all_union_feat = [], []
        for batch_idx, (img, image_bboxes) in enumerate(loader):
            if args.cuda:
                img, image_bboxes = img.cuda(), image_bboxes.cuda()
            img, image_bboxes = Variable(img), Variable(image_bboxes)
            node_num = image_bboxes.shape[1]
            # Append pairwise union boxes after the per-person boxes so a
            # single model pass produces both feature sets.
            union_boxes = get_union_box(image_bboxes)
            if args.cuda:
                union_boxes = union_boxes.cuda()
            image_bboxes = torch.cat((image_bboxes, union_boxes), axis=1)
            del union_boxes
            image_bboxes = Variable(image_bboxes)
            # [batch, node_num, 2048]
            rois_feature_all = model(img, image_bboxes)
            feature_num = rois_feature_all.shape[2]
            rois_feature = rois_feature_all[:, :node_num, :]
            union_feature = rois_feature_all[:, node_num:, :].reshape(
                -1, node_num, node_num, feature_num)
            if args.load_ssl_model:
                # Append the SSL model's whole-image feature as an extra node.
                img_feat = ssl_model(
                    img, image_bboxes)  # [batch_size, max_person+1, feat_dim]
                rois_feature = torch.cat((rois_feature, img_feat[:, -1:, :]),
                                         dim=1)
            all_feat.append(rois_feature.cpu().data.numpy())
            all_union_feat.append(union_feature.cpu().data.numpy())
            batch_time.update(time.time() - end_time)
            end_time = time.time()
            if batch_idx % args.print_freq == 0:
                logger.info('%s Epoch: [%d/%d] '
                            'Time %.3f (%.3f)\t' %
                            (eval_type, batch_idx, len(loader),
                             batch_time.val, batch_time.avg))
        all_feat = np.concatenate(all_feat)
        all_union_feat = np.concatenate(all_union_feat)
        np.save(feat_filename, all_feat)
        np.save(union_filename, all_union_feat)
    class_weight, class_count = [], []
    if eval_type == 'train':
        dataset = SRDataset(all_feat, all_union_feat, max_person=args.max_person+1, image_dir=args.images_root, \
            images_list=args.train_file_pre + '_images.txt',
            relations_list=args.train_file_pre + '_relation.json',
            image_size=args.image_size)
        class_weight, class_count = dataset.class_weight()
        batch_size = args.batch_size
        is_shuffle = True
    elif eval_type == 'valid':
        dataset = SRDataset(all_feat, all_union_feat, max_person=args.max_person+1,image_dir=args.images_root, \
            images_list=args.valid_file_pre + '_images.txt',
            relations_list=args.valid_file_pre + '_relation.json',
            image_size=args.image_size)
        batch_size = args.test_batch_size
        is_shuffle = False
    else:
        dataset = SRDataset(all_feat, all_union_feat, max_person=args.max_person+1,image_dir=args.images_root, \
            images_list=args.test_file_pre + '_images.txt', \
            relations_list=args.test_file_pre + '_relation.json',
            image_size=args.image_size
        )
        batch_size = args.test_batch_size
        is_shuffle = False
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=is_shuffle,
                                             num_workers=args.num_workers,
                                             worker_init_fn=np.random.seed(
                                                 args.manualSeed))
    return dataloader, class_weight, class_count
def get_data(data_dir, source, target, source_train_path,
             target_train_path, source_extension, target_extension, height,
             width, batch_size, re=0, workers=8):
    """Build the cross-domain re-id dataset and its four data loaders.

    Creates one shuffled training loader per domain (source and target,
    sharing the same augmentation pipeline) and deterministic query/gallery
    loaders over the target domain for evaluation.

    Returns:
        (dataset, source_num_classes, source_train_loader,
         target_train_loader, query_loader, gallery_loader)
    """
    dataset = DA(data_dir, source, target, source_train_path,
                 target_train_path, source_extension, target_extension)
    source_num_classes = dataset.num_source_train_ids

    # ImageNet channel statistics, shared by the train and test pipelines.
    imagenet_norm = T.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    train_transformer = T.Compose([
        T.RandomSizedRectCrop(height, width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        imagenet_norm,
        # `re` is the random-erasing probability (0 disables it).
        T.RandomErasing(EPSILON=re),
    ])
    test_transformer = T.Compose([
        T.Resize((height, width), interpolation=3),
        T.ToTensor(),
        imagenet_norm,
    ])

    def _train_loader(samples, images_dir, sub_path):
        # Shuffled, drop-last loader over `samples` rooted at images_dir/sub_path.
        # num_workers/pin_memory are intentionally kept at 0/False here,
        # matching the original configuration.
        return DataLoader(
            Preprocessor(samples,
                         root=osp.join(images_dir, sub_path),
                         transform=train_transformer),
            batch_size=batch_size, num_workers=0,
            shuffle=True, pin_memory=False, drop_last=True)

    def _eval_loader(samples, sub_path):
        # Deterministic (unshuffled) loader for query/gallery evaluation.
        return DataLoader(
            Preprocessor(samples,
                         root=osp.join(dataset.target_images_dir, sub_path),
                         transform=test_transformer),
            batch_size=batch_size, num_workers=workers,
            shuffle=False, pin_memory=True)

    source_train_loader = _train_loader(dataset.source_train,
                                        dataset.source_images_dir,
                                        dataset.source_train_path)
    target_train_loader = _train_loader(dataset.target_train,
                                        dataset.target_images_dir,
                                        dataset.target_train_path)
    query_loader = _eval_loader(dataset.query, dataset.query_path)
    gallery_loader = _eval_loader(dataset.gallery, dataset.gallery_path)

    return (dataset, source_num_classes, source_train_loader,
            target_train_loader, query_loader, gallery_loader)
def main():
    """Train/evaluate a vehicle re-id model with joint xent + triplet loss.

    Driven entirely by the module-level ``args``: builds loaders, the model,
    optimizer and LR schedule, optionally restores weights, then either runs
    a single evaluation pass (``args.evaluate``) or the full training loop
    with periodic testing and checkpointing.
    """
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    # Redirect stdout to a per-mode log file under the save dir.
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_imgreid_dataset(
        root=args.root, name=args.dataset,
    )

    # Training augmentation: random translation + flip, ImageNet normalization.
    transform_train = T.Compose([
        T.Random2DTranslation(args.height, args.width),
        T.RandomHorizontalFlip(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Test-time: deterministic resize only.
    transform_test = T.Compose([
        T.Resize((args.height, args.width)),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    pin_memory = True if use_gpu else False

    # Identity-balanced sampling is required by the triplet loss.
    trainloader = DataLoader(
        ImageDataset(dataset.train, transform=transform_train),
        sampler=RandomIdentitySampler(dataset.train,
                                      num_instances=args.num_instances),
        batch_size=args.train_batch, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=True,
    )

    queryloader = DataLoader(
        ImageDataset(dataset.test_3000_query, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    galleryloader = DataLoader(
        ImageDataset(dataset.test_3000_gallery, transform=transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=args.workers,
        pin_memory=pin_memory, drop_last=False,
    )

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dataset.train_vehicle_nums,
                              loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    # Optional experiment-tracking backend; modelid tags uploaded metrics.
    modelid = 0
    if args.upload:
        modelid = upload_data.updatemodel(args.arch, args.lr, args.stepsize,
                                          args.gamma, args.loss, args.dataset,
                                          args.height, args.width,
                                          args.seq_len, args.train_batch,
                                          args.other)

    # criterion_xent = CrossEntropyLabelSmooth(num_classes=dataset.train_vehicle_nums, use_gpu=use_gpu)
    criterion_xent = nn.CrossEntropyLoss()
    criterion_htri = TripletLoss(margin=args.margin)

    optimizer = init_optim(args.optim, model.parameters(), args.lr,
                           args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)
    start_epoch = args.start_epoch

    if args.load_weights:
        # load pretrained weights but ignore layers that don't match in size
        print("Loading pretrained weights from '{}'".format(args.load_weights))
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items()
                         if k in model_dict and model_dict[k].size() == v.size()}
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)

    if args.resume:
        # NOTE(review): checkpoints below are saved with 'epoch': epoch, so
        # resuming at checkpoint['epoch'] re-runs the last saved epoch —
        # confirm whether epoch + 1 was intended on one of the two sides.
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']
        rank1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(start_epoch, rank1))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        test(model, queryloader, galleryloader, use_gpu, return_distmat=True)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(modelid, epoch, model, criterion_xent, criterion_htri,
              optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        # Evaluate every eval_step epochs once past start_eval, and always
        # on the final epoch.
        if (epoch + 1) > args.start_eval and args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                epoch + 1) == args.max_epoch:
            print("==> Test")
            cmc, mAP = test(model, queryloader, galleryloader, use_gpu)
            rank1 = cmc[0]
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            # Unwrap DataParallel before saving so keys are not 'module.'-prefixed.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir,
                                 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
            if args.upload:
                upload_data.updatetest(modelid, epoch + 1, mAP.item(),
                                       cmc[0].item(), cmc[4].item(),
                                       cmc[9].item(), cmc[19].item())
                upload_data.updaterank(modelid, best_rank1.item())

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
def main():
    """Train an OpenPose-style pose-estimation model.

    Configuration comes from the module-level ``cfg`` / ``args``. Sets up the
    checkpoint directory, data loaders, SGD optimizer and (optionally) resumes
    from ``cfg.OPOSE.resume``, then runs the epoch loop with per-epoch
    logging, checkpointing and LR adjustment.
    """
    global BEST_LOSS
    cudnn.benchmark = True
    start_epoch = cfg.OPOSE.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt & vis folders (idempotent).
    os.makedirs(cfg.OPOSE.ckpt, exist_ok=True)
    os.makedirs(os.path.join(cfg.OPOSE.ckpt, 'vis'), exist_ok=True)
    if args.cfg_file is not None and not cfg.OPOSE.evaluate:
        # Keep a copy of the config next to the checkpoints for reproducibility.
        # os.path.basename is portable, unlike splitting on '/'.
        shutil.copyfile(
            args.cfg_file,
            os.path.join(cfg.OPOSE.ckpt, os.path.basename(args.cfg_file)))

    model = pose_estimation.PoseModel(num_point=19,
                                      num_vector=19,
                                      pretrained=True)
    criterion = nn.MSELoss().cuda()

    # Dataset and Loader
    train_dataset = dataset.CocoOpenposeData(
        cfg, cfg.OPOSE.data_root, cfg.OPOSE.info_root, 'train2017',
        transformer=transforms.Compose([
            transforms.RandomResized(),
            transforms.RandomRotate(40),
            transforms.RandomCrop(368),
            transforms.RandomHorizontalFlip(),
        ]))
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               num_workers=int(cfg.workers),
                                               pin_memory=True)
    if cfg.OPOSE.validate or cfg.OPOSE.evaluate:
        val_dataset = dataset.CocoOpenposeData(
            cfg, cfg.OPOSE.data_root, cfg.OPOSE.info_root, 'val2017',
            transformer=transforms.Compose(
                [transforms.TestResized(cfg.OPOSE.input_size)]))
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=BATCH_SIZE,
                                                 shuffle=False,
                                                 num_workers=int(cfg.workers),
                                                 pin_memory=True)

    # Load nets into gpu
    if NUM_GPUS > 1:
        model = torch.nn.DataParallel(model, device_ids=gpu).cuda()

    # Set up optimizer with per-parameter-group settings from the config.
    params, multiple = get_parameters(model, cfg, False)
    optimizer = torch.optim.SGD(params,
                                cfg.OPOSE.base_lr,
                                momentum=cfg.OPOSE.momentum,
                                weight_decay=cfg.OPOSE.weight_decay)

    # Resume training
    title = 'Pytorch-OPOSE-{}-{}'.format(cfg.OPOSE.arch_encoder,
                                         cfg.OPOSE.arch_decoder)
    if cfg.OPOSE.resume:
        print("==> Resuming from checkpoint '{}'".format(cfg.OPOSE.resume))
        assert os.path.isfile(
            cfg.OPOSE.resume), 'Error: no checkpoint directory found!'
        ckpt = torch.load(cfg.OPOSE.resume)
        BEST_LOSS = ckpt['best_loss']
        start_epoch = ckpt['epoch']
        # Checkpoint may or may not carry the DataParallel 'module.' prefix;
        # try the wrapped load first, fall back to the bare model.
        try:
            model.module.load_state_dict(ckpt['state_dict'])
        except Exception:
            model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
                        title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.OPOSE.ckpt, 'log.txt'), title=title)
        logger.set_names(
            ['epoch', 'Learning Rate', 'Train Loss', 'Valid Loss'])

    # Train and val
    for epoch in range(start_epoch, cfg.OPOSE.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f} '.format(epoch + 1,
                                                      cfg.OPOSE.epochs,
                                                      cfg.OPOSE.base_lr))
        train_loss = train(train_loader, model, criterion, optimizer, epoch,
                           USE_CUDA)
        if cfg.OPOSE.validate:
            test_loss = test(val_loader, model, criterion, optimizer, epoch,
                             USE_CUDA)
        else:
            # FIX: was `test_loss = 0.0, 0.0`, which bound a tuple and then
            # logged/saved it in the scalar 'Valid Loss' slot below.
            test_loss = 0.0
        # Append logger file
        logger.append([epoch, cfg.OPOSE.base_lr, train_loss, test_loss])
        # Save model
        save_checkpoint(model, optimizer, test_loss, epoch)
        # Adjust learning rate
        adjust_learning_rate(optimizer, epoch)
        # Draw curve (best-effort; plotting failures must not stop training).
        try:
            draw_curve('model', cfg.OPOSE.ckpt)
            print('==> Success saving log curve...')
        except Exception:
            print('==> Saving log curve error...')

    logger.close()
    # Best-effort: archive the final curve and a timestamped copy of the log.
    try:
        savefig(os.path.join(cfg.OPOSE.ckpt, 'log.eps'))
        shutil.copyfile(
            os.path.join(cfg.OPOSE.ckpt, 'log.txt'),
            os.path.join(
                cfg.OPOSE.ckpt, 'log{}.txt'.format(
                    datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('Copy log error.')
    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_LOSS))
def main(args):
    """Train or test a linear video classifier on top of a pretrained backbone.

    Three mutually exclusive modes driven by ``args``:
    - ``args.test``: load a checkpoint, run retrieval or multi-crop testing,
      then exit.
    - ``args.resume`` / ``args.pretrain``: restore weights (and, for resume,
      optimizer/epoch state) before training.
    - otherwise: train from scratch.
    """
    # Respect an externally-set CUDA_VISIBLE_DEVICES unless --gpu is given.
    if args.gpu is None:
        args.gpu = str(os.environ["CUDA_VISIBLE_DEVICES"])
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    device = torch.device('cuda')
    best_acc = 0
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # Per-GPU batch size is scaled to an effective global batch size.
    num_gpu = len(str(args.gpu).split(','))
    args.batch_size = num_gpu * args.batch_size
    print('=> Effective BatchSize = %d' % args.batch_size)
    args.img_path, args.model_path, args.exp_path = set_path(args)

    ### classifier model ###
    num_class_dict = {
        'ucf101': 101,
        'hmdb51': 51,
        'k400': 400,
        'ucf101-f': 101,
        'hmdb51-f': 51,
        'k400-f': 400
    }
    args.num_class = num_class_dict[args.dataset]

    if args.train_what == 'last':  # for linear probe
        args.final_bn = True
        args.final_norm = True
        args.use_dropout = False
    else:  # for training the entire network
        args.final_bn = False
        args.final_norm = False
        args.use_dropout = True

    if args.model == 'lincls':
        model = LinearClassifier(network=args.net,
                                 num_class=args.num_class,
                                 dropout=args.dropout,
                                 use_dropout=args.use_dropout,
                                 use_final_bn=args.final_bn,
                                 use_l2_norm=args.final_norm)
    else:
        raise NotImplementedError
    model.to(device)

    ### optimizer ###
    # Build per-parameter groups depending on what is being trained:
    # 'last' freezes the backbone, 'ft' gives it a 10x smaller LR.
    if args.train_what == 'last':
        print('=> [optimizer] only train last layer')
        params = []
        for name, param in model.named_parameters():
            if 'backbone' in name:
                param.requires_grad = False
            else:
                params.append({'params': param})
    elif args.train_what == 'ft':
        print('=> [optimizer] finetune backbone with smaller lr')
        params = []
        for name, param in model.named_parameters():
            if 'backbone' in name:
                params.append({'params': param, 'lr': args.lr / 10})
            else:
                params.append({'params': param})
    else:  # train all
        params = []
        print('=> [optimizer] train all layer')
        for name, param in model.named_parameters():
            params.append({'params': param})

    if args.train_what == 'last':
        # Sanity print: only non-backbone params should require grad.
        print('\n===========Check Grad============')
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.requires_grad)
        print('=================================\n')

    if args.optim == 'adam':
        optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(params,
                              lr=args.lr,
                              weight_decay=args.wd,
                              momentum=0.9)
    else:
        raise NotImplementedError

    model = torch.nn.DataParallel(model)
    # Keep a handle to the unwrapped model for state_dict load/save.
    model_without_dp = model.module
    ce_loss = nn.CrossEntropyLoss()
    args.iteration = 1

    ### test: higher priority ###
    if args.test:
        if os.path.isfile(args.test):
            print("=> loading testing checkpoint '{}'".format(args.test))
            checkpoint = torch.load(args.test,
                                    map_location=torch.device('cpu'))
            epoch = checkpoint['epoch']
            state_dict = checkpoint['state_dict']

            if args.retrieval_ucf or args.retrieval_full:  # if directly test on pretrained network
                # Rename MoCo-style encoder keys to this model's backbone keys.
                new_dict = {}
                for k, v in state_dict.items():
                    k = k.replace('encoder_q.0.', 'backbone.')
                    new_dict[k] = v
                state_dict = new_dict

            try:
                model_without_dp.load_state_dict(state_dict)
            except:
                # Fall back to partial (non-equal) loading on key mismatch.
                neq_load_customized(model_without_dp,
                                    state_dict,
                                    verbose=True)
        else:
            print("[Warning] no checkpoint found at '{}'".format(args.test))
            epoch = 0
            print("[Warning] if test random init weights, press c to continue")
            # Deliberate interactive breakpoint before testing random weights.
            import ipdb
            ipdb.set_trace()

        args.logger = Logger(path=os.path.dirname(args.test))
        args.logger.log('args=\n\t\t' + '\n\t\t'.join(
            ['%s:%s' % (str(k), str(v)) for k, v in vars(args).items()]))

        transform_test_cuda = transforms.Compose([
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225],
                        channel=1)
        ])

        if args.retrieval:
            test_retrieval(model, ce_loss, transform_test_cuda, device, epoch,
                           args)
        elif args.center_crop or args.five_crop or args.ten_crop:
            transform = get_transform('test', args)
            test_dataset = get_data(transform, 'test', args)
            test_10crop(test_dataset, model, ce_loss, transform_test_cuda,
                        device, epoch, args)
        else:
            raise NotImplementedError
        sys.exit(0)

    ### data ###
    transform_train = get_transform('train', args)
    train_loader = get_dataloader(get_data(transform_train, 'train', args),
                                  'train', args)
    transform_val = get_transform('val', args)
    val_loader = get_dataloader(get_data(transform_val, 'val', args), 'val',
                                args)
    # GPU-side transforms applied per batch inside the train/val loops.
    transform_train_cuda = transforms.Compose([
        T.RandomHorizontalFlip(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                    channel=1)
    ])  # ImageNet
    transform_val_cuda = transforms.Compose([
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225],
                    channel=1)
    ])  # ImageNet
    print('===================================')

    ### restart training ###
    if args.resume:
        if os.path.isfile(args.resume):
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.start_epoch = checkpoint['epoch'] + 1
            args.iteration = checkpoint['iteration']
            best_acc = checkpoint['best_acc']
            state_dict = checkpoint['state_dict']

            try:
                model_without_dp.load_state_dict(state_dict)
            except:
                print('[WARNING] resuming training with different weights')
                neq_load_customized(model_without_dp,
                                    state_dict,
                                    verbose=True)
            print("=> load resumed checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

            try:
                optimizer.load_state_dict(checkpoint['optimizer'])
            except:
                print(
                    '[WARNING] failed to load optimizer state, initialize optimizer'
                )
        else:
            print("[Warning] no checkpoint found at '{}', use random init".
                  format(args.resume))

    elif args.pretrain:
        if os.path.isfile(args.pretrain):
            checkpoint = torch.load(args.pretrain, map_location='cpu')
            state_dict = checkpoint['state_dict']

            # Same MoCo-style key renaming as in the test branch.
            new_dict = {}
            for k, v in state_dict.items():
                k = k.replace('encoder_q.0.', 'backbone.')
                new_dict[k] = v
            state_dict = new_dict

            try:
                model_without_dp.load_state_dict(state_dict)
            except:
                neq_load_customized(model_without_dp, state_dict, verbose=True)
            print("=> loaded pretrained checkpoint '{}' (epoch {})".format(
                args.pretrain, checkpoint['epoch']))
        else:
            print("[Warning] no checkpoint found at '{}', use random init".
                  format(args.pretrain))

    else:
        print("=> train from scratch")

    torch.backends.cudnn.benchmark = True

    # plot tools
    # NOTE(review): `img_path` is not defined in this function — set_path()
    # stored the value in args.img_path above; this likely should read
    # args.img_path (otherwise it relies on a module-level global). Confirm.
    writer_val = SummaryWriter(logdir=os.path.join(img_path, 'val'))
    writer_train = SummaryWriter(logdir=os.path.join(img_path, 'train'))
    args.val_plotter = TB.PlotterThread(writer_val)
    args.train_plotter = TB.PlotterThread(writer_train)

    args.logger = Logger(path=args.img_path)
    args.logger.log('args=\n\t\t' + '\n\t\t'.join(
        ['%s:%s' % (str(k), str(v)) for k, v in vars(args).items()]))

    # main loop
    for epoch in range(args.start_epoch, args.epochs):
        # Re-seed per epoch so data augmentation is reproducible across resumes.
        np.random.seed(epoch)
        random.seed(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        train_one_epoch(train_loader, model, ce_loss, optimizer,
                        transform_train_cuda, device, epoch, args)

        if epoch % args.eval_freq == 0:
            _, val_acc = validate(val_loader, model, ce_loss,
                                  transform_val_cuda, device, epoch, args)

            # save check_point
            is_best = val_acc > best_acc
            best_acc = max(val_acc, best_acc)
            state_dict = model_without_dp.state_dict()
            save_dict = {
                'epoch': epoch,
                'state_dict': state_dict,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'iteration': args.iteration
            }
            save_checkpoint(save_dict,
                            is_best,
                            1,
                            filename=os.path.join(
                                args.model_path,
                                'epoch%d.pth.tar' % epoch),
                            keep_all=False)

    print('Training from ep %d to ep %d finished' %
          (args.start_epoch, args.epochs))
    sys.exit(0)