def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charadesflow(
        args.data, 'train', train_file, args.cache,
        transform=transforms.Compose([
            arraytransforms.RandomResizedCrop(224),
            arraytransforms.ToTensor(),
            normalize,
            transforms.Lambda(lambda x: torch.cat(x)),
        ]))
    val_transforms = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        transforms.Lambda(lambda x: torch.cat(x)),
    ])
    val_dataset = Charadesflow(args.data, 'val', val_file, args.cache,
                               transform=val_transforms)
    valvideo_dataset = Charadesflow(args.data, 'val_video', val_file,
                                    args.cache, transform=val_transforms)
    return train_dataset, val_dataset, valvideo_dataset
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # resize first for computational efficiency, since rotation is very slow
        transforms.Resize(250.0 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # random color jittering
    rgb_np = color_jitter(rgb_np)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # random color jittering
    rgb_np = color_jitter(rgb_np)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray'
    depth_np = np.asfarray(depth_np, dtype='float32')
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # set zeros in depth as NaN so geometric warps do not blend missing pixels
    depth_np[depth_np == 0] = np.nan

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # resize first for computational efficiency, since rotation is very slow
        transforms.Resize(float(image_size) / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip),
    ])
    rgb_np = transform(rgb)
    # random color jittering
    rgb_np = color_jitter(rgb_np)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]
    depth_np = transform(depth_np)
    depth_np[np.isnan(depth_np)] = 0
    depth_np = depth_np / 10.0

    return rgb_np, depth_np
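# Why the zeros-to-NaN trick above works: interpolation during rotation or
# resizing would otherwise blend "missing" zero-depth pixels into valid
# neighbours, while NaN propagates through interpolation so the contaminated
# pixels can be zeroed again afterwards. A minimal standalone sketch of the
# same idea using scipy (not the project's transforms module):
import numpy as np
from scipy.ndimage import rotate

depth = np.random.rand(48, 64).astype(np.float32)
depth[10:20, 10:20] = 0.0        # simulated missing depth
depth[depth == 0] = np.nan       # mark holes so they survive interpolation
warped = rotate(depth, angle=5.0, reshape=False, order=1)
warped[np.isnan(warped)] = 0.0   # restore holes (slightly grown, never blended)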
def singleImageResult():
    cut_size = 90
    # Data
    transform_test = transforms.Compose([
        transforms.CenterCrop(cut_size),
        transforms.ToTensor(),
    ])
    raw_img = io.imread('images/2.jpg')
    gray = rgb2gray(raw_img)
    gray = resize(gray, (96, 96), mode='symmetric').astype(np.uint8)
    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)  # grayscale -> 3 channels
    img = Image.fromarray(img)
    inputs = transform_test(img)
    inputs = inputs[np.newaxis, :, :, :]  # add batch (crop) dimension
    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.cuda()
    inputs = Variable(inputs, volatile=True)
    outputs = net(inputs)
    score = F.softmax(outputs, dim=1)
    print(score)
def loading_data():
    mean_std = cfg.DATA.MEAN_STD
    train_simul_transform = own_transforms.Compose([
        own_transforms.Scale(int(cfg.TRAIN.IMG_SIZE[0] / 0.875)),
        own_transforms.RandomCrop(cfg.TRAIN.IMG_SIZE),
        own_transforms.RandomHorizontallyFlip()
    ])
    val_simul_transform = own_transforms.Compose([
        own_transforms.Scale(int(cfg.TRAIN.IMG_SIZE[0] / 0.875)),
        own_transforms.CenterCrop(cfg.TRAIN.IMG_SIZE)
    ])
    img_transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(*mean_std)
    ])
    target_transform = standard_transforms.Compose([
        own_transforms.MaskToTensor(),
        own_transforms.ChangeLabel(cfg.DATA.IGNORE_LABEL,
                                   cfg.DATA.NUM_CLASSES - 1)
    ])
    restore_transform = standard_transforms.Compose([
        own_transforms.DeNormalize(*mean_std),
        standard_transforms.ToPILImage()
    ])

    train_set = CityScapes('train', simul_transform=train_simul_transform,
                           transform=img_transform,
                           target_transform=target_transform)
    train_loader = DataLoader(train_set, batch_size=cfg.TRAIN.BATCH_SIZE,
                              num_workers=16, shuffle=True)
    val_set = CityScapes('val', simul_transform=val_simul_transform,
                         transform=img_transform,
                         target_transform=target_transform)
    val_loader = DataLoader(val_set, batch_size=cfg.VAL.BATCH_SIZE,
                            num_workers=16, shuffle=False)

    return train_loader, val_loader, restore_transform
def train_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                    depth_fix: np.ndarray) -> TNpData:
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_raw = depth_raw / s
    depth_fix = depth_fix / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # resize first for computational efficiency, since rotation is very slow
        transforms.Resize(250.0 / self.iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((self.oheight, self.owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb = transform(rgb)
    # random color jittering
    rgb = color_jitter(rgb)
    rgb = np.asfarray(rgb, dtype='float') / 255
    depth_raw = transform(depth_raw)
    depth_fix = transform(depth_fix)
    return rgb, depth_raw, depth_fix
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charades(
        args.data, 'train', train_file, args.cache,
        transform=transforms.Compose([
            transforms.RandomResizedCrop(args.inputsize),
            transforms.ColorJitter(brightness=0.4, contrast=0.4,
                                   saturation=0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),  # missing PCA lighting jitter
            normalize,
        ]))
    val_dataset = Charades(
        args.data, 'val', val_file, args.cache,
        transform=transforms.Compose([
            transforms.Resize(int(256. / 224 * args.inputsize)),
            transforms.CenterCrop(args.inputsize),
            transforms.ToTensor(),
            normalize,
        ]))
    valvideo_dataset = Charades(
        args.data, 'val_video', val_file, args.cache,
        transform=transforms.Compose([
            transforms.Resize(int(256. / 224 * args.inputsize)),
            transforms.CenterCrop(args.inputsize),
            transforms.ToTensor(),
            normalize,
        ]))
    return train_dataset, val_dataset, valvideo_dataset
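# A minimal usage sketch for get() (the paths and filenames below are
# hypothetical placeholders; the real entry point builds `args` with
# argparse): the returned objects are torch Datasets, so the caller wraps
# them in DataLoaders.
from types import SimpleNamespace
import torch.utils.data

args = SimpleNamespace(data='/path/to/Charades_v1_rgb',
                       train_file='Charades_v1_train.csv',
                       val_file='Charades_v1_test.csv',
                       cache='/tmp/charades-cache',
                       inputsize=224)
train_dataset, val_dataset, valvideo_dataset = get(args)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64,
                                           shuffle=True, num_workers=4)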
def val_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                  depth_fix: np.ndarray) -> TNpData:
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(240.0 / self.iheight),
        transforms.CenterCrop((self.oheight, self.owidth)),
    ])
    rgb = transform(rgb)
    rgb = np.asfarray(rgb, dtype='float') / 255
    depth_raw = transform(depth_raw)
    depth_fix = transform(depth_fix)
    return rgb, depth_raw, depth_fix
def imageNet_loader(train_size, valid_size, test_size, crop_size):
    # http://blog.outcome.io/pytorch-quick-start-classifying-an-image/
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('./data/kaggle/train', transforms.Compose([
            transforms.RandomResizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=train_size, shuffle=True, pin_memory=True, drop_last=True)
    valid_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('./data/kaggle/valid', transforms.Compose([
            transforms.CenterCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=valid_size, shuffle=True, pin_memory=True, drop_last=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder('./data/image_net/small_classes/',
                             transforms.Compose([
                                 transforms.CenterCrop(crop_size),
                                 transforms.ToTensor(),
                                 normalize,
                             ])),
        batch_size=test_size, shuffle=False)
    return train_loader, valid_loader, test_loader
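# A brief usage sketch for imageNet_loader (batch sizes are illustrative, and
# the ./data/kaggle ImageFolder layout is assumed to exist on disk):
train_loader, valid_loader, test_loader = imageNet_loader(
    train_size=64, valid_size=64, test_size=32, crop_size=224)
images, labels = next(iter(train_loader))  # images: (64, 3, 224, 224)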
def val_transform(rgb, depth):
    depth_np = depth
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)
    return rgb_np, depth_np
def open_nii(volpaths, segpaths, ind, num, in_z, out_z, center_crop_sz,
             series_names, seg_series_names, txforms=None, nrrd=True):
    vols = []
    segs = []
    if nrrd:
        series, seg_series = get_nii_nrrd(volpaths, segpaths)
    assert np.shape(series)[3] == np.shape(seg_series)[3]
    num_slices = np.arange(np.shape(series[0])[2])
    if in_z != 0:
        num_slices = num_slices[in_z:-in_z]
    # sample `num` slice indices without replacement
    sub_rand = np.random.choice(num_slices, size=num, replace=False)
    center = transforms.CenterCrop(center_crop_sz)
    depth_center = transforms.DepthCenterCrop(out_z)
    series = [vol.astype(float) for vol in series]
    for i in sub_rand:
        if in_z == 0:
            nascent_series = [vol[:, :, i] for vol in series]
            nascent_seg_series = [seg[:, :, i] for seg in seg_series]
            nascent_series = np.expand_dims(nascent_series, axis=0)
            nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        else:
            nascent_series = [vol[:, :, i - in_z:i + 1 + in_z]
                              for vol in series]
            assert nascent_series[0].shape[2] == in_z * 2 + 1
            nascent_series = [np.squeeze(np.split(v, v.shape[2], axis=2))
                              for v in nascent_series]
            nascent_seg_series = [seg[:, :, i - in_z:i + 1 + in_z]
                                  for seg in seg_series]
            nascent_seg_series = [depth_center.engage(s)
                                  for s in nascent_seg_series]
            nascent_seg_series = [np.squeeze(np.split(s, s.shape[2], axis=2))
                                  for s in nascent_seg_series]
            if out_z == 1:
                nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        if txforms is not None:
            for j in txforms:
                nascent_series, nascent_seg_series = j.engage(
                    nascent_series, nascent_seg_series)
        vols.append(np.squeeze(nascent_series))
        segs.append(np.squeeze(center.engage(nascent_seg_series, out_z > 1)))
    return vols, segs
def _make_test_transform(self, crop_type, crop_size_img, crop_size_label,
                         pad_size):
    test_transform_ops = self.basic_transform_ops.copy()
    if pad_size is not None:
        test_transform_ops.append(transforms.Pad(pad_size, 0))
    if crop_type == 'center':
        test_transform_ops.append(
            transforms.CenterCrop(crop_size_img, crop_size_label))
    elif crop_type is None:
        pass
    else:
        raise RuntimeError('Unknown test crop type.')
    return transforms.Compose(test_transform_ops)
def val_transform(rgb, depth):
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(float(image_size) / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]
    depth_np = transform(depth)
    depth_np = depth_np / 10.0
    return rgb_np, depth_np
def test(valdir, bs, sz, rect_val=False):
    if rect_val:
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, bs)

        ar_tfms = [transforms.Resize(int(sz * 1.14)), CropArTfm(idx2ar, sz)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        return PaddleDataLoader(val_dataset, concurrent=1,
                                indices=idx_sorted, shuffle=False).reader()

    val_tfms = [transforms.Resize(int(sz * 1.14)), transforms.CenterCrop(sz)]
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
    return PaddleDataLoader(val_dataset).reader()
def val_transform(rgb, depth):
    depth_np = depth
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # transforms.Resize(528.0 / iheight),
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    # added by me: resize to the 512x512 network input
    rgb_np = cv2.resize(rgb_np, (512, 512), interpolation=cv2.INTER_NEAREST)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)
    # added by me
    depth_np = cv2.resize(depth_np, (512, 512),
                          interpolation=cv2.INTER_NEAREST)
    return rgb_np, depth_np
def load_data_transformers(resize_reso=512, crop_reso=448, swap_num=[7, 7]):
    center_resize = 600
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    data_transforms = {
        'swap': transforms.Compose([
            transforms.Randomswap((swap_num[0], swap_num[1])),
        ]),
        'common_aug': transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.RandomRotation(degrees=15),
            transforms.RandomCrop((crop_reso, crop_reso)),
            transforms.RandomHorizontalFlip(),
        ]),
        'train_totensor': transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            # ImageNetPolicy(),
            transforms.ToTensor(),
            normalize,
        ]),
        'val_totensor': transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            transforms.ToTensor(),
            normalize,
        ]),
        'test_totensor': transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.CenterCrop((crop_reso, crop_reso)),
            transforms.ToTensor(),
            normalize,
        ]),
        'None': None,
    }
    return data_transforms
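# A short usage sketch for load_data_transformers (illustrative; assumes a
# local 'example.jpg' and the custom Randomswap transform used by DCL-style
# training): the dict keys select the pipeline for each phase, with
# 'common_aug' applied before the phase-specific to-tensor pipeline.
from PIL import Image

transformers = load_data_transformers(resize_reso=512, crop_reso=448)
img = Image.open('example.jpg').convert('RGB')
img_aug = transformers['common_aug'](img)         # resize/rotate/crop/flip
tensor = transformers['train_totensor'](img_aug)  # (3, 448, 448), normalized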
def _make_train_transform(self, crop_type, crop_size_img, crop_size_label,
                          rand_flip, mod_drop_rate, balance_rate, pad_size,
                          rand_rot90, random_black_patch_size, mini_positive):
    train_transform_ops = self.basic_transform_ops.copy()
    train_transform_ops += [
        transforms.RandomBlack(random_black_patch_size),
        transforms.RandomDropout(mod_drop_rate),
        transforms.RandomFlip(rand_flip)
    ]
    if pad_size is not None:
        train_transform_ops.append(transforms.Pad(pad_size, 0))
    if rand_rot90:
        train_transform_ops.append(transforms.RandomRotate2d())
    if crop_type == 'random':
        if mini_positive:
            train_transform_ops.append(
                transforms.RandomCropMinSize(crop_size_img, mini_positive))
        else:
            train_transform_ops.append(transforms.RandomCrop(crop_size_img))
    elif crop_type == 'balance':
        train_transform_ops.append(
            transforms.BalanceCrop(balance_rate, crop_size_img,
                                   crop_size_label))
    elif crop_type == 'center':
        train_transform_ops.append(
            transforms.CenterCrop(crop_size_img, crop_size_label))
    elif crop_type is None:
        pass
    else:
        raise RuntimeError('Unknown train crop type.')
    return transforms.Compose(train_transform_ops)
def train_transform(rgb, depth):
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # print("scale factor s={}".format(s))
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # transforms.Resize(530 / iheight),
        # resize first for computational efficiency, since rotation is very slow
        transforms.Resize(250 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # added by me: resize to the 512x512 network input
    # rgb_np = Transform.resize(rgb_np, [512, 512])
    rgb_np = cv2.resize(rgb_np, (512, 512), interpolation=cv2.INTER_NEAREST)
    # random color jittering
    rgb_np = color_jitter(rgb_np)
    rgb_np = np.asfarray(rgb_np, dtype='float') / 255
    depth_np = transform(depth_np)
    # added by me
    # depth_np = Transform.resize(depth_np, [512, 512])
    depth_np = cv2.resize(depth_np, (512, 512),
                          interpolation=cv2.INTER_NEAREST)
    # debug visualization:
    # data = rgb_np * 255
    # data = Image.fromarray(data.astype(np.uint8))
    # data.show()
    return rgb_np, depth_np
pred = 8
top_3 = [9, 8, 7]
out = np.zeros(10)

# Load model
print('Loading model...')
curr_folder = 'models_jester'
model = FullModel(batch_size=1, seq_lenght=16)
loaded_dict = torch.load(curr_folder + '/demo.ckp')
model.load_state_dict(loaded_dict)
model = model.cuda()
model.eval()

std, mean = [0.2674, 0.2676, 0.2648], [0.4377, 0.4047, 0.3925]
transform = Compose([
    t.CenterCrop((96, 96)),
    t.ToTensor(),
    t.Normalize(std=std, mean=mean),
])

print('Starting prediction')
s = time.time()
n = 0
hist = []
mean_hist = []
setup = True
plt.ion()
fig, ax = plt.subplots()
cooldown = 0
eval_samples = 2
def test_net_cheap(test_volpath, test_segpath, mult_inds, in_z, model,
                   t_transform_plan, orig_dim, batch_size, out_file,
                   num_labels, num_labels_final, volpaths, segpaths,
                   nrrd=True, vol_only=False, get_dice=False, make_niis=False,
                   verbose=True):
    t_out_z, t_center_crop_sz = get_out_size(orig_dim, in_z,
                                             t_transform_plan, model)
    t_center = transforms.CenterCrop(t_center_crop_sz)
    dices = []
    jaccards = []
    hds = []
    assds = []
    dice_inds = []
    times = []
    for ind in range(len(mult_inds)):
        t0 = time.time()
        if vol_only:
            series, seg_series = open_double_vol(volpaths[ind])
            seg_series = [a * 0 for a in seg_series]
        else:
            series, seg_series = preprocess.get_nii_nrrd(volpaths[ind],
                                                         segpaths[ind])
        num_slices = np.arange(np.shape(series[0])[2])
        if in_z != 0:
            num_slices = num_slices[in_z:-in_z]
        slice_inds = num_slices
        for slice_ind in slice_inds:
            assert np.min(num_slices) <= slice_ind <= np.max(num_slices)
        tout, tvol, tseg = get_subvols_cheap(series, seg_series, slice_inds,
                                             in_z, t_out_z, t_center_crop_sz,
                                             model, num_labels, batch_size,
                                             t_transform_plan,
                                             verbose=verbose)
        duration = time.time() - t0
        tseg = np.clip(tseg, 0, 2)
        times.append(duration)
        if get_dice:
            hd, assd = get_dists_non_volumetric(tseg.astype(np.int64),
                                                np.argmax(tout, axis=0))
            tseg_hot = get_hot(tseg, num_labels_final)
            tout_hot = np.argmax(tout, axis=0)
            tout_hot = np.clip(tout_hot, 0, 1)
            tout_hot = get_hot(tout_hot, num_labels_final)
            dce = dice(tseg_hot[1:], tout_hot[1:])
            jc = jaccard(tseg_hot[1:], tout_hot[1:])
            if verbose:
                print(('\r{}: Duration: {:.2f} ; Dice: {:.2f} ; '
                       'Jaccard: {:.2f} ; Hausdorff: {:.2f} ; '
                       'ASSD: {:.2f}').format(mult_inds[ind], duration, dce,
                                              jc, np.mean(hd), np.mean(assd)))
            jaccards.append(jc)
            dices.append(dce)
            hds.append(hd)
            assds.append(assd)
            dice_inds.append(mult_inds[ind])
        else:
            if verbose:
                print('\r{}'.format(mult_inds[ind]))
        com_fake = []
        com_real = []
        if make_niis:
            # out_out = tout
            out_out = np.zeros_like(tout[0])
            maxes = np.argmax(tout, axis=0)
            sparse_maxes = sparsify(maxes)
            for i in range(sparse_maxes.shape[1]):
                # keep only the largest connected component of each wall,
                # then record its center of mass
                lw1, num1 = measurements.label(sparse_maxes[1, i])
                area1 = measurements.sum(sparse_maxes[1, i], lw1,
                                         index=np.arange(lw1.max() + 1))
                areaImg1 = area1[lw1]
                sparse_maxes[1, i] = np.where(areaImg1 < np.max(areaImg1),
                                              0, 1)
                com_lateral = list(
                    measurements.center_of_mass(sparse_maxes[1, i]))
                lw2, num2 = measurements.label(sparse_maxes[2, i])
                area2 = measurements.sum(sparse_maxes[2, i], lw2,
                                         index=np.arange(lw2.max() + 1))
                areaImg2 = area2[lw2]
                sparse_maxes[2, i] = np.where(areaImg2 < np.max(areaImg2),
                                              0, 1)
                com_septal = list(
                    measurements.center_of_mass(sparse_maxes[2, i]))
                com_fake.append(com_lateral + com_septal)
            maxes = np.argmax(sparse_maxes, axis=0)
            out_out = np.flip(maxes, -1)
            out_out = np.rot90(out_out, k=-1, axes=(-2, -1))
            out_out = np.transpose(out_out, [1, 2, 0])
            write_nrrd(out_out.astype(np.uint8),
                       out_file + '/tout-{}.seg.nrrd'.format(mult_inds[ind]))

            seg_out = tseg
            sparse_seg = sparsify(tseg.astype(np.uint8))
            for i in range(sparse_seg.shape[1]):
                com_lateral_seg = list(
                    measurements.center_of_mass(sparse_seg[1, i]))
                com_septal_seg = list(
                    measurements.center_of_mass(sparse_seg[2, i]))
                com_real.append(com_lateral_seg + com_septal_seg)
            seg_out = np.flip(seg_out, -1)
            seg_out = np.rot90(seg_out, k=-1, axes=(-2, -1))
            seg_out = np.transpose(seg_out, [1, 2, 0])
            write_nrrd(seg_out.astype(np.uint8),
                       out_file + '/tseg-{}.seg.nrrd'.format(mult_inds[ind]))

            tv = np.stack(t_center.engage(np.expand_dims(tvol, 0), True))
            vol_out = tv
            vol_out = np.flip(vol_out, -1)
            vol_out = np.rot90(vol_out, k=-1, axes=(-2, -1))
            vol_out = np.transpose(vol_out, [1, 2, 0])
            vol_out = nib.Nifti1Image(vol_out, np.eye(4))
            nib.save(vol_out,
                     out_file + '/tvol-{}.nii'.format(mult_inds[ind]))
            # print('Jaccard summary: ' + str(get_CI(jaccards)))

            # merge real and predicted centers of mass into one CSV row set
            [a.extend(b) for a, b in zip(com_real, com_fake)]
            com_merged = [[round(b, 2) for b in a] for a in com_real]
            headers = ['real_y_l', 'real_x_l', 'real_y_s', 'real_x_s',
                       'fake_y_l', 'fake_x_l', 'fake_y_s', 'fake_x_s']
            df = pd.DataFrame(com_merged, columns=headers)
            df.to_csv(out_file + '/{}.csv'.format(mult_inds[ind]))

    # return vol_out, out_out, seg_out
    if get_dice:
        return (np.array(dices), np.array(jaccards), np.array(hds),
                np.array(assds), np.array(times))
    return
def get_subvols_cheap(series, seg_series, slice_inds, in_z, out_z,
                      center_crop_sz, model, num_labels, batch_size,
                      txforms=None, verbose=True):
    # get beginning index of output since the z dim is smaller than vol
    z0 = (in_z * 2 + 1 - out_z) // 2
    sz = np.array([num_labels, slice_inds.shape[0] + 2 * in_z,
                   center_crop_sz, center_crop_sz])
    bigout = np.zeros(sz)
    bigvol = np.zeros(sz[1:])
    bigseg = np.zeros(sz[1:])
    center = transforms.CenterCrop(center_crop_sz)
    depth_center = transforms.DepthCenterCrop(out_z)
    vols = []
    segs = []
    batch_ind = 0
    absolute_ind = 0
    for i in slice_inds:
        if in_z == 0:
            nascent_series = [vol[:, :, i] for vol in series]
            nascent_seg_series = [seg[:, :, i] for seg in seg_series]
            nascent_series = np.expand_dims(nascent_series, axis=0)
            nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        else:
            nascent_series = [v[:, :, i - in_z:i + 1 + in_z] for v in series]
            assert nascent_series[0].shape[2] == in_z * 2 + 1
            nascent_series = [np.squeeze(np.split(v, v.shape[2], axis=2))
                              for v in nascent_series]
            nascent_seg_series = [s[:, :, i - in_z:i + 1 + in_z]
                                  for s in seg_series]
            nascent_seg_series = [depth_center.engage(s)
                                  for s in nascent_seg_series]
            nascent_seg_series = [np.squeeze(np.split(s, s.shape[2], axis=2))
                                  for s in nascent_seg_series]
            if out_z == 1:
                nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        if txforms is not None:
            for j in txforms:
                nascent_series, nascent_seg_series = j.engage(
                    nascent_series, nascent_seg_series)
        vols.append(np.squeeze(nascent_series))
        segs.append(np.squeeze(center.engage(nascent_seg_series, out_z > 1)))
        absolute_ind += 1
        if absolute_ind >= batch_size or (i >= slice_inds[-1] and vols):
            nascent_series = np.array(vols)
            nascent_seg_series = np.array(segs)
            nascent_series = preprocess.rot_and_flip(nascent_series)
            nascent_seg_series = preprocess.rot_and_flip(nascent_seg_series)
            # min-max normalize the batch to [0, 1]
            nascent_series = nascent_series - np.min(nascent_series)
            nascent_series = nascent_series / np.max(nascent_series)
            if len(nascent_series.shape) < 4:
                nascent_series = np.expand_dims(nascent_series, 0)
            tv = torch.from_numpy(nascent_series).float()
            tv = Variable(tv).cuda()
            if verbose:
                sys.stdout.write('\r{:.2f}%'.format(100.0 * i / sz[1]))
                sys.stdout.flush()
            if in_z == 0:
                tv = tv.permute(1, 0, 2, 3)
            tout = model(tv).data.cpu().numpy().astype(np.int8)
            if in_z == 0:
                nascent_series = nascent_series.squeeze()
            if np.array(nascent_series.shape).shape[0] < 3:
                nascent_series = np.expand_dims(nascent_series, 0)
            # scatter the batch predictions back into the full volume
            for j in range(len(nascent_series)):
                bsz = len(nascent_series)
                beg = i - in_z + z0 - bsz + j + 1
                end = i - in_z + z0 - bsz + j + out_z + 1
                bigout[:, beg:end] += np.expand_dims(tout[j], 1)
                bigseg[beg:end] = nascent_seg_series[j]
                beg = i - in_z + 1 - bsz + j
                end = i + in_z - bsz + j + 2
                bigvol[beg:end] = nascent_series[j]
            absolute_ind = 0
            batch_ind += 1
            vols = []
            segs = []
    return bigout, bigvol, bigseg
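# A tiny standalone check of the depth bookkeeping in get_subvols_cheap
# (values illustrative): each sample spans 2*in_z+1 input slices, the model
# emits out_z of them, so the first output slice sits z0 slices into the
# window.
in_z, out_z = 2, 1
window = 2 * in_z + 1        # 5 input slices per sample
z0 = (window - out_z) // 2   # = 2, i.e. the output is centred in the window
assert z0 + out_z + z0 == window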
def open_nii(volpaths, segpaths, ind, num, in_z, out_z, center_crop_sz,
             series_names, seg_series_names, txforms=None, nrrd=True):
    vols = []
    segs = []
    # (earlier variants built paths like 'volume-<ind>.nii' /
    # 'segmentation-<ind>.seg.nrrd' here before get_nii_nrrd took over)
    if nrrd:
        series, seg_series = get_nii_nrrd(volpaths, segpaths)
    assert np.shape(series)[3] == np.shape(seg_series)[3]
    num_slices = np.arange(np.shape(series[0])[2])
    if in_z != 0:
        num_slices = num_slices[in_z:-in_z]
    if num_slices.size <= 2 and in_z != 0:
        print(ind)
        print(num_slices)
        print(num)
    sub_rand = np.random.choice(num_slices, size=num, replace=False)
    center = transforms.CenterCrop(center_crop_sz)
    depth_center = transforms.DepthCenterCrop(out_z)
    series = [vol.astype(float) for vol in series]
    for i in sub_rand:
        if in_z == 0:
            nascent_series = [vol[:, :, i] for vol in series]
            nascent_seg_series = [seg[:, :, i] for seg in seg_series]
            nascent_series = np.expand_dims(nascent_series, axis=0)
            nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        else:
            nascent_series = [vol[:, :, i - in_z:i + 1 + in_z]
                              for vol in series]
            assert nascent_series[0].shape[2] == in_z * 2 + 1
            nascent_series = [np.squeeze(np.split(v, v.shape[2], axis=2))
                              for v in nascent_series]
            nascent_seg_series = [seg[:, :, i - in_z:i + 1 + in_z]
                                  for seg in seg_series]
            nascent_seg_series = [depth_center.engage(s)
                                  for s in nascent_seg_series]
            nascent_seg_series = [np.squeeze(np.split(s, s.shape[2], axis=2))
                                  for s in nascent_seg_series]
            if out_z == 1:
                nascent_seg_series = np.expand_dims(nascent_seg_series, axis=0)
        if txforms is not None:
            for j in txforms:
                nascent_series, nascent_seg_series = j.engage(
                    nascent_series, nascent_seg_series)
            # sanity check: labels should stay integral after the transforms
            bad = False
            for s in nascent_seg_series:
                if np.mod(np.max(s), 1) != 0:
                    bad = True
            if bad:
                print(j)
        vols.append(np.squeeze(nascent_series))
        segs.append(np.squeeze(center.engage(nascent_seg_series, out_z > 1)))
    return vols, segs
def train(self):
    use_cuda = torch.cuda.is_available()
    path = os.path.join('./out_models/' + self.model_name + '_' +
                        self.task_name + '_' + self.job_id)

    ## get logger
    logger = self.get_logger(self.model_name, self.task_name, self.job_id,
                             path)
    logger.info("Job_id : {}".format(self.job_id))
    logger.info("gpus_device_ids : {}".format(self.device_ids))
    logger.info("Task Name : {}".format(self.task_name))
    logger.info("Backbone_name : {}".format(self.model_name))
    logger.info("input_shape : ({},{},{})".format(self.input_shape[0],
                                                  self.input_shape[1],
                                                  self.input_shape[2]))
    logger.info("batch_size : {}".format(self.batch_size))
    logger.info("num_epochs : {}".format(self.num_epochs))
    logger.info("warmup_steps : {}".format(self.warmup_steps))
    logger.info("resume_from : {}".format(self.resume_from))
    logger.info("pretrained : {}".format(self.pretrained))
    logger.info("mixup : {}".format(self.mixup))
    logger.info("cutmix : {}".format(self.cutmix))

    ## tensorboard writer
    log_dir = os.path.join(path, "tensorboard_log")
    if not os.path.isdir(log_dir):
        os.mkdir(log_dir)
    writer = SummaryWriter(log_dir)

    ## get model to train
    net = get_model(self.model_name)
    net = torch.nn.DataParallel(net, device_ids=self.device_ids)
    net = net.cuda(device=self.device_ids[0])

    ## loss
    criterion = nn.CrossEntropyLoss()

    ## optimizer
    if self.optimizers == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=self.init_lr,
                              momentum=0.9, weight_decay=self.weight_decay)
    elif self.optimizers == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=self.init_lr,
                               weight_decay=self.weight_decay)
    milestones = [80, 150, 200, 300]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=milestones,
                                               gamma=0.1)
    logger.info("===========opti==========")
    logger.info("Optimizer:{}".format(self.optimizers))
    logger.info("lr:{}".format(self.init_lr))
    logger.info("weight_decay:{}".format(self.weight_decay))
    logger.info("lr_scheduler: MultiStepLR")
    logger.info("milestones:{}".format(milestones))

    ## augmentation
    normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                     std=[0.5, 0.5, 0.5])
    ## train aug
    transform_train = transforms.Compose([
        transforms.RandomCrop(int(self.input_shape[-1])),
        transforms.RandomHorizontalFlip(),
        transforms.RandomBrightness(brightness=self.brightness,
                                    brightness_ratio=self.brightness_ratio),
        transforms.RandomBlur(blur_ratio=self.blur_ratio),
        transforms.RandomRotation(degrees=self.degrees, rotation_ratio=0.1),
        transforms.ColorJitter(brightness=self.color_brightnesss,
                               contrast=self.color_contrast,
                               saturation=self.color_saturation, hue=0),
        transforms.ToTensor(),
        # normalize,
    ])
    ## test aug
    transform_test = transforms.Compose([
        transforms.CenterCrop(int(self.input_shape[-1])),
        transforms.ToTensor(),
        # normalize,
    ])
    logger.info("============aug===========")
    logger.info("crop: RandomCrop")
    logger.info("RandomHorizontalFlip: True")
    logger.info("brightness:{}".format(self.brightness))
    logger.info("brightness_ratio:{}".format(self.brightness_ratio))
    logger.info("blur_ratio:{}".format(self.blur_ratio))
    logger.info("degrees:{}".format(self.degrees))
    logger.info("color_brightnesss:{}".format(self.color_brightnesss))
    logger.info("color_contrast:{}".format(self.color_contrast))
    logger.info("color_saturation:{}".format(self.color_saturation))

    ## prepare data
    print('==> Preparing data..')
    logger.info("==========Datasets=========")
    logger.info("train_datasets:{}".format(self.train_datasets))
    logger.info("val_datasets:{}".format(self.val_datasets))
    logger.info("test_datasets:{}".format(self.test_datasets))
    trainset = DataLoader(self.train_datasets, self.val_datasets,
                          self.test_datasets, split='Training',
                          transform=transform_train)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=self.batch_size * len(self.device_ids),
        shuffle=True)
    Valset = DataLoader(self.train_datasets, self.val_datasets,
                        self.test_datasets, split='Valing',
                        transform=transform_test)
    Valloader = torch.utils.data.DataLoader(
        Valset, batch_size=64 * len(self.device_ids), shuffle=False)
    Testset = DataLoader(self.train_datasets, self.val_datasets,
                         self.test_datasets, split='Testing',
                         transform=transform_test)
    Testloader = torch.utils.data.DataLoader(
        Testset, batch_size=64 * len(self.device_ids), shuffle=False)

    ## train
    logger.info("======Begin Training======")
    self.train_model(net, criterion, optimizer, scheduler, trainloader,
                     Valloader, Testloader, logger, writer, path)
    logger.info("======Finish Training !!!======")
    logger.info("best_val_acc_epoch: %d, best_val_acc: %0.3f" %
                (self.best_Val_acc_epoch, self.best_Val_acc))
    logger.info("best_test_acc_epoch: %d, best_test_acc: %0.3f" %
                (self.best_Test_acc_epoch, self.best_Test_acc))
        df['is_manip'] = 0
        df = df[df['target'].notnull()]
        df['to_rotate'] = 0
        return df
    return None


train_transform = Compose([
    albu_trans.RandomCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transform = Compose([
    albu_trans.CenterCrop(target_size),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def add_args(parser):
    arg = parser.add_argument
    arg('--root', default='runs/debug', help='checkpoint root')
    arg('--batch-size', type=int, default=4)
    arg('--n-epochs', type=int, default=30)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=12)
    arg('--device-ids', type=str, help='For example 0,1 to run on two GPUs')
    arg('--model', type=str)
    transforms.RandomResizedCrop(img_size),
    transforms.RandomHorizontalFlip(),
    # transforms.ColorJitter(brightness=.5, contrast=.9, saturation=.5, hue=.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

transform_push = transforms.Compose([
    transforms.Resize(size=(img_size, img_size)),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# train set
train_dataset = datasets.ImageFolder(train_dir, transform_train)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=train_batch_size,
                                           shuffle=True, num_workers=4,
                                           pin_memory=False)

# push set
train_push_dataset = datasets.ImageFolder(train_push_dir, transform_push)
train_push_loader = torch.utils.data.DataLoader(
    train_push_dataset,
def main(args):
    if args.apex and amp is None:
        raise RuntimeError(
            "Failed to import apex. Please install apex from "
            "https://www.github.com/nvidia/apex "
            "to enable mixed-precision training.")

    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)
    print("torch version: ", torch.__version__)
    print("torchvision version: ", torchvision.__version__)

    device = torch.device(args.device)
    torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    traindir = os.path.join(args.data_path, args.train_dir)
    valdir = os.path.join(args.data_path, args.val_dir)
    normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645],
                            std=[0.22803, 0.22145, 0.216989])

    print("Loading training data")
    st = time.time()
    cache_path = _get_cache_path(traindir)
    transform_train = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Resize((128, 171)),
        T.RandomHorizontalFlip(),
        normalize,
        T.RandomCrop((112, 112))
    ])
    if args.cache_dataset and os.path.exists(cache_path):
        print("Loading dataset_train from {}".format(cache_path))
        dataset, _ = torch.load(cache_path)
        dataset.transform = transform_train
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset = torchvision.datasets.Kinetics400(
            traindir,
            frames_per_clip=args.clip_len,
            step_between_clips=1,
            transform=transform_train,
            frame_rate=15)
        if args.cache_dataset:
            print("Saving dataset_train to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset, traindir), cache_path)
    print("Took", time.time() - st)

    print("Loading validation data")
    cache_path = _get_cache_path(valdir)
    transform_test = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Resize((128, 171)),
        normalize,
        T.CenterCrop((112, 112))
    ])
    if args.cache_dataset and os.path.exists(cache_path):
        print("Loading dataset_test from {}".format(cache_path))
        dataset_test, _ = torch.load(cache_path)
        dataset_test.transform = transform_test
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache "
                  "on a single-gpu first, as it will be faster")
        dataset_test = torchvision.datasets.Kinetics400(
            valdir,
            frames_per_clip=args.clip_len,
            step_between_clips=1,
            transform=transform_test,
            frame_rate=15)
        if args.cache_dataset:
            print("Saving dataset_test to {}".format(cache_path))
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset_test, valdir), cache_path)

    print("Creating data loaders")
    train_sampler = RandomClipSampler(dataset.video_clips,
                                      args.clips_per_video)
    test_sampler = UniformClipSampler(dataset_test.video_clips,
                                      args.clips_per_video)
    if args.distributed:
        train_sampler = DistributedSampler(train_sampler)
        test_sampler = DistributedSampler(test_sampler)

    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=args.batch_size,
                                              sampler=train_sampler,
                                              num_workers=args.workers,
                                              pin_memory=True,
                                              collate_fn=collate_fn)
    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=args.batch_size,
                                                   sampler=test_sampler,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   collate_fn=collate_fn)

    print("Creating model")
    model = torchvision.models.video.__dict__[args.model](
        pretrained=args.pretrained)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    criterion = nn.CrossEntropyLoss()

    lr = args.lr * args.world_size
    optimizer = torch.optim.SGD(model.parameters(), lr=lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.apex:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.apex_opt_level)

    # convert scheduler to be per iteration, not per epoch, for warmup that
    # lasts between different epochs
    warmup_iters = args.lr_warmup_epochs * len(data_loader)
    lr_milestones = [len(data_loader) * m for m in args.lr_milestones]
    lr_scheduler = WarmupMultiStepLR(optimizer,
                                     milestones=lr_milestones,
                                     gamma=args.lr_gamma,
                                     warmup_iters=warmup_iters,
                                     warmup_factor=1e-5)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.gpu])
        model_without_ddp = model.module

    if args.resume:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        args.start_epoch = checkpoint['epoch'] + 1

    if args.test_only:
        evaluate(model, criterion, data_loader_test, device=device)
        return

    print("Start training")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, lr_scheduler,
                        data_loader, device, epoch, args.print_freq,
                        args.apex)
        evaluate(model, criterion, data_loader_test, device=device)
        if args.output_dir:
            checkpoint = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args
            }
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
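# A small numeric check of the per-iteration warmup conversion in main()
# (numbers illustrative): epoch-based milestones are rescaled by the number
# of iterations per epoch so WarmupMultiStepLR can step once per batch
# instead of once per epoch.
iters_per_epoch = 1000                             # stands in for len(data_loader)
lr_warmup_epochs, epoch_milestones = 10, [20, 30]
warmup_iters = lr_warmup_epochs * iters_per_epoch  # 10000 warmup iterations
iter_milestones = [iters_per_epoch * m for m in epoch_milestones]  # [20000, 30000]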
import utils

cut_size = 44
bs = 1
model_path = 'FER2013_Resnet18/PublicTest_model.t7'
epls = [0.001 * i for i in range(11)]

transform_train = transforms.Compose([
    transforms.RandomCrop(44),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.CenterCrop(cut_size),
    transforms.ToTensor(),
])

transform_eval = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(crop) for crop in crops])),
])

tbs = 32
trainset = FER2013(split='Training', transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=tbs,
                                          shuffle=True, num_workers=1)
PublicTestset = FER2013(split='PublicTest', transform=transform_eval)
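# How the TenCrop output of transform_eval is typically consumed (a minimal
# sketch, not this script's evaluation loop): the crop dimension is folded
# into the batch for the forward pass and the logits are averaged per image.
import torch

bs, ncrops, c = 2, 10, 3
inputs = torch.randn(bs, ncrops, c, cut_size, cut_size)
flat = inputs.view(-1, c, cut_size, cut_size)    # (bs*ncrops, c, h, w)
outputs = torch.randn(bs * ncrops, 7)            # stand-in for net(flat); FER2013 has 7 classes
avg = outputs.view(bs, ncrops, -1).mean(dim=1)   # (bs, 7) crop-averaged logits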
from PIL import Image

import transforms

size = 64
channel = 3
max_no = 202599
img_key = 'img_raw'
file_tpl = '%06d.jpg'  # celebA images are zero-padded, e.g. 000001.jpg
home_dir = os.path.expanduser('~')
celeb_source = os.path.join(home_dir, "Pictures/img_align_celeba")
default_attribs = {img_key: tf.FixedLenFeature([], tf.string)}
default_transf = transforms.Compose([
    transforms.Scale(size),
    transforms.CenterCrop(size),
    transforms.ToFloat(),
    transforms.Normalize(0.5, 0.5)
])


def process_celebA(dest='celebA', celeb_source=celeb_source, force=False,
                   transform=default_transf, files=None):
    dest_file = '%s.tfr' % dest
    if os.path.exists(dest_file) and not force:
        return dest_file
    print('Processing celeb data into a Tensorflow Record file. It may take '
          'a while depending on your computer speed...')
def feature_extractor():
    # loading net
    with tf.variable_scope('RGB'):
        net = i3d.InceptionI3d(400, spatial_squeeze=True,
                               final_endpoint='Logits')
        rgb_input = tf.placeholder(
            tf.float32,
            shape=(batch_size, _SAMPLE_VIDEO_FRAMES, _IMAGE_SIZE,
                   _IMAGE_SIZE, 3))
        _, end_points = net(rgb_input, is_training=False,
                            dropout_keep_prob=1.0)
        end_feature = end_points['avg_pool3d']
    sess = tf.Session()

    transform = torchvision.transforms.Compose([
        T.ToFloatTensorInZeroOne(),
        T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        T.Resize((224, 224)),
        T.CenterCrop((224, 224))
    ])
    # (an earlier variant built a second Logits-only RGB graph here; the
    # graph constructed above is the one actually used)

    rgb_variable_map = {}
    for variable in tf.global_variables():
        if variable.name.split('/')[0] == 'RGB':
            rgb_variable_map[variable.name.replace(':0', '')] = variable
    rgb_saver = tf.train.Saver(var_list=rgb_variable_map, reshape=True)
    rgb_saver.restore(sess, _CHECKPOINT_PATHS['rgb_imagenet'])

    video_list = [name.strip() for name in open(VIDEO_PATH_FILE).readlines()]
    print('video_list', video_list)
    if not os.path.isdir(OUTPUT_FEAT_DIR):
        os.mkdir(OUTPUT_FEAT_DIR)
    print('Total number of videos: %d' % len(video_list))

    for cnt, video_name in enumerate(video_list):
        video_path = os.path.join(VIDEO_DIR, video_name + '.avi')
        feat_path = os.path.join(OUTPUT_FEAT_DIR, video_name + '.npy')
        if os.path.exists(feat_path):
            print('Feature file for video %s already exists.' % video_name)
            continue
        print('video_path', video_path)
        vframes, _, info = torchvision.io.read_video(video_path, start_pts=0,
                                                     end_pts=None,
                                                     pts_unit='sec')
        # resample the clip to 24 fps before feature extraction
        vframes = T.frame_temporal_sampling(
            vframes, start_idx=0, end_idx=None,
            num_samples=int(round(len(vframes) / info['video_fps'] * 24)))
        vframes = transform(vframes).permute(1, 2, 3, 0).numpy()
        n_frame = vframes.shape[0]
        print('Total frames: %d' % n_frame)
        features = []
        n_feat = int(n_frame // 8)
        n_batch = n_feat // batch_size + 1
        print('n_frame: %d; n_feat: %d' % (n_frame, n_feat))
        print('n_batch: %d' % n_batch)
        for i in range(n_batch):
            input_blobs = []
            for j in range(batch_size):
                start_idx = (i * batch_size * L + j * L if i == 0
                             else i * batch_size * L + j * L - 8)
                end_idx = min(n_frame, start_idx + L)
                input_blob = vframes[start_idx:end_idx].reshape(
                    -1, resize_w, resize_h, 3)
                # (an earlier per-frame variant read JPEG frames from disk
                # and mean/std-shifted them here; superseded by the tensor
                # pipeline above)
                input_blobs.append(input_blob)
            input_blobs = np.array(input_blobs, dtype='float32')
            clip_feature = sess.run(end_feature,
                                    feed_dict={rgb_input: input_blobs})
            clip_feature = np.reshape(clip_feature,
                                      (-1, clip_feature.shape[-1]))
            features.append(clip_feature)
        features = np.concatenate(features, axis=0)
        # features = features[:n_feat:2]
        # 16 frames per feature (since a 64-frame snippet corresponds to
        # 8 features in I3D)
        print('Saving features and probs for video: %s ...' % video_name)
        np.save(feat_path, features)
        print('%d: %s has been processed...' % (cnt, video_name))