def __init__(self,
             ann_file='data/coco2017/annotations/instances_train2017.json',
             pipeline=(LoadImageFromFile(),
                       LoadAnnotations(),
                       Resize(img_scale=(1333, 800), keep_ratio=True),
                       RandomFlip(flip_ratio=0.5),
                       Normalize(mean=[123.675, 116.28, 103.53],
                                 std=[58.395, 57.12, 57.375],
                                 to_rgb=True),
                       Pad(size_divisor=32),
                       DefaultFormatBundle(),
                       Collect(keys=['img', 'gt_bboxes', 'gt_labels'])),
             test_mode=False,
             filter_empty_gt=True):
    self.ann_file = ann_file
    self.img_prefix = 'data/coco2017/train2017/' if not test_mode else 'data/coco2017/test2017/'
    self.test_mode = test_mode
    self.filter_empty_gt = filter_empty_gt
    # load annotations (and proposals)
    self.img_infos = self.load_annotations(self.ann_file)
    # filter out images that are too small
    if not test_mode:
        # drop images whose width or height is smaller than 32 and images without annotations
        valid_inds = self._filter_imgs()
        self.img_infos = [self.img_infos[i] for i in valid_inds]
    # set group flag for the sampler
    if not self.test_mode:
        self._set_group_flag()
    # processing pipeline
    self.pipeline = Compose(pipeline)
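For context, a minimal sketch of how such a dataset typically consumes the composed pipeline at sampling time. The `prepare_train_img`/`get_ann_info` names follow the mmdetection convention and are assumptions, not part of the snippet above.

    # Hedged sketch (assumption): mmdetection-style use of self.pipeline.
    def __getitem__(self, idx):
        if self.test_mode:
            return self.prepare_test_img(idx)
        return self.prepare_train_img(idx)

    def prepare_train_img(self, idx):
        img_info = self.img_infos[idx]
        ann_info = self.get_ann_info(idx)  # assumed helper returning bboxes/labels
        results = dict(img_info=img_info, ann_info=ann_info,
                       img_prefix=self.img_prefix,
                       bbox_fields=[], mask_fields=[])
        # the composed pipeline runs LoadImageFromFile -> ... -> Collect in order
        return self.pipeline(results)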
def __init__(self, data_root, pipeline=train_pipeline, split_num=10, split_id=0, mode='train'):
    assert mode in ('train', 'val', 'test', 'train_val')
    self.data_dir = os.path.join(data_root, 'train')
    self.test_dir = os.path.join(data_root, 'toPredict')
    self.__idx__ = split_id
    self.class2label = Class2Label
    self.img_infos = []
    self.spilt_imginfos = []
    self.train_imginfos = []
    self.val_imginfos = []
    self.split_num = split_num
    self.__mode__ = mode
    if self.__mode__ != 'test':
        self.img_infos = self.get_img_infos(self.data_dir)
        self.split_data()
        self.get_train_val_data()
    else:
        self.test_imginfos = self.get_Testimg_infos(self.test_dir)
    self.pipeline = Compose(pipeline)
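The split helpers are not shown in the snippet; below is one plausible, hedged sketch of `split_data`/`get_train_val_data`, treating `split_id` as the index of the validation fold among `split_num` folds. This is an illustration of the idea, not the project's actual implementation.

    # Hedged sketch (assumption): K-fold style split over self.img_infos.
    def split_data(self):
        fold_size = len(self.img_infos) // self.split_num
        self.spilt_imginfos = [
            self.img_infos[i * fold_size:(i + 1) * fold_size]
            for i in range(self.split_num)
        ]

    def get_train_val_data(self):
        for i, fold in enumerate(self.spilt_imginfos):
            if i == self.__idx__:
                self.val_imginfos.extend(fold)
            else:
                self.train_imginfos.extend(fold)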
def __init__(self, x, y, num_classes, format='NCHW', transforms=[], weights=None):
    self.x = x
    self.y = y
    self.num_classes = num_classes
    self.format = format
    self.transforms = Compose(transforms)
    self.subsets = self.set_subsets()
    self.weights = self.set_weights(weights)
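A minimal sketch of how such a dataset would usually serve samples through the composed transforms, honoring the `'NCHW'` format flag. It assumes `x` holds HWC numpy arrays and that the transforms keep that layout; both are assumptions about data not shown above.

    # Hedged sketch (assumption): apply transforms, then reorder HWC -> CHW if requested.
    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        sample, label = self.x[idx], self.y[idx]
        sample = self.transforms(sample)
        if self.format == 'NCHW' and sample.ndim == 3:
            sample = sample.transpose(2, 0, 1)  # HWC -> CHW (numpy array assumed)
        return sample, label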
def __init__(self, root):
    self.size = (192, 192)
    self.root = root
    if not os.path.exists(self.root):
        raise Exception("[!] {} does not exist.".format(root))
    self.img_transform = Compose([
        ToTensor(),
        CenterCrop(self.size),
        # RangeNormalize(min_val=-1, max_val=1),
        # RandomFlip(),
    ])
    # sort file names
    self.input_paths = sorted(
        glob(os.path.join(self.root, '{}/*.npy'.format("val"))))
    self.name = os.path.basename(root)
    if len(self.input_paths) == 0:
        raise Exception("No validation files found in {}".format(self.root))
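For completeness, a hedged sketch of the corresponding access methods: load one `.npy` file per index and push it through the transform built above. It assumes `numpy` is imported as `np` alongside `os`/`glob`.

    # Hedged sketch (assumption): one .npy array per sample, transformed on access.
    def __getitem__(self, index):
        img = np.load(self.input_paths[index])
        return self.img_transform(img)

    def __len__(self):
        return len(self.input_paths)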
        res = g(img.unsqueeze(0))
        res_img = to_pil(res[0].cpu())
        res_img.save(output_path / (name + '.png'), "PNG")

        gt_YCbCr = np.asarray(gt.convert('YCbCr'))[:, :, 0]
        res_YCbCr = np.asarray(res_img.convert('YCbCr'))[:, :, 0]
        ssim = compare_ssim(gt_YCbCr, res_YCbCr)
        psnr = compare_psnr(gt_YCbCr, res_YCbCr)
        print(ssim, psnr)
        ssim_acc += ssim
        psnr_acc += psnr
        count += 1
    print(ssim_acc / count, psnr_acc / count)


if __name__ == "__main__":
    trans = Compose([RandomCrop(480)])
    ds = RaindropDataset(cfg.test_a_path, trans)
    output_path = Path('output')
    if not output_path.is_dir():
        output_path.mkdir()
    load_path = Path('save') / cfg.name / 'weights_G'
    predict(ds, output_path, load_path, cfg.mean, cfg.std)
import cv2
import torch
from torch.utils.data import DataLoader

from dataset import FightDataset
from transform import Compose, ToTensor, Resize
from model import MyNet

torch.cuda.set_device(0)

transform_ = Compose([Resize((112, 112)), ToTensor()])
xx = FightDataset("./fight_classify", tranform=transform_)
dataloader = DataLoader(xx, batch_size=1, shuffle=True)
# for i_batch, sample_batched in enumerate(dataloader):
#     print(i_batch)
#     print(sample_batched["image"].size())

dev = torch.device("cuda:0")
model = MyNet().to(dev)
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

for t in range(20):
    # Forward pass: compute predicted y by passing x to the model
    for i_batch, sample_batched in enumerate(dataloader):
        # move the batch to the same device as the model
        image = sample_batched["image"].to(dev)
        label = sample_batched["label"].to(dev)
        # label = torch.transpose(label, 0, 1)
        y_pred = model(image)
        # print(y_pred)
        # print(label)
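The snippet is cut off mid-loop. A minimal, hedged continuation of the training step, assuming the `MSELoss` criterion and SGD optimizer defined above and that the label can be cast to float for the MSE target:

        # Hedged continuation (assumption): compute loss and update the weights.
        loss = criterion(y_pred, label.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(t, loss.item())  # assumed per-epoch progress print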
def get_merged_common_dataset(opt, skip_study=None, dataset_dict_cache=[], data_cache=[]):
    cancer_data_dir = opt.curated_breast_data_dir
    if dataset_dict_cache:
        dataset_dict = dataset_dict_cache[0]
    else:
        dataset_dict = util.load_curated(cancer_data_dir)
        dataset_dict_cache.append(dataset_dict)
    mergedCurated = dataset_dict['merged'].copy()

    if data_cache:
        data = data_cache[0]
    else:
        data = metagx_util.load_metagx_dataset(opt.metagx_data_dir, min_genes=opt.min_genes)
        data_cache.append(data)
    merged = data['merged'].copy()
    genes_list = data['genes_features'].copy()

    metagx_pos_outcome = merged[merged.posOutcome.isin([-1, 1])]
    print('num pos outcome studies {0}'.format(len(metagx_pos_outcome.study.unique())))
    if skip_study is not None:
        study_to_skip = metagx_pos_outcome.study.unique()[skip_study]
    else:
        study_to_skip = None

    merged_common = util.merge_metagx_curated(merged, mergedCurated)
    merged_treatments = list(metagx_util.treatment_columns_metagx) + util.treatment_columns_bmc
    merged_treatments = [x for x in merged_treatments if x in merged_common]
    merged_treatments = list(set(merged_treatments))
    # add continuous covariates to genes
    cont_columns = [x for x in merged_treatments if len(merged_common[x].unique()) > 20]
    merged_treatments = [x for x in merged_treatments if x not in cont_columns]
    common_genes_list = [x for x in genes_list if x in merged_common]
    if opt.use_covars:
        non_genes = cont_columns + merged_treatments + ['posOutcome']
    else:
        non_genes = []

    if study_to_skip is None:
        train_data, train_labels, val_data, val_labels = util.random_split(
            merged_common,
            common_genes_list + non_genes,
            ['study', 'posOutcome'],
            balance_validation=False,
            balance_by_study=False,
            ratio=opt.test_ratio,
            to_numpy=False)
    else:
        train_data, train_labels, val_data, val_labels = next(util.split_by_study(
            merged_common,
            common_genes_list + non_genes,
            ['study', 'posOutcome'],
            study=study_to_skip,
            to_numpy=False))
        # it's ok to use gene expression in the unsupervised model
        copy = val_data.copy()
        copy.loc[:, non_genes] = 0
        val_copy = val_labels.copy()
        val_copy.loc[:, 'posOutcome'] = 0
        train_data = pandas.concat([train_data, copy], ignore_index=True)
        train_labels = pandas.concat([train_labels, val_copy], ignore_index=True)

    print('validation study {0}'.format(study_to_skip))
    print(val_data.shape)
    train_data.fillna(0, inplace=True)
    val_data.fillna(0, inplace=True)

    to_tensor = ToTensor()
    to_float = ToType('float')
    add_age = AdditiveUniform(-0.5, 0.5, 'age')
    add_tumor_size = AdditiveUniform(-0.5, 0.5, 'tumor_size')
    add_posOutcome = AdditiveUniformTriary(0.0, 0.05, 'posOutcome')
    add_treat = Compose([AdditiveUniformTriary(0.0, 0.05, x) for x in merged_treatments])
    lst = []
    if 'posOutcome' in train_data.columns:
        lst = [add_age, add_tumor_size, add_posOutcome, add_treat]
    compose = Compose(lst + [to_tensor, to_float])
    compose_label = Compose([add_posOutcome, to_tensor, to_float])
    num_binary = len(merged_treatments + ['posOutcome'])
    num_binary = 0
    transform = DataLabelCompose(compose, compose_label)

    train_set = GeneDataset(train_data, train_labels, transform, binary=num_binary)
    test_set = GeneDataset(val_data, val_labels, transform, binary=num_binary)
    return train_set, test_set
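A brief, hedged usage sketch of the function above: it assumes an `opt` object exposing the attributes read inside the function (`curated_breast_data_dir`, `metagx_data_dir`, `min_genes`, `test_ratio`, `use_covars`), and wrapping the returned `GeneDataset` objects in DataLoaders is illustrative, not part of the original code.

from torch.utils.data import DataLoader

train_set, test_set = get_merged_common_dataset(opt, skip_study=None)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)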
        ims = ims[:, :32]
        return ims

    def get_multiple(self, ):
        pass


if __name__ == '__main__':
    from transform import (Compose, Normalize, Scale, CenterCrop, CornerCrop,
                           MultiScaleCornerCrop, MultiScaleRandomCrop,
                           RandomHorizontalFlip, ToTensor)

    D = EPIC_KITCHENS(
        '/mnt/nisho_data2/hyf/EPIC-annotations/EPIC_train_action_labels.csv',
        transform=Compose([
            Scale([224, 224]),
            ToTensor(255),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]))
    loader = DataLoader(
        dataset=D,
        batch_size=2,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
    )
    print(len(loader))

    from tqdm import tqdm
    for i, sample in tqdm(enumerate(loader)):
        pass
        # print(sample['ims'].size())  # (b, 3, cliplen, 224, 224)
        # print(sample['vid'])  # ['P01_01', 'P01_01']
          optimizer_G, losses)


def set_logging(name):
    log_path = Path('.') / 'log'
    if not (log_path.exists() and log_path.is_dir()):
        log_path.mkdir(parents=True)
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.StreamHandler(),
                            logging.FileHandler(
                                os.path.join('log', '{}.log'.format(name)))
                        ])


if __name__ == "__main__":
    set_logging(cfg.name)
    ds = RaindropDataset(cfg.train_path,
                         transform=Compose([
                             RandomCrop(480),
                             RandomHorizontalFlip(),
                             ToTensor(),
                             Normalize(cfg.mean, cfg.std)
                         ]))
    loader = DataLoader(ds, 10, shuffle=True, num_workers=4)
    train(cfg.name, loader, True, cfg.num_rep, cfg.lr, cfg.beta1,
          cfg.gamma_gan, cfg.num_epoch, cfg.wd, cfg.device)
def main():
    parser = ArgumentParser()
    parser.add_argument('-d', '--data_path', dest='data_path', type=str, default=None,
                        help='path to the data')
    parser.add_argument('-e', '--epochs', dest='epochs', default=20, type=int,
                        help='number of epochs')
    parser.add_argument('-b', '--batch_size', dest='batch_size', default=40, type=int,
                        help='batch size')
    parser.add_argument('-s', '--image_size', dest='image_size', default=256, type=int,
                        help='input image size')
    parser.add_argument('-lr', '--learning_rate', dest='lr', default=0.0001, type=float,
                        help='learning rate')
    parser.add_argument('-wd', '--weight_decay', dest='weight_decay', default=5e-4, type=float,
                        help='weight decay')
    parser.add_argument('-lrs', '--learning_rate_step', dest='lr_step', default=10, type=int,
                        help='learning rate step')
    parser.add_argument('-lrg', '--learning_rate_gamma', dest='lr_gamma', default=0.5, type=float,
                        help='learning rate gamma')
    parser.add_argument('-m', '--model', dest='model', default='unet', choices=('unet',))
    parser.add_argument('-w', '--weight_bce', default=0.5, type=float, help='weight BCE loss')
    parser.add_argument('-l', '--load', dest='load', default=False, help='load file model')
    parser.add_argument('-v', '--val_split', dest='val_split', default=0.8, type=float,
                        help='train/val split')
    parser.add_argument('-o', '--output_dir', dest='output_dir', default='/tmp/logs/',
                        help='dir to save log and models')
    args = parser.parse_args()
    # os.makedirs(args.output_dir, exist_ok=True)

    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)

    net = UNet()  # TODO: use a more novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # TODO: img_size=256 is rather mediocre, try to optimize the network for at least 512
    logger.info('Model type: {}'.format(net.__class__.__name__))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    net.to(device)
    # net = nn.DataParallel(net)

    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    # TODO: loss experimentation, fight class imbalance, there're many ways you can tackle this challenge
    # the criterion returns the weighted BCE and Dice terms as a tuple, presumably combined downstream in train()
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    # TODO: you can always try an on-plateau scheduler as a default option
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    # dataset
    # TODO: work on transformations a lot, look at the albumentations package for inspiration
    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True)
    ])
    # TODO: don't forget to work on class imbalance and data cleansing
    val_transforms = Resize(size=(args.image_size, args.image_size))

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path, 'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path, None, transforms=val_transforms)

    # split dataset into train/val, don't try to do this at home ;)
    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]

    # TODO: always work with the data: cleaning, sampling
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size,
                                  num_workers=8, shuffle=True, drop_last=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args.batch_size,
                                num_workers=4, shuffle=False, drop_last=False)
    logger.info('Length of train/val=%d/%d', len(train_dataset), len(val_dataset))
    logger.info('Number of batches of train/val=%d/%d', len(train_dataloader), len(val_dataloader))

    try:
        train(net, optimizer, criterion, scheduler, train_dataloader, val_dataloader,
              logger=logger, args=args, device=device)
    except KeyboardInterrupt:
        torch.save(net.state_dict(), os.path.join(args.output_dir, 'INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)
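The `dice_loss` referenced by the criterion above is not shown in this snippet. A minimal soft-Dice sketch is given below as an assumption of what it could look like; the repository's actual implementation may differ.

# Hedged sketch (assumption): soft Dice loss over per-pixel probabilities in [0, 1].
def dice_loss(pred, target, eps=1e-7):
    pred = pred.contiguous().view(pred.size(0), -1)
    target = target.contiguous().view(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    denom = pred.sum(dim=1) + target.sum(dim=1)
    dice = (2. * intersection + eps) / (denom + eps)
    return 1. - dice.mean()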