Exemplo n.º 1
0
 def __init__(self,
              ann_file='data/coco2017/annotations/instances_train2017.json',
              pipeline=(LoadImageFromFile(), LoadAnnotations(),
                        Resize(img_scale=(1333, 800),
                               keep_ratio=True), RandomFlip(flip_ratio=0.5),
                        Normalize(mean=[123.675, 116.28, 103.53],
                                  std=[58.395, 57.12, 57.375],
                                  to_rgb=True), Pad(size_divisor=32),
                        DefaultFormatBundle(),
                        Collect(keys=['img', 'gt_bboxes', 'gt_labels'])),
              test_mode=False,
              filter_empty_gt=True):
     """Set up a COCO2017-style detection dataset.

     Loads image metadata from ``ann_file`` and, when not in test mode,
     filters out unusable images and precomputes sampler group flags.
     """
     self.ann_file = ann_file
     # Image directory depends on the split being loaded.
     if test_mode:
         self.img_prefix = 'data/coco2017/test2017/'
     else:
         self.img_prefix = 'data/coco2017/train2017/'
     self.test_mode = test_mode
     self.filter_empty_gt = filter_empty_gt
     # Load per-image annotation records (and proposals, if any).
     self.img_infos = self.img_infos = self.load_annotations(self.ann_file)
     if not self.test_mode:
         # Drop images with a side shorter than 32px or with no annotations.
         kept = self._filter_imgs()
         self.img_infos = [self.img_infos[idx] for idx in kept]
         # Aspect-ratio group flag used by the batch sampler.
         self._set_group_flag()
     # Transformations applied to each sample when indexed.
     self.pipeline = Compose(pipeline)
    def __init__(self,
                 data_root,
                 pipeline=train_pipeline,
                 split_num=10,
                 split_id=0,
                 mode='train'):
        """Set up the dataset for one cross-validation split.

        ``data_root`` must contain a ``train`` directory and a ``toPredict``
        directory; ``split_id`` selects which of ``split_num`` folds is used.
        """
        assert mode in ('train', 'val', 'test', 'train_val')
        self.data_dir = os.path.join(data_root, 'train')
        self.test_dir = os.path.join(data_root, 'toPredict')
        self.__idx__ = split_id
        self.class2label = Class2Label
        self.split_num = split_num
        self.__mode__ = mode
        # Per-split bookkeeping; populated by split_data()/get_train_val_data().
        self.img_infos = []
        self.spilt_imginfos = []
        self.train_imginfos = []
        self.val_imginfos = []

        if self.__mode__ == 'test':
            # Test mode only needs the prediction images.
            self.test_imginfos = self.get_Testimg_infos(self.test_dir)
        else:
            self.img_infos = self.get_img_infos(self.data_dir)
            self.split_data()
            self.get_train_val_data()

        self.pipeline = Compose(pipeline)
 def __init__(self,
              x,
              y,
              num_classes,
              format='NCHW',
              transforms=None,
              weights=None):
     """Wrap (x, y) data with transforms, subset bookkeeping and weights.

     Parameters:
         x, y: input samples and targets (types defined by the caller).
         num_classes: number of target classes.
         format: data layout string, e.g. 'NCHW' (shadows the builtin
             ``format`` — kept for backward compatibility).
         transforms: optional list of transform callables; defaults to an
             empty pipeline. (Was a mutable default ``[]`` — replaced with
             None to avoid the shared-mutable-default pitfall; behavior
             when omitted is unchanged.)
         weights: optional sample weights forwarded to set_weights().
     """
     self.x = x
     self.y = y
     self.num_classes = num_classes
     self.format = format
     self.transforms = Compose([] if transforms is None else transforms)
     self.subsets = self.set_subsets()
     self.weights = self.set_weights(weights)
Exemplo n.º 4
0
 def __init__(self, root):
     """Index all validation ``.npy`` files under ``root``.

     Raises if ``root`` does not exist or contains no validation files.
     """
     self.size = (192, 192)
     self.root = root
     if not os.path.exists(self.root):
         raise Exception("[!] {} not exists.".format(root))
     # Convert to tensor first, then take a fixed 192x192 center patch.
     self.img_transform = Compose([
         ToTensor(),
         CenterCrop(self.size),
     ])
     # Deterministic ordering of the input files.
     pattern = os.path.join(self.root, '{}/*.npy'.format("val"))
     self.input_paths = sorted(glob(pattern))
     self.name = os.path.basename(root)
     if not self.input_paths:
         raise Exception("No validations are found in {}".format(self.root))
Exemplo n.º 5
0
            res = g(img.unsqueeze(0))

            res_img = to_pil(res[0].cpu())
            res_img.save(output_path / (name + '.png'), "PNG")

            gt_YCbCr = np.asarray(gt.convert('YCbCr'))[:, :, 0]
            res_YCbCr = np.asarray(res_img.convert('YCbCr'))[:, :, 0]

            ssim = compare_ssim(gt_YCbCr, res_YCbCr)
            psnr = compare_psnr(gt_YCbCr, res_YCbCr)

            print(ssim, psnr)
            ssim_acc += ssim
            psnr_acc += psnr
            count += 1

        print(ssim_acc / count, psnr_acc / count)


if __name__ == "__main__":
    # Evaluate on test set A using a fixed 480px random crop.
    dataset = RaindropDataset(cfg.test_a_path, Compose([RandomCrop(480)]))

    out_dir = Path('output')
    if not out_dir.is_dir():
        out_dir.mkdir()

    weights_path = Path('save') / cfg.name / 'weights_G'
    predict(dataset, out_dir, weights_path, cfg.mean, cfg.std)
Exemplo n.º 6
0
import cv2
import torch
from torch.utils.data import DataLoader

from dataset import FightDataset
from transform import Compose, ToTensor, Resize
from model import MyNet

# Pin all CUDA allocations/kernels in this script to GPU 0.
torch.cuda.set_device(0)
# Resize every frame to 112x112, then convert to a tensor.
transform_ = Compose([Resize((112, 112)), ToTensor()])
# NOTE(review): keyword is spelled `tranform` — confirm FightDataset's
# parameter really is named `tranform` and not `transform`.
xx = FightDataset("./fight_classify", tranform=transform_)

dataloader = DataLoader(xx, batch_size=1, shuffle=True)
# for i_batch, sample_batched in enumerate(dataloader):
#     print(i_batch)
#     print(sample_batched["image"].size())
dev = torch.device("cuda:0")
model = MyNet().to(dev)

# Sum-reduced MSE on the model output; lr=1e-8 is unusually small —
# presumably tuned for the un-normalized sum reduction.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

# 20 epochs over the dataset.
# NOTE(review): only the forward pass is visible here; the loss/backward/step
# lines appear to have been truncated from this excerpt.
for t in range(20):
    # Forward pass: Compute predicted y by passing x to the model
    for i_batch, sample_batched in enumerate(dataloader):
        image = sample_batched["image"]
        label = sample_batched["label"]
        # label = torch.transpose(label, 0,1)
        y_pred = model(image)
        # print(y_pred)
        # print(label)
Exemplo n.º 7
0
def get_merged_common_dataset(opt, skip_study=None, dataset_dict_cache=[], data_cache=[]):
    """Build train/test GeneDataset pairs from merged MetaGX + curated data.

    Args:
        opt: options namespace; reads curated_breast_data_dir, metagx_data_dir,
            min_genes, use_covars and test_ratio.
        skip_study: when given, an index into the unique MetaGX studies with a
            posOutcome label; that study is held out as validation
            (leave-one-study-out). When None, a random split is used instead.
        dataset_dict_cache, data_cache: NOTE(review) — mutable default
            arguments are used deliberately here as cross-call memoization of
            the loaded datasets; do not "fix" them to None without adding an
            explicit cache.

    Returns:
        (train_set, test_set) GeneDataset instances sharing one transform.
    """
    cancer_data_dir = opt.curated_breast_data_dir
    # Load (or reuse the cached) curated dataset.
    if dataset_dict_cache:
        dataset_dict = dataset_dict_cache[0]
    else:
        dataset_dict = util.load_curated(cancer_data_dir)
        dataset_dict_cache.append(dataset_dict)
    mergedCurated = dataset_dict['merged'].copy()

    # Load (or reuse the cached) MetaGX dataset.
    if data_cache:
        data = data_cache[0]
    else:
        data = metagx_util.load_metagx_dataset(opt.metagx_data_dir, min_genes=opt.min_genes)
        data_cache.append(data)
    merged = data['merged'].copy()
    genes_list = data['genes_features'].copy()

    # Studies that actually carry a posOutcome label (-1 or 1).
    metagx_pos_outcome = merged[merged.posOutcome.isin([-1, 1])]
    print('num pos outcome studies {0}'.format(len(metagx_pos_outcome.study.unique())))
    if skip_study is not None:
        study_to_skip = metagx_pos_outcome.study.unique()[skip_study]
    else:
        study_to_skip = None

    merged_common = util.merge_metagx_curated(merged, mergedCurated)

    # Treatment covariates present in the merged frame, deduplicated.
    merged_treatments = list(metagx_util.treatment_columns_metagx) + util.treatment_columns_bmc
    merged_treatments = [x for x in merged_treatments if x in merged_common]
    merged_treatments = list(set(merged_treatments))
    # add continious covariates to genes
    # Columns with >20 distinct values are treated as continuous, the rest
    # remain (binary/categorical) treatment columns.
    cont_columns = [x for x in merged_treatments if len(merged_common[x].unique()) > 20]
    merged_treatments = [x for x in merged_treatments if x not in cont_columns]
    common_genes_list = [x for x in genes_list if x in merged_common]
    if opt.use_covars:
        non_genes = cont_columns + merged_treatments + ['posOutcome']
    else:
        non_genes = []
    if study_to_skip is None:
        # Random train/validation split over all samples.
        train_data, train_labels, val_data, val_labels = util.random_split(merged_common,
                                                              common_genes_list + non_genes,
                                                              ['study', 'posOutcome'],
                                                              balance_validation=False,
                                                              balance_by_study=False,
                                                              ratio=opt.test_ratio,
                                                              to_numpy=False)
    else:
        # Leave-one-study-out: hold out study_to_skip as validation.
        train_data, train_labels, val_data, val_labels = next(util.split_by_study(merged_common,
                                                              common_genes_list + non_genes,
                                                              ['study', 'posOutcome'],
                                                              study=study_to_skip,
                                                              to_numpy=False))
        # it's ok to use gene expression in unsupervised model
        # Re-add the held-out samples to training with covariates/labels
        # zeroed out, so only their gene expression is visible to the model.
        copy = val_data.copy()
        copy.loc[:, non_genes] = 0
        val_copy = val_labels.copy()
        val_copy.loc[:, 'posOutcome'] = 0
        train_data = pandas.concat([train_data, copy], ignore_index=True)
        train_labels = pandas.concat([train_labels, val_copy], ignore_index=True)
        print('validation study {0}'.format(study_to_skip))
        print(val_data.shape)

    train_data.fillna(0, inplace=True)
    val_data.fillna(0, inplace=True)
    # Data augmentation: small uniform noise on continuous covariates and on
    # the ternary treatment/outcome columns, applied only when posOutcome is
    # part of the training columns.
    to_tensor = ToTensor()
    to_float = ToType('float')
    add_age = AdditiveUniform(-0.5, 0.5, 'age')
    add_tumor_size = AdditiveUniform(-0.5, 0.5, 'tumor_size')
    add_posOutcome = AdditiveUniformTriary(0.0, 0.05, 'posOutcome')
    add_treat = Compose([AdditiveUniformTriary(0.0, 0.05, x) for x in merged_treatments])
    lst = []
    if 'posOutcome' in train_data.columns:
        lst = [add_age, add_tumor_size, add_posOutcome, add_treat]
    compose = Compose(lst + [to_tensor, to_float])
    compose_label = Compose([add_posOutcome, to_tensor, to_float])
    num_binary = len(merged_treatments + ['posOutcome'])
    num_binary = 0  # NOTE(review): immediately overrides the line above — looks like a debug/ablation override; confirm which value is intended
    transform = DataLabelCompose(compose, compose_label)

    train_set = GeneDataset(train_data, train_labels, transform, binary=num_binary)
    test_set = GeneDataset(val_data, val_labels, transform, binary=num_binary)
    return train_set, test_set
Exemplo n.º 8
0
            ims = ims[:, :32]
        return ims

    def get_multiple(self, ):
        """Placeholder — not implemented yet (returns None)."""
        pass


if __name__ == '__main__':
    from transform import (Compose, Normalize, Scale, CenterCrop, CornerCrop,
                           MultiScaleCornerCrop, MultiScaleRandomCrop,
                           RandomHorizontalFlip, ToTensor)

    # 224x224 clips scaled to [0, 1], then ImageNet-normalized.
    dataset = EPIC_KITCHENS(
        '/mnt/nisho_data2/hyf/EPIC-annotations/EPIC_train_action_labels.csv',
        transform=Compose([
            Scale([224, 224]),
            ToTensor(255),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]))
    loader = DataLoader(dataset=dataset,
                        batch_size=2,
                        shuffle=False,
                        num_workers=8,
                        pin_memory=True)
    print(len(loader))

    from tqdm import tqdm
    # Smoke-test: iterate the whole loader once with a progress bar.
    for batch_idx, batch in tqdm(enumerate(loader)):
        pass
Exemplo n.º 9
0
                            optimizer_G, losses)


def set_logging(name):
    """Configure root logging to stream to stdout and to ``log/<name>.log``.

    Creates the ``./log`` directory if it does not exist.

    Args:
        name: basename (without extension) of the log file.
    """
    log_path = Path('.') / 'log'
    # exist_ok replaces the racy exists()/is_dir() check: no error if the
    # directory already exists, no TOCTOU window between check and create.
    log_path.mkdir(parents=True, exist_ok=True)
    logging.basicConfig(level=logging.INFO,
                        format='%(message)s',
                        handlers=[
                            logging.StreamHandler(),
                            logging.FileHandler(
                                os.path.join('log', '{}.log'.format(name)))
                        ])


if __name__ == "__main__":

    set_logging(cfg.name)

    # Training-time augmentation: random 480px crop + horizontal flip,
    # then tensor conversion and per-channel normalization.
    augment = Compose([
        RandomCrop(480),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize(cfg.mean, cfg.std)
    ])
    dataset = RaindropDataset(cfg.train_path, transform=augment)
    loader = DataLoader(dataset, 10, shuffle=True, num_workers=4)
    train(cfg.name, loader, True, cfg.num_rep, cfg.lr, cfg.beta1,
          cfg.gamma_gan, cfg.num_epoch, cfg.wd, cfg.device)
Exemplo n.º 10
0
def main():
    """Parse CLI arguments, build the model and datasets, and run training.

    Side effects: creates ``output_dir``, writes ``train.log`` there, and
    saves ``INTERRUPTED.pth`` (then exits) on KeyboardInterrupt.
    """
    parser = ArgumentParser()
    parser.add_argument('-d',
                        '--data_path',
                        dest='data_path',
                        type=str,
                        default=None,
                        help='path to the data')
    parser.add_argument('-e',
                        '--epochs',
                        dest='epochs',
                        default=20,
                        type=int,
                        help='number of epochs')
    parser.add_argument('-b',
                        '--batch_size',
                        dest='batch_size',
                        default=40,
                        type=int,
                        help='batch size')
    parser.add_argument('-s',
                        '--image_size',
                        dest='image_size',
                        default=256,
                        type=int,
                        help='input image size')
    parser.add_argument('-lr',
                        '--learning_rate',
                        dest='lr',
                        default=0.0001,
                        type=float,
                        help='learning rate')
    parser.add_argument('-wd',
                        '--weight_decay',
                        dest='weight_decay',
                        default=5e-4,
                        type=float,
                        help='weight decay')
    parser.add_argument('-lrs',
                        '--learning_rate_step',
                        dest='lr_step',
                        default=10,
                        type=int,
                        help='learning rate step')
    parser.add_argument('-lrg',
                        '--learning_rate_gamma',
                        dest='lr_gamma',
                        default=0.5,
                        type=float,
                        help='learning rate gamma')
    parser.add_argument('-m',
                        '--model',
                        dest='model',
                        default='unet',
                        choices=('unet', ))
    parser.add_argument('-w',
                        '--weight_bce',
                        default=0.5,
                        type=float,
                        help='weight BCE loss')
    parser.add_argument('-l',
                        '--load',
                        dest='load',
                        default=False,
                        help='load file model')
    # BUGFIX: without type=float a CLI-supplied value stays a string and
    # `int(len(train_dataset) * args.val_split)` below raises TypeError.
    parser.add_argument('-v',
                        '--val_split',
                        dest='val_split',
                        default=0.8,
                        type=float,
                        help='train/val split')
    parser.add_argument('-o',
                        '--output_dir',
                        dest='output_dir',
                        default='/tmp/logs/',
                        help='dir to save log and models')
    args = parser.parse_args()
    #
    os.makedirs(args.output_dir, exist_ok=True)
    logger = get_logger(os.path.join(args.output_dir, 'train.log'))
    logger.info('Start training with params:')
    for arg, value in sorted(vars(args).items()):
        logger.info("Argument %s: %r", arg, value)
    #
    net = UNet(
    )  # TODO: to use move novel arch or/and more lightweight blocks (mobilenet) to enlarge the batch_size
    # TODO: img_size=256 is rather mediocre, try to optimize network for at least 512
    logger.info('Model type: {}'.format(net.__class__.__name__))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if args.load:
        net.load_state_dict(torch.load(args.load))
    net.to(device)
    # net = nn.DataParallel(net)

    optimizer = optim.Adam(net.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)
    # TODO: loss experimentation, fight class imbalance, there're many ways you can tackle this challenge
    # NOTE(review): criterion returns a (bce, dice) tuple — train() presumably
    # combines the two terms; confirm against train()'s implementation.
    criterion = lambda x, y: (args.weight_bce * nn.BCELoss()(x, y),
                              (1. - args.weight_bce) * dice_loss(x, y))
    # TODO: you can always try on plateau scheduler as a default option
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step, gamma=args.lr_gamma) \
        if args.lr_step > 0 else None

    # dataset
    # TODO: to work on transformations a lot, look at albumentations package for inspiration
    train_transforms = Compose([
        Crop(min_size=1 - 1 / 3., min_ratio=1.0, max_ratio=1.0, p=0.5),
        Flip(p=0.05),
        Pad(max_size=0.6, p=0.25),
        Resize(size=(args.image_size, args.image_size), keep_aspect=True)
    ])
    # TODO: don't forget to work class imbalance and data cleansing
    val_transforms = Resize(size=(args.image_size, args.image_size))

    train_dataset = DetectionDataset(args.data_path,
                                     os.path.join(args.data_path,
                                                  'train_mask.json'),
                                     transforms=train_transforms)
    val_dataset = DetectionDataset(args.data_path,
                                   None,
                                   transforms=val_transforms)

    # split dataset into train/val, don't try to do this at home ;)
    # The first val_split fraction of files goes to train, the rest to val.
    train_size = int(len(train_dataset) * args.val_split)
    val_dataset.image_names = train_dataset.image_names[train_size:]
    val_dataset.mask_names = train_dataset.mask_names[train_size:]
    train_dataset.image_names = train_dataset.image_names[:train_size]
    train_dataset.mask_names = train_dataset.mask_names[:train_size]

    # TODO: always work with the data: cleaning, sampling
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  num_workers=8,
                                  shuffle=True,
                                  drop_last=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                num_workers=4,
                                shuffle=False,
                                drop_last=False)
    logger.info('Length of train/val=%d/%d', len(train_dataset),
                len(val_dataset))
    logger.info('Number of batches of train/val=%d/%d', len(train_dataloader),
                len(val_dataloader))

    try:
        train(net,
              optimizer,
              criterion,
              scheduler,
              train_dataloader,
              val_dataloader,
              logger=logger,
              args=args,
              device=device)
    except KeyboardInterrupt:
        # Checkpoint on Ctrl-C so an interrupted run can be resumed via --load.
        torch.save(net.state_dict(),
                   os.path.join(args.output_dir, 'INTERRUPTED.pth'))
        logger.info('Saved interrupt')
        sys.exit(0)