Example #1
    def __init__(self,
                 hdf5_path,
                 batch_size,
                 shuffle=True,
                 shape=[7, 32, 32],
                 validation_split=0.0,
                 num_workers=1,
                 training=True):
        rs = np.random.RandomState()
        mean = 143.68
        stdev = 23.53
        trsfm_train = [
            #t3d.shotNoise(rs, alpha=0.7, execution_prob=0.2),
            #t3d.Downsample(rs, factor=4.0, order=2),
            #t3d.RandomFlip(rs),
            t3d.RandomRotate90(rs),
            #t3d.RandomContrast(rs, factor=0.8, execution_probability=0.2),
            #t3d.ElasticDeformation(rs, 3, alpha=20, sigma=3, execution_probability=0.2),
            #t3d.GaussianNoise(rs, 3),
            t3d.Normalize(mean, stdev),
            t3d.ToTensor(True)
        ]

        trsfm_test = [t3d.Normalize(mean, stdev), t3d.ToTensor(True)]
        '''
        Means and stds for the IMPRS dataset:
        allDAPI_volume      141.42   18.85
        mask_avgproj        128.16    0.54
        mask_maxproj        128.51    1.23
        mask_sumproj        126.145  31.49
        mask_volume         128.23    0.84
        allDAPI_volume0701  140.61   17.60
        '''
        if training:
            trsfm = trsfm_train
        else:
            trsfm = trsfm_test

        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.shape = shape
        # Reload to pick up any recent changes to the databases module
        importlib.reload(databases)
        self.dataset = databases.hdf5dataset(hdf5_path,
                                             shape=self.shape,
                                             training=training,
                                             transforms=trsfm)
        super().__init__(self.dataset, batch_size, shuffle,
                         validation_split, num_workers)
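For reference, a minimal usage sketch of this loader; the HDF5 filename and the hyperparameter values below are illustrative assumptions, not taken from the snippet:

    # Hypothetical call site: iterate batches of normalized 3D volumes.
    loader = hdf5_3d_dataloader('cells.h5',          # assumed filename
                                batch_size=16,
                                shuffle=True,
                                shape=[7, 32, 32],
                                validation_split=0.1,
                                num_workers=4,
                                training=True)
    for volumes, labels in loader:
        pass  # volumes: float tensors, already normalized and converted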
Example #2
def combine_folds(config, files, cv_n_folds):
    cv_seed = 1234
    wfn = os.path.join(config['saved_dir'], 'combined_folds_{}.npy'.format(config['name']))
    print('This method will overwrite file: {}'.format(wfn))
    print('Computing fold indices. This takes 15 seconds.')
    # Prepare labels
    rs = np.random.RandomState()
    mean = config['data_loader']['args']['mean']
    stdev = config['data_loader']['args']['stdev']
    trsfm_train = [t3d.Normalize(mean, stdev), t3d.ToTensor(True)]
    # getattr only resolves the class; instantiate it in a separate call
    # (passing constructor kwargs to getattr itself is a bug).
    dataset_cls = getattr(module_datasets, 'hdf5dataset')
    train_dataset = dataset_cls(
        config['data_loader']['args']['hdf5_path'],
        shape=config['data_loader']['args']['shape'],
        transforms=trsfm_train,
        training=True)

    labels = [label for img, label in train_dataset]
    num_classes = config['arch']['args']['num_classes']
    # Initialize pyx array (holds the trained network's predicted probabilities)
    pyx = np.empty((len(labels), num_classes))
    
    # Split train into train and holdout for each cv_fold.
    kf = StratifiedKFold(n_splits=cv_n_folds, shuffle=True, random_state=cv_seed)
    for k, (cv_train_idx, cv_holdout_idx) in enumerate(kf.split(range(len(labels)), labels)):
        probs = np.load(files[k]) 
        pyx[cv_holdout_idx] = probs[:, :num_classes]
    print('Writing final predicted probabilities.')
    np.save(wfn, pyx) 
    
    # Compute overall accuracy
    print('Computing Accuracy.', flush=True)
    acc = sum(np.array(labels) == np.argmax(pyx, axis=1)) / float(len(labels))
    print('Accuracy: {:.4f}'.format(acc))
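The stitching above relies on StratifiedKFold being deterministic for a fixed seed: re-running the split with the same labels and random_state reproduces the same holdout indices, so fold k's saved probabilities land in the right rows of pyx. A self-contained check of that invariant, using toy labels chosen for illustration:

    import numpy as np
    from sklearn.model_selection import StratifiedKFold

    labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])  # toy labels (assumption)
    kf_a = StratifiedKFold(n_splits=3, shuffle=True, random_state=1234)
    kf_b = StratifiedKFold(n_splits=3, shuffle=True, random_state=1234)
    for (_, hold_a), (_, hold_b) in zip(kf_a.split(labels, labels),
                                        kf_b.split(labels, labels)):
        assert np.array_equal(hold_a, hold_b)  # identical holdout indices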
Example #3
    def __init__(self,
                 hdf5_path,
                 batch_size,
                 shuffle=True,
                 shape=[7, 32, 32],
                 validation_split=0.0,
                 num_workers=1,
                 training=True,
                 mean=None,
                 stdev=None):
        rs = np.random.RandomState()
        if mean is None or stdev is None:
            mean = 0
            stdev = 1
            print("No mean and stdev given; falling back to mean=0, stdev=1.")

        trsfm = [t3d.Normalize(mean, stdev), t3d.ToTensor(True)]

        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.shape = shape
        # Reload to pick up any recent changes to the databases module
        importlib.reload(databases)
        self.dataset = databases.hdf5dataset(hdf5_path,
                                             shape=self.shape,
                                             training=training,
                                             transforms=trsfm)
        super().__init__(self.dataset, batch_size, shuffle,
                         validation_split, num_workers)
Example #4
    def __init__(self,
                 hdf5_path,
                 batch_size,
                 shuffle=True,
                 shape=[7, 32, 32],
                 validation_split=0.0,
                 num_workers=1,
                 training=True):
        mean = 14.5
        stdev = 17
        rs = np.random.RandomState()
        trsfm_train = [
            #t3d.shotNoise(rs, alpha=0.7, execution_prob=0.2),
            #t3d.Downsample(rs, factor=4.0, order=2),
            #t3d.RandomFlip(rs),
            #t3d.RandomRotate90(rs),
            #t3d.RandomContrast(rs, factor=0.8, execution_probability=0.2),
            #t3d.ElasticDeformation(rs, 3, alpha=20, sigma=3, execution_probability=0.2),
            t3d.GaussianNoise(rs, 3),
            t3d.Normalize(mean, stdev),
            t3d.ToTensor(True)
        ]

        trsfm_test = [t3d.Normalize(mean, stdev), t3d.ToTensor(True)]

        if training:
            trsfm = trsfm_train
        else:
            trsfm = trsfm_test

        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.shape = shape
        # Reload to pick up any recent changes to the databases module
        importlib.reload(databases)
        self.dataset = databases.hdf5dataset1D(hdf5_path,
                                               shape=self.shape,
                                               training=training,
                                               transforms=trsfm)
        super().__init__(self.dataset, batch_size, shuffle,
                         validation_split, num_workers)
Example #5
    def __init__(self,
                 hdf5_path,
                 batch_size,
                 shuffle=True,
                 shape=[7, 32, 32],
                 validation_split=0.0,
                 num_workers=1,
                 training=True,
                 dataset=None,
                 mean=0,
                 stdev=1):
        rs = np.random.RandomState()
        trsfm_train = [
            t3d.RandomRotate90(rs),
            t3d.Normalize(mean, stdev),
            t3d.ToTensor(True)
        ]

        trsfm_test = [t3d.Normalize(mean, stdev), t3d.ToTensor(True)]
        if training:
            trsfm = trsfm_train
        else:
            trsfm = trsfm_test

        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.shape = shape
        # Reload to pick up any recent changes to the databases module
        importlib.reload(databases)
        if dataset is None:
            self.dataset = databases.hdf5dataset(hdf5_path,
                                                 shape=self.shape,
                                                 training=training,
                                                 transforms=trsfm)
        else:
            self.dataset = dataset
        super().__init__(self.dataset, batch_size, shuffle,
                         validation_split, num_workers)
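The extra `dataset` argument lets a caller hand in a pre-built dataset and skip the internal hdf5dataset construction; the cross-validation code in Example #6 uses exactly this to wrap fold subsets. A sketch of that pattern (the filename, stats, subset size, and the empty transforms list are illustrative assumptions; the .imgs/.samples attributes are the ones Example #6 manipulates):

    import copy

    full = databases.hdf5dataset('cells.h5', shape=[7, 32, 32],
                                 training=True, transforms=[])
    subset = copy.deepcopy(full)
    subset.imgs = full.imgs[:100]   # keep the first 100 samples
    subset.samples = subset.imgs
    loader = hdf5_3d_dataloader('cells.h5', batch_size=16,
                                dataset=subset, mean=143.68, stdev=23.53)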
Example #6
def one_fold(config, cv_n_folds, cv_fold):
    print("GPUs available: " + str(torch.cuda.device_count()))
    train_logger = Logger()
    cv_seed = 1234
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
    num_classes = config['arch']['args']['num_classes']
    cudnn.benchmark = True
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #Initialize training dataset
    rs = np.random.RandomState()
    mean = config['data_loader']['args']['mean']
    stdev = config['data_loader']['args']['stdev']
    trsfm_train = [
        t3d.RandomRotate90(rs),
        t3d.Normalize(mean, stdev),
        t3d.ToTensor(True)]
    # getattr only resolves the class; instantiate it in a separate call.
    dataset_cls = getattr(module_datasets, 'hdf5dataset')
    train_dataset = dataset_cls(
        config['data_loader']['args']['hdf5_path'],
        shape=config['data_loader']['args']['shape'],
        transforms=trsfm_train,
        training=True)

    labels = [label for img, label in train_dataset]
    # Split train into train and holdout for particular cv_fold.
    kf = StratifiedKFold(n_splits = cv_n_folds, shuffle = True, random_state = cv_seed)
    cv_train_idx, cv_holdout_idx = list(kf.split(range(len(labels)), labels))[cv_fold]
    np.random.seed(cv_seed)
    # Separate datasets
    holdout_dataset = copy.deepcopy(train_dataset)
    holdout_dataset.imgs = [train_dataset.imgs[i] for i in cv_holdout_idx]
    holdout_dataset.samples = holdout_dataset.imgs
    # Subset of holdout used to choose the best model.
    val_dataset = copy.deepcopy(holdout_dataset)
    val_size = int(len(cv_holdout_idx) / 5)
    val_imgs_idx = np.random.choice(range(len(holdout_dataset.imgs)), size=val_size, replace=False,)
    val_dataset.imgs = [holdout_dataset.imgs[i] for i in val_imgs_idx]
    val_dataset.samples = val_dataset.imgs
    train_dataset.imgs = [train_dataset.imgs[i] for i in cv_train_idx]
    train_dataset.samples = train_dataset.imgs
    print('Train size:', len(cv_train_idx), len(train_dataset.imgs))
    print('Holdout size:', len(cv_holdout_idx), len(holdout_dataset.imgs))
    print('Val size (subset of holdout):', len(val_imgs_idx), len(val_dataset.imgs))


    # Create the data loaders
    train_loader = hdf5_3d_dataloader(
        config['data_loader']['args']['hdf5_path'],
        config['data_loader']['args']['batch_size'],
        shuffle = config['data_loader']['args']['shuffle'],
        shape = config['data_loader']['args']['shape'],
        num_workers = config['data_loader']['args']['num_workers'],
        training = config['data_loader']['args']['training'],
        dataset = train_dataset,
        mean = config['data_loader']['args']['mean'],
        stdev = config['data_loader']['args']['stdev'])

    val_loader = hdf5_3d_dataloader(
        config['data_loader']['args']['hdf5_path'],
        config['data_loader']['args']['batch_size'],
        shuffle = config['data_loader']['args']['shuffle'],
        shape = config['data_loader']['args']['shape'],
        num_workers = config['data_loader']['args']['num_workers'],
        training = config['data_loader']['args']['training'],
        dataset = val_dataset,
        mean = config['data_loader']['args']['mean'],
        stdev = config['data_loader']['args']['stdev'])
    
    model = get_instance(module_arch, 'arch', config)
    loss = getattr(module_loss, config['loss'])  # criterion factory named in config (model/loss.py)
    criterion = loss(train_loader.dataset.weight.to(device))  # class weights for imbalanced datasets
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # Build optimizer and learning-rate scheduler. Delete every line containing lr_scheduler to disable the scheduler.
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = get_instance(torch.optim, 'optimizer', config, trainable_params)
    lr_scheduler = get_instance(torch.optim.lr_scheduler, 'lr_scheduler', config, optimizer)

    trainer = Trainer(model, criterion, metrics, optimizer,
                      resume=None,  # no checkpoint to resume from here
                      config=config,
                      data_loader=train_loader,
                      valid_data_loader=val_loader,
                      lr_scheduler=lr_scheduler,
                      train_logger=train_logger)

    trainer.train()

    # Load model best and make predictions on holdout dataset
    holdout_loader = hdf5_3d_dataloader(
        config['data_loader']['args']['hdf5_path'],
        config['data_loader']['args']['batch_size'],
        shuffle = config['data_loader']['args']['shuffle'],
        shape = config['data_loader']['args']['shape'],
        num_workers = config['data_loader']['args']['num_workers'],
        training = config['data_loader']['args']['training'],
        dataset = holdout_dataset,
        mean = config['data_loader']['args']['mean'],
        stdev = config['data_loader']['args']['stdev'])
    
    saved_dir = trainer.checkpoint_dir
    best_model = os.path.join(saved_dir, "model_best.pth")
    print("=> loading {}".format(best_model))

    checkpoint = torch.load(best_model)
    model.load_state_dict(checkpoint['state_dict'])
    print("Running forward pass on holdout set of size:", len(holdout_dataset.imgs))
    probs = get_probs(holdout_loader, model)
    filename = os.path.join(saved_dir, 'model_{}__fold_{}__probs.npy'.format(config['name'], cv_fold))
    
    np.save(filename, probs)    
    return filename
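get_probs is referenced above but not defined in this snippet. A minimal sketch of what it plausibly does, assuming the loader yields (input, target) batches and the model returns raw logits (both assumptions), using the torch/numpy imports already present in this module:

    def get_probs(loader, model, device='cuda'):
        # Forward pass over the whole loader, collecting softmax probabilities.
        model.eval()
        outputs = []
        with torch.no_grad():
            for data, _ in loader:
                logits = model(data.to(device))
                outputs.append(torch.softmax(logits, dim=1).cpu().numpy())
        return np.concatenate(outputs)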
Example #7
    def __init__(self,
                 hdf5_path,
                 batch_size,
                 shuffle=True,
                 shape=[7, 32, 32],
                 validation_split=0.0,
                 num_workers=1,
                 training=True,
                 mean=None,
                 stdev=None):
        rs = np.random.RandomState()
        if mean is None or stdev is None:
            mean = 0
            stdev = 1
            print("No mean and stdev given; falling back to mean=0, stdev=1.")
        #mean = 15.26
        #stdev = 18.21
        trsfm_train = [
            t3d.shotNoise(rs, alpha=0.8, execution_prob=0.3),
            t3d.Downsample(rs, factor=999, order=0, execution_prob=0.2),
            t3d.RandomFlip(rs),
            t3d.RandomRotate90(rs),
            t3d.RandomRotate(rs,
                             angle_spectrum=35,
                             axes=[(1, 2)],
                             mode='constant',
                             order=0),
            t3d.Translate(rs, pixels=8, execution_prob=0.3),
            t3d.RandomContrast(rs, factor=0.8, execution_probability=0.3),
            #t3d.ElasticDeformation(rs, 3, alpha=20, sigma=3, execution_probability=0.2),
            #t3d.GaussianNoise(rs, 3),
            t3d.Normalize(mean, stdev),
            t3d.ToTensor(True)
        ]

        trsfm_test = [
            #t3d.shotNoise(rs, alpha=0.4, execution_prob=1.0),
            #t3d.Downsample(rs, factor=4.0, order=0, execution_prob=1.0),
            t3d.Normalize(mean, stdev),
            t3d.ToTensor(True)
        ]
        '''
        Means and stds for the IMPRS dataset:
        allDAPI_volume      141.42   18.85
        mask_avgproj        128.16    0.54
        mask_maxproj        128.51    1.23
        mask_sumproj        126.145  31.49
        mask_volume         128.23    0.84
        allDAPI_volume0701  140.61   17.60
        '''
        if training:
            trsfm = trsfm_train
        else:
            trsfm = trsfm_test

        self.hdf5_path = hdf5_path
        self.batch_size = batch_size
        self.shape = shape
        # Reload to pick up any recent changes to the databases module
        importlib.reload(databases)
        self.dataset = databases.hdf5dataset(hdf5_path,
                                             shape=self.shape,
                                             training=training,
                                             transforms=trsfm)
        super().__init__(self.dataset, batch_size, shuffle,
                         validation_split, num_workers)
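None of these snippets show how hdf5dataset actually consumes the transforms list; the conventional pattern (an assumption, since the dataset source is not included here) is to apply each callable in order, which is why Normalize always precedes ToTensor in the lists above:

    def apply_transforms(volume, transforms):
        # Apply each transform in sequence; order matters (augment first,
        # then normalize, then convert to a tensor).
        for t in transforms:
            volume = t(volume)
        return volume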