Example #1
import json
import logging
import os

from monai.apps.deepgrow.dataset import create_dataset


def prepare_datalist(args):
    dimensions = args.dimensions
    dataset_json = os.path.join(args.output, 'dataset.json')
    if not os.path.exists(dataset_json):
        logging.info('Processing dataset...')
        with open(args.dataset_json) as f:
            datalist = json.load(f)

        # Convert the selected records into Deepgrow-ready training samples
        datalist = create_dataset(datalist=datalist[args.datalist_key],
                                  base_dir=args.dataset_root,
                                  output_dir=args.output,
                                  dimension=dimensions,
                                  pixdim=[1.0] * dimensions,
                                  limit=args.limit,
                                  relative_path=args.relative_path)

        with open(dataset_json, 'w') as fp:
            json.dump(datalist, fp, indent=2)
    else:
        logging.info('Pre-load existing dataset.json')

    with open(dataset_json) as f:
        datalist = json.load(f)
    logging.info('+++ Dataset File: {}'.format(dataset_json))
    logging.info('+++ Total Records: {}'.format(len(datalist)))
    logging.info('')
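
For orientation, a minimal driver for prepare_datalist follows. It is only a sketch: the argparse flag names are assumptions inferred from the args.* attributes the function reads, not confirmed by the source.

import argparse

# Hypothetical CLI wrapper for prepare_datalist; every flag name is inferred.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dimensions', type=int, default=3)
    parser.add_argument('--dataset_json', required=True)
    parser.add_argument('--datalist_key', default='training')
    parser.add_argument('--dataset_root', default=None)
    parser.add_argument('--output', required=True)
    parser.add_argument('--limit', type=int, default=0)
    parser.add_argument('--relative_path', action='store_true')
    prepare_datalist(parser.parse_args())
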
Example #2
def test_create_dataset_3d(self):
    with tempfile.TemporaryDirectory() as tempdir:
        datalist = self._create_data(tempdir)
        output_dir = os.path.join(tempdir, "3d")
        deepgrow_datalist = create_dataset(datalist=datalist,
                                           output_dir=output_dir,
                                           dimension=3,
                                           pixdim=(1, 1, 1))
        self.assertEqual(len(deepgrow_datalist), 1)
        self.assertEqual(deepgrow_datalist[0]["region"], 1)
Example #3
    def pre_process(self, request, datastore: Datastore):
        self.cleanup(request)

        cache_dir = self.get_cache_dir(request)
        output_dir = os.path.join(cache_dir,
                                  f"deepgrow_{self.dimension}D_train")
        logger.info(
            f"Preparing Dataset for Deepgrow-{self.dimension}D:: {output_dir}")

        # Rebuild the Deepgrow samples from the current datastore contents
        datalist = create_dataset(
            datalist=datastore.datalist(),
            base_dir=None,
            output_dir=output_dir,
            dimension=self.dimension,
            pixdim=[1.0] * self.dimension,
        )

        logger.info(f"+++ Total Records: {len(datalist)}")
        return datalist
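
All of the examples funnel into the same entry point, monai.apps.deepgrow.dataset.create_dataset. A self-contained sketch of a direct call, using placeholder image/label paths that are not part of the source:

# Minimal direct use of create_dataset; the input paths are hypothetical.
from monai.apps.deepgrow.dataset import create_dataset

datalist = [{"image": "image.nii.gz", "label": "label.nii.gz"}]
deepgrow_datalist = create_dataset(
    datalist=datalist,
    output_dir="deepgrow_2d_train",
    dimension=2,        # 2 writes per-slice 2D samples; 3 keeps 3D volumes
    pixdim=(1.0, 1.0),  # target spacing; length must match dimension
)
print(len(deepgrow_datalist))
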
Example #4
import json
import logging
import os

from monai.apps.deepgrow.dataset import create_dataset
from monai.data import partition_dataset


def prepare_datalist(args):
    dimensions = args.dimensions
    dataset_json = os.path.join(args.output, 'dataset.json')

    logging.info('Processing dataset...')
    with open(args.dataset_json) as f:
        datalist = json.load(f)

    datalist = create_dataset(datalist=datalist[args.datalist_key],
                              base_dir=args.dataset_root,
                              output_dir=args.output,
                              dimension=dimensions,
                              pixdim=[1.0] * dimensions,
                              limit=args.limit,
                              relative_path=args.relative_path)

    with open(dataset_json, 'w') as fp:
        json.dump(datalist, fp, indent=2)

    with open(dataset_json) as f:
        datalist = json.load(f)
    logging.info('+++ Dataset File: {}'.format(dataset_json))
    logging.info('+++ Total Records: {}'.format(len(datalist)))
    logging.info('')

    # Shuffle and split the records into training/validation partitions
    train_ds, val_ds = partition_dataset(datalist,
                                         ratios=[args.split, (1 - args.split)],
                                         shuffle=True,
                                         seed=args.seed)
    dataset_json = os.path.join(args.output, 'dataset_0.json')
    with open(dataset_json, 'w') as fp:
        json.dump({'training': train_ds, 'validation': val_ds}, fp, indent=2)

    logging.info('*** Dataset File: {}'.format(dataset_json))
    logging.info('*** Total Records for Training: {}'.format(len(train_ds)))
    logging.info('*** Total Records for Validation: {}'.format(len(val_ds)))

    assert len(train_ds) > 0, "Train Dataset/Records is EMPTY"
    assert len(val_ds) > 0, "Validation Dataset/Records is EMPTY"
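
The final split relies on monai.data.partition_dataset. A standalone sketch of the same call, with toy records standing in for the generated datalist:

from monai.data import partition_dataset

# Ten dummy records split 80/20, shuffled with a fixed seed.
records = [{"id": i} for i in range(10)]
train_ds, val_ds = partition_dataset(records,
                                     ratios=[0.8, 0.2],
                                     shuffle=True,
                                     seed=42)
print(len(train_ds), len(val_ds))  # 8 and 2 with these ratios
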
Example #5
def test_empty_datalist(self):
    with self.assertRaises(ValueError):
        create_dataset(datalist=[], output_dir=self.tempdir, dimension=3, pixdim=(1, 1, 1))
Example #6
def test_invalid_dim(self):
    with self.assertRaises(ValueError):
        create_dataset(datalist=self._create_data(), output_dir=self.tempdir, dimension=4, pixdim=(1, 1, 1, 1))
Example #7
def test_create_dataset(self, args, data_args, expected_length, expected_region):
    datalist = self._create_data(**data_args)
    deepgrow_datalist = create_dataset(datalist=datalist, output_dir=self.tempdir, **args)
    self.assertEqual(len(deepgrow_datalist), expected_length)
    if expected_region is not None:
        self.assertEqual(deepgrow_datalist[0]["region"], expected_region)
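
Example #7 is a parameterized case. A hedged sketch of how it might be wired up with the parameterized package; the case values below are illustrative assumptions, not data from the source:

import unittest

from parameterized import parameterized

# One illustrative case tuple matching the signature
# (args, data_args, expected_length, expected_region) used above.
TEST_CASE = [{"dimension": 2, "pixdim": (1, 1)}, {}, 1, None]

class TestCreateDataset(unittest.TestCase):
    @parameterized.expand([TEST_CASE])
    def test_create_dataset(self, args, data_args, expected_length, expected_region):
        ...  # body as in Example #7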