Exemplo n.º 1
0
    def __load_sample(self, cipath, copath, dpath):
        """Read one DICOM image and rasterise its contour file(s) into masks.

        :param cipath: filepath to the inner contour file
        :param copath: filepath to the outer contour file; any falsy value
                       (e.g. None) skips the outer mask
        :param dpath: filepath to the DICOM file
        :return: tuple (image, inner mask, outer mask). Masks are requested
                 with width=image.shape[1] and height=image.shape[0]; the
                 outer mask is None when copath is falsy.
                 (None, None, None) when the DICOM cannot be parsed or a
                 contour file yields no coordinates.
        """
        failed = (None, None, None)

        parsed = parsing.parse_dicom_file(dpath)
        if parsed is None:
            logging.warning('Dicom file invalid: ' + dpath)
            return failed
        img = parsed['pixel_data']

        inner_pts = parsing.parse_contour_file(cipath)
        if len(inner_pts) == 0:
            logging.warning('Inner contour file empty: ' + cipath)
            return failed
        imask = parsing.poly_to_mask(inner_pts, img.shape[1], img.shape[0])

        # The outer contour is optional; stay at None unless a path was given.
        omask = None
        if copath:
            outer_pts = parsing.parse_contour_file(copath)
            if len(outer_pts) == 0:
                logging.warning('Outer contour file empty: ' + copath)
                return failed
            omask = parsing.poly_to_mask(outer_pts, img.shape[1],
                                         img.shape[0])

        logging.debug('Loaded: ' + dpath)
        return img, imask, omask
Exemplo n.º 2
0
 def __getitem__(self, index):
     """
     Assemble the sample stored at position ``index``.

     ``self.filenames[index]`` is read as (dicom path, inner contour path,
     outer contour path). Contours are parsed before ``self.transform`` runs
     and converted to masks afterwards, using the height and width of the
     (possibly transformed) 'pixel_data'.

     :param index (int): sample index
     :return: dictionary with field 'pixel_data', plus 'imask' / 'omask' for
              each contour path that is truthy
     :raises SkipSampleError: when the DICOM file parses to None
     """
     paths = self.filenames[index]

     sample = parse_dicom_file(paths[0])
     if sample is None:
         raise SkipSampleError()

     # Attach raw contours so the transform can see them.
     for contour_key, position in (('icontour', 1), ('ocontour', 2)):
         if paths[position]:
             sample[contour_key] = parse_contour_file(paths[position])

     if self.transform:
         sample = self.transform(sample)

     pixel_shape = sample['pixel_data'].shape
     height = pixel_shape[0]
     width = pixel_shape[1]

     # Swap every contour that is present for its rasterised mask.
     for contour_key, mask_key in (('icontour', 'imask'),
                                   ('ocontour', 'omask')):
         if contour_key in sample:
             sample[mask_key] = poly_to_mask(sample[contour_key],
                                             width=width,
                                             height=height)
             del sample[contour_key]
     return sample
Exemplo n.º 3
0
def sample_datum(spec):
    '''
    Given a contour datum, open the files associated, parse them and return training
    data as a dictionary of numpy arrays.

    :param spec: A ContourDatum object. Reads ``dicom_path``, ``i_contour`` /
        ``i_contour_path`` and ``o_contour`` / ``o_contour_path``.
    :return: None when the DICOM file parses to None; otherwise a dictionary with
        'image' (pixel data with a trailing axis added) and 'target' (inner and
        outer masks concatenated along a trailing axis). A contour whose
        ``*_contour`` attribute is None contributes an all-False mask.
    '''
    dicom = parse_dicom_file(spec.dicom_path)
    if dicom is None:
        return None
    pixels = dicom['pixel_data']

    def _rasterise(contour_path):
        # A 2-D image array has shape (rows, cols) == (height, width), so the
        # shape must not be splatted directly into (width, height) arguments.
        # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
        height, width = pixels.shape
        return poly_to_mask(parse_contour_file(contour_path), width, height)

    if spec.i_contour is not None:
        i_contour = _rasterise(spec.i_contour_path)
    else:
        i_contour = np.zeros(pixels.shape, dtype=bool)
    i_contour = np.expand_dims(i_contour, -1)

    if spec.o_contour is not None:
        o_contour = _rasterise(spec.o_contour_path)
    else:
        o_contour = np.zeros(pixels.shape, dtype=bool)
    o_contour = np.expand_dims(o_contour, -1)

    return {
        'image': np.expand_dims(pixels, -1),
        'target': np.concatenate((i_contour, o_contour), -1)
    }
Exemplo n.º 4
0
    def generate_X_Y(self, contour_dir, patient_id, original_id, is_icontour):
        """Build the (X, Y) training pairs for one contour directory.

        Every entry listed in ``contour_dir`` is matched to a DICOM file by the
        id extracted from its name; entries without an existing DICOM file are
        skipped.

        :param contour_dir: directory whose entries are enumerated as contour files
        :param patient_id: id used to build the DICOM directory
        :param original_id: id used to build the i-/o-contour directory
        :param is_icontour: True to read from the i-contour directory, otherwise
            the o-contour directory is used
        :return: the X,Y matrices (parsed DICOM data and boolean masks) as numpy arrays
        """
        X = []
        Y = []
        for contour_file in os.listdir(contour_dir):
            # Extract the id and construct the "x" and "y" file names
            # (dicom/contour files respectively)
            slice_id = self.extract_id(contour_file)
            x_file = self.construct_dicom_dir(
                patient_id) + "/" + self.construct_dicom_filename(slice_id)
            if is_icontour:
                y_file = self.construct_icontour_dir(
                    original_id) + "/" + contour_file
            else:
                y_file = self.construct_ocontour_dir(
                    original_id) + "/" + contour_file

            # Only contours with a corresponding dicom image yield a pair.
            if not os.path.isfile(x_file):
                continue

            parsed_contour_file = parse_contour_file(y_file)
            parsed_dicom_file = parse_dicom_file(x_file)
            # Array shape is (rows, cols) == (height, width).
            # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
            height, width = parsed_dicom_file.shape
            bool_mask = poly_to_mask(parsed_contour_file, width, height)
            X.append(parsed_dicom_file)
            Y.append(bool_mask)
        return np.asarray(X), np.asarray(Y)
Exemplo n.º 5
0
def map_imgs_to_masks():
    """Map every DICOM image path to its inner-contour mask and metadata.

    Reads 'final_data/link.csv', gathers the i-contour files of each linked
    directory, and for every image whose DICOM parses to a truthy value stores
    a dict with keys 'mask', 'image', 'img_width', 'img_height' and
    'mask_erroneous'.

    :return: dict keyed by image path
    """
    dir_dicom_map = csv_to_dict('final_data/link.csv')

    # Pass 1: image path -> contour data.
    contours_by_image = {}
    for dicom_dir in dir_dicom_map:
        dicom_prefix = "./final_data/dicoms/{}/".format(dicom_dir)
        contour_glob = "./final_data/contourfiles/{}/i-contours/*.txt".format(
            dir_dicom_map[dicom_dir])
        parsed_contours = parse_files(glob.glob(contour_glob))

        for img_name in parsed_contours:
            contours_by_image[dicom_prefix + img_name] = parsed_contours[img_name]

    # Pass 2: rasterise each contour against its DICOM image.
    imgs_masks = {}
    for img_path in contours_by_image:
        assert os.path.exists(img_path)
        # Should normalize brightness, height, and width
        dcm_data = parsing.parse_dicom_file(img_path)
        if not dcm_data:
            continue

        mask = parsing.poly_to_mask(contours_by_image[img_path],
                                    dcm_data['width'], dcm_data['height'])
        imgs_masks[img_path] = {
            'mask': mask,
            'image': dcm_data['pixel_data'],
            'img_width': dcm_data['width'],
            'img_height': dcm_data['height'],
            'mask_erroneous': any(
                img_path in err_img for err_img in erroneous_masks),
        }

    return imgs_masks
Exemplo n.º 6
0
def batch_wrapper(_linkfile_path, contour_type='o', mask_width=256,
                  mask_height=256):
    '''Main function to call: pair every contour file with its DICOM.

    :param _linkfile_path: path handed to ``get_file_pairs``
    :param contour_type: contour directory prefix, e.g. 'o' for 'o-contours'
    :param mask_width: width passed to ``poly_to_mask`` (default 256, the
        value that used to be hard-coded)
    :param mask_height: height passed to ``poly_to_mask`` (default 256)
    :return: the result of ``make_data(dicoms, contours)`` (described by the
        original author as a DataSet object)
    '''
    contour_files, _, pairs = get_file_pairs(_linkfile_path)

    all_path2contours = []
    for contour_id in contour_files:
        contour_dir = ('./final_data/contourfiles/' + contour_id + '/' +
                       contour_type + '-contours')
        path2contours, contours_avail = get_files_in_path(contour_dir)
        all_path2contours.extend(
            [path2contours + '/' + x for x in contours_avail])

    dicoms = []
    for contour_path in all_path2contours:
        # Resolve the match once per path instead of once per component.
        matched = match_dicom_path(contour_path)
        dicoms.append(
            parsing.parse_dicom_file(DATA_PATH + '/dicoms/' +
                                     pairs[matched[0]] + '/' + matched[1]))

    contours = [{
        'mask':
        parsing.poly_to_mask(parsing.parse_contour_file(x), mask_width,
                             mask_height),
        'file_path':
        x.split('/')[-3:]
    } for x in all_path2contours]
    return make_data(dicoms, contours)
Exemplo n.º 7
0
    def parse_single_img_annotation(self,
                                    dicom_file,
                                    icontour_file,
                                    ocontour_file=None):
        """
        Parses a single DICOM, icontour, ocontour triple. The dicom file informs the image dimensions of the contour array.

        The first DICOM seen fixes ``self.img_height`` / ``self.img_width``;
        later ones are asserted to match.

        :param dicom_file: Path to dicom file
        :param icontour_file: Path to icontour file
        :param ocontour_file: Path to ocontour file
        :return: Tuple (dicom_arr, icontour_arr, ocontour_arr). When parsing the
            ocontour raises TypeError or FileNotFoundError, ocontour_arr is
            ``np.zeros_like(dicom_arr)`` instead of a mask.
        :raises AssertionError: on a DICOM size mismatch or a contour outside the image
        """

        def check_bounds(points, source):
            # Index 0 of each point is checked against the width and index 1
            # against the height, matching the wording of the messages.
            assert self.img_width >= max([
                p[0] for p in points
            ]), 'Contour out of width bounds: ' + source
            assert self.img_height >= max([
                p[1] for p in points
            ]), 'Contour out of height bounds: ' + source

        dicom_arr = parsing.parse_dicom_file(dicom_file)
        if self.img_height is None or self.img_width is None:
            self.img_height = dicom_arr.shape[0]
            self.img_width = dicom_arr.shape[1]
        else:
            assert self.img_height == dicom_arr.shape[
                0], 'All DICOM img height should be the same: ' + dicom_file
            assert self.img_width == dicom_arr.shape[
                1], 'All DICOM img width should be the same: ' + dicom_file

        icontour_lst = parsing.parse_contour_file(icontour_file)
        check_bounds(icontour_lst, icontour_file)
        icontour_arr = parsing.poly_to_mask(icontour_lst, self.img_width,
                                            self.img_height).astype(bool)

        try:
            ocontour_lst = parsing.parse_contour_file(ocontour_file)
            # Validate the outer contour itself (the inner list was checked above).
            check_bounds(ocontour_lst, ocontour_file)
            ocontour_arr = parsing.poly_to_mask(ocontour_lst, self.img_width,
                                                self.img_height).astype(bool)
        except (TypeError, FileNotFoundError):
            # Missing outer contour: fall back to an all-zero array.
            ocontour_arr = np.zeros_like(dicom_arr)

        return dicom_arr, icontour_arr, ocontour_arr
Exemplo n.º 8
0
 def test_poly_to_mask_valid(self):
     """The mask built for a sample contour must have the DICOM image's shape."""
     dcm_dict = parsing.parse_dicom_file('test_data/dicoms/study-1/48.dcm')
     coords = parsing.parse_contour_file(
         'test_data/contourfiles/cf-1/i-contours/IM-0001-0048-icontour-manual.txt'
     )
     dicom_img = dcm_dict['pixel_data']
     # Array shape is (rows, cols) == (height, width).
     # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
     height, width = dicom_img.shape[0], dicom_img.shape[1]
     boolean_mask = parsing.poly_to_mask(coords, width, height)
     #TODO: check if contents of boolean mask are correct
     self.assertEqual(boolean_mask.shape, dicom_img.shape)
Exemplo n.º 9
0
 def test_square(self):
     """Checks that only the interior of a square outline comes back True."""
     vertices = [(0, 0), (0, 3), (3, 3), (3, 0)]

     # Expected 4x4 mask: False border, True 2x2 centre.
     expected = np.zeros((4, 4), dtype=bool)
     expected[1:3, 1:3] = True

     actual = parsing.poly_to_mask(vertices, width=4, height=4)
     assert np.array_equal(actual, expected)
Exemplo n.º 10
0
def process_image(image_filepath, contour_filepath=None):
    """Parse a DICOM image and, optionally, the mask of its contour file.

    :param image_filepath: path handed to ``parse_dicom_file``
    :param contour_filepath: path handed to ``parse_contour_file``; None skips the mask
    :return: tuple (img, mask) where mask is None when no contour path is given
    """
    img = parse_dicom_file(image_filepath)
    height, width = img.shape
    if contour_filepath is None:
        return (img, None)

    coordinates = parse_contour_file(contour_filepath)
    return (img, poly_to_mask(coordinates, width, height))
Exemplo n.º 11
0
    def __iter__(self):
        """Iterate over the files in shuffled batches.

        The sample order is a permutation drawn from a RandomState seeded with
        ``self._seed``; the last batch may be shorter than ``self._batch_size``.

        :return: generator yielding (images, i_contours, o_contours) arrays; a
                 contour entry is None when that contour type is not included
        """

        total = len(self._files)
        order = np.random.RandomState(seed=self._seed).permutation(total)

        for batch_no in range(math.ceil(total / self._batch_size)):
            images = []
            i_masks = [] if self._include_i_contours else None
            o_masks = [] if self._include_o_contours else None

            lower = batch_no * self._batch_size
            upper = (batch_no + 1) * self._batch_size

            for position in order[lower:upper]:
                entry = self._files[position]
                image = parsing.parse_dicom_file(entry['image_path'])
                images.append(image)

                if self._include_i_contours:
                    points = parsing.parse_contour_file(entry['i_contour_path'])
                    i_masks.append(
                        parsing.poly_to_mask(points,
                                             width=image.shape[1],
                                             height=image.shape[0]))
                if self._include_o_contours:
                    points = parsing.parse_contour_file(entry['o_contour_path'])
                    o_masks.append(
                        parsing.poly_to_mask(points,
                                             width=image.shape[1],
                                             height=image.shape[0]))

            # Stack the per-sample lists; excluded contour types stay None.
            images = np.array(images)
            if self._include_i_contours:
                i_masks = np.array(i_masks)
            if self._include_o_contours:
                o_masks = np.array(o_masks)

            yield images, i_masks, o_masks
Exemplo n.º 12
0
def test_poly_to_mask():
    """Pixels strictly inside a 64..192 square must all be set in a 256x256 mask."""
    low, high = 64.0, 192.0
    corners = [(low, low), (low, high), (high, high), (high, low)]

    mask = parsing.poly_to_mask(corners, 256, 256)

    interior = mask[65:192, 65:192]
    assert all(interior.ravel().tolist())
Exemplo n.º 13
0
def test_poly_to_mask():
    """Compare the mask of an L-shaped polygon against a hand-written 4x4 grid."""
    # L-shaped polygon
    poly = [(0, 0), (1, 0), (2, 0), (2, 1), (3, 1), (3, 2), (3, 3), (2, 3),
            (1, 3), (0, 3), (0, 2), (0, 1)]

    actual = poly_to_mask(poly, 4, 4)

    # Use the builtin bool: the np.bool alias is not available in every
    # NumPy release.
    expected = np.array([[0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0],
                         [0, 0, 0, 0]]).astype(bool)

    assert np.array_equal(actual, expected)
Exemplo n.º 14
0
def _load_dicom_contour_paths(path_tups):
    '''Returns an iterator over image/mask tuples loaded from the given paths.

    :param path_tups: iterable of (dicom_path, contour_path) pairs
    :return: generator yielding (pixel data, mask) for each pair
    '''

    for dicom_path, contour_path in path_tups:
        # read dicom data
        logger.debug('loading dicom_path: %s', dicom_path)
        dcm_dict = parse_dicom_file(dicom_path)
        dicom = dcm_dict['pixel_data']
        # Array shape is (rows, cols) == (height, width).
        # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
        height, width = dicom.shape

        # read contour data
        logger.debug('loading contour_path: %s', contour_path)
        coords_list = parse_contour_file(contour_path)
        mask = poly_to_mask(coords_list, width, height)

        yield (dicom, mask)
Exemplo n.º 15
0
def generate_all_pairing(save_comb=False):
    """Pair each dicom image with its contour mask for all patients.

    For every contour file a mask image is saved as '<index>.png' and a
    'dicom_path,mask_path' line is appended to the pair file at
    ``config.PAIR_PATH`` (opened with mode "w+").

    :param save_comb: whether a combined dicom + mask plot is saved as well
    :return: None
    """
    # The context manager closes the pair file even if a step below raises.
    with open(config.PAIR_PATH, "w+") as pair_file:
        header = config.DICOM_FILE_TYPE + ',' + config.MASK_FILE_TYPE + '\n'
        pair_file.write(header)
        for patient_id, original_id in tqdm(generate_link()):
            cur_contour_path = config.CONTOUR_PATH.format(original_id)
            cur_mask_path = config.MASK_PATH.format(original_id)
            cur_comb_path = config.COMB_PATH.format(original_id)
            try:
                mkdir(cur_mask_path)
            except FileExistsError:
                print("has directory{}".format(cur_mask_path))
            if save_comb:
                try:
                    mkdir(cur_comb_path)
                except FileExistsError:
                    print("has directory{}".format(cur_comb_path))
            for contour_name in listdir(cur_contour_path):
                contour = parse_contour_file(join(cur_contour_path, contour_name))
                # Third '-'-separated field of the name, without leading zeros.
                index = str(int(contour_name.split('-')[2]))
                dcm_name = index + config.DCM_EXT
                # NOTE(review): this try also covers the save calls, so a
                # FileNotFoundError raised there is reported as 'no dcm match'.
                try:
                    dcm_path = join(config.DICOMS_PATH, patient_id, dcm_name)
                    dcm = parse_dicom_file(dcm_path)  # FileNotFoundError
                    if not dcm:  # invalid dicom file
                        continue
                    dcm_pixel = dcm['pixel_data']
                    # Array shape is (rows, cols) == (height, width).
                    # NOTE(review): assumes poly_to_mask(polygon, width, height)
                    # - confirm.
                    height, width = dcm_pixel.shape
                    mask = poly_to_mask(contour, width, height)

                    img = mask_to_image(mask)
                    mask_path = join(cur_mask_path, index + '.png')
                    img.save(mask_path)
                    pair_file.write(dcm_path + ',' + mask_path + '\n')
                    if save_comb:
                        # save comb as image
                        plt = plot_dcm_with_mask(dcm_pixel, mask)
                        comb_path = join(cur_comb_path, 'val' + index + '.png')
                        plt.savefig(comb_path, bbox_inches='tight', transparent=True, pad_inches=0.0)
                except FileNotFoundError:
                    print('no dcm match')
Exemplo n.º 16
0
    def test_integration(self):
        """Run DICOM parsing, contour parsing and mask creation end to end.

           Each slice is written as a blended image of mask and dicom data;
           the result needs to be verified manually in test_data/merged*.png. """

        for slice_no in (68,108,148,179):
            dicom_path = 'test_data/dicoms/dicom1/{:d}.dcm'.format(slice_no)
            dcm_dict = parsing.parse_dicom_file(dicom_path)
            self.assertTrue(dcm_dict is not None)
            img = dcm_dict['pixel_data']

            contour_path = 'test_data/contourfiles/folder1/i-contours/IM-0001-{:04d}-icontour-manual.txt'.format(slice_no)
            coords_lst = parsing.parse_contour_file(contour_path)
            self.assertTrue(len(coords_lst) > 0)

            mask = parsing.poly_to_mask(coords_lst, img.shape[1], img.shape[0])
            self.assertTrue(np.sum(mask) > 0)
            self.assertTrue(mask.shape == img.shape)

            # Three stacked copies of the image; zero the second one under the mask.
            imgRGB = np.tile(img, (3,1,1))
            second_plane = imgRGB[1]
            second_plane[mask] = 0
            out_path = 'test_data/merged{:d}.png'.format(slice_no)
            misc.imsave(out_path, imgRGB) #needs to be verified manually
Exemplo n.º 17
0
    def _add_datapoints_for_patient(self, dicom_files, contour_files):
        """Create a datapoint for every dicom file that has a matching contour.

        Files whose id is extracted as -1 are ignored. A pair is dropped when
        the contour list is empty, the dicom data is falsy or lacks
        'pixel_data', the mask conversion fails, or the resulting DataPoint is
        not valid.

        :param dicom_files: list of all dicom files.
        :param contour_files: list of all contour files.
        """
        # Index contour files by dicom id; a later file with the same id wins.
        contour_by_id = dict()
        for path in contour_files:
            contour_id = util.extract_dicom_id_from_contour_filepath(path)
            if contour_id == -1:
                continue
            contour_by_id[contour_id] = path

        for dicom_file in dicom_files:
            dicom_id = util.extract_dicom_id_from_dicom_filepath(dicom_file)
            if dicom_id == -1:
                continue
            if dicom_id not in contour_by_id:
                continue

            contour_file = contour_by_id[dicom_id]
            contour_list = parsing.parse_contour_file(contour_file)
            if not contour_list:
                continue

            dicom_data = parsing.parse_dicom_file(dicom_file)
            if not dicom_data or 'pixel_data' not in dicom_data:
                continue
            dicom_image = dicom_data['pixel_data']

            try:
                mask = parsing.poly_to_mask(contour_list,
                                            dicom_image.shape[1],
                                            dicom_image.shape[0])
            except parsing.MaskConversionError:
                continue
            if mask is None:
                continue

            candidate = DataPoint(dicom_file, contour_file, dicom_image, mask)
            if candidate.is_valid():
                self.data_points.append(candidate)
Exemplo n.º 18
0
    def get_patient_data(self, patient_id, contour):
        """ Get the image and masks for a given patient

        Pairs whose dicom file or contour file parses to None are skipped.

        :param patient_id: string, the id of the patient
        :param contour: The contour type to use. Can be either 'i' or 'o'.

        :return: [[image1, mask1], [image2, mask2], ...]
        """
        patient_data = []
        fnames = self.get_patient_fnames(patient_id, contour)
        for dicom_fname, label_fname in fnames:
            dicom = parsing.parse_dicom_file(dicom_fname)
            if dicom is None:
                continue
            image = dicom['pixel_data']
            # Array shape is (rows, cols) == (height, width).
            # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
            height, width = image.shape

            polygon = parsing.parse_contour_file(label_fname)
            if polygon is None:
                continue
            mask = parsing.poly_to_mask(polygon, width, height)
            patient_data.append([image, mask])
        return patient_data
def getMask(contourFilename, imgWidth, imgHeight, maskThreshold=0.5):
    '''
    Build the filled mask for one contour file.

    contourFilename = absolute path to the contour file
    imgWidth = desired width
    imgHeight = desired height
    maskThreshold = [0,1] Sanity check. If mask is larger than this percentage, then contour might be bad.
    Returns the mask; a warning is printed (the mask is still returned) when
    the threshold is exceeded.
    TODO: Add a Hough ellipse detector to validate one and only one round mask.
    '''

    # Extract the polygon contour points
    polygonPoints = parse_contour_file(contourFilename)
    # Fill the polygon
    imgMask = poly_to_mask(polygonPoints, imgWidth, imgHeight)

    # Sanity check - What if the polygon is malformed? Let's check to make sure the mask isn't
    # more than a certain percentage of the entire image
    percentMask = imgMask.sum() / float(imgMask.shape[0] * imgMask.shape[1])
    if percentMask > maskThreshold:
        # Report the contour file: it is the only identifier in scope here.
        print(
            "The mask is more than {} of the image. Please check if polygon is correct. {}"
            .format(maskThreshold, contourFilename))
    return imgMask
Exemplo n.º 20
0
 def convert_tuple_to_img_boolean_mask(self, dicom_file, icontour_file):
     """
     Convert a dicom file and an icontour file into a numpy image array and a boolean mask
     respectively.
     :param dicom_file: path of dicom file
     :param icontour_file: path of icontourfile
     :return: a tuple containing a numpy image array and a boolean mask array, or 2 Nones if
     the dicom file parses to None or the contour file yields no coordinates.
     """
     dcm_dict = parsing.parse_dicom_file(dicom_file)
     if dcm_dict is None:
         logging.warning('Dicom file invalid: ' + dicom_file)
         return (None, None)
     dicom_img = dcm_dict['pixel_data']

     coords_lst = parsing.parse_contour_file(icontour_file)
     if len(coords_lst) == 0:
         logging.warning('Inner contour file empty: ' + icontour_file)
         return (None, None)

     # Array shape is (rows, cols) == (height, width).
     # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
     height, width = dicom_img.shape[0], dicom_img.shape[1]
     icontour_boolean_mask = parsing.poly_to_mask(coords_lst, width, height)
     return (dicom_img, icontour_boolean_mask)
Exemplo n.º 21
0
def load_data_files(dicom_file, contour_file):
    """Load DICOM and corresponding contour file from disk

    :param dicom_file: path to the DICOM file
    :param contour_file: path to the contour file
    :return: (dicom_image, contour_mask) tuple, or None when the DICOM, the
        contour or the mask comes back as None
    """

    # if loading any of the resources fails, just return None so it will be
    # skipped by the consumer
    print("loading DICOM {}".format(dicom_file))
    dicom = parsing.parse_dicom_file(dicom_file)
    if dicom is None:
        return None
    dicom_size = np.shape(dicom)
    print("loading contour {}".format(contour_file))
    contour = parsing.parse_contour_file(contour_file)
    if contour is None:
        return None
    # np.shape gives (rows, cols) == (height, width).
    # NOTE(review): assumes poly_to_mask(polygon, width, height) - confirm.
    mask = parsing.poly_to_mask(contour, dicom_size[1], dicom_size[0])
    if mask is None:
        return None
    return (dicom, mask)
Exemplo n.º 22
0
 def test_polygon_out_of_bound(self):
     """Tests that MaskConversionError is raised for a polygon with a
     vertex, (3, 0), that lies outside the requested 2x2 mask."""
     with pytest.raises(parsing.MaskConversionError):
         parsing.poly_to_mask([(0, 0), (1, 1), (3, 0)],
                              width=2,
                              height=2)
Exemplo n.º 23
0
 def test_less_than_two(self):
     """Tests that MaskConversionError is raised when there is only 1 vertex."""
     with pytest.raises(parsing.MaskConversionError):
         parsing.poly_to_mask([(3, 0)], width=4, height=4)
Exemplo n.º 24
0
 def test_poly_to_mask_invalid(self):
     """An empty polygon with a 0x0 size is expected to raise TypeError."""
     no_points = []
     with self.assertRaises(TypeError):
         parsing.poly_to_mask(no_points, 0, 0)
Exemplo n.º 25
0
    def _load(self, fn):
        """Load the dataset stored in a csv file (implementation)

        Each csv row supplies a 'patient_id' (DICOM directory) and an
        'original_id' (contour directory), both resolved relative to the
        directory containing ``fn``. Only slices that have both an i-contour
        and an o-contour are loaded. The result is stored in ``self._data``
        (pixel arrays and uint8 label maps) and ``self._n``.

        :param fn: filepath to the csv file
        :raises RuntimeError: if ``fn`` is not a file or cannot be opened, if
            the contour file names cannot be parsed, or if a contour / dicom
            file cannot be read
        """

        if not os.path.isfile(fn):
            raise RuntimeError('file does not exist / input is not a file')
        data_dir = os.path.dirname(fn)

        # regexp for extracting z-index from file name (raw string so that
        # '\d' is not treated as a string escape)
        cont_file_pat = re.compile(r'IM-0001-(\d+)-.contour-manual.txt')

        PIXEL_DATA = 'pixel_data'
        PATIENT_ID = 'patient_id'
        ORIGINAL_ID = 'original_id'
        DICOM_FN_FORMAT = '{}/dicoms/{}/{}.dcm'  # fill in dir, patient id, and z-index
        ICONT_DIR_FORMAT = '{}/contourfiles/{}/i-contours'  # fill in dir, patient id
        OCONT_DIR_FORMAT = '{}/contourfiles/{}/o-contours'  # fill in dir, patient id

        try:
            with open(fn, 'r') as csvfile:
                reader = csv.DictReader(csvfile)
                self._data = {self._DATA_KEY: [], self._LABEL_KEY: []}
                for row in reader:
                    patient_id = row[PATIENT_ID]
                    original_id = row[ORIGINAL_ID]

                    # get the dimensions of the first slice in this volume;
                    # every mask of the volume is rasterised at this size
                    z = 1
                    dicom_fn = DICOM_FN_FORMAT.format(data_dir, patient_id, z)
                    img = parse_dicom_file(dicom_fn)
                    pixels = img[PIXEL_DATA]
                    h, w = pixels.shape

                    icont_dir = ICONT_DIR_FORMAT.format(data_dir, original_id)
                    ocont_dir = OCONT_DIR_FORMAT.format(data_dir, original_id)

                    # load only the intersection of the i- and o-contours
                    try:
                        icont_files = os.listdir(icont_dir)
                        ocont_files = os.listdir(ocont_dir)
                        icont_z = [int(cont_file_pat.match(f).groups()[0]) for f in icont_files]
                        ocont_z = [int(cont_file_pat.match(f).groups()[0]) for f in ocont_files]
                        icont_z, icont_files = zip(*sorted(zip(icont_z, icont_files)))
                        ocont_z, ocont_files = zip(*sorted(zip(ocont_z, ocont_files)))
                        zs = np.intersect1d(icont_z, ocont_z)
                        icont_files = np.array(icont_files)[np.in1d(icont_z, zs)]
                        ocont_files = np.array(ocont_files)[np.in1d(ocont_z, zs)]
                    except Exception:
                        # Exception (not a bare except) so KeyboardInterrupt and
                        # SystemExit are not converted into a RuntimeError.
                        raise RuntimeError('failed to parse the file name of icontours')

                    # iterate all relevant files
                    for z, icont_file, ocont_file in zip(zs, icont_files, ocont_files):
                        # read icontour
                        icont_file_path = os.path.join(icont_dir, icont_file)
                        try:
                            i_mask = poly_to_mask(parse_contour_file(icont_file_path), w, h)
                        except Exception:
                            raise RuntimeError('cannot parse contour file "{}"'.format(icont_file_path))

                        # read ocontour
                        ocont_file_path = os.path.join(ocont_dir, ocont_file)
                        try:
                            o_mask = poly_to_mask(parse_contour_file(ocont_file_path), w, h)
                        except Exception:
                            raise RuntimeError('cannot parse contour file "{}"'.format(ocont_file_path))

                        # read dicom
                        dicom_fn = DICOM_FN_FORMAT.format(data_dir, patient_id, z)
                        try:
                            img = parse_dicom_file(dicom_fn)
                            pixels = img[PIXEL_DATA]
                        except Exception:
                            raise RuntimeError('cannot parse dicom file "{}"'.format(dicom_fn))

                        # make sure everything looks good; compare against the
                        # builtin bool because the np.bool alias is not
                        # available in every NumPy release
                        assert ((h, w) == pixels.shape)
                        assert (i_mask.dtype == bool)
                        assert (o_mask.dtype == bool)

                        self._data[self._DATA_KEY].append(pixels)
                        # store as uint8 to accommodate for multi-class problems;
                        # the inner mask is written last, so it overrides the
                        # outer label where the two overlap
                        label = np.zeros(o_mask.shape, np.uint8)
                        label[o_mask] = self.LABEL_HEART_MUSCLE
                        label[i_mask] = self.LABEL_BLOOD_POOL
                        self._data[self._LABEL_KEY].append(label)

                self._data[self._DATA_KEY] = np.array(self._data[self._DATA_KEY])
                self._data[self._LABEL_KEY] = np.array(self._data[self._LABEL_KEY])
                self._n = len(self._data[self._LABEL_KEY])
        except IOError:
            raise RuntimeError('cannot open csv file "{}"'.format(fn))
Exemplo n.º 26
0
def build_dataset(ct_dir, dcm_dir, link_file, out_h5):
    """ build a h5 file with two datasets in it: 'image' and 'label'.

    One entry is appended per i-contour '.txt' file (hidden files are
    skipped). The datasets are created with the shape of the first DICOM
    image encountered.

    ct_dir: contour file root dir, i.e. 'contourfiles'
    dcm_dir: dicom file root dir, i.e. 'dicoms'
    link_file: csv file with a header row; column 0 holds the dicom dir name
        and column 1 the contour dir name
    out_h5: output h5 file. Opened with mode 'w-'.
    """

    links = np.genfromtxt(link_file,
                          skip_header=1,
                          delimiter=',',
                          dtype='unicode')
    links_dic = {row[1]: row[0] for row in links}

    # Created lazily: the dicom image shape is needed to size the datasets.
    image_dset = None
    label_dset = None

    # The context manager closes the file even if a slice fails to load.
    with h5py.File(out_h5, 'w-') as h5_fh:
        for subject_dir in sorted(os.listdir(ct_dir)):

            # Find original dicom ID. Will be used as dir name
            orig_id = links_dic[subject_dir]

            sub_con_dir = os.path.join(ct_dir, subject_dir, 'i-contours')

            for fname in sorted(os.listdir(sub_con_dir)):

                if not fname.endswith('.txt') or fname.startswith('.'):
                    continue

                print("Working on subject {}, slice {}".format(
                    subject_dir, fname))

                coords_list = parse_contour_file(
                    os.path.join(sub_con_dir, fname))

                # Drop only the leading zeros: strip('0') would also remove
                # trailing ones and turn '0020' into '2'.
                slice_idx = fname[8:12].lstrip('0')

                dcm_file = os.path.join(dcm_dir, orig_id, slice_idx + '.dcm')

                dcm_im = parse_dicom_file(dcm_file)['pixel_data']

                # Array shape is (rows, cols) == (height, width).
                # NOTE(review): assumes poly_to_mask(polygon, width, height)
                # - confirm.
                mask_im = poly_to_mask(coords_list, dcm_im.shape[1],
                                       dcm_im.shape[0])
                mask_im = mask_im.astype(np.bool_)

                if image_dset is None:
                    image_dset = h5_fh.create_dataset(
                        'image',
                        shape=(0, dcm_im.shape[0], dcm_im.shape[1]),
                        maxshape=(None, dcm_im.shape[0], dcm_im.shape[1]),
                        dtype=np.float32)

                    label_dset = h5_fh.create_dataset(
                        'label',
                        shape=(0, dcm_im.shape[0], dcm_im.shape[1]),
                        maxshape=(None, dcm_im.shape[0], dcm_im.shape[1]),
                        dtype=np.bool_)

                # Grow both datasets by one slot and fill the new last entry.
                sh = image_dset.shape
                image_dset.resize((sh[0] + 1, sh[1], sh[2]))
                label_dset.resize((sh[0] + 1, sh[1], sh[2]))

                image_dset[-1, ] = dcm_im
                label_dset[-1, ] = mask_im