import math
import os
import pickle
import random
import shutil
from collections import defaultdict

import numpy as np
import torch
import torch.nn.functional as F

# subfiles comes from batchgenerators; load is assumed to be medpy.io.load
# (it returns the image array and its header). reshape, reshape_array,
# get_config, pad_nd_image, softmax_analysis and plot_bar are project /
# batchgenerators helpers defined elsewhere in the repository.
from batchgenerators.utilities.file_and_folder_operations import subfiles
from medpy.io import load


def rearrange_dir(root_dir):
    image_dir = os.path.join(root_dir, 'images')
    label_dir = os.path.join(root_dir, 'labels')
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
        print('Created ' + image_dir + '...')
    if not os.path.exists(label_dir):
        os.makedirs(label_dir)
        print('Created ' + label_dir + '...')

    # Sort the loose .nii.gz files into images/ and labels/ by file name.
    nii_files = subfiles(root_dir, suffix=".nii.gz", join=False)
    for f in nii_files:
        src_path = os.path.join(root_dir, f)
        if 'image' in f:
            shutil.move(src_path, os.path.join(image_dir, f))
        elif 'label' in f:
            shutil.move(src_path, os.path.join(label_dir, f))
        print('Moving ' + f + '...')

    if not subfiles(root_dir, suffix=".nii.gz", join=False):
        print('Rearranging directory finished.')
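# The subfiles() helper used throughout comes from
# batchgenerators.utilities.file_and_folder_operations. A minimal stand-in
# with the same call signature, as a sketch in case that dependency is
# unavailable (subfiles_sketch is not part of the original scripts):
def subfiles_sketch(folder, join=True, prefix=None, suffix=None, sort=True):
    # List plain files in `folder`, optionally filtered by prefix/suffix.
    res = [f for f in os.listdir(folder)
           if os.path.isfile(os.path.join(folder, f))
           and (prefix is None or f.startswith(prefix))
           and (suffix is None or f.endswith(suffix))]
    if join:
        res = [os.path.join(folder, f) for f in res]
    if sort:
        res.sort()
    return res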
def preprocess_data(root_dir):
    image_dir = os.path.join(root_dir, 'images')
    label_dir = os.path.join(root_dir, 'labels')
    output_dir = os.path.join(root_dir, 'preprocessed')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)
    for f in nii_files:
        out_path = os.path.join(output_dir, f.split('.')[0] + '.npy')
        # Skip already-preprocessed files; patient 081 is excluded.
        if not os.path.exists(out_path) and '081' not in f:
            image, _ = load(os.path.join(image_dir, f))
            label, _ = load(os.path.join(label_dir, f.replace('image', 'label')))

            # Min-max normalize the image to [0, 1].
            image = (image - image.min()) / (image.max() - image.min())

            # Stack image and label so they stay aligned, then bring the
            # result into the canonical array layout.
            result = np.stack((image, label))
            result = reshape_array(result)
            np.save(out_path, result)
            print(f)
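# A minimal driver sketch for the two helpers above; '/data/mydataset' is a
# hypothetical path and not part of the original scripts.
if __name__ == '__main__':
    root = '/data/mydataset'
    rearrange_dir(root)    # sort loose .nii.gz files into images/ and labels/
    preprocess_data(root)  # normalize, stack image + label, save as .npy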
def create_splits(output_dir, image_dir):
    npy_files = subfiles(image_dir, suffix=".npy", join=False)

    # 50% train, 25% validation, 25% test.
    trainset_size = len(npy_files) * 50 // 100
    valset_size = len(npy_files) * 25 // 100
    testset_size = len(npy_files) * 25 // 100

    splits = []
    for split in range(0, 5):
        image_list = npy_files.copy()
        trainset = []
        valset = []
        testset = []
        # Draw patients at random without replacement; [:-4] strips '.npy'.
        for i in range(0, trainset_size):
            patient = np.random.choice(image_list)
            image_list.remove(patient)
            trainset.append(patient[:-4])
        for i in range(0, valset_size):
            patient = np.random.choice(image_list)
            image_list.remove(patient)
            valset.append(patient[:-4])
        for i in range(0, testset_size):
            patient = np.random.choice(image_list)
            image_list.remove(patient)
            testset.append(patient[:-4])
        split_dict = dict()
        split_dict['train'] = trainset
        split_dict['val'] = valset
        split_dict['test'] = testset
        splits.append(split_dict)

    with open(os.path.join(output_dir, 'splits.pkl'), 'wb') as f:
        pickle.dump(splits, f)
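# Hedged example of reading the splits file back; load_fold_keys is an
# illustrative helper, not part of the original scripts.
def load_fold_keys(splits_dir, fold=0):
    # Return the train/val/test keys of one fold from splits.pkl.
    with open(os.path.join(splits_dir, 'splits.pkl'), 'rb') as f:
        splits = pickle.load(f)
    return splits[fold]['train'], splits[fold]['val'], splits[fold]['test']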
def downsampling_image(data_dir, output_dir):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    npy_files = subfiles(data_dir, suffix=".npy", join=False)
    for file in npy_files:
        np_path = os.path.join(data_dir, file)
        save_path = os.path.join(output_dir, file.split('.')[0] + '.npy')
        if not os.path.exists(save_path):
            numpy_array = reshape_array(np.load(np_path), axis=3)
            shape = numpy_array.shape[3]
            # Halve until the spatial size reaches 2**4 = 16 ...
            num_of_pooling = math.ceil(math.log(shape, 2)) - 4
            # ... minus one step for testing, i.e. target size 32.
            num_of_pooling = num_of_pooling - 1

            slice_data = torch.from_numpy(numpy_array).to(device)
            for k in range(num_of_pooling):
                # Downscale by a factor of 2 per step; bilinear interpolation
                # is used instead of max pooling (F.max_pool2d was tried too).
                slice_data = F.interpolate(slice_data, scale_factor=1 / 2, mode='bilinear')
            pooling_array = slice_data.cpu().numpy()
            np.save(save_path, pooling_array)
            print(file)
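# Worked example of the pooling arithmetic above (illustration only): for an
# in-plane size of 256, ceil(log2(256)) - 4 = 4 halvings would reach 16; the
# extra "- 1" leaves 3 halvings, i.e. 256 -> 128 -> 64 -> 32.
assert math.ceil(math.log(256, 2)) - 4 - 1 == 3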
def preprocess_data(root_dir, y_shape=256, z_shape=64):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 2

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii", join=False)
    # Strip macOS resource-fork prefixes ('._') from the file names.
    for i in range(0, len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        # Accumulate voxel counts per class for the dataset statistics.
        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize the image to [0, 1].
        image = (image - image.min()) / (image.max() - image.min())

        # Deviation from the original script: pad/crop in-plane to 256x256
        # and keep the full slice stack, instead of the original
        # reshape(..., new_shape=(image.shape[0], y_shape, z_shape)).
        image = reshape(image, append_value=0, new_shape=(256, 256, image.shape[2]))
        label = reshape(label, append_value=0, new_shape=(256, 256, image.shape[2]))

        # Stack image and label, then move the slice axis to the front.
        result = np.stack((image, label))
        result = np.swapaxes(result, 1, 3)
        result = np.swapaxes(result, 2, 3)

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
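# Shape check for the axis shuffling above (illustration only): stacking a
# 256x256xZ image with its label gives (2, 256, 256, Z); the two swapaxes
# calls move the slice axis forward to (2, Z, 256, 256).
demo = np.zeros((2, 256, 256, 40))
assert np.swapaxes(np.swapaxes(demo, 1, 3), 2, 3).shape == (2, 40, 256, 256)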
def preprocess_data(root_dir):
    c = get_config()
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = c.num_classes

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)
    # Strip macOS resource-fork prefixes ('._') from the file names.
    for i in range(0, len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize, move the slice axis to the front, then crop/pad
        # image and label together to the configured patch size.
        image = (image - image.min()) / (image.max() - image.min())
        image = np.swapaxes(image, 0, 2)
        label = np.swapaxes(label, 0, 2)
        result = reshape(np.stack([image, label], axis=0), crop_size=c.patch_size)

        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
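# Note: reshape(..., crop_size=...) above has a different signature from the
# reshape(image, append_value=..., new_shape=...) helper used in the other
# scripts; it is assumed to crop/pad the stacked (2, z, y, x) array to
# c.patch_size in one call.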
def preprocess_data(root_dir, y_shape=64, z_shape=64):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    classes = 3

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    class_stats = defaultdict(int)
    total = 0

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)
    # Strip macOS resource-fork prefixes ('._') from the file names.
    for i in range(0, len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        for i in range(classes):
            class_stats[i] += np.sum(label == i)
            total += np.sum(label == i)

        # Min-max normalize, then pad image and label to at least
        # (depth, y_shape, z_shape), filling with the respective minimum.
        image = (image - image.min()) / (image.max() - image.min())
        image = pad_nd_image(image, (image.shape[0], y_shape, z_shape), "constant",
                             kwargs={'constant_values': image.min()})
        label = pad_nd_image(label, (image.shape[0], y_shape, z_shape), "constant",
                             kwargs={'constant_values': label.min()})

        result = np.stack((image, label))
        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)

    print(total)
    for i in range(classes):
        print(class_stats[i], class_stats[i] / total)
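# pad_nd_image comes from batchgenerators (augmentations.utils). A quick
# shape check of the call above, as a sketch with illustrative sizes: axes
# already at least as large as the target are left untouched.
demo = np.zeros((12, 50, 50))
padded = pad_nd_image(demo, (12, 64, 64), "constant", kwargs={'constant_values': 0})
assert padded.shape == (12, 64, 64)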
def testdata_preprocess(input_dir, output_dir):
    nii_files = subfiles(input_dir, suffix=".nii.gz", join=False)
    for i in range(0, len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        # load() returns the image array and its header; the header is not
        # needed here.
        image, header = load(os.path.join(input_dir, f))
        print(f)

        # Min-max normalize and pad/crop to the 64^3 network input size.
        image = (image - image.min()) / (image.max() - image.min())
        image = reshape(image, append_value=0, new_shape=(64, 64, 64))
        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), image)
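# load() here is presumably medpy.io.load; keeping the returned header
# around allows re-saving predictions with the original metadata, e.g.:
#   from medpy.io import load, save
#   image, header = load('patient.nii.gz')
#   save(prediction, 'prediction.nii.gz', header)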
def create_splits(output_dir, image_dir):
    """Split the dataset into five folds, each with a train, validation and test set.

    :param output_dir: directory the splits file is written to
    :param image_dir: directory the preprocessed images lie in
    """
    npy_files = subfiles(image_dir, suffix=".npy", join=False)

    sample_size = len(npy_files)
    testset_size = int(sample_size * 0.25)
    valset_size = int(sample_size * 0.25)
    trainset_size = sample_size - valset_size - testset_size

    # Ensure all samples are used.
    if sample_size < (trainset_size + valset_size + testset_size):
        raise ValueError("More total samples must exist than train, val and test samples combined!")

    splits = []
    sample_set = {sample[:-4] for sample in npy_files}  # remove the file extension
    # The test set is kept static across all splits. random.sample needs a
    # sequence (sampling from a set was removed in Python 3.11), so the set
    # is sorted first.
    test_samples = random.sample(sorted(sample_set), testset_size)

    for split in range(0, 5):
        train_samples = random.sample(sorted(sample_set - set(test_samples)), trainset_size)
        val_samples = list(sample_set - set(train_samples) - set(test_samples))
        train_samples.sort()
        val_samples.sort()

        split_dict = dict()
        split_dict['train'] = train_samples
        split_dict['val'] = val_samples
        split_dict['test'] = test_samples
        splits.append(split_dict)

    # TODO: writing this dict as JSON instead of pickle would let users
    # inspect the splits file in an editor.
    with open(os.path.join(output_dir, 'splits.pkl'), 'wb') as f:
        pickle.dump(splits, f)

    splits_sanity_check(output_dir)
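# splits_sanity_check is referenced above but not shown in this excerpt; a
# hypothetical sketch of what such a check could verify:
def splits_sanity_check_sketch(output_dir):
    with open(os.path.join(output_dir, 'splits.pkl'), 'rb') as f:
        splits = pickle.load(f)
    for fold in splits:
        # No sample may appear in more than one subset of the same fold.
        assert not set(fold['train']) & set(fold['val'])
        assert not set(fold['train']) & set(fold['test'])
        assert not set(fold['val']) & set(fold['test'])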
def preprocess_data(root_dir, y_shape=64, z_shape=64):
    image_dir = os.path.join(root_dir, 'imagesTr')
    label_dir = os.path.join(root_dir, 'labelsTr')
    output_dir = os.path.join(root_dir, 'preprocessed')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        print('Created ' + output_dir + '...')

    nii_files = subfiles(image_dir, suffix=".nii.gz", join=False)
    # Strip macOS resource-fork prefixes ('._') from the file names.
    for i in range(0, len(nii_files)):
        if nii_files[i].startswith("._"):
            nii_files[i] = nii_files[i][2:]

    for f in nii_files:
        image, _ = load(os.path.join(image_dir, f))
        label, _ = load(os.path.join(label_dir, f.replace('_0000', '')))
        print(f)

        # Min-max normalize to [0, 1] and move the slice axis to the front:
        # (x, y, z) -> (z, x, y).
        image = (image - image.min()) / (image.max() - image.min())
        image = np.swapaxes(image, 0, 2)
        image = np.swapaxes(image, 1, 2)
        label = np.swapaxes(label, 0, 2)
        label = np.swapaxes(label, 1, 2)

        result = np.stack((image, label))
        np.save(os.path.join(output_dir, f.split('.')[0] + '.npy'), result)
        print(f)
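# The two swapaxes calls per array above are equivalent to one transpose,
# (x, y, z) -> (z, x, y); a quick check (illustration only):
demo = np.arange(60).reshape(3, 4, 5)
assert np.array_equal(np.swapaxes(np.swapaxes(demo, 0, 2), 1, 2),
                      demo.transpose(2, 0, 1))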
def inference(self):
    self.elog.print('=====INFERENCE=====')
    image_files = subfiles(self.config.scaled_image_32_dir, suffix='.npy')

    # Stage 1: run the 16x16 model and upsample its predictions to 32x32.
    with torch.no_grad():
        if os.path.exists(self.config.stage_1_dir_32):
            print('stage_1_dir already exists')
        else:
            for data_batch in self.data_16_loader:
                file_dir = data_batch['fnames']
                data_16 = data_batch['data'][0].float().to(self.device)  # size (8, 1, 16, 16)
                target_16 = data_batch['seg'][0].float().to(self.device)

                if not os.path.exists(self.config.stage_1_dir_32):
                    os.mkdir(self.config.stage_1_dir_32)
                    print('Creating stage_1_dir...')

                pred_16 = self.model(data_16)
                pred_16_softmax = F.softmax(pred_16, dim=1)
                dice_16 = 1 - self.dice_loss(pred_16_softmax, target_16.squeeze())
                ce_16 = self.ce_loss(pred_16, target_16.squeeze().long())
                # Log batches with a poor Dice score.
                if dice_16 < 0.6:
                    print(file_dir[0])
                    print(data_batch['slice_idxs'])

                # Upsample predictions and targets by a factor of 2.
                pred_32 = F.interpolate(pred_16, scale_factor=2, mode='bilinear')
                target_32 = F.interpolate(target_16, scale_factor=2, mode='bilinear')
                pred_32_softmax = F.softmax(pred_32, dim=1)
                dice_32 = 1 - self.dice_loss(pred_32_softmax, target_32.squeeze())
                ce_32 = self.ce_loss(pred_32, target_32.squeeze().long())
                print('dice_16: %.4f dice_32: %.4f ce_16: %.4f ce_32: %.4f'
                      % (dice_16, dice_32, ce_16, ce_32))

                # Append the upsampled prediction and target of every batch
                # element to its per-patient file.
                for k in range(self.config.batch_size):
                    filename = file_dir[k][0][-14:-4]
                    output_dir = os.path.join(self.config.stage_1_dir_32, 'pred_' + filename + '_64')
                    # Move to CPU before concatenating with numpy.
                    new_data = np.concatenate((pred_32[k:k + 1].cpu().numpy(),
                                               target_32[k:k + 1].cpu().numpy()),
                                              axis=1)  # size (1, 9, 32, 32)
                    if os.path.exists(output_dir + '.npy'):
                        all_data = np.load(output_dir + '.npy')
                        all_data = np.concatenate((all_data, new_data), axis=0)
                    else:
                        all_data = new_data
                        print(filename)
                    np.save(output_dir, all_data)

    # Stage 2: softmax analysis; divide the predicted images into four parts.
    pred_32_files = subfiles(self.config.stage_1_dir_32, suffix='32.npy', join=False)
    with torch.no_grad():
        softmax = []
        for file in pred_32_files:
            dice_score = []
            all_data = np.load(os.path.join(self.config.stage_1_dir_32, file))
            pred_32 = torch.tensor(all_data[:, 0:8]).float()   # size (N, 8, 32, 32)
            target_32 = torch.tensor(all_data[:, 8:9]).long()

            md_softmax, index, weak_image = softmax_analysis(pred_32, threshold=0)
            softmax = softmax + md_softmax

            image_num = pred_32.shape[0]
            for k in range(image_num):
                pred_softmax = F.softmax(pred_32[k:k + 1], 1)
                dice = self.dice_loss(pred_softmax, target_32[k])
                pred_image = torch.argmax(pred_softmax, dim=1)
                dice_score.append(dice)

            # Per-file Dice statistics (visualization).
            dice_score = np.array(dice_score)
            avg_dice = np.average(dice_score)
            min_dice = min(dice_score)
            # print(file, 'dice_loss:%.4f min_dice:%.4f' % (avg_dice, min_dice))

        plot_bar(softmax)
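# softmax_analysis and plot_bar are project helpers not shown in this
# excerpt. A hypothetical sketch of what softmax_analysis might compute,
# judging only from its use above (a confidence value per image plus the
# images whose confidence falls below the threshold):
def softmax_analysis_sketch(pred, threshold=0):
    probs = F.softmax(pred, dim=1)                    # (N, C, H, W)
    max_prob = probs.max(dim=1)[0].mean(dim=(1, 2))   # mean top-1 confidence per image
    weak_idx = (max_prob < threshold).nonzero(as_tuple=True)[0]
    return max_prob.tolist(), weak_idx.tolist(), pred[weak_idx]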
# Tail of a commented-out visualization block; the opening of the string
# lies above this excerpt.
        plt.xlabel('original image')
    elif i == 1:
        plt.xlabel('label image')
    else:
        plt.xlabel('segmented image')
if not os.path.exists(os.path.join(pred_dir, 'images')):
    os.makedirs(os.path.join(pred_dir, 'images'))
plt.savefig(os.path.join(pred_dir, 'images') + '/_006_25.jpg')
plt.show()
"""

n = 4
k = 115
scaled_16_files = subfiles(c.scaled_image_16_dir, suffix='.npy', join=False)
pred_32_files = subfiles(c.stage_1_dir_32, suffix='64.npy', join=False)
org_files = subfiles(c.data_dir, suffix='.npy', join=False)

############ original image and target ########################
file = org_files[2]
data = np.load(os.path.join(c.data_dir, file))
data = reshape_array(data, axis=3)
image = data[:, 0]
target = data[:, 1]

############ downscale using interpolation ########################
data = torch.tensor(data)
data_256 = F.interpolate(data, scale_factor=1 / 16, mode='bilinear')