def extract_patches_multifiles(data_dir, names, target_label, patch_size, patch_step, save_dir):
    """Extract square patches from every case in ``names`` whose name ends with ``target_label``.

    NOTE(review): a later definition with the same name exists further down
    in this source; if both stay in the same module the later one replaces
    this one at import time.

    For each selected case the mask/image pair is read with ``read_from_dir``,
    the mask bounding box is scanned with stride ``patch_step`` and a
    ``(2 * (patch_size // 2) + 1)``-wide window is cut around every grid
    point.  Windows whose mask sum is below 10% of ``patch_size ** 2`` are
    dropped.

    :param data_dir: directory containing one sub-directory per case.
    :param names: iterable of case directory names; ``None`` entries are skipped.
    :param target_label: label suffix (string) used to select cases; it is
        also converted with ``int()`` for the returned label list.
    :param patch_size: nominal patch edge length.
    :param patch_step: stride of the sampling grid inside the bounding box.
    :param save_dir: when not ``None`` every patch is written to
        ``save_dir/<coding label>/<target_label>/<n>.npy`` and nothing is
        accumulated; when ``None`` patches are collected and returned.
    :return: ``(patches, coding_labeles, labeles)`` with one entry per case
        that produced at least one patch (all empty when ``save_dir`` is set).
    """
    patches = []
    labeles = []
    coding_labeles = []
    # Floor division keeps the Python 2 integer semantics on any interpreter.
    half = patch_size // 2
    patch_area = float(patch_size * patch_size)

    for name in names:
        # Skip placeholders and cases of other classes.  The original guard
        # let ``None`` through and then crashed in os.path.join.
        if name is None or not name.endswith(target_label):
            continue
        cur_data_dir = os.path.join(data_dir, name)
        # Single-argument print: same text as the old print statement and
        # valid on both Python 2 and Python 3.
        print(' '.join(['extract patches from ', cur_data_dir, ' at ', str(os.getpid())]))
        pv_mask_image, pv_mhd_image = read_from_dir(cur_data_dir)
        coding_image = convert_coding(cur_data_dir)
        x_min, x_max, y_min, y_max = get_boundingbox(pv_mask_image)

        cur_patches = []
        cur_coding_labeles = []
        for i in range(x_min, x_max, patch_step):
            for j in range(y_min, y_max, patch_step):
                # NOTE(review): if the bounding box lies closer than ``half``
                # to the image border, ``i - half`` / ``j - half`` become
                # negative and the slice no longer describes the intended
                # window (assuming numpy indexing) -- confirm inputs.
                rows = slice(i - half, i + half + 1)
                cols = slice(j - half, j + half + 1)
                cur_patch = pv_mhd_image[rows, cols]
                # Fix: coverage must be measured on the mask; the original
                # sliced the intensity image here.
                cur_mask_patch = pv_mask_image[rows, cols]
                # Presumably the mask is 0/1, making this the covered fraction.
                if np.sum(cur_mask_patch) / patch_area < 0.1:
                    continue
                # coding_image is indexed relative to the bounding-box origin.
                cur_coding_label = coding_image[i - x_min, j - y_min]
                if save_dir is not None:
                    save_path_dir = os.path.join(
                        save_dir, str(cur_coding_label), str(target_label))
                    # The next file id is the number of files already present.
                    if os.path.exists(save_path_dir):
                        cur_id = len(os.listdir(save_path_dir))
                    else:
                        cur_id = 0
                    save_path = os.path.join(save_path_dir, str(cur_id) + '.npy')
                    check_save_path(save_path)
                    np.save(save_path, cur_patch)
                else:
                    cur_patches.append(np.array(cur_patch).flatten())
                    cur_coding_labeles.append(cur_coding_label)

        # Only in-memory mode accumulates results; empty cases are dropped.
        if save_dir is None and cur_patches:
            patches.append(cur_patches)
            coding_labeles.append(cur_coding_labeles)
            labeles.append(int(target_label))

    print('%d %d %d' % (len(patches), len(coding_labeles), len(labeles)))
    return patches, coding_labeles, labeles
def extract_patches_multidir(
        data_dir,
        subclasses=('train', 'val', 'test'),
        target_labels=(0, 1, 2, 3),
        patch_size=7,
        patch_step=1,
        save_dir='/home/give/Documents/dataset/ICPR2018/BoVW-TextSpecific-multiphase',
        return_flag=False):
    """Collect patches for several dataset splits and labels, then return or save them.

    ``extract_patches_singledir`` is called once per ``(subclass, label)``
    pair with ``save_dir=None`` and ``multiprocess=8`` so that all patches
    come back in memory; the results are concatenated across pairs.

    :param data_dir: root directory containing one folder per subclass.
    :param subclasses: names of the split folders to visit.
    :param target_labels: class labels to extract; each is passed on as ``str``.
    :param patch_size: forwarded to ``extract_patches_singledir``.
    :param patch_step: forwarded to ``extract_patches_singledir``.
    :param save_dir: output root.  When not ``None`` (and ``return_flag`` is
        false) patches are grouped by coding label and class label and each
        group is written to ``save_dir/<coding label>/<label>/data.npy``.
    :param return_flag: when true, return
        ``(patches, coding_labeles, labeles)`` instead of saving.
    :return: the three parallel lists when ``return_flag`` is true,
        otherwise ``None``.
    """
    patches = []
    labeles = []
    coding_labeles = []
    for subclass in subclasses:
        # The split directory does not depend on the label; join it once.
        cur_data_dir = os.path.join(data_dir, subclass)
        for target_label in target_labels:
            cur_patches, cur_coding_labels, cur_labeles = extract_patches_singledir(
                cur_data_dir,
                str(target_label),
                patch_size=patch_size,
                patch_step=patch_step,
                save_dir=None,
                multiprocess=8)
            patches.extend(cur_patches)
            coding_labeles.extend(cur_coding_labels)
            labeles.extend(cur_labeles)

    if return_flag:
        return patches, coding_labeles, labeles

    if save_dir is not None:
        # save_dict[coding_label][label] -> list of patches
        save_dict = {}
        for i, cur_patches in enumerate(patches):
            cur_label = labeles[i]
            for j, cur_patch in enumerate(cur_patches):
                cur_coding_label = coding_labeles[i][j]
                # setdefault avoids building a key list for every membership
                # test, which ``in d.keys()`` does on Python 2.
                by_label = save_dict.setdefault(cur_coding_label, {})
                by_label.setdefault(cur_label, []).append(cur_patch)

        # Kept from the original flow: the path check still runs even though
        # writing save_dict.mat itself is disabled.
        save_path = os.path.join(save_dir, 'save_dict.mat')
        check_save_path(save_path)

        for coding_label, by_label in save_dict.items():
            for label, group in by_label.items():
                save_path = os.path.join(
                    save_dir, str(coding_label), str(label), 'data.npy')
                check_save_path(save_path)
                np.save(save_path, group)
def generate_representor_multidir(data_dir, patch_dir, reload=None, save_dir=None):
    """Build (or reload) train/test/val features and run the classifier on them.

    :param data_dir: forwarded to ``generate_representor``.
    :param patch_dir: forwarded to ``generate_representor`` as ``patch_dir``.
    :param reload: when ``None`` the features are generated; any other value
        (including ``False``) loads them from the ``.mat`` files in
        ``save_dir`` instead.
    :param save_dir: directory holding ``training.mat``, ``testing.mat`` and
        ``validation.mat``.  Optional when generating (nothing is saved if it
        is ``None``), required when reloading.
    :return: whatever ``execute_classify`` returns for the three splits.
    :raises ValueError: if ``reload`` is set but ``save_dir`` is ``None``.
    """
    if reload is not None and save_dir is None:
        # Previously this surfaced as an opaque error from os.path.join(None, ...).
        raise ValueError('save_dir must be given when reload is requested')

    import scipy.io as scio

    # (subclass name for generate_representor, cache file name), processed in
    # the same order as the original code: train, test, val.
    splits = (
        ('train', 'training.mat'),
        ('test', 'testing.mat'),
        ('val', 'validation.mat'),
    )
    features = {}
    labels = {}
    for subclass, mat_name in splits:
        if reload is None:
            split_features, split_labels = generate_representor(
                data_dir, patch_dir=patch_dir, subclass=subclass)
            # Fix: only persist when a target directory was supplied; the
            # original crashed here with the default save_dir=None after the
            # features had already been computed.
            if save_dir is not None:
                mat_path = os.path.join(save_dir, mat_name)
                check_save_path(mat_path)
                scio.savemat(mat_path, {
                    'features': split_features,
                    'labels': split_labels
                })
        else:
            # NOTE(review): loadmat returns arrays that are at least 2-D, so
            # reloaded labels may differ in shape from freshly generated
            # ones -- confirm execute_classify accepts both.
            mat_data = scio.loadmat(os.path.join(save_dir, mat_name))
            split_features = mat_data['features']
            split_labels = mat_data['labels']
        features[subclass] = split_features
        labels[subclass] = split_labels

    acc = execute_classify(
        features['train'], labels['train'],
        features['val'], labels['val'],
        features['test'], labels['test'])
    return acc
def extract_patches_multifiles(data_dir, names, target_label, patch_size, patch_step, save_dir):
    """Extract multi-phase patches from every case in ``names`` ending with ``target_label``.

    For each selected case the NC, ART and PV phase images are read, cropped
    to the bounding box of their registration mask, zeroed wherever the mask
    is not 1, and combined with ``convert2depthlaster``.  A window of
    ``2 * (patch_size // 2) + 1`` pixels per side is then cut at every grid
    position (stride ``patch_step``) that keeps the window inside the
    combined image.

    Results collected in memory are cached next to the data as
    ``patches.npy`` / ``coding_labeles.npy`` inside the case directory and
    are reused on later calls.

    NOTE(review): the cache does not record ``patch_size`` or ``patch_step``,
    and a cache hit is returned in memory even when ``save_dir`` is given --
    confirm that callers expect this.

    :param data_dir: directory containing one sub-directory per case.
    :param names: iterable of case directory names; ``None`` entries are skipped.
    :param target_label: label suffix (string) used to select cases; also
        converted with ``int()`` for the returned label list.
    :param patch_size: nominal patch edge length.
    :param patch_step: stride of the sampling grid.
    :param save_dir: when not ``None`` each freshly extracted patch is written
        to ``save_dir/<coding label>/<target_label>/<n>.npy``; when ``None``
        patches are flattened with ``flatten_multiphase`` and accumulated.
    :return: ``(patches, coding_labeles, labeles)`` with one entry per case
        that contributed patches.
    :raises IndexError: if a case has no ``<phase>_Image*.mhd`` file.
    """
    patches = []
    labeles = []
    coding_labeles = []
    # Loop-invariant; floor division keeps Python 2 integer semantics.
    half = patch_size // 2

    for name in names:
        # Skip placeholders and cases of other classes.  The original guard
        # let ``None`` through and then crashed in os.path.join.
        if name is None or not name.endswith(target_label):
            continue
        cur_data_dir = os.path.join(data_dir, name)
        patches_save_path = os.path.join(cur_data_dir, 'patches.npy')
        coding_labeles_path = os.path.join(cur_data_dir, 'coding_labeles.npy')

        # Use the cache only when BOTH files exist; a run interrupted between
        # the two np.save calls would otherwise make np.load fail below.
        if os.path.exists(patches_save_path) and os.path.exists(coding_labeles_path):
            cur_patches = np.load(patches_save_path)
            cur_coding_labeles = np.load(coding_labeles_path)
            coding_labeles.append(cur_coding_labeles)
            patches.append(cur_patches)
            labeles.append(int(target_label))
            continue

        mask_images = []
        mhd_images = []
        for phasename in ['NC', 'ART', 'PV']:
            image_path = glob(os.path.join(cur_data_dir, phasename + '_Image*.mhd'))[0]
            mask_path = os.path.join(cur_data_dir, phasename + '_Registration.mhd')
            mhd_image = np.squeeze(read_mhd_image(image_path, rejust=True))
            mask_image = np.squeeze(read_mhd_image(mask_path))
            # Crop both arrays to the mask bounding box and blank everything
            # outside the mask.
            xmin, xmax, ymin, ymax = get_boundingbox(mask_image)
            mask_image = mask_image[xmin: xmax, ymin: ymax]
            mhd_image = mhd_image[xmin: xmax, ymin: ymax]
            mhd_image[mask_image != 1] = 0
            mask_images.append(mask_image)
            mhd_images.append(mhd_image)

        cur_patches = []
        cur_coding_labeles = []
        # The converted masks are not used afterwards; the call is kept so
        # the helper still runs on them exactly as before.
        mask_images = convert2depthlaster(mask_images)
        coding_image = convert_coding(cur_data_dir)
        mhd_images = convert2depthlaster(mhd_images)
        width, height, _ = np.shape(mhd_images)
        # Single-argument print: same text as the old print statement and
        # valid on both Python 2 and Python 3.
        print(' '.join([
            'extract patches from ', cur_data_dir, ' at ', str(os.getpid()),
            ' corresponding size is [', str(width), str(height), ']']))

        for i in range(half, width - half, patch_step):
            for j in range(half, height - half, patch_step):
                cur_patch = mhd_images[i - half:i + half + 1,
                                       j - half:j + half + 1, :]
                cur_coding_label = coding_image[i, j]
                if save_dir is not None:
                    save_path_dir = os.path.join(
                        save_dir, str(cur_coding_label), str(target_label))
                    # The next file id is the number of files already present.
                    if os.path.exists(save_path_dir):
                        cur_id = len(os.listdir(save_path_dir))
                    else:
                        cur_id = 0
                    save_path = os.path.join(save_path_dir, str(cur_id) + '.npy')
                    check_save_path(save_path)
                    np.save(save_path, cur_patch)
                else:
                    cur_patches.append(flatten_multiphase(np.array(cur_patch)))
                    cur_coding_labeles.append(cur_coding_label)

        # Only in-memory mode accumulates and caches; empty cases are dropped.
        if save_dir is None and cur_patches:
            patches.append(cur_patches)
            coding_labeles.append(cur_coding_labeles)
            labeles.append(int(target_label))
            np.save(patches_save_path, cur_patches)
            np.save(coding_labeles_path, cur_coding_labeles)

    print('%d %d %d' % (len(patches), len(coding_labeles), len(labeles)))
    return patches, coding_labeles, labeles