def gen_patch_data(img_file, patch_h=64, patch_w=64):
    """Cut an image into non-overlapping patches and keep channel 1 of each.

    Args:
        img_file: Path of an image file (read with ``cv2.imread``) or an
            already loaded image array.
        patch_h: Patch height forwarded to ``extract_patch_non_overlap``.
        patch_w: Patch width forwarded to ``extract_patch_non_overlap``.

    Returns:
        ``np.ndarray`` stacking one float16 patch per extracted patch; each
        patch keeps only channel index 1 (green), gets a trailing channel
        axis and is passed through ``input_norm``.

    Raises:
        OSError: If ``img_file`` is a path that ``cv2.imread`` cannot read.
    """
    if isinstance(img_file, str):
        image = cv2.imread(img_file)
        # cv2.imread reports failure by returning None instead of raising.
        if image is None:
            raise OSError('cannot read image file: {}'.format(img_file))
    else:
        image = img_file

    patches = extract_patch_non_overlap(image, patch_h=patch_h, patch_w=patch_w)

    list_tmp = []
    for patch in patches:
        green = np.expand_dims(patch[:, :, 1], axis=-1)  # G channel
        green = np.asarray(green, dtype=np.float16)
        list_tmp.append(input_norm(green))
    return np.array(list_tmp)
def gen(self):
    """Yield class-balanced ``(x_train, y_train)`` batches forever.

    At the start of every pass the class sampling weights are recomputed by
    ``_get_class_weight`` from the per-class sample counts, the weight power
    and the current epoch number; ``get_balance_class`` then produces the
    re-balanced file/label lists that are cut into batches.

    Yields:
        Tuple ``(x_train, y_train)``: a float16 image batch (passed through
        ``input_norm`` when ``self.do_normalize`` is set) and the labels
        converted with ``np_utils.to_categorical`` over ``self.num_class``.
    """
    total = len(self.train_files)
    epoch = 0  # the dynamic weights depend on the epoch number
    while True:
        # Sampling weights derived from every class' sample size.
        class_weights = _get_class_weight(
            self.list_class_samples_num, self.file_weight_power, epoch)
        balanced_files, balanced_labels = get_balance_class(
            self.train_files, self.train_labels, weights=class_weights)
        print('\nlabels:', balanced_labels)

        for batch_no in range(math.ceil(total / self.batch_size)):
            # Slicing past the end is tolerated, so a trailing partial
            # batch is still used and no data is dropped.
            begin = batch_no * self.batch_size
            end = (batch_no + 1) * self.batch_size
            batch_files = balanced_files[begin:end]
            batch_labels = balanced_labels[begin:end]

            x_train = my_image_helper.load_resize_images(batch_files, self.image_shape)
            if self.imgaug_seq is not None:
                x_train = self.imgaug_seq.augment_images(x_train)
            x_train = np.asarray(x_train, dtype=np.float16)
            if self.do_normalize:
                x_train = input_norm(x_train)

            label_ids = np.asarray(batch_labels, dtype=np.uint8)
            y_train = np_utils.to_categorical(label_ids, num_classes=self.num_class)

            yield x_train, y_train

        epoch += 1
def my_Generator_test(files, image_shape=(299, 299, 3), batch_size=64,
                      imgaug_seq=None, do_normalize=True, imgaug_times=1):
    """Yield image batches for prediction, making a single pass over ``files``.

    Args:
        files: Sequence of image files to load.
        image_shape: Target shape handed to ``load_resize_images``.
        batch_size: Number of files per batch.
        imgaug_seq: Optional augmenter; when given, each loaded batch is
            repeated ``imgaug_times`` times before being augmented.
        do_normalize: Whether to pass the batch through ``input_norm``.
        imgaug_times: Repetition count used only when ``imgaug_seq`` is set.

    Yields:
        float16 array holding one batch of images.
    """
    total = len(files)
    for batch_no in range(math.ceil(total / batch_size)):
        batch_files = files[batch_no * batch_size:(batch_no + 1) * batch_size]
        loaded = my_image_helper.load_resize_images(batch_files, image_shape)

        if imgaug_seq is not None:
            repeated = []
            for _ in range(imgaug_times):
                repeated.extend(loaded)
            batch = imgaug_seq.augment_images(repeated)
        else:
            batch = loaded

        batch = np.asarray(batch, dtype=np.float16)
        if do_normalize:
            batch = input_norm(batch)

        yield batch
def my_Generator_seg_multiclass(list_images, list_masks,
                                image_shape=(299, 299, 3), batch_size=64,
                                train_or_valid='train', do_binary=True,
                                img_aug_mode_rotate_flip=1,
                                img_aug_mode_contrast=False):
    """Yield ``(X_train, Y_train)`` batches for multi-class segmentation, forever.

    Every sample is loaded and augmented on its own through
    ``my_images_aug.img_aug_seg_multiclass``; the per-sample results are then
    joined along axis 0 into one batch.

    Args:
        list_images: Image entries, one per sample.
        list_masks: Mask entries, aligned index-by-index with ``list_images``.
        image_shape: Target shape forwarded to the augmentation helper.
        batch_size: Number of samples per batch.
        train_or_valid: Mode flag forwarded to the augmentation helper.
        do_binary: When true, masks are integer-divided by 128 after the
            uint8 conversion.
        img_aug_mode_rotate_flip: Forwarded to the augmentation helper.
        img_aug_mode_contrast: Forwarded to the augmentation helper.

    Yields:
        Tuple ``(X_train, Y_train)``: float16 images passed through
        ``input_norm`` and uint8 masks.
    """
    n_samples = len(list_images)

    while True:
        for i in range((n_samples + batch_size - 1) // batch_size):
            sl = slice(i * batch_size, (i + 1) * batch_size)
            files_images_batch = list_images[sl]
            files_masks_batch = list_masks[sl]

            batch_x = []
            batch_y = []
            # Index-based pairing keeps the IndexError if masks run short.
            for j, file_image in enumerate(files_images_batch):
                file_masks = files_masks_batch[j]

                x_train, y_train = my_images_aug.img_aug_seg_multiclass(
                    list_images_files=file_image, list_masks_files=file_masks,
                    image_shape=image_shape, train_or_valid=train_or_valid,
                    img_aug_mode_rotate_flip=img_aug_mode_rotate_flip,
                    img_aug_mode_contrast=img_aug_mode_contrast)

                x_train = np.asarray(x_train, dtype=np.float16)
                y_train = np.asarray(y_train, dtype=np.float16)
                # Swap first and last axes, e.g. (2,384,384,1) -> (1,384,384,2).
                y_train = y_train.transpose((3, 1, 2, 0))

                batch_x.append(x_train)
                batch_y.append(y_train)

            # One concatenate per batch instead of re-copying after each sample.
            X_train = np.concatenate(batch_x, axis=0)
            Y_train = np.concatenate(batch_y, axis=0)

            # float16 is sufficient here.
            X_train = np.asarray(X_train, dtype=np.float16)
            # Normalize the whole batch; previously the result was assigned to
            # the per-sample variable and the yielded batch stayed raw.
            X_train = input_norm(X_train)

            Y_train = np.asarray(Y_train, dtype=np.uint8)
            if do_binary:
                Y_train //= 128  # binarize the masks

            yield X_train, Y_train
def gen(self):
    """Yield ``(x_train, y_train)`` batches forever for 2D or 3D inputs.

    Inputs come from ``load_resize_images`` when ``self.ndim == '2D'`` and
    from ``get_npy`` otherwise. Targets depend on the mode flags:
    regression keeps the labels as float16, multi-label turns strings such
    as ``'4_8_28'`` into a 0/1 vector of length ``self.num_output``, and the
    remaining case goes through ``np_utils.to_categorical``.

    Yields:
        Tuple ``(x_train, y_train)``; for ``ndim == '3D'`` a trailing axis
        is appended to ``x_train``.
    """
    total = len(self.files)

    while True:
        for batch_no in range(math.ceil(total / self.batch_size)):
            window = slice(batch_no * self.batch_size,
                           (batch_no + 1) * self.batch_size)
            batch_files = self.files[window]
            batch_labels = self.labels[window]

            if self.ndim == '2D':
                x_train = my_image_helper.load_resize_images(
                    batch_files, self.image_shape)
            else:
                # 3D DICOM files have been saved to npy files.
                x_train = get_npy(batch_files)

            if self.imgaug_seq is not None:
                x_train = self.imgaug_seq.augment_images(x_train)

            x_train = np.asarray(x_train, dtype=np.float16)
            if self.do_normalize:
                x_train = input_norm(x_train)

            # NOTE(review): the attribute is spelled `regressin`; confirm it
            # matches the name assigned in the class constructor.
            if self.regressin:
                y_train = np.asarray(batch_labels, dtype=np.float16)
            elif self.multi_labels:
                rows = []
                for raw in batch_labels:
                    # '4_8_28' -> [4, 8, 28]
                    active = [int(tok) for tok in str(raw).split('_') if tok != '']
                    # [1, 4] -> [0, 1, 0, 0, 1, 0, 0, ...]
                    rows.append([1 if pos in active else 0
                                 for pos in range(self.num_output)])
                y_train = np.asarray(rows, dtype=np.uint8)
            else:
                y_train = np.asarray(batch_labels, dtype=np.uint8)
                y_train = np_utils.to_categorical(
                    y_train, num_classes=self.num_output)

            if self.ndim == '3D':
                x_train = np.expand_dims(x_train, axis=-1)

            yield x_train, y_train
def gen(self):
    """Yield dynamically re-balanced ``(x_train, y_train)`` batches forever.

    Each pass asks ``self.resampling_dynamic`` for class weights (based on
    the start/end weights, the balance ratio and the current epoch) and
    rebuilds the file/label lists with ``get_balance_class``.

    Yields:
        Tuple ``(x_train, y_train)``: a float16 image batch (normalized when
        ``self.do_normalize`` is set, with a trailing axis appended when
        ``self.ndim == '3D'``) and labels converted by
        ``np_utils.to_categorical``, passed through ``smooth_labels`` when
        ``self.smooth_factor > 0``.
    """
    n_samples = len(self.train_files)
    current_epoch = 0

    while True:
        weights = self.resampling_dynamic(
            weight_class_start=self.weight_class_start,
            weight_class_end=self.weight_class_end,
            balance_ratio=self.balance_ratio,
            epoch=current_epoch)

        train_files_balanced, train_labels_balanced = get_balance_class(
            self.train_files, self.train_labels, weights=weights)

        for i in range(math.ceil(n_samples / self.batch_size)):
            sl = slice(i * self.batch_size, (i + 1) * self.batch_size)
            files_batch = train_files_balanced[sl]
            labels_batch = train_labels_balanced[sl]

            if self.ndim == '2D':
                x_train = my_image_helper.load_resize_images(
                    files_batch, self.image_shape)
            else:
                x_train = get_npy(files_batch)

            if self.imgaug_seq is not None:
                x_train = self.imgaug_seq.augment_images(x_train)

            # The augmenter hands back a list of images; convert it into
            # one float16 array.
            x_train = np.asarray(x_train, dtype=np.float16)
            if self.do_normalize:
                x_train = input_norm(x_train)

            y_train = np.asarray(labels_batch, dtype=np.uint8)
            y_train = np_utils.to_categorical(y_train, num_classes=self.num_class)
            if self.smooth_factor > 0:
                y_train = smooth_labels(y_train, self.smooth_factor)

            if self.ndim == '3D':
                x_train = np.expand_dims(x_train, axis=-1)

            yield x_train, y_train

        current_epoch += 1
def my_Generator_seg(files_images, files_masks, image_shape=(299, 299, 3),
                     batch_size=64, do_binary=True, imgaug_seq=None,
                     single_channel_no=None):
    """Yield ``(x_train, y_train)`` image/mask batches for segmentation, forever.

    Args:
        files_images: Image files.
        files_masks: Mask files aligned with ``files_images``; loaded with
            ``grayscale=True``.
        image_shape: Target shape handed to ``load_resize_images``.
        batch_size: Number of samples per batch.
        do_binary: When true, masks are integer-divided by 128 in place.
        imgaug_seq: Optional augmenter; a deterministic copy is applied to
            both images and masks.
        single_channel_no: When not None, only this channel of the images
            is kept (with a trailing axis of size 1).

    Yields:
        Tuple ``(x_train, y_train)``: normalized float16 images and uint8
        masks.
    """
    total = len(files_images)

    while True:
        for batch_no in range((total + batch_size - 1) // batch_size):
            window = slice(batch_no * batch_size, (batch_no + 1) * batch_size)

            images = my_image_helper.load_resize_images(
                files_images[window], image_shape)
            masks = my_image_helper.load_resize_images(
                files_masks[window], image_shape, grayscale=True)

            if imgaug_seq is not None:
                # One deterministic augmenter so images and masks receive
                # the same transformation.
                frozen = imgaug_seq.to_deterministic()
                x_train = frozen.augment_images(images)
                y_train = frozen.augment_images(masks)
            else:
                x_train = images
                y_train = masks

            x_train = input_norm(np.asarray(x_train, dtype=np.float16))

            if single_channel_no is not None:
                # Channels are in BGR order; index 1 selects green.
                x_train = np.expand_dims(
                    x_train[:, :, :, single_channel_no], axis=-1)

            y_train = np.asarray(y_train, dtype=np.uint8)
            if do_binary:
                # Binarize the masks for a sigmoid output.
                y_train //= 128

            # x_train: (batch, H, W, 3), or (batch, H, W, 1) for one channel
            # y_train: (batch, H, W, 1)
            yield x_train, y_train
def my_Generator_fovea_reg(files_images, files_masks, image_shape=(299, 299, 3),
                           batch_size=64, imgaug_seq=None):
    """Yield ``(x_train, y_train)`` batches for center-point regression, forever.

    The regression target of every sample is the ``(center_x, center_y)``
    pair that ``my_image_object_boundary`` derives from the sample's mask.

    Args:
        files_images: Image files.
        files_masks: Mask files aligned with ``files_images``; loaded with
            ``grayscale=True``.
        image_shape: Target shape handed to ``load_resize_images``.
        batch_size: Number of samples per batch.
        imgaug_seq: Optional augmenter; a deterministic copy is applied to
            both images and masks.

    Yields:
        Tuple ``(x_train, y_train)``: normalized float16 images and an array
        with one ``[center_x, center_y]`` row per sample.
    """
    total = len(files_images)

    while True:
        for batch_no in range((total + batch_size - 1) // batch_size):
            window = slice(batch_no * batch_size, (batch_no + 1) * batch_size)

            images = my_image_helper.load_resize_images(
                files_images[window], image_shape)
            masks = my_image_helper.load_resize_images(
                files_masks[window], image_shape, grayscale=True)

            if imgaug_seq is not None:
                # Same deterministic transform for images and masks.
                frozen = imgaug_seq.to_deterministic()
                x_train = frozen.augment_images(images)
                mask_batch = frozen.augment_images(masks)
            else:
                x_train = images
                mask_batch = masks

            x_train = input_norm(np.asarray(x_train, dtype=np.float16))

            centers = []
            for mask in mask_batch:
                left, right, bottom, top = my_image_object_boundary.get_boundry(mask)
                center_x, center_y, width, height = \
                    my_image_object_boundary.convert_to_center_w_h(
                        left, right, bottom, top)

                # Boxes narrower than 2 are only reported; the sample is kept.
                if width < 2 or height < 2:
                    print('error:', left, right, bottom, top)

                centers.append([center_x, center_y])

            # x_train: (batch, H, W, 3); y_train: (batch, 2)
            y_train = np.array(centers)

            yield x_train, y_train
def gen(self):
    """Yield input batches in a single pass over ``self.files``.

    Yields:
        float16 image batch, augmented when ``self.imgaug_seq`` is set and
        passed through ``input_norm`` when ``self.do_normalize`` is set.
    """
    total = len(self.files)

    for batch_no in range(math.ceil(total / self.batch_size)):
        begin = batch_no * self.batch_size
        end = (batch_no + 1) * self.batch_size

        batch = load_resize_images(self.files[begin:end], self.image_shape)
        if self.imgaug_seq is not None:
            batch = self.imgaug_seq.augment_images(batch)

        batch = np.asarray(batch, dtype=np.float16)
        if self.do_normalize:
            batch = input_norm(batch)

        yield batch
def my_Generator_test_time_aug(file, image_shape=(299, 299, 3),
                               do_normalize=True, random_times=0):
    """Build a test-time-augmentation tensor for the given image file(s).

    The result holds, in order: the loaded images, a left-right flip, an
    up-down flip, a flip along both axes, and then ``random_times`` randomly
    augmented copies (random flips plus a small affine scale/translation).

    Args:
        file: Image file(s) handed to ``load_resize_images``.
        image_shape: Target shape handed to ``load_resize_images``.
        do_normalize: Whether to pass the result through ``input_norm``.
        random_times: Number of extra randomly augmented copies.

    Returns:
        float16 array with all variants.
    """
    images = my_image_helper.load_resize_images(file, image_shape)

    from imgaug import augmenters as iaa

    # Always-applied flips: horizontal, vertical, then both.
    fixed_recipes = (
        (iaa.Fliplr,),
        (iaa.Flipud,),
        (iaa.Fliplr, iaa.Flipud),
    )

    x_test = images
    for recipe in fixed_recipes:
        flipper = iaa.Sequential([make_flip(1) for make_flip in recipe])
        x_test = x_test + flipper.augment_images(images)

    if random_times > 0:
        random_seq = iaa.Sequential([
            iaa.Fliplr(0.5),  # horizontal flip with probability 0.5
            iaa.Flipud(0.2),  # vertical flip with probability 0.2
            iaa.Affine(
                scale=(0.97, 1.03),
                translate_percent={"x": (-0.04, 0.04), "y": (-0.04, 0.04)},
            ),
        ])

        for _ in range(random_times):
            x_test += random_seq.augment_images(images)

    x_test = np.asarray(x_test, dtype=np.float16)
    if do_normalize:
        x_test = input_norm(x_test)

    return x_test
def my_gen_img_tensor(file, image_shape=(299, 299, 3), imgaug_seq=None,
                      imgaug_times=1):
    """Load image file(s) into one normalized float16 tensor.

    Args:
        file: Image file(s) handed to ``load_resize_images``.
        image_shape: Target shape handed to ``load_resize_images``.
        imgaug_seq: Optional augmenter; when given, the loaded images are
            repeated ``imgaug_times`` times and then augmented.
        imgaug_times: Repetition count used only when ``imgaug_seq`` is set.

    Returns:
        float16 array passed through ``input_norm``.
    """
    loaded = load_resize_images(file, image_shape)

    if imgaug_seq is not None:
        repeated = []
        for _ in range(imgaug_times):
            repeated.extend(loaded)
        loaded = imgaug_seq.augment_images(repeated)

    tensor = np.asarray(loaded, dtype=np.float16)
    return input_norm(tensor)
def __getitem__(self, idx):
    """Return batch number ``idx`` of normalized input images.

    The batch is returned directly. The previous ``yield`` turned this
    method into a generator function, so indexing produced a generator
    object instead of the batch array.

    Args:
        idx: Zero-based batch index.

    Returns:
        float16 array passed through ``input_norm``; when
        ``self.single_channel_no`` is not None only that channel is kept,
        with a trailing axis of size 1.
    """
    sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
    files_images_batch = self.files_images[sl]

    x_train = my_image_helper.load_resize_images(files_images_batch, self.image_shape)
    x_train = np.asarray(x_train, dtype=np.float16)
    x_train = input_norm(x_train)

    if self.single_channel_no is not None:
        # Channels are in BGR order; index 1 selects green.
        x_train = x_train[:, :, :, self.single_channel_no]
        x_train = np.expand_dims(x_train, axis=-1)

    # x_train: (batch, H, W, 3), or (batch, H, W, 1) for a single channel
    return x_train
def gen(self):
    """Yield shifted/flipped input batches in a single pass over ``self.files``.

    Every batch is loaded through ``load_resize_images_imgaug`` with the
    instance's ``dx``, ``dy`` and ``do_flip`` settings.

    Yields:
        float16 image batch, passed through ``input_norm`` when
        ``self.do_normalize`` is set.
    """
    total = len(self.files)

    for batch_no in range(math.ceil(total / self.batch_size)):
        from LIBS.ImgPreprocess.my_test_time_img_aug import load_resize_images_imgaug

        begin = batch_no * self.batch_size
        end = (batch_no + 1) * self.batch_size

        batch = load_resize_images_imgaug(
            self.files[begin:end], self.image_shape,
            dx=self.dx, dy=self.dy, do_flip=self.do_flip)

        batch = np.asarray(batch, dtype=np.float16)
        if self.do_normalize:
            batch = input_norm(batch)

        yield batch
def my_Generator_seg_test(list_images, image_shape=(299, 299, 3),
                          do_normalize=True, batch_size=64,
                          single_channel_no=None):
    """Yield input-only batches for segmentation inference, cycling forever.

    Args:
        list_images: Image files.
        image_shape: Target shape handed to ``load_resize_images``.
        do_normalize: Whether to pass each batch through ``input_norm``.
        batch_size: Number of images per batch.
        single_channel_no: When not None, only this channel is kept (with a
            trailing axis of size 1).

    Yields:
        float16 image batch.
    """
    total = len(list_images)

    while True:
        for batch_no in range((total + batch_size - 1) // batch_size):
            batch_files = list_images[batch_no * batch_size:(batch_no + 1) * batch_size]

            batch = my_image_helper.load_resize_images(batch_files, image_shape)
            batch = np.asarray(batch, dtype=np.float16)
            if do_normalize:
                batch = input_norm(batch)

            if single_channel_no is not None:
                # Channels are in BGR order; pick the requested one.
                batch = np.expand_dims(
                    batch[:, :, :, single_channel_no], axis=-1)

            yield batch
def __getitem__(self, idx):
    """Return batch number ``idx`` as an ``(x_train, y_train)`` pair.

    The pair is returned directly. The previous ``yield`` turned this
    method into a generator function, so indexing produced a generator
    object instead of the batch.

    Args:
        idx: Zero-based batch index.

    Returns:
        Tuple ``(x_train, y_train)``: float16 images passed through
        ``input_norm`` (reduced to one channel when
        ``self.single_channel_no`` is not None) and uint8 masks,
        integer-divided by 128 when ``self.do_binary`` is set.
    """
    sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
    files_images_batch = self.files_images[sl]
    files_masks_batch = self.files_masks[sl]

    list_images = my_image_helper.load_resize_images(files_images_batch, self.image_shape)
    list_masks = my_image_helper.load_resize_images(files_masks_batch, self.image_shape, grayscale=True)

    if self.imgaug_seq is None:
        x_train = list_images
        y_train = list_masks
    else:
        # One deterministic augmenter so images and masks receive the same
        # transformation.
        seq_det = self.imgaug_seq.to_deterministic()
        x_train = seq_det.augment_images(list_images)
        y_train = seq_det.augment_images(list_masks)

    x_train = np.asarray(x_train, dtype=np.float16)
    x_train = input_norm(x_train)

    if self.single_channel_no is not None:
        # Channels are in BGR order; index 1 selects green.
        x_train = x_train[:, :, :, self.single_channel_no]
        x_train = np.expand_dims(x_train, axis=-1)

    y_train = np.asarray(y_train, dtype=np.uint8)
    if self.do_binary:
        # Binarize the masks for a sigmoid output.
        y_train //= 128

    # x_train: (batch, H, W, 3), or (batch, H, W, 1) for a single channel
    # y_train: (batch, H, W, 1)
    return x_train, y_train
# Build a reference image tensor, interleave all-black images into it and
# save it to 'ref.npy'.
# NOTE(review): the slice below stops at SAMPLES_NUM - 1; if `df` slices
# positionally this selects SAMPLES_NUM - 1 rows - confirm that is intended.
imagefiles = df[0:SAMPLES_NUM - 1]['images'].tolist()
image_size = 299
x_train = my_images_aug.imgs_aug(list_image_files=imagefiles, train_or_valid='valid', image_shape=(image_size, image_size, 3))

# Add black images: insert an all-zero image at every index that is a
# multiple of ADD_BLACK_INTERVAL. The same array object is inserted each time.
img_black = np.zeros((image_size, image_size, 3))
for i in range(SAMPLES_NUM):
    if (i % ADD_BLACK_INTERVAL == 0):
        x_train.insert(i, img_black)

# Disabled: clip back to SAMPLES_NUM entries after adding the black images.
# x_train = x_train[:SAMPLES_NUM]
x_train = np.asarray(x_train, dtype=np.float16)

from LIBS.ImgPreprocess.my_image_norm import input_norm
x_train = input_norm(x_train)
# Disabled manual normalization:
# x_train /= 255.
# x_train -= 0.5
# x_train *= 2.

# Expected layout: (samples_num, image_size, image_size, 3)
save_filename = 'ref.npy'
np.save(save_filename, x_train)

# Load the file back right after writing it.
background = np.load(save_filename)

print('OK')
def gen(self):
    """Yield multi-label ``(x_train, y_train)`` batches with dynamic resampling.

    Each pass rebuilds a balanced sample list: the training files are walked
    cyclically and every file is accepted with probability
    ``weight[class] / sum(weights)``, where the class comes from
    ``self.train_labels_single`` and the weights from ``_get_class_weight``
    for the current epoch. Walking stops once ``len(self.train_files)``
    samples have been accepted.

    Yields:
        Tuple ``(x_train, y_train)``: a float16 image batch (normalized when
        ``self.do_normalize`` is set) and a uint8 label matrix parsed from
        strings such as ``'0_1_1'``, passed through ``smooth_labels`` when
        ``self.label_smoothing > 0``.
    """
    n_samples = len(self.train_files)
    current_epoch = 0  # drives the dynamic resampling weights
    max_num = 1000000  # resolution of the integer-based acceptance test

    while True:
        # Balanced dataset via dynamic resampling on the single label.
        classes_weights = _get_class_weight(
            self.list_class_samples_num, self.file_weight_power, current_epoch)
        classes_weights_total = sum(classes_weights)

        files_balanced = []
        labels_balanced = []
        current_index = 0  # may wrap around train_files more than once

        while len(labels_balanced) < n_samples:
            # Sampling probability of this single sample.
            sample_class = int(self.train_labels_single[current_index])
            prob_sampling = classes_weights[sample_class] / classes_weights_total

            if random.randint(1, max_num) <= max_num * prob_sampling:
                # This image has been selected.
                files_balanced.append(self.train_files[current_index])
                labels_balanced.append(self.train_labels[current_index])

            current_index = (current_index + 1) % n_samples

        print('\nlabels:', labels_balanced)

        for i in range(math.ceil(n_samples / self.batch_size)):
            sl = slice(i * self.batch_size, (i + 1) * self.batch_size)
            files_batch, labels_batch = files_balanced[sl], labels_balanced[sl]

            # Images.
            x_train = my_image_helper.load_resize_images(files_batch, self.image_shape)
            if self.imgaug_seq is not None:
                x_train = self.imgaug_seq.augment_images(x_train)
            x_train = np.asarray(x_train, dtype=np.float16)
            if self.do_normalize:
                x_train = input_norm(x_train)

            # Labels: '0_1_1' -> [0, 1, 1]; empty fragments are skipped.
            y_train = []
            for labels_str in labels_batch:
                y_train.append(
                    [int(label) for label in str(labels_str).split('_') if label != ''])
            y_train = np.asarray(y_train, dtype=np.uint8)

            if self.label_smoothing > 0:
                y_train = smooth_labels(y_train, self.label_smoothing)

            yield x_train, y_train

        current_epoch += 1
def my_gen_img_tensor(files, image_shape=(299, 299, 3)):
    """Load and resize ``files`` into one normalized float16 tensor.

    Args:
        files: Image file(s) handed to ``load_resize_images``.
        image_shape: Target shape handed to ``load_resize_images``.

    Returns:
        float16 array passed through ``input_norm``.
    """
    loaded = load_resize_images(files, image_shape)
    tensor = np.asarray(loaded, dtype=np.float16)
    return input_norm(tensor)
# Build a reference image tensor from the first batch of my_Generator_test,
# append normalized all-black images to it and save it to 'ref.npy'.
imagefiles = df[0:SAMPLES_NUM]['images'].tolist()
image_size = 299
# batch_size equals SAMPLES_NUM, so only the first batch is requested.
my_gen_test = my_images_generator.my_Generator_test(files=imagefiles, image_shape=(image_size, image_size, 3), do_normalize=True, batch_size=SAMPLES_NUM)
x_train = my_gen_test.__next__()

# Add black images: the black image goes through the same normalization as
# the real ones and gets a leading axis so it can be appended along axis 0.
img_black = np.zeros((image_size, image_size, 3))
from LIBS.ImgPreprocess.my_image_norm import input_norm
img_black = input_norm(img_black)
img_black = np.expand_dims(img_black, axis=0)
# One black image is appended (at the end) for every index that is a
# multiple of ADD_BLACK_INTERVAL.
for i in range(SAMPLES_NUM):
    if (i % ADD_BLACK_INTERVAL == 0):
        x_train = np.append(x_train, img_black, axis=0)

# Disabled: clip back to SAMPLES_NUM entries after adding the black images.
# x_train = x_train[:SAMPLES_NUM]
x_train = np.asarray(x_train, dtype=np.float16)

save_filename = 'ref.npy'
np.save(save_filename, x_train)

# Load the file back right after writing it.
background = np.load(save_filename)

print('OK')
def gen(self):
    """Yield ``(x_train, y_train)`` batches forever.

    Targets depend on the mode flags: regression keeps the labels as
    float16; multi-label parses strings such as ``'0_1_1'`` into integer
    rows; the remaining case goes through ``to_categorical`` over
    ``self.num_output`` classes.

    Yields:
        Tuple ``(x_train, y_train)``: a float16 image batch (augmented when
        ``self.imgaug_seq`` is set, normalized when ``self.do_normalize`` is
        set) and the matching targets.
    """
    n_samples = len(self.files)

    while True:
        for i in range(math.ceil(n_samples / self.batch_size)):
            sl = slice(i * self.batch_size, (i + 1) * self.batch_size)
            files_batch = self.files[sl]
            labels_batch = self.labels[sl]

            x_train = load_resize_images(files_batch, self.image_shape)
            if self.imgaug_seq is not None:
                x_train = self.imgaug_seq.augment_images(x_train)
            x_train = np.asarray(x_train, dtype=np.float16)
            if self.do_normalize:
                x_train = input_norm(x_train)

            if self.regression:
                y_train = np.asarray(labels_batch, dtype=np.float16)
            elif self.multi_labels:
                # '0_1_1' -> [0, 1, 1]; empty fragments are skipped.
                y_train = []
                for labels_str in labels_batch:
                    y_train.append(
                        [int(label) for label in str(labels_str).split('_') if label != ''])
                y_train = np.asarray(y_train, dtype=np.uint8)
            else:
                y_train = np.asarray(labels_batch, dtype=np.uint8)
                y_train = to_categorical(y_train, num_classes=self.num_output)

            yield x_train, y_train