def _setting_vgg(self):
    """Build a VGG classifier and its train/valid distributors.

    Reads ``layer`` (11, 16 or 19) from ``self.hyper_parameters`` and
    assigns ``self.model``, ``self.train_dist`` and ``self.valid_dist``.
    """
    for param in ['layer']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.CLASSIFICATION.value, self.task_id
    num_layer = int(self.hyper_parameters['layer'])
    assert num_layer in [11, 16, 19], "Not supported layer num. - Vgg"
    # Pick the architecture class matching the requested depth.
    VGG = {11: VGG11, 16: VGG16, 19: VGG19}[num_layer]
    self.model = VGG(class_map=self.class_map,
                     imsize=self.imsize,
                     load_pretrained_weight=self.get_weight_path(VGG),
                     train_whole_network=self.train_whole)
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def _setting_inception(self):
    """Build an InceptionNet classifier and its train/valid distributors.

    Reads ``version`` (1-4) from ``self.hyper_parameters`` and assigns
    ``self.model``, ``self.train_dist`` and ``self.valid_dist``.
    """
    required_params = ['version']
    assert all(
        [k in self.hyper_parameters.keys() for k in required_params])
    assert self.task_id == Task.CLASSIFICATION.value, self.task_id
    version_num = int(self.hyper_parameters['version'])
    assert version_num in [
        1, 2, 3, 4
    ], "Not supported version number. - InceptionNet"
    # Bug fix: the elif branches previously tested the undefined name
    # `num_layer` (copy-paste from the layer-based setters), raising
    # NameError for versions 2-4. They must test `version_num`.
    if version_num == 1:
        Inception = InceptionV1
    elif version_num == 2:
        Inception = InceptionV2
    elif version_num == 3:
        Inception = InceptionV3
    elif version_num == 4:
        Inception = InceptionV4
    self.model = Inception(
        class_map=self.class_map,
        imsize=self.imsize,
        load_pretrained_weight=self.get_weight_path(Inception),
        train_whole_network=self.train_whole)
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def _setting_resnext(self):
    """Build a ResNeXt classifier and its train/valid distributors.

    Reads ``layer`` (50 or 101) and ``plateau`` from
    ``self.hyper_parameters`` and assigns ``self.model``,
    ``self.train_dist`` and ``self.valid_dist``.
    """
    for param in ['layer', 'plateau']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.CLASSIFICATION.value, self.task_id
    num_layer = int(self.hyper_parameters['layer'])
    assert num_layer in [50, 101], "Not supported layer num. - ResNeXt"
    # Select the architecture class for the requested depth.
    ResNeXt = {50: ResNeXt50, 101: ResNeXt101}[num_layer]
    self.model = ResNeXt(
        class_map=self.class_map,
        imsize=self.imsize,
        train_whole_network=self.train_whole,
        load_pretrained_weight=self.get_weight_path(ResNeXt),
        plateau=self.hyper_parameters["plateau"])
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def get_probability(self, img_list):
    """Return class probabilities for the given input.

    Accepts an image path, a list of paths, or an ndarray batch.

    Args:
        img_list(ndarray, list, string): Image array, image path list or image path.

    Return:
        (ndarray or list): Softmax probability vector(s) for each image.

    Note:
        Bug fix: the path-list and single-path branches previously
        returned ``np.argmax`` class ids (copy-pasted from ``predict``)
        while the ndarray branch returned softmax output. A probability
        getter must return probabilities for every input form.
    """
    batch_size = 32
    self.set_models(inference=True)
    if isinstance(img_list, (list, str)):
        if isinstance(img_list, (tuple, list)):
            if len(img_list) >= 32:
                # Many paths: stream through a distributor in batches.
                test_dist = ImageDistributor(img_list)
                results = []
                bar = tqdm(range(int(np.ceil(len(test_dist) / batch_size))))
                for i, (x_img_list, _) in enumerate(
                        test_dist.batch(batch_size, shuffle=False)):
                    img_array = np.vstack([
                        load_img(path, self.imsize)[None]
                        for path in x_img_list
                    ])
                    img_array = self.preprocess(img_array)
                    # One probability row per image (no argmax here).
                    results.extend(
                        rm.softmax(self(img_array)).as_ndarray())
                    bar.update(1)
                return results
            img_array = np.vstack(
                [load_img(path, self.imsize)[None] for path in img_list])
            img_array = self.preprocess(img_array)
        else:
            # Single image path: return one probability vector.
            img_array = load_img(img_list, self.imsize)[None]
            img_array = self.preprocess(img_array)
            return rm.softmax(self(img_array)).as_ndarray()[0]
    else:
        img_array = img_list
    return rm.softmax(self(img_array)).as_ndarray()
def create_dist(self, filename_list, train=True):
    """Build an ImageDistributor from a list of file names.

    Image and label files must share the same base name: the image is
    looked up under ``DATASRC_IMG`` and its annotation under
    ``DATASRC_LABEL`` as ``<name>.xml``. Pairs where either file is
    missing are skipped with a console message.

    The annotation format produced is:

    .. code-block :: python

        [
            [ # Annotations of each image.
                {"box":[x, y, w, h], "name":"dog", "class":1},
                {"box":[x, y, w, h], "name":"cat", "class":0},
            ],
            [
                {"box":[x, y, w, h], "name":"cat", "class":0},
            ],
            ...
        ]

    Args:
        filename_list(list): [filename1, filename2, ...]
        train(bool): If True, augmentation is attached to the distributor.

    Returns:
        (ImageDistributor): Distributor over the found image/annotation pairs.
    """
    img_path_list = []
    label_path_list = []
    for fname in filename_list:
        base = os.path.splitext(fname)[0]
        img_path = os.path.join(DATASRC_IMG, fname)
        label_path = os.path.join(DATASRC_LABEL, base + ".xml")
        # Skip (and report) any pair with a missing image or label file.
        if not (os.path.exists(img_path) and os.path.exists(label_path)):
            print("{} not found.".format(base))
            continue
        img_path_list.append(img_path)
        label_path_list.append(label_path)
    annotation_list, _ = parse_xml_detection(label_path_list)
    if not train:
        return ImageDistributor(img_path_list, annotation_list)
    # Training distributor carries the augmentation pipeline.
    augmentation = Augmentation([
        Shift(min(self.imsize[0] // 10, 20), min(self.imsize[1] // 10, 20)),
        Flip(),
        Rotate(),
        WhiteNoise(),
        ContrastNorm([0.5, 1.0])
    ])
    return ImageDistributor(img_path_list, annotation_list,
                            augmentation=augmentation)
def _setting_fcn(self):
    """Build an FCN segmentation model and its train/valid distributors.

    Reads ``layer`` (8, 16 or 32) from ``self.hyper_parameters`` and
    assigns ``self.model``, ``self.train_dist`` and ``self.valid_dist``.
    """
    for param in ['layer']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.SEGMENTATION.value, self.task_id
    num_layer = int(self.hyper_parameters['layer'])
    assert num_layer in [8, 16, 32], "Not supported layer num. - FCN"
    # Select the FCN variant matching the requested stride.
    FCN = {8: FCN8s, 16: FCN16s, 32: FCN32s}[num_layer]
    self.model = FCN(class_map=self.class_map,
                     imsize=self.imsize,
                     load_pretrained_weight=self.get_weight_path(FCN),
                     train_whole_network=self.train_whole)
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def _setting_densenet(self):
    """Build a DenseNet classifier and its train/valid distributors.

    Reads ``layer`` (121, 169 or 201) from ``self.hyper_parameters``
    and assigns ``self.model``, ``self.train_dist`` and
    ``self.valid_dist``.
    """
    for param in ['layer']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.CLASSIFICATION.value, self.task_id
    num_layer = int(self.hyper_parameters['layer'])
    assert num_layer in [121, 169, 201], "Not supported layer num. - DenseNet"
    # Select the architecture class for the requested depth.
    DenseNet = {121: DenseNet121, 169: DenseNet169, 201: DenseNet201}[num_layer]
    self.model = DenseNet(
        class_map=self.class_map,
        imsize=self.imsize,
        load_pretrained_weight=self.get_weight_path(DenseNet),
        train_whole_network=self.train_whole)
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def _setting_unet(self):
    """Build a U-Net segmentation model and its train/valid distributors."""
    assert self.task_id == Task.SEGMENTATION.value, self.task_id
    self.model = UNet(
        class_map=self.class_map,
        imsize=self.imsize,
        load_pretrained_weight=self.get_weight_path(UNet),
        train_whole_network=self.train_whole)
    # Training distributor carries augmentation; validation does not.
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def _setting_ssd(self):
    """Build an SSD detector and its train/valid distributors."""
    # NOTE(review): `all([<keys view>])` is just a truthiness test —
    # it asserts the hyper-parameter dict is non-empty, not that any
    # particular key exists. Kept as-is to preserve behavior.
    assert all([self.hyper_parameters.keys()])
    assert self.task_id == Task.DETECTION.value, self.task_id
    self.model = SSD(
        class_map=self.class_map,
        imsize=self.imsize,
        train_whole_network=self.train_whole,
        load_pretrained_weight=self.get_weight_path(SSD))
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def predict(self, img_list):
    """Perform prediction. Argument can be an image array, image path
    list or an image path.

    Args:
        img_list(ndarray, list, string): Image array, image path list or image path.

    Return:
        (list): Class id of each image (a single id for a single path).
    """
    batch_size = 32
    self.set_models(inference=True)
    if not isinstance(img_list, (list, str)):
        # Already an ndarray batch: run it through the network as-is.
        return np.argmax(rm.softmax(self(img_list)).as_ndarray(), axis=1)
    if isinstance(img_list, str):
        # Single image path: return one class id.
        batch = self.preprocess(load_img(img_list, self.imsize)[None])
        return np.argmax(rm.softmax(self(batch)).as_ndarray(), axis=1)[0]
    if len(img_list) >= 32:
        # Large path list: stream it through a distributor in batches.
        dist = ImageDistributor(img_list)
        predictions = []
        progress = tqdm(range(int(np.ceil(len(dist) / batch_size))))
        for paths, _ in dist.batch(batch_size, shuffle=False):
            batch = np.vstack(
                [load_img(p, self.imsize)[None] for p in paths])
            batch = self.preprocess(batch)
            predictions.extend(
                np.argmax(rm.softmax(self(batch)).as_ndarray(), axis=1))
            progress.update(1)
        return predictions
    # Small path list: a single forward pass.
    batch = np.vstack([load_img(p, self.imsize)[None] for p in img_list])
    batch = self.preprocess(batch)
    return np.argmax(rm.softmax(self(batch)).as_ndarray(), axis=1)
def _setting_yolov1(self):
    """Build a Yolo v1 detector and its train/valid distributors."""
    # Both hyper parameters must be present before the model is built.
    for param in ['cell', 'box']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.DETECTION.value, self.task_id
    self.model = Yolov1(
        class_map=self.class_map,
        imsize=self.imsize,
        train_whole_network=self.train_whole,
        load_pretrained_weight=self.get_weight_path(Yolov1))
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data())
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def predict(self, img_list):
    """Run inference and return per-pixel class maps.

    Returns:
        (Numpy.array or list): If only an image or a path is given, an
        array whose shape is **(width, height)** is returned. If
        multiple images or paths are given, a list of arrays whose
        shape is **(width, height)** is returned.
    """
    batch_size = 32
    self.set_models(inference=True)
    if not isinstance(img_list, (list, str)):
        # ndarray batch given directly.
        return np.argmax(rm.softmax(self(img_list)).as_ndarray(), axis=1)
    if isinstance(img_list, str):
        # Single path: return one (width, height) class map.
        arr = self.preprocess(load_img(img_list, self.imsize)[None])
        return np.argmax(rm.softmax(self(arr)).as_ndarray(), axis=1)[0]
    if len(img_list) >= 32:
        # Large path list: batch through a distributor with a progress bar.
        dist = ImageDistributor(img_list)
        outputs = []
        progress = tqdm(total=int(np.ceil(len(dist) / batch_size)))
        for paths, _ in dist.batch(batch_size, shuffle=False):
            arr = self.preprocess(
                np.vstack([load_img(p, self.imsize)[None] for p in paths]))
            outputs.extend(
                np.argmax(rm.softmax(self(arr)).as_ndarray(), axis=1))
            progress.update(1)
        return outputs
    # Small path list: one forward pass.
    arr = self.preprocess(
        np.vstack([load_img(p, self.imsize)[None] for p in img_list]))
    return np.argmax(rm.softmax(self(arr)).as_ndarray(), axis=1)
def _setting_yolov2(self):
    """Build a Yolo v2 detector and its train/valid distributors.

    Anchors are clustered from the training targets; the training
    distributor feeds multi-scale targets (288x288 ... 416x416).
    """
    for param in ['anchor']:
        assert param in self.hyper_parameters.keys()
    assert self.task_id == Task.DETECTION.value, self.task_id
    anchor = create_anchor(self.train_target,
                           int(self.hyper_parameters.get('anchor')),
                           base_size=self.imsize)
    self.model = Yolov2(
        class_map=self.class_map,
        imsize=self.imsize,
        anchor=anchor,
        train_whole_network=self.train_whole,
        load_pretrained_weight=self.get_weight_path(Yolov2))
    # Multiples of 32: 288, 320, 352, 384, 416.
    multi_scale = [(s * 32, s * 32) for s in range(9, 14)]
    self.train_dist = ImageDistributor(
        self.train_img,
        self.train_target,
        augmentation=self.augmentation,
        target_builder=self.model.build_data(imsize_list=multi_scale))
    self.valid_dist = ImageDistributor(
        self.valid_img,
        self.valid_target,
        target_builder=self.model.build_data())
def fit(self, train_img_path_list=None, train_annotation_list=None,
        valid_img_path_list=None, valid_annotation_list=None,
        epoch=136, batch_size=64, augmentation=None, callback_end_epoch=None):
    """
    This function performs training with given data and hyper parameters.

    Args:
        train_img_path_list(list): List of image path.
        train_annotation_list(list): List of annotations.
        valid_img_path_list(list): List of image path for validation.
        valid_annotation_list(list): List of annotations for validation.
        epoch(int): Number of training epoch.
        batch_size(int): Number of batch size.
        augmentation(Augmentation): Augmentation object.
        callback_end_epoch(function): Given function will be called at the end of each epoch.

    Returns:
        (tuple): Training loss list and validation loss list.

    Example:
        >>> train_img_path_list, train_annot_list = ... # Define own data.
        >>> valid_img_path_list, valid_annot_list = ...
        >>> model = ${class}() # Any algorithm which provided by ReNomIMG here.
        >>> model.fit(
        ...     # Feeds image and annotation data.
        ...     train_img_path_list,
        ...     train_annot_list,
        ...     valid_img_path_list,
        ...     valid_annot_list,
        ...     epoch=8,
        ...     batch_size=8)
        >>>

    Following arguments will be given to the function ``callback_end_epoch``.

    - **epoch** (int) - Number of current epoch.
    - **model** (Model) - Model object.
    - **avg_train_loss_list** (list) - List of average train loss of each epoch.
    - **avg_valid_loss_list** (list) - List of average valid loss of each epoch.
    """
    # Training data gets augmentation; validation data never does.
    train_dist = ImageDistributor(
        train_img_path_list, train_annotation_list, augmentation=augmentation)
    valid_dist = ImageDistributor(valid_img_path_list, valid_annotation_list)
    batch_loop = int(np.ceil(len(train_dist) / batch_size))
    avg_train_loss_list = []
    avg_valid_loss_list = []
    for e in range(epoch):
        bar = tqdm(range(batch_loop))
        display_loss = 0
        for i, (train_x, train_y) in enumerate(
                train_dist.batch(batch_size, target_builder=self.build_data())):
            self.set_models(inference=False)
            # Forward pass and graph construction happen inside the
            # training context; the optimizer update follows.
            with self.train():
                loss = self.loss(self(train_x), train_y)
                reg_loss = loss + self.regularize()
            # Optimizer also receives the valid-loss history (e.g. for
            # plateau-style schedules).
            reg_loss.grad().update(
                self.get_optimizer(e, epoch, i, batch_loop,
                                   avg_valid_loss_list=avg_valid_loss_list))
            # Loss may come back as a 1-element array or a scalar array
            # depending on the loss implementation.
            try:
                loss = loss.as_ndarray()[0]
            except:
                loss = loss.as_ndarray()
            display_loss += loss
            bar.set_description("Epoch:{:03d} Train Loss:{:5.3f}".format(e, loss))
            bar.update(1)
        avg_train_loss = display_loss / (i + 1)
        avg_train_loss_list.append(avg_train_loss)
        if valid_img_path_list is not None:
            # Reuse the same progress bar for the validation pass.
            bar.n = 0
            bar.total = int(np.ceil(len(valid_dist) / batch_size))
            display_loss = 0
            for i, (valid_x, valid_y) in enumerate(
                    valid_dist.batch(batch_size, target_builder=self.build_data())):
                self.set_models(inference=True)
                loss = self.loss(self(valid_x), valid_y)
                try:
                    loss = loss.as_ndarray()[0]
                except:
                    loss = loss.as_ndarray()
                display_loss += loss
                bar.set_description("Epoch:{:03d} Valid Loss:{:5.3f}".format(e, loss))
                bar.update(1)
            avg_valid_loss = display_loss / (i + 1)
            avg_valid_loss_list.append(avg_valid_loss)
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f} Avg Valid Loss:{:5.3f}".format(
                e, avg_train_loss, avg_valid_loss))
        else:
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f}".format(e, avg_train_loss))
        bar.close()
        if callback_end_epoch is not None:
            callback_end_epoch(e, self, avg_train_loss_list, avg_valid_loss_list)
    return avg_train_loss_list, avg_valid_loss_list
def fit(self, train_img_path_list, train_annotation_list,
        valid_img_path_list=None, valid_annotation_list=None,
        epoch=160, batch_size=16, imsize_list=None, augmentation=None,
        callback_end_epoch=None):
    """
    This function performs training with given data and hyper parameters.
    Yolov2 is trained using multiple scale images. Therefore, this
    function requires a list of image sizes. If it is not given, the
    model will be trained using a fixed image size.

    Args:
        train_img_path_list(list): List of image path.
        train_annotation_list(list): List of annotations.
        valid_img_path_list(list): List of image path for validation.
        valid_annotation_list(list): List of annotations for validation.
        epoch(int): Number of training epoch.
        batch_size(int): Number of batch size.
        imsize_list(list): List of image size.
        augmentation(Augmentation): Augmentation object.
        callback_end_epoch(function): Given function will be called at the end of each epoch.

    Returns:
        (tuple): Training loss list and validation loss list.

    Example:
        >>> from renom_img.api.detection.yolo_v2 import Yolov2
        >>> train_img_path_list, train_annot_list = ... # Define own data.
        >>> valid_img_path_list, valid_annot_list = ...
        >>> model = Yolov2()
        >>> model.fit(
        ...     # Feeds image and annotation data.
        ...     train_img_path_list,
        ...     train_annot_list,
        ...     valid_img_path_list,
        ...     valid_annot_list,
        ...     epoch=8,
        ...     batch_size=8)
        >>>

    Following arguments will be given to the function ``callback_end_epoch``.

    - **epoch** (int) - Number of current epoch.
    - **model** (Model) - Yolo2 object.
    - **avg_train_loss_list** (list) - List of average train loss of each epoch.
    - **avg_valid_loss_list** (list) - List of average valid loss of each epoch.
    """
    if imsize_list is None:
        imsize_list = [self.imsize]
    else:
        # The network downsamples by a factor of 32, so every training
        # size must be a multiple of 32.
        for ims in imsize_list:
            assert (ims[0] / 32.) % 1 == 0 and (ims[1] / 32.) % 1 == 0, \
                "Yolo v2 only accepts 'imsize' argument which is list of multiple of 32. \
                exp),imsize=[(288, 288), (320, 320)]."
    train_dist = ImageDistributor(
        train_img_path_list, train_annotation_list,
        augmentation=augmentation, num_worker=8)
    if valid_img_path_list is not None and valid_annotation_list is not None:
        valid_dist = ImageDistributor(valid_img_path_list, valid_annotation_list)
    else:
        valid_dist = None
    batch_loop = int(np.ceil(len(train_dist) / batch_size))
    avg_train_loss_list = []
    avg_valid_loss_list = []
    for e in range(epoch):
        bar = tqdm(range(batch_loop))
        display_loss = 0
        for i, (train_x, train_y) in enumerate(
                train_dist.batch(batch_size, shuffle=True,
                                 target_builder=self.build_data(imsize_list))):
            # This is for avoiding memory over flow.
            if is_cuda_active() and i % 10 == 0:
                release_mem_pool()
            self.set_models(inference=False)
            with self.train():
                loss = self.loss(self(train_x), train_y)
                reg_loss = loss + self.regularize()
            # This optimizer variant also receives the current loss value.
            reg_loss.grad().update(
                self.get_optimizer(loss.as_ndarray(), e, epoch, i, batch_loop))
            # Loss may come back as a 1-element array or a scalar array.
            try:
                loss = float(loss.as_ndarray()[0])
            except:
                loss = float(loss.as_ndarray())
            display_loss += loss
            bar.set_description("Epoch:{:03d} Train Loss:{:5.3f}".format(e, loss))
            bar.update(1)
        avg_train_loss = display_loss / (i + 1)
        avg_train_loss_list.append(avg_train_loss)
        if valid_dist is not None:
            if is_cuda_active():
                release_mem_pool()
            # Reuse the same progress bar for the validation pass.
            bar.n = 0
            bar.total = int(np.ceil(len(valid_dist) / batch_size))
            display_loss = 0
            for i, (valid_x, valid_y) in enumerate(
                    valid_dist.batch(batch_size, shuffle=False,
                                     target_builder=self.build_data())):
                self.set_models(inference=True)
                loss = self.loss(self(valid_x), valid_y)
                try:
                    loss = float(loss.as_ndarray()[0])
                except:
                    loss = float(loss.as_ndarray())
                display_loss += loss
                bar.set_description("Epoch:{:03d} Valid Loss:{:5.3f}".format(e, loss))
                bar.update(1)
            avg_valid_loss = display_loss / (i + 1)
            avg_valid_loss_list.append(avg_valid_loss)
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f} Avg Valid Loss:{:5.3f}".format(
                e, avg_train_loss, avg_valid_loss))
        else:
            bar.set_description("Epoch:{:03d} Avg Train Loss:{:5.3f}".format(e,
                                                                             avg_train_loss))
        bar.close()
        if callback_end_epoch is not None:
            callback_end_epoch(e, self, avg_train_loss_list, avg_valid_loss_list)
    return avg_train_loss_list, avg_valid_loss_list
def predict(self, img_list, score_threshold=0.3, nms_threshold=0.4):
    """Detect objects in the given input.

    This method accepts either an ndarray or image path(s).

    Args:
        img_list (string, list, ndarray): Path to an image, list of paths or ndarray.
        score_threshold (float): Predicted boxes whose confidence score is
            lower than this value are discarded. Defaults to 0.3.
        nms_threshold (float): Threshold for non maximum suppression.
            Defaults to 0.4.

    Return:
        (list): Per image, a list of predictions. Box coordinates and
        sizes are returned as ratios of the original image size, so
        every 'box' value is in the range [0 ~ 1].

        .. code-block :: python

            # An example of return value.
            [
                [ # Prediction of first image.
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':(str)},
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':(str)},
                    ...
                ],
                [ # Prediction of second image.
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':(str)},
                    {'box': [x, y, w, h], 'score':(float), 'class':(int), 'name':(str)},
                    ...
                ],
                ...
            ]

    Example:
        >>> model.predict(['img01.jpg', 'img02.jpg'])
        [[{'box': [0.21, 0.44, 0.11, 0.32], 'score':0.823, 'class':1, 'name':'dog'}],
         [{'box': [0.87, 0.38, 0.84, 0.22], 'score':0.423, 'class':0, 'name':'cat'}]]
    """
    batch_size = 32
    self.set_models(inference=True)
    if not isinstance(img_list, (list, str)):
        # ndarray batch given directly.
        return self.get_bbox(self(img_list).as_ndarray(),
                             score_threshold, nms_threshold)
    if isinstance(img_list, str):
        # Single path: return the box list of that one image.
        arr = self.preprocess(load_img(img_list, self.imsize)[None])
        return self.get_bbox(self(arr).as_ndarray(),
                             score_threshold, nms_threshold)[0]
    if len(img_list) >= 32:
        # Large path list: batch through a distributor with a progress bar.
        dist = ImageDistributor(img_list)
        detections = []
        progress = tqdm(total=int(np.ceil(len(dist) / batch_size)))
        for paths, _ in dist.batch(batch_size, shuffle=False):
            arr = self.preprocess(
                np.vstack([load_img(p, self.imsize)[None] for p in paths]))
            detections.extend(
                self.get_bbox(self(arr).as_ndarray(),
                              score_threshold, nms_threshold))
            progress.update(1)
        return detections
    # Small path list: a single forward pass.
    arr = self.preprocess(
        np.vstack([load_img(p, self.imsize)[None] for p in img_list]))
    return self.get_bbox(self(arr).as_ndarray(), score_threshold, nms_threshold)
def fit(self, train_img_path_list=None, train_annotation_list=None,
        valid_img_path_list=None, valid_annotation_list=None,
        epoch=136, batch_size=64, augmentation=None, callback_end_epoch=None,
        class_weight=None):
    """Perform training with the given data and hyper parameters.

    Args:
        train_img_path_list(list): List of image path.
        train_annotation_list(list): List of annotations.
        valid_img_path_list(list): List of image path for validation.
        valid_annotation_list(list): List of annotations for validation.
        epoch(int): Number of training epoch.
        batch_size(int): Number of batch size.
        augmentation(Augmentation): Augmentation object.
        callback_end_epoch(function): Called at the end of each epoch with
            (epoch, model, avg_train_loss_list, avg_valid_loss_list).
        class_weight: Per-class weighting forwarded to the loss function.

    Returns:
        (tuple): Training loss list and validation loss list.
    """
    # Training data gets augmentation; validation data never does.
    train_dist = ImageDistributor(train_img_path_list,
                                  train_annotation_list,
                                  augmentation=augmentation)
    valid_dist = ImageDistributor(valid_img_path_list, valid_annotation_list)
    batch_loop = int(np.ceil(len(train_dist) / batch_size))
    avg_train_loss_list = []
    avg_valid_loss_list = []
    for e in range(epoch):
        bar = tqdm(range(batch_loop))
        display_loss = 0
        for i, (train_x, train_y) in enumerate(
                train_dist.batch(batch_size, target_builder=self.build_data())):
            self.set_models(inference=False)
            with self.train():
                loss = self.loss(self(train_x), train_y, class_weight=class_weight)
                reg_loss = loss + self.regularize()
            # Loss may come back as a 1-element array or a scalar array;
            # the converted value is also handed to the optimizer.
            try:
                loss = loss.as_ndarray()[0]
            except:
                loss = loss.as_ndarray()
            reg_loss.grad().update(
                self.get_optimizer(loss, e, epoch, i, batch_loop))
            display_loss += loss
            bar.set_description("Epoch:{:03d} Train Loss:{:5.3f}".format(
                e, loss))
            bar.update(1)
        avg_train_loss = display_loss / (i + 1)
        avg_train_loss_list.append(avg_train_loss)
        if valid_img_path_list is not None:
            # Reuse the same progress bar for the validation pass.
            bar.n = 0
            bar.total = int(np.ceil(len(valid_dist) / batch_size))
            display_loss = 0
            for i, (valid_x, valid_y) in enumerate(
                    valid_dist.batch(batch_size, target_builder=self.build_data())):
                self.set_models(inference=True)
                loss = self.loss(self(valid_x), valid_y, class_weight=class_weight)
                try:
                    loss = loss.as_ndarray()[0]
                except:
                    loss = loss.as_ndarray()
                display_loss += loss
                bar.set_description(
                    "Epoch:{:03d} Valid Loss:{:5.3f}".format(e, loss))
                bar.update(1)
            avg_valid_loss = display_loss / (i + 1)
            avg_valid_loss_list.append(avg_valid_loss)
            bar.set_description(
                "Epoch:{:03d} Avg Train Loss:{:5.3f} Avg Valid Loss:{:5.3f}"
                .format(e, avg_train_loss, avg_valid_loss))
        else:
            bar.set_description(
                "Epoch:{:03d} Avg Train Loss:{:5.3f}".format(
                    e, avg_train_loss))
        bar.close()
        if callback_end_epoch is not None:
            callback_end_epoch(e, self, avg_train_loss_list, avg_valid_loss_list)
    return avg_train_loss_list, avg_valid_loss_list