def create_segmentation_dataset(images_folder, target_labels, color_palette, n_images, batch_size=10, split="train"):
    try:
        # create the dataset
        dataset = Coco(
            version=2017,  # COCO release year
            split=split,
            task="segmentation",
            labels=list(target_labels.keys()),  # target labels
            n_images=n_images,  # number of images per class
            batch_size=batch_size  # number of images per batch
        )
        print(len(dataset))  # size of the dataset
        data_folder = Path(images_folder)
        data_folder.mkdir(exist_ok=True)
        FileUtil.clear_folder(data_folder)
        # download the images batch by batch
        for i, batch_images in enumerate(dataset):
            print(f"download done for batch {i + 1} of {dataset.batches_count}")
            for image in batch_images:
                # write the image and its mask to disk
                image.export(data_folder, target_labels, color_palette)
    except Exception as ex:
        print(f"[ERROR] Error creating the dataset: {ex}")
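# A minimal usage sketch for the helper above. The label names, integer ids,
# and palette colors are illustrative assumptions, not values from the library.
target_labels = {"person": 1, "car": 2}            # label name -> mask pixel id
color_palette = {1: (255, 0, 0), 2: (0, 255, 0)}   # mask pixel id -> RGB color
create_segmentation_dataset(
    images_folder="data/coco_seg",
    target_labels=target_labels,
    color_palette=color_palette,
    n_images=100,
    batch_size=10,
    split="train"
)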
def _load_dataset(self):
    img_files = FileUtil.get_files(self._images_folder, [".jpg", ".jpeg"])
    xml_files = FileUtil.get_files(self._xml_folder, [".xml"])
    mask_files = FileUtil.get_files(self._xml_folder, [".png"])
    files = sorted(img_files + xml_files + mask_files, key=lambda f: f.stem)
    images_files = []
    # group the image, annotation, and mask files by their common stem
    for img_name, group in itertools.groupby(files, key=lambda f: f.stem):
        img_file, xml_file, mask_file = None, None, None
        for file in group:
            if file.suffix in [".jpg", ".jpeg"]:
                img_file = file
            elif file.suffix == ".xml":
                xml_file = file
            elif file.suffix == ".png":
                mask_file = file
        if img_file and xml_file:
            images_files.append(PascalVOCImage(
                img_path=img_file,
                xml_Path=xml_file,
                mask_Path=mask_file
            ))
    self._dataset.load(images_files, batch_size=200)
    assert not self._dataset.empty, "No images found in the folder {}".format(self._images_folder)
    # generate the labels map if one was not provided
    if self._labels_map is None:
        labels = set()
        for img in self._dataset:
            if img.annotations:
                for bounding_box in img.annotations["object"]:
                    labels.add(bounding_box["name"].strip().title())
        self._labels_map = {label: i + 1 for i, label in enumerate(labels)}
    # update the number of classes in the model config
    getattr(self.pipeline.model, self.arch).num_classes = len(self._labels_map)
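# For reference, the generated labels map is a plain dict mapping each class
# name to a 1-based integer id (0 is conventionally reserved for background in
# the TF Object Detection API). The names below are illustrative:
# self._labels_map == {"Car": 1, "Dog": 2, "Person": 3}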
def train(self, epochs=100, val_split=0.3, clear_folder=False, override_pipeline=False, eval=False):
    try:
        if clear_folder:
            FileUtil.clear_folder(self._out_folder)
        self.num_steps = epochs
        self._mk_labels_map()
        self._mk_records(val_split)
        # create the export folder and persist the updated pipeline config
        self._out_folder.joinpath(os.path.sep.join(["export", "Servo"])).mkdir(exist_ok=True, parents=True)
        save_pipeline_config(self.pipeline, str(self._out_folder))
        # start training
        tf.logging.set_verbosity(tf.logging.INFO)
        if eval:
            self._train_and_eval()
        else:
            self._train()
    except Exception as ex:
        raise Exception("Error training the model: {}".format(ex)) from ex
    return super(TfTrainableModel, self).train()
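# A hedged usage sketch: `model` is assumed to be an already-constructed
# instance of this TfTrainableModel subclass; the argument values are
# illustrative.
# model.train(epochs=5000, val_split=0.3, clear_folder=True, eval=True)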
@classmethod
def download_model(cls, model_name: str) -> str:
    available_models = cls.available_models()  # get the list of available models
    assert model_name in available_models, "Invalid model name {}".format(model_name)
    checkpoint_model_path = LibUtil.models_home(sub_folder="tf").joinpath(model_name)
    model_uri = available_models[model_name]
    if not checkpoint_model_path.exists():
        # download and unzip the checkpoint the first time it is requested
        FileUtil.download_file(model_uri, checkpoint_model_path, unzip=True, show_progress=True)
    return checkpoint_model_path
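# Hypothetical usage (the enclosing class name and the model name are
# illustrative; the name must be a key returned by available_models()):
# checkpoint_path = ModelZoo.download_model("faster_rcnn_resnet50_coco")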
def fetch(self, q, count=100, batch_size: int = 200, timestamp=1):
    try:
        total_matches = self._get_total_matches(q)
        logger.debug("{} images found".format(total_matches))
        result = []
        # page through the API results
        for offset in range(0, total_matches, count):
            images = self._make_api_call(q, offset, count)
            result += images
            time.sleep(timestamp)  # wait between API calls
        # download the images in parallel, one batch at a time
        for batch in more_itertools.chunked(result, batch_size):
            delayed_tasks = []
            for img_uri in batch:
                try:
                    if FileUtil.exists_http_file(img_uri):
                        delayed_tasks.append(dask.delayed(ImageUtil.url2img)(img_uri))
                except Exception as ex:
                    if type(ex) in EXCEPTIONS:
                        logger.debug("skipping: {}".format(img_uri))
                    else:
                        logger.debug("skipping {}: {}".format(img_uri, ex))
                    continue
            compute_result = dask.compute(*delayed_tasks)
            yield [img for img in compute_result if isinstance(img, np.ndarray)]
    except Exception as ex:
        logger.error("error fetching the images: {}".format(ex))
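# A consumer sketch under stated assumptions: `scraper` is an instance of the
# class defining fetch(); cv2 and uuid are used here only for illustration.
# for batch in scraper.fetch("solar panel", count=100, batch_size=200):
#     for img in batch:  # each img is a numpy array
#         cv2.imwrite("images/{}.jpg".format(uuid.uuid4()), img)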
def create_dataset(images_folder, labels_map, color_palette, n):
    try:
        # create the dataset
        dataset = Coco(v=2017)
        dataset.setup(split="train", task="segmentation")
        os.makedirs(images_folder, exist_ok=True)
        FileUtil.clear_folder(images_folder)
        for batch_images in dataset.fetch(n=n, labels=list(labels_map.keys()), batch_size=500):
            for img in batch_images:
                # write the image and its mask to disk
                img.export(images_folder, labels_map, color_palette)
    except Exception as ex:
        print("error creating the dataset: {}".format(ex))
def _download_dependencies(self):
    """Download the dataset dependencies"""
    print("Downloading {} dataset dependencies, it can take a few minutes".format(type(self).__name__))
    for dep_name, dep_uri in self._files.items():
        self._deps[dep_name] = FileUtil.download_file(dep_uri, self.home(), show_progress=True, unzip=True)
    print("Download dependencies done")
@classmethod
def download_pipeline(cls, model_name: str) -> str:
    available_pipelines = cls.available_pipelines()
    assert model_name in available_pipelines, \
        "there is no pipeline available for the model {}".format(model_name)
    pipeline_uri = available_pipelines[model_name]
    filename = Path(urlparse(pipeline_uri).path).name
    pipeline_model_path = LibUtil.pipelines_home(subfolder="tf").joinpath(filename)
    if not pipeline_model_path.exists():
        pipeline_model_path = FileUtil.download_file(pipeline_uri, pipeline_model_path.parent, show_progress=True)
    return pipeline_model_path
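# Hypothetical usage, mirroring download_model above (class name illustrative):
# pipeline_path = ModelZoo.download_pipeline("faster_rcnn_resnet50_coco")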
def _download_dependencies(self):
    """Download the dataset dependencies"""
    # alternative parallel implementation, kept for reference:
    # delayed_tasks = {}
    # for dep_name, dep_uri in self._remote_dep.items():
    #     task = delayed(FileUtil.download_file)(dep_uri, self._home())
    #     delayed_tasks[dep_name] = task
    # with ProgressBar():
    #     self._dependencies = dask.compute(delayed_tasks)[0]
    logger.info("Downloading {} dataset dependencies, it can take a few minutes".format(type(self).__name__))
    for dep_name, dep_uri in self._remote_dep.items():
        self._dependencies[dep_name] = FileUtil.download_file(
            dep_uri, self._home(), show_progress=True, unzip=True)
    logger.info("Download dependencies done")
def create_detection_dataset(images_folder, target_labels, n_images, batch_size, split):
    try:
        # create the dataset
        dataset = OpenImages(
            version=6,  # versions 5 and 6 supported
            split=split,
            task="detection",
            labels=target_labels,  # target labels
            n_images=n_images,  # number of images per class
            batch_size=batch_size  # number of images per batch
        )
        print(len(dataset))  # size of the dataset
        data_folder = Path(images_folder)
        data_folder.mkdir(exist_ok=True)
        FileUtil.clear_folder(data_folder)
        # download the images batch by batch
        for i, batch_images in enumerate(dataset):
            print(f"download done for batch {i + 1} of {dataset.batches_count}")
            for image in batch_images:
                image.export(data_folder)  # copy images to disk
    except Exception as ex:
        print(f"[ERROR] Error creating the dataset: {ex}")
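# A usage sketch with illustrative arguments; the label names must match the
# class names used by the OpenImages dataset.
create_detection_dataset(
    images_folder="data/open_images",
    target_labels=["Mouse", "Keyboard"],
    n_images=500,
    batch_size=250,
    split="train"
)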
def _fetch_single_image(self, img_info, image_id, image_label):
    try:
        img_uri = img_info["coco_url"]
        if FileUtil.exists_http_file(img_uri):
            img_arr = ImageUtil.url2img(img_uri)
            tagged_image = TaggedImage(img_arr)
            tagged_image.id = image_id
            if self.task == "detection":
                tagged_image.regions = self._create_box_rois(img_info, image_label)
            elif self.task == "segmentation":
                tagged_image.regions = self._create_polygon_rois(img_info, image_label)
            return tagged_image
    except Exception as ex:
        logger.error("error downloading the image with id {}: {}".format(image_id, ex))
    return None
def install(self):
    try:
        super(TFObjectDetectionAPI, self).install()
        self._protobuf_comp()
        research_folder = self.repo_folder.joinpath("research")
        slim_folder = research_folder.joinpath("slim")
        if importlib.util.find_spec(self._package_name) is None:
            logger.debug("Installing Api")
            with FileUtil.workon(str(research_folder)):
                os.system("python setup.py build")
                os.system("python setup.py install")
            logger.debug("Api installation done")
        sys.path.append(str(research_folder))
        sys.path.append(str(slim_folder))
        # prepend a separator so the new entries do not fuse with the last PATH entry
        os.environ["PATH"] += "{}{}{}{}".format(os.pathsep, str(research_folder), os.pathsep, str(slim_folder))
    except Exception as ex:
        logger.error("Error installing the package: {}".format(ex))