def process_image(self, image, paths):
    """
    Run the processing pipeline for `image`.

    :param image: Input image. Must be a 4D color image tensor with shape (1, height, width, 3)
    :type image: tf.python.framework.ops.EagerTensor
    :param paths: Paths object representing the image file.
    :type paths: src.io.TreeWalker.Paths
    """
    start_time = time.time()

    # Compute the detected objects and their masks.
    mask_results = self.masker.mask(image)
    time_delta = "{:.3f}".format(time.time() - start_time)
    LOGGER.info(__name__, f"Masked image in {time_delta} s. File: {paths.input_file}")

    # Convert the image to a numpy array
    if not isinstance(image, np.ndarray):
        image = image.numpy()

    # If we have reached the maximum number of workers, wait for them to finish
    if len(self.workers) >= self.max_num_async_workers:
        self._wait_for_workers()

    # Create workers for the current image.
    self._spawn_workers(paths, image, mask_results)
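# A minimal sketch of how an image can be loaded into the (1, height, width, 3) tensor that `process_image`
# expects. The file path handling is illustrative only; in the pipeline the tensors come from the TensorFlow
# dataset created by `get_tf_dataset`.
import tensorflow as tf

def load_image_tensor(image_file):
    # Decode the JPEG into a (height, width, 3) uint8 tensor and add a batch dimension.
    contents = tf.io.read_file(image_file)
    img = tf.image.decode_jpeg(contents, channels=3)
    return tf.expand_dims(img, 0)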
def get_results(coco, imgs_dir):
    """
    Get the masking results for all images in `imgs_dir`.

    :param coco: COCO object representing the dataset.
    :type coco: COCO
    :param imgs_dir: Path to base directory with images to use for evaluation.
    :type imgs_dir: str
    :return: Masking results. Image IDs are keys, and masking results (output from `Masker.mask`) are values.
    :rtype: dict
    """
    LOGGER.info(__name__, "Building results.")
    tree_walker = TreeWalker(imgs_dir, [], skip_webp=False, precompute_paths=True)
    dataset = get_tf_dataset(tree_walker)
    dataset_iterator = iter(dataset)
    filename_to_image_id = {img_dict["file_name"]: _id for _id, img_dict in coco.imgs.items()}
    masker = Masker()

    results = {}
    for i, paths in enumerate(tree_walker.walk()):
        tic = time.time()
        img = next(dataset_iterator)
        mask_results = masker.mask(img)
        image_id = filename_to_image_id[paths.filename]
        results[image_id] = mask_results
        dt = time.time() - tic
        LOGGER.info(__name__, f"Processed image {i+1}/{tree_walker.n_valid_images} in {round(dt, 2)} s. "
                              f"File: {paths.filename}")

    return results
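# Hedged usage sketch for `get_results`: load the COCO ground truth with pycocotools and run the masker over the
# evaluation images. The annotation file and image directory paths are placeholders.
from pycocotools.coco import COCO

coco = COCO("annotations/instances_val.json")
results = get_results(coco, "data/val_images")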
def _init_model(self):
    """Initialize the TensorFlow graph."""
    saved_model_path = os.path.join(config.MODEL_PATH, "saved_model")

    # Download and extract the model if it cannot be found locally
    if not os.path.exists(saved_model_path):
        LOGGER.info(__name__, "Could not find the model graph file. Downloading...")
        download_model(config.DOWNLOAD_BASE, config.MODEL_NAME, config.MODEL_PATH, extract_all=True)
        LOGGER.info(__name__, "Model graph file downloaded.")

    model = tf.saved_model.load(saved_model_path)
    self.model = model.signatures["serving_default"]
def clear_cache_file(file_path):
    """
    Clear the output files for the unfinished image whose cache file is located at `file_path`.

    :param file_path: Path to cache file for unfinished image
    :type file_path: str
    """
    # Read the JSON file
    try:
        with open(file_path, "r") as f:
            cache_info = json.load(f)
    except json.JSONDecodeError:
        # If we got a JSONDecodeError, it was most likely because the program was killed before it finished
        # writing the file. Since cache file writing is the first step when exporting the output images, we have
        # no output images to clean up. We therefore remove the (incomplete) cache file and continue.
        os.remove(file_path)
        return

    # Create a `src.io.TreeWalker.Paths` object representing the image
    paths = Paths(base_input_dir=cache_info["base_input_dir"], base_mirror_dirs=cache_info["base_mirror_dirs"],
                  input_dir=cache_info["input_dir"], mirror_dirs=cache_info["mirror_dirs"],
                  filename=cache_info["filename"])

    # Wait for the directories if they cannot be reached
    try:
        wait_until_path_is_found([paths.base_input_dir, *paths.base_mirror_dirs])
    except PathNotReachableError as err:
        raise PathNotReachableError(f"The directories pointed to by the cache file '{file_path}' could not be "
                                    f"found. If they were deleted manually, delete this cache file and run the "
                                    f"program again.") from err

    # Remove any expected output files if they are present
    for expected_file in get_expected_files(paths):
        if os.path.isfile(expected_file):
            os.remove(expected_file)
            LOGGER.info(__name__, f"Removed file '{expected_file}' for unfinished image '{paths.input_file}'")
        else:
            LOGGER.debug(__name__, f"Could not find file '{expected_file}' for unfinished image "
                                   f"'{paths.input_file}'")

    # Remove the cache file
    os.remove(file_path)
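# Illustrative layout of a cache file, as implied by the keys read in `clear_cache_file` above. The concrete
# paths and filename are placeholders, not real data.
example_cache_info = {
    "base_input_dir": "/data/input",
    "base_mirror_dirs": ["/data/output", "/data/archive"],
    "input_dir": "/data/input/2020/route_1",
    "mirror_dirs": ["/data/output/2020/route_1", "/data/archive/2020/route_1"],
    "filename": "image_001.jpg",
}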
def _update_rows(self, cursor, rows):
    LOGGER.info(__name__, f"Attempting to update {len(rows)} row(s) in the database.")

    # Attempt to update the rows. When we have `batcherrors = True`, the valid rows will be updated normally.
    cursor.executemany(self.table.update_sql, rows, batcherrors=True)
    # Get the errors caused by the rows where the update failed.
    errors = [e for e in cursor.getbatcherrors()]

    # Add number of updated rows to total counter
    n_updated = len(rows) - len(errors)
    self.total_updated += n_updated
    LOGGER.info(__name__, f"Successfully updated {n_updated} row(s) in the database.")
    return errors
def clear_cache():
    """
    Clear the cache directory. Each JSON file in the cache directory is expected to represent an image for which
    the export process was aborted due to a critical error. This function will clear the output files written for
    the unfinished image, and then delete the cache file.
    """
    # Return if we couldn't find a cache directory. This probably means that this is the first time the
    # application is run on this machine, so the cache directory has not been created yet.
    if not os.path.exists(config.CACHE_DIRECTORY):
        return

    LOGGER.info(__name__, "Clearing cache files")
    count = 0
    for filename in os.listdir(config.CACHE_DIRECTORY):
        if filename.endswith(".json"):
            clear_cache_file(os.path.join(config.CACHE_DIRECTORY, filename))
            count += 1
    LOGGER.info(__name__, f"Found and cleared {count} cache file(s)")
def _insert_rows(self, cursor, rows):
    LOGGER.info(__name__, f"Attempting to insert {len(rows)} row(s) into the database.")

    # Attempt to insert the rows into the database. When we have `batcherrors = True`, the rows which do not
    # violate the unique constraint will be inserted normally. The rows which do violate the constraint will
    # not be inserted.
    cursor.executemany(self.table.insert_sql, rows, batcherrors=True)
    # Get the errors caused by the rows where the insertion failed.
    errors = [e for e in cursor.getbatcherrors()]

    # Add number of inserted rows to total counter
    n_inserted = len(rows) - len(errors)
    self.total_inserted += n_inserted
    LOGGER.info(__name__, f"Successfully inserted {n_inserted} row(s) into the database.")
    return errors
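# A hedged sketch of how a caller might inspect the batch errors returned by `_insert_rows`/`_update_rows`.
# With cx_Oracle's `batcherrors=True`, each error object carries an `offset` into the `rows` list and a
# `message`. The helper below is illustrative and not part of the client code above.
def collect_failed_rows(rows, errors):
    failed = []
    for error in errors:
        # `error.offset` is the index of the row that caused the error.
        failed.append(rows[error.offset])
        LOGGER.error(__name__, f"Row {error.offset} failed: {error.message}")
    return failed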
def masker_category_to_annotation_category(masker_cat, coco):
    """
    Convert from masker category to annotation category, using the category name.

    :param masker_cat: Masker category
    :type masker_cat: int
    :param coco: COCO object representing the dataset
    :type coco: COCO
    :return: Annotation category
    :rtype: int
    """
    masker_cat_name = LABEL_MAP[int(masker_cat)]
    for _id, cat_dict in coco.cats.items():
        if cat_dict["name"] == masker_cat_name:
            return _id

    LOGGER.info(__name__, f"Category {masker_cat} ({masker_cat_name}) not found in annotations. This detection "
                          f"will be ignored.")
    return None
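# Hedged usage sketch: remap each masker class id from a detection result to the corresponding annotation
# category id before scoring, skipping detections without a match. `detected_class_ids` is an assumed variable.
annotation_ids = [masker_category_to_annotation_category(c, coco) for c in detected_class_ids]
annotation_ids = [c for c in annotation_ids if c is not None]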
def check_all_files_written(paths):
    """
    Check that all files for a given image have been saved correctly. The list of checked files is determined by
    the File I/O parameters in `config`. If all expected output files exist, the cache file will be deleted. If
    all expected output files exist, AND `config.delete_input` is True, the input image will be deleted as well.

    :param paths: Paths object representing the input image
    :type paths: src.io.TreeWalker.Paths
    :return: True if all expected files were found. False otherwise
    :rtype: bool
    """
    missing_files = find_missing_files(paths)
    if missing_files:
        _handle_missing_files(paths, missing_files)
        return False
    else:
        LOGGER.info(__name__, f"All output files written for image: {paths.input_file}")
        return True
def __init__(self, input_folder, mirror_folders, skip_webp=True, precompute_paths=True, ext="jpg"):
    LOGGER.info(__name__, f"Searching for {ext}-files in '{input_folder}'.")
    self.input_folder = input_folder
    self.mirror_folders = mirror_folders
    self.skip_webp = skip_webp
    self.precompute_paths = precompute_paths
    self.ext = ext
    self.n_valid_images = self.n_skipped_images = 0

    if self.precompute_paths:
        # Walk the tree now and cache the valid paths.
        self.paths = [p for p in self._walk()]
        LOGGER.info(__name__, f"Found {self.n_valid_images} valid {ext}-files.")
        if self.n_skipped_images > 0:
            LOGGER.info(__name__, f"Found {self.n_skipped_images} files with associated webp-files. "
                                  f"These will be skipped.")
    else:
        self.paths = None
def _path_is_valid(self, input_dir, mirror_dirs, filename):
    # Only accept files with the expected extension
    if not filename.endswith(self.ext):
        return False

    # Skip files we do not have read access to
    input_filepath = os.path.join(input_dir, filename)
    if not os.access(input_filepath, os.R_OK):
        LOGGER.info(__name__, f"Could not read image file '{input_filepath}'")
        return False

    # Optionally skip images which already have a webp-file in the first mirror directory
    if self.skip_webp:
        webp_path = os.path.join(mirror_dirs[0], self._to_webp(filename))
        if os.path.exists(webp_path):
            LOGGER.debug(__name__, f"Mask already found for '{input_filepath}' at '{webp_path}'.")
            self.n_skipped_images += 1
            return False

    self.n_valid_images += 1
    return True
def main():
    tree_walker = initialize()

    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{i + 1} of {tree_walker.n_valid_images}"
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")
        LOGGER.info(__name__, f"Processing file {paths.input_file}")

        try:
            worker = EXIFWorker(None, paths, None)
            worker.get()
        except PROCESSING_EXCEPTIONS as err:
            LOGGER.error(__name__, f"Got error '{type(err).__name__}: {str(err)}' when creating JSON from image. "
                                   f"File: {paths.input_file}")
def main():
    tree_walker, database_client = initialize()
    start_datetime = datetime.now()

    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{i + 1} of {tree_walker.n_valid_images}"
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")
        LOGGER.info(__name__, f"Processing file {paths.input_file}")

        try:
            json_dict = load_json(paths)
            database_client.add_row(json_dict)
        except PROCESSING_EXCEPTIONS as err:
            LOGGER.error(__name__, f"Got error '{type(err).__name__}: {str(err)}' when writing JSON to Database. "
                                   f"File: {paths.input_file}")

    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, "Writing remaining files to Database")
    database_client.close()

    summary_str = get_summary(tree_walker, database_client, start_datetime)
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, summary_str)
def main():
    """Run the masking."""
    # Initialize
    start_datetime = datetime.now()
    args, tree_walker, image_processor, dataset_iterator = initialize()
    n_imgs = "?" if config.lazy_paths else (tree_walker.n_valid_images + tree_walker.n_skipped_images)

    # Mask images
    time_at_iter_start = time.time()
    for i, paths in enumerate(tree_walker.walk()):
        count_str = f"{tree_walker.n_skipped_images + i + 1} of {n_imgs}"
        start_time = time.time()
        LOGGER.set_state(paths)
        LOGGER.info(__name__, LOG_SEP)
        LOGGER.info(__name__, f"Iteration: {count_str}.")

        # Catch potential exceptions raised while processing the image
        try:
            # Get the image
            img = next(dataset_iterator)
            # Do the processing
            image_processor.process_image(img, paths)
        except PROCESSING_EXCEPTIONS as err:
            error_msg = f"'{str(err)}'. File: {paths.input_file}"
            LOGGER.error(__name__, error_msg, save=True, email=True, email_mode="error")
            continue

        est_done = get_estimated_done(time_at_iter_start, n_imgs, i + 1)
        iter_time_delta = "{:.3f}".format(time.time() - start_time)
        LOGGER.info(__name__, f"Iteration finished in {iter_time_delta} s.")
        LOGGER.info(__name__, f"Estimated completion: {est_done}")

    # Close the image_processor. This will make sure that all exports are finished before we continue.
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, "Writing output files for the remaining images.")
    image_processor.close()

    # Summary
    summary_str = get_summary(tree_walker, image_processor, start_datetime)
    LOGGER.info(__name__, LOG_SEP)
    LOGGER.info(__name__, summary_str, email=True, email_mode="finished")
def initialize():
    """
    Get command line arguments, and initialize the TreeWalker, Masker and ImageProcessor.

    :return: Command line arguments, an instance of `TreeWalker` initialized at the specified directories, an
             `ImageProcessor` wrapping a `Masker` ready for masking, and an iterator over the TensorFlow dataset.
    :rtype: argparse.Namespace, TreeWalker, ImageProcessor, iterator
    """
    # Register the logging excepthook
    except_hooks = [logger_excepthook]

    if config.uncaught_exception_email:
        # Register a custom excepthook which sends an email on uncaught exceptions.
        from src.email_sender import email_excepthook
        except_hooks.append(email_excepthook)

    # Set the exception hook(s)
    set_excepthook(except_hooks)

    # Get arguments
    args = get_args()

    # Check that the config and command line arguments are valid
    check_config(args)

    # Configure logger
    logging.basicConfig(level=getattr(logging, config.log_level), format=LOGGER.fmt, datefmt=LOGGER.datefmt)

    # Set log file
    if args.log_folder is not None:
        os.makedirs(args.log_folder, exist_ok=True)
        log_file_name = config.log_file_name.format(datetime=datetime.now().strftime("%Y-%m-%d_%H%M%S"),
                                                    hostname=gethostname())
        log_file = os.path.join(args.log_folder, log_file_name)
        LOGGER.set_log_file(log_file)

    # Log the call
    LOGGER.info(__name__, f"Call: {' '.join(sys.argv)}")
    # Log the current config.
    LOGGER.info(__name__, "\n" + config_string())

    if args.clear_cache:
        # Clear any cached files
        clear_cache()
        # Clear the database cache if database writing is enabled
        if config.write_exif_to_db:
            from src.db.DatabaseClient import clear_db_cache
            clear_db_cache()

    # Get the absolute paths of the directories
    base_input_dir = os.path.abspath(args.input_folder)
    base_output_dir = os.path.abspath(args.output_folder)
    mirror_dirs = [base_output_dir]
    # Make the output directory
    os.makedirs(base_output_dir, exist_ok=True)

    if args.archive_folder is not None:
        base_archive_dir = os.path.abspath(args.archive_folder)
        mirror_dirs.append(base_archive_dir)
        # Make the archive directory
        os.makedirs(base_archive_dir, exist_ok=True)

    # Make the cache directory
    os.makedirs(config.CACHE_DIRECTORY, exist_ok=True)

    # Configure the logger
    LOGGER.base_input_dir = base_input_dir
    LOGGER.base_output_dir = base_output_dir

    # Initialize the walker
    tree_walker = TreeWalker(base_input_dir, mirror_dirs, skip_webp=(not config.force_remask),
                             precompute_paths=(not config.lazy_paths))
    # Initialize the masker
    masker = Masker(mask_dilation_pixels=config.mask_dilation_pixels, max_num_pixels=config.max_num_pixels)
    # Create the TensorFlow dataset
    dataset_iterator = iter(get_tf_dataset(tree_walker))
    # Initialize the ImageProcessor
    image_processor = ImageProcessor(masker=masker, max_num_async_workers=config.max_num_async_workers)

    return args, tree_walker, image_processor, dataset_iterator
def process_image_properties(contents, parsed_exif):
    """
    Process the `ImageProperties` XML from the EXIF header.

    :param contents: XML-contents
    :type contents: bytes
    :param parsed_exif: Dictionary to hold the extracted values. The relevant information from `contents` is
                        written into this dictionary.
    :type parsed_exif: dict
    """
    contents = to_pretty_xml(contents)
    contents = redact_image_properties(contents)
    image_properties = xmltodict.parse(contents)["ImageProperties"]

    # Set a "default" quality. This will be adjusted if we encounter missing values
    quality = EXIF_QUALITIES["good"]

    # Position
    geo_tag = image_properties.get("GeoTag", None)
    if geo_tag is not None:
        ewkt = f"srid=4326;POINT Z( {geo_tag['dLongitude']} {geo_tag['dLatitude']} {geo_tag['dAltitude']} )"
    else:
        ewkt = None
        quality = EXIF_QUALITIES["missing_values"]

    # Speed and heading
    heading = image_properties.get("Heading", None)
    if heading == "NaN":
        heading = None
    speed = image_properties.get("Speed", None)
    if speed == "NaN":
        speed = None

    # Nicely formatted folder names
    mappenavn = re.sub(r"\\", "/", image_properties["ImageName"])
    mapper = mappenavn.split("/")

    timestamp = image_properties["@Date"]
    date = timestamp.split("T")[0]

    exif_veg = image_properties["VegComValues"]["VCRoad"]
    if len(exif_veg) >= 3:
        exif_vegnr = exif_veg[2:].lstrip("0")
        exif_vegstat = exif_veg[1]
        exif_vegkat = exif_veg[0]
    else:
        exif_vegnr = exif_veg.lstrip("0")
        exif_vegstat = None
        exif_vegkat = None

    if exif_vegstat not in LOVLIG_VEGSTATUS or exif_vegkat not in LOVLIG_VEGKATEGORI:
        LOGGER.info(__name__, f"VCRoad={exif_veg} does not follow the KAT+STAT+vegnr syntax: {mappenavn}")

    hp, strekning, delstrekning, ankerpunkt, kryssdel, sideanleggsdel = process_strekning_and_kryss(
        vchp=image_properties["VegComValues"]["VCHP"], filename=mapper[-1])

    # Set values
    parsed_exif["exif_tid"] = timestamp
    parsed_exif["exif_dato"] = date
    parsed_exif["exif_speed"] = speed
    parsed_exif["exif_heading"] = heading
    parsed_exif["exif_gpsposisjon"] = ewkt
    parsed_exif["exif_strekningsnavn"] = image_properties["VegComValues"]["VCArea"]
    parsed_exif["exif_fylke"] = image_properties["VegComValues"]["VCCountyNo"]
    parsed_exif["exif_vegkat"] = exif_vegkat
    parsed_exif["exif_vegstat"] = exif_vegstat
    parsed_exif["exif_vegnr"] = exif_vegnr
    parsed_exif["exif_hp"] = hp
    parsed_exif["exif_strekning"] = strekning
    parsed_exif["exif_delstrekning"] = delstrekning
    parsed_exif["exif_ankerpunkt"] = ankerpunkt
    parsed_exif["exif_kryssdel"] = kryssdel
    parsed_exif["exif_sideanleggsdel"] = sideanleggsdel
    parsed_exif["exif_meter"] = image_properties["VegComValues"]["VCMeter"]
    parsed_exif["exif_feltkode"] = image_properties["VegComValues"]["VCLane"]
    parsed_exif["exif_mappenavn"] = "/".join(mapper[0:-1])
    parsed_exif["exif_filnavn"] = mapper[-1]
    parsed_exif["exif_strekningreferanse"] = "/".join(mapper[-4:-2])
    parsed_exif["exif_imageproperties"] = contents
    parsed_exif["exif_kvalitet"] = quality
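# Illustrative example of the VCRoad parsing above: a value like "EV00006" splits into road category "E",
# road status "V" and road number "6" (leading zeros stripped). The value is made up for illustration.
exif_veg = "EV00006"
assert (exif_veg[0], exif_veg[1], exif_veg[2:].lstrip("0")) == ("E", "V", "6")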