def generate_annotation_csv(
    pipeline_processor,
    groups_to_process,
    output_folder,
    di_filename,
):
    try:
        if pipeline_processor.options.group_by_series:
            # Keep a single wrapper per unique series id (luid)
            files, luids = map(list, zip(*groups_to_process))
            wrappers = [
                file_handler_factory(files[i])
                for i in [luids.index(x) for x in set(luids)]
            ]
        else:
            wrappers = [file_handler_factory(f) for f in groups_to_process]
        # One row per wrapper; disease_index is left blank for manual annotation
        pd.DataFrame.from_dict(
            {
                "plant": [i.plant for i in wrappers],
                "date_time": [i.date_time for i in wrappers],
                "disease_index": "",
            }
        ).sort_values(
            by=["plant", "date_time"],
            axis=0,
            na_position="first",
            ascending=True,
        ).to_csv(
            di_filename,
            index=False,
        )
    except Exception:
        logger.exception("Unable to build disease index file")
    else:
        logger.info("Built disease index file")
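# A minimal, self-contained sketch of the CSV shape the helper above emits.
# The `Wrapper` namedtuple and the sample values are hypothetical stand-ins
# for the wrappers returned by file_handler_factory.
def _demo_annotation_csv(di_filename="disease_index.csv"):
    from collections import namedtuple

    import pandas as pd

    Wrapper = namedtuple("Wrapper", ["plant", "date_time"])
    wrappers = [
        Wrapper("plant_01", "2021-06-02 10:00"),
        Wrapper("plant_01", "2021-06-01 10:00"),
        Wrapper("plant_02", "2021-06-01 10:05"),
    ]
    # One row per wrapper, sorted by plant then timestamp,
    # with an empty disease_index column to fill in by hand
    pd.DataFrame.from_dict(
        {
            "plant": [w.plant for w in wrappers],
            "date_time": [w.date_time for w in wrappers],
            "disease_index": "",
        }
    ).sort_values(by=["plant", "date_time"], na_position="first").to_csv(
        di_filename, index=False
    )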
def yield_group_by_series(self, time_delta: int):
    # Build a plant -> observations dictionary
    self.init_progress(
        total=len(self.accepted_files),
        desc="Building plants dictionaries",
        yield_mode=True,
    )
    plants_ = defaultdict(list)
    total = len(self.accepted_files)
    for i, item in enumerate(self.accepted_files):
        yield {"step": i, "total": total}
        fh = file_handler_factory(item, self._target_database)
        plants_[fh.plant].append(fh)
    self.close_progress()

    # Sort each plant's observations by timestamp
    self.init_progress(
        total=len(plants_),
        desc="Sorting observations",
        yield_mode=True,
    )
    total = len(plants_)
    for i, v in enumerate(plants_.values()):
        yield {"step": i, "total": total}
        v.sort(key=lambda x: x.date_time)
    self.close_progress()

    # Consume: observations taken less than time_delta minutes after the
    # head of a series inherit the head's luid
    files_to_process = []
    self.init_progress(
        total=len(plants_),
        desc="Grouping by series",
        yield_mode=True,
    )
    total = len(plants_)
    for i, v in enumerate(plants_.values()):
        yield {"step": i, "total": total}
        while len(v) > 0:
            main = v.pop(0)
            main_luid = main.luid
            files_to_process.append((main.file_path, main_luid))
            while (len(v) > 0) and (
                (v[0].date_time - main.date_time).total_seconds() / 60 < time_delta
            ):
                files_to_process.append((v.pop(0).file_path, main_luid))
    self.close_progress()

    # Print stats: files_to_process holds (file_path, luid) tuples,
    # so group sizes are the occurrence counts of each luid
    stat_lst = list(Counter(luid for _, luid in files_to_process).values())
    logger.info("-- Series statistics --")
    logger.info(f"Original file count: {sum(stat_lst)}")
    logger.info(f"Group count: {len(stat_lst)}")
    for k, v in Counter(stat_lst).items():
        logger.info(f"Group size: {k}, frequency: {v}")
    logger.info(f"Min: {min(stat_lst)}, Max: {max(stat_lst)}")

    self.groups_to_process = files_to_process
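# Standalone sketch of the grouping rule above: observations of the same
# plant taken less than `time_delta` minutes after the head of a series are
# attached to that series via the head's luid. Names and the input shape are
# hypothetical; the real code works on file-handler objects.
def _demo_group_by_series(observations, time_delta: int):
    """observations: list of (file_path, luid, datetime), pre-sorted by datetime."""
    groups = []
    pending = list(observations)
    while pending:
        head_path, head_luid, head_time = pending.pop(0)
        groups.append((head_path, head_luid))
        # Attach close-in-time followers to the head's series
        while pending and (pending[0][2] - head_time).total_seconds() / 60 < time_delta:
            groups.append((pending.pop(0)[0], head_luid))
    return groups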
def ipo_factory(
    file_path,
    options=None,
    force_abstract: bool = False,
    data_base=None,
    scale_factor=1,
):
    if force_abstract:
        return BaseImageProcessor(
            file_path,
            options,
            database=data_base,
            scale_factor=scale_factor,
        )
    else:
        # Build unique class list
        ipt_classes_list = get_module_classes(
            package=class_pipelines,
            class_inherits_from=BaseImageProcessor,
            remove_abstract=True,
        )
        # Create temporary image wrapper to detect experiment
        fh = file_handler_factory(file_path, data_base)
        # Select the first class able to process this experiment/robot
        ipt_classes_list = list(set(ipt_classes_list))
        for cls in ipt_classes_list:
            if callable(getattr(cls, "can_process", None)) and cls.can_process(
                dict(experiment=fh.experiment, robot=fh.__class__.__name__)
            ):
                return cls(
                    file_path,
                    options,
                    database=data_base,
                    scale_factor=scale_factor,
                )
        # Fall back to the generic processor
        return BaseImageProcessor(
            file_path,
            options,
            database=data_base,
            scale_factor=scale_factor,
        )
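# Hedged sketch of the selection pattern used above: scan candidate classes
# for a `can_process` hook and fall back to a default class. The arguments
# are hypothetical stand-ins for what get_module_classes collects.
def _demo_select_processor(candidates, fallback, context: dict):
    for cls in set(candidates):
        # Only classes exposing a callable can_process are considered
        if callable(getattr(cls, "can_process", None)) and cls.can_process(context):
            return cls
    return fallback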
def update(
    self,
    src_files_path="",
    extensions: tuple = (".jpg", ".tiff", ".png", ".bmp"),
):
    if not self.connect(auto_update=False):
        return -1

    files_added = 0
    if src_files_path:
        self.src_files_path = src_files_path
    if os.path.isdir(self.src_files_path):
        # Grab all images in folder
        img_lst = ImageList(extensions)
        img_lst.add_folder(self.src_files_path)
        file_list = img_lst.filter(())
        # Fill database
        self.close_connexion()
        total_ = len(file_list)
        self._init_progress(total=total_, desc="Updating database")
        with self.engine as conn_:
            for i, file in enumerate(file_list):
                try:
                    fh = file_handler_factory(file, database=None)
                    # Parameter keys must match the named placeholders exactly
                    conn_.execute(
                        f"""INSERT INTO {self.main_table}
                        (Luid, Name, FilePath, Experiment, Plant, Date, Time,
                         date_time, Camera, Angle, Wavelength)
                        VALUES (:Luid, :Name, :FilePath, :Experiment, :Plant,
                         :Date, :Time, :date_time, :Camera, :Angle, :Wavelength)""",
                        {
                            "Luid": fh.luid,
                            "Name": fh.name,
                            "FilePath": fh.file_path,
                            "Experiment": fh.experiment,
                            "Plant": fh.plant,
                            "Date": fh.date_time.date(),
                            "Time": fh.date_time.time(),
                            "date_time": fh.date_time,
                            "Camera": fh.camera,
                            "Angle": fh.angle,
                            "Wavelength": fh.wavelength,
                        },
                    )
                except exc.IntegrityError:
                    # Duplicate luid: the file is already in the database
                    pass
                except Exception as e:
                    logger.exception(f'Cannot add "{file}" because "{e}"')
                else:
                    files_added += 1
                self._callback(
                    step=i,
                    total=total_,
                    msg=f'Updating database "{self.src_files_path}"',
                )
            self._callback(
                step=total_,
                total=total_,
                msg=f'Updated database "{self.src_files_path}"',
            )
            self._close_progress(desc="Updating database")
    elif self.src_files_path.lower().endswith(".csv"):
        dataframe = pd.read_csv(self.src_files_path, parse_dates=[3])
        try:
            dataframe = dataframe.drop_duplicates(subset="luid", keep="first")
            dataframe.to_sql(name="snapshots", con=self.engine, if_exists="replace")
            conn_ = self.open_connexion()
            try:
                conn_.execute("alter table snapshots add primary key (luid)")
                conn_.execute("alter table snapshots drop column index")
            finally:
                self.close_connexion()
        except Exception as e:
            logger.exception(f"Failed to create table because {repr(e)}")
            files_added = -1
        else:
            files_added = dataframe["luid"].count()
    else:
        logger.error(f"I don't know what to do with {self.src_files_path}")
        files_added = -1
    return files_added
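# Self-contained sketch of the insert loop above using the standard library's
# sqlite3 instead of SQLAlchemy: duplicate primary keys are skipped the same
# way the IntegrityError handler does. Table and column names are illustrative.
def _demo_fill_snapshots(rows):
    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE snapshots (luid TEXT PRIMARY KEY, plant TEXT)")
    added = 0
    for luid, plant in rows:
        try:
            conn.execute(
                "INSERT INTO snapshots (luid, plant) VALUES (:luid, :plant)",
                {"luid": luid, "plant": plant},
            )
        except sqlite3.IntegrityError:
            pass  # already indexed, skip silently
        else:
            added += 1
    conn.commit()
    return added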
def process_wrapper(self, **kwargs):
    """
    Crop: Crop image or mask to rectangular ROI
    Real time: True

    Keyword Arguments (in parentheses, argument name):
        * Activate tool (enabled): Toggle whether or not tool is active
        * Select source (source_selector): Select which image will be used as source
        * Name of ROI to be used (roi_name): Crop Image/mask to ROI, only one ROI accepted
    """
    wrapper = self.init_wrapper(**kwargs)
    if wrapper is None:
        return False

    res = False
    try:
        if self.get_value_of("enabled") == 1:
            # Get source
            input_kind = self.get_value_of("source_selector")
            if input_kind == "mask":
                img = self.get_mask()
            elif input_kind == "current_image":
                img = wrapper.current_image
            else:
                img = None
                logger.error(f"Unknown source: {input_kind}")
                self.result = None
                return
            # Get ROI
            roi_list = self.get_ipt_roi(
                wrapper=wrapper,
                roi_names=self.get_value_of("roi_name").replace(" ", "").split(","),
                selection_mode="all_named",
            )
            if len(roi_list) <= 0:
                logger.warning("No ROI detected, will return source image")
            elif len(roi_list) > 1:
                logger.warning("Multiple ROIs detected, first one will be used")
            if len(roi_list) <= 0:
                roi = None
            elif isinstance(roi_list[0], regions.RectangleRegion):
                roi: regions.RectangleRegion = roi_list[0]
            else:
                roi: regions.RectangleRegion = roi_list[0].as_rect()
            # Crop image
            additional_images = {}
            if roi is not None:
                if self.get_value_of("store_transformation") == 1:
                    wrapper.image_transformations.append(
                        {
                            "action": "crop",
                            "roi": roi,
                            "fixed_width": self.get_value_of("fixed_width"),
                            "fixed_height": self.get_value_of("fixed_height"),
                        }
                    )
                self.result = roi.crop(
                    src_image=img,
                    fixed_width=self.get_value_of("fixed_width"),
                    fixed_height=self.get_value_of("fixed_height"),
                )
                if self.get_value_of("grab_linked_images") == 1:
                    for lnk_img in wrapper.file_handler.linked_images:
                        try:
                            fh = file_handler_factory(
                                lnk_img,
                                database=wrapper.target_database,
                            )
                            # Store the crop under both the angle and the
                            # wavelength of the linked image
                            additional_images[fh.angle] = roi.crop(
                                src_image=fh.load_source_file(),
                                fixed_width=self.get_value_of("fixed_width"),
                                fixed_height=self.get_value_of("fixed_height"),
                            )
                            wrapper.store_image(
                                image=additional_images[fh.angle],
                                text=f"{fh.angle}_crop",
                            )
                            additional_images[fh.wavelength] = roi.crop(
                                src_image=fh.load_source_file(),
                                fixed_width=self.get_value_of("fixed_width"),
                                fixed_height=self.get_value_of("fixed_height"),
                            )
                            wrapper.store_image(
                                image=additional_images[fh.wavelength],
                                text=f"{fh.wavelength}_crop",
                            )
                        except Exception as e:
                            logger.error(f"Unable to process image because {repr(e)}")
            else:
                self.result = img
            # Finalize
            if self.get_value_of("save_image") != 0:
                self.save_images(additional_images=additional_images, **kwargs)
            wrapper.store_image(self.result, "cropped_image")
            self.demo_image = self.result
            res = True
        else:
            wrapper.store_image(wrapper.current_image, "current_image")
            res = True
    except Exception as e:
        res = False
        logger.error(f"Crop FAILED, exception: {repr(e)}")
    finally:
        # The finally clause overrides the bare return above, so False is
        # returned when the source kind is unknown
        return res
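# Minimal sketch of a rectangular crop with an optional fixed output size,
# the operation the tool delegates to `roi.crop(...)`. This helper is
# hypothetical: it resizes to the fixed dimensions, whereas the real ROI
# helper may pad or center instead.
def _demo_crop(image, left, top, width, height, fixed_width=0, fixed_height=0):
    import cv2

    # Slice the rectangular region out of the source array
    cropped = image[top : top + height, left : left + width]
    if fixed_width and fixed_height:
        cropped = cv2.resize(cropped, (fixed_width, fixed_height))
    return cropped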
def launch(**kwargs):
    def exit_error_message(msg: str) -> None:
        print(msg)
        logger.error(msg)

    start = timer()

    if "user" in kwargs and "password" in kwargs:
        dbp.master_password = kwargs.pop("user"), kwargs.pop("password")

    # Script
    script = kwargs.get("script", None)
    if script is not None and os.path.isfile(script):
        with open(script, "r") as f:
            script = json.load(f)
        kwargs["script"] = script

    # Image(s)
    image = kwargs.get("image", None)
    image_list = kwargs.get("image_list", None)
    image_folder = kwargs.get("image_folder", None)
    src_count = (
        1
        if "experiment" in kwargs and "database" in kwargs
        else len([src for src in [image, image_list, image_folder] if src is not None])
    )
    if src_count == 0:
        exit_error_message("Missing source images")
        return 1
    elif src_count > 1:
        exit_error_message("Too many sources")
        return 1

    if image is not None:
        kwargs["images"] = [image]
    elif image_list is not None:
        with open(image_list, "r") as f:
            kwargs["images"] = [img.replace("\n", "") for img in f.readlines()]
    elif image_folder is not None:
        img_lst = ImageList((".jpg", ".tiff", ".png", ".bmp"))
        img_lst.add_folder(image_folder)
        kwargs["images"] = img_lst.filter(masks=None)

    # State
    stored_state = kwargs.pop("stored_state", None)
    res = restore_state(blob=stored_state, overrides=kwargs)

    # Retrieve images
    image_list_ = res.get("images", None)

    # Build database
    db_data = res.get("database_data", None)
    database = res.get("database", None)
    experiment = res.get("experiment", None)
    if db_data is None:
        if database is None:
            db = None
        else:
            db = dbf.db_info_to_database(
                dbb.DbInfo(
                    display_name=experiment,
                    target=database,
                    dbms="pandas",
                )
            )
    else:
        db = dbf.db_info_to_database(dbb.DbInfo(**db_data))
    if experiment is not None:
        if "sub_folder_name" not in res or not res["sub_folder_name"]:
            res["sub_folder_name"] = experiment
        if "csv_file_name" not in res or not res["csv_file_name"]:
            res["csv_file_name"] = f"{experiment.lower()}_raw_data"
    else:
        db = dbf.db_info_to_database(dbb.DbInfo(**db_data))

    # Retrieve output folder
    output_folder_ = res.get("output_folder", None)
    if not output_folder_:
        exit_error_message("Missing output folder")
        return 1
    elif res.get("sub_folder_name", ""):
        output_folder_ = os.path.join(output_folder_, res["sub_folder_name"], "")
    else:
        output_folder_ = os.path.join(output_folder_, "")
    force_directories(output_folder_)
    csv_file_name = res.get("csv_file_name", None)
    if not csv_file_name:
        exit_error_message("Missing output file name")
        return 1

    if IS_USE_MULTI_THREAD and "thread_count" in res:
        try:
            mpc = int(res["thread_count"])
        except Exception:
            mpc = False
    else:
        mpc = False

    try:
        if isinstance(res["script"], str) and os.path.isfile(res["script"]):
            script = LoosePipeline.load(res["script"])
        elif isinstance(res["script"], dict):
            script = LoosePipeline.from_json(json_data=res["script"])
        else:
            exit_error_message("Failed to load script: Unknown error")
            return 1
    except Exception as e:
        exit_error_message(f"Failed to load script: {repr(e)}")
        return 1

    # Build pipeline processor
    pp = PipelineProcessor(
        database=None if db is None else db.copy(),
        dst_path=output_folder_,
        overwrite=res["overwrite"],
        seed_output=res["append_time_stamp"],
        group_by_series=res["generate_series_id"],
        store_images=False,
    )
    if not image_list_:
        pp.grab_files_from_data_base(
            experiment=db.db_info.display_name.lower(),
            order_by="plant",
            **db.main_selector,
        )
    else:
        pp.accepted_files = image_list_

    if res.get("randomize", False) is True:
        random.shuffle(pp.accepted_files)

    logger.info("Process summary")
    logger.info("_______________")
    logger.info(f'database: {res.get("database_data", None)}')
    logger.info(f'Output folder: {res["output_folder"]}')
    logger.info(f'CSV file name: {res["csv_file_name"]}')
    logger.info(f'Overwrite data: {res["overwrite"]}')
    logger.info(f'Subfolder name: {res["sub_folder_name"]}')
    logger.info(f'Append timestamp to root folder: {res["append_time_stamp"]}')
    logger.info(f'Generate series ID: {res["generate_series_id"]}')
    logger.info(f'Series ID time delta allowed: {res["series_id_time_delta"]}')
    logger.info(f'Build annotation ready CSV: {res["build_annotation_csv"]}')
    logger.info(f"Images: {len(pp.accepted_files)}")
    logger.info(f"Concurrent processes count: {mpc}")
    logger.info(f"Script summary: {str(script)}")

    if pp.accepted_files is None or len(pp.accepted_files) < 1:
        exit_error_message("No images to process")
        return 1

    pp.progress_callback = kwargs.get("progress_callback", None)
    pp.error_callback = kwargs.get("error_callback", None)
    pp.ensure_root_output_folder()

    pp.script = script
    if not pp.accepted_files:
        logger.error("Nothing to process")
        return 1

    # Process data
    groups_to_process = pp.prepare_groups(res["series_id_time_delta"])

    if res["build_annotation_csv"]:
        try:
            if pp.options.group_by_series:
                # Keep a single wrapper per unique series id (luid)
                files, luids = map(list, zip(*groups_to_process))
                wrappers = [
                    file_handler_factory(files[i], db)
                    for i in [luids.index(x) for x in set(luids)]
                ]
            else:
                wrappers = [
                    file_handler_factory(f, db)
                    for f in tqdm.tqdm(groups_to_process, desc="Building annotation CSV")
                ]
            pd.DataFrame.from_dict(
                {
                    "plant": [i.plant for i in wrappers],
                    "date_time": [i.date_time for i in wrappers],
                    "disease_index": "",
                }
            ).sort_values(
                by=["plant", "date_time"],
                axis=0,
                na_position="first",
                ascending=True,
            ).to_csv(
                os.path.join(
                    pp.options.dst_path,
                    f"{csv_file_name}_diseaseindex.csv",
                ),
                index=False,
            )
        except Exception:
            prefix = "FAIL"
            logger.exception("Unable to build disease index file")
        else:
            prefix = "SUCCESS"
            logger.info("Built disease index file")
        print(f"{prefix} - Disease index file")

    groups_to_process_count = len(groups_to_process)
    if groups_to_process_count > 0:
        pp.multi_thread = mpc
        pp.process_groups(groups_list=groups_to_process)

    # Merge dataframes
    pp.merge_result_files(csv_file_name=csv_file_name + ".csv")
    logger.info(
        f"Processed {groups_to_process_count} groups/images in {format_time(timer() - start)}"
    )
    # Build videos
    print("Done, see logs for more details")
    return 0
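# Hedged example of how `launch` might be driven from a command-line wrapper.
# The keyword names mirror the keys `launch` reads from kwargs/res above; the
# paths and values shown are assumptions for illustration only.
def _demo_cli():
    return launch(
        script="pipeline.json",
        image_folder="./images",
        output_folder="./output",
        sub_folder_name="",
        csv_file_name="raw_data",
        overwrite=False,
        append_time_stamp=True,
        generate_series_id=True,
        series_id_time_delta=20,
        build_annotation_csv=True,
        randomize=False,
    )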
def __init__(self, file_path, database):
    self._file_handler = fh.file_handler_factory(file_path, database)