def relabel():
    # Check parameters
    if args.images == "" or not os.path.exists(args.images) or not os.path.isdir(args.images):
        logger.error("Specified path does not exist (%s)" % args.images)
        return

    # Load the file
    patches = PatchArray(args.images)

    # Visualize
    vis = Visualize(patches, images_path=args.images)
    vis.pause = True
    vis.show()
def load_or_generate(self, patches=consts.FEATURES_FILE, load_patches=False, silent=False):
    """Load a model from file or generate it based on the features

    Args:
        patches (str or PatchArray): HDF5 file containing features
            (see feature_extractor for details), or an already loaded PatchArray
        load_patches (bool): Also load the patches referenced by the model file
        silent (bool): Suppress progress output while generating
    """
    # Load patches if necessary
    if isinstance(patches, basestring):
        if patches == "" or not os.path.exists(patches) or not os.path.isfile(patches):
            raise ValueError("Specified file does not exist (%s)" % patches)

        # Try loading a previously saved model
        if self.load_from_file(patches, load_patches=load_patches):
            return True

        # Read the file (reuse already loaded patches if available)
        if isinstance(self.patches, PatchArray):
            patches = self.patches
        else:
            patches = PatchArray(patches)
    elif isinstance(patches, PatchArray):
        self.patches = patches

        # Try loading a previously saved model
        if self.load_from_file(patches.filename, load_patches=load_patches):
            return True
    else:
        raise ValueError("patches must be a path to a file or a PatchArray.")

    assert patches.contains_features, "patches must contain features to calculate an anomaly model."

    self.patches = patches

    model_input = self.filter_training(patches)

    start = time.time()

    # Generate model
    if self.__generate_model__(model_input, silent=silent) is False:
        logger.error("Could not generate model.")
        return False

    end = time.time()

    self.save_to_file(model_input.size, start, end)
    self.calculate_mahalanobis_distances()
    return True
def load_from_file(self, model_file, load_patches=False):
    """Load a model from file"""
    with h5py.File(model_file, "r") as hf:
        g = hf.get(self.NAME)

        if g is None:
            return False

        logger.info("Reading model from: %s" % model_file)

        if load_patches:
            self.patches = PatchArray(model_file)

        return self.__load_model_from_file__(g)
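# Usage sketch (illustration only, mirroring the "Only for tests" blocks elsewhere
# in this repo; AnomalyModelSVG and consts.FEATURES_FILE are the names used there):
#
#   from anomalyModelSVG import AnomalyModelSVG
#   import consts
#
#   patches = PatchArray(consts.FEATURES_FILE)
#   model = AnomalyModelSVG()
#   # Loads a cached model from the HDF5 file if one was saved, otherwise
#   # fits a new one, saves it and computes the Mahalanobis distances:
#   success = model.load_or_generate(patches)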
def extract_features():
    if not args.list and len(args.files) == 0:
        logger.error("No input file specified.")
        return

    import tensorflow as tf
    import inspect
    import feature_extractor

    # Add before any TF calls (https://github.com/tensorflow/tensorflow/issues/29931#issuecomment-504217770)
    # Initialize the keras global outside of any tf.functions
    temp = tf.zeros([4, 32, 32, 3])
    tf.keras.applications.vgg16.preprocess_input(temp)

    # Get all the available feature extractor names
    extractor_names = [e[0] for e in inspect.getmembers(feature_extractor, inspect.isclass)
                       if e[0] != "FeatureExtractorBase"]

    module = feature_extractor

    if args.list:
        print("%-30s | %-15s | %-4s | %-8s | %-5s" % ("NAME", "OUTPUT SHAPE", "RF", "IMG SIZE", "RF / IMG"))
        print("-" * 80)
        for e in [getattr(module, name) for name in extractor_names]:
            factor = e.RECEPTIVE_FIELD["size"][0] / float(e.IMG_SIZE)
            print("%-30s | %-15s | %-4s | %-8s | %.3f %s" % (
                e.__name__.replace("FeatureExtractor", ""),
                e.OUTPUT_SHAPE,
                e.RECEPTIVE_FIELD["size"][0],
                e.IMG_SIZE,
                factor,
                "!" if factor >= 2 else ""))
        return

    if args.extractor is None:
        args.extractor = extractor_names
        # args.extractor = filter(lambda f: "EfficientNet" in f, args.extractor)

    if isinstance(args.files, basestring):
        args.files = [args.files]

    patches = PatchArray(args.files)

    ## WZL:
    patches = patches.training_and_validation

    # For the benchmark subset:
    # patches = patches.training_and_validation[0:10]

    ## FieldSAFE:
    # p = patches[:, 0, 0]
    # f = p.round_numbers == 1
    # patches = patches[f]

    # vis = Visualize(patches)
    # vis.show()

    dataset = patches.to_dataset()
    dataset_3D = patches.to_temporal_dataset(16)

    total = patches.shape[0]

    # Add progress bar if multiple extractors
    if len(args.extractor) > 1:
        args.extractor = tqdm(args.extractor, desc="Extractors", file=sys.stderr)

    for extractor_name in args.extractor:
        try:
            bs = getattr(module, extractor_name).TEMPORAL_BATCH_SIZE

            # shape = getattr(module, extractor_name).OUTPUT_SHAPE
            # if np.prod(shape) > 300000:
            #     logger.warning("Skipping %s (output too big)" % extractor_name)
            #     continue

            logger.info("Instantiating %s" % extractor_name)
            extractor = getattr(module, extractor_name)()  # Get an instance

            if bs > 1:
                extractor.extract_dataset(dataset_3D, total)
            else:
                extractor.extract_dataset(dataset, total)
        except KeyboardInterrupt:
            logger.info("Terminated by CTRL-C")
            return
        except Exception:
            logger.error("%s: %s" % (extractor_name, traceback.format_exc()))
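# Sketch of running a single extractor by hand (illustration only; it uses the same
# calls as the loop above, and FeatureExtractorC3D_Block3 is one of the extractors
# defined in this repo):
#
#   import feature_extractor
#   patches = PatchArray(consts.FEATURES_FILE)
#   extractor = feature_extractor.FeatureExtractorC3D_Block3()
#   if extractor.TEMPORAL_BATCH_SIZE > 1:
#       extractor.extract_dataset(patches.to_temporal_dataset(16), patches.shape[0])
#   else:
#       extractor.extract_dataset(patches.to_dataset(), patches.shape[0])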
def metrics():
    ################
    #  Parameters  #
    ################
    files = args.files

    # Check parameters
    if not files or len(files) < 1 or files[0] == "":
        raise ValueError("Please specify at least one filename (%s)" % files)

    if isinstance(files, basestring):
        files = [files]

    # Expand wildcards
    files_expanded = []
    for s in files:
        files_expanded += glob(s)
    files = sorted(list(set(files_expanded)))  # Remove duplicates

    # files = filter(lambda f: "EfficientNetB6" in f, files)
    # files = filter(lambda f: "EfficientNetB6_Level6" not in f, files)

    if args.output is None:
        filename = os.path.join(consts.METRICS_PATH,
                                datetime.now().strftime("%Y_%m_%d_%H_%M_metrics.csv"))
    else:
        filename = args.output

    if not os.path.exists(os.path.dirname(filename)):
        os.makedirs(os.path.dirname(filename))

    write_header = not os.path.exists(filename)

    with open(filename, "a") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=[
            "Extractor", "Measure", "Model", "Gaussian filter", "Other filter",
            "ROC_AUC", "AUC_PR", "Max. f1",
            "FPR at TPR=0.9", "FPR at TPR=0.95", "FPR at TPR=0.99",
            "FPR at TPR=0.995", "FPR at TPR=0.999", "FPR at TPR=0.9999"])

        if write_header:
            writer.writeheader()

        with tqdm(total=len(files), file=sys.stderr, desc="Calculating metrics") as pbar:
            for features_file in files:
                pbar.set_description(os.path.basename(features_file))

                # Check parameters
                if features_file == "" or not os.path.exists(features_file) or not os.path.isfile(features_file):
                    logger.error("Specified feature file does not exist (%s)" % features_file)
                    continue

                # Load the file
                patches = PatchArray(features_file)
                patches.calculate_patch_labels()

                res = patches.calculate_metrics()

                for (extractor, measure, model, gauss_filter, other_filter,
                     roc_auc, auc_pr, max_f1,
                     fpr0, fpr1, fpr2, fpr3, fpr4, fpr5) in res:
                    writer.writerow({
                        "Extractor": extractor,
                        "Measure": measure,
                        "Model": model,
                        "Gaussian filter": gauss_filter,
                        "Other filter": other_filter,
                        "ROC_AUC": roc_auc,
                        "AUC_PR": auc_pr,
                        "Max. f1": max_f1,
                        "FPR at TPR=0.9": fpr0,
                        "FPR at TPR=0.95": fpr1,
                        "FPR at TPR=0.99": fpr2,
                        "FPR at TPR=0.995": fpr3,
                        "FPR at TPR=0.999": fpr4,
                        "FPR at TPR=0.9999": fpr5})

                pbar.update()
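# Sketch for inspecting the resulting CSV (assumes pandas is available; pandas is
# not used elsewhere in this repo, so treat this as an optional convenience):
#
#   import pandas as pd
#   df = pd.read_csv(filename)
#   # Best ROC_AUC per extractor/model combination:
#   print(df.groupby(["Extractor", "Model"])["ROC_AUC"].max())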
m.attrs["max_no_anomaly"] = np.nanmax( no_anomaly) if no_anomaly.size > 0 else np.NaN m.attrs["max_anomaly"] = np.nanmax( anomaly) if anomaly.size > 0 else np.NaN logger.info("Saved Mahalanobis distances to file") return True # Only for tests if __name__ == "__main__": from anomalyModelSVG import AnomalyModelSVG import consts patches = PatchArray(consts.FEATURES_FILE) model = AnomalyModelSpatialBinsBase(AnomalyModelSVG, patches, cell_size=0.2) if model.load_or_generate(patches): # patches.show_spatial_histogram(model.CELL_SIZE) def patch_to_color(patch): b = 0 g = 0 r = min(255, int(model.__mahalanobis_distance__(patch) * (255 / 50))) return (b, g, r)
def calculate_locations():
    ################
    #  Parameters  #
    ################
    files = args.files

    # Check parameters
    if not files or len(files) < 1 or files[0] == "":
        raise ValueError("Please specify at least one filename (%s)" % files)

    if isinstance(files, basestring):
        files = [files]

    # Expand wildcards
    files_expanded = []
    for s in files:
        files_expanded += glob(s)
    files = sorted(list(set(files_expanded)))  # Remove duplicates

    # files = filter(lambda f: "EfficientNetINB0_Level6" in f or "EfficientNetB0_Level6" in f, files)
    # files = filter(lambda f: f in ["/media/ldwg/DataBig/data/WZL/Features/ResNet50V2_Stack4.h5"], files)

    if args.index is not None:
        files = files[args.index::args.total]

    with tqdm(total=len(files), file=sys.stderr) as pbar:
        for features_file in files:
            pbar.set_description(os.path.basename(features_file))

            # Check parameters
            if features_file == "" or not os.path.exists(features_file) or not os.path.isfile(features_file):
                logger.error("Specified feature file does not exist (%s)" % features_file)
                continue

            try:
                # Load the file
                patches = PatchArray(features_file)

                models = [AnomalyModelSVG(), AnomalyModelMVG()]

                # Calculate and save the locations
                for fake in [True, False]:
                    patches.calculate_patch_locations(fake=fake)

                    for cell_size in [0.2, 0.5]:
                        key = "%.2f" % cell_size
                        if fake:
                            key = "fake_" + key

                        patches.calculate_rasterization(cell_size, fake=fake)

                        models.append(AnomalyModelSpatialBinsBase(AnomalyModelSVG, patches, cell_size=cell_size, fake=fake))
                        models.append(AnomalyModelSpatialBinsBase(AnomalyModelMVG, patches, cell_size=cell_size, fake=fake))

                        # BalancedDistribution uses the SVG mean as learning threshold
                        if patches.contains_mahalanobis_distances and "SpatialBin/SVG/%s" % key in patches.mahalanobis_distances.dtype.names:
                            threshold_learning = int(np.mean(patches.mahalanobis_distances["SpatialBin/SVG/%s" % key]))
                            # Bind the threshold at definition time (a plain closure would
                            # pick up the value of the last loop iteration)
                            models.append(AnomalyModelSpatialBinsBase(
                                lambda t=threshold_learning: AnomalyModelBalancedDistributionSVG(
                                    initial_normal_features=10,
                                    threshold_learning=t,
                                    pruning_parameter=0.5),
                                patches, cell_size=cell_size, fake=fake))

                # BalancedDistribution uses the SVG mean as learning threshold
                if patches.contains_mahalanobis_distances and "SVG" in patches.mahalanobis_distances.dtype.names:
                    threshold_learning = int(np.mean(patches.mahalanobis_distances["SVG"]))
                    models.append(AnomalyModelBalancedDistributionSVG(
                        initial_normal_features=500,
                        threshold_learning=threshold_learning,
                        pruning_parameter=0.5))

                # # For BalancedDistributionTest
                # if patches.contains_mahalanobis_distances and "SVG" in patches.mahalanobis_distances.dtype.names:
                #     for threshold_learning in np.linspace(np.min(patches.mahalanobis_distances["SVG"]), np.max(patches.mahalanobis_distances["SVG"]), 5, dtype=np.int):
                #         for pruning_parameter in [0.2, 0.5, 0.8]:
                #             for initial_normal_features in [10, 500, 1000]:
                #                 models.append(AnomalyModelBalancedDistributionSVG(initial_normal_features=initial_normal_features, threshold_learning=threshold_learning, pruning_parameter=pruning_parameter))

                # if patches.contains_mahalanobis_distances and "MVG" in patches.mahalanobis_distances.dtype.names:
                #     for threshold_learning in np.linspace(np.min(patches.mahalanobis_distances["MVG"]), np.max(patches.mahalanobis_distances["MVG"]), 5, dtype=np.int):
                #         for pruning_parameter in [0.2, 0.5, 0.8]:
                #             for initial_normal_features in [10, 500, 1000]:
                #                 models.append(AnomalyModelBalancedDistribution(initial_normal_features=initial_normal_features, threshold_learning=threshold_learning, pruning_parameter=pruning_parameter))

                with tqdm(total=len(models), file=sys.stderr) as pbar2:
                    for m in models:
                        try:
                            pbar2.set_description(m.NAME)
                            logger.info("Calculating %s" % m.NAME)

                            model, mdist = m.is_in_file(features_file)

                            if not model:
                                m.load_or_generate(patches, silent=False)
                            elif not mdist:
                                logger.info("Model already calculated")
                                m.load_from_file(features_file)
                                m.patches = patches
                                m.calculate_mahalanobis_distances()
                            else:
                                logger.info("Model and Mahalanobis distances already calculated")
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            logger.error("%s: %s" % (features_file, traceback.format_exc()))

                        pbar2.update()
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                logger.error("%s: %s" % (features_file, traceback.format_exc()))

            pbar.update()
class FeatureExtractorC3D_Block3(FeatureExtractorC3D):
    """Feature extractor based on C3D (trained on Sports-1M).
    Output layer: conv3b + MaxPooling3D to reduce frames"""
    BATCH_SIZE = 32
    LAYER_NAME = "conv3b"
    OUTPUT_SHAPE = (28, 28, 256)
    RECEPTIVE_FIELD = {'stride': (4.0, 4.0), 'size': (23, 23)}

# Only for tests
if __name__ == "__main__":
    from common import PatchArray

    extractor = FeatureExtractorC3D()
    # extractor.plot_model(extractor.model)

    patches = PatchArray()

    p = patches[:, 0, 0]

    f = np.zeros(p.shape, dtype=np.bool)
    f[:] = np.logical_and(
        p.directions == 1,              # CCW and
        np.logical_or(
            p.labels == 2,              # Anomaly or
            np.logical_and(
                p.round_numbers >= 7,   # Round between 7 and 9
                p.round_numbers <= 9)))

    # Let's make contiguous blocks of at least 10, so
    # we can do some meaningful temporal smoothing afterwards
    for i, b in enumerate(f):
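        # NOTE: The body of this loop is missing from this excerpt. What follows is
        # a hypothetical sketch of the stated intent only ("contiguous blocks of at
        # least 10" frames), not the original code: whenever a new True run starts,
        # extend the selection to at least 10 frames (mutations of f are visible to
        # the running enumerate, so extended frames are not treated as new runs).
        if b and (i == 0 or not f[i - 1]):
            f[i:i + 10] = True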
def anomaly_model_benchmark():
    ################
    #  Parameters  #
    ################
    files = args.files

    # Check parameters
    if not files or len(files) < 1 or files[0] == "":
        raise ValueError("Please specify at least one filename (%s)" % files)

    if isinstance(files, basestring):
        files = [files]

    # Expand wildcards
    files_expanded = []
    for s in files:
        files_expanded += glob(s)
    files = sorted(list(set(files_expanded)))  # Remove duplicates

    if args.output is None:
        filename = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                datetime.now().strftime("%Y_%m_%d_%H_%M_benchmark_anomaly_model.csv"))
    else:
        filename = args.output

    write_header = not os.path.exists(filename)

    # Make sure every file contains SVG Mahalanobis distances, so the
    # BalancedDistribution models below can derive their learning thresholds
    with tqdm(total=len(files), file=sys.stderr, desc="Calculating SVG") as pbar:
        for features_file in files:
            # Load the file
            patches = PatchArray(features_file)
            m = AnomalyModelSVG()
            m.load_or_generate(patches, silent=True)
            pbar.update()

    with open(filename, "a") as csvfile:
        writer = None

        with tqdm(total=len(files), file=sys.stderr, desc="Benchmarking anomaly models") as pbar:
            for features_file in files:
                extractor_name = os.path.basename(features_file).replace(".h5", "")
                result = {"Extractor": extractor_name.replace("FeatureExtractor", "")}

                def log(s, times):
                    """Log the measured durations `times` with info string s"""
                    logger.info("%-40s (%s): %.5fs - %.5fs" % (extractor_name, s, np.min(times), np.max(times)))
                    result[s] = np.min(times)

                pbar.set_description(os.path.basename(features_file))

                # Check parameters
                if features_file == "" or not os.path.exists(features_file) or not os.path.isfile(features_file):
                    logger.error("Specified feature file does not exist (%s)" % features_file)
                    continue

                # Load the file
                patches = PatchArray(features_file)

                models = [AnomalyModelSVG()]

                for fake in [True, False]:
                    for cell_size in [0.2, 0.5]:
                        models.append(AnomalyModelSpatialBinsBase(AnomalyModelSVG, cell_size=cell_size, fake=fake))
                        # models.append(AnomalyModelSpatialBinsBase(lambda: AnomalyModelBalancedDistributionSVG(initial_normal_features=10, threshold_learning=threshold_learning, pruning_parameter=0.5), cell_size=cell_size, fake=fake))

                # Calculate anomaly models
                if patches.contains_mahalanobis_distances and "SVG" in patches.mahalanobis_distances.dtype.names:
                    threshold_learning = int(np.mean(patches.mahalanobis_distances["SVG"]))
                    models.append(AnomalyModelBalancedDistributionSVG(
                        initial_normal_features=20,
                        threshold_learning=threshold_learning,
                        pruning_parameter=0.5))

                with tqdm(total=len(models), file=sys.stderr) as pbar2:
                    for m in models:
                        try:
                            pbar2.set_description(m.NAME)
                            logger.info("Calculating %s" % m.NAME)

                            log("%s (Creation)" % m.NAME,
                                np.array(timeit.repeat(lambda: m.__generate_model__(patches, silent=True),
                                                       number=1, repeat=3)))

                            def _evaluate_frame():
                                for i in np.ndindex(patches.shape):
                                    m.__mahalanobis_distance__(patches[i])

                            log("%s (Maha per patch)" % m.NAME,
                                np.array(timeit.repeat(lambda: m.__mahalanobis_distance__(patches[0, 0, 0]),
                                                       number=1, repeat=10)))

                            log("%s (Maha per frame)" % m.NAME,
                                np.array(timeit.repeat(lambda: _evaluate_frame(),
                                                       number=1, repeat=10)) / float(patches.shape[0]))
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            logger.error("%s: %s" % (features_file, traceback.format_exc()))

                        pbar2.update()

                if writer is None:
                    writer = csv.DictWriter(csvfile, fieldnames=result.keys())
                    if write_header:
                        writer.writeheader()

                writer.writerow(result)
                pbar.update()
def rasterization_benchmark():
    ################
    #  Parameters  #
    ################
    files = args.files

    # Check parameters
    if not files or len(files) < 1 or files[0] == "":
        raise ValueError("Please specify at least one filename (%s)" % files)

    if isinstance(files, basestring):
        files = [files]

    # Expand wildcards
    files_expanded = []
    for s in files:
        files_expanded += glob(s)
    files = sorted(list(set(files_expanded)))  # Remove duplicates

    if args.output is None:
        filename = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                datetime.now().strftime("%Y_%m_%d_%H_%M_benchmark_rasterization.csv"))
    else:
        filename = args.output

    write_header = not os.path.exists(filename)

    with open(filename, "a") as csvfile:
        writer = None

        with tqdm(total=len(files), file=sys.stderr, desc="Benchmarking rasterization") as pbar:
            for features_file in files:
                extractor_name = os.path.basename(features_file).replace(".h5", "")
                result = {"Extractor": extractor_name.replace("FeatureExtractor", "")}

                def log(s, times):
                    """Log the measured durations `times` with info string s"""
                    logger.info("%-40s (%s): %.5fs - %.5fs" % (extractor_name, s, np.min(times), np.max(times)))
                    result[s] = np.min(times)

                pbar.set_description(os.path.basename(features_file))

                # Check parameters
                if features_file == "" or not os.path.exists(features_file) or not os.path.isfile(features_file):
                    logger.error("Specified feature file does not exist (%s)" % features_file)
                    continue

                # Load the file
                patches = PatchArray(features_file)

                for fake in [True, False]:
                    #####################
                    #  RECEPTIVE FIELD  #
                    #####################
                    key = "locations"
                    if fake:
                        key = "fake_" + key

                    start = time.time()
                    image_locations = patches._get_receptive_fields(fake)
                    relative_locations = patches._image_to_relative(image_locations)

                    for i in tqdm(range(patches[key].shape[0]), desc="Calculating locations", file=sys.stderr):
                        patches[key][i] = patches._relative_to_absolute(relative_locations, patches[i, 0, 0].camera_locations)
                    end = time.time()

                    patches.contains_locations = True
                    patches._save_patch_locations(fake, start, end)

                    # Time the individual blocks
                    log("RF (img) [f: %s]" % fake,
                        np.array(timeit.repeat(lambda: patches._get_receptive_fields(fake=fake),
                                               number=1, repeat=5)))

                    log("RF --> rel [f: %s]" % fake,
                        np.array(timeit.repeat(lambda: patches._image_to_relative(image_locations),
                                               number=1, repeat=5)))

                    log("RF --> abs [f: %s]" % fake,
                        np.array(timeit.repeat(lambda: patches._relative_to_absolute(relative_locations, patches[0, 0, 0].camera_locations),
                                               number=1, repeat=10)))

                    #####################
                    #   RASTERIZATION   #
                    #####################
                    for cell_size in [0.2, 0.5]:
                        key = "%.2f" % cell_size
                        if fake:
                            key = "fake_" + key

                        grid, shape = patches._calculate_grid(cell_size, fake=fake)

                        rf_factor = patches.receptive_field[0] / patches.image_size

                        logger.info("%i bins in x and %i bins in y direction (with cell size %.2f)" % (shape + (cell_size,)))

                        start = time.time()
                        # Get the corresponding bin for every feature
                        Parallel(n_jobs=2, prefer="threads")(
                            delayed(patches._bin)(i, grid, shape, rf_factor, key, fake)
                            for i in tqdm(range(patches.shape[0]), desc="Calculating bins", file=sys.stderr))
                        end = time.time()

                        patches._save_rasterization(key, grid, shape, start, end)

                        patches.contains_bins[key] = True
                        patches.rasterizations[key] = np.vectorize(lambda b: b.patches, otypes=[object])(patches.rasterizations[key])

                        # Time the individual blocks
                        log("Grid [%.2f, f: %s]" % (cell_size, fake),
                            np.array(timeit.repeat(lambda: patches._calculate_grid(cell_size, fake=fake),
                                                   number=1, repeat=3)))

                        log("Bins [%.2f, f: %s]" % (cell_size, fake),
                            np.array(timeit.repeat(lambda: patches._bin(0, grid, shape, rf_factor, key, fake),
                                                   number=1, repeat=3)))

                if writer is None:
                    writer = csv.DictWriter(csvfile, fieldnames=result.keys())
                    if write_header:
                        writer.writeheader()

                writer.writerow(result)
                pbar.update()
def calculate_locations():
    ################
    #  Parameters  #
    ################
    files = args.files

    # Check parameters
    if not files or len(files) < 1 or files[0] == "":
        raise ValueError("Please specify at least one filename (%s)" % files)

    if isinstance(files, basestring):
        files = [files]

    # Expand wildcards
    files_expanded = []
    for s in files:
        files_expanded += glob(s)
    files = sorted(list(set(files_expanded)))  # Remove duplicates

    files = filter(lambda f: "EfficientNet" not in f, files)

    if args.index is not None:
        files = files[args.index::args.total]

    with tqdm(total=len(files), file=sys.stderr) as pbar:
        for features_file in files:
            pbar.set_description(os.path.basename(features_file))

            # Check parameters
            if features_file == "" or not os.path.exists(features_file) or not os.path.isfile(features_file):
                logger.error("Specified feature file does not exist (%s)" % features_file)
                continue

            try:
                # Load the file
                patches = PatchArray(features_file)

                models = [AnomalyModelSVG()]

                # Calculate and save the locations
                for fake in [False]:
                    patches.calculate_patch_locations(fake=fake)

                    for cell_size in [0.2, 0.5]:
                        patches.calculate_rasterization(cell_size, fake=fake)
                        models.append(AnomalyModelSpatialBinsBase(AnomalyModelSVG, cell_size=cell_size, fake=fake))
                        # models.append(AnomalyModelSpatialBinsBase(lambda: AnomalyModelBalancedDistributionSVG(initial_normal_features=10, threshold_learning=threshold_learning, pruning_parameter=0.5), cell_size=cell_size, fake=fake))

                # Calculate anomaly models
                if patches.contains_mahalanobis_distances and "SVG" in patches.mahalanobis_distances.dtype.names:
                    threshold_learning = int(np.mean(patches.mahalanobis_distances["SVG"]))
                    models.append(AnomalyModelBalancedDistributionSVG(
                        initial_normal_features=500,
                        threshold_learning=threshold_learning,
                        pruning_parameter=0.5))

                with tqdm(total=len(models), file=sys.stderr) as pbar2:
                    for m in models:
                        try:
                            pbar2.set_description(m.NAME)
                            logger.info("Calculating %s" % m.NAME)

                            model, mdist = m.is_in_file(features_file)

                            if not model:
                                m.load_or_generate(patches, silent=True)
                            elif not mdist:
                                logger.info("Model already calculated")
                                m.load_from_file(features_file)
                                m.patches = patches
                                m.calculate_mahalanobis_distances()
                            else:
                                logger.info("Model and Mahalanobis distances already calculated")
                        except (KeyboardInterrupt, SystemExit):
                            raise
                        except Exception:
                            logger.error("%s: %s" % (features_file, traceback.format_exc()))

                        pbar2.update()
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception:
                logger.error("%s: %s" % (features_file, traceback.format_exc()))

            pbar.update()
        if stop_label == 0:
            self.patches = self.patches.stop_ok
        elif stop_label == 1:
            self.patches = self.patches.stop_dont
        elif stop_label == 2:
            self.patches = self.patches.stop_do

        if direction == 0:
            self.patches = self.patches.direction_unknown
        elif direction == 1:
            self.patches = self.patches.direction_ccw
        elif direction == 2:
            self.patches = self.patches.direction_cw

        if round_number != -1:
            self.patches = self.patches.round_number(round_number)

        cv2.setTrackbarPos("index", self.WINDOWS_CONTROLS, 0)
        cv2.setTrackbarMax("index", self.WINDOWS_CONTROLS, max(0, self.patches.shape[0] - 1))

        self.__maha__(only_refresh_image=True)
        self.__draw__()

if __name__ == "__main__":
    import consts

    patches = PatchArray().training_and_validation

    vis = Visualize(patches)
    vis.show()