def train_classifier(self, class_id):
    """
    Retrain One-Class Classifiers (partial_fit)
    """
    log.info('cl', "(Re-)training Classifier for user ID {}".format(class_id))

    # extract pending update data
    with self.training_data_lock:
        # get update samples from stack
        if class_id in self.classifier_update_stacks:
            update_samples = self.classifier_update_stacks[class_id]
            # clear the stack
            self.classifier_update_stacks[class_id] = np.array([])
        else:
            update_samples = []

    start = time()

    if len(update_samples) > 0:
        """
        INCREMENTAL Methods: Use partial fit with stored update data
        - Samples: Partially stored in Cluster
        """
        self.classifiers[class_id].partial_fit(update_samples)
        self.classifier_states[class_id] += 1
    else:
        log.warning("No training/update samples available")

    if self.__verbose:
        log.info('cl', "fitting took {} seconds".format(time() - start))
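# The method above assumes that other threads keep pushing new embeddings onto a
# per-class update stack which the trainer then consumes atomically under a lock.
# A minimal stand-alone sketch of that producer/consumer pattern follows; names
# such as `push_samples` and `pop_samples` are illustrative only, not part of
# the original class:
import threading
import numpy as np

update_stacks = {}                      # class_id -> pending samples (np.array)
training_data_lock = threading.Lock()

def push_samples(class_id, samples):
    # producer side: queue new samples for the next partial_fit
    with training_data_lock:
        pending = update_stacks.get(class_id)
        if pending is None or len(pending) == 0:
            update_stacks[class_id] = np.asarray(samples)
        else:
            update_stacks[class_id] = np.concatenate((pending, samples))

def pop_samples(class_id):
    # trainer side: take all pending samples and clear the stack
    with training_data_lock:
        pending = update_stacks.get(class_id, np.array([]))
        update_stacks[class_id] = np.array([])
        return pending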
def __predict_ORIG(self, samples):
    proba, class_ids = self.predict_proba(samples)

    # no classes detected at all - novelty
    mask_0 = proba > 0
    if len(proba[mask_0]) == 0:
        return -1

    mask_class = proba > self.__class_thresh
    nr_classes = len(proba[mask_class])

    if nr_classes > 0:
        # class detected
        if nr_classes > 1:
            # multiple classes detected - batch invalid
            if self.__verbose:
                log.severe("Multiple classes detected: {}".format(nr_classes))
            return None

        # check whether any class other than the detected one lies in the confusion band
        confusion_mask = (self.__confusion_thresh < proba) & (proba < self.__class_thresh)
        if len(proba[confusion_mask]) > 0:
            log.warning(
                "Class confusion - force re-identification: {}% confusion, {}% identification, {} samples".format(
                    proba[confusion_mask], proba[mask_class], len(samples)))
            # calc pairwise distance. If small then force re-identification
            # for sample in proba[confusion_mask]:
            # Todo: implement properly
            # return None

        class_id_arr = class_ids[mask_class]
        return int(class_id_arr[0])
    else:
        # no class above the identification threshold
        if len(proba[proba > self.__novelty_thresh]) > 0:
            print "--- no classes detected but novelty threshold exceeded: {}".format(proba)
            return None
        return -1
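# Toy, stand-alone sketch of the decision logic above (not part of the class):
# `proba` holds one response per known class and `class_ids` the matching ids;
# the threshold defaults below are illustrative placeholders, not the values
# configured in this project.
import numpy as np

def decide_identity(proba, class_ids, class_thresh=0.75, novelty_thresh=0.2):
    proba = np.asarray(proba, dtype=float)
    class_ids = np.asarray(class_ids)
    if not np.any(proba > 0):
        return -1                              # nothing responded at all: novelty
    mask_class = proba > class_thresh
    if np.sum(mask_class) > 1:
        return None                            # several classes claim the batch: invalid
    if np.sum(mask_class) == 1:
        return int(class_ids[mask_class][0])   # unambiguous identification
    if np.any(proba > novelty_thresh):
        return None                            # weak responses: no safe decision
    return -1                                  # clearly unknown person

# decide_identity([0.1, 0.9], [7, 12]) -> 12
# decide_identity([0.05, 0.1], [7, 12]) -> -1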
def get_embeddings(self, rgb_images, align=True):
    """
    Calculate deep face embeddings for input images
    :param rgb_images: RGB (!) images
    :param align: align faces before the forward pass
    :return: np.array of embedding vectors
    """
    images_normalized = []
    embeddings = []
    start = time.time()

    # normalize (align) images
    if align is True:
        if len(rgb_images) > 0:
            for imgObject in rgb_images:
                # align face - ignore images with multiple bounding boxes
                aligned = self.align_face(imgObject, self.landmarks, self.size)
                if aligned is not None:
                    images_normalized.append(aligned)

        if len(images_normalized) == 0:
            log.warning("No suitable images (no faces detected)")
            return np.array(embeddings)

        if self.verbose is True:
            log.debug('cnn', "Alignment took {} seconds - {}/{} images suitable".format(
                time.time() - start, len(images_normalized), len(rgb_images)))
    else:
        images_normalized = rgb_images

    # generate embeddings
    start = time.time()
    for img in images_normalized:
        rep = self.neural_net.forward(img)
        embeddings.append(rep)

    # if self.verbose:
    #     print("--- Neural network forward pass took {} seconds.".format(time.time() - start))

    return np.array(embeddings)
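# Stand-alone sketch of the same alignment + embedding pipeline using the
# OpenFace API that this wrapper appears to be built on (dlib landmark alignment
# followed by a Torch network forward pass). The model paths are placeholders,
# not the files used by this project:
import cv2
import openface

align_tool = openface.AlignDlib("models/shape_predictor_68_face_landmarks.dat")
net = openface.TorchNeuralNet("models/nn4.small2.v1.t7", imgDim=96)

def embed_single(bgr_image):
    rgb = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    bb = align_tool.getLargestFaceBoundingBox(rgb)
    if bb is None:
        return None                       # no face detected
    aligned = align_tool.align(96, rgb, bb,
                               landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
    if aligned is None:
        return None                       # alignment failed
    return net.forward(aligned)           # 128-dimensional embedding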
def train_classifier(self, class_id):
    """
    Retrain One-Class Classifiers (partial_fit)
    """
    log.info('cl', "(Re-)training Classifier for user ID {}".format(class_id))

    if class_id not in self.classifiers:
        log.severe("Cannot train class {} without creating the classifier first".format(class_id))
        return False

    start = time.time()

    with self.training_lock:
        # get update samples from stack
        # update_samples = self.classifier_update_stacks.get(class_id, []) or []
        if class_id in self.classifier_update_stacks:
            update_samples = self.classifier_update_stacks[class_id]
        else:
            update_samples = []

        if len(update_samples) > 0:
            training_before = self.classifier_states[class_id]

            if self.CLASSIFIER == 'ABOD':
                """
                OFFLINE Classifier: retrain with all available data
                - Samples: Stored in user db, reloaded upon every fit
                """
                # instead of a partial fit: add samples and refit over the complete data
                self.p_user_db.add_samples(class_id, update_samples)
                samples = self.p_user_db.get_class_samples(class_id)

                # stop refitting once the stored sample set gets too large
                if len(samples) > 100:
                    log.warning("Sample size exceeding 100. No refitting.")
                else:
                    # always use the fit method (no partial fit available)
                    self.classifiers[class_id].fit(samples)
                    self.classifier_states[class_id] += 1
            elif self.CLASSIFIER == 'IABOD':
                """
                INCREMENTAL Methods: Use partial fit with stored update data
                - Samples: Partially stored in ABOD Cluster
                """
                # partial update: partial_fit
                self.classifiers[class_id].partial_fit(update_samples)
                self.classifier_states[class_id] += 1
            elif self.CLASSIFIER == 'ISVM':
                """
                INCREMENTAL Methods: Use partial fit with stored update data
                - Samples: Partially stored in Cluster
                """
                self.classifiers[class_id].partial_fit(update_samples)
                self.classifier_states[class_id] += 1

            # empty the update list if training was performed
            if self.classifier_states[class_id] - training_before == 1:
                self.classifier_update_stacks[class_id] = []
        else:
            log.warning("No training/update samples available")

    if self.__verbose:
        log.info('cl', "fitting took {} seconds".format(time.time() - start))

    return True
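# Minimal sketch of the incremental ('ISVM'-style) branch using scikit-learn's
# SGDOneClassSVM, which exposes a partial_fit method (requires scikit-learn >= 1.0,
# newer than the stack this module targets). It only illustrates the update
# pattern, not the classifier implementation used by this project:
import numpy as np
from sklearn.linear_model import SGDOneClassSVM

clf = SGDOneClassSVM(nu=0.1, random_state=0)
clf.partial_fit(np.random.rand(20, 128))      # initial batch of embeddings
clf.partial_fit(np.random.rand(5, 128))       # later update with new samples only
print(clf.predict(np.random.rand(3, 128)))    # +1 = same person, -1 = outlier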
def update(self, samples):
    # init - add all samples if no data yet
    if len(self.__data) == 0:
        self.__data = samples
        return

    # =======================================
    # 1. Reduce data/sample dimensionality

    if self.dim_reduction > 0:
        basis, mean, var = ExtractMaxVarComponents(self.__data, self.dim_reduction)
        self.log_expl_var.append(var)
    else:
        basis, mean = ExtractSubspace(self.__data, 0.8)

    cluster_reduced = ProjectOntoSubspace(self.__data, mean, basis)
    samples_reduced = ProjectOntoSubspace(samples, mean, basis)
    dims = np.shape(cluster_reduced)

    # select minimum data to build convex hull
    # min_nr_elems = dims[1] + 4

    if self.__verbose:
        print "Reducing dimension: {}->{}".format(np.shape(self.__data)[1], dims[1])

    # =======================================
    # 2. Calculate convex hull in the subspace

    data_hull = cluster_reduced  # take all samples of the data
    hull = Delaunay(data_hull)

    if self.__verbose:
        print "Calculating data hull using {}/{} points".format(len(data_hull), len(self.__data))

    # =======================================
    # 3. Select new samples from outside the convex hull

    if not self.__inverted:
        inclusion_mask = np.array([
            False if hull.find_simplex(sample) >= 0 else True
            for sample in samples_reduced
        ])
        if self.__verbose:
            # Todo: use the inclusion mask for counting
            nr_elems_outside_hull = np.sum([
                0 if hull.find_simplex(sample) >= 0 else 1
                for sample in samples_reduced
            ])
            print "Elements OUTSIDE hull (to include): {}/{}".format(nr_elems_outside_hull, len(samples))
    else:
        inclusion_mask = np.array([
            True if hull.find_simplex(sample) >= 0 else False
            for sample in samples_reduced
        ])

    # add samples (samples need to be np.array)
    self.__data = np.concatenate((self.__data, samples[inclusion_mask]))

    # =======================================
    # 4. Recalculate hull with the newly added points
    # If memory is exceeded: perform unrefinement process -
    # discharge sampling directions with the lowest variance contribution

    if self.dim_reduction > 0:
        nr_comps = self.dim_reduction if len(self.__data) <= self.max_size else self.dim_removal
        if len(self.__data) > 150:
            nr_comps = self.dim_removal - 1
        basis, mean, var = ExtractMaxVarComponents(self.__data, nr_comps)
    else:
        # automatic dimension selection (based on retaining a certain variance)
        basis, mean = ExtractSubspace(self.__data, 0.75)

    cluster_reduced = ProjectOntoSubspace(self.__data, mean, basis)
    print "Reducing dimension: {}->{}".format(np.shape(self.__data)[1], np.shape(cluster_reduced)[1])
    hull = Delaunay(cluster_reduced)

    # =======================================
    # 5. Discharge samples inside the hull

    if not self.__inverted:
        # select samples inside the hull
        cl_to_delete = np.array(list(
            set(range(0, len(cluster_reduced))) - set(np.unique(hull.convex_hull))))
        # set(range(len(data_hull))).difference(hull.convex_hull)
    else:
        cl_to_delete = np.array([])
        # select samples on the hull
        if len(cluster_reduced) > self.max_size:
            hull_indices = list(np.unique(hull.convex_hull))
            if len(hull_indices) > 0:
                nr_to_del = 5 if len(hull_indices) > 5 else 0
                cl_to_delete = np.array(hull_indices[0:nr_to_del])

    # print "Points building convex hull: {}".format(set(np.unique(hull.convex_hull)))
    # print "To delete: {}".format(cl_to_delete)

    if len(cl_to_delete[cl_to_delete < 0]) > 0:
        print set(np.unique(hull.convex_hull))
        log.warning("Index elements smaller than 0: {}".format(cl_to_delete[cl_to_delete < 0]))

    if self.__log:
        self.log_intra_deleted.append(len(cl_to_delete))
        self.log_cl_size_orig.append(len(self.__data))

    print "Cleaning {} points from inside data".format(len(cl_to_delete))

    # remove points from inside the hull
    self.__data = np.delete(self.__data, cl_to_delete, axis=0)

    # =======================================
    # 6. KNN point removal: remove similar points

    if self.knn_removal_thresh > 0:
        max_removal = 10 if len(self.__data) > self.knn_removal_thresh else 0
        if max_removal > 0:
            kn_filter = KNFilter(self.__data, k=3, threshold=0.25)
            tmp = kn_filter.filter_x_samples(max_removal)
            print "--- Removing {} knn points".format(len(self.__data) - len(tmp))
            self.__data = tmp

    if self.__log:
        self.log_cl_size_reduced.append(len(self.__data))

    print "Cluster size: {}".format(len(self.__data))
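# Stand-alone sketch of the hull-based sample selection in steps 2-3 above:
# only candidates that fall outside the convex hull of the current (already
# dimension-reduced) cluster are added. Data and dimensions are toy values,
# not the ones used by the class:
import numpy as np
from scipy.spatial import Delaunay

cluster = np.random.rand(40, 3)               # existing cluster, reduced to 3 dims
candidates = np.random.rand(10, 3)            # new samples to consider

hull = Delaunay(cluster)
outside = hull.find_simplex(candidates) < 0   # -1 means "not inside any simplex"
cluster = np.concatenate((cluster, candidates[outside]))
print("added {} of {} candidates".format(outside.sum(), len(candidates)))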