def fit(self, data, dim_reduction=False):
    """
    Fit the classifier to the training data.
    TODO: precomputed PCA subspace model
    :param data: training samples (one embedding per row)
    :param dim_reduction: if True, project the data onto a PCA subspace first
    :return: -
    """
    start = time.time()

    if dim_reduction is True:
        # extract PCA subspace retaining 99.9% of the variance
        self.basis, self.mean = ExtractSubspace(data, 0.999)
        if self.__verbose:
            log.info('cl', "reduced dimension to: {}".format(np.size(self.basis, 1)))
        # project data onto subspace
        self.data = ProjectOntoSubspace(data, self.mean, self.basis)
    else:
        self.basis = None
        self.mean = None
        self.data = data

    # calculate intra-cluster distances
    # TODO: cleanup
    self.cluster_distances = pairwise_distances(self.data, self.data, metric='euclidean')
    log.info('cl', "New ABOD Classifier initialized in {}s".format("%.4f" % (time.time() - start)))
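# ExtractSubspace/ProjectOntoSubspace are project helpers whose implementations
# are not shown in this section. A minimal sketch of what they presumably compute
# (standard PCA via SVD, keeping enough components to reach the requested
# explained-variance ratio) -- names and signatures here are assumptions, not the
# actual helpers:
import numpy as np

def extract_subspace_sketch(data, explained_variance=0.999):
    """Return (basis, mean); the basis columns span the PCA subspace."""
    mean = np.mean(data, axis=0)
    centered = data - mean
    # singular values relate directly to per-component variance
    _, s, vt = np.linalg.svd(centered, full_matrices=False)
    var_ratio = np.cumsum(s ** 2) / np.sum(s ** 2)
    nr_components = np.searchsorted(var_ratio, explained_variance) + 1
    return vt[:nr_components].T, mean

def project_onto_subspace_sketch(data, mean, basis):
    """Project samples onto the subspace spanned by the basis columns."""
    return np.dot(data - mean, basis)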
def __predict(self, samples):
    """
    Predict class membership: for every sample and every class, tells
    whether (+1) or not (-1) the sample belongs to the class.
    """
    predictions = []
    class_ids = []

    # TODO: instead of mean thresh, count min. percent of samples near class mean
    with self.training_lock:
        for class_id, __clf in self.classifiers.iteritems():
            # log.error("cluster mean: {}".format(__clf.data_cluster.data_mean))
            # only predict for "reasonable"/near classes
            mean_dist = np.mean(self.classifiers[class_id].class_mean_dist(samples))
            if mean_dist < 0.7:
                class_ids.append(class_id)
                predictions.append(__clf.predict(samples))
                log.info('cl', "Class {} range: {}".format(class_id, mean_dist))
            else:
                log.info('cl', "Class {} out of range (0.7 [ref] < {})".format(class_id, mean_dist))

    return np.array(predictions), np.array(class_ids)
def __init__(self, server, conn, handle):
    # receive user id
    user_id = server.receive_uint(conn)
    log.info('server', 'Updating profile picture for user with ID {}'.format(user_id))

    # receive image
    image = server.receive_image_squared(conn)

    # generate embedding
    embedding = server.embedding_gen.get_embeddings([image])
    if not embedding.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    # predict user id
    # user_id_predicted = server.classifier.predict(embedding)
    #
    # # check if correct user
    # if user_id_predicted is None:
    #     r.Error(server, conn, "Label could not be predicted - Face is ambiguous.")
    #     return
    # elif user_id_predicted != user_id:
    #     # unknown user
    #     r.Error(server, conn, "The profile image does not come from the same person!")
    #     return

    server.user_db.set_profile_picture(user_id, image)

    # send back image
    r.QuadraticImage(server, conn, image)
def listen_to_client(self, conn, addr):
    log.info('server', "--- Parallel connection to {}:{} ---".format(addr[0], addr[1]))

    if self.ONE_REQ_PER_CONN:
        # handle a single request
        self.handle_request(conn, addr)

        # check status - eventually shutdown server
        if self.SERVER_STATUS == -1:
            conn.close()  # close connection
            return

        # block till client has disconnected
        while 1:
            data = conn.recv(1024)
            if not data:
                # close connection - allow new socket connections
                conn.close()
                return
    else:
        # handle requests while the connection is open
        while self.handle_request(conn, addr):
            # check status - eventually shutdown server
            if self.SERVER_STATUS == -1:
                conn.close()  # close connection
                return

        # client has disconnected
        # close connection - allow new socket connections
        conn.close()
def update(self, samples, poses=np.array([])):
    # check if we already have very similar samples
    if len(self.data) > 0:
        dist = pairwise_distances(samples, self.data, metric='euclidean')
        dist = np.square(dist)
        mask = dist < 0.02
        ignore_mask = np.any(mask, axis=1)
        nr_ignored = np.count_nonzero(ignore_mask)
        if nr_ignored:
            log.info('db', "Ignoring {} samples (too similar)".format(nr_ignored))
            samples = samples[~ignore_mask]
            if len(samples) == 0:
                return
            if len(poses) > 0:
                poses = poses[~ignore_mask]

    # add data
    self.data = np.concatenate((self.data, samples)) if self.data.size else np.array(samples)
    self.poses = np.concatenate((self.poses, poses)) if self.poses.size else np.array(poses)

    # calculate mean
    self.data_mean = np.mean(self.data, axis=0).reshape(1, -1)

    # reduce data
    self.__reduce_after()
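# A quick standalone illustration of the near-duplicate filter above: an incoming
# sample is dropped when its squared L2 distance to ANY stored sample falls below
# 0.02. The threshold is the one hard-coded above; the data is made up:
import numpy as np
from sklearn.metrics import pairwise_distances

stored = np.array([[0.0, 0.0], [1.0, 1.0]])
incoming = np.array([[0.1, 0.0],    # squared dist 0.01 to first sample -> ignored
                     [0.5, 0.5]])   # squared dist 0.5 to both -> kept
dist_sq = np.square(pairwise_distances(incoming, stored, metric='euclidean'))
ignore_mask = np.any(dist_sq < 0.02, axis=1)
print(incoming[~ignore_mask])  # only the second sample survives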
def handle_request(self, conn, addr):
    """
    General request handler
    :return: False breaks the client connection in case batch request
             handling is enabled, else: ignored
    """
    # request_id = self.receive_uchar(conn, timeout=9999)
    byte = conn.recv(1)
    if not byte:
        log.info('server', "Client has disconnected")
        return False

    request_id = ord(byte)
    if request_id in self.req_lookup:
        req_type = self.req_lookup[request_id]
        log.info('server', "Incoming request: " + req_type)
        try:
            req = getattr(M_REQUESTS, req_type)
            # feedback handle
            handle = [True]
            # handle request
            req(self, conn, handle=handle)
            # batch mode: False breaks client communication
            # regular mode: ignored
            return handle[0]
        except AttributeError:
            log.error("Request model '" + req_type + "' is not yet implemented or an Exception occurred.")
    else:
        log.error("Unsupported request type: " + str(request_id))

    # batch mode: communication continues (can be used to break client loop)
    # regular mode: ignored
    return True
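# The dispatch above resolves a one-byte request id to a class name in M_REQUESTS
# and instantiates it (the request handlers in this codebase do their work in
# __init__, with the server passed as first argument). The lookup table itself is
# defined elsewhere; a minimal sketch of the assumed shape -- the ids and names
# below are illustrative, not the real table:
req_lookup_sketch = {
    1: 'ImageIdentification',    # hypothetical id/name pairs
    2: 'ProfilePictureUpdate',
    3: 'ImageAlignment',
}

# resolution then mirrors handle_request():
#   req_type = req_lookup_sketch[request_id]
#   req_class = getattr(M_REQUESTS, req_type)   # raises AttributeError if missing
#   req_class(server, conn, handle=[True])      # handler runs in its constructor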
def add_samples(self, user_id, new_samples, new_poses):
    """pose cluster update"""
    if user_id not in self.class_clusters:
        # initialize
        self.class_clusters[user_id] = MeanShiftPoseCluster(self.weight_gen, max_size=300)
        self.class_clusters[user_id].update(new_samples, new_poses)

        # display minimal inter-class distances
        means = []
        ids = []
        for cid, c in self.class_clusters.iteritems():
            means.append(c.data_mean[0])
            ids.append(cid)
        means = np.array(means)
        dist = pairwise_distances(means, means, metric='euclidean')
        dist = np.square(dist)
        dist = np.unique(dist)
        if len(dist) > 5:
            dist = dist[0:5]
        # first entry is the zero self-distance
        if len(dist) > 1:
            log.info('db', "Min. inter-class distances: {}".format(dist[1:]))
    else:
        # update
        self.class_clusters[user_id].update(new_samples, new_poses)
def get_decision_functions_in_range(self, samples):
    """
    :param samples: query samples (embeddings)
    :return: decision function values of all classes in range
    """
    # no classifiers yet, predict novelty
    if not self.classifiers:
        return {}

    # select classes in range
    classes_in_range = self.data_controller.classes_in_range(samples=samples, metric='cosine', thresh=0.7)

    if len(classes_in_range) == 0:
        log.info('cls', "No class in range... (cosine < 0.7)")
        return {}

    # predict class values
    decision_functions = {}
    for cl_id in classes_in_range:
        decision_functions[cl_id] = self.classifiers[cl_id].decision_function(samples)

    return decision_functions
def train_classifier(self, class_id):
    """
    Retrain One-Class Classifier (partial_fit)
    """
    log.info('cl', "(Re-)training Classifier for user ID {}".format(class_id))

    # extract data
    with self.trainig_data_lock:
        # get update samples from stack
        if class_id in self.classifier_update_stacks:
            update_samples = self.classifier_update_stacks[class_id]
            # clear stack
            self.classifier_update_stacks[class_id] = np.array([])
        else:
            update_samples = []

    start = time()
    if len(update_samples) > 0:
        # INCREMENTAL method: use partial fit with the stored update data
        # - samples are partially stored in the cluster
        self.classifiers[class_id].partial_fit(update_samples)
        self.classifier_states[class_id] += 1
    else:
        log.warning("No training/update samples available")

    if self.__verbose:
        log.info('cl', "fitting took {} seconds".format(time() - start))
def __init__(self, server, conn, handle):
    # receive tracking id
    tracking_id = server.receive_uint(conn)
    log.info('server', "Dropping identification queue for tracking id {}".format(tracking_id))

    # drop samples
    server.classifier.id_controller.drop_samples(tracking_id=tracking_id)
    r.OK(server, conn)
def __init__(self, server, conn, handle):
    log.info('server', 'Getting all profile pictures')
    uids, pictures = server.user_db.get_all_profile_pictures()
    # send back images
    r.ProfilePictures(server, conn, uids, pictures)
def __init__(self):
    start = time.time()
    log.info('cnn', "EmbeddingGen: loading models...")

    # load neural net
    self.neural_net = openface.TorchNeuralNet(self.networkModel, imgDim=self.size, cuda=self.cuda)

    # load dlib model
    self.dlib_aligner = openface.AlignDlib(dlibModelDir + "/" + self.dlibFacePredictor)

    log.info('cnn', "EmbeddingGen: model loading took {} seconds".format("%.3f" % (time.time() - start)))
def __predict(self, samples):
    start = time.time()
    # dist_table = pairwise_distances(samples, samples, metric='euclidean')
    abof = self.__abof_multi(samples)
    log.info('cl', "ABOF: {} | calc time: {}s".format(
        ["%.5f" % item for item in abof], "%.4f" % (time.time() - start)))
    return abof
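# __abof_multi() is not shown in this section. For reference, the angle-based
# outlier factor (ABOF, Kriegel et al. 2008) of a query point is the variance,
# over all pairs of reference points, of the distance-weighted angle between the
# difference vectors. A minimal unoptimized sketch against an explicit reference
# set -- the actual method may weight or subsample pairs differently:
import itertools
import numpy as np

def abof_sketch(query, reference_points):
    factors = []
    for b, c in itertools.combinations(reference_points, 2):
        vb = b - query
        vc = c - query
        # weighted angle: the inner product is divided by the squared norms,
        # so near reference pairs contribute more than distant ones
        factors.append(np.dot(vb, vc) / (np.dot(vb, vb) * np.dot(vc, vc)))
    # a low variance of the weighted angles indicates an outlier
    return np.var(factors)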
def __init__(self, server, conn, handle):
    nr_users = server.receive_uint(conn)
    target_users = []
    for x in range(0, nr_users):
        # get target class ids (uint)
        user_id = server.receive_uint(conn)
        target_users.append(user_id)

    # receive images
    images = server.receive_image_batch_squared_same_size(conn)

    log.severe("ImageIdentificationPrealignedCS, possible IDs: {}".format(target_users))

    # generate embeddings
    embeddings = server.embedding_gen.get_embeddings(rgb_images=images, align=False)
    if not embeddings.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    if -1 in target_users:
        # open set user id prediction
        # current_weights = np.repeat(1, len(embeddings))
        is_consistent, user_id, confidence = server.classifier.predict_class(embeddings, sample_poses=None)
    else:
        # closed set user id prediction
        user_id = server.classifier.predict_closed_set(target_users, embeddings)

    if user_id is None:
        r.Error(server, conn, "Label could not be predicted - Samples are contradictory.")
        return

    # get user nice name
    user_name = server.user_db.get_name_from_id(user_id)
    if user_name is None:
        user_name = "unnamed"

    # get profile picture
    profile_picture = server.user_db.get_profile_picture(user_id)

    log.info('server', "User identification complete: {} [ID], {} [Username]".format(user_id, user_name))
    r.Identification(server, conn, int(user_id), user_name, profile_picture=profile_picture)
def __init__(self, server, conn, handle):
    # receive user id
    user_id = server.receive_uint(conn)
    log.info('server', 'User Update (Aligned, Robust) for ID {}'.format(user_id))

    # receive images
    images = server.receive_image_batch_squared_same_size(conn)

    # get sample poses
    sample_poses = []
    for x in range(0, len(images)):
        pitch = server.receive_char(conn)
        yaw = server.receive_char(conn)
        sample_poses.append([pitch, yaw])
    sample_poses = np.array(sample_poses)

    # TODO: calculate weights
    weights = np.repeat(10, len(images))

    # generate embeddings
    embeddings = server.embedding_gen.get_embeddings(images, align=False)
    if not embeddings.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    # accumulate samples - check for inconsistencies
    verified_data, reset_user, id_pred, confidence = server.classifier.update_controller.accumulate_samples(
        user_id, embeddings, weights)
    log.info('cl', "verified_data (len: {}), reset_user: {}: ID {}, conf {}".format(
        len(verified_data), reset_user, id_pred, confidence))

    # forward save part of data
    if verified_data.size:
        # add to data model
        server.classifier.data_controller.add_samples(user_id=user_id, new_samples=verified_data)
        # add to classifier training queue
        server.classifier.add_training_data(user_id, verified_data)

    # reset user if queue has become inconsistent or wrong user is predicted
    if reset_user:
        log.severe("USER VERIFICATION FAILED - FORCE REIDENTIFICATION")
        r.Reidentification(server, conn)
        return

    # return prediction feedback
    user_name = server.user_db.get_name_from_id(id_pred)
    if user_name is None:
        user_name = "unnamed"

    r.PredictionFeedback(server, conn, id_pred, user_name, confidence=int(confidence * 100.0))
def __dep_predict_sum(self, samples):
    """
    Prediction cases:
    - Only the target class is identified with ratio X (high): Class
    - Target and another class are identified with ratio X (high) and Y (small): Class with small confusion
    - Multiple classes are identified with small ratios Y: Novelty
    - No classes identified: Novelty

    :param samples: query samples
    :return: Class ID, -1 (Novelty), None for invalid samples (multiple detections)
    """
    # no classifiers yet, predict novelty
    if not self.classifiers:
        # 100% confidence
        self.__decision_function = np.array([len(samples)]), np.array([-1])
        return -1

    predictions, class_ids = self.__predict(samples)

    if len(predictions) == 0:
        # no class in reach - classify as novel class
        self.__decision_function = np.array([len(samples)]), np.array([-1])
        return -1

    cls_scores = np.sum(predictions, axis=1)
    self.__decision_function = cls_scores, class_ids
    nr_samples = len(samples)
    self.__decision_nr_samples = nr_samples

    log.info('cl', "Classifier scores: {} | max: {}".format(cls_scores, nr_samples))

    # no classes detected at all - novelty
    # novelty_mask = cls_scores <= self.__novelty_thresh * nr_samples
    novelty_mask = cls_scores < 0
    if len(cls_scores[novelty_mask]) == len(cls_scores):
        return -1

    # identification_mask = cls_scores >= self.__class_thresh * nr_samples
    identification_mask = cls_scores >= 0
    ids = cls_scores[identification_mask]
    if len(ids) > 0:
        # multiple possible detections - invalid samples
        if len(ids) > 1:
            return None
        # single person identified - return id
        return int(class_ids[identification_mask][0])
    else:
        # samples unclear
        return None
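# Worked example of the vote aggregation above: __predict() yields a +1/-1 matrix
# of shape (nr_classes_in_range, nr_samples); summing along axis 1 turns each row
# into a per-class vote score. The numbers here are illustrative:
import numpy as np

predictions = np.array([[+1, +1, -1, +1],    # class A: 3 of 4 samples accepted
                        [-1, -1, -1, +1]])   # class B: mostly rejected
cls_scores = np.sum(predictions, axis=1)     # -> [2, -2]
# with the hard zero thresholds above, only class A survives (score >= 0),
# so its id would be returned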
def process_labeled_stream_data(self, class_id, samples, check_update=False):
    """
    Incorporate labeled data into the classifiers. The classifier for
    {class_id} must already be initialized (retraining is done once the
    samples can't be explained by the model anymore)
    :param class_id: class id
    :param samples: class samples
    :param check_update: evaluate the update on the current model before using it (robust to sample pollution)
    :return: success, confidence
    """
    log.info('cl', "Processing labeled stream data for user ID {}".format(class_id))
    class_id = int(class_id)

    if class_id not in self.classifiers:
        log.severe("Class {} has not been initialized yet!".format(class_id))
        return False, 1  # force reidentification

    confidence = 1

    if check_update:
        prediction = self.predict(samples)
        # samples are not certain enough
        if prediction is None:
            return None, 1
        # calculate confidence
        confidence = self.prediction_proba(class_id)
        # detected different class
        if prediction != class_id:
            log.severe("Updating invalid class! Tracker must have switched!")
            return False, confidence  # force reidentification

    with self.training_lock:
        # add update data to stack
        if class_id not in self.classifier_update_stacks or len(self.classifier_update_stacks[class_id]) == 0:
            # create new list
            self.classifier_update_stacks[class_id] = samples
        else:
            # append
            self.classifier_update_stacks[class_id] = np.concatenate(
                (self.classifier_update_stacks[class_id], samples))

        # request classifier update
        # TODO: only request update if available update data exceeds threshold
        self.add_training_task(class_id)

    return True, confidence
def init_from_files(self, embedding_file="pose_matthias2.pkl", pose_file="pose_matthias2_poses.pkl"):
    log.info('db', "Initializing weight generator...")

    # initialize grid
    embeddings = load_data(embedding_file)
    poses = load_data(pose_file)

    if embeddings is None or poses is None:
        log.severe("Could not load files {}/{} in dir uids/models/confience_weights/ for weight generator...".format(
            embedding_file, pose_file))
        sys.exit(1)

    self.generate(embeddings, poses)
def classes_in_range(self, samples, thresh=1.3, metric='euclidean'):
    class_ids = []
    for cid, c in self.class_clusters.iteritems():
        # only predict for "reasonable"/near classes
        mean_dist = np.mean(c.class_mean_dist(samples, metric))
        if mean_dist < thresh:
            class_ids.append(cid)
        else:
            log.info('db', "Class {} out of range [{}] ({} [ref] < {:.3f})".format(cid, metric, thresh, mean_dist))
    return class_ids
def __classifier_trainer(self):
    if self.__verbose is True:
        log.info('cl', "Starting classifier training thread")

    while self.STATUS == 1:
        if self.__verbose is True:
            log.info('cl', "Begin classifier training in thread")
        # print "==== queue size: " + str(self.__tasks.qsize())
        training_id = self.__tasks.get()
        self.train_classifier(training_id)
        self.__tasks.task_done()
def is_guaranteed_new_class(self, samples):
    if not self.classifiers:
        return True

    # select classes in range
    classes_in_range = self.data_controller.classes_in_range(samples=samples, metric='euclidean', thresh=1.3)

    if len(classes_in_range) == 0:
        log.info('db', "Guaranteed new class - No class in range... (L2^2 < 1.3)")
        return True

    return False
def save(self):
    filename = "{}/userdb_{}.pkl".format(DBDir, self.version_name)
    log.info('db', "Saving database to '{}'".format(filename))
    with open(filename, 'wb') as f:
        pickle.dump((
            self.version_name,
            self.id_increment,
            self.__user_list,
            self.__class_samples,
            self.__profile_pictures
        ), f)
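# A matching load() is not shown in this section. A minimal sketch (as a method
# of the same class) that unpickles the tuple in the same field order save()
# writes it -- an assumption based solely on the dump above, not on the actual
# implementation:
def load_sketch(self, version_name):
    filename = "{}/userdb_{}.pkl".format(DBDir, version_name)
    log.info('db', "Loading database from '{}'".format(filename))
    with open(filename, 'rb') as f:
        (self.version_name,
         self.id_increment,
         self.__user_list,
         self.__class_samples,
         self.__profile_pictures) = pickle.load(f)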
def predict_closed_set(self, target_classes, samples):
    # choose nearest class
    mean_dist_l2, clean_ids = self.data_controller.class_mean_distances(samples, target_classes)

    if len(clean_ids) == 0:
        return None

    mean_dist_l2 = list(mean_dist_l2)
    clean_ids = list(clean_ids)
    log.info('cl', "Closed set distance scores (L2 squared): IDs {} | Class dist.: {}".format(
        clean_ids, mean_dist_l2))

    min_index = mean_dist_l2.index(min(mean_dist_l2))
    return clean_ids[min_index]
def __reduce_after(self, metric='cosine', reverse=True):
    if len(self.data) < self.__max_size:
        return

    # delete the samples which are most distant from the cluster mean
    dist = pairwise_distances(self.data_mean, self.data, metric=metric)
    dist = dist[0]
    to_remove = len(self.data) - self.__max_size
    indices = np.arange(0, len(self.data))
    dist_sorted, indices_sorted = zip(*sorted(zip(dist, indices), reverse=reverse))
    indices_to_delete = indices_sorted[0:to_remove]
    log.info('cl', "Removing {} points".format(len(indices_to_delete)))

    # delete
    self.data = np.delete(self.data, indices_to_delete, axis=0)
def __init__(self, server, conn, handle):
    log.info('server', "Image alignment")

    # receive image
    img = server.receive_rgb_image_squared(conn)

    # align image
    # landmark choices: innerEyesAndBottomLip, outerEyesAndNose
    aligned = server.embedding_gen.align_face(img, 'outerEyesAndNose', 96)
    if aligned is None:
        r.Error(server, conn, "Could not align the image")
        return

    # send aligned image back
    r.QuadraticImage(server, conn, aligned)
def __init__(self, classifier_type):
    # define valid classifiers
    self.define_classifiers()

    if classifier_type not in self.VALID_CLASSIFIERS:
        raise ValueError('Invalid Classifier "{}". You can choose between: {}'.format(
            classifier_type, str(list(self.VALID_CLASSIFIERS))))

    self.CLASSIFIER = classifier_type

    # perform classifier training in tasks
    # self.__start_classifier_trainers()

    log.info('cl', "{} Classifier Tree initialized".format(self.CLASSIFIER))
def init_new_class(self, class_id, class_samples):
    """
    Initialize a One-Class Classifier with sample data
    :param class_id: new class id
    :param class_samples: samples belonging to the class
    :return: True/False - success
    """
    log.info('cl', "Initializing new Classifier for user ID {}".format(class_id))
    if class_id in self.classifiers:
        log.severe("Illegal reinitialization of classifier")
        return False

    # init new data model
    self.data_controller.add_samples(user_id=class_id, new_samples=class_samples)
    cluster_ref = self.data_controller.get_class_cluster(class_id)

    # init new classifier
    if self.CLASSIFIER == 'SetSimilarityHardThreshold':
        # link to data controller: similarity matching - model = data
        self.classifiers[class_id] = SetSimilarityHardThreshold(
            metric='ABOD',
            threshold=0.3,
            cluster=cluster_ref  # TODO: data model is connected - might also be separate?
        )
    elif self.CLASSIFIER == 'non-incremental':
        # link to data controller: non-incremental learner
        pass
    elif self.CLASSIFIER == 'incremental':
        # regular model - no need to pass a data reference
        pass

    self.nr_classes += 1
    self.classifier_states[class_id] = 0

    # add samples to update stack
    with self.trainig_data_lock:
        self.classifier_update_stacks[class_id] = class_samples

    # directly train classifier
    return self.train_classifier(class_id)
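# SetSimilarityHardThreshold is defined elsewhere. From its usage here and in
# __predict() above, a minimal sketch of the assumed interface (a fixed decision
# threshold on a set-similarity score such as ABOD, backed by the shared cluster)
# -- the actual score computation and metric handling are assumptions:
import numpy as np

class SetSimilarityHardThresholdSketch(object):
    def __init__(self, metric, threshold, cluster):
        self.metric = metric        # e.g. 'ABOD'
        self.threshold = threshold  # hard decision boundary on the score
        self.cluster = cluster      # shared data model (model = data)

    def partial_fit(self, samples):
        # similarity matching: "training" just extends the stored sample set
        self.cluster.update(samples)

    def decision_function(self, samples):
        # placeholder: score each sample against the stored set with self.metric
        raise NotImplementedError("actual scoring lives in the real class")

    def predict(self, samples):
        # +1 where the similarity score clears the threshold, else -1
        scores = self.decision_function(samples)
        return np.where(scores >= self.threshold, 1, -1)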
def __init__(self, server, conn, handle):
    # receive images
    images = server.receive_image_batch_squared_same_size(conn, switch_rgb_bgr=True)

    # generate embeddings
    embeddings = server.embedding_gen.get_embeddings(rgb_images=images, align=True)
    if not embeddings.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    # unified weights
    sample_poses = None

    # open set user id prediction
    is_consistent, user_id, confidence = server.classifier.predict_class(embeddings, sample_poses)

    if is_consistent:
        # get user nice name
        user_name = server.user_db.get_name_from_id(user_id)
        if user_name is None:
            user_name = "unnamed"

        # get profile picture
        profile_picture = server.user_db.get_profile_picture(user_id)

        log.info('server', "User identification complete: {} [ID], {} [Username]".format(user_id, user_name))
        r.Identification(server, conn, int(user_id), user_name, confidence=confidence,
                         profile_picture=profile_picture)
    else:
        r.Error(server, conn, "Result is inconsistent.")
def __init__(self):
    start = time.time()
    log.info('cnn', "EmbeddingGenLightCNN: loading models...")

    # load face aligner
    # self.aligner = FaceAligner

    # load neural net
    caffe.set_mode_cpu()
    self.neural_net = caffe.Classifier(
        self.networkDef, self.networkModel,
        # "Data set image mean of [Channels x Height x Width] dimensions "
        # "(numpy array). Set to '' for no mean subtraction."
        image_dims=[self.size, self.size],  # input size of images
        mean=None,
        input_scale=1.0,
        raw_scale=1.0,      # raw input is multiplied by this scale (caffe needs grayscale range [0-255])
        channel_swap=None   # input format: BGR (BGR is caffe and OpenCV default)
    )

    log.info('cnn', "EmbeddingGenLightCNN: model loading took {} seconds".format("%.3f" % (time.time() - start)))
def __init__(self, server, conn, handle):
    log.info('server', "Image alignment")

    # receive image
    img = server.receive_rgb_image_squared(conn, switch_rgb_bgr=True)

    # align image
    # landmark choices: innerEyesAndBottomLip, outerEyesAndNose
    aligned = server.embedding_gen.align_face(img, 'outerEyesAndNose', 96)
    if aligned is None:
        r.Error(server, conn, "Could not align the image")
        return

    # convert back to bgr
    aligned = cv2.cvtColor(aligned, cv2.COLOR_RGB2BGR)

    # send aligned image back
    r.QuadraticImage(server, conn, aligned)