def receive_message(self, the_socket, datasize, timeout=2):
    """Basic message receiver for known datasize"""
    buffer = ''
    begin = time.time()
    refresh_rate = 0.01
    try:
        while len(buffer) < datasize:
            # if you got some data, then break after timeout
            if buffer and time.time() - begin > timeout:
                raise ValueError('receive_message timeout. Only partial data received.')
            # if you got no data at all, wait a little longer, twice the timeout
            elif time.time() - begin > timeout * 2:
                raise ValueError('receive_message timeout. No data received.')

            packet = the_socket.recv(datasize - len(buffer))
            if packet:
                # append to buffer
                buffer += packet
                # print 'Total ' + str(sys.getsizeof(buffer)) + ' bytes'
                begin = time.time()
            if not packet:
                # wait
                time.sleep(refresh_rate)
    except socket.error, (errorCode, message):
        # error 10035 is no data available, it is non-fatal
        if errorCode != 10035:
            log.severe('socket.error - (' + str(errorCode) + ') ' + message)
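# Standalone usage sketch (assumption, not part of the original class): the same
# fixed-size receive pattern as receive_message() above, written as a free
# function over an already-connected socket. All names, sizes and the short
# poll interval below are illustrative only.
import socket
import time

def recv_exact(sock, datasize, timeout=2.0):
    # poll with a short socket timeout so the loop can check the overall deadline
    sock.settimeout(0.05)
    buf = b''
    begin = time.time()
    while len(buf) < datasize:
        if time.time() - begin > timeout:
            raise ValueError('recv_exact timeout: got %d of %d bytes' % (len(buf), datasize))
        try:
            packet = sock.recv(datasize - len(buf))
        except socket.timeout:
            continue
        if not packet:
            raise ValueError('connection closed before %d bytes arrived' % datasize)
        buf += packet
        begin = time.time()    # reset the deadline whenever data arrives
    return buf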
def predict(self, samples, samples_poses):
    # get similarity scores
    cluster_type = self.data_cluster.__class__.__name__
    if cluster_type == 'MeanShiftPoseCluster':
        similarity_scores, matching_confidence = self.decision_function(
            samples, samples_poses, nr_compaired_samples=self.nr_compaired_samples
        )
    else:
        log.severe("Prediction for cluster type '{}' is not implemented yet!".format(cluster_type))
        raise NotImplementedError("Implement threshold prediction for specific cluster type.")

    print "==== {}: ".format(self.metric), ["%0.3f" % i for i in similarity_scores]
    l2_dist = self.data_cluster.class_mean_dist(samples, metric='euclidean')
    print "==== L2: ", ["%0.3f" % i for i in l2_dist]
    print "==== Matching conf: ", ["%0.1f" % i for i in matching_confidence]

    if self.metric == 'ABOD':
        positive = similarity_scores > self.__thresh
        # only apply on 50% max of samples
        if self.recheck_L2_distance and np.count_nonzero(positive) >= int(len(positive) / 2.):
            m1 = similarity_scores >= 0.16
            m2 = l2_dist < 0.6
            print ".... Rechecking L2 distance, detections: ", m1 & m2
            positive[m1 & m2] = True
    else:
        positive = similarity_scores < self.__thresh

    return np.array([1 if v else -1 for v in positive]), np.array(matching_confidence)
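# Standalone sketch (assumption) of the ABOD decision rule applied above: scores
# above the threshold are positive, and once at least half of the set is already
# positive, the L2 recheck can only add further positives, never remove them.
# The default threshold of 0.3 is a placeholder taken from the
# SetSimilarityHardThreshold construction further down in this file; 0.16 and
# 0.6 mirror the recheck constants used above.
import numpy as np

def abod_decision(similarity_scores, l2_dist, thresh=0.3, recheck=True):
    similarity_scores = np.asarray(similarity_scores, dtype=float)
    l2_dist = np.asarray(l2_dist, dtype=float)
    positive = similarity_scores > thresh
    if recheck and np.count_nonzero(positive) >= int(len(positive) / 2.0):
        # rescue borderline samples that are close to the class mean in L2
        positive[(similarity_scores >= 0.16) & (l2_dist < 0.6)] = True
    return np.where(positive, 1, -1)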
def prediction_proba_old(self, user_id):
    total_proba = 1

    # is new user
    if user_id == -1:
        for uid in range(1, self.nr_classes + 1):
            dec_fn = self.decision_function(uid)
            if dec_fn < 0:
                total_proba *= abs(dec_fn / float(self.__decision_nr_samples))
            else:
                total_proba *= 1 - dec_fn / float(self.__decision_nr_samples)
        return total_proba

    # is regular user
    for uid in range(1, self.nr_classes + 1):
        if uid == user_id:
            # target classifier
            total_proba *= self.decision_function(uid) / float(self.__decision_nr_samples)
        else:
            dec_fn = self.decision_function(uid)
            if dec_fn < 0:
                total_proba *= abs(dec_fn / float(self.__decision_nr_samples))
            else:
                total_proba *= 1 - dec_fn / float(self.__decision_nr_samples)
                log.severe("Duplicate detection.")
                raise ValueError
        # loop through other classifiers
    return total_proba
def predict(self, samples):
    """
    One Class prediction
    :param samples:
    :return: np.array of labels. 1: is-class, -1: is-not-class, 0: sample is uncertain
    """
    print "--- Start prediction of samples: {}".format(len(samples))

    if len(self.data) == 0:
        log.severe("ABOD Cluster is not initialized! Please use the 'fit' method first.")

    # project onto subspace
    if self.basis is not None:
        samples = ProjectOntoSubspace(samples, self.mean, self.basis)

    variance = self.__predict(samples)
    self.prediction = np.array([
        -1 if v < (self.threshold - self.uncertainty_bandwidth / 2)
        else 1 if v > (self.threshold + self.uncertainty_bandwidth / 2)
        else 0
        for v in variance
    ])
    return self.prediction
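# Toy sketch (assumption, standalone) of the three-way labelling rule used in
# predict() above: variance scores inside the uncertainty band around the
# threshold map to 0 (uncertain), below it to -1 (not the class), above it to 1.
# Threshold and bandwidth values are placeholders.
import numpy as np

def band_labels(scores, threshold=0.3, bandwidth=0.1):
    scores = np.asarray(scores, dtype=float)
    labels = np.zeros(len(scores), dtype=int)
    labels[scores < threshold - bandwidth / 2.0] = -1
    labels[scores > threshold + bandwidth / 2.0] = 1
    return labels

# band_labels([0.1, 0.3, 0.5]) -> array([-1,  0,  1])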
def __init__(self, server, conn, handle):
    nr_users = server.receive_uint(conn)
    target_users = []
    for x in range(0, nr_users):
        # get target class ids (uint)
        user_id = server.receive_uint(conn)
        target_users.append(user_id)

    # receive images
    images = server.receive_image_batch_squared_same_size(conn)
    log.severe("ImageIdentificationPrealignedCS, possible IDs: ", target_users)

    # generate embedding
    embeddings = server.embedding_gen.get_embeddings(rgb_images=images, align=False)
    if not embeddings.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    if -1 in target_users:
        # open set user id prediction
        # current_weights = np.repeat(1, len(embeddings))
        is_consistent, user_id, confidence = server.classifier.predict_class(
            embeddings, sample_poses=None)
    else:
        # closed set user id prediction
        user_id = server.classifier.predict_closed_set(target_users, embeddings)
        if user_id is None:
            r.Error(server, conn, "Label could not be predicted - Samples are contradictory.")
            return

    # get user nice name
    user_name = server.user_db.get_name_from_id(user_id)
    if user_name is None:
        user_name = "unnamed"

    # get profile picture
    profile_picture = server.user_db.get_profile_picture(user_id)
    log.info('server', "User identification complete: {} [ID], {} [Username]".format(user_id, user_name))
    r.Identification(server, conn, int(user_id), user_name, profile_picture=profile_picture)
def __init__(self, server, conn, handle):
    # receive user id
    user_id = server.receive_uint(conn)
    log.info('server', 'User Update (Aligned, Robust) for ID {}'.format(user_id))

    # receive images
    images = server.receive_image_batch_squared_same_size(conn)

    # get sample poses
    sample_poses = []
    for x in range(0, len(images)):
        pitch = server.receive_char(conn)
        yaw = server.receive_char(conn)
        sample_poses.append([pitch, yaw])
    sample_poses = np.array(sample_poses)

    # TODO: calculate weights
    weights = np.repeat(10, len(images))

    # generate embedding
    embeddings = server.embedding_gen.get_embeddings(images, align=False)
    if not embeddings.any():
        r.Error(server, conn, "Could not generate face embeddings.")
        return

    # accumulate samples - check for inconsistencies
    verified_data, reset_user, id_pred, confidence = server.classifier.update_controller.accumulate_samples(
        user_id, embeddings, weights)
    log.info('cl', "verified_data (len: {}), reset_user: {}: ID {}, conf {}".format(
        len(verified_data), reset_user, id_pred, confidence))

    # forward save part of data
    if verified_data.size:
        # for s in embeddings:
        #     print "new: {:.8f}".format(s[0])
        # print "------------------"
        # for s in verified_data:
        #     print "s: {:.5f}".format(s[0])

        # add to data model
        server.classifier.data_controller.add_samples(user_id=user_id, new_samples=verified_data)
        # add to classifier training queue
        server.classifier.add_training_data(user_id, verified_data)

    # reset user if queue has become inconsistent or wrong user is predicted
    if reset_user:
        log.severe("USER VERIFICATION FAILED - FORCE REIDENTIFICATION")
        r.Reidentification(server, conn)
        return

    # return prediction feedback
    user_name = server.user_db.get_name_from_id(id_pred)
    if user_name is None:
        user_name = "unnamed"
    r.PredictionFeedback(server, conn, id_pred, user_name, confidence=int(confidence * 100.0))
def process_labeled_stream_data(self, class_id, samples, check_update=False):
    """
    Incorporate labeled data into the classifiers. Classifier for {class_id} must be initialized already
    (retraining is done once the samples can't be explained by the model anymore)
    :param class_id: class id
    :param samples: class samples
    :param check_update: Evaluate update on the current model before using it (robust to sample pollution)
    :return: -
    """
    log.info('cl', "Processing labeled stream data for user ID {}".format(class_id))
    class_id = int(class_id)

    if class_id not in self.classifiers:
        log.severe("Class {} has not been initialized yet!".format(class_id))
        return False, 1    # force reidentification

    confidence = 1

    if check_update:
        prediction = self.predict(samples)
        # samples are not certain enough
        if prediction is None:
            return None, 1
        # calculate confidence
        confidence = self.prediction_proba(class_id)
        # detected different class
        if prediction != class_id:
            log.severe("Updating invalid class! Tracker must have switched!")
            return False, confidence    # force reidentification

    with self.training_lock:
        # add update data to stack
        if class_id not in self.classifier_update_stacks or len(self.classifier_update_stacks[class_id]) == 0:
            # create new list
            self.classifier_update_stacks[class_id] = samples
        else:
            # append
            self.classifier_update_stacks[class_id] = np.concatenate(
                (self.classifier_update_stacks[class_id], samples))

        # request classifier update
        # Todo: only request update if available update data exceeds threshold
        self.add_training_task(class_id)

    return True, confidence
def init_from_files(self, embedding_file="pose_matthias2.pkl", pose_file="pose_matthias2_poses.pkl"):
    log.info('db', "Initializing weight generator...")

    # initialize grid
    embeddings = load_data(embedding_file)
    poses = load_data(pose_file)

    if embeddings is None or poses is None:
        log.severe("Could not load file {} in dir uids/models/confience_weights/ for weight generator...".format(embedding_file))
        sys.exit(0)

    self.generate(embeddings, poses)
def __predict_ORIG(self, samples):
    proba, class_ids = self.predict_proba(samples)
    mask_0 = proba > 0

    # no classes detected at all - novelty
    if len(proba[mask_0]) == 0:
        return -1

    mask_class = proba > self.__class_thresh
    nr_classes = len(proba[mask_class])

    if nr_classes > 0:
        # class detected
        if nr_classes > 1:
            # multiple classes detected - batch invalid
            if self.__verbose:
                log.severe("Multiple classes detected: {}".format(nr_classes))
            return None

        confusion_mask = (self.__confusion_thresh < proba) & (proba < self.__class_thresh)
        # count if any element, except for class, is above confusion ratio
        if len(proba[confusion_mask]) > 0:
            log.warning(
                "Class confusion - force re-identification: {}% confusion, {}% identification, {} samples"
                .format(
                    proba[(self.__confusion_thresh < proba) & (proba < self.__class_thresh)],
                    proba[mask_class],
                    len(samples)))
            # calc pairwise distance. If small then force re-identification
            # for sample in proba[confusion_mask]:
            # Todo: implement properly
            # return None

        class_id_arr = class_ids[mask_class]
        return int(class_id_arr[0])
    else:
        if len(proba[proba > self.__novelty_thresh]) > 0:
            print "--- no classes detected but novelty threshold exceeded: {}".format(proba)
            return None
        return -1
def decision_function(self, samples):
    """
    Distance of the samples X to the target class distribution
    :param samples:
    :return:
    """
    cluster_type = self.data_cluster.__class__.__name__
    if cluster_type != 'MeanShiftCluster':
        log.severe(
            "Prediction for cluster type '{}' is not implemented yet! Add custom decision_function() first."
            .format(cluster_type))
        raise NotImplementedError("Implement threshold prediction for specific cluster type.")

    # calc hashes
    hashed = [self.get_hash(s) for s in samples]

    # check intersections and use buffered results
    if self.decision_fn_buffer:
        # ind_samples = dict((k, i) for i, k in enumerate(hashed))
        intersec_hashes = list(set(self.decision_fn_buffer.keys()) & set(hashed))
        similarity_scores = []
        for i, h in enumerate(hashed):
            if h in intersec_hashes:
                similarity_scores.append(self.decision_fn_buffer[h])
            else:
                score = self.data_cluster.sample_set_similarity_scores(
                    np.array([samples[i]]), self.metric)
                similarity_scores.append(score)
                # add to buffer
                self.decision_fn_buffer[h] = score
    else:
        similarity_scores = self.data_cluster.sample_set_similarity_scores(samples, self.metric)
        # add to buffer
        for i, h in enumerate(hashed):
            self.decision_fn_buffer[h] = similarity_scores[i]

    similarity_scores = np.array(similarity_scores).flatten()
    return similarity_scores
def clean_duplicates(s1, s2):
    assert len(s1) == len(s2)
    x = np.random.rand(s1.shape[1])
    y = s1.dot(x)
    unique, index = np.unique(y, return_index=True)
    # print len(s1)
    # print len(s1[index])
    # print len(np.vstack({tuple(row) for row in s1}))
    if len(s1[index]) != len(s1):
        log.severe("Duplicate items in embeddings s1! Removing duplicates...")
        s1 = s1[index]
        s2 = s2[index]
        # raise ValueError
    return s1, s2
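# Standalone sketch (assumption) of the random-projection duplicate check used
# in clean_duplicates() above: identical rows project to identical scalars, so
# np.unique on the 1-D projection keeps one representative per duplicate group.
# (Distinct rows could in principle collide on the projection; the original
# relies on this being vanishingly unlikely for float embeddings.)
import numpy as np

def drop_duplicate_rows(a, b):
    assert len(a) == len(b)
    proj = a.dot(np.random.rand(a.shape[1]))      # 1-D fingerprint per row
    _, index = np.unique(proj, return_index=True)
    return a[index], b[index]

# a = np.array([[1., 2.], [1., 2.], [3., 4.]]); b = np.array([0, 1, 2])
# drop_duplicate_rows(a, b) keeps only one of the two identical rows of a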
def init_new_class(self, class_id, class_samples):
    """
    Initialise a One-Class-Classifier with sample data
    :param class_id: new class id
    :param class_samples: samples belonging to the class
    :return: True/False - success
    """
    log.info('cl', "Initializing new Classifier for user ID {}".format(class_id))
    if class_id in self.classifiers:
        log.severe("Illegal reinitialization of classifier")
        return False

    # init new data model
    self.data_controller.add_samples(user_id=class_id, new_samples=class_samples)
    cluster_ref = self.data_controller.get_class_cluster(class_id)

    # init new classifier
    if self.CLASSIFIER == 'SetSimilarityHardThreshold':
        # link to data controller: similarity matching - model = data
        self.classifiers[class_id] = SetSimilarityHardThreshold(
            metric='ABOD',
            threshold=0.3,
            cluster=cluster_ref    # TODO: data model is connected - might also be separate?
        )
    elif self.CLASSIFIER == 'non-incremental':
        # link to data controller: non-incremental learner
        pass
    elif self.CLASSIFIER == 'incremental':
        # regular model. No need to pass data reference
        pass

    self.nr_classes += 1
    self.classifier_states[class_id] = 0

    # add samples to update stack
    with self.trainig_data_lock:
        self.classifier_update_stacks[class_id] = class_samples

    # directly train classifier
    return self.train_classifier(class_id)
def __classifier_trainer(self):
    """
    Manually triggered classifier training
    :return:
    """
    if self.__verbose is True:
        log.info('cl', "Starting classifier training thread")

    while self.STATUS == 1:
        try:
            training_id = self.__tasks.get(False)
        except Queue.Empty:
            sleep(0.25)    # Time in seconds.
        else:
            if training_id not in self.classifiers:
                log.severe("Cannot train class {} without creating the classifier first".format(training_id))
            else:
                self.train_classifier(training_id)
            self.__tasks.task_done()
def init_classifier(self, class_id, class_samples):
    """
    Initialise a One-Class-Classifier with sample data
    :param class_id: new class id
    :param class_samples: samples belonging to the class
    :return: True/False - success
    """
    log.info('cl', "Initializing new Classifier for user ID {}".format(class_id))

    if class_id in self.classifiers:
        log.severe("Illegal reinitialization of classifier")
        return False

    self.classifiers[class_id] = self.generate_classifier()
    self.nr_classes += 1
    self.classifier_states[class_id] = 0

    # add samples to update stack
    self.classifier_update_stacks[class_id] = class_samples

    # directly train classifier
    return self.train_classifier(class_id)
def __init__(self, user_db_, classifier='IABOD'):
    EnsembleClassifierTypeA.__init__(self, user_db_, classifier)

    if classifier == 'ISVM':
        # load lfw embeddings
        log.info('clf', 'Loading unknown class samples for ISVM classifier...')
        fileDir = os.path.dirname(os.path.realpath(__file__))
        modelDir = os.path.join(fileDir, '../..', 'models', 'embedding_samples')    # path to the model directory
        filename = "{}/{}".format(modelDir, "embeddings_lfw.pkl")
        if os.path.isfile(filename):
            # print filename
            with open(filename, 'r') as f:
                embeddings = pickle.load(f)
                f.close()
            self.__unknown_class_data = embeddings
        else:
            log.severe("Missing unknown class data... File {} not found in {}!".format(filename, modelDir))
def init_new_class(self, class_id, class_samples, sample_poses):
    """
    Initialise a One-Class-Classifier with sample data
    :param class_id: new class id
    :param class_samples: samples belonging to the class
    :param sample_poses: head poses (pitch, yaw) of the samples
    :return: True/False - success
    """
    log.info('cl', "Initializing new Classifier for user ID {}".format(class_id))
    if class_id in self.classifiers:
        log.severe("Illegal reinitialization of classifier")
        return False

    # init new data model
    self.data_controller.add_samples(user_id=class_id, new_samples=class_samples, new_poses=sample_poses)
    cluster_ref = self.data_controller.get_class_cluster(class_id)

    # init new classifier
    if self.CLASSIFIER == 'SetSimilarityHardThreshold':
        # link to data controller: similarity matching - model = data
        self.classifiers[class_id] = SetSimilarityHardThreshold(
            metric='ABOD',
            threshold=0.3,
            nr_compaired_samples=40,    # select 40 best samples for comparison
            cluster=cluster_ref,        # linked data model
            recheck_l2=True
        )
    else:
        raise NotImplementedError('This classifier is not implemented yet!')

    self.nr_classes += 1
    self.classifier_states[class_id] = 0

    # add samples to update stack
    with self.trainig_data_lock:
        self.classifier_update_stacks[class_id] = class_samples

    # directly train classifier
    return self.train_classifier(class_id)
def check_inter_sample_dist(samples, metric='euclidean'):
    # calc pairwise distance
    if metric == 'cosine':
        dist = pairwise_distances(samples, samples, metric='cosine')
        thresh = 0.7
    elif metric == 'euclidean':
        # dist = pairwise_distances(samples, samples, metric='euclidean')
        # dist = np.square(dist)
        dist = BaseMetaController.calc_adjacent_dist(samples)
        thresh = 1.4
    else:
        raise ValueError

    nr_errors = np.count_nonzero(dist > thresh)
    # print "nr errors: {}, max: {}".format(nr_errors, np.max(dist))

    # allowed errors
    if nr_errors > 0:
        log.severe("Inconsistent set! Inter-sample distances: {}".format(dist))
        return False
    return True
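# Standalone sketch (assumption) of the euclidean branch above. It assumes that
# BaseMetaController.calc_adjacent_dist() returns squared distances between
# consecutive samples, as the commented-out pairwise/np.square variant suggests:
# the set is flagged inconsistent as soon as any adjacent distance exceeds the threshold.
import numpy as np

def adjacent_distances_ok(samples, thresh=1.4):
    samples = np.asarray(samples, dtype=float)
    diffs = samples[1:] - samples[:-1]        # consecutive sample pairs only
    sq_dist = np.sum(diffs ** 2, axis=1)      # squared euclidean distances
    return np.count_nonzero(sq_dist > thresh) == 0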
def predict(self, samples):
    """
    Prediction cases:
    - Only target class is identified with ratio X (high): Class
    - Target and another class are identified with ratios X (high) and Y (small): Class with small confusion
    - Multiple classes are identified with small ratios Ys: Novelty
    - No classes identified: Novelty
    :param samples:
    :return: Class ID, -1 (Novelty), None invalid samples (multiple detections)
    """
    # no classifiers yet, predict novelty
    if not self.classifiers:
        # 100% confidence
        self.__decision_function = np.array([len(samples)]), np.array([-1])
        return -1

    predictions, class_ids = self.__predict(samples)
    if len(predictions) == 0:
        # no class in reach - classify as novel class
        self.__decision_function = np.array([len(samples)]), np.array([-1])
        return -1

    # calc nr of positive class detections
    cls_scores = (predictions > 0).sum(axis=1)
    self.__decision_function = cls_scores, class_ids
    nr_samples = len(samples)
    self.__decision_nr_samples = nr_samples
    log.info('cl', "Classifier scores: {} | max: {}".format(cls_scores, nr_samples))

    # no classes detected at all - novelty
    if len(cls_scores[cls_scores <= self.__novelty_thresh * nr_samples]) == len(cls_scores):
        return -1

    identification_mask = cls_scores >= self.__class_thresh * nr_samples
    ids = class_ids[identification_mask]
    if len(ids) > 0:
        # multiple possible detections - invalid samples
        if len(ids) > 1:
            # use average to-class-distance to select best choice
            mean_dist_cosine = []
            mean_dist_euclidean = []
            # todo: mean dist or mean dist to cluster mean
            for class_id in ids:
                mean_dist_cosine.append(self.classifiers[class_id].mean_dist(samples))
                mean_dist_euclidean.append(self.classifiers[class_id].mean_dist(samples, 'euclidean'))
            id_index_cosine = mean_dist_cosine.index(min(mean_dist_cosine))
            id_index_euclidean = mean_dist_euclidean.index(min(mean_dist_euclidean))
            log.severe("Samples are ambiguous. Classes: {}".format(ids))
            log.severe("IDCOS: {} | meandist cosine: {}".format(int(ids[id_index_cosine]), mean_dist_cosine))
            log.severe("IDEUC: {} | meandist euclidean: {}".format(int(ids[id_index_euclidean]), mean_dist_euclidean))
            for class_id in ids:
                print self.classifiers[class_id].class_mean_dist(samples, 'cosine')
            mean_dist_cosine = np.array(mean_dist_cosine)
            if np.sum((mean_dist_cosine - min(mean_dist_cosine)) < 0.05) > 1:
                log.severe("SAMPLES DISCARDED: Average distance to data is ambiguous")
                return None
            return int(ids[id_index_cosine])
            # return None
        # single person identified - return id
        return int(ids[0])
    else:
        # samples unclear
        return None
def accumulate_samples(self, user_id, new_samples, sample_weights=np.array([]), sample_poses=np.array([])):
    """
    :param user_id:
    :param new_samples:
    :param sample_weights:
    :return:
        array : safe samples (safe to integrate in any way)
        array : poses of the safe samples
        bool  : reset user
        int   : prediction of last section
        float : confidence of last section prediction
    """
    # check for set inconsistency
    samples_ok = BaseMetaController.check_inter_sample_dist(new_samples, metric='euclidean')

    if not samples_ok:
        # no return (queue is not filled up and thus we don't have a safe section)
        log.severe("Update set is inconsistent - disposing...")
        # reset queue
        self.sample_queue.pop(user_id, None)
        self.sample_weight_queue.pop(user_id, None)
        self.sample_pose_queue.pop(user_id, None)
        return np.array([]), np.array([]), True, -1, 1.

    # generate placeholder weights
    if sample_weights.size == 0:
        # 5 of 10
        sample_weights = np.repeat(5, len(new_samples))

    assert len(sample_weights) == len(new_samples)

    # add samples
    if user_id not in self.sample_queue:
        # initialize
        self.sample_queue[user_id] = new_samples
        self.sample_weight_queue[user_id] = sample_weights
        self.sample_pose_queue[user_id] = sample_poses
    else:
        # append
        self.sample_queue[user_id] = np.concatenate((self.sample_queue[user_id], new_samples)) \
            if self.sample_queue[user_id].size \
            else new_samples
        self.sample_weight_queue[user_id] = np.concatenate((self.sample_weight_queue[user_id], sample_weights)) \
            if self.sample_weight_queue[user_id].size \
            else sample_weights
        self.sample_pose_queue[user_id] = np.concatenate((self.sample_pose_queue[user_id], sample_poses)) \
            if self.sample_pose_queue[user_id].size \
            else sample_poses

    target_class = -1
    confidence = 1.
    forward = np.array([])
    forward_poses = np.array([])
    reset_user = False

    # do meta recognition
    # check set for inconsistencies - return only the safe section
    while len(self.sample_queue[user_id]) >= self.__queue_max_length:
        sample_batch = self.sample_queue[user_id][0:self.__queue_max_length]
        weight_batch = self.sample_weight_queue[user_id][0:self.__queue_max_length]
        pose_batch = self.sample_pose_queue[user_id][0:self.__queue_max_length]

        # check set consistency
        samples_ok = BaseMetaController.check_inter_sample_dist(sample_batch, metric='euclidean')

        # predict class
        is_consistent, target_class, confidence = self.__p_multicl.predict_class(
            sample_batch, sample_poses=pose_batch)

        if samples_ok and is_consistent:
            # add samples to forward
            forward = np.concatenate((forward, self.sample_queue[user_id][0:self.__inclusion_range])) \
                if forward.size \
                else self.sample_queue[user_id][0:self.__inclusion_range]
            forward_poses = np.concatenate((forward_poses, self.sample_pose_queue[user_id][0:self.__inclusion_range])) \
                if forward_poses.size \
                else self.sample_pose_queue[user_id][0:self.__inclusion_range]
            # remove first x samples
            self.sample_queue[user_id] = self.sample_queue[user_id][self.__inclusion_range:]
            self.sample_weight_queue[user_id] = self.sample_weight_queue[user_id][self.__inclusion_range:]
            self.sample_pose_queue[user_id] = self.sample_pose_queue[user_id][self.__inclusion_range:]
        else:
            # dispose all samples! Whole queue!
            self.sample_queue.pop(user_id, None)
            self.sample_weight_queue.pop(user_id, None)
            self.sample_pose_queue.pop(user_id, None)
            log.severe("Set is inconsistent - disposing...")
            reset_user = True
            break

    # predict user if not enough samples
    if not forward.size and reset_user is False:
        is_consistent, target_class, confidence = self.__p_multicl.predict_class(
            self.sample_queue[user_id], sample_poses=self.sample_pose_queue[user_id])
        print "Not enough to forward but predict...", is_consistent, target_class, confidence

    return forward, forward_poses, reset_user, target_class, confidence
def accumulate_samples(self, tracking_id, new_samples, sample_weights=np.array([]), sample_poses=np.array([])):
    # check for set inconsistency
    samples_ok = BaseMetaController.check_inter_sample_dist(new_samples, metric='euclidean')

    if not samples_ok:
        log.severe("Identification set is inconsistent - disposing...")
        # reset queue
        self.sample_queue.pop(tracking_id, None)
        self.sample_weight_queue.pop(tracking_id, None)
        self.sample_pose_queue.pop(tracking_id, None)
        return False, np.array([]), np.array([]), np.array([])

    # generate placeholder weights
    if sample_weights.size == 0:
        # 5 of 10
        sample_weights = np.repeat(5, len(new_samples))

    assert len(sample_weights) == len(new_samples)

    # add samples
    if tracking_id not in self.sample_queue:
        # initialize
        self.sample_queue[tracking_id] = new_samples
        self.sample_weight_queue[tracking_id] = sample_weights
        self.sample_pose_queue[tracking_id] = sample_poses
    else:
        # append
        self.sample_queue[tracking_id] = np.concatenate((self.sample_queue[tracking_id], new_samples)) \
            if self.sample_queue[tracking_id].size \
            else new_samples
        self.sample_weight_queue[tracking_id] = np.concatenate((self.sample_weight_queue[tracking_id], sample_weights)) \
            if self.sample_weight_queue[tracking_id].size \
            else sample_weights
        self.sample_pose_queue[tracking_id] = np.concatenate((self.sample_pose_queue[tracking_id], sample_poses)) \
            if self.sample_pose_queue[tracking_id].size \
            else sample_poses

    is_save_set = False

    # if set has a safe sample or is long enough
    if len(self.sample_queue[tracking_id]) >= self.min_sample_length:
        if len(self.sample_queue[tracking_id]) >= self.save_sample_length \
                or np.count_nonzero(self.sample_weight_queue[tracking_id] >= self.save_weight_thresh):
            # check set consistency
            samples_ok = BaseMetaController.check_inter_sample_dist(
                self.sample_queue[tracking_id], metric='euclidean')
            if samples_ok:
                # set is safe - allow identification
                is_save_set = True
            else:
                # dispose all samples
                self.sample_queue.pop(tracking_id, None)
                self.sample_weight_queue.pop(tracking_id, None)
                self.sample_pose_queue.pop(tracking_id, None)
                log.severe("Set is inconsistent - disposing...")

    # TODO: return whole set or only last?
    current_samples = self.sample_queue.get(tracking_id, np.array([]))
    current_weights = self.sample_weight_queue.get(tracking_id, np.array([]))
    current_poses = self.sample_pose_queue.get(tracking_id, np.array([]))

    # not enough safe samples - return what we have so far
    return is_save_set, current_samples, current_weights, current_poses
def get_weighted_score(self, test_samples, test_poses, ref_samples, ref_poses):
    assert test_samples.ndim == 2
    assert ref_samples.ndim == 2

    dist_lookup = pairwise_distances(test_samples, ref_samples, metric='euclidean')
    # print np.shape(dist_lookup[0])
    factors = []
    sample_weights = []

    # if only one sample: cannot calculate abof
    if len(ref_samples) < 3:
        log.severe(
            'Cannot calculate ABOF with {} reference samples (variance calculation needs at least 3 reference points)'
            .format(len(ref_samples)))
        raise Exception

    for i_sample, A in enumerate(test_samples):
        factor_list = []
        weight_list = []
        for i in range(len(ref_samples)):
            # select first point in reference set
            B = ref_samples[i]
            # distance
            AB = dist_lookup[i_sample][i]
            for j in range(i + 1):
                if j == i:    # ensure B != C
                    continue
                # select second point in reference set
                C = ref_samples[j]
                # distance
                AC = dist_lookup[i_sample][j]

                if np.array_equal(B, C):
                    sys.exit("Points are equal: B == C! Reference Set contains two times the same samples")
                    factor_list.append(1000)
                    print "Bi/Cj: {}/{}".format(i, j)
                    # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                    continue

                angle_BAC = ABOD.angleBAC(A, B, C, AB, AC)

                w1 = self.weight_gen.get_pose_weight(test_poses[i_sample], ref_poses[i])
                w2 = self.weight_gen.get_pose_weight(test_poses[i_sample], ref_poses[j])
                weight_list.append(2. / float(w1 + w2))    # 1 / ((a+b)/2)

                # compute each element of variance list
                try:
                    # apply weighting
                    if self.variant == 1:
                        tmp = angle_BAC / float(math.pow(AB * AC, 2) * (w1 * w2))
                    elif self.variant == 2:
                        tmp = angle_BAC / float(math.pow(AB * AC, 2) * (w1 + w2))
                    else:
                        tmp = angle_BAC / float(math.pow(AB * AC, 2))
                except ZeroDivisionError:
                    log.severe("ERROR\tABOF\tfloat division by zero! Trying to predict training point?")
                    tmp = 500
                    # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                factor_list.append(tmp)

        # calculate weighted variance
        if self.variant == 3:
            weighted_average = np.average(factor_list, weights=np.array(weight_list))
            var = np.average((np.array(factor_list) - weighted_average) ** 2)
        elif self.variant == 4:
            var = WeightedABOD.biased_weighted_var(np.array(factor_list), np.array(weight_list),
                                                   weighted_average=False)
        elif self.variant == 5:
            var = WeightedABOD.biased_weighted_var(np.array(factor_list), np.array(weight_list))
        else:
            var = np.var(np.array(factor_list))
        factors.append(var)

        # weight_list = np.repeat(1, len(factors))
        sample_weights.append(np.average(weight_list))

    return np.array(factors), np.array(sample_weights)
def get_score(test_samples, reference_set):
    assert test_samples.ndim == 2
    assert reference_set.ndim == 2

    dist_lookup = pairwise_distances(test_samples, reference_set, metric='euclidean')
    # print np.shape(dist_lookup[0])
    factors = []

    # if only one sample: cannot calculate abof
    if len(reference_set) < 3:
        log.severe(
            'Cannot calculate ABOF with {} reference samples (variance calculation needs at least 3 reference points)'
            .format(len(reference_set)))
        raise Exception

    for i_sample, A in enumerate(test_samples):
        factor_list = []
        for i in range(len(reference_set)):
            # select first point in reference set
            B = reference_set[i]
            # distance
            AB = dist_lookup[i_sample][i]
            for j in range(i + 1):
                if j == i:    # ensure B != C
                    continue
                # select second point in reference set
                C = reference_set[j]
                # distance
                AC = dist_lookup[i_sample][j]

                if np.array_equal(B, C):
                    print "Bi/Cj: {}/{}".format(i, j)
                    log.error("Points are equal: B == C! Assuming classification of training point")
                    sys.exit("Points are equal: B == C! Reference Set contains two times the same samples")
                    factor_list.append(1000)
                    # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                    continue

                # angle_BAC = ABOD.angleBAC(A, B, C, AB, AC)
                # angle_BAC = ABOD.angleFast(A-B, A-C)
                vector_AB = B - A
                vector_AC = C - A

                # compute each element of variance list
                try:
                    cos_similarity = np.dot(vector_AB, vector_AC) / (AB * AC)
                    # apply weighting
                    tmp = cos_similarity / float(math.pow(AB * AC, 2))
                except ZeroDivisionError:
                    log.severe("ERROR\tABOF\tfloat division by zero! Trying to predict training point?")
                    tmp = 500
                    # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                factor_list.append(tmp)

        factors.append(np.var(factor_list))
    return np.array(factors)
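# Compact standalone sketch (assumption, not a drop-in replacement) of the
# per-sample score computed by get_score() above: the variance over all
# reference pairs (B, C) of cos<AB, AC> / (|AB|*|AC|)^2. Test points inside or
# near the reference cloud get large, high-variance terms; distant outliers get
# small ones. Assumes at least three distinct reference points, as checked above.
import numpy as np

def abof_single(A, reference_set):
    terms = []
    n = len(reference_set)
    for i in range(n):
        for j in range(i):
            AB = reference_set[i] - A
            AC = reference_set[j] - A
            nAB, nAC = np.linalg.norm(AB), np.linalg.norm(AC)
            if nAB == 0 or nAC == 0:
                continue    # skip degenerate pairs (A coincides with a reference point)
            cos_sim = np.dot(AB, AC) / (nAB * nAC)
            terms.append(cos_sim / (nAB * nAC) ** 2)
    return np.var(terms)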
def __abof_multi(self, samples, knn=None, cosine_weighting=False):
    """
    Calculate the ABOF of A = (x1, x2, ..., xn)
    pt_list = self.data (cluster)
    """
    # Todo: fix cosine dist weighting
    pt_list = self.data
    if knn is not None and knn < len(self.data):
        pt_list = random.sample(pt_list, knn)

    dist_lookup = pairwise_distances(samples, pt_list, metric='euclidean')
    if cosine_weighting:
        cos_dist_lookup = pairwise_distances(samples, pt_list, metric='cosine')
    # print np.shape(dist_lookup[0])
    factors = []

    # if only one sample: cannot calculate abof
    if len(pt_list) < 2:
        log.severe('Cannot calculate ABOF with {} reference samples'.format(len(pt_list)))
        fake_abod = 0
        if dist_lookup[0][0] < 0.3:
            fake_abod = 3
        else:
            fake_abod = 0.1
        factors.append(fake_abod)
        return factors

    for i_sample, A in enumerate(samples):
        varList = []
        for i in range(len(pt_list)):
            B = pt_list[i]
            AB = dist_lookup[i_sample][i]
            j = 0
            for j in range(i + 1):
                if j == i:    # ensure B != C
                    continue
                C = pt_list[j]
                AC = dist_lookup[i_sample][j]

                if np.array_equal(B, C):
                    log.error("Points are equal: B == C! Assuming classification of training point (ABOD 1000)")
                    varList.append(1000)
                    print "Bi/Cj: {}/{}".format(i, j)
                    # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                    continue

                angle_BAC = self.__angleBAC(A, B, C, AB, AC)

                # compute each element of variance list
                try:
                    # apply weighting
                    if cosine_weighting:
                        tmp = angle_BAC / float(math.pow(
                            (2.0 - cos_dist_lookup[i_sample][i]) * (2.0 - cos_dist_lookup[i_sample][j]), 2))
                    else:
                        tmp = angle_BAC / float(math.pow(AB * AC, 2))
                except ZeroDivisionError:
                    log.severe("ERROR\tABOF\tfloat division by zero! Trying to predict training point?")
                    tmp = 500
                    # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                varList.append(tmp)

        factors.append(np.var(varList))
    return factors
def train_classifier(self, class_id):
    """
    Retrain One-Class Classifiers (partial_fit)
    """
    log.info('cl', "(Re-)training Classifier for user ID {}".format(class_id))

    if class_id not in self.classifiers:
        log.severe("Cannot train class {} without creating the classifier first".format(class_id))
        return False

    start = time.time()

    with self.training_lock:
        # get update samples from stack
        # if samples available: do update with all available update samples
        # update_samples = self.classifier_update_stacks.get(class_id, []) or []
        if class_id in self.classifier_update_stacks:
            update_samples = self.classifier_update_stacks[class_id]
        else:
            update_samples = []

        if len(update_samples) > 0:
            training_before = self.classifier_states[class_id]

            if self.CLASSIFIER == 'ABOD':
                """
                OFFLINE Classifier: retrain with all available data
                - Samples: Stored in user db, reloaded upon every fit
                """
                # instead of partial fit: add samples and do refitting over complete data
                self.p_user_db.add_samples(class_id, update_samples)
                samples = self.p_user_db.get_class_samples(class_id)

                # stop
                if len(samples) > 100:
                    log.warning("Sample size exceeding 100. No refitting.")
                else:
                    # always use fit method (no partial fit available)
                    self.classifiers[class_id].fit(samples)
                    self.classifier_states[class_id] += 1
            elif self.CLASSIFIER == 'IABOD':
                """
                INCREMENTAL Methods: Use partial fit with stored update data
                - Samples: Partially stored in ABOD Cluster
                """
                # partial update: partial_fit
                self.classifiers[class_id].partial_fit(update_samples)
                self.classifier_states[class_id] += 1
            elif self.CLASSIFIER == 'ISVM':
                """
                INCREMENTAL Methods: Use partial fit with stored update data
                - Samples: Partially stored in Cluster
                """
                self.classifiers[class_id].partial_fit(update_samples)
                self.classifier_states[class_id] += 1

            # empty update list if training was performed
            if self.classifier_states[class_id] - training_before == 1:
                self.classifier_update_stacks[class_id] = []
        else:
            log.warning("No training/update samples available")

    if self.__verbose:
        log.info('cl', "fitting took {} seconds".format(time.time() - start))

    return True