Example #1
    def receive_message(self, the_socket, datasize, timeout=2):
        """Basic message receiver for known datasize"""
        buffer = ''
        begin = time.time()
        refresh_rate = 0.01

        try:
            while len(buffer) < datasize:
                # if we already received some data, raise after the timeout
                if buffer and time.time() - begin > timeout:
                    raise ValueError(
                        'receive_message timeout. Only partial data received.')
                # if we received no data at all, allow a longer wait (twice the timeout) before raising
                elif time.time() - begin > timeout * 2:
                    raise ValueError(
                        'receive_message timeout. No data received.')

                packet = the_socket.recv(datasize - len(buffer))

                if packet:
                    # append to buffer
                    buffer += packet
                    # print 'Total ' + str(sys.getsizeof(buffer)) + ' bytes'
                    begin = time.time()
                if not packet:
                    # wait
                    time.sleep(refresh_rate)

        except socket.error, (errorCode, message):
            # error 10035 is no data available, it is non-fatal
            if errorCode != 10035:
                log.severe('socket.error - (' + str(errorCode) + ') ' +
                           message)
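The receiver above polls a non-blocking socket: recv() is called in a loop, error 10035 (WSAEWOULDBLOCK, i.e. no data available yet) is tolerated, and a timeout is raised only if data stops arriving. A minimal, self-contained sketch of that polling pattern using a local socket pair (the names, sizes and refresh rate are illustrative, and socket.socketpair assumes a Unix-like platform):

    import socket
    import time

    a, b = socket.socketpair()      # stand-in for a connected client/server pair
    b.setblocking(False)            # recv() now returns immediately instead of blocking
    a.sendall(b'hello world!')

    buf = b''
    begin = time.time()
    while len(buf) < 12 and time.time() - begin < 2:
        try:
            packet = b.recv(12 - len(buf))
        except socket.error:
            time.sleep(0.01)        # no data available yet - wait and poll again
            continue
        if packet:
            buf += packet
    print(buf)                      # the 12 bytes sent above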
Example #2
    def predict(self, samples, samples_poses):

        # get similarity scores
        cluster_type = self.data_cluster.__class__.__name__
        if cluster_type == 'MeanShiftPoseCluster':
            similarity_scores, matching_confidence = self.decision_function(
                samples, samples_poses, nr_compaired_samples=self.nr_compaired_samples
            )
        else:
            log.severe("Prediction for cluster type '{}' is not implemented yet!".format(cluster_type))
            raise NotImplementedError("Implement threshold prediction for specific cluster type.")

        print "==== {}: ".format(self.metric), ["%0.3f" % i for i in similarity_scores]
        l2_dist = self.data_cluster.class_mean_dist(samples, metric='euclidean')
        print "==== L2: ", ["%0.3f" % i for i in l2_dist]
        print "==== Matching conf: ", ["%0.1f" % i for i in matching_confidence]

        if self.metric == 'ABOD':
            positive = similarity_scores > self.__thresh

            # only recheck the L2 distance when at least half of the samples are already positive
            if self.recheck_L2_distance and np.count_nonzero(positive) >= int(len(positive)/2.):
                m1 = similarity_scores >= 0.16
                m2 = l2_dist < 0.6
                print ".... Rechecking L2 distance, detections: ", m1 & m2
                positive[m1 & m2] = True
        else:
            positive = similarity_scores < self.__thresh
        return np.array([1 if v else -1 for v in positive]), np.array(matching_confidence)
Example #3
    def prediction_proba_old(self, user_id):
        total_proba = 1
        # is new user
        if user_id == -1:
            for uid in range(1, self.nr_classes + 1):
                dec_fn = self.decision_function(uid)
                if dec_fn < 0:
                    total_proba *= abs(dec_fn /
                                       float(self.__decision_nr_samples))
                else:
                    total_proba *= 1 - dec_fn / float(
                        self.__decision_nr_samples)
            return total_proba

        # is regular user
        for uid in range(1, self.nr_classes + 1):
            if uid == user_id:
                # target classifier
                total_proba *= self.decision_function(uid) / float(
                    self.__decision_nr_samples)
            else:
                dec_fn = self.decision_function(uid)

                if dec_fn < 0:
                    total_proba *= abs(dec_fn /
                                       float(self.__decision_nr_samples))
                else:
                    total_proba *= 1 - dec_fn / float(
                        self.__decision_nr_samples)
                    log.severe("Duplicate detection.")
                    raise ValueError

        # loop through other classifiers
        return total_proba
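The probability above is a product over the per-class decision values d: |d / N| for classes that reject the samples (d < 0) and 1 - d / N for classes that accept them, with N the number of decision samples. A short numeric sketch with made-up values (these are not outputs of the classifier above):

    nr_samples = 10.0                       # hypothetical self.__decision_nr_samples
    decision_values = {1: 8, 2: -9, 3: -7}  # hypothetical per-class decision values
    target_id = 1

    total_proba = 1.0
    for uid, dec_fn in decision_values.items():
        if uid == target_id:
            total_proba *= dec_fn / nr_samples        # 0.8
        elif dec_fn < 0:
            total_proba *= abs(dec_fn / nr_samples)   # 0.9 and 0.7
        else:
            total_proba *= 1 - dec_fn / nr_samples
    print(total_proba)                                # 0.8 * 0.9 * 0.7 = 0.504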
Example #4
    def predict(self, samples):
        """
        One Class prediction
        :param samples:
        :return: np.array of labels. 1: is-class, -1 is-not class, 0 sample is uncertain
        """

        print "--- Start prediction of samples: {}".format(len(samples))

        if len(self.data) == 0:
            log.severe(
                "ABOD Cluster is not initialized! Please use the 'fit' method first."
            )

        # project onto subspace
        if self.basis is not None:
            samples = ProjectOntoSubspace(samples, self.mean, self.basis)

        variance = self.__predict(samples)
        self.prediction = np.array([
            -1 if v <
            (self.threshold - self.uncertainty_bandwidth / 2) else 1 if v >
            (self.threshold + self.uncertainty_bandwidth / 2) else 0
            for v in variance
        ])

        return self.prediction
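The label assignment above splits the variance axis into three zones around the threshold: values below threshold - bandwidth/2 map to -1 (not the class), values above threshold + bandwidth/2 map to 1 (the class), and the band in between maps to 0 (uncertain). A standalone sketch with hypothetical numbers:

    import numpy as np

    threshold = 0.3                 # hypothetical values, not the ones used above
    uncertainty_bandwidth = 0.1
    variance = np.array([0.10, 0.27, 0.31, 0.50])

    lower = threshold - uncertainty_bandwidth / 2.0   # 0.25
    upper = threshold + uncertainty_bandwidth / 2.0   # 0.35
    labels = np.where(variance < lower, -1, np.where(variance > upper, 1, 0))
    print(labels)                                     # [-1  0  0  1]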
Example #5
    def __init__(self, server, conn, handle):

        nr_users = server.receive_uint(conn)
        target_users = []
        for x in range(0, nr_users):
            # get target class ids (uint)
            user_id = server.receive_uint(conn)
            target_users.append(user_id)

        # receive images
        images = server.receive_image_batch_squared_same_size(conn)

        log.severe("ImageIdentificationPrealignedCS, possible IDs: ",
                   target_users)

        # generate embedding
        embeddings = server.embedding_gen.get_embeddings(rgb_images=images,
                                                         align=False)

        if not embeddings.any():
            r.Error(server, conn, "Could not generate face embeddings.")
            return

        if -1 in target_users:
            # open set user id prediction
            # current_weights = np.repeat(1, len(embeddings))
            is_consistent, user_id, confidence = server.classifier.predict_class(
                embeddings, sample_poses=None)
        else:
            # closed set user id prediction
            user_id = server.classifier.predict_closed_set(
                target_users, embeddings)

            if user_id is None:
                r.Error(
                    server, conn,
                    "Label could not be predicted - Samples are contradictory."
                )
                return

        # get user nice name
        user_name = server.user_db.get_name_from_id(user_id)

        if user_name is None:
            user_name = "unnamed"

        # get profile picture
        profile_picture = server.user_db.get_profile_picture(user_id)
        log.info(
            'server',
            "User identification complete: {} [ID], {} [Username]".format(
                user_id, user_name))
        r.Identification(server,
                         conn,
                         int(user_id),
                         user_name,
                         profile_picture=profile_picture)
Example #6
    def __init__(self, server, conn, handle):
        # receive user id
        user_id = server.receive_uint(conn)

        log.info('server', 'User Update (Aligned, Robust) for ID {}'.format(user_id))

        # receive images
        images = server.receive_image_batch_squared_same_size(conn)

        # get sample poses
        sample_poses = []
        for x in range(0, len(images)):
            pitch = server.receive_char(conn)
            yaw = server.receive_char(conn)
            sample_poses.append([pitch, yaw])
        sample_poses = np.array(sample_poses)

        # TODO: calculate weights
        weights = np.repeat(10, len(images))

        # generate embedding
        embeddings = server.embedding_gen.get_embeddings(images, align=False)

        if not embeddings.any():
            r.Error(server, conn, "Could not generate face embeddings.")
            return

        # accumulate samples - check for inconsistencies
        verified_data, reset_user, id_pred, confidence = server.classifier.update_controller.accumulate_samples(user_id, embeddings, weights)

        log.info('cl', "verified_data (len: {}), reset_user: {}: ID {}, conf {}".format(len(verified_data), reset_user, id_pred, confidence))

        # forward save part of data
        if verified_data.size:
            # for s in embeddings:
            #     print "new: {:.8f}".format(s[0])
            # print "------------------"
            # for s in verified_data:
            #     print "s: {:.5f}".format(s[0])

            # add to data model
            server.classifier.data_controller.add_samples(user_id=user_id, new_samples=verified_data)
            # add to classifier training queue
            server.classifier.add_training_data(user_id, verified_data)

        # reset user if queue has become inconsistent or wrong user is predicted
        if reset_user:
            log.severe("USER VERIFICATION FAILED - FORCE REIDENTIFICATION")
            r.Reidentification(server, conn)
            return

        # return prediction feedback
        user_name = server.user_db.get_name_from_id(id_pred)
        if user_name is None:
            user_name = "unnamed"
        r.PredictionFeedback(server, conn, id_pred, user_name, confidence=int(confidence*100.0))
Example #7
    def process_labeled_stream_data(self,
                                    class_id,
                                    samples,
                                    check_update=False):
        """
        Incorporate labeled data into the classifiers. Classifier for {class_id} must be initialized already
        (retraining is done once the samples can't be explained by the model anymore)
        :param class_id: class id
        :param samples: class samples
        :param check_update: Evaluate update on the current model before using it (robust to sample pollution)
        :return: -
        """

        log.info(
            'cl',
            "Processing labeled stream data for user ID {}".format(class_id))
        class_id = int(class_id)

        if class_id not in self.classifiers:
            log.severe(
                "Class {} has not been initialized yet!".format(class_id))
            return False, 1  # force reidentification

        confidence = 1

        if check_update:
            prediction = self.predict(samples)
            # samples are not certain enough
            if prediction is None:
                return None, 1
            # calculate confidence
            confidence = self.prediction_proba(class_id)
            # detected different class
            if prediction != class_id:
                log.severe(
                    "Updating invalid class! Tracker must have switched!")
                return False, confidence  # force reidentification

        with self.training_lock:
            # add update data to stack
            if class_id not in self.classifier_update_stacks or len(
                    self.classifier_update_stacks[class_id]) == 0:
                # create new list
                self.classifier_update_stacks[class_id] = samples
            else:
                # append
                self.classifier_update_stacks[class_id] = np.concatenate(
                    (self.classifier_update_stacks[class_id], samples))

            # request classifier update
            # Todo: only request update if available update data exceeds threshold
            self.add_training_task(class_id)

        return True, confidence
Example #8
    def init_from_files(self, embedding_file="pose_matthias2.pkl", pose_file="pose_matthias2_poses.pkl"):

        log.info('db', "Initializing weight generator...")
        # initialize grid
        embeddings = load_data(embedding_file)
        poses = load_data(pose_file)
        if embeddings is None or poses is None:
            log.severe("Could not load file {} in dir uids/models/confience_weights/ for weight generator...".format(embeddings))
            sys.exit(0)

        self.generate(embeddings, poses)
Example #9
    def __predict_ORIG(self, samples):

        proba, class_ids = self.predict_proba(samples)
        mask_0 = proba > 0

        # no classes detected at all - novelty
        if len(proba[mask_0]) == 0:
            return -1

        mask_class = proba > self.__class_thresh
        nr_classes = len(proba[mask_class])

        if nr_classes > 0:
            # class detected
            if nr_classes > 1:
                # multiple classes detected - batch invalid
                if self.__verbose:
                    log.severe(
                        "Multiple classes detected: {}".format(nr_classes))
                return None

            confusion_mask = (self.__confusion_thresh <
                              proba) & (proba < self.__class_thresh)
            # count if any element, except for class is above confusion ratio
            if len(proba[confusion_mask]) > 0:
                log.warning(
                    "Class confusion - force re-identification: {}% confusion, {}% identification, {} samples"
                    .format(
                        proba[(self.__confusion_thresh < proba)
                              & (proba < self.__class_thresh)],
                        proba[mask_class], len(samples)))

                # calc pairwise distance. If small then force re-identification
                # for sample in proba[confusion_mask]:

                # Todo: implement properly
                # return None

            class_id_arr = class_ids[mask_class]
            return int(class_id_arr[0])

        else:
            if len(proba[proba > self.__novelty_thresh]) > 0:
                print "--- no classes detected but novelty threshold exceeded: {}".format(
                    proba)
                return None

            return -1
Example #10
    def decision_function(self, samples):
        """
        Distance of the samples X to the target class distribution
        :param samples:
        :return:
        """

        cluster_type = self.data_cluster.__class__.__name__

        if cluster_type != 'MeanShiftCluster':
            log.severe(
                "Prediction for cluster type '{}' is not implemented yet! Add custom decision_function() first."
                .format(cluster_type))
            raise NotImplementedError(
                "Implement threshold prediction for specific cluster type.")

        # calc hashes
        hashed = [self.get_hash(s) for s in samples]

        # check intersections and use buffered results
        if self.decision_fn_buffer:
            # ind_samples = dict((k, i) for i, k in enumerate(hashed))
            intersec_hashes = list(
                set(self.decision_fn_buffer.keys()) & set(hashed))

            similarity_scores = []
            for i, h in enumerate(hashed):

                if h in intersec_hashes:
                    similarity_scores.append(self.decision_fn_buffer[h])
                else:
                    score = self.data_cluster.sample_set_similarity_scores(
                        np.array([samples[i]]), self.metric)
                    similarity_scores.append(score)
                    # add to buffer
                    self.decision_fn_buffer[h] = score

        else:
            similarity_scores = self.data_cluster.sample_set_similarity_scores(
                samples, self.metric)
            # add to buffer
            for i, h in enumerate(hashed):
                self.decision_fn_buffer[h] = similarity_scores[i]

        similarity_scores = np.array(similarity_scores).flatten()

        return similarity_scores
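The buffer above memoizes per-sample similarity scores keyed by a hash of the sample, so samples that reappear across calls skip the cluster comparison. A minimal sketch of the same caching pattern in isolation (score_fn and the byte-level hash are assumptions, not the get_hash used above):

    import numpy as np

    score_buffer = {}

    def hash_sample(sample):
        # assumption: identical rows hash identically via their raw bytes
        return hash(np.ascontiguousarray(sample).tobytes())

    def buffered_scores(samples, score_fn):
        scores = []
        for s in samples:
            h = hash_sample(s)
            if h not in score_buffer:
                score_buffer[h] = score_fn(s)   # computed once, reused afterwards
            scores.append(score_buffer[h])
        return np.array(scores)

    print(buffered_scores(np.random.rand(3, 128), lambda s: s.sum()))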
Example #11
def clean_duplicates(s1, s2):
    assert len(s1) == len(s2)

    x = np.random.rand(s1.shape[1])
    y = s1.dot(x)
    unique, index = np.unique(y, return_index=True)

    # print len(s1)
    # print len(s1[index])
    # print len(np.vstack({tuple(row) for row in s1}))

    if len(s1[index]) != len(s1):
        log.severe("Duplicate items in embeddings s1! Removing duplicates...")
        s1 = s1[index]
        s2 = s2[index]
        # raise ValueError

    return s1, s2
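clean_duplicates finds duplicate rows by projecting every embedding onto a single random vector: identical rows map to identical scalars, so np.unique on the projections exposes them without building a row-wise set (distinct rows colliding on the projection is possible only with probability essentially zero). A small demo on made-up data:

    import numpy as np

    s1 = np.array([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0]])   # rows 0 and 2 are duplicates
    s2 = np.array([10, 20, 30])                            # second array, filtered in lockstep

    x = np.random.rand(s1.shape[1])
    y = s1.dot(x)
    unique, index = np.unique(y, return_index=True)

    print(s1[index])   # two unique rows remain (order follows the sorted projections)
    print(s2[index])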
Example #12
    def init_new_class(self, class_id, class_samples):
        """
        Initialise a One-Class-Classifier with sample data
        :param class_id: new class id
        :param class_samples: samples belonging to the class
        :return: True/False - success
        """

        log.info('cl',
                 "Initializing new Classifier for user ID {}".format(class_id))
        if class_id in self.classifiers:
            log.severe("Illegal reinitialization of classifier")
            return False

        # init new data model
        self.data_controller.add_samples(user_id=class_id,
                                         new_samples=class_samples)
        cluster_ref = self.data_controller.get_class_cluster(class_id)

        # init new classifier
        if self.CLASSIFIER == 'SetSimilarityHardThreshold':
            # link to data controller: similarity matching - model = data
            self.classifiers[class_id] = SetSimilarityHardThreshold(
                metric='ABOD',
                threshold=0.3,
                cluster=cluster_ref  # TODO: data model is connected - might also be separate?
            )
        elif self.CLASSIFIER == 'non-incremental':
            # link to data controller: non-incremental learner
            pass
        elif self.CLASSIFIER == 'incremental':
            # regular model. No need to pass data reference
            pass

        self.nr_classes += 1
        self.classifier_states[class_id] = 0

        # add samples to update stack
        with self.trainig_data_lock:
            self.classifier_update_stacks[class_id] = class_samples
        # directly train classifier
        return self.train_classifier(class_id)
Example #13
    def __classifier_trainer(self):
        """
        Manually triggered classifier training
        :return:
        """
        if self.__verbose is True:
            log.info('cl', "Starting classifier training thread")

        while self.STATUS == 1:
            try:
                training_id = self.__tasks.get(False)
            except Queue.Empty:
                sleep(0.25)  # Time in seconds.
            else:
                if training_id not in self.classifiers:
                    log.severe(
                        "Cannot train class {} without creating the classifier first"
                        .format(training_id))
                else:
                    self.train_classifier(training_id)
                self.__tasks.task_done()
Example #14
    def init_classifier(self, class_id, class_samples):
        """
        Initialise a One-Class-Classifier with sample data
        :param class_id: new class id
        :param class_samples: samples belonging to the class
        :return: True/False - success
        """

        log.info('cl',
                 "Initializing new Classifier for user ID {}".format(class_id))
        if class_id in self.classifiers:
            log.severe("Illegal reinitialization of classifier")
            return False
        self.classifiers[class_id] = self.generate_classifier()
        self.nr_classes += 1
        self.classifier_states[class_id] = 0

        # add samples to update stack
        self.classifier_update_stacks[class_id] = class_samples
        # directly train classifier
        return self.train_classifier(class_id)
Example #15
    def __init__(self, user_db_, classifier='IABOD'):
        EnsembleClassifierTypeA.__init__(self, user_db_, classifier)
        if classifier == 'ISVM':
            # load lfw embeddings
            log.info('clf',
                     'Loading unknown class samples for ISVM classifier...')
            fileDir = os.path.dirname(os.path.realpath(__file__))
            modelDir = os.path.join(
                fileDir, '../..', 'models',
                'embedding_samples')  # path to the model directory
            filename = "{}/{}".format(modelDir, "embeddings_lfw.pkl")
            if os.path.isfile(filename):
                # print filename
                with open(filename, 'rb') as f:
                    embeddings = pickle.load(f)

                self.__unknown_class_data = embeddings
            else:
                log.severe(
                    "Missing unknown class data... File {} not found in {}!".
                    format(filename, modelDir))
Example #16
    def init_new_class(self, class_id, class_samples, sample_poses):
        """
        Initialise a One-Class-Classifier with sample data
        :param class_id: new class id
        :param class_samples: samples belonging to the class
        :return: True/False - success
        """

        log.info('cl', "Initializing new Classifier for user ID {}".format(class_id))
        if class_id in self.classifiers:
            log.severe("Illegal reinitialization of classifier")
            return False

        # init new data model
        self.data_controller.add_samples(user_id=class_id, new_samples=class_samples, new_poses=sample_poses)
        cluster_ref = self.data_controller.get_class_cluster(class_id)

        # init new classifier
        if self.CLASSIFIER == 'SetSimilarityHardThreshold':
            # link to data controller: similarity matching - model = data
            self.classifiers[class_id] = SetSimilarityHardThreshold(
                metric='ABOD',
                threshold=0.3,
                nr_compaired_samples=40,    # select 40 best samples for comparison
                cluster=cluster_ref,         # linked data model
                recheck_l2=True
            )
        else:
            raise NotImplementedError('This classifier is not implemented yet!')

        self.nr_classes += 1
        self.classifier_states[class_id] = 0

        # add samples to update stack
        with self.trainig_data_lock:
            self.classifier_update_stacks[class_id] = class_samples
        # directly train classifier
        return self.train_classifier(class_id)
Example #17
    def check_inter_sample_dist(samples, metric='euclidean'):

        # calc pairwise distance
        if metric == 'cosine':
            dist = pairwise_distances(samples, samples, metric='cosine')
            thresh = 0.7
        elif metric == 'euclidean':
            # dist = pairwise_distances(samples, samples, metric='euclidean')
            # dist = np.square(dist)
            dist = BaseMetaController.calc_adjacent_dist(samples)
            thresh = 1.4
        else:
            raise ValueError

        nr_errors = np.count_nonzero(dist > thresh)
        # print "nr errors: {}, max: {}".format(nr_errors, np.max(dist))

        # allowed errors
        if nr_errors > 0:
            log.severe(
                "Inconsistent set! Inter-sample distances: {}".format(dist))
            return False
        return True
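A short sketch of how this consistency check behaves on a tight sample set versus one containing an outlier, using the full pairwise distance matrix as in the cosine branch above (the 2-D points and the Euclidean threshold of 1.4 are stand-ins for the real embeddings; the actual Euclidean branch uses calc_adjacent_dist instead):

    import numpy as np
    from sklearn.metrics import pairwise_distances

    tight_set = np.array([[0.0, 0.0], [0.1, 0.0], [0.0, 0.2]])
    mixed_set = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])   # contains an outlier
    thresh = 1.4

    for samples in (tight_set, mixed_set):
        dist = pairwise_distances(samples, samples, metric='euclidean')
        print(np.count_nonzero(dist > thresh) == 0)   # True for the tight set, False for the mixed one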
Example #18
    def predict(self, samples):
        """
        Prediction cases:
        - Only target class is identified with ratio X (high): Class
        - Target and another class are identified with ratios X (high) and Y (small): Class with small confusion
        - Multiple classes are identified with small ratios Ys: Novelty
        - No classes identified: Novelty
        :param samples:
        :return: Class ID, -1 (Novelty), None invalid samples (multiple detections)
        """

        # no classifiers yet, predict novelty
        if not self.classifiers:
            # 100% confidence
            self.__decision_function = np.array([len(samples)]), np.array([-1])
            return -1

        predictions, class_ids = self.__predict(samples)

        if len(predictions) == 0:
            # no class in reach - classify as novel class
            self.__decision_function = np.array([len(samples)]), np.array([-1])
            return -1

        # calc nr of positive class detections
        cls_scores = (predictions > 0).sum(axis=1)
        self.__decision_function = cls_scores, class_ids
        nr_samples = len(samples)
        self.__decision_nr_samples = nr_samples

        log.info(
            'cl',
            "Classifier scores: {} | max: {}".format(cls_scores, nr_samples))

        # no classes detected at all - novelty
        if len(cls_scores[cls_scores <= self.__novelty_thresh *
                          nr_samples]) == len(cls_scores):
            return -1

        identification_mask = cls_scores >= self.__class_thresh * nr_samples
        ids = class_ids[identification_mask]
        if len(ids) > 0:

            # multiple possible detection - invalid samples
            if len(ids) > 1:

                # use average to-class-distance to select best choice
                mean_dist_cosine = []
                mean_dist_euclidean = []

                # todo: mean dist or mean dist to cluster mean
                for class_id in ids:
                    mean_dist_cosine.append(
                        self.classifiers[class_id].mean_dist(samples))
                    mean_dist_euclidean.append(
                        self.classifiers[class_id].mean_dist(
                            samples, 'euclidean'))

                id_index_cosine = mean_dist_cosine.index(min(mean_dist_cosine))
                id_index_euclidean = mean_dist_euclidean.index(
                    min(mean_dist_euclidean))

                log.severe("Samples are inambiguous. Classes: {}".format(ids))
                log.severe("IDCOS: {} | meandist cosine: {}".format(
                    int(ids[id_index_cosine]), mean_dist_cosine))
                log.severe("IDEUC: {} | meandist euclidean: {}".format(
                    int(ids[id_index_euclidean]), mean_dist_euclidean))

                for class_id in ids:
                    print self.classifiers[class_id].class_mean_dist(
                        samples, 'cosine')

                mean_dist_cosine = np.array(mean_dist_cosine)

                if np.sum(
                    (mean_dist_cosine - min(mean_dist_cosine)) < 0.05) > 1:
                    log.severe(
                        "SAMPLES DISCARGED: Average distance to data inambiguous"
                    )
                    return None

                return int(ids[id_index_cosine])
                # return None

            # single person identified - return id
            return int(ids[0])
        else:
            # samples unclear
            return None
Example #19
    def accumulate_samples(self,
                           user_id,
                           new_samples,
                           sample_weights=np.array([]),
                           sample_poses=np.array([])):
        """

        :param user_id:
        :param new_samples:
        :param sample_weights:
        :return:
        array : save samples (save to integrate in any way)
        bool : reset user
        int : prediction of last section
        float : confidence of last section prediction
        """

        # check for set inconsistency
        samples_ok = BaseMetaController.check_inter_sample_dist(
            new_samples, metric='euclidean')

        if not samples_ok:
            # the queue is not filled up yet, so there is no safe section to return
            log.severe("Update set is inconsistent - disposing...")
            # reset queue
            self.sample_queue.pop(user_id, None)
            self.sample_weight_queue.pop(user_id, None)
            self.sample_pose_queue.pop(user_id, None)
            return np.array([]), np.array([]), True, -1, 1.

        # generate placeholder weights
        if sample_weights.size == 0:
            # 5 of 10
            sample_weights = np.repeat(5, len(new_samples))

        assert len(sample_weights) == len(new_samples)

        # add samples
        if user_id not in self.sample_queue:
            # initialize
            self.sample_queue[user_id] = new_samples
            self.sample_weight_queue[user_id] = sample_weights
            self.sample_pose_queue[user_id] = sample_poses
        else:
            # append
            self.sample_queue[user_id] = np.concatenate((self.sample_queue[user_id], new_samples))\
                                         if self.sample_queue[user_id].size \
                                         else new_samples
            self.sample_weight_queue[user_id] = np.concatenate((self.sample_weight_queue[user_id], sample_weights))\
                                         if self.sample_weight_queue[user_id].size \
                                         else sample_weights
            self.sample_pose_queue[user_id] = np.concatenate((self.sample_pose_queue[user_id], sample_poses))\
                                         if self.sample_pose_queue[user_id].size \
                                         else sample_poses

        target_class = -1
        confidence = 1.
        forward = np.array([])
        forward_poses = np.array([])
        reset_user = False

        # do meta recognition
        # check set for inconsistencies - return only the safe section
        while len(self.sample_queue[user_id]) >= self.__queue_max_length:

            sample_batch = self.sample_queue[user_id][0:self.
                                                      __queue_max_length]
            weight_batch = self.sample_weight_queue[user_id][
                0:self.__queue_max_length]
            pose_batch = self.sample_pose_queue[user_id][0:self.
                                                         __queue_max_length]

            # check set consistency
            samples_ok = BaseMetaController.check_inter_sample_dist(
                sample_batch, metric='euclidean')

            # predict class
            is_consistent, target_class, confidence = self.__p_multicl.predict_class(
                sample_batch, sample_poses=pose_batch)

            if samples_ok and is_consistent:
                # add samples to forward
                forward = np.concatenate((forward, self.sample_queue[user_id][0:self.__inclusion_range])) \
                    if forward.size \
                    else self.sample_queue[user_id][0:self.__inclusion_range]
                forward_poses = np.concatenate((forward_poses, self.sample_pose_queue[user_id][0:self.__inclusion_range])) \
                    if forward_poses.size \
                    else self.sample_pose_queue[user_id][0:self.__inclusion_range]

                # remove first x samples
                self.sample_queue[user_id] = self.sample_queue[user_id][
                    self.__inclusion_range:]
                self.sample_weight_queue[user_id] = self.sample_weight_queue[
                    user_id][self.__inclusion_range:]
                self.sample_pose_queue[user_id] = self.sample_pose_queue[
                    user_id][self.__inclusion_range:]
            else:
                # dispose all samples! Whole queue!
                self.sample_queue.pop(user_id, None)
                self.sample_weight_queue.pop(user_id, None)
                self.sample_pose_queue.pop(user_id, None)
                log.severe("Set is inconsistent - disposing...")
                reset_user = True
                break

        # predict user if not enough samples
        if not forward.size and reset_user is False:
            is_consistent, target_class, confidence = self.__p_multicl.predict_class(
                self.sample_queue[user_id],
                sample_poses=self.sample_pose_queue[user_id])
            print "Not enough to forward but predict...", is_consistent, target_class, confidence

        return forward, forward_poses, reset_user, target_class, confidence
Example #20
    def accumulate_samples(self,
                           tracking_id,
                           new_samples,
                           sample_weights=np.array([]),
                           sample_poses=np.array([])):

        # check for set inconsistency
        samples_ok = BaseMetaController.check_inter_sample_dist(
            new_samples, metric='euclidean')

        if not samples_ok:
            log.severe("Identification set is inconsistent - disposing...")
            # reset queue
            self.sample_queue.pop(tracking_id, None)
            self.sample_weight_queue.pop(tracking_id, None)
            self.sample_pose_queue.pop(tracking_id, None)
            return False, np.array([]), np.array([]), np.array([])

        # generate placeholder weights
        if sample_weights.size == 0:
            # 5 of 10
            sample_weights = np.repeat(5, len(new_samples))

        assert len(sample_weights) == len(new_samples)

        # add samples
        if tracking_id not in self.sample_queue:
            # initialize
            self.sample_queue[tracking_id] = new_samples
            self.sample_weight_queue[tracking_id] = sample_weights
            self.sample_pose_queue[tracking_id] = sample_poses
        else:
            # append
            self.sample_queue[tracking_id] = np.concatenate((self.sample_queue[tracking_id], new_samples))\
                                         if self.sample_queue[tracking_id].size \
                                         else new_samples
            self.sample_weight_queue[tracking_id] = np.concatenate((self.sample_weight_queue[tracking_id], sample_weights))\
                                         if self.sample_weight_queue[tracking_id].size \
                                         else sample_weights
            self.sample_pose_queue[tracking_id] = np.concatenate((self.sample_pose_queue[tracking_id], sample_poses))\
                                         if self.sample_pose_queue[tracking_id].size \
                                         else sample_poses

        is_save_set = False

        # if the set has a safe sample or is long enough
        if len(self.sample_queue[tracking_id]) >= self.min_sample_length:
            if len(self.sample_queue[tracking_id]) >= self.save_sample_length\
                    or np.count_nonzero(self.sample_weight_queue[tracking_id] >= self.save_weight_thresh):

                # check set consistency
                samples_ok = BaseMetaController.check_inter_sample_dist(
                    self.sample_queue[tracking_id], metric='euclidean')

                if samples_ok:
                    # set is safe - allow identification
                    is_save_set = True
                else:
                    # dispose all samples
                    self.sample_queue.pop(tracking_id, None)
                    self.sample_weight_queue.pop(tracking_id, None)
                    self.sample_pose_queue.pop(tracking_id, None)
                    log.severe("Set is inconsistent - disposing...")

        # TODO: return whole set or only last?
        current_samples = self.sample_queue.get(tracking_id, np.array([]))
        current_weights = self.sample_weight_queue.get(tracking_id,
                                                       np.array([]))
        current_poses = self.sample_pose_queue.get(tracking_id, np.array([]))

        # not enough safe samples - return what we have so far
        return is_save_set, current_samples, current_weights, current_poses
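Both accumulate_samples variants grow their per-ID queues with the same concatenate-or-initialize idiom. A compact sketch of that idiom on its own (the helper name and shapes are made up):

    import numpy as np

    sample_queue = {}

    def push(queue, key, new_samples):
        # append to an existing non-empty array, otherwise start the queue with the new samples
        existing = queue.get(key, np.array([]))
        queue[key] = np.concatenate((existing, new_samples)) if existing.size else new_samples

    push(sample_queue, 7, np.random.rand(3, 128))
    push(sample_queue, 7, np.random.rand(2, 128))
    print(sample_queue[7].shape)   # (5, 128)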
Example #21
    def get_weighted_score(self, test_samples, test_poses, ref_samples,
                           ref_poses):
        assert test_samples.ndim == 2
        assert ref_samples.ndim == 2

        dist_lookup = pairwise_distances(test_samples,
                                         ref_samples,
                                         metric='euclidean')

        # print np.shape(dist_lookup[0])
        factors = []
        sample_weights = []

        # if only one sample: cannot calculate abof
        if len(ref_samples) < 3:
            log.severe(
                'Cannot calculate ABOF with {} reference samples (variance calculation needs at least 3 reference points)'
                .format(len(ref_samples)))
            raise Exception

        for i_sample, A in enumerate(test_samples):
            factor_list = []
            weight_list = []
            for i in range(len(ref_samples)):
                # select first point in reference set
                B = ref_samples[i]
                # distance
                AB = dist_lookup[i_sample][i]
                for j in range(i + 1):
                    if j == i:  # ensure B != C
                        continue
                    # select second point in reference set
                    C = ref_samples[j]
                    # distance
                    AC = dist_lookup[i_sample][j]

                    if np.array_equal(B, C):
                        sys.exit(
                            "Points are equal: B == C! Reference Set contains two times the same samples"
                        )
                        factor_list.append(1000)
                        print "Bi/Cj: {}/{}".format(i, j)
                        # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                        continue

                    angle_BAC = ABOD.angleBAC(A, B, C, AB, AC)

                    w1 = self.weight_gen.get_pose_weight(
                        test_poses[i_sample], ref_poses[i])
                    w2 = self.weight_gen.get_pose_weight(
                        test_poses[i_sample], ref_poses[j])
                    weight_list.append(2. / float(w1 + w2))  # inverse of the mean pose weight: 1 / ((w1 + w2) / 2)

                    # compute each element of variance list
                    try:
                        # apply weighting
                        if self.variant == 1:
                            tmp = angle_BAC / float(
                                math.pow(AB * AC, 2) * (w1 * w2))
                        elif self.variant == 2:
                            tmp = angle_BAC / float(
                                math.pow(AB * AC, 2) * (w1 + w2))
                        else:
                            tmp = angle_BAC / float(math.pow(AB * AC, 2))

                    except ZeroDivisionError:
                        log.severe(
                            "ERROR\tABOF\tfloat division by zero! Trying to predict training point?'"
                        )
                        tmp = 500
                        # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                    factor_list.append(tmp)

            # calculate weighted variance
            if self.variant == 3:
                weighted_average = np.average(factor_list,
                                              weights=np.array(weight_list))
                var = np.average((factor_list - weighted_average)**2)
            elif self.variant == 4:
                var = WeightedABOD.biased_weighted_var(np.array(factor_list),
                                                       np.array(weight_list),
                                                       weighted_average=False)
            elif self.variant == 5:
                var = WeightedABOD.biased_weighted_var(np.array(factor_list),
                                                       np.array(weight_list))
            else:
                var = np.var(np.array(factor_list))

            factors.append(var)
            # weight_list = np.repeat(1, len(factors))
            sample_weights.append(np.average(weight_list))

        return np.array(factors), np.array(sample_weights)
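Variants 3-5 above replace the plain variance of the angle factors with a pose-weighted one. A self-contained sketch of the weighted mean and (biased) weighted variance on made-up factors and weights; biased_weighted_var is assumed to follow the standard formula shown here:

    import numpy as np

    factors = np.array([0.8, 1.2, 0.5, 2.0])    # hypothetical angle factors
    weights = np.array([1.0, 0.5, 2.0, 0.25])   # hypothetical pose weights

    weighted_mean = np.average(factors, weights=weights)

    # variant 3 above: weighted mean, but an unweighted average of the squared deviations
    var_v3 = np.average((factors - weighted_mean) ** 2)

    # standard biased weighted variance: weights applied to the squared deviations as well
    var_weighted = np.average((factors - weighted_mean) ** 2, weights=weights)

    print(var_v3)
    print(var_weighted)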
Example #22
    def get_score(test_samples, reference_set):

        assert test_samples.ndim == 2
        assert reference_set.ndim == 2

        dist_lookup = pairwise_distances(test_samples,
                                         reference_set,
                                         metric='euclidean')

        # print np.shape(dist_lookup[0])
        factors = []

        # if only one sample: cannot calculate abof
        if len(reference_set) < 3:
            log.severe(
                'Cannot calculate ABOF with {} reference samples (variance calculation needs at least 3 reference points)'
                .format(len(reference_set)))
            raise Exception

        for i_sample, A in enumerate(test_samples):
            factor_list = []
            for i in range(len(reference_set)):
                # select first point in reference set
                B = reference_set[i]
                # distance
                AB = dist_lookup[i_sample][i]

                for j in range(i + 1):
                    if j == i:  # ensure B != C
                        continue
                    # select second point in reference set
                    C = reference_set[j]
                    # distance
                    AC = dist_lookup[i_sample][j]

                    if np.array_equal(B, C):
                        print "Bi/Cj: {}/{}".format(i, j)
                        log.error(
                            "Points are equal: B == C! Assuming classification of training point"
                        )
                        sys.exit(
                            "Points are equal: B == C! Reference Set contains two times the same samples"
                        )
                        factor_list.append(1000)
                        # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                        continue

                    # angle_BAC = ABOD.angleBAC(A, B, C, AB, AC)
                    # angle_BAC = ABOD.angleFast(A-B, A-C)

                    vector_AB = B - A
                    vector_AC = C - A

                    # compute each element of variance list
                    try:
                        cos_similarity = np.dot(vector_AB,
                                                vector_AC) / (AB * AC)
                        # apply weighting
                        tmp = cos_similarity / float(math.pow(AB * AC, 2))
                    except ZeroDivisionError:
                        log.severe(
                            "ERROR\tABOF\tfloat division by zero! Trying to predict training point?'"
                        )
                        tmp = 500
                        # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                    factor_list.append(tmp)
            factors.append(np.var(factor_list))
        return np.array(factors)
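get_score computes, for every test point A, the variance over all reference pairs (B, C) of the distance-weighted cosine dot(AB, AC) / (|AB| * |AC|)^3, the angle-based outlier factor (ABOF). A point inside the reference cloud sees widely varying angles and short distances (large, varied factors, high variance), while a far-away point sees almost parallel vectors and long distances (tiny factors, variance near zero). A compact standalone reimplementation on toy 2-D data, to illustrate the behaviour (it is not the class method above):

    import numpy as np
    from itertools import combinations

    def abof(A, refs):
        factors = []
        for B, C in combinations(refs, 2):
            AB, AC = B - A, C - A
            nAB, nAC = np.linalg.norm(AB), np.linalg.norm(AC)
            # same factor as above: cos<AB, AC> / (|AB| * |AC|)^2
            factors.append(np.dot(AB, AC) / (nAB * nAC) / (nAB * nAC) ** 2)
        return np.var(factors)

    refs = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
    print(abof(np.array([0.5, 0.5]), refs))    # inlier: high variance (about 3.6)
    print(abof(np.array([10.0, 10.0]), refs))  # outlier: variance near zero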
Example #23
    def __abof_multi(self, samples, knn=None, cosine_weighting=False):
        """
        calculate the ABOF of A = (x1, x2, ..., xn)
        pt_list = self.data (cluster)
        """
        # Todo: fix cosine dist weighting
        pt_list = self.data

        if knn is not None and knn < len(self.data):
            pt_list = random.sample(pt_list, knn)

        dist_lookup = pairwise_distances(samples, pt_list, metric='euclidean')

        if cosine_weighting:
            cos_dist_lookup = pairwise_distances(samples,
                                                 pt_list,
                                                 metric='cosine')

        # print np.shape(dist_lookup[0])
        factors = []

        # if only one sample: cannot calculate abof
        if len(pt_list) < 2:
            log.severe(
                'Cannot calculate ABOF with {} reference samples'.format(
                    len(pt_list)))
            fake_abod = 0
            if dist_lookup[0][0] < 0.3:
                fake_abod = 3
            else:
                fake_abod = 0.1

            factors.append(fake_abod)
            return factors

        for i_sample, A in enumerate(samples):
            varList = []
            for i in range(len(pt_list)):
                B = pt_list[i]
                AB = dist_lookup[i_sample][i]
                j = 0
                for j in range(i + 1):
                    if j == i:  # ensure B != C
                        continue

                    C = pt_list[j]
                    AC = dist_lookup[i_sample][j]

                    if np.array_equal(B, C):
                        log.error(
                            "Points are equal: B == C! Assuming classification of training point (ABOD 1000)"
                        )
                        varList.append(1000)
                        print "Bi/Cj: {}/{}".format(i, j)
                        # sys.exit('ERROR\tangleBAC\tmath domain ERROR, |cos<AB, AC>| <= 1')
                        continue

                    angle_BAC = self.__angleBAC(A, B, C, AB, AC)
                    # compute each element of variance list
                    try:
                        # apply weighting
                        if cosine_weighting:
                            tmp = angle_BAC / float(
                                math.pow(
                                    (2.0 - cos_dist_lookup[i_sample][i]) *
                                    (2.0 - cos_dist_lookup[i_sample][j]), 2))
                        else:
                            tmp = angle_BAC / float(math.pow(AB * AC, 2))
                    except ZeroDivisionError:
                        log.severe(
                            "ERROR\tABOF\tfloat division by zero! Trying to predict training point?'"
                        )
                        tmp = 500
                        # sys.exit('ERROR\tABOF\tfloat division by zero! Trying to predict training point?')
                    varList.append(tmp)
            factors.append(np.var(varList))
        return factors
Example #24
    def train_classifier(self, class_id):
        """
        Retrain One-Class Classifiers (partial_fit)
        """

        log.info('cl',
                 "(Re-)training Classifier for user ID {}".format(class_id))

        if class_id not in self.classifiers:
            log.severe(
                "Cannot train class {} without creating the classifier first".
                format(class_id))
            return False

        start = time.time()

        with self.training_lock:
            # get update samples from stack

            # if samples available: do update with all available update samples
            # update_samples = self.classifier_update_stacks.get(class_id, []) or []

            if class_id in self.classifier_update_stacks:
                update_samples = self.classifier_update_stacks[class_id]
            else:
                update_samples = []

            if len(update_samples) > 0:

                training_before = self.classifier_states[class_id]

                if self.CLASSIFIER == 'ABOD':
                    """
                    OFFLINE Classifier: retrain with all available data
                        - Samples: Stored in user db, reloaded upon every fit
                    """
                    # instead of partial fit: add samples and do refitting over complete data
                    self.p_user_db.add_samples(class_id, update_samples)
                    samples = self.p_user_db.get_class_samples(class_id)

                    # stop
                    if len(samples) > 100:
                        log.warning("Sample size exceeding 100. No refitting.")
                    else:
                        # always use fit method (no partial fit available)
                        self.classifiers[class_id].fit(samples)
                        self.classifier_states[class_id] += 1

                elif self.CLASSIFIER == 'IABOD':
                    """
                    INCREMENTAL Methods: Use partial fit with stored update data
                        - Samples: Partially stored in ABOD Cluster
                    """
                    # partial update: partial_fit
                    self.classifiers[class_id].partial_fit(update_samples)
                    self.classifier_states[class_id] += 1

                elif self.CLASSIFIER == 'ISVM':
                    """
                    INCREMENTAL Methods: Use partial fit with stored update data
                        - Samples: Partially stored in Cluster
                    """
                    self.classifiers[class_id].partial_fit(update_samples)
                    self.classifier_states[class_id] += 1

                # empty update list if training was performed
                if self.classifier_states[class_id] - training_before == 1:
                    self.classifier_update_stacks[class_id] = []
            else:
                log.warning("No training/update samples available")

        if self.__verbose:
            log.info('cl',
                     "fitting took {} seconds".format(time.time() - start))

        return True