def train_classifier(self, class_id):
        """
        Retrain One-Class Classifiers (partial_fit)
        """

        log.info('cl',
                 "(Re-)training Classifier for user ID {}".format(class_id))

        # extract data
        with self.trainig_data_lock:
            # get update samples from stack
            if class_id in self.classifier_update_stacks:
                update_samples = self.classifier_update_stacks[class_id]
                # clear
                self.classifier_update_stacks[class_id] = np.array([])
            else:
                update_samples = []

        start = time()

        if len(update_samples) > 0:
            """
            INCREMENTAL Methods: Use partial fit with stored update data
                - Samples: Partially stored in Cluster
            """
            self.classifiers[class_id].partial_fit(update_samples)
            self.classifier_states[class_id] += 1
        else:
            log.warning("No training/update samples available")

        if self.__verbose:
            log.info('cl', "fitting took {} seconds".format(time() - start))
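
# Usage sketch (not part of the original snippet): one way to drive the
# incremental path above. `manager` is assumed to be an instance of the
# surrounding class and `queue_and_train` is a hypothetical helper; the real
# class guards the update stack with its training-data lock, while this
# sketch assumes a single-threaded caller.
import numpy as np

def queue_and_train(manager, class_id, new_embeddings):
    # stack the fresh samples for this class (append if some are already queued)
    queued = manager.classifier_update_stacks.get(class_id)
    if queued is None or len(queued) == 0:
        manager.classifier_update_stacks[class_id] = np.asarray(new_embeddings)
    else:
        manager.classifier_update_stacks[class_id] = np.vstack(
            (queued, np.asarray(new_embeddings)))
    # consume the queued samples in a single partial_fit pass
    manager.train_classifier(class_id)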

# --- Example 2 ---

    def __predict_ORIG(self, samples):
        """
        Predict the identity for a batch of samples.
        :return: class id (int) of the identified user, -1 for novelty
                 (no known class matches) or None if the batch is inconclusive
        """

        proba, class_ids = self.predict_proba(samples)
        mask_0 = proba > 0

        # no classes detected at all - novelty
        if len(proba[mask_0]) == 0:
            return -1

        mask_class = proba > self.__class_thresh
        nr_classes = len(proba[mask_class])

        if nr_classes > 0:
            # class detected
            if nr_classes > 1:
                # multiple classes detected - batch invalid
                if self.__verbose:
                    log.severe(
                        "Multiple classes detected: {}".format(nr_classes))
                return None

            confusion_mask = (self.__confusion_thresh <
                              proba) & (proba < self.__class_thresh)
            # check whether any other class probability falls into the confusion band
            if len(proba[confusion_mask]) > 0:
                log.warning(
                    "Class confusion - force re-identification: {}% confusion, {}% identification, {} samples"
                    .format(proba[confusion_mask], proba[mask_class],
                            len(samples)))

                # calc pairwise distance. If small then force re-identification
                # for sample in proba[confusion_mask]:

                # Todo: implement properly
                # return None

            class_id_arr = class_ids[mask_class]
            return int(class_id_arr[0])

        else:
            if len(proba[proba > self.__novelty_thresh]) > 0:
                print "--- no classes detected but novelty threshold exceeded: {}".format(
                    proba)
                return None

            return -1
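
# Caller-side sketch (assumption, not from the original code): how the
# three-way return contract of the prediction above can be consumed.
# `recognizer.predict` stands in for whatever public wrapper exposes
# __predict_ORIG.
def handle_identification(recognizer, samples):
    result = recognizer.predict(samples)
    if result is None:
        # batch inconclusive (multiple classes or confusion): request new samples
        return "inconclusive"
    if result == -1:
        # novelty: no known class matched, start enrollment of a new user
        return "unknown"
    # exactly one known class passed the identification threshold
    return "user {}".format(result)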

# --- Example 3 ---

    def get_embeddings(self, rgb_images, align=True):
        """
        Calculate deep face embeddings for input images
        :param rgb_images: RGB (!) images
        :param align: if True, detect and align faces before the forward pass
        :return: np.array embedding vectors
        """

        images_normalized = []
        embeddings = []
        start = time.time()

        # normalize images
        if align is True:
            if len(rgb_images) > 0:
                for imgObject in rgb_images:
                    # align face - ignore images with multiple bounding boxes
                    aligned = self.align_face(imgObject, self.landmarks, self.size)
                    if aligned is not None:
                        images_normalized.append(aligned)

            # abort early if no faces could be aligned
            if len(images_normalized) == 0:
                log.warning("No suitable images (no faces detected)")
                return np.array(embeddings)

            # print status
            if self.verbose is True:
                log.debug('cnn', "Alignment took {} seconds - {}/{} images suitable".format(
                    time.time() - start, len(images_normalized), len(rgb_images)))
        else:
            images_normalized = rgb_images

        # generate embeddings
        start = time.time()
        for img in images_normalized:
            rep = self.neural_net.forward(img)
            embeddings.append(rep)

        # if self.verbose:
        #     print("--- Neural network forward pass took {} seconds.".format(time.time() - start))

        return np.array(embeddings)
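
# Usage sketch (assumption): embed two RGB face crops with the method above
# and compare them with the squared L2 distance commonly used for
# OpenFace-style 128-d embeddings. `embedder` is an instance of the
# surrounding class.
import numpy as np

def face_distance(embedder, rgb_img_a, rgb_img_b):
    emb_a = embedder.get_embeddings([rgb_img_a], align=True)
    emb_b = embedder.get_embeddings([rgb_img_b], align=True)
    if len(emb_a) == 0 or len(emb_b) == 0:
        return None  # no face found in at least one of the images
    # smaller distance -> more likely the same person
    return float(np.sum((emb_a[0] - emb_b[0]) ** 2))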

# --- Example 4 ---

    def train_classifier(self, class_id):
        """
        Retrain One-Class Classifiers (partial_fit)
        """

        log.info('cl',
                 "(Re-)training Classifier for user ID {}".format(class_id))

        if class_id not in self.classifiers:
            log.severe(
                "Cannot train class {} without creating the classifier first".
                format(class_id))
            return False

        start = time.time()

        with self.training_lock:
            # get update samples from stack

            # if samples available: do update with all available update samples
            # update_samples = self.classifier_update_stacks.get(class_id, []) or []

            if class_id in self.classifier_update_stacks:
                update_samples = self.classifier_update_stacks[class_id]
            else:
                update_samples = []

            if len(update_samples) > 0:

                training_before = self.classifier_states[class_id]

                if self.CLASSIFIER == 'ABOD':
                    """
                    OFFLINE Classifier: retrain with all available data
                        - Samples: Stored in user db, reloaded upon every fit
                    """
                    # instead of partial fit: add samples and do refitting over complete data
                    self.p_user_db.add_samples(class_id, update_samples)
                    samples = self.p_user_db.get_class_samples(class_id)

                    # stop
                    if len(samples) > 100:
                        log.warning("Sample size exceeding 100. No refitting.")
                    else:
                        # always use fit method (no partial fit available)
                        self.classifiers[class_id].fit(samples)
                        self.classifier_states[class_id] += 1

                elif self.CLASSIFIER == 'IABOD':
                    """
                    INCREMENTAL Methods: Use partial fit with stored update data
                        - Samples: Partially stored in ABOD Cluster
                    """
                    # partial update: partial_fit
                    self.classifiers[class_id].partial_fit(update_samples)
                    self.classifier_states[class_id] += 1

                elif self.CLASSIFIER == 'ISVM':
                    """
                    INCREMENTAL Methods: Use partial fit with stored update data
                        - Samples: Partially stored in Cluster
                    """
                    self.classifiers[class_id].partial_fit(update_samples)
                    self.classifier_states[class_id] += 1

                # empty update list if training was performed
                if self.classifier_states[class_id] - training_before == 1:
                    self.classifier_update_stacks[class_id] = []
            else:
                log.warning("No training/update samples available")

        if self.__verbose:
            log.info('cl',
                     "fitting took {} seconds".format(time.time() - start))

        return True
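
# Sketch of a classifier that satisfies the fit/partial_fit interface relied
# on above (an assumption - the original 'ISVM'/'IABOD' implementations are
# not shown here). It wraps scikit-learn's SGDOneClassSVM, which supports
# incremental updates via partial_fit in recent scikit-learn versions.
from sklearn.linear_model import SGDOneClassSVM


class IncrementalOneClassClassifier(object):

    def __init__(self, nu=0.1):
        self.model = SGDOneClassSVM(nu=nu, random_state=0)

    def fit(self, samples):
        # full refit over all stored samples (offline path)
        self.model.fit(samples)

    def partial_fit(self, samples):
        # incremental update using only the queued samples
        self.model.partial_fit(samples)

    def predict(self, samples):
        # +1 for inliers (samples of this class), -1 for outliers
        return self.model.predict(samples)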

# --- Example 5 ---

    def update(self, samples):
        """
        Integrate a new batch of samples into the cluster: project onto a
        variance-preserving subspace, keep samples outside the current convex
        hull (inside, if inverted), then discharge redundant interior and
        near-duplicate points.
        """

        # init - add all samples if no data yet
        if len(self.__data) == 0:
            self.__data = samples
            return

        # =======================================
        # 1.  Reduce data/sample data

        if self.dim_reduction > 0:
            basis, mean, var = ExtractMaxVarComponents(self.__data,
                                                       self.dim_reduction)
            self.log_expl_var.append(var)
        else:
            basis, mean = ExtractSubspace(self.__data, 0.8)

        cluster_reduced = ProjectOntoSubspace(self.__data, mean, basis)
        samples_reduced = ProjectOntoSubspace(samples, mean, basis)
        dims = np.shape(cluster_reduced)
        # select minimum data to build convex hull
        # min_nr_elems = dims[1] + 4
        if self.__verbose:
            print "Reducing dimension: {}->{}".format(
                np.shape(self.__data)[1], dims[1])

        # =======================================
        # 2.  Calculate Convex Hull in subspace

        data_hull = cluster_reduced  # take all samples of data
        hull = Delaunay(data_hull)
        if self.__verbose:
            print "Calculating data hull using {}/{} points".format(
                len(data_hull), len(self.__data))

        # =======================================
        # 3.  Select new samples from outside convex hull

        if not self.__inverted:
            # keep samples that lie outside the current hull
            inclusion_mask = np.array(
                [hull.find_simplex(sample) < 0 for sample in samples_reduced])
            if self.__verbose:
                nr_elems_outside_hull = np.count_nonzero(inclusion_mask)
                print("Elements OUTSIDE hull (to include): {}/{}".format(
                    nr_elems_outside_hull, len(samples)))
        else:
            # inverted mode: keep samples that lie inside the current hull
            inclusion_mask = np.array(
                [hull.find_simplex(sample) >= 0 for sample in samples_reduced])

        # add samples (samples need to be np.array)
        self.__data = np.concatenate((self.__data, samples[inclusion_mask]))

        # =======================================
        # 4.  Recalculate hull with newly added points

        # If memory exceeded: Perform unrefinement process -
        # discharge sampling directions with lowest variance contribution
        if self.dim_reduction > 0:
            nr_comps = self.dim_reduction if len(
                self.__data) <= self.max_size else self.dim_removal
            if len(self.__data) > 150:
                nr_comps = self.dim_removal - 1
            basis, mean, var = ExtractMaxVarComponents(self.__data, nr_comps)
        else:
            # automatic dimension selection (based on containing certain variance)
            basis, mean = ExtractSubspace(self.__data, 0.75)

        cluster_reduced = ProjectOntoSubspace(self.__data, mean, basis)
        print "Recuding dimension: {}->{}".format(
            np.shape(self.__data)[1],
            np.shape(cluster_reduced)[1])
        hull = Delaunay(cluster_reduced)

        # =======================================
        # 5.  Discharge samples inside hull

        if not self.__inverted:
            # hull.convex_hull holds the vertex indices of the hull facets;
            # points not referenced there lie strictly inside and can be dropped
            cl_to_delete = np.array(
                list(
                    set(range(0, len(cluster_reduced))) -
                    set(np.unique(hull.convex_hull))))
            # set(range(len(data_hull))).difference(hull.convex_hull)
        else:
            cl_to_delete = np.array([])
            # select samples on hull
            if len(cluster_reduced) > self.max_size:
                hull_indices = list(np.unique(hull.convex_hull))
                if len(hull_indices) > 0:
                    nr_to_del = 5 if len(hull_indices) > 5 else 0
                    cl_to_delete = np.array(hull_indices[0:nr_to_del])

        # print "Points building convex hull: {}".format(set(np.unique(hull.convex_hull)))
        # print "To delete: {}".format(cl_to_delete)

        if len(cl_to_delete[cl_to_delete < 0]) > 0:
            print(set(np.unique(hull.convex_hull)))
            log.warning("Index elements smaller than 0: {}".format(
                cl_to_delete[cl_to_delete < 0]))

        if self.__log:
            self.log_intra_deleted.append(len(cl_to_delete))
            self.log_cl_size_orig.append(len(self.__data))

        print "Cleaning {} points from inside data".format(len(cl_to_delete))

        # Remove points from inside hull
        self.__data = np.delete(self.__data, cl_to_delete, axis=0)

        # =======================================
        # 6.  KNN point removal: remove similar points

        if self.knn_removal_thresh > 0:
            max_removal = 10 if len(
                self.__data) > self.knn_removal_thresh else 0
            if max_removal > 0:
                knn_filter = KNFilter(self.__data, k=3, threshold=0.25)
                tmp = knn_filter.filter_x_samples(max_removal)
                print("--- Removing {} KNN points".format(
                    len(self.__data) - len(tmp)))
                self.__data = tmp

            if self.__log:
                self.log_cl_size_reduced.append(len(self.__data))

        print "Cluster size: {}".format(len(self.__data))