Example #1
    def __init__(self, mode='train'):
        self.database = DBManagement()
        # Get caption
        self.descriptions = utils.read_caption_clean_file(
            'Flickr8k_text/Flickr8k.cleaned.lemma.token.txt')
        self.idxtoword, self.wordtoidx, self.vocab_size = utils.map_w2id(
            self.descriptions.values())
        self.max_len = utils.calculate_caption_max_len(
            self.descriptions.values())

        if mode not in ('train', 'val', 'test'):
            raise ValueError("mode must be one of 'train', 'val' or 'test'")
        self.mode = mode  # choose data generator mode
        if self.mode == 'train':
            # Load train image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.trainImages.txt')
        elif self.mode == 'val':
            # Load validation image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.devImages.txt')
        elif self.mode == 'test':
            # Load test image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.testImages.txt')
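
A minimal usage sketch for this constructor, assuming it belongs to the DataGenerator(Sequence) class shown in Example #8; the file paths come from the snippet itself:

# Hypothetical usage: pick a split and inspect the derived vocabulary stats
train_gen = DataGenerator(mode='train')
val_gen = DataGenerator(mode='val')
print(train_gen.vocab_size, train_gen.max_len)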
Example #2
class FaceSearch:
    def __init__(self, dims=128, metric='euclidean'):
        """
            Everytime turn on, load all embedding vector into a tree.
            With 76 images, building time is 0.001s
        """
        self.dims = dims
        self.metric = metric
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.db = DBManagement()
        self.extractFeature = ExtractFeature()
        self._build_tree()

    def _build_tree(self, n_trees=18):
        """
            Build the Annoy tree. n_trees should be roughly twice the
            number of indexed items.
        """
        v_arr = self.db.get_features()
        for index, vector in enumerate(v_arr):
            self.feature_index.add_item(index, vector)
        # n_trees = (index + 1) * 2  # alternative: derive from item count
        self.feature_index.build(n_trees)

    def save_tree(self, path):
        self.feature_index.save(path)

    def load_tree(self, path):
        """
            Not usable yet. The index is recreated with the stored metric
            before loading so it matches the one it was saved with.
        """
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.feature_index.load(path)

    def search_index_by_vector(self, image, top_n=1):
        # Return the nearest ids and their distances
        data = {'success': False}
        try:
            vector = self.extractFeature.extract_feature_insight_face(image)
            neighbors, dists = self.feature_index.get_nns_by_vector(
                vector, top_n, include_distances=True)
            ids = self.db.get_ids()
            results = [{
                'id': ids[a],
                'distance': dists[i]
            } for i, a in enumerate(neighbors)]
            # If the closest distance is >= config.face_threshold,
            # the match is rejected as unknown
            if results[0]['distance'] >= config.face_threshold:
                results[0]['id'] = 'unknown'
            data['success'] = True
            data['results'] = results
            return data
        except Exception:
            return data
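
A hedged usage sketch for FaceSearch; the query image path is a placeholder, and cv2 is assumed for loading since that matches how images are read elsewhere in this codebase:

import cv2

searcher = FaceSearch(dims=128, metric='euclidean')
query = cv2.imread('query_face.jpg')  # placeholder path
result = searcher.search_index_by_vector(query, top_n=1)
if result['success']:
    best = result['results'][0]
    print(best['id'], best['distance'])  # id is 'unknown' past the threshold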
Example #3
    def __init__(self, dims=128, metric='euclidean'):
        """
            Every time the service starts, load all embedding vectors
            into a tree. With 76 images, building takes about 0.001 s.
        """
        self.dims = dims
        self.metric = metric
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.db = DBManagement()
        self.extractFeature = ExtractFeature()
        self._build_tree()
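
For reference, the bare Annoy pattern this constructor wraps, sketched with random 128-dim placeholder vectors (the 76-item count mirrors the docstring):

from annoy import AnnoyIndex
import random

index = AnnoyIndex(128, metric='euclidean')
for i in range(76):
    index.add_item(i, [random.random() for _ in range(128)])
index.build(10)  # n_trees
print(index.get_nns_by_item(0, 5))  # 5 nearest neighbours of item 0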
Example #4
class ImageSearch:
    def __init__(self, dims=128, metric='euclidean'):
        """
            Every time the service starts, load all embedding vectors
            into a tree. With 76 images, building takes about 0.001 s.
        """
        self.dims = dims
        self.metric = metric
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.db = DBManagement()
        self.extractFeature = ExtractFeature()
        self._build_tree()

    def _build_tree(self):
        v_arr = self.db.get_features()
        for index, vector in enumerate(v_arr):
            self.feature_index.add_item(index, vector)
        # Twice as many trees as items; len() avoids an undefined
        # `index` when the feature list is empty
        n_trees = 2 * len(v_arr)
        self.feature_index.build(n_trees)

    def save_tree(self, path):
        self.feature_index.save(path)

    def load_tree(self, path):
        self.feature_index = AnnoyIndex(self.dims, metric=self.metric)
        self.feature_index.load(path)

    def search_index_by_vector(self, image, top_n=10):
        # Return the nearest clusters and their distances
        data = {'success': False}
        try:
            vector = self.extractFeature.extract_feature_insight_face(image)
            neighbors, dists = self.feature_index.get_nns_by_vector(
                vector, top_n, include_distances=True
            )
            ids = self.db.get_clusters()
            results = [{'id_cluster': ids[a], 'distance': dists[i]}
                       for i, a in enumerate(neighbors)]
            # If the closest distance is >= 1, the match is rejected
            if results[0]['distance'] >= 1:
                return data
            data['success'] = True
            data['results'] = results
            return data
        except Exception:
            # Return the failure dict so callers can always check 'success'
            return data
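
A usage sketch for ImageSearch under the same assumptions as Example #2; the image path is a placeholder:

import cv2

searcher = ImageSearch()
hits = searcher.search_index_by_vector(cv2.imread('query.jpg'), top_n=10)
if hits['success']:
    for hit in hits['results']:
        print(hit['id_cluster'], hit['distance'])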
Example #5
    def load_model(self):
        self.db = DBManagement()
        # Initialize our lists of extracted facial embeddings and corresponding people names
        self.features = []
        self.ids = []
        #self.genders = []
        #self.ages = []
        #self.clusters = []
        #self.fileNames = []

        parser = argparse.ArgumentParser()
        parser.add_argument('--image-size', default='112,112', help='')
        parser.add_argument(
            '--model',
            default='insightface/models/model-y1-test2/model,0',
            help='path to load model.')
        parser.add_argument('--ga-model',
                            default='insightface/models/gamodel-r50/model,0',
                            help='path to load model.')
        parser.add_argument('--gpu', default=0, type=int, help='gpu id')
        parser.add_argument(
            '--det',
            default=0,
            type=int,
            help='mtcnn option: 1 means using R+O, 0 means detect from the beginning'
        )
        parser.add_argument('--flip',
                            default=0,
                            type=int,
                            help='whether to do left-right flip augmentation')
        parser.add_argument('--threshold',
                            default=1.24,
                            type=float,
                            help='verification distance threshold')
        args = parser.parse_args()

        # Initialize the faces embedder
        self.model = face_model.FaceModel(args)
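
One caveat with this pattern: parser.parse_args() reads sys.argv, so embedding load_model in a process that has its own CLI flags can fail. A common workaround (an assumption here, not in the original) is to parse an empty argument list so only the defaults apply:

args = parser.parse_args([])  # ignore the host process's argv, keep defaults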
Example #6
class ExtractFeature():
    def __init__(self):
        self.load_model()

    def load_model(self):
        self.db = DBManagement()
        # Initialize our lists of extracted facial embeddings and corresponding people names
        self.features = []
        self.ids = []
        #self.genders = []
        #self.ages = []
        #self.clusters = []
        #self.fileNames = []

        parser = argparse.ArgumentParser()
        parser.add_argument('--image-size', default='112,112', help='')
        parser.add_argument(
            '--model',
            default='insightface/models/model-y1-test2/model,0',
            help='path to load model.')
        parser.add_argument('--ga-model',
                            default='insightface/models/gamodel-r50/model,0',
                            help='path to load model.')
        parser.add_argument('--gpu', default=0, type=int, help='gpu id')
        parser.add_argument(
            '--det',
            default=0,
            type=int,
            help='mtcnn option: 1 means using R+O, 0 means detect from the beginning'
        )
        parser.add_argument('--flip',
                            default=0,
                            type=int,
                            help='whether to do left-right flip augmentation')
        parser.add_argument('--threshold',
                            default=1.24,
                            type=float,
                            help='verification distance threshold')
        args = parser.parse_args()

        # Initialize the faces embedder
        self.model = face_model.FaceModel(args)

    def extract_feature_insight_faces(self, imagePath):
        for classname in os.listdir(imagePath):
            total = 0
            for filename in os.listdir(os.path.join(imagePath, classname)):
                path = os.path.join(imagePath, classname, filename)
                image = cv2.imread(path)
                if image is None:  # skip unreadable / non-image files
                    continue

                face = self.model.get_input(image)
                if face is None:
                    print('No face detected\n')
                    continue
                embedding = self.model.get_feature(face)
                face_id = '{}_{}'.format(classname, total)  # avoid shadowing builtin id
                self.ids.append(face_id)
                self.features.append(embedding)

                total += 1
                print("[extract_feature_insight_face]: Extract face ", face_id)

            print(total, " faces embedded")
        # save to output
        self.db.save_data(self.features, self.ids)

    def extract_feature_insight_face(self, image):
        face = self.model.get_input(image)
        if face is None:
            return None  # no face detected
        embedding = self.model.get_feature(face)
        return embedding
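
A usage sketch for batch extraction; the dataset layout (one sub-directory per person) follows how extract_feature_insight_faces walks imagePath, and the path itself is a placeholder:

extractor = ExtractFeature()
# Expects dataset/faces/<person_name>/<image>.jpg
extractor.extract_feature_insight_faces('dataset/faces')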
Example #7
def dump_data(processed_img):
    # processed_img was an undefined free variable in the original snippet;
    # it is assumed to be the preprocessed image data to persist.
    db = DBManagement()
    db.save_image_data(processed_img)
Example #8
class DataGenerator(Sequence):
    def __init__(self, mode='train'):
        self.database = DBManagement()
        # Get caption
        self.descriptions = utils.read_caption_clean_file(
            'Flickr8k_text/Flickr8k.cleaned.lemma.token.txt')
        self.idxtoword, self.wordtoidx, self.vocab_size = utils.map_w2id(
            self.descriptions.values())
        self.max_len = utils.calculate_caption_max_len(
            self.descriptions.values())

        if mode not in ('train', 'val', 'test'):
            raise ValueError("mode must be one of 'train', 'val' or 'test'")
        self.mode = mode  # choose data generator mode
        if self.mode == 'train':
            # Load train image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.trainImages.txt')
        elif self.mode == 'val':
            # Load validation image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.devImages.txt')
        elif self.mode == 'test':
            # Load test image vectors
            self.imgs = self.database.get_image_data_from_list(
                'Flickr8k_text/Flickr_8k.testImages.txt')
        #### Test purpose ####
        # self.imgs = self.database.get_image_data()

    def __len__(self):
        return len(self.imgs) // cf.batch_size

    def __getitem__(self, idx):
        X1, X2, y = list(), list(), list()

        # Slice this batch's image keys out of the full mapping
        batch_keys = list(
            self.imgs.keys())[idx * cf.batch_size:(idx + 1) * cf.batch_size]
        img_dict = {k: self.imgs[k] for k in batch_keys}

        # print('\n{}.Batch size: {}\n'.format(idx,len(img_dict)))
        for k, v in img_dict.items():
            desc_list = self.descriptions[k.split('.')[0]]
            ### Debug ###
            # print("Length of feature vector: {} of {}\n".format(len(v), k))
            ##############
            for desc in desc_list:
                seq = [self.wordtoidx[word] for word in desc.split(' ')]
                for i in range(1, len(seq)):
                    # split into input and output pair
                    in_seq, out_seq = seq[:i], seq[i]
                    # pad input sequence
                    in_seq = pad_sequences([in_seq], maxlen=self.max_len)[0]
                    # encode output sequence
                    out_seq = to_categorical([out_seq],
                                             num_classes=self.vocab_size)[0]
                    # store
                    X1.append(v)
                    X2.append(in_seq)
                    y.append(out_seq)

        # Keras expects (inputs, targets); inputs are [image_feature, text_sequence]
        return [np.array(X1), np.array(X2)], np.array(y)
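
A minimal training sketch; build_model is hypothetical and stands in for whatever captioning model consumes the [image_feature, padded_sequence] input pair this generator yields:

train_gen = DataGenerator(mode='train')
val_gen = DataGenerator(mode='val')
model = build_model(train_gen.vocab_size, train_gen.max_len)  # hypothetical
model.fit(train_gen, validation_data=val_gen, epochs=10)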