def test_split_classes(self):
    """Splitting by class must conserve the total number of classes."""
    dataset = FaceDataset()
    dataset.read_dataset('tests/fixtures/faces/same_faces')
    first_half, second_half = dataset.split_classes()
    combined = first_half.classes_length + second_half.classes_length
    self.assertEqual(dataset.classes_length, combined)
def test_get_embeddings(self):
    """Embeddings come back as a numpy array aligned with the labels."""
    dataset = FaceDataset()
    dataset.read_dataset('tests/fixtures/faces/same_faces')
    embeddings, labels = dataset.get_embeddings()
    self.assertIsInstance(embeddings, np.ndarray)
    self.assertEqual(embeddings.shape[0], labels.shape[0])
def test_unknown_dataset(self):
    """Merging into an unknown class keeps every image under one tag.

    After create_unknown_class() there must be exactly one class, keyed
    by Config.UNKNOWN_TAG, holding all images of the source dataset.
    """
    data = FaceDataset()
    data.read_dataset('tests/fixtures/faces/same_faces')
    unk_data = data.create_unknown_class()
    self.assertEqual(unk_data.classes_length, 1)
    self.assertTrue(Config.UNKNOWN_TAG in unk_data.dataset)
    # sum() over the generator replaces the manual accumulator loop.
    total_images = sum(images.length for _, images in unk_data.items())
    self.assertEqual(unk_data[Config.UNKNOWN_TAG].length, total_images)
def test_sequencial_model_newdata(self):
    """The saved sequential model scores above 0.8 on unseen people."""
    testset = FaceDataset.load('tests/fixtures/faces/newdata/testset.pkl')
    testset = testset.create_unknown_class()
    today = datetime.datetime.now().strftime('%Y-%m-%d')
    modeldir = os.path.join(Config.SAVED_MODELS, today)
    trainset = FaceDataset.load(os.path.join(modeldir, Config.TRAINSET_FILE))
    model = SequentialModel()
    model.load(modeldir)
    self.assertGreater(score(model, testset, trainset), 0.8)
def test_add_imageclass(self):
    """Adding a new image class bumps the class count by exactly one."""
    dataset = FaceDataset.load('tests/fixtures/faces/testset.pkl')
    before = dataset.classes_length
    new_class = FaceImageClass('new_class')
    new_class.append(np.zeros((3, 512)))
    dataset._add_imageclass(new_class)
    self.assertEqual(dataset.classes_length, before + 1)
def test_load(self):
    """A dataset survives a save/load round trip unchanged."""
    original = FaceDataset()
    original.read_dataset('tests/fixtures/faces/newdata')
    original.save('tests/fixtures/faces/newdata/newdata.pkl')
    restored = FaceDataset.load('tests/fixtures/faces/newdata/newdata.pkl')
    self.assertEqual(original.classes_length, restored.classes_length)
    for cls in original.get_classes():
        self.assertIn(cls, restored.get_classes())
        self.assertEqual(original[cls].length, restored[cls].length)
def test_knn_model_newdata(self):
    """
    Evaluate the persisted KNN model on people unseen at training time.

    Loads the newdata test set, folds it into a single unknown class,
    and expects the score against the saved train set to exceed 0.8.
    """
    dataset = FaceDataset.load('tests/fixtures/faces/newdata/testset.pkl')
    dataset = dataset.create_unknown_class()
    # Models are saved under a directory named after today's date.
    modeldir = os.path.join(Config.SAVED_MODELS,
                            datetime.datetime.now().strftime('%Y-%m-%d'))
    trainset = FaceDataset.load(
        os.path.join(modeldir, Config.TRAINSET_FILE))
    knnmodel = KnnModel.load(modeldir)
    result = score(knnmodel, dataset, trainset)
    self.assertGreater(result, 0.8)
def test_save(self):
    """Saving a dataset creates the pickle file on disk."""
    dataset = FaceDataset()
    dataset.read_dataset('tests/fixtures/faces/newdata')
    target = 'tests/fixtures/faces/newdata/newdata.pkl'
    dataset.save(target)
    self.assertTrue(os.path.exists(target))
def test_sequencial_model(self):
    """The saved sequential model evaluates above 0.8 on distance vectors."""
    dataset = FaceDataset.load(
        'tests/fixtures/faces/same_faces/testset.pkl')
    X, y = dataset.get_distance_vectors()
    modeldir = os.path.join(Config.SAVED_MODELS,
                            datetime.datetime.now().strftime('%Y-%m-%d'))
    kmodel = SequentialModel()
    kmodel.load(modeldir)
    # Renamed from `score`: that name shadowed the module-level score()
    # helper used by the other model tests.
    accuracy = kmodel.evaluate(X, y)
    self.assertGreater(accuracy, 0.8)
def test_split_instances_of_classes(self):
    """Per-class splitting keeps every class and preserves image counts."""
    data = FaceDataset()
    data.read_dataset('tests/fixtures/faces/newdata')
    data1, data2 = data.split_instances_of_classes()
    self.assertEqual(data1.classes_length, data2.classes_length)
    # Use `images` directly instead of the redundant data[cls] lookup.
    for cls, images in data.items():
        self.assertEqual(images.length,
                         data1[cls].length + data2[cls].length)
def test_knn_model(self):
    """
    Evaluate the persisted KNN model on the same_faces test set.

    The model saved under today's date must classify the test-set
    embeddings with an accuracy above 0.8.
    """
    # Docstring fixed: the old one ("Tests the data loading from a json
    # file") described a different test entirely.
    dataset = FaceDataset.load(
        'tests/fixtures/faces/same_faces/testset.pkl')
    modeldir = os.path.join(Config.SAVED_MODELS,
                            datetime.datetime.now().strftime('%Y-%m-%d'))
    knnmodel = KnnModel.load(modeldir)
    X, y = dataset.get_embeddings()
    # Renamed from `score` to avoid shadowing the module-level score() helper.
    accuracy = knnmodel.evaluate(X, y)
    self.assertGreater(accuracy, 0.8)
def test_balance_downsample(self):
    """Downsampling yields equally-sized classes of minority-class size."""
    data = FaceDataset.load('tests/fixtures/faces/testset.pkl')
    X, y = data.get_distance_vectors()
    # BUG FIX: the original called `dataset.balance_classes(...)` but no
    # local `dataset` exists — NameError at runtime. Call the method on
    # the loaded dataset instead (assumes balance_classes is an instance
    # method of FaceDataset, as other tests use instance methods — verify).
    X_bal, y_bal = data.balance_classes(X, y, Balance.DOWNSAMPLE)
    self.assertEqual(y_bal[y_bal == 0].shape, y_bal[y_bal == 1].shape)
    # Both classes shrink to the size of the positive (minority) class.
    self.assertEqual(y_bal.shape[0], y[y == 1].shape[0] * 2)
def test_read_dataset(self):
    """Reading the fixture folder yields the expected classes and counts."""
    dataset = FaceDataset()
    dataset.read_dataset('tests/fixtures/faces/same_faces')
    self.assertEqual(dataset.classes_length, 150)
    self.assertEqual(dataset['George_W_Bush'].length, 26)
parser.add_argument("-o", "--output", dest="output",
                    help="Path to the output folder.")
parser.add_argument("-n", "--n_clusters", type=int,
                    help="Number of clusters for hierarchical clustering.")
parser.add_argument("--analyze", action='store_true',
                    help="Print clustering details.")
args = parser.parse_args()
logger.info(sys.argv)
all_data = FaceDataset()
all_data.read_dataset(args.input)
# argparse always defines the attribute (None when the flag is omitted),
# so the old hasattr() check was dead — a truthiness test is sufficient.
clustering_algo = (get_hierarchical(args.n_clusters) if args.n_clusters
                   else get_dbscan())
time_start = time.time()
labels = clustering(all_data, clustering_algo, args.analyze)
time_end = time.time()
# BUG FIX: elapsed time is end - start; the original logged a negative value.
logger.info('The clustering took {:.3f} ms'.format(
    (time_end - time_start) * 1000.0))
write_clusters(args.output, all_data, labels)
parser.add_argument("-m", "--model", dest="model_dir",
                    help="Path to the input model.")
# BUG FIX: the --approach help text was copy-pasted from --model
# ("Path to the input model."); describe the option it belongs to.
parser.add_argument("-a", "--approach", dest="approach", type=Models,
                    choices=list(Models), default=Models.SEQUENTIAL,
                    help="Model approach to evaluate.")
args = parser.parse_args()
logger.info(sys.argv)
trainset = FaceDataset.load(
    os.path.join(args.model_dir, Config.TRAINSET_FILE))
testset = FaceDataset.load(os.path.join(args.input, Config.TESTSET_FILE))
newpeopleset = FaceDataset.load(
    os.path.join(args.input, Config.NEWDATA_FILE))
# People unseen at training time are folded into a single unknown class.
newpeopleset = newpeopleset.create_unknown_class()
if args.approach == Models.SEQUENTIAL:
    logger.info("Load the sequencial model.")
    model = SequentialModel()
    model.load(args.model_dir)
else:
    logger.info("Load the knn model.")
    model = KnnModel().load(args.model_dir)
logger.info("Evaluate the model on the test set.")
acc = score(model, testset, trainset)
def setUp(self):
    """Build the combined fixture pickle the tests load from disk."""
    same_faces = FaceDataset()
    same_faces.read_dataset('tests/fixtures/faces/same_faces')
    new_data = FaceDataset()
    new_data.read_dataset('tests/fixtures/faces/newdata')
    same_faces.append(new_data)
    # os.path.join() with a single component is a no-op; pass the path as-is.
    same_faces.save('tests/fixtures/faces/testset.pkl')
from config import Config
from src.data.dataset import FaceDataset, Balance
from src.model.sequencial_model import SequentialModel

logging.config.fileConfig(Config.LOGGING_FILE)
logger = logging.getLogger()

if __name__ == '__main__':
    # Train the sequential model on distance vectors and persist it,
    # dated, together with a copy of the train set for later evaluation.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest="input",
                        help="Path to the input folder.")
    parser.add_argument("-b", "--balance", dest="balance", type=Balance,
                        choices=list(Balance),
                        help="Approach for class balancing.")
    args = parser.parse_args()
    logger.info(sys.argv)
    trainset = FaceDataset.load(os.path.join(args.input, Config.TRAINSET_FILE))
    testset = FaceDataset.load(os.path.join(args.input, Config.TESTSET_FILE))
    # Balancing is applied to the training vectors only; the test set
    # stays as-is so evaluation reflects the real class distribution.
    x_train, y_train = trainset.get_distance_vectors(balance=args.balance)
    x_test, y_test = testset.get_distance_vectors()
    kmodel = SequentialModel()
    kmodel.train(input_size=x_train.shape[1], x_train=x_train,
                 y_train=y_train, x_test=x_test, y_test=y_test)
    # Models are versioned by date under the saved-models root.
    output_dir = os.path.join(Config.SAVED_MODELS,
                              datetime.datetime.now().strftime('%Y-%m-%d'))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    kmodel.save(output_dir)
    # Keep the train set next to the model so scoring scripts can reload it.
    shutil.copyfile(os.path.join(args.input, Config.TRAINSET_FILE),
                    os.path.join(output_dir, Config.TRAINSET_FILE))