class ModelEngineering:
    """Face-embedding model wrapper with a KNN classifier on top.

    The class owns a TensorFlow graph/session pair, lazily loads a model into
    it on the first call to :meth:`encode`, and delegates classification of
    the resulting embeddings to a ``KNN`` instance.
    """

    def __init__(self, pkg_dir):
        """
        Set up the graph, the session and the KNN classifier.

        :param pkg_dir: The package directory that contains the
            'InceptionResNetV1-VGGFace2' model folder
        """
        self.pkg_dir = pkg_dir
        self.frozen_graph_path = os.path.join(
            pkg_dir, 'InceptionResNetV1-VGGFace2', '20180402-114759.pb')
        self.graph = tf.Graph()
        self.session = tf.Session(graph=self.graph)

        # Tensors resolved by initialize(); None until the model is loaded.
        self.imgs_ph = None
        self.phase_train_ph = None
        self.embs_ph = None
        self.emb_size_ph = None
        self.initialized = False

        # Neighbour count and weight function ('uniform', 'distance' or a
        # callable). Nothing in this class passes them to KNN(); they are
        # kept because they are public attributes.
        self.n_neighbors = 2
        self.weights = 'distance'
        self.knn = KNN()

    def initialize(self):
        """
        Call load_model method and get input/output tensors

        :return: True, if everything goes well
        """
        (self.imgs_ph, self.phase_train_ph,
         self.embs_ph, self.emb_size_ph) = self.load_model(
            self.frozen_graph_path)
        # Record the state here as well, so that calling initialize() directly
        # does not make encode() import the graph a second time.
        self.initialized = True
        return True

    def load_model(self, model, input_map=None):
        """
        Load a (frozen) Tensorflow model into memory.

        :param model: Could be either a directory containing the meta_file
            and ckpt_file or a model protobuf (.pb) file
        :param input_map: The input map
        :return: The place holders for input dataset, phase train,
            embeddings, and the embedding size
        """
        with self.graph.as_default():
            # A regular file is treated as a protobuf with a frozen graph;
            # anything else is treated as a model directory containing a
            # metagraph and a checkpoint file.
            model_exp = os.path.expanduser(model)
            if os.path.isfile(model_exp):
                print('Model filename: %s' % model_exp)
                with gfile.FastGFile(model_exp, 'rb') as f:
                    graph_def = tf.GraphDef()
                    graph_def.ParseFromString(f.read())
                    tf.import_graph_def(
                        graph_def, input_map=input_map, name='')
            else:
                print('Model directory: %s' % model_exp)
                meta_file, ckpt_file = self.get_model_filenames(model_exp)

                print('Metagraph file: %s' % meta_file)
                print('Checkpoint file: %s' % ckpt_file)

                saver = tf.train.import_meta_graph(
                    os.path.join(model_exp, meta_file), input_map=input_map)
                saver.restore(
                    self.session, os.path.join(model_exp, ckpt_file))

            # Get input and output tensors
            imgs_ph = self.graph.get_tensor_by_name("input:0")
            embs_ph = self.graph.get_tensor_by_name("embeddings:0")
            phase_train_ph = self.graph.get_tensor_by_name("phase_train:0")
            emb_size = embs_ph.get_shape()[1]

        return imgs_ph, phase_train_ph, embs_ph, emb_size

    @staticmethod
    def get_model_filenames(model_dir):
        """
        Get the model file names.

        :param model_dir: The directory in which the saved checkpoints of the
            model exist
        :return: The meta file name and the checkpoint file name
        :raises ValueError: If the directory does not contain exactly one
            meta file
        """
        files = os.listdir(model_dir)
        meta_files = [s for s in files if s.endswith('.meta')]
        if not meta_files:
            raise ValueError(
                'No meta file found in the model directory (%s)' % model_dir)
        if len(meta_files) > 1:
            raise ValueError(
                'There should not be more than one meta file in the model directory ({})'
                .format(model_dir))
        meta_file = meta_files[0]

        # Prefer the checkpoint recorded in the checkpoint state, if any.
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            return meta_file, os.path.basename(ckpt.model_checkpoint_path)

        # Otherwise pick the checkpoint file with the highest step number.
        # The dot before "ckpt" is escaped so that it only matches a literal
        # dot.
        ckpt_file = ''
        max_step = -1
        for f in files:
            step_str = re.match(r'(^model-[\w\- ]+\.ckpt-(\d+))', f)
            if step_str is not None and len(step_str.groups()) >= 2:
                step = int(step_str.groups()[1])
                if step > max_step:
                    max_step = step
                    ckpt_file = step_str.groups()[0]
        return meta_file, ckpt_file

    def encode(self, images):
        """
        Run the forward pass to calculate embeddings.

        The model is loaded on the first call.

        :param images: The input (4D) tensor
        :return: The 512-vector embeddings
        """
        if not self.initialized:
            self.initialized = self.initialize()

        feed_dict = {self.imgs_ph: images, self.phase_train_ph: False}
        emb_array = self.session.run(self.embs_ph, feed_dict=feed_dict)

        return emb_array

    @staticmethod
    def _collect_faces(warehouse):
        """
        Gather the embeddings and UIDs of every face in a warehouse.

        Embeddings are stacked row-wise, one row per face, whether each
        embedding is a flat vector or a single-row matrix.

        :param warehouse: An object whose get_faces() yields items with
            ``embedding`` and ``uid`` attributes
        :return: The stacked embeddings and the UID array; two empty arrays
            if the warehouse has no faces
        """
        embeddings = []
        uids = []
        for face in warehouse.get_faces():
            embeddings.append(face.embedding)
            uids.append(face.uid)

        if not embeddings:
            return np.array([]), np.array([])

        emb_array = np.vstack(embeddings)
        # Appending to an empty array keeps the dtype promotion that
        # element-by-element np.append produced.
        uid_array = np.append(np.array([]), uids)
        return emb_array, uid_array

    def knn_fit(self, warehouse):
        """
        Fit the KNN classifier using the training data set

        :param warehouse: The warehouse holding the training faces
        :return: None
        """
        emb_array, uid_array = self._collect_faces(warehouse)
        self.knn.fit(emb_array, uid_array)

    def knn_classify(self, query):
        """
        Supervised KNN

        :param query: the subject embedding
        :return: the UID of the subject
        """
        uid = self.knn.classify([query])
        return uid

    def knn_eval(self, warehouse):
        """
        Evaluate the KNN classifier on a test data set

        :param warehouse: The warehouse holding the test faces
        :return: the accuracy
        """
        emb_array, uid_array = self._collect_faces(warehouse)
        accuracy = self.knn.evaluate(emb_array, uid_array)
        return accuracy
# Assemble the test split as a (labels, features) tuple; the labels also
# serve as the ground truth for scoring.
y = np.array(y_list)
X = np.array(X_list)
test_data = (y, X)
ground_truth = y

# KNN Prediction
# =================================
start = datetime.now()
# The last two arguments are k and p (see the disabled sweep further down).
classifier = KNN(training_set, test_data, 3, 2)
predictions = classifier.classify_data()

# Prediction Accuracy
# =================================
prediction_percent = classifier.evaluate(ground_truth, predictions)
print("Accuracy: ", prediction_percent, "%")
print("Runtime: ", datetime.now() - start)
# best: k=3, p=2 (Euclidean distance), 82%

# Meta-parameter Analysis
# Loop through k in range (1,10) and p in range (1.0, 2.0)
# NOTE(review): the sweep below sits inside a string literal, so it is
# disabled; the rest of it is not visible in this part of the file.
# =================================
"""
metadata = []
for i in range(1, 11):
    for j in range(10, 21):
        k = i
        p = j * 0.1
        start = datetime.now()
        classifier = KNN(training_set, test_data, k, p)
# Sweep the hidden-layer width of a one-hidden-layer network and plot the
# training and validation error reached for each width.
k = len(np.unique(y))  # one output node per distinct label
alpha = 0.01
epochs = 20
hidden_layer_size = [10, 100, 250, 500, 750]
test_errors = []
train_errors = []

for h in hidden_layer_size:
    model = nn.NN(no_of_in_nodes=image_pixels,
                  no_of_out_nodes=k,
                  no_of_hidden_nodes=h,
                  learning_rate=alpha,
                  bias=None)
    weights = model.fit(X, Y, epochs=epochs, intermediate_results=True)

    # Replay the weights stored for each epoch and report both errors.
    for i in range(epochs):
        print("epoch: ", i)
        model.wih = weights[i][0]
        model.who = weights[i][1]

        corrects, wrongs = model.evaluate(X, y)
        train_error = 1 - corrects / (corrects + wrongs)
        print("train error: ", train_error)

        corrects, wrongs = model.evaluate(Xtest, ytest)
        test_error = 1 - corrects / (corrects + wrongs)
        print("test error: ", test_error)

    # Keep one point per width (the errors after the last epoch) so that the
    # error arrays line up with hidden_layer_size in the plot below.
    test_errors = np.append(test_errors, test_error)
    train_errors = np.append(train_errors, train_error)

plt.plot(hidden_layer_size, test_errors, label="validation error")
plt.plot(hidden_layer_size, train_errors, label="training error")
plt.title("Neural Network for One Hidden Layer")
plt.xlabel("Hidden Layer Size")
plt.ylabel("Error")

fname = os.path.join("..", "figs", "mlp.pdf")
# savefig does not create missing directories.
os.makedirs(os.path.dirname(fname), exist_ok=True)
plt.savefig(fname)
print("\nFigure saved as '%s'" % fname)
class KNNTestCase(unittest.TestCase):
    """
    Test cases for the KNN implementation
    """

    def setUp(self):
        """
        Build a fresh classifier and the data fixtures before every test

        :return: None
        """
        self.knn = KNN(k=5)
        self.train_data = np.array([[5.1, 3.5, 1.4, 0.2],
                                    [4.9, 3.0, 1.4, 0.2],
                                    [4.7, 3.2, 1.3, 0.2],
                                    [4.6, 3.1, 1.5, 0.2],
                                    [7.0, 3.2, 4.7, 1.4],
                                    [6.4, 3.2, 4.5, 1.5],
                                    [6.9, 3.1, 4.9, 1.5],
                                    [5.5, 2.3, 4.0, 1.3]])
        self.train_label = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        # The last two test samples carry label 2, which does not occur in
        # the training labels.
        self.test_data = np.array([[5.0, 3.6, 1.4, 0.2],
                                   [5.4, 3.9, 1.7, 0.4],
                                   [6.5, 2.8, 4.6, 1.5],
                                   [5.7, 2.8, 4.5, 1.3],
                                   [6.3, 3.3, 6.0, 2.5],
                                   [5.8, 2.7, 5.1, 1.9]])
        self.test_label = np.array([0, 0, 1, 1, 2, 2])

    def test_fit(self):
        """
        The return value of the function should be equal to the number of
        the classes in the data set

        :return: None
        """
        num_classes = self.knn.fit(self.test_data, self.test_label)
        self.assertEqual(num_classes, 3)

    def test_compute_distance(self):
        """
        The distance between the two input vectors should be a floating
        point value inside the zero to one interval

        :return: None
        """
        sample0 = self.train_data[0]
        sample1 = self.train_data[1]
        distance = self.knn.compute_distance(sample0, sample1)
        self.assertGreaterEqual(distance, 0.0)
        self.assertLessEqual(distance, 1.0)

    def test_get_neighbours(self):
        """
        The returned neighbours should be a list of tuples, each of which
        contains the label and the distance

        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        query = self.test_data[0]
        neighbours = self.knn.get_neighbors(query)
        self.assertEqual(len(neighbours), self.knn.k)
        self.assertEqual(len(neighbours[0]), 2)

    def test_classify(self):
        """
        The classified label of the normal sample should correspond to its
        ground truth label and for the anomaly sample that does not belong to
        any of the training classes it should be equal to -1

        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        normal_data = self.test_data[0]
        normal_label = self.test_label[0]
        anomaly_data = self.test_data[-1]
        normal_pred = self.knn.classify(normal_data)
        anomaly_pred = self.knn.classify(anomaly_data)
        self.assertEqual(normal_pred, normal_label)
        self.assertEqual(anomaly_pred, -1)

    def test_evaluate(self):
        """
        The returned accuracy should be equal to 1.0 in the case where the
        training and test set are the same and in the case where the training
        set and test set are different it should be strictly between zero and
        one

        :return: None
        """
        self.knn.fit(self.train_data, self.train_label)
        accuracy_perfect = self.knn.evaluate(self.train_data,
                                             self.train_label)
        accuracy_imperfect = self.knn.evaluate(self.test_data,
                                               self.test_label)
        self.assertEqual(accuracy_perfect, 1.0)
        self.assertGreater(accuracy_imperfect, 0.0)
        self.assertLess(accuracy_imperfect, 1.0)