def test_dot():
    v1 = vector([1, 0])
    v2 = vector([0, 1])
    v3 = vector([-1, 0])

    assert dot(v1, v1) == 1
    assert dot(v1, v2) == 0
    assert dot(v1, v3) == -1
def trainListFile(self, listTrainFile, listmanualfiles):
    if len(listmanualfiles) != len(listTrainFile):
        print("Error: the training file list and the manual file list have different lengths")
        sys.exit()
    self.reset()
    queries = dlib.ranking_pairs()
    for index in range(0, len(listTrainFile)):
        self.reset()
        data = dlib.ranking_pair()
        inputNonRelevant = " ".join([line for line in open(listTrainFile[index], 'r').readlines()])
        tpAllSent = myTokenizer(inputNonRelevant)
        self.inputFromString(inputNonRelevant)
        inputRelevant = " ".join([line for line in open(listmanualfiles[index], 'r').readlines()])
        tpRelevant = myTokenizer(inputRelevant)
        tpNonRelevant = list(set(tpAllSent).difference(set(tpRelevant)))
        self.genAllVector()
        for sent in tpRelevant:
            data.relevant.append(dlib.vector(self.dicVector.get(sent.strip())))
        for sent in tpNonRelevant:
            data.nonrelevant.append(dlib.vector(self.dicVector.get(sent.strip())))
        queries.append(data)
    trainer = dlib.svm_rank_trainer()
    trainer.c = 10
    rank = trainer.train(queries)
    _weight = []
    for i in range(0, len(rank.weights)):
        _weight.append(rank.weights[i])
    return _weight
def generate_test_vectors():
    vs = vectors()
    vs.append(vector([0, 1, 2]))
    vs.append(vector([3, 4, 5]))
    vs.append(vector([6, 7, 8]))
    assert len(vs) == 3
    return vs
def train(tagged):
    """Trains an SVM classifier based on the training data passed.

    Mostly based on http://dlib.net/svm_binary_classifier.py.html.

    :param tagged: list of TaggedFace to train on
    :return: None (the trained dlib.svm model is pickled to PATH_SVMFILE)
    """
    x = dlib.vectors()  # will carry the facial encodings
    y = dlib.array()    # will carry the gender label
    print("Preparing dataset...")
    total = len(tagged)
    for i, t in enumerate(tagged):
        print(f"\rEncoding {t.path} ({i + 1}/{total})...", end="")
        faces = encode(t.img)
        x.append(dlib.vector(faces[0]))
        y.append(t.tag)
        # Augment the dataset with progressively downscaled copies of the image.
        img = t.img
        for _ in range(5):
            faces = encode(img)
            if not faces:
                break
            x.append(dlib.vector(faces[0]))
            y.append(t.tag)
            img = cv2.resize(img, None, fx=0.7, fy=0.7)
    print("Training SVM...")
    trainer = dlib.svm_c_trainer_radial_basis()
    # trainer.be_verbose()
    trainer.set_c(10)
    model = trainer.train(x, y)
    with open(PATH_SVMFILE, "wb") as filehandle:
        pickle.dump(model, filehandle)
    return None
def sentence_to_vectors(sentence):
    # Create an empty array of vectors
    vects = dlib.vectors()
    for word in sentence.split():
        # Our vectors are very simple 1-dimensional vectors.  The value of the
        # single feature is 1 if the first letter of the word is capitalized
        # and 0 otherwise.
        if word[0].isupper():
            vects.append(dlib.vector([1]))
        else:
            vects.append(dlib.vector([0]))
    return vects
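# A quick usage sketch for the helper above (the example sentence is purely
# illustrative): each word maps to a 1-dimensional dlib.vector holding 1 for a
# capitalized word and 0 otherwise.
vects = sentence_to_vectors("The quick brown Fox")
print([v[0] for v in vects])  # -> [1.0, 0.0, 0.0, 1.0]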
def dlibVectorFormating(data, tolist=True, key_descr='descr'):
    # Convert stored descriptors between plain Python lists and dlib.vector objects.
    for d in data:
        if tolist:
            d[key_descr] = [list(dd) for dd in d[key_descr]]
        else:
            d[key_descr] = [dlib.vector(dd) for dd in d[key_descr]]
    return data
def CLUSTER_TRACKS(DT, threshold):
    track_keys = list(DT.keys())
    track_feats = []
    for key in track_keys:
        track_feats.append(dlib.vector(DT[key]['BBOX_FEAT'].mean(0)))
    CL = defaultdict(dict)
    # cluster_ids[idx] is the cluster label assigned to the track at track_keys[idx]
    cluster_ids = dlib.chinese_whispers_clustering(track_feats, threshold)
    for idx, label in enumerate(cluster_ids):
        key = track_keys[idx]
        if CL[label]:
            # Merge this track into the existing cluster entry.
            CL[label]['BBOX'] = CL[label]['BBOX'] + DT[key]['BBOX']
            CL[label]['Frame_ID'] = CL[label]['Frame_ID'] + DT[key]['Frame_ID']
            CL[label]['BBOX_FEAT'] = CL[label]['BBOX_FEAT'] + DT[key]['BBOX_FEAT']
            CL[label]['ANGLE'] = CL[label]['ANGLE'] + DT[key]['ANGLE']
            CL[label]['IMG'] = CL[label]['IMG'] + DT[key]['IMG']
            CL[label]['AVG_SIZE'] = DT[key]['AVG_SIZE']
            CL[label]['AREA'] = DT[key]['AREA']
            CL[label]['LEN'] = CL[label]['LEN'] + DT[key]['LEN']
        else:
            # First track seen for this cluster: initialise the entry.
            CL[label]['BBOX'] = DT[key]['BBOX']
            CL[label]['Frame_ID'] = DT[key]['Frame_ID']
            CL[label]['BBOX_FEAT'] = DT[key]['BBOX_FEAT']
            CL[label]['ANGLE'] = DT[key]['ANGLE']
            CL[label]['IMG'] = DT[key]['IMG']
            CL[label]['AVG_SIZE'] = DT[key]['AVG_SIZE']
            CL[label]['AREA'] = DT[key]['AREA']
            CL[label]['LEN'] = DT[key]['LEN']
    return CL
def preprocess_faces(self, faces):
    # Cluster the faces with chinese whispers
    encodings = [dlib.vector(face['encoding']) for face in faces]
    labels = dlib.chinese_whispers_clustering(encodings, 0.5)

    selected_faces = []
    # Select the face closest to the average encoding of each group
    groups = list(set(labels))
    for group in groups:
        # Get indices for each group
        indices = [i for i in range(len(labels)) if labels[i] == group]
        group_encodings = [faces[i]['encoding'] for i in indices]
        # Get centroid for group encodings
        avg_group_encoding = np.average(group_encodings, axis=0)
        # Get the closest face to the centroid
        avg_distance = face_recognition.face_distance(
            group_encodings, avg_group_encoding)
        min_index = np.argmin(avg_distance)
        face_index = indices[min_index]
        selected_faces.append(faces[face_index])
    return selected_faces
def clustring(self, faces_info):
    for data in faces_info:
        encode = data['face_encoding']
        self.face_encodings.append(dlib.vector(encode))
    labels = dlib.chinese_whispers_clustering(self.face_encodings, 0.5)
    labels = np.array(labels)
    print("All cluster labels:", labels)
    unique_labels = np.unique(labels)
    print("Number of unique faces found:", len(unique_labels))
    print("Saving faces...")
    for label in unique_labels:
        index = np.where(labels == label)[0]
        for i in index:
            image_path = self.faces_info[i]['img_path']
            image_name = image_path.split('/')[-1].split('.')[0]
            image_ext = image_path.split('/')[-1].split('.')[1]
            image = cv2.imread(image_path)
            # Write each face image into a directory named after its cluster label
            output_dir = os.path.join(os.getcwd(), str(label))
            if not os.path.isdir(output_dir):
                os.mkdir(output_dir)
            cv2.imwrite(os.path.join(output_dir, image_name + '.' + image_ext), image)
def predict_gender(self, encoding, thresh=0.4):
    result = self.classifier(dlib.vector(encoding))
    if result > thresh:
        return "male"
    if result < -thresh:
        return "female"
    return "unknown"
def chinese_whispers(encodings, threshold=0.5):
    """
    Chinese Whispers - an Efficient Graph Clustering Algorithm and its
    Application to Natural Language Processing Problems
    """
    encodings = [dlib.vector(enc) for enc in encodings]
    return dlib.chinese_whispers_clustering(encodings, threshold)
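# A minimal usage sketch for the wrapper above, assuming 128-dimensional face
# encodings such as those produced by face_recognition (the sample values below
# are made up for illustration):
sample_encodings = [[0.10] * 128, [0.11] * 128, [0.90] * 128]
cluster_labels = chinese_whispers(sample_encodings, threshold=0.5)
# Encodings whose Euclidean distance is below the threshold end up with the same
# cluster label, e.g. the first two entries share a label here.
print(cluster_labels)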
def __clusterize(self, files_faces, debug_out_folder=None):
    self.__start_stage(len(files_faces))
    encs = []
    indexes = list(range(len(files_faces)))
    random.shuffle(indexes)
    for i in indexes:
        for j in range(len(files_faces[i]['faces'])):
            encs.append(dlib.vector(
                files_faces[i]['faces'][j]['encoding']))

    labels = dlib.chinese_whispers_clustering(
        encs, self.__threshold_clusterize)
    labels = self.__reassign_by_count(labels)

    lnum = 0
    for i in indexes:
        if self.__step_stage():
            break
        for j in range(len(files_faces[i]['faces'])):
            files_faces[i]['faces'][j]['name'] = \
                'unknown_{:05d}'.format(labels[lnum])
            lnum += 1
        if debug_out_folder:
            filename = files_faces[i]['filename']
            media = tools.load_media(filename,
                                     self.__max_size,
                                     self.__max_video_frames,
                                     self.__video_frames_step)
            debug_out_file_name = self.__extract_filename(filename)
            self.__save_debug_images(
                files_faces[i]['faces'], media,
                debug_out_folder, debug_out_file_name)
    self.__end_stage()
def make_psi(self, x, label):
    """Compute PSI(x,label)."""
    psi = dlib.vector()
    # Allocate the full joint feature vector.  Note that the elements of the
    # vector are 0 initialized.
    psi.resize(self.num_dimensions)
    # Copy the 128-dimensional encoding of the first element into the block
    # of PSI that corresponds to its label.
    label_num = label[0]
    for index in range(128):
        psi[label_num * 128 + index] = x[0][index]
    # Do the same for the second element, offset past the first 27 label blocks.
    label_num = label[1]
    for index in range(128):
        psi[label_num * 128 + 128 * 27 + index] = x[1][index]
    # The last element flags whether the label changes between the two positions.
    if label[0] != label[1]:
        psi[-1] = 1
    else:
        psi[-1] = 0
    return psi
def predict_gender(encoding):
    result = _classifier(dlib.vector(encoding))
    if result > 0.5:
        return "male"
    if result < -0.5:
        return "female"
    return "unknown"
def predict_gender(encoding, threshold=0.5):
    result = _classifier(dlib.vector(encoding))
    if result > threshold:
        return "male"
    if result < -threshold:
        return "female"
    return "unknown"
def calc_embded2(file_list):
    embd_list = []
    file_name = []
    for f in file_list:
        img = cv2.imread(f)
        ret = face_recognition.face_encodings(img)
        if len(ret) == 0:
            continue
        file_name.append(f)
        embd_list.append(dlib.vector(ret[0]))
    return file_name, embd_list
def test_vector_set_size():
    v = vector(3)

    v.set_size(0)
    assert len(v) == 0
    assert v.shape == (0, 1)

    v.resize(10)
    assert len(v) == 10
    assert v.shape == (10, 1)

    for i in range(10):
        assert v[i] == 0
def __recognize(self):
    """Recognize a face and compare its descriptor against the known faces in the database."""
    try:
        face_roi: RoiData = self.__frames.get()
        if not self.__config.recognize_faces:
            return
        img: np.ndarray = face_roi.img
        b, g, r = cv2.split(img)
        img_rgb = cv2.merge((r, g, b))

        # win = dlib.image_window()
        # win.clear_overlay()
        # win.set_image(img_rgb)
        # win.add_overlay(face_roi.shape)
        # win.wait_until_closed()

        face_desc = self.__face_rec_model.compute_face_descriptor(img_rgb, face_roi.shape)
        faces = self.__db_worker.select_all_faces()

        wrong_face = True
        for face in faces:
            desc: str = face[3]
            values = [float(x) for x in desc.split('\n')]
            vector = dlib.vector(values)
            faces_dist = distance.euclidean(face_desc, vector)
            if faces_dist < 0.6:
                wrong_face = False
                break

        if wrong_face:
            if len(self.__faces) < 5:
                self.__faces.append(img)
            if not self.__thread_started:
                self.__thread_started = True
                self.__thread = Thread(target=self.__send_notification)
                self.__thread.name = "NotificationThread"
                self.__thread.start()
    except Exception as ex:
        self.__recognition_error(f"{ex}")
def estimate_gender(face):
    """
    Estimates gender based on the face encoding that is passed.

    :param face: dlib's 128-long face encoding
    :return: float, estimated gender.  The gender model has been trained with the
        value 1 for females and -1 for males, so a value of -0.5 means "mainly
        male" and can be treated as such.  Values between -0.3 and 0.3 mean the
        model is not certain enough and should be considered "unknown" or
        "uncertain".
    """
    vector = dlib.vector(face)
    return gender_model(vector)
def compute_similarities(data_dir, similarity_threshold=0.6, identity_threshold=0.4,
                         criminal_fraction=0.1, **kwargs):
    t = Timer()
    all_descriptors = db.get_all_descriptors()
    descriptors = [json.loads(f[1]) for f in all_descriptors]
    face_ids = [f[0] for f in all_descriptors]
    num_faces = len(all_descriptors)
    if num_faces < 2:
        return num_faces, 0, 0

    # Pairwise Euclidean distances between all descriptors:
    # dist(x, y)^2 = |x|^2 + |y|^2 - 2*x.y
    X = Y = np.array(descriptors)
    X2 = Y2 = np.sum(np.square(X), axis=-1)
    dists = np.sqrt(np.maximum(X2[:, np.newaxis] + Y2[np.newaxis] - 2 * np.dot(X, Y.T), 0))

    db.delete_similarities()
    num_similarities = 0
    for i, j in zip(*np.where(dists < float(similarity_threshold))):
        if i != j:
            db.insert_similarity([face_ids[i], face_ids[j], dists[i, j]])
            num_similarities += 1

    # cluster faces and update labels
    descriptors_dlib = [dlib.vector(d) for d in descriptors]
    clusters = dlib.chinese_whispers_clustering(descriptors_dlib, float(identity_threshold))
    db.update_labels(zip(clusters, face_ids))
    num_clusters = len(set(clusters))

    if args.save_clusters:
        for cluster_num, face_id in zip(clusters, face_ids):
            facefile = os.path.realpath(os.path.join(data_dir, args.save_faces, "face_%05d.jpg" % face_id))
            clusterdir = os.path.join(data_dir, args.save_clusters, str(cluster_num))
            makedirs(clusterdir)
            # Symlink to a temporary name, then rename so the final file appears atomically
            os.symlink(facefile, os.path.join(clusterdir, 'tmpfile'))
            os.rename(os.path.join(clusterdir, 'tmpfile'),
                      os.path.join(clusterdir, "face_%05d.jpg" % face_id))

    # remove clusters with more than the given fraction of criminals
    criminal_clusters = db.get_clusters_with_criminals(criminal_fraction)
    for cluster in criminal_clusters:
        db.remove_cluster(cluster['cluster_num'])

    db.commit()
    return num_faces, num_similarities, num_clusters
def match(candidate):
    bestThresh = 9999
    bestIndex = -1
    if len(helpers.unique_persons) > 0:
        for index, person in enumerate(helpers.unique_persons):
            currThresh = helpers.euclidean_dist(candidate, dlib.vector(person["Mean"]))
            if currThresh < helpers.MAX_MATCHING_THRESH:
                if currThresh < bestThresh:
                    bestIndex = index
                    bestThresh = currThresh
    return bestIndex
def cluster_faces(src_dir):
    # Load face metadata
    faces_df = pd.read_csv(os.path.join(src_dir, 'metadata.csv'))

    # Check if clustering already exists
    if 'cluster' not in faces_df.columns:
        # Chinese whispers clustering
        faces_df['embedding'] = faces_df['json_embedding'].apply(json.loads)
        X = np.array([x for x in faces_df['embedding']])
        faces_df['cluster'] = dlib.chinese_whispers_clustering(
            [dlib.vector(x) for x in X], 0.5)

        # Persist clustering
        faces_df.to_csv(os.path.join(src_dir, 'metadata.csv'), index=False)
def training_data():
    r = Random(0)
    predictors = vectors()
    sparse_predictors = sparse_vectors()
    response = array()
    for i in range(30):
        for c in [-1, 1]:
            response.append(c)
            values = [r.random() + c * 0.5 for _ in range(3)]
            predictors.append(vector(values))
            sp = sparse_vector()
            for i, v in enumerate(values):
                sp.append(pair(i, v))
            sparse_predictors.append(sp)
    return predictors, sparse_predictors, response
def test_vector_slice():
    v = vector([1, 2, 3, 4, 5])

    v_slice = v[1:4]
    assert len(v_slice) == 3
    for idx, val in enumerate([2, 3, 4]):
        assert v_slice[idx] == val

    v_slice = v[-3:-1]
    assert len(v_slice) == 2
    for idx, val in enumerate([3, 4]):
        assert v_slice[idx] == val

    v_slice = v[1:-2]
    assert len(v_slice) == 2
    for idx, val in enumerate([2, 3]):
        assert v_slice[idx] == val
def make_psi(self, x, label):
    """Compute PSI(x,label)."""
    # All we are doing here is taking x, which is a 3 dimensional sample
    # vector in this example program, and putting it into one of 3 places in
    # a 9 dimensional PSI vector, which we then return.  So this function
    # returns PSI(x,label).  To see why we setup PSI like this, recall how
    # predict_label() works.  It takes in a 9 dimensional weight vector and
    # breaks the vector into 3 pieces.  Each piece then defines a different
    # classifier and we use them in a one-vs-all manner to predict the
    # label.  So now that we are in the structural SVM code we have to
    # define the PSI vector to correspond to this usage.  That is, we need
    # to setup PSI so that argmax_y dot(weights,PSI(x,y)) ==
    # predict_label(weights,x).  This is how we tell the structural SVM
    # solver what kind of problem we are trying to solve.
    #
    # It's worth emphasizing that the single biggest step in using a
    # structural SVM is deciding how you want to represent PSI(x,label).  It
    # is always a vector, but deciding what to put into it to solve your
    # problem is often not a trivial task.  Part of the difficulty is that
    # you need an efficient method for finding the label that makes
    # dot(w,PSI(x,label)) the biggest.  Sometimes this is easy, but often
    # finding the max scoring label turns into a difficult combinatorial
    # optimization problem.  So you need to pick a PSI that doesn't make the
    # label maximization step intractable but also still well models your
    # problem.
    #
    # Create a dense vector object (note that you can also use unsorted
    # sparse vectors (i.e. dlib.sparse_vector objects) to represent your
    # PSI vector.  This is useful if you have very high dimensional PSI
    # vectors that are mostly zeros.  In the context of this example, you
    # would simply return a dlib.sparse_vector at the end of make_psi() and
    # the rest of the example would still work properly.).
    psi = dlib.vector()
    # Set it to have 9 dimensions.  Note that the elements of the vector
    # are 0 initialized.
    psi.resize(self.num_dimensions)
    dims = len(x)
    if label == 0:
        for i in range(0, dims):
            psi[i] = x[i]
    elif label == 1:
        for i in range(dims, 2 * dims):
            psi[i] = x[i - dims]
    else:  # the label must be 2
        for i in range(2 * dims, 3 * dims):
            psi[i] = x[i - 2 * dims]
    return psi
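# The comments above refer to a predict_label() routine.  For reference, here is
# a minimal sketch of such a routine, assuming 3-dimensional samples and a
# 9-dimensional weight vector as described (it mirrors the one-vs-all scheme from
# dlib's structural SVM example; the name and layout are illustrative, not the
# original implementation):
def predict_label(weights, x):
    # Split the 9-dimensional weight vector into three 3-dimensional classifiers
    # and score the 3-dimensional sample against each of them.
    scores = []
    for k in range(3):
        scores.append(sum(weights[3 * k + d] * x[d] for d in range(3)))
    # One-vs-all: the highest-scoring classifier determines the predicted label.
    return scores.index(max(scores))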
def cluster_embeddings(encodings_path=None):
    # Load previously generated embeddings
    print("Loading encodings...")
    data = pickle.loads(open(Path(encodings_path), "rb").read())
    data = np.array(data)

    # Specifically grab the encodings from the data array.
    # If using dlib's Chinese Whispers clustering, convert to dlib vector format.
    encodings = [dlib.vector(d["encoding"].squeeze()) for d in data]
    # If using KNN, keep in Numpy format
    # encodings = [d["encoding"] for d in data]
    # encodings = np.asarray(encodings).squeeze()

    # Calculate a threshold value for Chinese Whispers from nearest-neighbour distances
    neigh = NearestNeighbors(n_neighbors=5)
    nbrs = neigh.fit(encodings)
    distances, indices = nbrs.kneighbors(encodings)
    distances = np.sort(distances, axis=0)
    distances = distances[:, 2]
    mean_distance = np.mean(distances)
    # plt.plot(distances)
    # plt.show()

    # Clustering with Chinese Whispers algorithm
    labels = dlib.chinese_whispers_clustering(encodings, mean_distance)
    # kmeans = KMeans(n_clusters=5, random_state=0).fit(encodings)
    # label_ids = np.unique(kmeans.labels_)
    # labels = kmeans.labels_

    # Determine the total number of unique faces, as well as their occurrences
    label_ids, counts = np.unique(labels, return_counts=True)
    num_unique_faces = len(label_ids)

    # Split images into clusters based on labels
    image_paths = [d["image_path"] for d in data]
    output_folder = image_paths[0].parent.parent.joinpath("clustered_faces")
    Path(output_folder).mkdir(parents=True, exist_ok=True)
    for i in range(len(image_paths)):
        current_label = labels[i]
        current_file = image_paths[i]
        new_path = output_folder.joinpath(
            str(current_label) + "_" + current_file.name)
        shutil.copy(current_file, new_path)
def cluster():
    s = time.time()
    descriptors = []
    date = input("enter a date in dd-mm-yyyy format")
    from_time = input("enter start time in hh:mm format")
    to_time = input("enter end time in hh:mm format")
    data = ptf.retrive(date, from_time, to_time)
    for d in data:
        descriptors.append(dlib.vector(d))

    # Cluster the faces.
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    e = time.time()
    print(labels)
    print(len(descriptors))
    print(len(labels))
    labset = set(labels)
    print(labset)
    num_classes = len(set(labels))  # total number of clusters
    print("Number of clusters: {}".format(num_classes))
    print(e - s)
    return num_classes
def __init__(self, data_path):
    names = ['time', 'track']
    for i in range(128):
        names += ['d{0}'.format(i)]

    self.data = read_table(data_path, delim_whitespace=True, header=None, names=names)
    self.data.sort_values(by=['track', 'time'], inplace=True)

    # create a descriptor list with dlib's descriptor vector
    descriptors = []
    embeddings = self.data.iloc[:, 2:].values
    for each_i in embeddings:
        face_descriptor = dlib.vector(each_i)
        descriptors.append(face_descriptor)

    # returns a series of labels, e.g. [0 0 2 2 2], one for each row of embeddings
    labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
    # put the series into a column
    self.data['cluster'] = pandas.Series(labels, index=self.data.index)

    # TODO: this can be improved by taking the highest count of labels in each track
    # get the label for each track
    track_label = self.data.groupby(by='track', as_index=False).first()[
        ['track', 'cluster']].values
    # get unique labels
    self.labels = np.unique(track_label[:][:, [1]])

    self.starting_point = Annotation(modality='face')
    for track, segment in self.data.groupby('track').apply(_to_segment).iteritems():
        if not segment:
            continue
        self.starting_point[segment, track] = track_label[track][1]
def train(self, directoryPlain, directoryManual):
    self.reset()
    listFile = []
    listPlainFile = listAllFileInFolder(directoryPlain)
    listManualFile = listAllFileInFolder(directoryManual)
    dicPlainFile = {}
    dicManualFile = {}
    for file in listPlainFile:
        fname = file.strip().split('/')[-1]
        listFile.append(fname)
        dicPlainFile[fname] = file
    for file in listManualFile:
        fname = file.strip().split('/')[-1]
        listFile.append(fname)
        dicManualFile[fname] = file
    listFile = list(set(listFile))

    queries = dlib.ranking_pairs()
    countt = 0
    outfile = open("completefile.txt", 'w')
    for file in listFile:
        outvecfile = open("/home/hien/Data/Work/Wordnet_naiscorp/test/valuevector/" + file.strip().split('/')[-1], 'w')
        countt = countt + 1
        outfile.write(file + '\n')
        print(file, countt)
        self.reset()
        data = dlib.ranking_pair()
        inputNonRelevant = " ".join([line for line in open(dicPlainFile.get(file), 'r').readlines()])
        tpAllSent = myTokenizer(inputNonRelevant)
        self.inputFromString(inputNonRelevant)
        inputRelevant = " ".join([line for line in open(dicManualFile.get(file), 'r').readlines()])
        tpRelevant = myTokenizer(inputRelevant)
        tpNonRelevant = list(set(tpAllSent).difference(set(tpRelevant)))
        self.genAllVector()
        for sent in tpAllSent:
            outvecfile.write(str(self.dicVector.get(sent.strip())) + "\t" + sent.strip() + '\n')
        outvecfile.close()
        for sent in tpRelevant:
            data.relevant.append(dlib.vector(self.dicVector.get(sent.strip())))
        for sent in tpNonRelevant:
            data.nonrelevant.append(dlib.vector(self.dicVector.get(sent.strip())))
        queries.append(data)

    trainer = dlib.svm_rank_trainer()
    trainer.c = 10
    rank = trainer.train(queries)
    _weight = []
    for i in range(0, len(rank.weights)):
        _weight.append(rank.weights[i])
    return _weight
import dlib
import pandas as pd

import settings

# training
train_data = pd.read_csv('training_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_train = train_data["query_id"].tolist()
doc_id_train = train_data["doc_id"].tolist()
train_features = train_data[settings.feature_selected]
train_true = train_data["label"].tolist()

# testing
test_data = pd.read_csv('test_features.csv', index_col=0, encoding="ISO-8859-1")
query_id_test = test_data["query_id"].tolist()
doc_id_test = test_data["doc_id"].tolist()
test_features = test_data[settings.feature_selected]
test_true = test_data["label"]

data = dlib.ranking_pair()
for i in range(len(train_true)):
    # Use row i of the selected feature columns (iloc indexes rows by position).
    if train_true[i] == 1:
        data.relevant.append(dlib.vector(train_features.iloc[i].tolist()))
    elif train_true[i] == 0:
        data.nonrelevant.append(dlib.vector(train_features.iloc[i].tolist()))

trainer = dlib.svm_rank_trainer()
trainer.c = 10
rank = trainer.train(data)

print("Ranking score for a relevant vector: {}".format(
    rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(
    rank(data.nonrelevant[0])))
def test_vector_getitem():
    v = vector([1, 2, 3])
    assert v[0] == 1
    assert v[-1] == 3
    assert v[1] == v[-2]
# run compile_dlib_python_module.bat.  This should work on any operating system
# so long as you have CMake and boost-python installed.  On Ubuntu, this can be
# done easily by running the command:
#     sudo apt-get install libboost-python-dev cmake
import dlib

# Now let's make some testing data.  To make it really simple, let's suppose that
# we are ranking 2D vectors and that vectors with positive values in the first
# dimension should rank higher than other vectors.  So what we do is make
# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking)
# vectors and store them into a ranking_pair object like so:
data = dlib.ranking_pair()
# Here we add two examples.  In real applications, you would want lots of
# examples of relevant and non-relevant vectors.
data.relevant.append(dlib.vector([1, 0]))
data.nonrelevant.append(dlib.vector([0, 1]))

# Now that we have some data, we can use a machine learning method to learn a
# function that will give high scores to the relevant vectors and low scores to
# the non-relevant vectors.
trainer = dlib.svm_rank_trainer()
# Note that the trainer object has some parameters that control how it behaves.
# For example, since this is the SVM-Rank algorithm it has a C parameter that
# controls the trade-off between trying to fit the training data exactly or
# selecting a "simpler" solution which might generalize better.
trainer.c = 10

# So let's do the training.
rank = trainer.train(data)
def test_vector_serialization():
    v = vector([1, 2, 3])
    ser = pickle.dumps(v, 2)
    deser = pickle.loads(ser)
    assert str(v) == str(deser)
#     sudo apt-get install libboost-python-dev cmake
#
import dlib
try:
    import cPickle as pickle
except ImportError:
    import pickle

x = dlib.vectors()
y = dlib.array()

# Make a training dataset.  Here we have just two training examples.  Normally
# you would use a much larger training dataset, but for the purpose of example
# this is plenty.  For binary classification, the y labels should all be either
# +1 or -1.
x.append(dlib.vector([1, 2, 3, -1, -2, -3]))
y.append(+1)

x.append(dlib.vector([-1, -2, -3, 1, 2, 3]))
y.append(-1)

# Now make a training object.  This object is responsible for turning a
# training dataset into a prediction model.  This one here is a SVM trainer
# that uses a linear kernel.  If you wanted to use a RBF kernel or histogram
# intersection kernel you could change it to one of these lines:
#  svm = dlib.svm_c_trainer_histogram_intersection()
#  svm = dlib.svm_c_trainer_radial_basis()
svm = dlib.svm_c_trainer_linear()
svm.be_verbose()
svm.set_c(10)
def test_vector_empty_init():
    v = vector()
    assert len(v) == 0
    assert v.shape == (0, 1)
    assert str(v) == ""
    assert repr(v) == "dlib.vector([])"
def test_vector_init_with_negative_number():
    with raises(Exception):
        vector(-3)
def test_vector_invalid_getitem():
    v = vector([1, 2, 3])
    with raises(IndexError):
        v[-4]
    with raises(IndexError):
        v[3]
def test_vector_init_with_number():
    v = vector(3)
    assert len(v) == 3
    assert v.shape == (3, 1)
    assert str(v) == "0\n0\n0"
    assert repr(v) == "dlib.vector([0, 0, 0])"
# command:
#     sudo apt-get install cmake
#
import dlib

# Now let's make some testing data.  To make it really simple, let's suppose
# that we are ranking 2D vectors and that vectors with positive values in the
# first dimension should rank higher than other vectors.  So what we do is make
# examples of relevant (i.e. high ranking) and non-relevant (i.e. low ranking)
# vectors and store them into a ranking_pair object like so:
data = dlib.ranking_pair()
# Here we add two examples.  In real applications, you would want lots of
# examples of relevant and non-relevant vectors.
data.relevant.append(dlib.vector([1, 0]))
data.nonrelevant.append(dlib.vector([0, 1]))

# Now that we have some data, we can use a machine learning method to learn a
# function that will give high scores to the relevant vectors and low scores to
# the non-relevant vectors.
trainer = dlib.svm_rank_trainer()
# Note that the trainer object has some parameters that control how it behaves.
# For example, since this is the SVM-Rank algorithm it has a C parameter that
# controls the trade-off between trying to fit the training data exactly or
# selecting a "simpler" solution which might generalize better.
trainer.c = 10

# So let's do the training.
rank = trainer.train(data)
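# A minimal sketch of using the trained ranking function from the example above,
# assuming the data object defined there: the rank object returned by
# svm_rank_trainer.train() can be called on a dlib.vector to produce a score and
# exposes the learned weights.
print("Ranking score for a relevant vector:     {}".format(rank(data.relevant[0])))
print("Ranking score for a non-relevant vector: {}".format(rank(data.nonrelevant[0])))
print("Learned weights: {}".format(list(rank.weights)))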
#     sudo apt-get install cmake
#
import dlib
try:
    import cPickle as pickle
except ImportError:
    import pickle

x = dlib.vectors()
y = dlib.array()

# Make a training dataset.  Here we have just two training examples.  Normally
# you would use a much larger training dataset, but for the purpose of example
# this is plenty.  For binary classification, the y labels should all be either
# +1 or -1.
x.append(dlib.vector([1, 2, 3, -1, -2, -3]))
y.append(+1)

x.append(dlib.vector([-1, -2, -3, 1, 2, 3]))
y.append(-1)

# Now make a training object.  This object is responsible for turning a
# training dataset into a prediction model.  This one here is a SVM trainer
# that uses a linear kernel.  If you wanted to use a RBF kernel or histogram
# intersection kernel you could change it to one of these lines:
#  svm = dlib.svm_c_trainer_histogram_intersection()
#  svm = dlib.svm_c_trainer_radial_basis()
svm = dlib.svm_c_trainer_linear()
svm.be_verbose()
svm.set_c(10)
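# A minimal sketch of finishing this example, assuming the trainer configured
# above: train() returns a decision function that can be called on a dlib.vector
# and pickled for later use (the file name below is illustrative).
classifier = svm.train(x, y)
print("Prediction for the first sample:  {}".format(classifier(x[0])))
print("Prediction for the second sample: {}".format(classifier(x[1])))
with open("saved_model.pickle", "wb") as handle:
    pickle.dump(classifier, handle)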
def test_vectors_extend():
    vs = vectors()
    vs.extend([vector([1, 2, 3]), vector([4, 5, 6])])
    assert len(vs) == 2
descriptors = []
images = []

# Now read all the persons' 1024D descriptors from the descriptor file.
personDesc = open(descriptor_file_path, "r")
for line in personDesc:
    descriptorElements = line.split("|")
    print("Processing image: {}".format(descriptorElements[0]))

    # Parse the descriptor values for this image into a dlib vector.
    descriptor = np.array(descriptorElements[1:])
    descriptor = descriptor.astype(float)
    descriptors.append(dlib.vector(descriptor))
    images.append(descriptorElements[0])

# Now let's cluster the faces.
labels = dlib.chinese_whispers_clustering(descriptors, 0.20)
num_classes = len(set(labels))
print("Number of clusters: {}".format(num_classes))

# Find biggest class
biggest_class = None
biggest_class_length = 0
for i in range(0, num_classes):
    class_length = len([label for label in labels if label == i])
    if class_length > biggest_class_length:
        biggest_class_length = class_length
        biggest_class = i
def test_vector_init_with_list():
    v = vector([1, 2, 3])
    assert len(v) == 3
    assert v.shape == (3, 1)
    assert str(v) == "1\n2\n3"
    assert repr(v) == "dlib.vector([1, 2, 3])"