def main():
    # sample = descriptors.random_sample(100000)
    # print("sample obtained")
    # np.savetxt("sample100k.csv", sample, delimiter=',')
    # sample = np.loadtxt("sample100k.csv", delimiter=',')
    # print(sample.shape)
    k = 64
    # Clustering
    # print("Clustering...")
    # codebook = generate_codebook(k, sample)
    # np.savetxt("codebook60", codebook, delimiter=',')
    codebook = np.loadtxt("codebook60", delimiter=',')
    dataset = pickle.load(open('dataset.obj', "rb"))
    # Train
    print("Training...")
    start = time.time()
    svm = svm_train(codebook, dataset.get_train_set())
    end = time.time()
    s = "Elapsed time training {0}".format(utils.humanize_time(end - start))
    print(s)
    # Test
    print("Testing...")
    start = time.time()
    svm_test(svm, codebook, dataset.get_test_set())
    end = time.time()
    s = "Elapsed time testing {0}".format(utils.humanize_time(end - start))
    print(s)
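# Every snippet in this collection relies on a humanize_time helper whose
# implementation is not included here. Two variants appear: utils.humanize_time
# (classifier/ranking code) is formatted as a string, while the game-solver code
# unpacks humanize_time(...) into (amount, suffix). Minimal sketches of both,
# assuming a duration in seconds as input; these are illustrative stand-ins,
# not the actual utils implementation:
def humanize_time_str_sketch(seconds):
    """Hypothetical stand-in for utils.humanize_time: 125.5 -> '0:02:05.500'."""
    hours, rest = divmod(seconds, 3600)
    minutes, secs = divmod(rest, 60)
    return "{0:d}:{1:02d}:{2:06.3f}".format(int(hours), int(minutes), secs)

def humanize_time_pair_sketch(seconds):
    """Hypothetical stand-in for the (amount, suffix) variant: 90 -> (1.5, 'm')."""
    for limit, suffix in ((3600.0, "h"), (60.0, "m")):
        if seconds >= limit:
            return seconds / limit, suffix
    return seconds, "s"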
def current(self):
    song = self.currentsong()
    playlist = self.status()
    res = self.currentsongf() + "\n"
    res += "[playing] #%s/%s" % (song["pos"], playlist["playlistlength"])
    if "time" in playlist:
        current, total = playlist["time"].split(":")
        percentage = int(100 * float(current) / float(total))
        res += " %s/%s (%s%%)" % (utils.humanize_time(current),
                                  utils.humanize_time(total), percentage)
    return res
def test_knn():
    dataset = pickle.load(open("dataset.obj", "rb"))
    n_classes = len(dataset.get_classes())
    start = time.time()
    predictions = knn.knn(dataset)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time using knn {0}...".format(elapsed_time))
    print("predictions = \n{0}".format(predictions))
    utils.write_list(predictions, "results/knn-predictions.txt")
    # predictions = [
    #     [1, 1, 0, 2, 4, 3, 2, 0, 2, 4, 0, 3, 2, 1, 1],
    #     [1, 2, 4, 2, 1, 0, 4, 1, 3, 2, 2, 2, 1, 2, 1],
    #     [2, 3, 4, 2, 2, 0, 2, 0, 3, 3, 1, 2, 2, 2, 3],
    #     [0, 1, 3, 3, 3, 3, 1, 3, 3, 3, 2, 2, 3, 0, 1],
    #     [3, 0, 2, 1, 4, 2, 1, 0, 2, 4, 1, 1, 4, 2, 3]
    # ]
    # Histogram where hist[i][c] counts how often true class i was predicted as c
    hist = np.zeros((n_classes, n_classes), dtype=np.uint16)
    for i in range(len(predictions)):
        for j in range(len(predictions[i])):
            c = predictions[i][j]
            hist[i][c] += 1
    print("hist = \n{0}".format(hist))
    np.savetxt("results/knn-hist.csv", hist, fmt="%i", delimiter=",")
    # Normalize by the number of test images per class (25) to get rates
    confusion_matrix = hist / 25.0
    print("conf mat = \n{0}".format(confusion_matrix))
    # Mean of the diagonal, i.e. average per-class accuracy
    values = [confusion_matrix[i][i] for i in range(n_classes)]
    precision = np.average(values)
    print("precision = {0}".format(precision))
    plt.matshow(confusion_matrix)
    plt.title('Confusion matrix')
    plt.colorbar()
    plt.show()
def test_descriptors():
    img = cv2.imread(constants.TESTING_IMG_PATH)
    cv2.imshow("Normal Image", img)
    print("Normal Image")
    option = input("Enter [1] for using ORB features and other number to use SIFT.\n")
    start = time.time()
    if option == 1:
        orb = cv2.ORB()
        kp, des = orb.detectAndCompute(img, None)
    else:
        sift = cv2.SIFT()
        kp, des = sift.detectAndCompute(img, None)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    # Use the same condition as the detector selection above so the reported
    # name matches the detector that was actually used
    des_name = constants.ORB_FEAT_NAME if option == 1 else constants.SIFT_FEAT_NAME
    print("Elapsed time getting descriptors {0}".format(elapsed_time))
    if des is not None and len(des) > 0:
        print("Number of descriptors found {0}".format(len(des)))
        print("Dimension of descriptors {0}".format(len(des[0])))
    print("Name of descriptors used is {0}".format(des_name))
    img2 = cv2.drawKeypoints(img, kp)
    # plt.imshow(img2), plt.show()
    cv2.imshow("{0} descriptors".format(des_name), img2)
    print("Press any key to exit ...")
    cv2.waitKey()
def all_classes_descriptors(dataset, opt="sample"):
    start = time.time()
    des_files = glob.glob("train/*.mat")
    n_classes = len(des_files)
    # train_set = dataset.get_train_set()
    # Read training descriptors
    print("Getting sample of the descriptors for classes")
    classes_des = []
    for c in range(n_classes):
        # class_files = train_set[c]
        path = des_files[c]
        print("Reading class number {0}".format(c))
        # class_des = class_descriptors(class_files)
        class_des = class_des_from_file(path)
        sample_size = 5000
        if opt == "sample":
            current_sample = utils.random_sample(class_des, sample_size)
            current_sample = np.array(current_sample, dtype=np.uint16)
            print("current sample shape = {0}".format(current_sample.shape))
            # class_des = None
            classes_des.append(current_sample)
        else:
            classes_des.append(class_des)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time getting training descriptors {0}".format(elapsed_time))
    return classes_des
def test_store_descriptors():
    start = time.time()
    dataset = pickle.load(open("dataset.obj", "rb"))
    main.store_test_des(dataset)
    end = time.time()
    s = "Elapsed time processing {0}".format(utils.humanize_time(end - start))
    print(s)
def connected(self):
    import urllib
    import BeautifulSoup
    import socket
    f = urllib.urlopen("http://*****:*****@www.sleduc.fr/server-status")
    soup = BeautifulSoup.BeautifulSoup(f.read())
    clients = {}
    res = "List of clients connected to mpd:\n"
    for tr in soup.findAll("tr"):
        hosts = tr.findAll("td", {"nowrap": "nowrap"})
        for host in hosts:
            if "mpd.sleduc.fr" in host or "mpd.leduc.42" in host:
                lst = tr.findAll("td")
                since = utils.humanize_time(lst[5].text)
                ip = str(lst[10].text)
                vhost = str(lst[11].text)
                clients[ip] = (vhost, since)
    for ip, couple in clients.iteritems():
        vhost, since = couple
        try:
            reverse = socket.gethostbyaddr(ip)[0]
        except socket.herror:
            reverse = "Could not resolve hostname"
        res += "\t- %s (%s)\n\t\tOn %s, since %s\n" % (ip, reverse, vhost, since)
    return res[0:-1]
def metrics(self, rankings, train_labels, test_labels, sorted_prods):
    ### Calculates classification and products set and position mAP    ###
    ###------------------------------------------------------------------###
    print("Starting to calculate metrics ...")
    start = time.time()
    rel_ranks = []
    for i in range(len(rankings)):
        rel_ranks.append(utils.relevance_ranking(
            rankings[i], train_labels, test_labels[i]))
    # Classification mAP
    # ----------------------------------------------------------------------
    class_ap = [utils.class_ap(rel_rk) for rel_rk in rel_ranks]
    class_ap_filename = "results/class_avg_precs_{0}.txt".format(self.n_classes)
    utils.write_list(class_ap, class_ap_filename)
    class_map = np.mean(class_ap)
    self.log += "ranking size = {0}".format(len(rankings[0])) + "\n"
    s = "classification mean average precision = {0}".format(class_map)
    self.log += s + "\n"
    print(s)
    # Dot products average precision
    # ----------------------------------------------------------------------
    # Set
    set_prec = []
    for i in range(len(rankings)):
        indices = [prods[0] for prods in sorted_prods[i]]
        precision = utils.prod_set_prec(indices, rankings[i])
        set_prec.append(precision)
    set_ap_filename = "results/set_avg_precs_{0}.txt".format(self.n_classes)
    utils.write_list(set_prec, set_ap_filename)
    set_map = np.mean(set_prec)
    s = "set mean average precision = {0}".format(set_map)
    self.log += s + "\n"
    print(s)
    # Position
    pos_prec = []
    for i in range(len(rankings)):
        indices = [prods[0] for prods in sorted_prods[i]]
        precision = utils.prod_pos_prec(indices, rankings[i])
        pos_prec.append(precision)
    pos_ap_filename = "results/pos_avg_precs_{0}.txt".format(self.n_classes)
    utils.write_list(pos_prec, pos_ap_filename)
    pos_map = np.mean(pos_prec)
    s = "position mean average precision = {0}".format(pos_map)
    self.log += s + "\n"
    print(s)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time calculating metrics: {0}".format(elapsed_time)
    self.log += s + "\n"
    print(s)
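# utils.relevance_ranking is not included in this collection. From its use in
# metrics() it turns a ranking of train-set indices into a binary relevance
# list by comparing labels. A minimal sketch under that assumption:
def relevance_ranking_sketch(ranking, train_labels, test_label):
    """Hypothetical stand-in: 1 where the ranked train item shares the test
    item's label, 0 elsewhere."""
    return [1 if train_labels[idx] == test_label else 0 for idx in ranking]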
def solve_game_specialized_ab_memo(board, depth, memo,
                                   at_least_draw=False, at_most_draw=False):
    """
    Computes the theoretical result of the current position.
    The return value is:
         1 when the current player wins
         0 when the game ends in a draw with optimal play
        -1 when the other player wins
    """
    h = board.myhash
    if h not in memo:
        Stats.total += 1
        if Stats.total % 1000000 == 0:
            Stats.end = time()
            time_diff = Stats.end - Stats.start
            amount, suffix = humanize_time(time_diff)
            percentage = (100 * Stats.total) / 668607278
            print(f"visited {Stats.total // 1000000}M nodes [{percentage:.2f}%]"
                  f" in {amount:.2f}{suffix}")
        # this will never be -1, because we don't go that far
        evaluation = board.evaluate
        if evaluation is not None:
            if evaluation == 1:
                # this means we have just lost
                return -1
            else:
                return 0
        if depth == 0:
            raise RecursionError
        result = -1
        for move in board.legal_moves:
            # negamax: the draw bounds swap roles when switching sides
            value = -solve_game_specialized_ab_memo(
                board.apply_move(move), depth - 1, memo,
                at_most_draw, at_least_draw)
            result = max(value, result)
            # if we found a winning move, there is no need to check other moves
            if result == 1:
                break
            if result == 0:
                if at_most_draw:
                    # we don't want to save this result in memo - it may not be accurate!
                    # break
                    return 0
                else:
                    at_least_draw = True
        memo[h] = result
    return memo[h]
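# A minimal usage sketch for the solver above, assuming the Board, Config and
# Stats objects defined elsewhere in this module; illustrative only, not part
# of the original code:
def solve_from_empty_board_sketch():
    memo = {}
    Stats.start = time()
    result = solve_game_specialized_ab_memo(
        Board(), Config.COLS * Config.ROWS, memo)
    print(f"Result from the empty board: {result} "
          f"(1 = side to move wins, 0 = draw, -1 = loss)")
    return result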
def test_codebook():
    dataset = pickle.load(open(constants.DATASET_OBJ_FILENAME, "rb"))
    option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n")
    start = time.time()
    des = descriptors.all_descriptors(dataset, dataset.get_train_set(), option)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time getting all the descriptors is {0}".format(elapsed_time))
    k = 64
    des_name = constants.ORB_FEAT_NAME if option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
    codebook_filename = "codebook_{0}_{1}.csv".format(k, des_name)
    start = time.time()
    codebook = descriptors.gen_codebook(dataset, des, k)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time calculating the k means for the codebook is {0}".format(elapsed_time))
    np.savetxt(codebook_filename, codebook, delimiter=constants.NUMPY_DELIMITER)
    print("Codebook saved in {0}, press any key to exit ...".format(codebook_filename))
    cv2.waitKey()
def store_hash(self, wta_hash):
    ## Store the hash in a binary file
    print("Storing the hash in a file ...")
    start = time.time()
    hash_filename = "results/wtahash_{0}.obj".format(self.n_classes)
    pickle.dump(wta_hash, open(hash_filename, "wb"), protocol=2)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time storing the hash {0}".format(elapsed_time)
    self.log += s + "\n"
    print(s)
def best_classifiers(self, images, ranking_size):
    start = time.time()
    ConvWTAImage = wta.ConvertirenWTA(images, self.permutations, self.n,
                                      self.k, self.w)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time converting test set to WTA binary {0}.".format(
        elapsed_time))
    BW1 = wta.GetinBinaryMayor(ConvWTAImage)
    values = wta.ObtenerValoresTotalesWTA(self.classifiersBW, BW1,
                                          self.whash, ranking_size)
    return values
def solve_game_main():
    try:
        # result = solve_game_alphabeta_top(Board(), Config.COLS * Config.ROWS)
        result = solve_game_specialized_ab_top_memo(Board(), Config.COLS * Config.ROWS)
    except RecursionError:
        result = "Not deep enough"
    print(f"Game solved. Result = {result}")
    print("[1 = first player wins, -1 = second player wins, 0 = draw]")
    print(f"Total visited nodes = {Stats.total:,}")
    Stats.end = time()
    amount, suffix = humanize_time(Stats.end - Stats.start)
    print(f"Total time: {amount:.2f}{suffix}")
def get_rankings(self, test_data, wta_hash, ranking_size):
    ### Get the rankings for the test set                              ###
    ###------------------------------------------------------------------###
    print("Generating ranking matrix for the test set ...")
    start = time.time()
    rankings = wta_hash.best_classifiers(test_data, ranking_size)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time generating ranking matrix: {0}".format(elapsed_time)
    self.log += s + "\n"
    print(s)
    return rankings
def print_time_estimate(depth, times):
    if times:
        if len(times) > 1:
            # Assume roughly geometric growth per ply: extrapolate with the
            # last observed ratio, estimate = t[-1] * (t[-1] / t[-2])
            den = max(0.1, times[-2])
            estimate = (times[-1] ** 2) / den
        else:
            estimate = times[-1] * Config.COLS
        est_time, suffix = humanize_time(estimate)
        print(f"Current depth: {depth} "
              f"estimated time: {est_time:.2f}{suffix}",
              flush=True, end="")
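# Worked example of the extrapolation in print_time_estimate (illustrative
# numbers, not measured): if depth d-1 took 0.5s and depth d took 2.0s, the
# observed per-ply growth is 4x, so depth d+1 is estimated at 2.0**2 / 0.5 =
# 8.0s. The max(0.1, ...) floor keeps very fast shallow searches from causing
# a division by (near) zero.
def _estimate_example():
    times = [0.5, 2.0]
    estimate = (times[-1] ** 2) / max(0.1, times[-2])
    assert estimate == 8.0
    return estimate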
def create_hash(self, train_data, n, k, w):
    ### Use WTAHash on it                                              ###
    ###------------------------------------------------------------------###
    print("Starting to generate hash table ...")
    start = time.time()
    wta_hash = wh.WTAHash(train_data, n, k, w)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time on generation of hash table: {0}".format(elapsed_time)
    self.log += s + "\n"
    print(s)
    return wta_hash
def solve_game_memo(board, depth, memo, moves_made):
    """
    Computes the theoretical result of the current position.
    The return value is:
         1 when the current player wins
         0 when the game ends in a draw with optimal play
        -1 when the other player wins
    """
    h = board.myhash
    SOLVE_CUT_OFF = 18
    if h not in memo:
        Stats.total += 1
        if Stats.total % 2500000 == 0:
            Stats.end = time()
            time_diff = Stats.end - Stats.start
            amount, suffix = humanize_time(time_diff)
            # percentage = (100 * Stats.total) / TOTAL55_20
            print(f"visited {Stats.total // 1000}k nodes"  # [{percentage:.2f}%]"
                  f" in {amount:.2f}{suffix} [{len(memo):,}]")
        # this will never be -1, because we don't go that far
        evaluation = board.evaluate
        if evaluation is not None:
            if evaluation == 1:
                # this means we have just lost
                result = -1
            else:
                result = 0
        elif depth == 0:
            raise RecursionError
        else:
            # we use the negamax schema
            # our best result is the worst result of the opponent
            result = -1 * min([
                solve_game_memo(board.apply_move(move), depth - 1, memo,
                                moves_made + 1)
                for move in board.legal_moves
            ])
        if moves_made < SOLVE_CUT_OFF:
            memo[h] = result
        else:
            return result
    return memo[h]
def test(self, session, test_eval):
    self.logger.info("testing started")
    test_handle = session.run(self.iterators[0].string_handle())
    tStart = time.time()
    session.run(self.inits[0])
    test_metrics, _ = self.evaluate_batch(
        0, self.num_batches[0], test_eval, session, "test", test_handle)
    running_time = time.time() - tStart
    self.logger.info(
        "TEST: em_score: {0:.4f}, em_score_start: {1:.4f}, "
        "em_score_end: {2:.4f}, f1_score: {3:.4f}, loss: {4:.4f} "
        "running_time: {5:s}".format(
            test_metrics["exact_match"], test_metrics["em_start"],
            test_metrics["em_end"], test_metrics["f1"],
            test_metrics["loss"], humanize_time(running_time)))
def read_descriptors(self, training_percentage, set_name):
    ### Load training information matrix                               ###
    ###------------------------------------------------------------------###
    print("Reading {0} instances ...".format(set_name))
    start = time.time()
    data, labels = cluster.load_classes(training_percentage, self.dataset,
                                        set_name, self.n_classes)
    end = time.time()
    self.log += "{0} matrix of shape {1}\n".format(set_name, data.shape)
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time reading the {0} files: {1}".format(set_name, elapsed_time)
    self.log += s + "\n"
    print(s)
    return data, labels
def knn(dataset, des_classes):
    n_classes = len(dataset.get_classes())
    classes_sample = all_classes_descriptors(dataset)
    # Read testing descriptors
    start = time.time()
    test_folders = glob.glob("test/*")
    predictions = []
    counter = 0
    for folder in test_folders:
        print("Starting to predict the test set of class {0}.".format(counter))
        predictions.append([])
        # The i-th element of this list has the descriptors for the i-th image;
        # all the images are in the same class-folder
        test_files = glob.glob(folder + "/*.mat")
        # for i in range(len(test_files)):
        for i in range(25):
            distances = np.zeros(n_classes)
            # percentage = (i * 100) / len(test_files)
            percentage = (i * 100) / 25
            print("Loading SIFT from file {0} of {1} ({2}%) class={3} ...".format(
                # i, len(test_files), percentage, counter
                i, 25, percentage, counter))
            fname = test_files[i]
            data = sio.loadmat(fname)
            des = data["stored"]
            # Find the nearest class
            for c in range(n_classes):
                s = "Dist for img number {0} to class {1} (real = {2})".format(
                    i, c, counter)
                print(s)
                class_des = classes_sample[c]
                distances[c] = dist_nn_class(des, class_des)
            predictions[-1].append(np.argmin(distances))
        counter += 1
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time testing with KNN {0}".format(elapsed_time))
    return predictions
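# dist_nn_class is not included in this collection. From its use in knn() it
# scores how close an image's descriptors are to a class's descriptor sample.
# A minimal sketch, assuming a mean nearest-neighbour Euclidean distance; the
# actual metric and aggregation may differ:
def dist_nn_class_sketch(des, class_des):
    """Hypothetical stand-in: mean distance from each descriptor in an image
    to its nearest neighbour in the class descriptor sample."""
    import numpy as np
    sample = class_des.astype(np.float64)
    total = 0.0
    for d in des:
        diffs = sample - d.astype(np.float64)
        total += np.sqrt((diffs ** 2).sum(axis=1)).min()
    return total / len(des)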
def test_create_hash():
    n = 32
    w = 2
    k = 16
    train_file = sio.loadmat("train/des_bear.mat")
    train_data = train_file["stored"]
    print("Starting to generate hash table ...")
    start = time.time()
    wta_hash = wh.WTAHash(train_data, n, k, w)
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    print("Elapsed time on generation of hash table: {0}".format(elapsed_time))
    print("Starting to store the hash ...")
    start = time.time()
    pickle.dump(wta_hash, open("bear_hash.obj", "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)
    end = time.time()
    print("Elapsed time storing the hash {0} seconds".format(end - start))
def main():
    start = time.time()
    # Define the training and testing sets
    # path = "dataset"
    # dataset = Dataset(path)
    # dataset.generate_sets()
    # pickle.dump(dataset, open("dataset.obj", "wb"), protocol=2)
    dataset = pickle.load(open("dataset.obj", "rb"))
    # Get SIFT descriptors per class
    classes = dataset.get_classes()
    train_set = dataset.get_train_set()
    # Starts at class index 5; the first five classes were presumably
    # processed in an earlier run
    for i in range(5, len(classes)):
        class_name = classes[i]
        class_files = train_set[i]
        print("Getting descriptors for class {0} of length {1}".format(
            class_name, len(class_files)))
        store_train_des(class_files, class_name)
    end = time.time()
    s = "Elapsed time processing {0}".format(utils.humanize_time(end - start))
    print(s)
def evaluate_batch(self, epoch, num_batches, eval_file, sess, data_type,
                   str_handle):
    answer_dict = {}
    remapped_dict = {}
    losses = []
    tStart = time.time()
    for nb in range(1, num_batches + 1):
        if self.summaries:
            qa_id, loss, yp1, yp2, grad_sums = sess.run(
                [self.model.qa_ids, self.model.loss, self.model.yp1,
                 self.model.yp2, self.grad_summaries_merged],
                feed_dict={self.model.handle: str_handle,
                           self.model.is_train: False})
        else:
            qa_id, loss, yp1, yp2 = sess.run(
                [self.model.qa_ids, self.model.loss, self.model.yp1,
                 self.model.yp2],
                feed_dict={self.model.handle: str_handle,
                           self.model.is_train: False})
            grad_sums = None
        answer_dict_, remapped_dict_ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answer_dict.update(answer_dict_)
        remapped_dict.update(remapped_dict_)
        losses.append(loss)
        running_time = time.time() - tStart
        # nb batches are finished at this point, so the ETA uses nb, not nb + 1
        rt = humanize_time(running_time / float(nb) * num_batches - running_time)
        print('Epoch {0:d} {1:s}: {2:d}/{3:d} batches; ETA: {4:s}'.format(
            epoch + 1 + self.epoch_start, data_type, nb, num_batches, rt),
            end='\r')
    if data_type == 'test':
        with open(os.path.join(self.out_dir, 'answer.json'), "w") as fh:
            json.dump(remapped_dict, fh)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"])])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"])])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"])])
    return metrics, [loss_sum, f1_sum, em_sum, grad_sums]
def solve_game_quick_jump(board, depth):
    """
    Computes the theoretical result of the current position.
    The return value is:
         1 when the current player wins
         0 when the game ends in a draw with optimal play
        -1 when the other player wins
    """
    Stats.total += 1
    if Stats.total % 500000 == 0:
        Stats.end = time()
        time_diff = Stats.end - Stats.start
        amount, suffix = humanize_time(time_diff)
        percentage = (100 * Stats.total) / TOTAL44
        print(f"visited {Stats.total // 1000000}M nodes [{percentage:.2f}%]"
              f" in {amount:.2f}{suffix}")
    # this will never be -1, because we don't go that far
    evaluation = board.evaluate
    if evaluation is not None:
        if evaluation == 1:
            # this means we have just lost
            return -1
        else:
            return 0
    if depth == 0:
        raise RecursionError
    result = -1
    for move in board.legal_moves:
        value = -solve_game_quick_jump(board.apply_move(move), depth - 1)
        result = max(value, result)
        # if we found a winning move, there is no need to check other moves
        if result == 1:
            break
    return result
def use_normal_map(normal_img, normal_opt):
    # Create an array from the image
    normal_im_array = np.asarray(normal_img)
    # TODO move this up
    normals_filename = (NORMAL_VECTORS_FILENAME + normal_opt
                        + NORMAL_VECTORS_FILE_EXT)
    if os.path.exists(normals_filename):
        # Load normals from file
        print("Loading normal vectors from file {}".format(normals_filename))
        normals = np.load(normals_filename)
        w, h, _ = normals.shape
        return normals, w, h
    # Create the normals vector map
    start_normals = time.time()
    normals = adjust_normal_map(normal_im_array)
    np.save(normals_filename, normals)
    print("Normal vectors stored inside {} file".format(normals_filename))
    end_normals = time.time()
    elapsed_time = utils.humanize_time(end_normals - start_normals)
    print("Time adjusting normals was: {}".format(elapsed_time))
    # Create output image vector
    w, h, _ = normals.shape
    return normals, w, h
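# adjust_normal_map is not included in this collection. A minimal sketch of the
# standard normal-map decoding it presumably performs (RGB values in [0, 255]
# mapped to unit xyz vectors in [-1, 1]); illustrative only:
def adjust_normal_map_sketch(im_array):
    """Hypothetical stand-in: decode an RGB normal map into unit normals."""
    import numpy as np
    normals = (im_array[:, :, :3].astype(np.float64) / 127.5) - 1.0
    lengths = np.linalg.norm(normals, axis=2, keepdims=True)
    return normals / np.maximum(lengths, 1e-12)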
def search_all_main(start=0, end=Config.COLS * Config.ROWS):
    prev_times = []
    print(f"Max depth = {end}")
    for depth in range(start, end + 1):
        Stats.reset()
        print_time_estimate(depth, prev_times)
        # local timing names avoid shadowing the start/end parameters
        t0 = time()
        memo_size = 0
        # search_all(Board(), depth)
        memo_size = search_all_memo_top(Board(), depth)
        t1 = time()
        time_diff = t1 - t0
        prev_times.append(time_diff)
        elapsed, suffix = humanize_time(time_diff)
        if Stats.terminal > 0:
            percentage = 100 * Stats.finished // Stats.terminal
        else:
            percentage = 100
        print(f"\rDepth {depth:2}: {Stats.total:13,} "
              f"states in {elapsed:2.2f}{suffix} "
              f"[{percentage}%] {Stats.finished:,} / {Stats.terminal:,} "
              f"[memo size: {memo_size:,}]")
def main(is_interactive=True, k=64, des_option=constants.ORB_FEAT_OPTION,
         svm_kernel=cv2.SVM_LINEAR):
    if not is_interactive:
        experiment_start = time.time()
    # Check for the dataset of images
    if not os.path.exists(constants.DATASET_PATH):
        print("Dataset not found, please copy one.")
        return
    dataset = Dataset(constants.DATASET_PATH)
    dataset.generate_sets()
    # Check for the directory where generated files are stored
    if not os.path.exists(constants.FILES_DIR_NAME):
        os.makedirs(constants.FILES_DIR_NAME)
    if is_interactive:
        des_option = input("Enter [1] for using ORB features or [2] to use SIFT features.\n")
        k = input("Enter the number of cluster centers you want for the codebook.\n")
        svm_option = input("Enter [1] for using SVM kernel Linear or [2] to use RBF.\n")
        svm_kernel = cv2.SVM_LINEAR if svm_option == 1 else cv2.SVM_RBF
    des_name = constants.ORB_FEAT_NAME if des_option == constants.ORB_FEAT_OPTION else constants.SIFT_FEAT_NAME
    log = Log(k, des_name, svm_kernel)
    codebook_filename = filenames.codebook(k, des_name)
    if is_interactive:
        codebook_option = input("Enter [1] for generating a new codebook or [2] to load one.\n")
    else:
        codebook_option = constants.GENERATE_OPTION
    if codebook_option == constants.GENERATE_OPTION:
        # Calculate all the training descriptors to generate the codebook
        start = time.time()
        des = descriptors.all_descriptors(dataset, dataset.get_train_set(), des_option)
        end = time.time()
        log.train_des_time(end - start)
        # Generate the codebook using k-means
        print("Generating a codebook using K-Means with k={0}".format(k))
        start = time.time()
        codebook = descriptors.gen_codebook(dataset, des, k)
        end = time.time()
        log.codebook_time(end - start)
        # Store the codebook in a file
        utils.save(codebook_filename, codebook)
        print("Codebook saved in {0}".format(codebook_filename))
    else:
        # Load a codebook from a file
        print("Loading codebook ...")
        codebook = utils.load(codebook_filename)
        print("Codebook with shape = {0} loaded.".format(codebook.shape))
    # Train and test the dataset
    classifier = Classifier(dataset, log)
    svm = classifier.train(svm_kernel, codebook, des_option=des_option,
                           is_interactive=is_interactive)
    print("Training ready. Now beginning with testing")
    result, labels = classifier.test(codebook, svm, des_option=des_option,
                                     is_interactive=is_interactive)
    # Store the results from the test
    classes = dataset.get_classes()
    log.classes(classes)
    log.classes_counts(dataset.get_classes_counts())
    result_filename = filenames.result(k, des_name, svm_kernel)
    test_count = len(dataset.get_test_set()[0])
    result_matrix = np.reshape(result, (len(classes), test_count))
    utils.save_csv(result_filename, result_matrix)
    # Create a confusion matrix
    confusion_matrix = np.zeros((len(classes), len(classes)), dtype=np.uint32)
    for i in range(len(result)):
        predicted_id = int(result[i])
        real_id = int(labels[i])
        confusion_matrix[real_id][predicted_id] += 1
    print("Confusion Matrix =\n{0}".format(confusion_matrix))
    log.confusion_matrix(confusion_matrix)
    log.save()
    print("Log saved on {0}.".format(filenames.log(k, des_name, svm_kernel)))
    if not is_interactive:
        experiment_end = time.time()
        elapsed_time = utils.humanize_time(experiment_end - experiment_start)
        print("Total time during the experiment was {0}".format(elapsed_time))
    else:
        # Show a plot of the confusion matrix on interactive mode
        utils.show_conf_mat(confusion_matrix)
        raw_input("Press [Enter] to exit ...")
def predict_time(self, time):
    elapsed_time = utils.humanize_time(time)
    self.text += "Elapsed time predicting the testing set is {0}\n".format(elapsed_time)
def vlad_time(self, time, set):
    template = "Time for getting VLAD global descriptors of the {0} images was {1}.\n"
    elapsed_time = utils.humanize_time(time)
    self.text += template.format(set, elapsed_time)
def svm_time(self, time):
    template = "Time for calculating the SVM was {0}.\n"
    elapsed_time = utils.humanize_time(time)
    self.text += template.format(elapsed_time)
def codebook_time(self, time):
    template = "Time for generating the codebook with k-means was {0}.\n"
    elapsed_time = utils.humanize_time(time)
    self.text += template.format(elapsed_time)
def train_des_time(self, time):
    template = "Time for getting all the local descriptors of the training images was {0}.\n"
    elapsed_time = utils.humanize_time(time)
    self.text += template.format(elapsed_time)
def dot_products(self, train_data, test_data, rankings):
    '''
    Calculates the dot product for each element in the test set with every
    element of the train set. Returns a two-column matrix: the first column
    is the index of the object in the train set, and the second column is the
    value of the dot product of that object with the test object whose index
    equals the number of the row. Hence the number of rows equals the number
    of objects in the test set.

    Args:
        train_data (np matrix of floats): Each row is the vector of an object
            in the train set.
        test_data (np matrix of floats): Each row is the vector of an object
            in the test set.
        rankings (list of lists of int): The ranking created for each object
            in the test set.

    Returns:
        list of list of tuples:
            e.g.:              0    ranking_size
                               |    |
                       0 -    [[(21, 0.91), (3, 0.87), ...],
                               [(10, 0.83), (0, 0.72), ...],
                               ...
          len(test_data) -     [ ... ]]
            int: Index of the object in the train set that should be ranked
                in the i-th position, where i is the number of the row.
            float: The value of the dot product between the object in the
                train set and the object in the test set in the i-th
                position, where i is the number of the row.
        numpy array of arrays of floats: Dot products where the [i-th, j-th]
            element is the product between the i-th object of the testing
            set and the j-th object of the training set.
    '''
    ### Calculate dot product on the variables                         ###
    ###------------------------------------------------------------------###
    print("Calculating dot products on the rankings ...")
    start = time.time()
    # products is the matrix that stores the dot product of each testing
    # vector with each training vector
    sorted_prods = []
    products = []
    ranking_size = len(rankings[0])
    # report progress roughly every 5% of the test set
    step = max(1, (len(test_data) * 5) / 100)
    train_norm = [utils.normalize(train_vec) for train_vec in train_data]
    train_norm = np.array(train_norm)
    for i in range(len(test_data)):
        # y is the current testing vector
        y = test_data[i]
        y_norm = utils.normalize(y)
        current_tuples = []
        products.append([])
        for j in range(len(train_data)):
            # vector is the training object ranked in the current position
            vector_norm = train_norm[j]
            prod = np.dot(y_norm, vector_norm)
            if j < ranking_size:
                products[i].append(prod)
            current_tuples.append((j, prod))
        current_tuples.sort(key=lambda x: x[1], reverse=True)
        sorted_prods.append(current_tuples[:ranking_size])
        if i % step == 0:
            percentage = (i * 100) / len(test_data)
            print("Vector number {0} of {1} ({2}%) multiplied".format(
                i, len(test_data), percentage))
    end = time.time()
    elapsed_time = utils.humanize_time(end - start)
    s = "Elapsed time calculating dot products: {0}".format(elapsed_time)
    self.log += s + "\n"
    print(s)
    return sorted_prods, np.array(products)
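# utils.normalize is not shown in this collection. The dot products above are
# cosine similarities only if it performs L2 (unit-length) normalization; a
# minimal sketch under that assumption:
def normalize_sketch(vec):
    """Hypothetical stand-in for utils.normalize: scale vec to unit L2 norm."""
    import numpy as np
    norm = np.linalg.norm(vec)
    return vec / norm if norm > 0 else vec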