# Targets for the "< year or > year" classifier: a copy of the training
# labels in which the entries selected by less_year become year1 and the
# entries selected by greater_year become year2, stored as a column vector.
music_train_y_year_yes_or_not = np.array(music_train.y)
music_train_y_year_yes_or_not[less_year] = year1
music_train_y_year_yes_or_not[greater_year] = year2
music_train_y_year_yes_or_not = music_train_y_year_yes_or_not.reshape(
    len(music_train_y_year_yes_or_not), 1)

# "< year or > year" classifier, fitted on every training row.
theta_year_yes_or_not = solve_normal_equation(
    music_train.X, music_train_y_year_yes_or_not, 0)

# "< year" classifier: fit and score on the less_year rows only.
y = np.array(music_train.y[less_year])
y = y.reshape(len(y), 1)
X = music_train.X[np.where(less_year)]
theta_year_less = solve_normal_equation(X, y, 0)
print(compute_accuracy(X, y, theta_year_less, delta_year))

# "> year" classifier: fit and score on the greater_year rows only.
y = np.array(music_train.y[greater_year])
y = y.reshape(len(y), 1)
X = music_train.X[np.where(greater_year)]
# NOTE(review): the fit indexes music_train.X with greater_year directly
# while the accuracy below uses X (indexed through np.where) -- confirm the
# two selections are meant to be the same rows.
theta_year_more = solve_normal_equation(music_train.X[greater_year], y, 0)
print(compute_accuracy(X, y, theta_year_more, delta_year))

# Accuracy of the combined three-model predictor on the training set.
print(compute_accuracy_year(music_train.X, music_train.y,
                            theta_year_yes_or_not, theta_year_less,
                            theta_year_more, year1, year2, delta_year))
# The same evaluation on the validation set is currently disabled:
# print(compute_accuracy_year(music_validation.X, music_validation.y,
#                             theta_year_yes_or_not, theta_year_less,
#                             theta_year_more, year1, year2, delta_year))
# Set train parameters.
# Values tried earlier and left disabled: lambdav = 0.00001,
# alpha = 0.0000001, iterations = 1000000.
lambdav = 0
alpha = 0.1
iterations = 1200

# theta currently comes from the normal equation; the matching progress
# message ("Solving normal equation.") is disabled.
theta = solve_normal_equation(music_train.X, music_train.y, lambdav)

print("Solving using gradient descent.")
# Gradient-descent alternatives, currently disabled:
# theta = gradient_descent(music_train.X, music_train.y, None, alpha,
#                          lambdav, iterations)
# theta, J_history = gradient_descent_with_J_history(
#     music_train.X, music_train.y, None, alpha, lambdav, iterations)
# plot_history(J_history)

# Cost of the fitted theta on the train, validation and test sets, in that
# order.
print("Computing cost.")
for _split in (music_train, music_validation, music_test):
    print(compute_cost(_split.X, _split.y, theta, lambdav))

# Accuracy on the same three sets for each delta_year from 0 to 9.
for delta_year in range(10):
    print(delta_year)
    print("Computing train accuracy.")
    for _split in (music_train, music_validation, music_test):
        print(compute_accuracy(_split.X, _split.y, theta, delta_year))
def main():
    """Train a model, test it on each requested state and log the metrics.

    Steps, in order:

    1. Parse the command-line arguments with ``do_args()``.
    2. Validate the data directory, create the output directory if needed,
       and refuse to reuse an existing experiment directory.
    3. Run ``Train(...).run_experiment()``.
    4. For every entry of ``args.test_states``: run ``Test(...)`` on that
       state's tile list, compute its accuracy and add its confusion
       matrices to the running totals.
    5. Log accuracy/jaccard for the accumulated confusion matrices and the
       total run time.

    Raises:
        AssertionError: if ``args.data_dir`` is not a directory, if the
            output directory could not be created, or if the experiment
            directory ``<output_dir>/<name>`` already exists.
    """
    # Read arguments
    args = do_args()
    start_time = float(time.time())
    logger.info("Starting at %s", str(datetime.datetime.now()))
    logger.info(args)

    # Ensure folders are there and no overwrite.
    # The checks raise AssertionError explicitly (same exception type as a
    # failed ``assert``) so they are still enforced under ``python -O``.
    logger.info("Ensuring all folders are there...")
    output_dir = Path(args.output_dir)
    experiment_dir = output_dir / args.name
    if not Path(args.data_dir).is_dir():
        raise AssertionError(
            "DATA_DIR (%s) does not exist. Make sure path is correct." %
            args.data_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if not output_dir.is_dir():
        raise AssertionError(
            "OUTPUT_DIR (%s) does not exist. Make sure path is correct." %
            args.output_dir)
    if experiment_dir.is_dir():
        # Report the path that was actually checked, not the two argument
        # strings glued together without a separator.
        raise AssertionError(
            "EXPERIMENT_DIR (%s) already exists. Change name or delete directory."
            % experiment_dir)

    # Run training
    train = Train(
        name=args.name,
        output=args.output_dir,
        data_dir=args.data_dir,
        training_states=args.training_states,
        validation_states=args.validation_states,
        superres_states=args.superres_states,
        model_type=args.model,
        loss=args.loss,
        learning_rate=args.learning_rate,
        epochs=args.epochs,
        do_color=args.do_color,
        batch_size=args.batch_size,
    )
    train.run_experiment()

    # Running totals of the per-state confusion matrices.
    cm_shape = (config.HR_NCLASSES - 1, config.HR_NCLASSES - 1)
    cm = np.zeros(cm_shape, dtype=np.float32)
    cm_dev = np.zeros(cm_shape, dtype=np.float32)

    # The model file is the same for every test state.
    model_fn = experiment_dir / "final_model.h5"

    for test_state in args.test_states:
        # Run testing
        ## Get test file name: prefer the "extended" tile list and fall back
        ## to the regular one when it is not present.
        input_fn = Path(
            args.data_dir) / ("%s_extended-test_tiles.csv" % test_state)
        if not input_fn.is_file():
            input_fn = Path(args.data_dir) / ("%s-test_tiles.csv" % test_state)

        prediction_dir = experiment_dir / ("test-output_%s" % test_state)
        prediction_dir.mkdir(parents=True, exist_ok=True)

        test = Test(
            input_fn=input_fn,
            output_base=prediction_dir,
            model_fn=model_fn,
            save_probabilities=False,
            superres=args.loss == "superres",
        )
        test.run_on_tiles()

        # Run accuracy
        acc, cm_s, cm_dev_s = compute_accuracy(
            pred_dir=prediction_dir,
            input_fn=input_fn,
            classes=config.HR_NCLASSES,
            hr_label_key=config.HR_LABEL_KEY,
            lr_label_key=config.LR_LABEL_KEY,
        )
        logger.info("Overall accuracy for %s: %.4f", test_state, acc)

        # Confusion matrices
        cm += cm_s
        cm_dev += cm_dev_s

    # Run eval
    logger.info("-----------------------------")
    logger.info("OVERALL METRICS")
    logger.info("-----------------------------")
    logger.info("Accuracy and jaccard of all pixels")
    accuracy_jaccard_np(cm)
    logger.info("Accuracy and jaccard of pixels with developed NLCD classes")
    accuracy_jaccard_np(cm_dev)

    logger.info("Finished at %s", str(datetime.datetime.now()))
    logger.info("Finished in %0.4f seconds", float(time.time()) - start_time)
# Backward feature elimination: starting from all n columns, repeatedly refit
# with the normal equation and drop the column whose coefficient has the
# smallest absolute value, recording train/validation cost at every size.

# Set train parameters.
lambdav = 0.0000000001
n = len(music_train.X[0])

print("Solving normal equation.")

# Get thetas to reduce data: column indices of X ordered by decreasing
# |theta| of the full fit.
theta = solve_normal_equation(music_train.X, music_train.y, lambdav)
ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))[::-1]

# Initialize costs.
J_history_train = np.zeros(n)
J_history_validation = np.zeros(n)

for iteration in range(n):
    # Absolute column indices kept for this round (the last-ranked one of
    # the previous round is dropped).
    columns = ordered_theta[:(n - iteration)]

    theta = solve_normal_equation(music_train.X[:, columns], music_train.y,
                                  lambdav)
    J_history_train[iteration] = compute_cost(music_train.X[:, columns],
                                              music_train.y, theta, 0)
    J_history_validation[iteration] = compute_cost(
        music_validation.X[:, columns], music_validation.y, theta, 0)

    print("Theta size: " + str(n - iteration))
    print("J_train: %f" % J_history_train[iteration])
    print("J_validation: %f" % J_history_validation[iteration])
    print("Accuracy: %f" % compute_accuracy(music_test.X[:, columns],
                                            music_test.y, theta, 9))

    # theta[j] is the coefficient of column columns[j], so argsort yields
    # positions inside `columns`, not columns of X. Map them back through
    # `columns` so ordered_theta always holds absolute column indices;
    # using the raw positions would select the wrong features next round.
    ranking = np.argsort(np.abs(theta).reshape(len(theta)))[::-1]
    ordered_theta = columns[ranking]

plot_history_train_validation(J_history_train, J_history_validation)
plot_history(J_history_train - J_history_validation)
def fingerprinting_threshold_grid_search(path_audio_data,
                                         thresholds=range(10, 151, 10),
                                         verbose=True,
                                         plotFigure=True,
                                         saveFigure=False):
    """Evaluates the fingerprinting based alignment system with Accuracy,
    Precision, Recall and F-measure metrics for several thresholds

    The ground truth is read from ``<cwd>/ground_truth`` and the per-threshold
    offset estimation results from
    ``<cwd>/fingerprinting_offset_estimation_results``, where ``<cwd>`` is the
    current working directory.

    **Parameters**

    path_audio_data: String
        Path to the audio dataset
    thresholds: List (default range(10,151,10))
        List contains the threshold values. Must not be empty.
    verbose: Boolean (default True)
        Prints the resulting evaluation metrics as well as the number of
        TP, TN, FP and FN if it is True
    plotFigure: Boolean (default True)
        Plots the evaluation metrics vs. thresholds if it is True.
    saveFigure: Boolean (default False)
        Saves the plot for evaluation metrics. Valid only if
        'plotFigure==True'

    **Returns**

    accuracy_list: List
        The list of accuracy results (in percent) for each threshold
    precision_list: List
        The list of precision results for each threshold
    recall_list: List
        The list of recall results for each threshold
    F_measure_list: List
        The list of F-measure results for each threshold
    TP_list: List
        The number of the true positives for each threshold
    TN_list: List
        The number of the true negatives for each threshold
    FP_list: List
        The number of the false positives for each threshold
    FN_list: List
        The number of the false negatives for each threshold
    index_of_best: Integer
        The index of the best accuracy result (first one on ties)

    **Raises**

    ValueError
        If ``thresholds`` is empty.
    """
    # Materialise once so any iterable can be indexed and plotted below.
    thresholds = list(thresholds)
    if not thresholds:
        raise ValueError('thresholds must contain at least one value')

    # os.path.join picks the right separator for the platform.
    cw_path = os.getcwd()
    path_ground_truth = os.path.join(cw_path, 'ground_truth')
    path_fingerprinting_results = os.path.join(
        cw_path, 'fingerprinting_offset_estimation_results')
    path = [path_ground_truth, path_audio_data]

    accuracy_list = []
    precision_list = []
    recall_list = []
    F_measure_list = []
    TP_list = []
    TN_list = []
    FP_list = []
    FN_list = []

    # Evaluation of fingerprinting based alignment for different thresholds
    for thr in thresholds:
        # Built-in str(): the np.str alias no longer exists in current NumPy.
        offset_estimation_result_filename = os.path.join(
            path_fingerprinting_results,
            'offset_estimation_fingerprinting_thr_' + str(thr) +
            '_result.txt')

        Accuracy, Precision, Recall, F_measure, TP, TN, FP, FN = \
            compute_accuracy.compute_accuracy(
                path, offset_estimation_result_filename)

        accuracy_list.append(100 * Accuracy)  # stored as a percentage
        precision_list.append(Precision)
        recall_list.append(Recall)
        F_measure_list.append(F_measure)
        TP_list.append(TP)
        TN_list.append(TN)
        FN_list.append(FN)
        FP_list.append(FP)

    best_accuracy_fingerprinting_based = max(accuracy_list)
    index_of_best = accuracy_list.index(best_accuracy_fingerprinting_based)

    if verbose:
        print(
            'The best accuracy is obtained for threshold = {0} with accuracy = {1}'
            .format(thresholds[index_of_best],
                    best_accuracy_fingerprinting_based))
        print('\nOther metrics:')
        print(('\nFalse Negative - FN = {0}').format(FN_list[index_of_best]))
        print(('False Positive - FP = {0}').format(FP_list[index_of_best]))
        print(('True Positive - TP = {0}').format(TP_list[index_of_best]))
        print(('True Negative - TN = {0}').format(TN_list[index_of_best]))
        print(('\nAccuracy = {0}').format(accuracy_list[index_of_best]))
        print(('Precision = {0}').format(precision_list[index_of_best]))
        print(('Recall = {0}').format(recall_list[index_of_best]))
        print(('F-measure = {0}').format(F_measure_list[index_of_best]))

    if plotFigure:
        # Left panel: accuracy; right panel: precision, recall and F-measure.
        fig, axes = plt.subplots(1, 2)
        axes[0].plot(thresholds, accuracy_list)
        axes[0].set_xlabel('Thresholds')
        axes[0].set_ylabel('Accuracy (%)')
        axes[0].set_ylim([80, 100])
        axes[1].plot(thresholds, precision_list, label='$Precision$')
        axes[1].plot(thresholds, recall_list, '+', label='$Recall$')
        axes[1].plot(thresholds, F_measure_list, '--', label='$F-measure$')
        axes[1].set_xlabel('Thresholds')
        axes[1].legend()
        fig.tight_layout()

        # Saving is only possible when the figure was created above.
        if saveFigure:
            fig.savefig(
                os.path.join(path_fingerprinting_results,
                             'Fingerprinting_based_estimation_results.png'))

    return (accuracy_list, precision_list, recall_list, F_measure_list,
            TP_list, TN_list, FP_list, FN_list, index_of_best)