# Fragment of a larger routine (its header and enclosing loops are not visible
# here). It fits three least-squares models around a year split:
#   1. a model on all rows whose targets are replaced by year1 / year2,
#   2. a model on the rows selected by `less_year`,
#   3. a model on the rows selected by `greater_year`,
# and then prints accuracies for the sub-models and for their combination.
# Copy the targets so the original music_train.y is not modified in place.
music_train_y_year_yes_or_not = np.array(music_train.y)
            # Overwrite each target with the representative year of its side
            # of the split (year1 for `less_year` rows, year2 for
            # `greater_year` rows).
            music_train_y_year_yes_or_not[less_year] = year1
            music_train_y_year_yes_or_not[greater_year] = year2
            # Reshape in place to a column vector of shape (n_rows, 1).
            music_train_y_year_yes_or_not.shape = (
                len(music_train_y_year_yes_or_not), 1)

            # < year or > year classifier: fitted on every training row
            # against the year1/year2 targets built above. The last argument
            # is 0 (presumably the regularisation weight -- confirm against
            # solve_normal_equation).
            theta_year_yes_or_not = solve_normal_equation(
                music_train.X, music_train_y_year_yes_or_not, 0)

            # < year classifier: fitted only on the `less_year` rows, using
            # their real targets as a column vector.
            y = np.array(music_train.y[less_year])
            y.shape = (len(y), 1)
            X = music_train.X[np.where(less_year)]
            theta_year_less = solve_normal_equation(X, y, 0)
            # Accuracy of the sub-model on the same rows it was fitted on.
            print compute_accuracy(X, y, theta_year_less, delta_year)

            # > year classifier: same procedure for the `greater_year` rows.
            y = np.array(music_train.y[greater_year])
            y.shape = (len(y), 1)
            X = music_train.X[np.where(greater_year)]
            # NOTE(review): this call indexes music_train.X with the mask
            # directly instead of reusing X from the line above; presumably
            # equivalent if `greater_year` is a boolean mask -- confirm.
            theta_year_more = solve_normal_equation(
                music_train.X[greater_year], y, 0)
            print compute_accuracy(X, y, theta_year_more, delta_year)

            # Accuracy of the combined three-model predictor on the full
            # training set.
            print compute_accuracy_year(music_train.X, music_train.y,
                                        theta_year_yes_or_not, theta_year_less,
                                        theta_year_more, year1, year2,
                                        delta_year)
            #print compute_accuracy_year(music_validation.X, music_validation.y, theta_year_yes_or_not, theta_year_less, theta_year_more, year1, year2, delta_year)
示例#2
0
    # Body of a training script whose `def` line is not visible here: fit theta
    # with the normal equation, print the cost on the train / validation / test
    # splits, then print accuracies for delta_year = 0..9.

    # Set train parameters.
    # lambdav is passed as the last argument to solve_normal_equation and
    # compute_cost below; it is 0 here.
    # lambdav = 0.00001
    lambdav = 0
    # alpha and iterations are only referenced by the commented-out
    # gradient-descent calls further down in this fragment.
    # alpha = 0.0000001
    # iterations = 1000000
    alpha = 0.1
    iterations = 1200

    # print "Solving normal equation."
    theta = solve_normal_equation(music_train.X, music_train.y, lambdav)

    # NOTE(review): this message is printed although both gradient-descent
    # calls below are commented out; theta comes from the normal equation above.
    print "Solving using gradient descent."
    # theta = gradient_descent(music_train.X, music_train.y, None, alpha, lambdav, iterations)
    #theta, J_history = gradient_descent_with_J_history(music_train.X, music_train.y, None, alpha, lambdav, iterations)
    #plot_history(J_history)

    # Cost of the fitted theta on the train, validation and test splits,
    # in that order.
    print "Computing cost."
    print compute_cost(music_train.X, music_train.y, theta, lambdav)
    print compute_cost(music_validation.X, music_validation.y, theta, lambdav)
    print compute_cost(music_test.X, music_test.y, theta, lambdav)

    # Report accuracy for each delta_year in 0..9 (presumably a tolerance in
    # years -- confirm against compute_accuracy).
    for delta_year in range(10):
        print delta_year

        # NOTE(review): the message says "train", but the three lines that
        # follow print train, validation and test accuracy in that order.
        print "Computing train accuracy."
        print compute_accuracy(music_train.X, music_train.y, theta, delta_year)
        print compute_accuracy(music_validation.X, music_validation.y, theta,
                               delta_year)
        print compute_accuracy(music_test.X, music_test.y, theta, delta_year)
示例#3
0
def main():
    """Train a model, test it on every requested state and log overall metrics.

    Steps, in order:

    1. Parse the arguments with ``do_args()`` and check the folders:
       ``args.data_dir`` must exist, ``args.output_dir`` is created when
       missing, and ``<output_dir>/<name>`` must not exist yet so an earlier
       experiment is never overwritten.
    2. Build a ``Train`` object from the arguments and run the experiment.
    3. For each entry of ``args.test_states``: run ``Test`` on that state's
       tile list, score the predictions with ``compute_accuracy`` and add the
       two returned confusion matrices to the running totals.
    4. Pass both accumulated matrices to ``accuracy_jaccard_np`` and log the
       total run time.

    Raises:
        AssertionError: if the data directory is missing, the output
            directory is not a directory after creation, or the experiment
            directory already exists.
    """
    # Read arguments
    args = do_args()
    start_time = time.time()
    logger.info("Starting at %s", datetime.datetime.now())
    logger.info(args)

    # Ensure folders are there and no overwrite
    logger.info("Ensuring all folders are there...")
    data_dir = Path(args.data_dir)
    assert data_dir.is_dir(), (
        "DATA_DIR (%s) does not exist. Make sure path is correct." %
        args.data_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    assert output_dir.is_dir(), (
        "OUTPUT_DIR (%s) does not exist. Make sure path is correct." %
        args.output_dir)
    experiment_dir = output_dir / args.name
    # Report the very path that was tested; plain string concatenation of
    # output_dir and name would drop the path separator in the message.
    assert not experiment_dir.is_dir(), (
        "EXPERIMENT_DIR (%s) already exists. Change name or delete directory."
        % experiment_dir)

    # Run training
    train = Train(
        name=args.name,
        output=args.output_dir,
        data_dir=args.data_dir,
        training_states=args.training_states,
        validation_states=args.validation_states,
        superres_states=args.superres_states,
        model_type=args.model,
        loss=args.loss,
        learning_rate=args.learning_rate,
        epochs=args.epochs,
        do_color=args.do_color,
        batch_size=args.batch_size,
    )
    train.run_experiment()

    # Running totals of the per-state confusion matrices. They are one
    # row/column smaller than HR_NCLASSES (presumably one label is excluded
    # from scoring -- confirm against compute_accuracy).
    cm_size = config.HR_NCLASSES - 1
    cm = np.zeros((cm_size, cm_size), dtype=np.float32)
    cm_dev = np.zeros((cm_size, cm_size), dtype=np.float32)

    # The trained model file is the same for every test state.
    model_fn = experiment_dir / "final_model.h5"

    for test_state in args.test_states:
        # Run testing
        ## Get test file name: prefer the extended tile list, fall back to
        ## the regular one when the extended file is absent.
        input_fn = data_dir / ("%s_extended-test_tiles.csv" % test_state)
        if not input_fn.is_file():
            input_fn = data_dir / ("%s-test_tiles.csv" % test_state)

        prediction_dir = experiment_dir / ("test-output_%s" % test_state)
        prediction_dir.mkdir(parents=True, exist_ok=True)

        test = Test(
            input_fn=input_fn,
            output_base=prediction_dir,
            model_fn=model_fn,
            save_probabilities=False,
            superres=args.loss == "superres",
        )
        test.run_on_tiles()

        # Run accuracy
        acc, cm_s, cm_dev_s = compute_accuracy(
            pred_dir=prediction_dir,
            input_fn=input_fn,
            classes=config.HR_NCLASSES,
            hr_label_key=config.HR_LABEL_KEY,
            lr_label_key=config.LR_LABEL_KEY,
        )
        logger.info("Overall accuracy for %s: %.4f", test_state, acc)

        # Confusion matrices
        cm += cm_s
        cm_dev += cm_dev_s

    # Run eval
    logger.info("-----------------------------")
    logger.info("OVERALL METRICS")
    logger.info("-----------------------------")
    logger.info("Accuracy and jaccard of all pixels")
    accuracy_jaccard_np(cm)
    logger.info("Accuracy and jaccard of pixels with developed NLCD classes")
    accuracy_jaccard_np(cm_dev)

    logger.info("Finished at %s", datetime.datetime.now())
    logger.info("Finished in %0.4f seconds", time.time() - start_time)
    # Body of a feature-elimination experiment whose `def` line is not visible
    # here: repeatedly refit the normal equation on a shrinking set of
    # columns, recording the train and validation cost after every refit.

    # Set train parameters.
    lambdav = 0.0000000001
    # n is the number of columns in the training matrix.
    n = len(music_train.X[0])
    
    print "Solving normal equation."
    
    # Get thetas to reduce data.
    # Fit on all columns, then order the column indices by descending |theta|.
    theta = solve_normal_equation(music_train.X, music_train.y, lambdav)
    ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
    ordered_theta = ordered_theta[::-1]
    
    # Initialize costs.
    J_history_train = np.zeros(n)
    J_history_validation = np.zeros(n)
    
    # Iteration k refits on the first (n - k) entries of ordered_theta, so one
    # column is dropped per pass.
    for iteration in range(n):
        # Fitting uses lambdav; the two costs below are evaluated with 0 as
        # the last argument.
        theta = solve_normal_equation(music_train.X[:, ordered_theta[:(n - iteration)]], music_train.y, lambdav)
        J_history_train[iteration] = compute_cost(music_train.X[:, ordered_theta[:(n - iteration)]], music_train.y, theta, 0)
        J_history_validation[iteration] = compute_cost(music_validation.X[:, ordered_theta[:(n - iteration)]], music_validation.y, theta, 0)
        
        print "Theta size: " + str(n - iteration)
        print "J_train: %f" % J_history_train[iteration]
        print "J_validation: %f" % J_history_validation[iteration]
        # Accuracy is measured on the test split with a fixed last argument
        # of 9.
        print "Accuracy: %f" % compute_accuracy(music_test.X[:, ordered_theta[:(n - iteration)]], music_test.y, theta, 9)

        # NOTE(review): theta now has one entry per *selected* column, so this
        # argsort yields positions within the reduced matrix, yet the next
        # pass uses them to index columns of the full music_train.X. It looks
        # like the positions need mapping back through the previous
        # ordered_theta -- confirm.
        ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
        ordered_theta = ordered_theta[::-1]
    
    # Plot both cost curves, then their difference (train minus validation).
    plot_history_train_validation(J_history_train, J_history_validation)
    plot_history(J_history_train -J_history_validation)
def fingerprinting_threshold_grid_search(path_audio_data,
                                         thresholds=range(10, 151, 10),
                                         verbose=True,
                                         plotFigure=True,
                                         saveFigure=False):
    """Evaluates the fingerprinting based alignment system with Accuracy, Precision, Recall and F-measure metrics for several thresholds

    The ground truth is read from ``<cwd>/ground_truth`` and the offset
    estimation results from
    ``<cwd>/fingerprinting_offset_estimation_results``, where ``<cwd>`` is
    the current working directory.

    **Parameters**

    path_audio_data: String
        Path to the audio dataset
    thresholds: List (default range(10,151,10))
        List contains the threshold values. Must not be empty.
    verbose: Boolean (default True)
        Prints the resulting evaluation metrics as well as the number of TP, TN, FP and FN
        if it is True
    plotFigure: Boolean (default True)
        Plots the evaluation metrics vs. thresholds if it is True.
    saveFigure: Boolean (default False)
        Saves the plot for evaluation metrics. Valid only if 'plotFigure==True'

    **Returns**

    accuracy_list: List
        The list of accuracy results for each threshold, in percent
        (the value returned by the evaluation multiplied by 100)
    precision_list: List
        The list of precision results for each threshold
    recall_list: List
        The list of recall results for each threshold
    F_measure_list: List
        The list of F-measure results for each threshold
    TP_list: List
        The number of the true positives for each threshold
    TN_list: List
        The number of the true negatives for each threshold
    FP_list: List
        The number of the false positives for each threshold
    FN_list: List
        The number of the false negatives for each threshold
    index_of_best: Integer
        The index of the best accuracy result (first one on ties)

    **Raises**

    ValueError
        If 'thresholds' is empty
    """
    # Materialise once so any iterable is accepted and can be indexed later.
    thresholds = list(thresholds)
    if not thresholds:
        raise ValueError('thresholds must contain at least one value')

    cw_path = os.getcwd()
    # os.path.join picks the platform separator, replacing the manual
    # '/' versus '\\' detection.
    path_ground_truth = os.path.join(cw_path, 'ground_truth')
    path_fingerprinting_results = os.path.join(
        cw_path, 'fingerprinting_offset_estimation_results')

    path = [path_ground_truth, path_audio_data]

    accuracy_list = []
    precision_list = []
    recall_list = []
    F_measure_list = []

    TP_list = []
    TN_list = []
    FP_list = []
    FN_list = []

    # Evaluation of fingerprinting based alignment for different thresholds
    for thr in thresholds:
        # The '/' before the file name is kept as in the original so the
        # string handed to compute_accuracy is unchanged. The removed
        # np.str alias is replaced by plain string formatting.
        offset_estimation_result_filename = (
            '{0}/offset_estimation_fingerprinting_thr_{1}_result.txt'.format(
                path_fingerprinting_results, thr))
        (Accuracy, Precision, Recall, F_measure,
         TP, TN, FP, FN) = compute_accuracy.compute_accuracy(
             path, offset_estimation_result_filename)
        accuracy_list.append(100 * Accuracy)
        precision_list.append(Precision)
        recall_list.append(Recall)
        F_measure_list.append(F_measure)
        TP_list.append(TP)
        TN_list.append(TN)
        FN_list.append(FN)
        FP_list.append(FP)

    best_accuracy_fingerprinting_based = max(accuracy_list)
    index_of_best = accuracy_list.index(best_accuracy_fingerprinting_based)

    if verbose:
        print(
            'The best accuracy is obtained for threshold = {0} with accuracy = {1}'
            .format(thresholds[index_of_best],
                    best_accuracy_fingerprinting_based))
        print('\nOther metrics:')
        print(('\nFalse Negative - FN = {0}').format(FN_list[index_of_best]))
        print(('False Positive - FP = {0}').format(FP_list[index_of_best]))
        print(('True Positive - TP = {0}').format(TP_list[index_of_best]))
        print(('True Negative - TN = {0}').format(TN_list[index_of_best]))
        print(('\nAccuracy = {0}').format(accuracy_list[index_of_best]))
        print(('Precision = {0}').format(precision_list[index_of_best]))
        print(('Recall = {0}').format(recall_list[index_of_best]))
        print(('F-measure = {0}').format(F_measure_list[index_of_best]))

    if plotFigure:
        # Left panel: accuracy in percent; right panel: the other metrics.
        fig, axes = plt.subplots(1, 2)
        axes[0].plot(thresholds, accuracy_list)
        axes[0].set_xlabel('Thresholds')
        axes[0].set_ylabel('Accuracy (%)')
        axes[0].set_ylim([80, 100])

        axes[1].plot(thresholds, precision_list, label='$Precision$')
        axes[1].plot(thresholds, recall_list, '+', label='$Recall$')
        axes[1].plot(thresholds, F_measure_list, '--', label='$F-measure$')
        axes[1].set_xlabel('Thresholds')
        axes[1].legend()

        fig.tight_layout()

        if saveFigure:
            fig.savefig(
                os.path.join(path_fingerprinting_results,
                             'Fingerprinting_based_estimation_results.png'))

    return (accuracy_list, precision_list, recall_list, F_measure_list,
            TP_list, TN_list, FP_list, FN_list, index_of_best)