def test_classification_report_multiclass():
    # Test performance report
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa       0.83      0.79      0.81        24
  versicolor       0.33      0.10      0.15        31
   virginica       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)
    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    y_true = np.array(["blue", "green", "red"])[y_true]
    y_pred = np.array(["blue", "green", "red"])[y_pred]

    expected_report = """\
              precision    recall  f1-score   support

        blue       0.83      0.79      0.81        24
       green       0.33      0.10      0.15        31
         red       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)

    expected_report = """\
              precision    recall  f1-score   support

           a       0.83      0.79      0.81        24
           b       0.33      0.10      0.15        31
           c       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred,
                                   target_names=["a", "b", "c"])
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_digits():
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609   0.79167   0.80851        24
  versicolor    0.33333   0.09677   0.15000        31
   virginica    0.41860   0.90000   0.57143        20

weighted avg    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)
    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
def test_multilabel_classification_report_with_samples_averaging():
    n_classes = 4
    n_samples = 50

    _, y_true = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=0)

    _, y_pred = make_multilabel_classification(n_features=1,
                                               n_samples=n_samples,
                                               n_classes=n_classes,
                                               random_state=1)

    expected_report = """\
             precision    recall  f1-score   support

          0       0.50      0.67      0.57        24
          1       0.51      0.74      0.61        27
          2       0.29      0.08      0.12        26
          3       0.52      0.56      0.54        27

samples avg       0.46      0.42      0.40       104
"""

    report = classification_report(y_true, y_pred, average='samples')
    assert_equal(report, expected_report)
def test_classification_report_binary_averaging():
    y_true = [0, 1, 1, 1, 0, 1, 1, 0]
    y_pred = [0, 0, 1, 1, 1, 0, 1, 0]

    # print classification report with class names
    expected_report = """\
            precision    recall  f1-score   support

         0       0.50      0.67      0.57         3
         1       0.75      0.60      0.67         5

binary avg       0.75      0.60      0.67         8
"""

    report = classification_report(y_true, y_pred, average='binary')
    assert_equal(report, expected_report)
def test_classification_report_multiclass_with_unicode_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    labels = np.array([u"blue\xa2", u"green\xa2", u"red\xa2"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = u"""\
              precision    recall  f1-score   support

       blue\xa2       0.83      0.79      0.81        24
      green\xa2       0.33      0.10      0.15        31
        red\xa2       0.42      0.90      0.57        20

weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Exemplo n.º 7
0
def train_model(dataset_directory: str, model_name: str, stroke_thicknesses: List[int],
                width: int, height: int,
                staff_line_vertical_offsets: List[int], training_minibatch_size: int,
                optimizer: str, dynamic_learning_rate_reduction: bool, use_fixed_canvas: bool, datasets: List[str],
                class_weights_balancing_method: str):
    image_dataset_directory = os.path.join(dataset_directory, "images")

    bounding_boxes = None
    bounding_boxes_cache = os.path.join(dataset_directory, "bounding_boxes.txt")

    print("Loading configuration and data-readers...")
    start_time = time()

    number_of_classes = len(os.listdir(os.path.join(image_dataset_directory, "training")))
    training_configuration = ConfigurationFactory.get_configuration_by_name(model_name, optimizer, width, height,
                                                                            training_minibatch_size, number_of_classes)
    if training_configuration.performs_localization() and bounding_boxes is None:
        # Try to unpickle
        with open(bounding_boxes_cache, "rb") as cache:
            bounding_boxes = pickle.load(cache)

    if not training_configuration.performs_localization():
        bounding_boxes = None

    train_generator = ImageDataGenerator(rotation_range=training_configuration.rotation_range,
                                         zoom_range=training_configuration.zoom_range
                                         )
    training_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "training"),
        image_data_generator=train_generator,
        target_size=(training_configuration.input_image_rows,
                     training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes,
    )
    training_steps_per_epoch = np.math.ceil(training_data_generator.samples / training_data_generator.batch_size)

    validation_generator = ImageDataGenerator()
    validation_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "validation"),
        image_data_generator=validation_generator,
        target_size=(
            training_configuration.input_image_rows,
            training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes)
    validation_steps_per_epoch = np.math.ceil(validation_data_generator.samples / validation_data_generator.batch_size)

    test_generator = ImageDataGenerator()
    test_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=os.path.join(image_dataset_directory, "test"),
        image_data_generator=test_generator,
        target_size=(training_configuration.input_image_rows,
                     training_configuration.input_image_columns),
        batch_size=training_configuration.training_minibatch_size,
        shuffle=False,
        bounding_boxes=bounding_boxes)
    test_steps_per_epoch = np.math.ceil(test_data_generator.samples / test_data_generator.batch_size)

    model = training_configuration.classifier()
    model.summary()

    print("Model {0} loaded.".format(training_configuration.name()))
    print(training_configuration.summary())

    start_of_training = datetime.date.today()
    best_model_path = "{0}_{1}.h5".format(start_of_training, training_configuration.name())

    monitor_variable = 'val_acc'
    if training_configuration.performs_localization():
        monitor_variable = 'val_output_class_acc'

    model_checkpoint = ModelCheckpoint(best_model_path, monitor=monitor_variable, save_best_only=True, verbose=1)
    early_stop = EarlyStopping(monitor=monitor_variable,
                               patience=training_configuration.number_of_epochs_before_early_stopping,
                               verbose=1)
    learning_rate_reduction = ReduceLROnPlateau(monitor=monitor_variable,
                                                patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
                                                verbose=1,
                                                factor=training_configuration.learning_rate_reduction_factor,
                                                min_lr=training_configuration.minimum_learning_rate)
    tensorboard_callback = TensorBoard(
            log_dir="./logs/{0}_{1}/".format(start_of_training, training_configuration.name()),
            batch_size=training_configuration.training_minibatch_size)
    if dynamic_learning_rate_reduction:
        callbacks = [model_checkpoint, early_stop, tensorboard_callback, learning_rate_reduction]
    else:
        print("Learning-rate reduction on Plateau disabled")
        callbacks = [model_checkpoint, early_stop, tensorboard_callback]

    class_weight_calculator = ClassWeightCalculator()
    class_weights = class_weight_calculator.calculate_class_weights(image_dataset_directory,
                                                                    method=class_weights_balancing_method,
                                                                    class_indices=training_data_generator.class_indices)
    if class_weights_balancing_method is not None:
        print("Using {0} method for obtaining class weights to compensate for an unbalanced dataset.".format(
            class_weights_balancing_method))

    print("Training on dataset...")
    history = model.fit_generator(
        generator=training_data_generator,
        steps_per_epoch=training_steps_per_epoch,
        epochs=training_configuration.number_of_epochs,
        callbacks=callbacks,
        validation_data=validation_data_generator,
        validation_steps=validation_steps_per_epoch,
        class_weight=class_weights
    )

    print("Loading best model from check-point and testing...")
    best_model = keras.models.load_model(best_model_path)

    test_data_generator.reset()
    file_names = test_data_generator.filenames
    class_labels = os.listdir(os.path.join(image_dataset_directory, "test"))
    # Notice that some classes have so few elements, that they are not present in the test-set and do not
    # appear in the final report. To obtain the correct classes, we have to enumerate all non-empty class
    # directories inside the test-folder and use them as labels
    names_of_classes_with_test_data = [
        class_name for class_name in class_labels
        if os.listdir(os.path.join(image_dataset_directory, "test", class_name))]
    true_classes = test_data_generator.classes
    predictions = best_model.predict_generator(test_data_generator, steps=test_steps_per_epoch)
    if training_configuration.performs_localization():
        predicted_classes = numpy.argmax(predictions[0], axis=1)
    else:
        predicted_classes = numpy.argmax(predictions, axis=1)

    test_data_generator.reset()
    evaluation = best_model.evaluate_generator(test_data_generator, steps=test_steps_per_epoch)
    classification_accuracy = 0

    print("Reporting classification statistics with micro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='micro')
    print(report)

    print("Reporting classification statistics with macro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='macro')
    print(report)

    print("Reporting classification statistics with weighted average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='weighted'
                                                     )
    print(report)

    indices_of_misclassified_files = [i for i, e in enumerate(true_classes - predicted_classes) if e != 0]
    misclassified_files = [file_names[i] for i in indices_of_misclassified_files]
    misclassified_files_actual_prediction_indices = [predicted_classes[i] for i in indices_of_misclassified_files]
    misclassified_files_actual_prediction_classes = [class_labels[i] for i in
                                                     misclassified_files_actual_prediction_indices]
    print("Misclassified files:")
    for i in range(len(misclassified_files)):
        print("\t{0} is incorrectly classified as {1}".format(misclassified_files[i],
                                                              misclassified_files_actual_prediction_classes[i]))

    for i in range(len(best_model.metrics_names)):
        current_metric = best_model.metrics_names[i]
        print("{0}: {1:.5f}".format(current_metric, evaluation[i]))
        if current_metric == 'acc' or current_metric == 'output_class_acc':
            classification_accuracy = evaluation[i]
    print("Total Accuracy: {0:0.5f}%".format(classification_accuracy * 100))
    print("Total Error: {0:0.5f}%".format((1 - classification_accuracy) * 100))

    end_time = time()
    execution_time_in_seconds = round(end_time - start_time)
    print("Execution time: {0:.1f}s".format(end_time - start_time))

    training_result_image = "{1}_{0}_{2:.1f}p.png".format(training_configuration.name(), start_of_training,
                                                          classification_accuracy * 100)
    TrainingHistoryPlotter.plot_history(history, training_result_image)

    datasets_string = str.join(",", datasets)
    notification_message = "Training on {0} dataset with model {1} finished. " \
                           "Accuracy: {2:0.5f}%".format(datasets_string, model_name, classification_accuracy * 100)
    TelegramNotifier.send_message_via_telegram(notification_message, training_result_image)

    dataset_size = training_data_generator.samples + validation_data_generator.samples + test_data_generator.samples
    stroke_thicknesses_string = ",".join(map(str, stroke_thicknesses))
    staff_line_vertical_offsets_string = ",".join(map(str, staff_line_vertical_offsets))
    image_sizes = "{0}x{1}px".format(training_configuration.input_image_rows,
                                     training_configuration.input_image_columns)
    data_augmentation = "{0}% zoom, {1}° rotation".format(int(training_configuration.zoom_range * 100),
                                                          training_configuration.rotation_range)
    today = "{0:02d}.{1:02d}.{2}".format(start_of_training.day, start_of_training.month, start_of_training.year)
    balancing_method = "None" if class_weights_balancing_method is None else class_weights_balancing_method

    GoogleSpreadsheetReporter.append_result_to_spreadsheet(dataset_size=dataset_size, image_sizes=image_sizes,
                                                           stroke_thicknesses=stroke_thicknesses_string,
                                                           staff_lines=staff_line_vertical_offsets_string,
                                                           model_name=model_name, data_augmentation=data_augmentation,
                                                           optimizer=optimizer,
                                                           early_stopping=training_configuration.number_of_epochs_before_early_stopping,
                                                           reduction_patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
                                                           learning_rate_reduction_factor=training_configuration.learning_rate_reduction_factor,
                                                           minibatch_size=training_minibatch_size,
                                                           initialization=training_configuration.initialization,
                                                           initial_learning_rate=training_configuration.get_initial_learning_rate(),
                                                           accuracy=classification_accuracy,
                                                           date=today,
                                                           use_fixed_canvas=use_fixed_canvas,
                                                           datasets=datasets_string,
                                                           execution_time_in_seconds=execution_time_in_seconds,
                                                           balancing_method=balancing_method)