Пример #1
0
def main():
    """Run one training experiment described by a YAML configuration file.

    The path to the configuration file is read from ``sys.argv[1]``. The file
    must define ``experiment_settings``, ``data_parameters`` and
    ``ml_parameters``; ``path_parameters`` is optional and, when present and
    non-empty, switches the run to path-based sampling.

    Steps: load the dataset, optionally undersample it, build the model
    selected by ``experiment_settings["model_type"]`` (``cnn``, ``lstm`` or
    ``tcn``), train until ``model.epoch_end`` signals to stop, then save the
    model under the configuration file's base name and log the elapsed time.

    Raises:
        AssertionError: If the command line does not hold exactly one
            argument, or if the configuration is inconsistent with the
            selected model or undersampling option.
        ValueError: If ``model_type`` is not one of the supported values.
    """

    start = time.time()
    logging.basicConfig(level="INFO")

    # Loads the .yaml data and unpacks it
    assert len(sys.argv) == 2, "Exactly one experiment configuration file must be "\
        "passed as a positional argument to this script. \n\n"\
        "E.g. `python run_non_tracking_experiment.py <path to .yaml file>`"
    config_path = sys.argv[1]
    with open(config_path, "r") as yaml_config_file:
        logging.info("Loading simulation settings from %s", config_path)
        # `yaml.load` without an explicit Loader is deprecated and rejected by
        # PyYAML >= 6. `safe_load` parses plain data only, which is all a
        # settings file needs, and never instantiates arbitrary Python objects.
        experiment_config = yaml.safe_load(yaml_config_file)
    experiment_settings = experiment_config['experiment_settings']
    data_parameters = experiment_config['data_parameters']
    ml_parameters = experiment_config['ml_parameters']
    # Optional section: its presence selects the path-based code paths below.
    path_parameters = experiment_config.get('path_parameters')

    # Loads the raw dataset
    logging.info("Loading the dataset...")
    data_preprocessor = Preprocessor(data_parameters)
    features, labels = data_preprocessor.load_dataset()
    if path_parameters:
        path_creator = PathCreator(data_parameters, path_parameters, labels)
        paths = path_creator.load_paths()

    # Undersamples the dataset (if requested)
    if experiment_settings.get("undersample_bf"):
        features = undersample_bf(features, data_parameters["beamformings"])
    # NOTE(review): the option is looked up in `experiment_settings`, but its value is read
    # from `data_parameters` -- confirm this split is intended.
    if "undersample_space" in experiment_settings:
        assert not path_parameters, "This option is not supported for tracking experiments, "\
            "unless the code for the path creation is updated"
        features, labels = undersample_space(features, labels, data_parameters["undersample_space"])

    # Initializes the model and prepares it for training
    model_type = experiment_settings["model_type"].lower()
    logging.info("Initializing the model (type = %s)...", model_type)
    if model_type == "cnn":
        ml_parameters["input_type"] = "float"
        model = CNN(ml_parameters)
    elif model_type in ("lstm", "tcn"):
        assert path_parameters, "This model requires `paths_parameters`. See the example."
        # The sequence models consume whole paths, so the first input dimension
        # has to equal the number of time steps per path.
        assert path_parameters["time_steps"] == ml_parameters["input_shape"][0], (
            "The ML model first input dimention must match the length of the paths! "
            "(path length = {}, model input = {})".format(
                path_parameters["time_steps"], ml_parameters["input_shape"][0]))
        ml_parameters["input_type"] = "bool"
        if model_type == "lstm":
            model = LSTM(ml_parameters)
        else:
            model = TCN(ml_parameters)
    else:
        raise ValueError("The simulation settings specified 'model_type'={}. Currently, only "
            "'cnn', 'lstm', and 'tcn' are supported.".format(experiment_settings["model_type"]))
    model.set_graph()

    # Creates the validation set (built once and reused after every epoch)
    logging.info("Creating validation set...")
    if path_parameters:
        features_val, labels_val, _ = sample_paths(
            paths["validation"],
            features,
            labels,
            experiment_settings,
            data_parameters,
            path_parameters,
        )
    else:
        features_val, labels_val = create_noisy_features(
            features,
            labels,
            experiment_settings,
            data_parameters,
        )

    # Runs the training loop; `model.epoch_end` decides when to stop
    logging.info("\nStaring the training loop!\n")
    keep_training = True
    while keep_training:
        # A fresh training set is generated for every epoch
        logging.info("Creating noisy set for this epoch...")
        if path_parameters:
            features_train, labels_train, _ = sample_paths(
                paths["train"],
                features,
                labels,
                experiment_settings,
                data_parameters,
                path_parameters,
                sample_fraction=experiment_settings["train_sample_fraction"]
            )
        else:
            features_train, labels_train = create_noisy_features(
                features,
                labels,
                experiment_settings,
                data_parameters,
            )
        model.train_epoch(features_train, labels_train)
        predictions_val = model.predict(features_val, validation=True)
        keep_training, val_avg_dist = model.epoch_end(labels_val, predictions_val)
        if predictions_val is not None:
            # Upscales the validation score back to the original scale and gets the 95th percentile
            rescale_factor = data_parameters["pos_grid"][0]
            val_avg_dist *= rescale_factor
            val_95_perc = get_95th_percentile(
                labels_val,
                predictions_val,
                rescale_factor=rescale_factor
            )
            logging.info("Current avg val. distance: %.5f m || 95th percentile: %.5f m\n",
                val_avg_dist, val_95_perc)

    # Stores the trained model (named after the configuration file) and cleans up
    logging.info("Saving and closing model.")
    experiment_name = os.path.basename(config_path).split('.')[0]
    model.save(model_name=experiment_name)
    model.close()

    # Prints elapsed time
    exec_time = time.time() - start
    logging.info("Total execution time: %.5E seconds", exec_time)
Пример #2
0
def main():
    """Run the test phase of an experiment described by a YAML configuration file.

    The path to the configuration file is read from ``sys.argv[1]``. The file
    must define ``experiment_settings``, ``data_parameters`` and
    ``ml_parameters``; ``path_parameters`` is optional and, when present and
    non-empty, switches the run to path-based sampling.

    Steps: load the dataset, optionally undersample it, build the model
    selected by ``experiment_settings["model_type"]`` (``cnn``, ``lstm`` or
    ``tcn``) and load its stored state under the configuration file's base
    name, predict on several generated test sets, log the test metrics, and
    pickle the ``[y_true, y_pred]`` pair into ``ml_parameters["model_folder"]``.

    Raises:
        AssertionError: If the command line does not hold exactly one
            argument, if the configuration is inconsistent with the selected
            model or undersampling option, or if a test batch has an
            unexpected shape.
        ValueError: If ``model_type`` is not one of the supported values.
    """

    start = time.time()
    logging.basicConfig(level="INFO")

    # Loads the .yaml data and unpacks it
    assert len(sys.argv) == 2, "Exactly one experiment configuration file must be "\
        "passed as a positional argument to this script. \n\n"\
        "E.g. `python run_non_tracking_experiment.py <path to .yaml file>`"
    config_path = sys.argv[1]
    with open(config_path, "r") as yaml_config_file:
        logging.info("Loading simulation settings from %s", config_path)
        # `yaml.load` without an explicit Loader is deprecated and rejected by
        # PyYAML >= 6. `safe_load` parses plain data only, which is all a
        # settings file needs, and never instantiates arbitrary Python objects.
        experiment_config = yaml.safe_load(yaml_config_file)
    experiment_settings = experiment_config['experiment_settings']
    data_parameters = experiment_config['data_parameters']
    ml_parameters = experiment_config['ml_parameters']
    # Optional section: its presence selects the path-based code paths below.
    path_parameters = experiment_config.get('path_parameters')

    # Loads the raw dataset
    logging.info("Loading the dataset...")
    data_preprocessor = Preprocessor(data_parameters)
    features, labels = data_preprocessor.load_dataset()
    if path_parameters:
        path_creator = PathCreator(data_parameters, path_parameters, labels)
        paths = path_creator.load_paths()

    # Undersamples the dataset (if requested)
    if experiment_settings.get("undersample_bf"):
        features = undersample_bf(features, data_parameters["beamformings"])
    # NOTE(review): the option is looked up in `experiment_settings`, but its value is read
    # from `data_parameters` -- confirm this split is intended.
    if "undersample_space" in experiment_settings:
        assert not path_parameters, "This option is not supported for tracking experiments, "\
            "unless the code for the path creation is updated"
        features, labels = undersample_space(
            features, labels, data_parameters["undersample_space"])

    # Initializes the model and prepares it for testing
    model_type = experiment_settings["model_type"].lower()
    logging.info("Initializing the model (type = %s)...", model_type)
    if model_type == "cnn":
        ml_parameters["input_type"] = "float"
        model = CNN(ml_parameters)
    elif model_type in ("lstm", "tcn"):
        assert path_parameters, "This model requires `paths_parameters`. See the example."
        # The sequence models consume whole paths, so the first input dimension
        # has to equal the number of time steps per path.
        assert path_parameters["time_steps"] == ml_parameters["input_shape"][0], (
            "The ML model first input dimention must match the length of the paths! "
            "(path length = {}, model input = {})".format(
                path_parameters["time_steps"], ml_parameters["input_shape"][0]))
        ml_parameters["input_type"] = "bool"
        if model_type == "lstm":
            model = LSTM(ml_parameters)
        else:
            model = TCN(ml_parameters)
    else:
        raise ValueError(
            "The simulation settings specified 'model_type'={}. Currently, only "
            "'cnn', 'lstm', and 'tcn' are supported.".format(
                experiment_settings["model_type"]))
    experiment_name = os.path.basename(config_path).split('.')[0]
    model.load(model_name=experiment_name)

    # Prediction loop
    if "tests_per_position" in experiment_settings:
        tests_per_input = experiment_settings["tests_per_position"]
    else:
        # Each path-based test set is drawn with `sample_fraction=0.1` below,
        # so ten times as many sets are generated.
        tests_per_input = experiment_settings["tests_per_path"] * 10
        logging.info(
            "Note - each set of paths will be split into 10 sub-sets, for easier RAM "
            "management -- that's why you'll see 10x test sets in the next logging messages."
        )

    # Batches are collected and stacked once after the loop; stacking inside
    # the loop would re-copy everything accumulated so far on every iteration.
    true_batches = []
    pred_batches = []
    for set_idx in range(tests_per_input):
        logging.info("Creating test set %2s out of %2s...", set_idx + 1,
                     tests_per_input)
        if path_parameters:
            features_test, labels_test, _ = sample_paths(paths["test"],
                                                         features,
                                                         labels,
                                                         experiment_settings,
                                                         data_parameters,
                                                         path_parameters,
                                                         sample_fraction=0.1)
        else:
            features_test, labels_test = create_noisy_features(
                features,
                labels,
                experiment_settings,
                data_parameters,
            )
        logging.info("Running predictions and storing data...\n")
        predictions_test = model.predict(features_test)
        # Validate the batch before storing it, so a malformed batch is
        # reported by these messages rather than by a stacking error.
        if true_batches:
            assert labels_test.shape[1] == true_batches[0].shape[1], \
                "The number of dimensions per sample "\
                "must stay constant!"
        assert labels_test.shape == predictions_test.shape, \
            "The predictions and the labels must have the "\
            "same shape!"
        true_batches.append(labels_test)
        pred_batches.append(predictions_test)

    # Keeps the `None` fallback when no test set was produced
    y_true = np.vstack(true_batches) if true_batches else None
    y_pred = np.vstack(pred_batches) if pred_batches else None

    # Closes the model, gets the test scores, and stores predictions-labels pairs
    model.close()
    logging.info("Computing test metrics...")
    rescale_factor = data_parameters["pos_grid"][0]
    test_score = score_predictions(y_true, y_pred,
                                   ml_parameters["validation_metric"])
    # Upscales the score back to the original scale
    test_score *= rescale_factor
    test_95_perc = get_95th_percentile(
        y_true, y_pred, rescale_factor=rescale_factor)
    logging.info("Average test distance: %.5f m || 95th percentile: %.5f m\n",
                 test_score, test_95_perc)
    predictions_path = os.path.join(
        ml_parameters["model_folder"],
        experiment_name + '_' + experiment_settings["predictions_file"])
    with open(predictions_path, 'wb') as data_file:
        pickle.dump([y_true, y_pred], data_file)

    # Prints elapsed time
    exec_time = time.time() - start
    logging.info("Total execution time: %.5E seconds", exec_time)