Example #1
0
def main(argv):
    """Parse command-line arguments and run the ranking pipeline end to end.

    Args:
        argv: list of raw command-line argument strings.
    """
    parsed_args: Namespace = get_args(argv)

    # Build the relevance pipeline and execute it in the requested mode
    pipeline = RankingPipeline(args=parsed_args)
    pipeline.run()
Example #2
0
    def setUp(
        self,
        output_dir: str = OUTPUT_DIR,
        root_data_dir: str = ROOT_DATA_DIR,
        feature_config_fname: str = FEATURE_CONFIG_FNAME,
    ):
        """Prepare a clean test fixture.

        Creates a fresh output directory, pins all RNG seeds for
        repeatability, parses default args redirected at the temp
        directory, loads the model config YAML and wires up file logging.
        """
        self.output_dir = output_dir
        self.root_data_dir = root_data_dir
        self.feature_config_fname = feature_config_fname

        # Fresh temp directory for model/log artifacts (cleared each run)
        file_io.make_directory(self.output_dir, clear_dir=True)

        # Deterministic runs: reset the TF session and pin every RNG seed
        tf.keras.backend.clear_session()
        np.random.seed(123)
        tf.random.set_seed(123)
        random.seed(123)

        # Default CLI args, with model/log outputs pointed at the temp dir
        self.args: Namespace = get_args([])
        self.args.models_dir = output_dir
        self.args.logs_dir = output_dir

        # Parse the model configuration YAML referenced by the args
        self.model_config = file_io.read_yaml(self.args.model_config)

        # Route log output to a CSV file inside the logs directory
        log_file: str = os.path.join(self.args.logs_dir, "output_log.csv")
        self.logger = setup_logging(
            reset=True, file_name=log_file, log_to_file=True
        )
Example #3
0
def create_parse_args(data_dir, feature_config, model_config, logs_dir,
                      num_folds, use_testset_in_folds):
    """Build parsed pipeline args for a k-fold cross-validation test run.

    Args:
        data_dir: path to the input data directory.
        feature_config: path to the feature config file.
        model_config: path to the model config file.
        logs_dir: directory to write logs into.
        num_folds: number of folds for k-fold cross validation.
        use_testset_in_folds: whether the test set participates in the folds.

    Returns:
        Parsed args namespace produced by get_args.
    """
    # Each (flag, value) pair is flattened into the argv token list below
    cli_options = [
        ("--data_dir", data_dir),
        ("--feature_config", feature_config),
        ("--kfold", str(num_folds)),
        ("--include_testset_in_kfold", str(use_testset_in_folds)),
        ("--run_id", "testing_kfold_cs"),
        ("--data_format", "csv"),
        ("--execution_mode", "train_inference_evaluate"),
        ("--num_epochs", "1"),
        ("--model_config", model_config),
        ("--batch_size", "4"),
        ("--logs_dir", logs_dir),
        ("--max_sequence_size", "25"),
        ("--train_pcent_split", "1.0"),
        ("--val_pcent_split", "-1"),
        ("--test_pcent_split", "-1"),
    ]
    argv = [token for pair in cli_options for token in pair]
    return get_args(argv)
Example #4
0
def train_ml4ir(data_dir, feature_config, model_config, logs_dir):
    """Train a pointwise ranker with a listwise loss using ml4ir.

    Builds the command-line argv for a train+evaluate run with the
    rank-one ListNet loss, then constructs and runs the RankingPipeline.

    Args:
        data_dir: path to the input data directory.
        feature_config: path to the feature config file.
        model_config: path to the model config file.
        logs_dir: directory to write logs into.
    """
    # Groups may hold more than one value per flag (e.g. --metrics_keys);
    # they are flattened into the final argv token list below.
    flag_groups = [
        ("--data_dir", data_dir),
        ("--feature_config", feature_config),
        ("--loss_type", "listwise"),
        ("--scoring_type", "listwise"),
        ("--run_id", "test_command_line"),
        ("--data_format", "csv"),
        ("--execution_mode", "train_evaluate"),
        ("--loss_key", "rank_one_listnet"),
        ("--num_epochs", "150"),
        ("--model_config", model_config),
        ("--batch_size", "1"),
        ("--logs_dir", logs_dir),
        ("--max_sequence_size", "25"),
        ("--train_pcent_split", "1.0"),
        ("--val_pcent_split", "-1"),
        ("--test_pcent_split", "-1"),
        ("--early_stopping_patience", "25"),
        ("--metrics_keys", "MRR", "categorical_accuracy"),
        ("--monitor_metric", "categorical_accuracy"),
    ]
    argv = [token for group in flag_groups for token in group]
    pipeline = RankingPipeline(args=get_args(argv))
    pipeline.run()