示例#1
0
def main() -> None:
    parser = argparse.ArgumentParser(
        description="Runs Neural Monkey as a web server.")
    parser.add_argument("--port", type=int, default=5000)
    parser.add_argument("--host", type=str, default="127.0.0.1")
    parser.add_argument("--configuration", type=str, required=True)
    parser.add_argument("--preprocess", type=str,
                        required=False, default=None)
    args = parser.parse_args()

    print("")

    if args.preprocess is not None:
        preprocessing = Configuration()
        preprocessing.add_argument("preprocess")
        preprocessing.load_file(args.preprocess)
        preprocessing.build_model()
        APP.config["preprocess"] = preprocessing.model.preprocess
    else:
        APP.config["preprocess"] = []

    exp = Experiment(config_path=args.configuration)
    exp.build_model()
    APP.config["experiment"] = exp
    APP.run(port=args.port, host=args.host)
示例#2
0
def main() -> None:
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(sys.argv[1])
    CONFIG.build_model()
    test_datasets.load_file(sys.argv[2])
    test_datasets.build_model()
    datesets_model = test_datasets.model
    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datesets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    for dataset in datesets_model.test_datasets:
        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager,
            CONFIG.model.runners,
            dataset,
            CONFIG.model.postprocess,
            write_out=True)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
示例#3
0
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-TEST-DATASETS",
                        help="the configuration of the test datasets")
    parser.add_argument("-g",
                        "--grid",
                        dest="grid",
                        action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    for dataset in datasets_model.test_datasets:
        if args.grid:
            if ("SGE_TASK_FIRST" not in os.environ
                    or "SGE_TASK_LAST" not in os.environ
                    or "SGE_TASK_STEPSIZE" not in os.environ
                    or "SGE_TASK_ID" not in os.environ):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            length = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}".format(
                start // length, start, length))

            dataset = dataset.subset(start, length)

        if exp.config.args.evaluation is None:
            exp.run_model(dataset, write_out=True)
        else:
            exp.evaluate(dataset, write_out=True)

    for session in exp.config.model.tf_manager.sessions:
        session.close()
示例#4
0
def load_runtime_config(config_path: str) -> argparse.Namespace:
    """Load a runtime configuration file."""
    cfg = Configuration()
    cfg.add_argument("test_datasets")
    cfg.add_argument("variables", cond=lambda x: isinstance(x, list))

    cfg.load_file(config_path)
    cfg.build_model()
    return cfg.model
示例#5
0
def main() -> None:
    parser = argparse.ArgumentParser(
        description="Runs Neural Monkey as a web server.")
    parser.add_argument("--port", type=int, default=5000)
    parser.add_argument("--host", type=str, default="127.0.0.1")
    parser.add_argument("--configuration", type=str, required=True)
    parser.add_argument("--preprocess", type=str, required=False, default=None)
    args = parser.parse_args()

    print("")

    if args.preprocess is not None:
        preprocessing = Configuration()
        preprocessing.add_argument("preprocess")
        preprocessing.load_file(args.preprocess)
        preprocessing.build_model()
        APP.config["preprocess"] = preprocessing.model.preprocess
    else:
        APP.config["preprocess"] = []

    exp = Experiment(config_path=args.configuration)
    exp.build_model()
    APP.config["experiment"] = exp
    APP.run(port=args.port, host=args.host)
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("--beam",
                        metavar="BEAM_SIZE",
                        type=int,
                        default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm",
                        type=str,
                        default=None,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--lm-weight",
                        type=float,
                        help="Weight of the language model.")
    parser.add_argument("--null-trail-weight",
                        type=float,
                        help="Weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight",
                        type=float,
                        help="Weight of the null-token ratio feature.")
    parser.add_argument("--out", type=str, help="Path to the output file.")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    ctc_decoder = None
    for runner in exp.model.runners:
        if (isinstance(runner, PlainRunner)
                and isinstance(runner.decoder, CTCDecoder)):
            ctc_decoder = runner.decoder
            break

    if ctc_decoder is None:
        raise ValueError(
            "Was not able to detect CTC decoder in the configuration.")

    logits_runner = RepresentationRunner(output_series="logits",
                                         encoder=ctc_decoder,
                                         attribute="logits")
    exp.model.runners = [logits_runner]

    dataset = datasets_model.test_datasets[0]
    singleton_batches = dataset.batches(BatchingScheme(1))
    print("Loading language model")
    lm = NGramModel(args.kenlm)
    print("LM loaded")

    weights = {}

    if args.lm_weight:
        weights['lm_score'] = args.lm_weight

    if args.null_trail_weight:
        weights['null_trailing'] = args.null_trail_weight

    if args.nt_ratio_weight:
        weights['null_token_ratio'] = args.nt_ratio_weight

    print("Weights:", weights)

    i = 0
    stats = []

    with open(args.out, 'w') as out_file:
        for sent_dataset in singleton_batches:

            t1 = timeit.default_timer()
            ctc_model_result = exp.run_model(sent_dataset,
                                             write_out=False,
                                             batch_size=1)
            t2 = timeit.default_timer()

            logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)

            t3 = timeit.default_timer()
            best_hyp = decode_beam(logits,
                                   args.beam,
                                   ctc_decoder.vocabulary,
                                   lm=lm,
                                   weights=weights)
            t4 = timeit.default_timer()

            stats.append([len(best_hyp.tokens), t2 - t1, t4 - t3])

            output = " ".join([best_hyp.tokens][0])
            out_file.write(output + "\n")

            if i % 10 == 0:
                print("[{}] {}".format(i, output))
            i += 1

    with open(args.out + ".stats", 'w') as stats_file:
        for line in stats:
            stats_file.write("{} {:.3f} {:.3f}\n".format(*line))

    for session in exp.config.model.tf_manager.sessions:
        session.close()
示例#7
0
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument('datasets',
                        metavar='INI-TEST-DATASETS',
                        help="the configuration of the test datasets")
    parser.add_argument("-g",
                        "--grid",
                        dest="grid",
                        action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(args.config)
    CONFIG.build_model()
    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model
    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    for dataset in datasets_model.test_datasets:
        if args.grid:
            if ("SGE_TASK_FIRST" not in os.environ
                    or "SGE_TASK_LAST" not in os.environ
                    or "SGE_TASK_STEPSIZE" not in os.environ
                    or "SGE_TASK_ID" not in os.environ):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            length = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}".format(
                start // length, start, length))

            dataset = dataset.subset(start, length)

        if CONFIG.model.runners_batch_size is None:
            runners_batch_size = CONFIG.model.batch_size
        else:
            runners_batch_size = CONFIG.model.runners_batch_size

        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager,
            CONFIG.model.runners,
            dataset,
            CONFIG.model.postprocess,
            write_out=True,
            batch_size=runners_batch_size,
            log_progress=60)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets",
                        metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("--beam",
                        metavar="BEAM_SIZE",
                        type=int,
                        default=10,
                        help="Beam size.")
    parser.add_argument("--kenlm",
                        type=str,
                        help="Path to a KenLM model arpa file.")
    parser.add_argument("--prefix",
                        type=str,
                        help="Path used as a prefix of stored checkpoints.")
    parser.add_argument("--lm-weight",
                        type=float,
                        help="Default weight of the language model.")
    parser.add_argument("--null-trail-weight",
                        type=float,
                        help="Default weight of the null-trailing feature.")
    parser.add_argument("--nt-ratio-weight",
                        type=float,
                        help="Default weight of the null-token ratio feature.")

    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument("test_datasets")
    test_datasets.add_argument("batch_size", cond=lambda x: x > 0)
    test_datasets.add_argument("variables", cond=lambda x: isinstance(x, list))

    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    exp = Experiment(config_path=args.config)
    exp.build_model()
    exp.load_variables(datasets_model.variables)

    weights = {}

    if args.lm_weight is not None:
        weights['lm_score'] = args.lm_weight

    if args.null_trail_weight is not None:
        weights['null_trailing'] = args.null_trail_weight

    if args.nt_ratio_weight is not None:
        weights['null_token_ratio'] = args.nt_ratio_weight

    if not weights:
        raise ValueError("No default weights specified, nothing to train.")

    ctc_decoder = None
    for runner in exp.model.runners:
        if (isinstance(runner, PlainRunner)
                and isinstance(runner.decoder, CTCDecoder)):
            ctc_decoder = runner.decoder
            break

    if ctc_decoder is None:
        raise ValueError(
            "Was not able to detect CTC decoder in the configuration.")

    print("Loading language model")
    lm = NGramModel(args.kenlm)
    print("LM loaded")

    logits_runner = RepresentationRunner(output_series="logits",
                                         encoder=ctc_decoder,
                                         attribute="logits")
    exp.model.runners = [logits_runner]

    dataset = datasets_model.test_datasets[0]
    singleton_batches = dataset.batches(BatchingScheme(1))

    DATASET_SIZE = dataset.length
    CHECKPOINTS = 5
    CHECKPOINT_ITERS = int(DATASET_SIZE / CHECKPOINTS)

    print(
        "{} sentences in the dataset, checkpoint every {} sentences ({} checkpoints in total)."
        .format(DATASET_SIZE, CHECKPOINT_ITERS, CHECKPOINTS))

    for i, sent_dataset in enumerate(singleton_batches):
        ctc_model_result = exp.run_model(sent_dataset,
                                         write_out=False,
                                         batch_size=1)

        logits = np.squeeze(ctc_model_result[1]['logits'], axis=1)
        target = ctc_model_result[2]['target'][0]

        train_weights(logits, args.beam, ctc_decoder.vocabulary, target,
                      weights, lm)

        print(
            "[{}] Weights:".format(i + 1), ", ".join([
                "{}: {:.3f}".format(key, value)
                for key, value in weights.items()
            ]))

        if i != 0 and (i + 1) % CHECKPOINT_ITERS == 0:
            with open("{}.{}".format(args.prefix, int(i / CHECKPOINT_ITERS)),
                      "w") as f:
                for key, value in weights.items():
                    f.write("{}={:.3f}\n".format(key.upper(), value))

            print("\nCheckpoint saved.\n")

    for session in exp.config.model.tf_manager.sessions:
        session.close()