Example #1
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_str("dataset", default="woz")
    parser.add_pth("exp-dir",
                   is_dir=True,
                   must_exist=True,
                   required=True,
                   help="Path to an experiment folder.")
    # model options
    parser.add_int("gpu",
                   min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # embedding evaluation options
    parser.add_str("embed-type",
                   default="glove",
                   choices=("glove", "bin", "hdf5"),
                   help="Type of embedding to load for evaluation.")
    parser.add_pth("embed-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "tests/data/glove/"
                       "glove.840B.300d.woz.txt")),
                   help="Path to embedding file for evaluation.")
    # input/output options
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    # inference options
    parser.add_int("seed", help="Random seed.")
    return parser
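
For readers unfamiliar with yaap, the declarations above map closely onto the standard library's argparse. Below is a rough equivalent of the first three options; a sketch only, since yaap's add_pth additionally validates that the path exists and is a directory, which plain argparse does not do by itself.

import argparse
import pathlib

def create_argparse_equivalent():
    # Rough argparse counterpart of the yaap options above; the
    # must_exist/is_dir checks are omitted here.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="woz")
    parser.add_argument("--exp-dir", type=pathlib.Path, required=True,
                        help="Path to an experiment folder.")
    parser.add_argument("--gpu", type=int,
                        help="GPU device to use. (e.g. 0, 1, etc.)")
    return parser
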
Example #2
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_str("dataset", default="woz")
    parser.add_pth("exp-dir", is_dir=True, must_exist=True, required=True,
                   help="Path to an experiment folder.")
    parser.add_str("dirname", default="z-interp",
                   help="Name of the resulting directory.")
    parser.add_str("anchor1", regex=r"(train|dev|test)-\d+",
                   help="Data index of the first anchor. If not provided, "
                        "a random data point will be chosen.")
    parser.add_str("anchor2", regex=r"(train|dev|test)-\d+",
                   help="Data index of the second anchor. If not provided, "
                        "a random data point will be chosen.")
    # interpolation options
    parser.add_int("steps", default=10,
                   help="Number of intermediate steps between two data points.")
    # model options
    parser.add_int("gpu", min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # display options
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    return parser
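
The regex constraint on "anchor1"/"anchor2" above can be emulated in plain argparse with a custom type function. A minimal sketch, assuming yaap requires the whole value to match the pattern:

import argparse
import re

ANCHOR_PATTERN = re.compile(r"(train|dev|test)-\d+")

def anchor(value):
    # Reject values that do not fully match the anchor pattern,
    # e.g. accept "train-12" but reject "train-12x".
    if not ANCHOR_PATTERN.fullmatch(value):
        raise argparse.ArgumentTypeError(
            "%r does not match %r" % (value, ANCHOR_PATTERN.pattern))
    return value
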
Example #3
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_pth("data-path", must_exist=True,
                   help="Path to the data. If not given, then the data "
                        "will be generated from the model's prior.")
    parser.add_pth("processor-path", must_exist=True, required=True,
                   help="Path to the processor pickle file.")
    # model options
    parser.add_pth("model-path", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/vhda-mini.yml")),
                   help="Path to the model configuration file.")
    parser.add_pth("ckpt-path", must_exist=True, required=True,
                   help="Path to the model checkpoint.")
    # model-specific options (TDA)
    parser.add_flt("conv-scale", default=1.0,
                   help="Scale to introduce into conv vector "
                        "for TDA generation.")
    parser.add_flt("spkr-scale", default=1.0,
                   help="Scale to introduce into spkr vector "
                        "for TDA generation.")
    parser.add_flt("goal-scale", default=1.0,
                   help="Scale to introduce into goal vector "
                        "for TDA generation.")
    parser.add_flt("state-scale", default=1.0,
                   help="Scale to introduce into state vector "
                        "for TDA generation.")
    parser.add_flt("sent-scale", default=1.0,
                   help="Scale to introduce into sent vector "
                        "for TDA generation.")
    # model-specific options (general)
    parser.add_int("beam-size", default=4,
                   help="Beam search beam size.")
    parser.add_int("max-sent-len", default=30,
                   help="Beam search maximum sentence length.")
    # generation options
    parser.add_int("batch-size", default=32,
                   help="Mini-batch size.")
    parser.add_bol("validate-dst",
                   help="Whether to validate generated samples "
                        "to be a valid dst dialogs.")
    parser.add_bol("validate-unique",
                   help="Whether to validate by checking uniqueness.")
    parser.add_int("instances",
                   help="Number of dialog instances to generate. "
                        "If not given, the same number of instances "
                        "as the data will be generated.")
    # misc options
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir", default=pathlib.Path("out"),
                   help="Directory to save output generation files.")
    parser.add_int("gpu", min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    parser.add_int("seed", help="Random seed.")
    return parser
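
The "instances" option documents a fallback: when omitted, generate as many dialogs as the data contains. That presumably reduces to something like the sketch below, where args and data are hypothetical names from the surrounding script:

def num_instances(args, data):
    # Fall back to the dataset size when --instances is not given.
    return args.instances if args.instances is not None else len(data)
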
Example #4
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap(
        desc="Create z-interpolation between two random data points")
    # data options
    parser.add_pth("data-dir",
                   is_dir=True,
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "tests/data/json")),
                   help="Path to the data dir. Must contain 'train.json' and "
                   "'dev.json'.")
    parser.add_str("splits",
                   is_list=True,
                   default=("train", ),
                   choices=("train", "dev", "test"),
                   help="List of splits to evaluate on.")
    parser.add_pth("processor-path",
                   required=True,
                   must_exist=True,
                   help="Path to the processor pickle file.")
    parser.add_str("anchor1",
                   regex=r"(train|dev|test)-\d+",
                   help="Data index of the first anchor. If not provided, "
                   "a random data point will be chosen.")
    parser.add_str("anchor2",
                   regex=r"(train|dev|test)-\d+",
                   help="Data index of the second anchor. If not provided, "
                   "a random data point will be chosen.")
    # interpolation options
    parser.add_int(
        "steps",
        default=10,
        help="Number of intermediate steps between two data points.")
    # model options
    parser.add_pth("model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/vhda-mini.yml")),
                   help="Path to the model configuration file.")
    parser.add_pth("ckpt-path",
                   required=True,
                   must_exist=True,
                   help="Path to the model checkpoint.")
    parser.add_int("gpu",
                   min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # display options
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir",
                   default="out",
                   is_dir=True,
                   help="Directory to save output files.")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    return parser
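
The parser description ("z-interpolation between two random data points") together with the "steps" option suggests evenly spaced latent points between two anchors. A minimal sketch, assuming plain linear interpolation in latent space:

import numpy as np

def interpolate(z1, z2, steps=10):
    # Return both anchors plus `steps` evenly spaced latents between them.
    return [(1 - t) * z1 + t * z2
            for t in np.linspace(0.0, 1.0, steps + 2)]
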
Example #5
File: run.py Project: kaniblu/vhda
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("../../tests/config/logging.yml")),
                   help="Path to a logging configuration file.")
    parser.add_pth("data-dir", is_dir=True, must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("../../tests/data/json")),
                   help="Path to a json-format dialogue dataset.")
    parser.add_pth("save-dir", is_dir=True, default="out",
                   help="Saving directory.")
    parser.add_bol("overwrite", help="Whether to overwrite.")
    parser.add_pth("model-path", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/glad-mini.yml")))
    parser.add_int("epochs", default=10, help="Number of epochs")
    parser.add_int("batch-size", default=32, help="Batch size.")
    parser.add_bol("scheduled-lr", help="Enable scheduled learning rate.")
    parser.add_str("scheduler-cls", default="StepLR",
                   help="Name of the scheduler class under "
                        "`torch.optim.lr_scheduler` package.")
    parser.add_str("scheduler-kwargs",
                   default="{\"step\": 10, \"gamma\": 0.8}",
                   help="Keyword arguments for the scheduler class, given "
                        "as a serialized json dictionary.")
    parser.add_str("loss", default="sum", choices=("sum", "mean"),
                   help="Type of loss aggregation ('sum' or 'mean').")
    parser.add_flt("l2norm",
                   help="Weight of l2norm regularization.")
    parser.add_flt("gradient-clip",
                   help="Clipping bounds for gradients.")
    parser.add_bol("train-validate",
                   help="Whether to validate on the training set as well.")
    parser.add_bol("early-stop", help="Whether to early stop.")
    parser.add_str("early-stop-criterion", default="joint-goal",
                   help="Early stopping criterion.")
    parser.add_int("early-stop-patience",
                   help="Number of epochs to wait until early stopped.")
    parser.add_bol("validate-asr",
                   help="Whether to use asr information during validation.")
    parser.add_bol("test-asr",
                   help="Whether to use asr information during testing.")
    parser.add_str("asr-method", default="scaled",
                   choices=("score", "uniform", "ones", "scaled"),
                   help="Type of aggregation method to use when summing output "
                        "scores during asr-enabled evaluation.")
    parser.add_str("asr-sigmoid-sum-order", default="sigmoid-sum",
                   help="The order of sum and sigmoid operations in ASR mode.")
    parser.add_int("asr-topk", min_bound=1,
                   help="Number of top-k candidates.")
    parser.add_bol("save-ckpt",
                   help="Whether to save the final checkpoint. "
                        "If enabled, it will be saved as 'ckpt.pth'"
                        "under save_dir.")
    parser.add_int("gpu", help="gpu device id.")
    return parser
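
The "scheduler-cls"/"scheduler-kwargs" pair implies that the training code looks up a class in torch.optim.lr_scheduler by name and instantiates it with the JSON-decoded keyword arguments. A sketch of that assumed wiring (the project's actual training loop is not shown here):

import json
import torch

def make_scheduler(optimizer, cls_name="StepLR",
                   kwargs_json="{\"step_size\": 10, \"gamma\": 0.8}"):
    # Resolve the scheduler class by name and pass the decoded kwargs.
    scheduler_cls = getattr(torch.optim.lr_scheduler, cls_name)
    return scheduler_cls(optimizer, **json.loads(kwargs_json))
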
Example #6
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap("Conduct generative data augmentation experiments "
                       "(with easier arguments).")
    # data options
    parser.add_str("dataset", default="woz")
    parser.add_pth("exp-dir",
                   is_dir=True,
                   must_exist=True,
                   required=True,
                   help="Path to an experiment folder.")
    # model options
    parser.add_str("dst-model",
                   default="gce-ft-wed",
                   help="Name of the DST model.")
    # model-specific options (TDA)
    parser.add_flt("scale",
                   is_list=True,
                   num_elements=5,
                   default=(1.0, 1.0, 1.0, 1.0, 1.0),
                   help="Scale to introduce into conv, goal and sent vector "
                   "for TDA generation.")
    # generation options
    parser.add_int("gen-runs", default=3, help="Number of generation trials")
    parser.add_flt("multiplier",
                   default=1.0,
                   help="Ratio of dialog instances to generate. ")
    parser.add_bol("validate-unique",
                   help="Whether to validate by checking uniqueness.")
    # DST options
    parser.add_int("batch-size", default=100)
    parser.add_int("dst-runs",
                   default=5,
                   help="Number of DST models to train and evaluate using "
                   "different seeds.")
    parser.add_int("epochs",
                   default=200,
                   help="Number of epochs to train DST. "
                   "The actual number of epochs will be scaled by "
                   "the multiplier.")
    parser.add_bol("test-asr",
                   help="Whether to use asr information during testing.")
    # misc options
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir",
                   default=pathlib.Path("out"),
                   help="Directory to save output generation files.")
    parser.add_int("gpu",
                   min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    parser.add_int("seed", help="Random seed.")
    return parser
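
The five-element "scale" option evidently collapses the separate conv/spkr/goal/state/sent scales from the other examples into a single list. A sketch of the assumed unpacking (the ordering is inferred from the per-vector options elsewhere, not confirmed by this snippet):

def split_scales(scale):
    # Assumed ordering: conv, spkr, goal, state, sent.
    conv, spkr, goal, state, sent = scale
    return {"conv": conv, "spkr": spkr, "goal": goal,
            "state": state, "sent": sent}
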
Example #7
import yaap

def create_parser():
    parser = yaap.Yaap()
    parser.add_pth("data-path", must_exist=True, required=True,
                   help="Path to the dialog data.")
    parser.add_str("data-format", default="json",
                   choices=("woz", "json", "dstc"),
                   help="Data format of the data to be loaded.")
    parser.add_int("max-column-length", default=50,
                   help="Maximum length of each column of formatted table.")
    parser.add_str("save-format", default="human",
                   choices=("human", "woz", "json"),
                   help="Output data format.")
    parser.add_pth("save-path",
                   help="Path to save the resulting text file.")
    return parser
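
"max-column-length" bounds each column of the formatted table. A plausible clipping helper, as a sketch only (this is not the project's actual formatter):

def clip_cell(cell, max_len=50):
    # Truncate over-long cells and mark the cut with an ellipsis.
    return cell if len(cell) <= max_len else cell[:max_len - 3] + "..."
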
Example #8
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_pth("data-dir", is_dir=True, must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("tests/data/json")),
                   help="Path to the data dir. Must contain 'train.json' and "
                        "'dev.json'.")
    parser.add_str("eval-splits", is_list=True,
                   default=("train", "dev", "test"),
                   choices=("train", "dev", "test"),
                   help="List of splits to evaluate on.")
    parser.add_pth("processor-path", required=True, must_exist=True,
                   help="Path to the processor pickle file.")
    # model options
    parser.add_pth("model-path", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/vhda-mini.yml")),
                   help="Path to the model configuration file.")
    parser.add_pth("ckpt-path", required=True, must_exist=True,
                   help="Path to the model checkpoint.")
    parser.add_int("gpu", min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # embedding evaluation options
    parser.add_str("embed-type", default="glove",
                   choices=("glove", "bin", "hdf5"),
                   help="Type of embedding to load for evaluation.")
    parser.add_pth("embed-path", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("tests/data/glove/"
                                      "glove.840B.300d.woz.txt")),
                   help="Path to embedding file for evaluation.")
    # input/output options
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir", default="out",
                   help="Path to save evaluation results.")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    # inference options
    parser.add_int("batch-size", default=32,
                   help="Mini-batch size.")
    parser.add_int("beam-size", default=4)
    parser.add_int("max-conv-len", default=20)
    parser.add_int("max-sent-len", default=30)
    parser.add_int("seed", help="Random seed.")
    return parser
Example #9
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "../../tests/config/logging.yml")),
                   help="Path to a logging configuration file.")
    parser.add_pth("save-path",
                   default="out.json",
                   help="Path to save evaluation results (json).")
    parser.add_pth("data-dir",
                   is_dir=True,
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "../../tests/data/json")),
                   help="Path to a json-format dialogue dataset.")
    parser.add_pth("model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/glad-mini.yml")))
    parser.add_pth("ckpt-path",
                   must_exist=True,
                   required=True,
                   help="Path to a checkpoint file.")
    parser.add_pth("processor-path",
                   must_exist=True,
                   required=True,
                   help="Path to a processor object.")
    parser.add_int("batch-size", default=32, help="Batch size.")
    parser.add_str("loss",
                   default="sum",
                   choices=("sum", "mean"),
                   help="Type of loss aggregation ('sum' or 'mean').")
    parser.add_bol("test-asr",
                   help="Whether to use asr information during testing.")
    parser.add_str(
        "asr-method",
        default="scaled",
        choices=("score", "uniform", "ones", "scaled"),
        help="Type of aggregation method to use when summing output "
        "scores during asr-enabled evaluation.")
    parser.add_str("asr-sigmoid-sum-order",
                   default="sigmoid-sum",
                   help="The order of sum and sigmoid operations in ASR mode.")
    parser.add_int("asr-topk", min_bound=1, help="Number of top-k candidates.")
    parser.add_int("gpu", help="gpu device id.")
    return parser
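
The "asr-method" choices suggest four ways of weighting per-hypothesis ASR scores before they are summed. The semantics below are an assumption for illustration and are not taken from the project's evaluation code:

def asr_weights(scores, method="scaled"):
    # Assumed semantics: raw scores, equal mass, all ones, or
    # scores normalized to sum to one.
    if method == "score":
        return list(scores)
    if method == "uniform":
        return [1.0 / len(scores)] * len(scores)
    if method == "ones":
        return [1.0] * len(scores)
    if method == "scaled":
        total = sum(scores)
        return [s / total for s in scores]
    raise ValueError("unknown asr-method: %s" % method)
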
Example #10
import yaap

def create_parser():
    parser = yaap.Yaap(
        desc="Conducts t-test on a summary or a pair of summary files.")
    parser.add_pth("summary-path", required=True, must_exist=True)
    parser.add_pth("other-path",
                   must_exist=True,
                   help="If supplied, the script will perform unpaired "
                   "t-test of two individual samples on common keys.")
    parser.add_str("expected-mean",
                   is_list=True,
                   regex=r"(.+)=(.+)",
                   help="Expected mean for summary samples. Must be "
                   "given as a pair of key and expected population "
                   "mean (e.g. 'loss=0.3'). Could supply multiple "
                   "pairs. Applicable only when no other summary "
                   "path is supplied.")
    parser.add_pth("save-path")
    return parser
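
The "expected-mean" option feeds key/population-mean pairs into a one-sample t-test. A sketch of the parsing and the test; scipy.stats.ttest_1samp is the standard call, while the helper names here are hypothetical:

from scipy import stats

def parse_expected_means(pairs):
    # Parse 'loss=0.3'-style pairs (the regex above enforces this shape).
    return {key: float(val)
            for key, val in (pair.split("=", 1) for pair in pairs)}

def one_sample_ttest(samples, expected_mean):
    return stats.ttest_1samp(samples, popmean=expected_mean)
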
Example #11
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    parser.add_pth("model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "examples/model-vhda.yml")),
                   help="Path to a base model path.")
    parser.add_pth("run-path",
                   must_exist=True,
                   required=True,
                   help="Path to a base run configuration path.")
    parser.add_str("storage",
                   format="url",
                   default="sqlite:///examples/study.db",
                   help="Optuna database url supported by sqlalchemy.")
    parser.add_str("study-name", default="default", help="Optuna study name.")
    parser.add_int("num-trials", help="Number of trials.")
    parser.add_int("num-jobs", default=1, help="Number of concurrent jobs.")
    parser.add_flt("timeout", help="Timeout for a single trial in seconds.")
    return parser
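
The storage URL, study name, trial count, and job count map directly onto Optuna's study API. A minimal sketch with a stub objective; the real objective would be built from "model-path"/"run-path", which is project-specific:

import optuna

def run_study(storage="sqlite:///examples/study.db", study_name="default",
              num_trials=10, num_jobs=1):
    study = optuna.create_study(storage=storage, study_name=study_name,
                                load_if_exists=True)
    # Stub objective; a real one would train and evaluate a model per trial.
    study.optimize(lambda trial: 0.0, n_trials=num_trials, n_jobs=num_jobs)
    return study
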
Example #12
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).parent
                            .joinpath("../examples/logging.yml")),
                   help="Path to logging configuration file.")
    parser.add_pth("data-dir", is_dir=True, must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("examples")),
                   help="Path to the data dir. Must contain 'train.json', "
                        "'valid.json' and 'test.json'.")
    parser.add_str("data-format", default="json",
                   choices=("woz", "json", "dstc"),
                   help="Data format of the data to be loaded.")
    parser.add_pth("glad-dir", is_dir=True, must_exist=True,
                   default=(pathlib.Path(__file__).parent
                            .joinpath("../dst/glad").absolute()),
                   help="Directory to an existing glad codebase.")
    parser.add_pth("save-dir", is_dir=True, default="out-glad",
                   help="Directory for saving output files.")
    parser.add_int("max-epochs", min_bound=1, default=50,
                   help="Maximum epochs to train models.")
    parser.add_int("batch-size", min_bound=1, default=50,
                   help="Mini-batch size during stochastic gd.")
    parser.add_flt("emb-dropout", default=0.2,
                   help="Embedding dropout.")
    parser.add_flt("local-dropout", default=0.2,
                   help="Local dropout.")
    parser.add_flt("global-dropout", default=0.2,
                   help="Global dropout.")
    parser.add_str("early-stop-criterion", default="joint_goal",
                   choices=("joint_goal", "turn_inform",
                            "turn_request", "hmean"))
    parser.add_int("runs", default=10,
                   help="Number of runs to execute and aggregate.")
    parser.add_int("gpu", help="Index of specific GPU device to use.")
    return parser
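
Among the early-stopping criteria, "hmean" presumably denotes the harmonic mean of the three tracking metrics. A sketch under that assumption:

from statistics import harmonic_mean

def hmean_criterion(joint_goal, turn_inform, turn_request):
    # Assumed: "hmean" is the harmonic mean of the three DST metrics.
    return harmonic_mean([joint_goal, turn_inform, turn_request])
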
Example #13
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_pth("data-dir",
                   is_dir=True,
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "tests/data/json")),
                   help="Path to the data dir. Must contain 'train.json' and "
                   "'dev.json'.")
    parser.add_str("eval-splits",
                   default=("train", "dev", "test"),
                   choices=("train", "dev", "test"),
                   help="List of splits to evaluate on.")
    # model options
    parser.add_pth("gen-model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/vhda-mini.yml")),
                   help="Path to the generative model configuration file.")
    parser.add_pth("dst-model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "dst/internal/configs/gce.yml")),
                   help="Path to the dst model configuration file.")
    parser.add_int("gpu",
                   min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # model-specific options (TDA)
    parser.add_flt("conv-scale",
                   default=1.0,
                   help="Scale to introduce into conv vector "
                   "for TDA generation.")
    parser.add_flt("spkr-scale",
                   default=1.0,
                   help="Scale to introduce into spkr vector "
                   "for TDA generation.")
    parser.add_flt("goal-scale",
                   default=1.0,
                   help="Scale to introduce into goal vector "
                   "for TDA generation.")
    parser.add_flt("state-scale",
                   default=1.0,
                   help="Scale to introduce into state vector "
                   "for TDA generation.")
    parser.add_flt("sent-scale",
                   default=1.0,
                   help="Scale to introduce into sent vector "
                   "for TDA generation.")
    # display options
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir",
                   default="out",
                   is_dir=True,
                   help="Directory to save output files.")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    parser.add_int("report-every",
                   help="Report training statistics every N steps.")
    # training options
    parser.add_int("batch-size", default=32, help="Mini-batch size.")
    parser.add_str("optimizer",
                   default="adam",
                   choices=("adam", ),
                   help="Optimizer to use.")
    parser.add_flt("gradient-clip", help="Clip gradients by norm size.")
    parser.add_flt("l2norm-weight", help="Weight of l2-norm regularization.")
    parser.add_flt("learning-rate",
                   default=0.001,
                   min_bound=0,
                   help="Optimizer learning rate.")
    parser.add_int("epochs",
                   default=10,
                   min_bound=1,
                   help="Number of epochs to train for.")
    parser.add_str("kld-schedule",
                   help="KLD w schedule given as a list of data points. Each "
                   "data point is a pair of training step and target "
                   "dropout scale. Steps in-between data points will be "
                   "interpolated. e.g. '[(0, 1.0), (10000, 0.1)]'")
    parser.add_str(
        "dropout-schedule",
        help="Dropout schedule given as a list of data points. Each "
        "data point is a pair of training step and target "
        "dropout scale. Steps in-between data points will be "
        "interpolated. e.g. '[(0, 1.0), (10000, 0.1)]'")
    parser.add_bol("disable-kl", help="Whether to disable kl-divergence term.")
    parser.add_str("kl-mode",
                   default="kl-mi",
                   help="KL mode: one of kl, kl-mi, kl-mi+.")
    # validation options
    parser.add_int("valid-batch-size",
                   default=32,
                   help="Mini-batch sizes for validation inference.")
    parser.add_flt("validate-every",
                   default=1,
                   help="Number of epochs in-between validations.")
    parser.add_bol("early-stop", help="Whether to enable early-stopping.")
    parser.add_str("early-stop-criterion",
                   default="~loss",
                   help="The training statistics key to use as criterion "
                   "for early-stopping. Prefix with '~' to denote "
                   "negation during comparison.")
    parser.add_int("early-stop-patience",
                   help="Number of epochs to wait without breaking "
                   "records until executing early-stopping.")
    parser.add_int("beam-size", default=4)
    parser.add_int("max-conv-len", default=20)
    parser.add_int("max-sent-len", default=30)
    # testing options
    parser.add_str("embed-type",
                   default="glove",
                   choices=("glove", "bin", "hdf5"),
                   help="Type of embedding to load for emb. evaluation.")
    parser.add_pth("embed-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "tests/data/glove/"
                       "glove.840B.300d.woz.txt")),
                   help="Path to embedding file for emb. evaluation.")
    # generation options
    parser.add_int("gen-runs", default=3, help="Number of generation runs.")
    parser.add_bol("validate-dst",
                   help="Whether to validate by checking uniqueness.")
    parser.add_bol("validate-unique",
                   help="Whether to validate by checking uniqueness.")
    parser.add_flt("multiplier",
                   default=1.0,
                   help="Ratio of dialog instances to generate. ")
    # DST options
    parser.add_int("dst-batch-size", default=32, help="Mini-batch size.")
    parser.add_int("dst-runs",
                   default=5,
                   help="Number of DST models to train and evaluate using "
                   "different seeds.")
    parser.add_int("dst-epochs",
                   default=200,
                   help="Number of epochs to train DST. "
                   "The actual number of epochs will be scaled by "
                   "the multiplier.")
    parser.add_flt("dst-l2norm", help="DST Weight of l2norm regularization.")
    parser.add_flt("dst-gradient-clip",
                   help="DST Clipping bounds for gradients.")
    parser.add_bol("dst-test-asr",
                   help="Whether to use asr information during testing.")
    # misc
    parser.add_int("seed", help="Random seed.")
    return parser
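
Both "kld-schedule" and "dropout-schedule" describe a piecewise-linear schedule over training steps, e.g. '[(0, 1.0), (10000, 0.1)]'. A sketch of the implied evaluation, assuming values are clamped outside the given range and interpolated linearly in between:

import ast

def schedule_value(schedule_str, step):
    # Parse the literal list of (step, value) points and sort by step.
    points = sorted(ast.literal_eval(schedule_str))
    if step <= points[0][0]:
        return points[0][1]
    for (s0, v0), (s1, v1) in zip(points, points[1:]):
        if s0 <= step <= s1:
            return v0 + (v1 - v0) * (step - s0) / (s1 - s0)
    return points[-1][1]
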
Example #14
File: gda.py Project: kaniblu/vhda
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap("Conduct generative data augmentation experiments.")
    # data options
    parser.add_pth("data-dir",
                   is_dir=True,
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "tests/data/json")),
                   help="Path to a json-format dialogue dataset.")
    parser.add_pth("processor-path",
                   must_exist=True,
                   required=True,
                   help="Path to the processor pickle file.")
    # model options
    parser.add_pth("gen-model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/vhda-mini.yml")),
                   help="Path to the generative model configuration file.")
    parser.add_pth("dst-model-path",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "dst/internal/configs/gce.yml")),
                   help="Path to the dst model configuration file.")
    parser.add_pth("ckpt-path",
                   must_exist=True,
                   required=True,
                   help="Path to the model checkpoint.")
    # model-specific options (TDA)
    parser.add_flt("conv-scale",
                   default=1.0,
                   help="Scale to introduce into conv vector "
                   "for TDA generation.")
    parser.add_flt("spkr-scale",
                   default=1.0,
                   help="Scale to introduce into spkr vector "
                   "for TDA generation.")
    parser.add_flt("goal-scale",
                   default=1.0,
                   help="Scale to introduce into goal vector "
                   "for TDA generation.")
    parser.add_flt("state-scale",
                   default=1.0,
                   help="Scale to introduce into state vector "
                   "for TDA generation.")
    parser.add_flt("sent-scale",
                   default=1.0,
                   help="Scale to introduce into sent vector "
                   "for TDA generation.")
    # model-specific options (general)
    parser.add_int("beam-size", default=4, help="Beam search beam size.")
    parser.add_int("max-sent-len",
                   default=30,
                   help="Beam search maximum sentence length.")
    # generation options
    parser.add_int("gen-runs", default=3, help="Number of generations to run.")
    parser.add_int("gen-batch-size", default=32, help="Mini-batch size.")
    parser.add_flt("multiplier",
                   default=1.0,
                   help="Ratio of dialog instances to generate. ")
    parser.add_bol("validate-unique",
                   help="Whether to validate by checking uniqueness.")
    # DST options
    parser.add_int("dst-batch-size", default=32, help="Mini-batch size.")
    parser.add_int("dst-runs",
                   default=5,
                   help="Number of DST models to train and evaluate using "
                   "different seeds.")
    parser.add_int("epochs",
                   default=200,
                   help="Number of epochs to train DST. "
                   "The actual number of epochs will be scaled by "
                   "the multiplier.")
    parser.add_flt("l2norm", help="DST Weight of l2norm regularization.")
    parser.add_flt("gradient-clip", help="DST Clipping bounds for gradients.")
    parser.add_bol("test-asr",
                   help="Whether to use asr information during testing.")
    # misc options
    parser.add_pth("logging-config",
                   must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent.joinpath(
                       "configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir",
                   default=pathlib.Path("out"),
                   help="Directory to save output generation files.")
    parser.add_int("gpu",
                   min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    parser.add_int("seed", help="Random seed.")
    return parser
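
Per the help strings, "multiplier" is the ratio of generated dialogs to the dataset size. The implied count is sketched below with hypothetical names:

def num_dialogs_to_generate(dataset_size, multiplier=1.0):
    # Generate multiplier * |data| dialogs, but at least one.
    return max(1, round(dataset_size * multiplier))
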
Example #15
import pathlib
import yaap

def create_parser():
    parser = yaap.Yaap()
    # data options
    parser.add_pth("data-dir", is_dir=True, must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("tests/data/json")),
                   help="Path to the data dir. Must contain 'train.json' and "
                        "'dev.json'.")
    # model options
    parser.add_pth("model-path", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/vhda-mini.yml")),
                   help="Path to the model configuration file.")
    parser.add_int("gpu", min_bound=0,
                   help="GPU device to use. (e.g. 0, 1, etc.)")
    # display options
    parser.add_pth("logging-config", must_exist=True,
                   default=(pathlib.Path(__file__).absolute().parent
                            .joinpath("configs/logging.yml")),
                   help="Path to a logging config file (yaml/json).")
    parser.add_pth("save-dir", default="out", is_dir=True,
                   help="Directory to save output files.")
    parser.add_bol("overwrite", help="Whether to overwrite save dir.")
    parser.add_int("report-every",
                   help="Report training statistics every N steps.")
    # training options
    parser.add_int("batch-size", default=32,
                   help="Mini-batch size.")
    parser.add_int("valid-batch-size", default=32,
                   help="Mini-batch sizes for validation inference.")
    parser.add_str("optimizer", default="adam", choices=("adam",),
                   help="Optimizer to use.")
    parser.add_flt("gradient-clip",
                   help="Clip gradients by norm size.")
    parser.add_flt("l2norm-weight",
                   help="Weight of l2-norm regularization.")
    parser.add_flt("learning-rate", default=0.001, min_bound=0,
                   help="Optimizer learning rate.")
    parser.add_int("epochs", default=10, min_bound=1,
                   help="Number of epochs to train for.")
    parser.add_str("kld-schedule",
                   help="KLD w schedule given as a list of data points. Each "
                        "data point is a pair of training step and target "
                        "dropout scale. Steps in-between data points will be "
                        "interpolated. e.g. '[(0, 1.0), (10000, 0.1)]'")
    parser.add_str("dropout-schedule",
                   help="Dropout schedule given as a list of data points. Each "
                        "data point is a pair of training step and target "
                        "dropout scale. Steps in-between data points will be "
                        "interpolated. e.g. '[(0, 1.0), (10000, 0.1)]'")
    parser.add_flt("validate-every", default=1,
                   help="Number of epochs in-between validations.")
    parser.add_bol("early-stop",
                   help="Whether to enable early-stopping.")
    parser.add_str("early-stop-criterion", default="~loss",
                   help="The training statistics key to use as criterion "
                        "for early-stopping. Prefix with '~' to denote "
                        "negation during comparison.")
    parser.add_int("early-stop-patience",
                   help="Number of epochs to wait without breaking "
                        "records until executing early-stopping. "
                        "defaults to infinity.")
    parser.add_int("save-every",
                   help="Number of epochs to wait until saving a model "
                        "checkpoint.")
    # model specific settings
    parser.add_bol("disable-kl",
                   help="Whether to disable kl-divergence term.")
    parser.add_str("kl-mode", default="kl-mi",
                   help="KL mode: one of kl, kl-mi, kl-mi+.")
    parser.add_int("seed", help="Random seed.")
    return parser
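
The '~' prefix convention on "early-stop-criterion" (negate the statistic so that smaller-is-better keys such as loss compare correctly) reduces to a small helper. A sketch:

def criterion_value(stats, criterion="~loss"):
    # "~loss" means lower loss is better: compare on the negated value.
    if criterion.startswith("~"):
        return -stats[criterion[1:]]
    return stats[criterion]
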