def parser():
    """Build a small ``config_argparse`` parser with four sample options.

    The parser is constructed with the single positional argument ``"test"``
    and registers ``--foo`` and ``--bar`` (plain options), ``--baz``
    (``store_true`` flag) and ``--count`` (``count`` action).

    Returns:
        The configured ``config_argparse.ArgumentParser`` instance.
    """
    sample_parser = config_argparse.ArgumentParser("test")

    # Plain value options, registered in their original order.
    for flag in ("--foo", "--bar"):
        sample_parser.add_argument(flag)

    # Options that use a non-default argparse action.
    for flag, action in (("--baz", "store_true"), ("--count", "count")):
        sample_parser.add_argument(flag, action=action)

    return sample_parser
def get_parser():
    """Create the command-line parser for this script.

    The parser carries the description "Frontend inference" and prints
    default values in ``--help``.  ``--output_dir``, ``--ref_scp`` and
    ``--inf_scp`` are mandatory; ``--ref_scp``, ``--inf_scp`` and
    ``--metrics`` may be repeated, each occurrence being appended to a list.

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")

    cli = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Frontend inference",
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    cli.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda level: level.upper(),
        choices=log_levels,
        default="INFO",
        help="The verbose level of logging",
    )
    cli.add_argument("--output_dir", required=True, type=str)
    cli.add_argument(
        "--dtype",
        choices=["float16", "float32", "float64"],
        default="float32",
        help="Data type",
    )

    inputs = cli.add_argument_group("Input data related")
    # Both scp options are mandatory and repeatable (values are appended).
    for scp_flag in ("--ref_scp", "--inf_scp"):
        inputs.add_argument(
            scp_flag,
            type=str,
            required=True,
            action="append",
        )
    inputs.add_argument("--key_file", type=str)
    inputs.add_argument("--metrics", type=str, action="append")
    inputs.add_argument("--ref_channel", type=int, default=0)
    inputs.add_argument(
        "--frame_size",
        default=512,
        type=int,
        help="STFT frame size in samples, for calculating framewise-* metrics",
    )
    inputs.add_argument(
        "--frame_hop",
        default=256,
        type=int,
        help="STFT frame hop in samples, for calculating framewise-* metrics",
    )

    return cli
def get_parser():
    """Create the command-line parser for the Zenodo upload script.

    ``--title``, ``--creator_name`` and ``--file`` are mandatory, and exactly
    one of ``--description`` / ``--description_file`` must be supplied (they
    live in a required mutually exclusive group).

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    token_help = (
        "Get your access_token from "
        "https://zenodo.org/account/settings/applications/ or "
        "https://sandbox.zenodo.org/account/settings/applications/ . "
        "You can also give it from an environment variable 'ACCESS_TOKEN'"
    )
    title_help = (
        "e.g. ESPnet pretrained model, MT, "
        "Fisher-CallHome Spanish (Es->En), Transformer"
    )

    uploader = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Upload files to Zenodo",
    )
    uploader.add_argument("--access_token", help=token_help)
    uploader.add_argument("--title", help=title_help, required=True)
    uploader.add_argument("--creator_name", help="Your name", required=True)
    uploader.add_argument("--file", required=True, nargs="+")

    # The description comes either inline or from a file, never both.
    description_source = uploader.add_mutually_exclusive_group(required=True)
    description_source.add_argument(
        "--description",
        help="Give the description",
    )
    description_source.add_argument(
        "--description_file",
        help="Give the description from file",
    )

    uploader.add_argument(
        "--use_sandbox",
        default=False,
        type=str2bool,
        help="Use zenodo sandbox for testing",
    )
    uploader.add_argument(
        "--publish",
        default=False,
        type=str2bool,
        help="Publish after uploading",
    )
    uploader.add_argument("--license", default="CC-BY-4.0")

    # Optional metadata options without type, default or help text.
    for metadata_flag in ("--affiliation", "--orcid", "--gnd"):
        uploader.add_argument(metadata_flag)

    return uploader
def get_parser():
    """Create the command-line parser for the enhancement inference script.

    Options are organised into general settings plus the groups
    "Input data related", "Output data related", "The model configuration
    related", "Data loading related" and "SeparateSpeech related".
    ``--output_dir`` and ``--data_path_and_name_and_type`` are mandatory.

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")

    cli = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Frontend inference",
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    cli.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda level: level.upper(),
        choices=log_levels,
        default="INFO",
        help="The verbose level of logging",
    )
    cli.add_argument("--output_dir", required=True, type=str)
    cli.add_argument(
        "--ngpu",
        default=0,
        type=int,
        help="The number of gpus. 0 indicates CPU mode",
    )
    cli.add_argument("--seed", default=0, type=int, help="Random seed")
    cli.add_argument(
        "--dtype",
        choices=["float16", "float32", "float64"],
        default="float32",
        help="Data type",
    )
    cli.add_argument(
        "--fs",
        default=8000,
        type=humanfriendly_or_none,
        help="Sampling rate",
    )
    cli.add_argument(
        "--num_workers",
        default=1,
        type=int,
        help="The number of workers used for DataLoader",
    )

    input_opts = cli.add_argument_group("Input data related")
    input_opts.add_argument(
        "--data_path_and_name_and_type",
        action="append",
        required=True,
        type=str2triple_str,
    )
    input_opts.add_argument("--key_file", type=str_or_none)
    input_opts.add_argument(
        "--allow_variable_data_keys",
        default=False,
        type=str2bool,
    )

    output_opts = cli.add_argument_group("Output data related")
    output_opts.add_argument(
        "--normalize_output_wav",
        default=False,
        type=str2bool,
        help="Whether to normalize the predicted wav to [-1~1]",
    )

    model_opts = cli.add_argument_group("The model configuration related")
    model_opts.add_argument(
        "--train_config",
        type=str,
        help="Training configuration file",
    )
    model_opts.add_argument(
        "--model_file",
        type=str,
        help="Model parameter file",
    )
    model_opts.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, train_config and "
        "model_file will be overwritten",
    )
    model_opts.add_argument(
        "--inference_config",
        default=None,
        type=str_or_none,
        help="Optional configuration file for overwriting enh model attributes "
        "during inference",
    )
    model_opts.add_argument(
        "--enh_s2t_task",
        default=False,
        type=str2bool,
        help="enhancement and asr joint model",
    )

    loading_opts = cli.add_argument_group("Data loading related")
    loading_opts.add_argument(
        "--batch_size",
        default=1,
        type=int,
        help="The batch size for inference",
    )

    separation_opts = cli.add_argument_group("SeparateSpeech related")
    separation_opts.add_argument(
        "--segment_size",
        default=None,
        type=float,
        help="Segment length in seconds for segment-wise speech enhancement/separation",
    )
    separation_opts.add_argument(
        "--hop_size",
        default=None,
        type=float,
        help="Hop length in seconds for segment-wise speech enhancement/separation",
    )
    separation_opts.add_argument(
        "--normalize_segment_scale",
        default=False,
        type=str2bool,
        help="Whether to normalize the energy of the separated streams in each segment",
    )
    separation_opts.add_argument(
        "--show_progressbar",
        default=False,
        type=str2bool,
        help="Whether to show a progress bar when performing segment-wise speech "
        "enhancement/separation",
    )
    separation_opts.add_argument(
        "--ref_channel",
        default=None,
        type=int,
        help="If not None, this will overwrite the ref_channel defined in the "
        "separator module (for multi-channel speech processing)",
    )

    return cli
def get_parser():
    """Create the command-line parser for the ASR decoding script.

    ``--output_dir``, ``--data_path_and_name_and_type``,
    ``--asr_train_config`` and ``--asr_model_file`` are mandatory.  Options
    are organised into general settings plus the groups "Input data related",
    "The model configuration related", "Beam-search related" and
    "Text converter related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="ASR Decoding",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        # Every standard logging level name is listed exactly once; the list
        # previously repeated "INFO" and left out "CRITICAL".
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("The model configuration related")
    group.add_argument("--asr_train_config", type=str, required=True)
    group.add_argument("--asr_model_file", type=str, required=True)
    group.add_argument("--lm_train_config", type=str)
    group.add_argument("--lm_file", type=str)
    group.add_argument("--word_lm_train_config", type=str)
    group.add_argument("--word_lm_file", type=str)

    group = parser.add_argument_group("Beam-search related")
    group.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )
    group.add_argument("--nbest", type=int, default=1, help="Output N-best hypotheses")
    group.add_argument("--beam_size", type=int, default=20, help="Beam size")
    group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty")
    group.add_argument(
        "--maxlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain max output length. "
        "If maxlenratio=0.0 (default), it uses a end-detect "
        "function "
        "to automatically find maximum hypothesis lengths",
    )
    group.add_argument(
        "--minlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain min output length",
    )
    group.add_argument(
        "--ctc_weight",
        type=float,
        default=0.5,
        help="CTC weight in joint decoding",
    )
    group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
    group.add_argument(
        "--blank_symbol",
        type=str,
        default="<blank>",
        help="The token symbol represents CTC-blank",
    )

    group = parser.add_argument_group("Text converter related")
    group.add_argument(
        "--token_type",
        type=str_or_none,
        default=None,
        choices=["char", "bpe", None],
        help="The token type for ASR model. "
        "If not given, refers from the training args",
    )
    group.add_argument(
        "--bpemodel",
        type=str_or_none,
        default=None,
        help="The model path of sentencepiece. "
        "If not given, refers from the training args",
    )

    return parser
def get_parser():
    """Obtain an argument-parser for the script interface.

    ``--asr_train_config``, ``--asr_model_file``, ``-a/--audio`` and
    ``-t/--text`` are mandatory.  ``--text`` is opened for reading and
    ``--output`` for writing by ``argparse.FileType``; the ``--output``
    default ``"-"`` makes argparse use stdout.

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="ASR Decoding",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )

    group = parser.add_argument_group("Model configuration related")
    group.add_argument("--asr_train_config", type=str, required=True)
    group.add_argument("--asr_model_file", type=str, required=True)

    group = parser.add_argument_group("Text converter related")
    group.add_argument(
        "--token_type",
        type=str_or_none,
        default=None,
        choices=["char", "bpe", None],
        help="The token type for ASR model. "
        "If not given, refers from the training args",
    )
    group.add_argument(
        "--bpemodel",
        type=str_or_none,
        default=None,
        help="The model path of sentencepiece. "
        "If not given, refers from the training args",
    )

    group = parser.add_argument_group("CTC segmentation related")
    group.add_argument(
        "--fs",
        type=int,
        default=16000,
        help="Sampling Frequency."
        " The sampling frequency (in Hz) is needed to correctly determine the"
        " starting and ending time of aligned segments.",
    )
    group.add_argument(
        "--min_window_size",
        type=int,
        default=None,
        help="Minimum window size considered for utterance.",
    )
    group.add_argument(
        "--max_window_size",
        type=int,
        default=None,
        help="Maximum window size considered for utterance.",
    )
    group.add_argument(
        "--set_blank",
        type=int,
        default=None,
        help="Index of model dictionary for blank token.",
    )
    group.add_argument(
        "--gratis_blank",
        type=str2bool,
        default=False,
        help="Set the transition cost of the blank token to zero. Audio sections"
        " labeled with blank tokens can then be skipped without penalty. Useful"
        " if there are unrelated audio segments between utterances.",
    )
    group.add_argument(
        "--replace_spaces_with_blanks",
        type=str2bool,
        default=False,
        # The help names the flag as it is registered above (`--gratis_blank`);
        # the hyphenated spelling is not an option of this parser.
        help="Fill blanks in between words to better model pauses between words."
        " This option is only active for `--text_converter classic`."
        " Segments can be misaligned if this option is combined with"
        " --gratis_blank.",
    )
    group.add_argument(
        "--scoring_length",
        type=int,
        default=None,
        help="Changes partitioning length L for calculation of the confidence score.",
    )
    # Defaults and choices for the next two options come from class
    # attributes of CTCSegmentation.
    group.add_argument(
        "--time_stamps",
        type=str,
        default=CTCSegmentation.time_stamps,
        choices=CTCSegmentation.choices_time_stamps,
        help="Select method how CTC index duration is estimated, and"
        " thus how the time stamps are calculated.",
    )
    group.add_argument(
        "--text_converter",
        type=str,
        default=CTCSegmentation.text_converter,
        choices=CTCSegmentation.choices_text_converter,
        help="How CTC segmentation handles text.",
    )

    group = parser.add_argument_group("Input/output arguments")
    group.add_argument(
        "--kaldi_style_text",
        type=str2bool,
        default=True,
        help="Assume that the input text file is kaldi-style formatted, i.e., the"
        " utterance name is at the beginning of each line.",
    )
    group.add_argument(
        "--print_utt_text",
        type=str2bool,
        default=True,
        help="Include the utterance text in the segments output.",
    )
    group.add_argument(
        "--print_utt_score",
        type=str2bool,
        default=True,
        help="Include the confidence score in the segments output.",
    )
    group.add_argument(
        "-a",
        "--audio",
        type=Path,
        required=True,
        help="Input audio file.",
    )
    group.add_argument(
        "-t",
        "--text",
        type=argparse.FileType("r"),
        required=True,
        help="Input text file."
        " Each line contains the ground truth of a single utterance."
        " Kaldi-style text files include the name of the utterance as"
        " the first word in the line.",
    )
    group.add_argument(
        "-o",
        "--output",
        type=argparse.FileType("w"),
        default="-",
        help="Output in the form of a `segments` file."
        " If not given, output is written to stdout.",
    )

    return parser
def get_parser():
    """Create the command-line parser for speaker diarization inference.

    ``--output_dir`` and ``--data_path_and_name_and_type`` are mandatory.
    Options are organised into general settings plus the groups
    "Input data related", "The model configuration related",
    "Data loading related" and "Diarize speech related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")

    cli = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Speaker Diarization inference",
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    cli.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda level: level.upper(),
        choices=log_levels,
        default="INFO",
        help="The verbose level of logging",
    )
    cli.add_argument("--output_dir", required=True, type=str)
    cli.add_argument(
        "--ngpu",
        default=0,
        type=int,
        help="The number of gpus. 0 indicates CPU mode",
    )
    cli.add_argument("--seed", default=0, type=int, help="Random seed")
    cli.add_argument(
        "--dtype",
        choices=["float16", "float32", "float64"],
        default="float32",
        help="Data type",
    )
    cli.add_argument(
        "--fs",
        default=8000,
        type=humanfriendly_parse_size_or_none,
        help="Sampling rate",
    )
    cli.add_argument(
        "--num_workers",
        default=1,
        type=int,
        help="The number of workers used for DataLoader",
    )

    input_opts = cli.add_argument_group("Input data related")
    input_opts.add_argument(
        "--data_path_and_name_and_type",
        action="append",
        required=True,
        type=str2triple_str,
    )
    input_opts.add_argument("--key_file", type=str_or_none)
    input_opts.add_argument(
        "--allow_variable_data_keys",
        default=False,
        type=str2bool,
    )

    model_opts = cli.add_argument_group("The model configuration related")
    model_opts.add_argument(
        "--train_config",
        type=str,
        help="Diarization training configuration",
    )
    model_opts.add_argument(
        "--model_file",
        type=str,
        help="Diarization model parameter file",
    )
    model_opts.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, train_config and "
        "model_file will be overwritten",
    )

    loading_opts = cli.add_argument_group("Data loading related")
    loading_opts.add_argument(
        "--batch_size",
        default=1,
        type=int,
        help="The batch size for inference",
    )

    diarize_opts = cli.add_argument_group("Diarize speech related")
    diarize_opts.add_argument(
        "--segment_size",
        default=None,
        type=float,
        help="Segment length in seconds for segment-wise speaker diarization",
    )
    diarize_opts.add_argument(
        "--show_progressbar",
        default=False,
        type=str2bool,
        help="Whether to show a progress bar when performing segment-wise speaker "
        "diarization",
    )

    return cli
def get_parser():
    """Get argument parser.

    Builds the parser for the TTS decoding script.  ``--output_dir`` and
    ``--data_path_and_name_and_type`` are mandatory.  Options are organised
    into general settings plus the groups "Input data related",
    "The model configuration related", "Decoding related" and
    "Griffin-Lim related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="TTS Decode",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use "_" instead of "-" as separator.
    # "-" is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument(
        "--output_dir",
        type=str,
        required=True,
        help="The path of output directory",
    )
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=0,
        help="Random seed",
    )
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument(
        "--key_file",
        type=str_or_none,
    )
    group.add_argument(
        "--allow_variable_data_keys",
        type=str2bool,
        default=False,
    )

    group = parser.add_argument_group("The model configuration related")
    group.add_argument(
        "--train_config",
        type=str,
        help="Training configuration file.",
    )
    group.add_argument(
        "--model_file",
        type=str,
        help="Model parameter file.",
    )

    group = parser.add_argument_group("Decoding related")
    group.add_argument(
        "--maxlenratio",
        type=float,
        default=10.0,
        help="Maximum length ratio in decoding",
    )
    group.add_argument(
        "--minlenratio",
        type=float,
        default=0.0,
        help="Minimum length ratio in decoding",
    )
    group.add_argument(
        "--threshold",
        type=float,
        default=0.5,
        help="Threshold value in decoding",
    )
    group.add_argument(
        "--use_att_constraint",
        type=str2bool,
        default=False,
        help="Whether to use attention constraint",
    )
    group.add_argument(
        "--backward_window",
        type=int,
        default=1,
        help="Backward window value in attention constraint",
    )
    group.add_argument(
        "--forward_window",
        type=int,
        default=3,
        help="Forward window value in attention constraint",
    )
    group.add_argument(
        "--use_teacher_forcing",
        type=str2bool,
        default=False,
        help="Whether to use teacher forcing",
    )
    # Registered on the top-level parser (not on the "Decoding related"
    # group), so --help lists it with the general options.
    parser.add_argument(
        "--speed_control_alpha",
        type=float,
        default=1.0,
        help="Alpha in FastSpeech to change the speed of generated speech",
    )

    # Spelling of "Griffin-Lim" corrected in the group title and help text.
    group = parser.add_argument_group("Griffin-Lim related")
    group.add_argument(
        "--vocoder_conf",
        action=NestedDictAction,
        default=get_default_kwargs(Spectrogram2Waveform),
        help="The configuration for Griffin-Lim",
    )

    return parser
def get_parser():
    """Create the command-line parser for the Mask-CTC style ASR decoder.

    ``--output_dir``, ``--data_path_and_name_and_type``,
    ``--asr_train_config`` and ``--asr_model_file`` are mandatory.  Options
    are organised into general settings plus the groups "Input data related",
    "The model configuration related", "Decoding related" and
    "Text converter related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")
    # Shared tail of the help text for options that fall back to training args.
    fallback_note = "If not given, refers from the training args"

    cli = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="ASR Decoding",
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    cli.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda level: level.upper(),
        choices=log_levels,
        default="INFO",
        help="The verbose level of logging",
    )
    cli.add_argument("--output_dir", required=True, type=str)
    cli.add_argument(
        "--ngpu",
        default=0,
        type=int,
        help="The number of gpus. 0 indicates CPU mode",
    )
    cli.add_argument("--seed", default=0, type=int, help="Random seed")
    cli.add_argument(
        "--dtype",
        choices=["float16", "float32", "float64"],
        default="float32",
        help="Data type",
    )
    cli.add_argument(
        "--num_workers",
        default=1,
        type=int,
        help="The number of workers used for DataLoader",
    )

    input_opts = cli.add_argument_group("Input data related")
    input_opts.add_argument(
        "--data_path_and_name_and_type",
        action="append",
        required=True,
        type=str2triple_str,
    )
    input_opts.add_argument("--key_file", type=str_or_none)
    input_opts.add_argument(
        "--allow_variable_data_keys",
        default=False,
        type=str2bool,
    )

    model_opts = cli.add_argument_group("The model configuration related")
    model_opts.add_argument("--asr_train_config", required=True, type=str)
    model_opts.add_argument("--asr_model_file", required=True, type=str)
    model_opts.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, *_train_config and "
        "*_file will be overwritten",
    )

    decoding_opts = cli.add_argument_group("Decoding related")
    decoding_opts.add_argument(
        "--batch_size",
        default=1,
        type=int,
        help="The batch size for inference",
    )
    decoding_opts.add_argument("--maskctc_n_iterations", default=10, type=int)
    decoding_opts.add_argument(
        "--maskctc_threshold_probability",
        default=0.99,
        type=float,
    )

    text_opts = cli.add_argument_group("Text converter related")
    text_opts.add_argument(
        "--token_type",
        choices=["char", "bpe", None],
        default=None,
        type=str_or_none,
        help="The token type for ASR model. " + fallback_note,
    )
    text_opts.add_argument(
        "--bpemodel",
        default=None,
        type=str_or_none,
        help="The model path of sentencepiece. " + fallback_note,
    )

    return cli
def get_parser():
    """Create the command-line parser for the perplexity calculation script.

    ``--output_dir`` and ``--data_path_and_name_and_type`` are mandatory.
    Options are organised into general settings plus the groups
    "Input data related" and "The model configuration related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="Calc perplexity",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        # Every standard logging level name is listed exactly once; the list
        # previously repeated "INFO" and left out "CRITICAL".
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )
    parser.add_argument(
        "--log_base",
        type=float_or_none,
        default=None,
        help="The base of logarithm for Perplexity. "
        "If None, napier's constant is used.",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("The model configuration related")
    group.add_argument("--train_config", type=str)
    group.add_argument("--model_file", type=str)

    return parser
def get_parser():
    """Create the command-line parser for the ASR inference script.

    ``--output_dir`` and ``--data_path_and_name_and_type`` are mandatory.
    Options are organised into general settings plus the groups
    "Input data related", "The model configuration related",
    "Quantization related", "Beam-search related" and
    "Text converter related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="ASR Decoding",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("The model configuration related")
    group.add_argument(
        "--asr_train_config",
        type=str,
        help="ASR training configuration",
    )
    group.add_argument(
        "--asr_model_file",
        type=str,
        help="ASR model parameter file",
    )
    group.add_argument(
        "--lm_train_config",
        type=str,
        help="LM training configuration",
    )
    group.add_argument(
        "--lm_file",
        type=str,
        help="LM parameter file",
    )
    group.add_argument(
        "--word_lm_train_config",
        type=str,
        help="Word LM training configuration",
    )
    group.add_argument(
        "--word_lm_file",
        type=str,
        help="Word LM parameter file",
    )
    group.add_argument(
        "--ngram_file",
        type=str,
        help="N-gram parameter file",
    )
    group.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, *_train_config and "
        "*_file will be overwritten",
    )
    group.add_argument(
        "--enh_s2t_task",
        type=str2bool,
        default=False,
        help="enhancement and asr joint model",
    )

    group = parser.add_argument_group("Quantization related")
    group.add_argument(
        "--quantize_asr_model",
        type=str2bool,
        default=False,
        help="Apply dynamic quantization to ASR model.",
    )
    group.add_argument(
        "--quantize_lm",
        type=str2bool,
        default=False,
        help="Apply dynamic quantization to LM.",
    )
    group.add_argument(
        "--quantize_modules",
        type=str,
        nargs="*",
        default=["Linear"],
        help="List of modules to be dynamically quantized. "
        "E.g.: --quantize_modules=[Linear,LSTM,GRU]. "
        "Each specified module should be an attribute of 'torch.nn', e.g.: "
        "torch.nn.Linear, torch.nn.LSTM, torch.nn.GRU, ...",
    )
    group.add_argument(
        "--quantize_dtype",
        type=str,
        default="qint8",
        choices=["float16", "qint8"],
        help="Dtype for dynamic quantization.",
    )

    group = parser.add_argument_group("Beam-search related")
    group.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )
    group.add_argument("--nbest", type=int, default=1, help="Output N-best hypotheses")
    group.add_argument("--beam_size", type=int, default=20, help="Beam size")
    group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty")
    group.add_argument(
        "--maxlenratio",
        type=float,
        default=0.0,
        # Each piece ends with a space so that the implicitly concatenated
        # sentences do not run together in --help.
        help="Input length ratio to obtain max output length. "
        "If maxlenratio=0.0 (default), it uses a end-detect "
        "function "
        "to automatically find maximum hypothesis lengths. "
        "If maxlenratio<0.0, its absolute value is interpreted "
        "as a constant max output length",
    )
    group.add_argument(
        "--minlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain min output length",
    )
    group.add_argument(
        "--ctc_weight",
        type=float,
        default=0.5,
        help="CTC weight in joint decoding",
    )
    group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
    group.add_argument("--ngram_weight", type=float, default=0.9, help="ngram weight")
    group.add_argument("--streaming", type=str2bool, default=False)
    group.add_argument(
        "--transducer_conf",
        default=None,
        help="The keyword arguments for transducer beam search.",
    )

    group = parser.add_argument_group("Text converter related")
    group.add_argument(
        "--token_type",
        type=str_or_none,
        default=None,
        choices=["char", "bpe", None],
        help="The token type for ASR model. "
        "If not given, refers from the training args",
    )
    group.add_argument(
        "--bpemodel",
        type=str_or_none,
        default=None,
        help="The model path of sentencepiece. "
        "If not given, refers from the training args",
    )

    return parser
def get_parser():
    """Create the command-line parser for the frontend inference script.

    ``--output_dir``, ``--data_path_and_name_and_type``,
    ``--enh_train_config`` and ``--enh_model_file`` are mandatory.  Options
    are organised into general settings plus the groups "Input data related",
    "Output data related", "The model configuration related" and
    "Beam-search related".

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    parser = config_argparse.ArgumentParser(
        description="Frontend inference",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda x: x.upper(),
        default="INFO",
        # Every standard logging level name is listed exactly once; the list
        # previously repeated "INFO" and left out "CRITICAL".
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )

    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument("--fs", type=int, default=8000, help="Sampling rate")
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("Output data related")
    group.add_argument(
        "--normalize_output_wav",
        type=str2bool,
        default=False,
        # Help text typo fixed: "Weather" -> "Whether".
        help="Whether to normalize the predicted wav to [-1~1]",
    )

    group = parser.add_argument_group("The model configuration related")
    group.add_argument("--enh_train_config", type=str, required=True)
    group.add_argument("--enh_model_file", type=str, required=True)

    group = parser.add_argument_group("Beam-search related")
    group.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )

    return parser
def get_parser():
    """Create the command-line parser for the k2-based ASR decoding script.

    ``--output_dir`` and ``--data_path_and_name_and_type`` are mandatory.
    Options are organised into general settings plus the groups
    "Input data related", "The model configuration related",
    "Beam-search related" and "Text converter related"; the k2 options
    (``--is_ctc_decoding`` through ``--k2_config``) are registered on the
    "Text converter related" group.

    Returns:
        The configured ``config_argparse.ArgumentParser``.
    """
    log_levels = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")
    # Shared tail of the help text for options that fall back to training args.
    fallback_note = "If not given, refers from the training args"

    cli = config_argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="ASR Decoding",
    )

    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    cli.add_argument(
        "--log_level",
        # Upper-case the value first so lower-case input matches `choices`.
        type=lambda level: level.upper(),
        choices=log_levels,
        default="INFO",
        help="The verbose level of logging",
    )
    cli.add_argument("--output_dir", required=True, type=str)
    cli.add_argument(
        "--ngpu",
        default=0,
        type=int,
        help="The number of gpus. 0 indicates CPU mode",
    )
    cli.add_argument("--seed", default=0, type=int, help="Random seed")
    cli.add_argument(
        "--dtype",
        choices=["float16", "float32", "float64"],
        default="float32",
        help="Data type",
    )
    cli.add_argument(
        "--num_workers",
        default=1,
        type=int,
        help="The number of workers used for DataLoader",
    )

    input_opts = cli.add_argument_group("Input data related")
    input_opts.add_argument(
        "--data_path_and_name_and_type",
        action="append",
        required=True,
        type=str2triple_str,
    )
    input_opts.add_argument("--key_file", type=str_or_none)
    input_opts.add_argument(
        "--allow_variable_data_keys",
        default=False,
        type=str2bool,
    )

    model_opts = cli.add_argument_group("The model configuration related")
    # All model/LM path options are plain optional strings with a help text.
    for path_flag, path_help in (
        ("--asr_train_config", "ASR training configuration"),
        ("--asr_model_file", "ASR model parameter file"),
        ("--lm_train_config", "LM training configuration"),
        ("--lm_file", "LM parameter file"),
        ("--word_lm_train_config", "Word LM training configuration"),
        ("--word_lm_file", "Word LM parameter file"),
    ):
        model_opts.add_argument(path_flag, type=str, help=path_help)
    model_opts.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, *_train_config and "
        "*_file will be overwritten",
    )

    search_opts = cli.add_argument_group("Beam-search related")
    search_opts.add_argument(
        "--batch_size",
        default=1,
        type=int,
        help="The batch size for inference",
    )
    search_opts.add_argument(
        "--nbest",
        default=1,
        type=int,
        help="Output N-best hypotheses",
    )
    search_opts.add_argument(
        "--beam_size",
        default=20,
        type=int,
        help="Beam size",
    )
    search_opts.add_argument(
        "--penalty",
        default=0.0,
        type=float,
        help="Insertion penalty",
    )
    search_opts.add_argument(
        "--maxlenratio",
        default=0.0,
        type=float,
        help="Input length ratio to obtain max output length. "
        "If maxlenratio=0.0 (default), it uses a end-detect "
        "function "
        "to automatically find maximum hypothesis lengths",
    )
    search_opts.add_argument(
        "--minlenratio",
        default=0.0,
        type=float,
        help="Input length ratio to obtain min output length",
    )
    search_opts.add_argument(
        "--ctc_weight",
        default=0.5,
        type=float,
        help="CTC weight in joint decoding",
    )
    search_opts.add_argument(
        "--lm_weight",
        default=1.0,
        type=float,
        help="RNNLM weight",
    )
    search_opts.add_argument("--streaming", default=False, type=str2bool)

    text_opts = cli.add_argument_group("Text converter related")
    text_opts.add_argument(
        "--token_type",
        choices=["char", "bpe", None],
        default=None,
        type=str_or_none,
        help="The token type for ASR model. " + fallback_note,
    )
    text_opts.add_argument(
        "--bpemodel",
        default=None,
        type=str_or_none,
        help="The model path of sentencepiece. " + fallback_note,
    )
    # The remaining options stay on the same group as the text-converter ones.
    text_opts.add_argument(
        "--is_ctc_decoding",
        default=True,
        type=str2bool,
        help="Use ctc topology as decoding graph",
    )
    text_opts.add_argument("--use_nbest_rescoring", default=False, type=str2bool)
    text_opts.add_argument(
        "--num_paths",
        default=1000,
        type=int,
        help="The third argument for k2.random_paths",
    )
    text_opts.add_argument(
        "--nbest_batch_size",
        default=500,
        type=int,
        help="batchify nbest list when computing am/lm scores to avoid OOM",
    )
    text_opts.add_argument(
        "--nll_batch_size",
        default=100,
        type=int,
        help="batch_size when computing nll during nbest rescoring",
    )
    text_opts.add_argument(
        "--k2_config",
        type=str,
        help="Config file for decoding with k2",
    )

    return cli
def get_parser():
    """Get Transducer model inference parser.

    Builds the command-line/config parser for ASR Transducer decoding.
    Options are organised into argument groups (input data, model
    configuration, beam search, text converter, dynamic quantization and
    streaming) so that ``--help`` output is sectioned accordingly.

    Returns:
        The configured ``config_argparse.ArgumentParser`` instance.

    """
    parser = config_argparse.ArgumentParser(
        description="ASR Transducer Decoding",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # General options.
    parser.add_argument(
        "--log_level",
        # Upper-case the value first so that e.g. "info" matches the choices.
        type=lambda x: x.upper(),
        default="INFO",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )

    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        # May be given several times; each occurrence is collected in a list.
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)

    group = parser.add_argument_group("The model configuration related")
    group.add_argument(
        "--asr_train_config",
        type=str,
        help="ASR training configuration",
    )
    group.add_argument(
        "--asr_model_file",
        type=str,
        help="ASR model parameter file",
    )
    group.add_argument(
        "--lm_train_config",
        type=str,
        help="LM training configuration",
    )
    group.add_argument(
        "--lm_file",
        type=str,
        help="LM parameter file",
    )
    group.add_argument(
        "--model_tag",
        type=str,
        help="Pretrained model tag. If specify this option, *_train_config and "
        "*_file will be overwritten",
    )

    group = parser.add_argument_group("Beam-search related")
    group.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )
    group.add_argument("--nbest", type=int, default=1, help="Output N-best hypotheses")
    group.add_argument("--beam_size", type=int, default=5, help="Beam size")
    group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
    group.add_argument(
        "--beam_search_config",
        default={},
        help="The keyword arguments for transducer beam search.",
    )

    group = parser.add_argument_group("Text converter related")
    group.add_argument(
        "--token_type",
        type=str_or_none,
        default=None,
        choices=["char", "bpe", None],
        help="The token type for ASR model. "
        "If not given, refers from the training args",
    )
    group.add_argument(
        "--bpemodel",
        type=str_or_none,
        default=None,
        help="The model path of sentencepiece. "
        "If not given, refers from the training args",
    )

    # NOTE: the options below are registered on ``group`` (not ``parser``) so
    # that they are listed under their own section in ``--help``.
    group = parser.add_argument_group("Dynamic quantization related")
    group.add_argument(
        "--quantize_asr_model",
        # ``type=bool`` would turn any non-empty string, including "False",
        # into True. Use the same converter as the other boolean options.
        type=str2bool,
        default=False,
        help="Apply dynamic quantization to ASR model.",
    )
    group.add_argument(
        "--quantize_modules",
        # Zero or more space-separated values are collected into a list.
        nargs="*",
        default=None,
        help="Module names to apply dynamic quantization on. "
        "The module names are provided as a space-separated list "
        "(e.g.: --quantize_modules Linear LSTM GRU). "
        "Each specified name should be an attribute of 'torch.nn', e.g.: "
        "torch.nn.Linear, torch.nn.LSTM, torch.nn.GRU, ...",
    )
    group.add_argument(
        "--quantize_dtype",
        type=str,
        default="qint8",
        choices=["float16", "qint8"],
        help="Dtype for dynamic quantization.",
    )

    group = parser.add_argument_group("Streaming related")
    group.add_argument(
        "--streaming",
        type=str2bool,  # see --quantize_asr_model for why not ``bool``
        default=False,
        help="Whether to perform chunk-by-chunk inference.",
    )
    group.add_argument(
        "--chunk_size",
        type=int,
        default=16,
        help="Number of frames in chunk AFTER subsampling.",
    )
    group.add_argument(
        "--left_context",
        type=int,
        default=32,
        help="Number of frames in left context of the chunk AFTER subsampling.",
    )
    group.add_argument(
        "--right_context",
        type=int,
        default=0,
        help="Number of frames in right context of the chunk AFTER subsampling.",
    )
    group.add_argument(
        "--display_partial_hypotheses",
        type=str2bool,  # see --quantize_asr_model for why not ``bool``
        default=False,
        help="Whether to display partial hypotheses during chunk-by-chunk inference.",
    )

    return parser