"%(filename)s:%(lineno)s) %(message)s".format(__version__), level=logging.INFO) usage = """Use Principal component analysis for dimension reduction. For the details, Please refer to website: https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html Usage: pca-vector.py [options] <vector-rspecifier> <vector-wspecifier e.g. pca-vector.py scp:data/train/ivector.scp ark:data/train/low_dim_vector.ark see also: two-dim-vector-visual.py """ po = ParseOptions(usage) po.register_int( "output-dim", 2, "dimension of the output vectors." " For visualization, only 2 is allowed in this program. (2 by default)" ) opts = po.parse_args() if (po.num_args() != 2): po.print_usage() sys.exit() vector_rspecifier = po.get_arg(1) vector_wspecifier = po.get_arg(2) isSuccess = pca_vector(vector_rspecifier, vector_wspecifier, output_dim=opts.output_dim) if not isSuccess: sys.exit()
"ltsv-ctx-window", 50, "Context window for LTSV computation (default: 50)", ) po.register_float( "threshold", 0.01, "Parameter for sigmoid scaling in LTSV (default: 0.01)", ) po.register_float( "slope", 0.001, "Parameter for sigmoid scaling in LTSV (default: 0.001)") po.register_bool("sigmoid-scale", True, "Apply sigmoid scaling in LTSV (default: True)") po.register_int("dct-num-cep", 5, "DCT number of coefficitents (default: 5)") po.register_int("dct-ctx-window", 30, "DCT context window (default: 30)") po.register_bool("test-plot", False, "Produces a plot for testing (default: False)") opts = po.parse_args() if po.num_args() != 2: po.print_usage() sys.exit() wav_rspecifier = po.get_arg(1) feats_wspecifier = po.get_arg(2) compute_vad(wav_rspecifier, feats_wspecifier, opts)
# Entry statements for the extract-segments command-line tool: configure
# logging, parse the command line, then hand off to extract_segments().
from kaldi import __version__

# Level 20 is logging.INFO; it is renamed to "LOG" for the log prefix.
logging.addLevelName(20, 'LOG')
log_format = (
    '%(levelname)s (%(module)s[{}]:%(funcName)s():'
    '%(filename)s:%(lineno)s) %(message)s'
).format(__version__)
logging.basicConfig(format=log_format, level=logging.INFO)

usage = (
    "Extract segments from a large audio file in WAV format. "
    "Usage: extract-segments [options] <wav-rspecifier> <segments-file> "
    "<wav-wspecifier> "
)

po = ParseOptions(usage)
po.register_float(
    "min-segment-length", 0.1,
    "Minimum segment length in seconds (reject shorter segments)")
po.register_float(
    "max_overshoot", 0.5,
    "End segments overshooting audio by less than this (in seconds) "
    "are truncated, else rejected.")
opts = po.parse_args()

# Exactly three positional arguments are required.  Exit with a non-zero
# status on a usage error so calling shell scripts can detect the failure
# (a bare sys.exit() would report success).
if po.num_args() != 3:
    po.print_usage()
    sys.exit(1)

wav_rspecifier = po.get_arg(1)
segments_rxfilename = po.get_arg(2)
wav_wspecifier = po.get_arg(3)

extract_segments(wav_rspecifier, segments_rxfilename, wav_wspecifier, opts)
Note: lattices, if output, will just be linear sequences; use gmm-latgen-faster if you want "real" lattices. """ po = ParseOptions(usage) decoder_opts = FasterDecoderOptions() decoder_opts.register(po, True) po.register_float("acoustic-scale", 0.1, "Scaling factor for acoustic likelihoods") po.register_bool("allow-partial", True, "Produce output even when final state was not reached") po.register_str("word-symbol-table", "", "Symbol table for words [for debug output]"); opts = po.parse_args() if po.num_args() < 4 or po.num_args() > 6: po.print_usage() sys.exit() model_rxfilename = po.get_arg(1) fst_rxfilename = po.get_arg(2) feature_rspecifier = po.get_arg(3) words_wspecifier = po.get_arg(4) alignment_wspecifier = po.get_opt_arg(5) lattice_wspecifier = po.get_opt_arg(6) gmm_decode_faster(model_rxfilename, fst_rxfilename, feature_rspecifier, words_wspecifier, alignment_wspecifier, lattice_wspecifier, opts.word_symbol_table, opts.acoustic_scale, opts.allow_partial, decoder_opts)
if __name__ == '__main__':
    # Configure log messages to look like Kaldi messages
    from kaldi import __version__

    # Level 20 is logging.INFO; it is renamed to "LOG" for the log prefix.
    logging.addLevelName(20, "LOG")
    log_format = (
        "%(levelname)s (%(module)s[{}]:%(funcName)s():"
        "%(filename)s:%(lineno)s) %(message)s"
    ).format(__version__)
    logging.basicConfig(format=log_format, level=logging.INFO)

    usage = (
        "save the visualization plot of 2-dimensional vectors to hardisk. "
        "Usage: two-dim-vector-visual.py [options] <vector-rspecifier> "
        "<utt2spk-rxfilename> <figure-rxfilename> "
        "e.g. two-dim-vector-visual.py scp:data/train/2d_vectors.scp "
        "data/train/utt2spk data/train/2d_vectors.png "
    )

    po = ParseOptions(usage)
    # No options are registered, so the returned options object is unused;
    # the call is still made so the command line gets parsed.
    po.parse_args()

    # Exactly three positional arguments are required.  Exit non-zero on a
    # usage error so calling scripts do not mistake it for success.
    if po.num_args() != 3:
        po.print_usage()
        sys.exit(1)

    vector_rspecifier = po.get_arg(1)
    utt2spk_rxfilename = po.get_arg(2)
    figure_rxfilename = po.get_arg(3)

    isSuccess = two_dim_vector_visual(vector_rspecifier,
                                      utt2spk_rxfilename,
                                      figure_rxfilename)
    # A falsy result means the run failed: report it through the exit
    # status (a bare sys.exit() would exit with status 0, i.e. success).
    if not isSuccess:
        sys.exit(1)