示例#1
0
        "%(filename)s:%(lineno)s) %(message)s".format(__version__),
        level=logging.INFO)
    usage = """Use Principal component analysis for dimension reduction.
  For the details, Please refer to website:
  https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

  Usage: pca-vector.py [options] <vector-rspecifier> <vector-wspecifier

  e.g.
      pca-vector.py scp:data/train/ivector.scp ark:data/train/low_dim_vector.ark

  see also: two-dim-vector-visual.py
  """
    po = ParseOptions(usage)
    po.register_int(
        "output-dim", 2, "dimension of the output vectors."
        " For visualization, only 2 is allowed in this program. (2 by default)"
    )
    opts = po.parse_args()
    if (po.num_args() != 2):
        po.print_usage()
        sys.exit()

    vector_rspecifier = po.get_arg(1)
    vector_wspecifier = po.get_arg(2)
    isSuccess = pca_vector(vector_rspecifier,
                           vector_wspecifier,
                           output_dim=opts.output_dim)
    if not isSuccess:
        sys.exit()
示例#2
0
        "ltsv-ctx-window",
        50,
        "Context window for LTSV computation (default: 50)",
    )
    # Remaining LTSV / DCT options; the defaults are repeated in each help
    # string so they show up in the printed usage.
    po.register_float(
        "threshold",
        0.01,
        "Parameter for sigmoid scaling in LTSV (default: 0.01)",
    )
    po.register_float(
        "slope",
        0.001,
        "Parameter for sigmoid scaling in LTSV (default: 0.001)",
    )
    po.register_bool(
        "sigmoid-scale",
        True,
        "Apply sigmoid scaling in LTSV (default: True)",
    )
    po.register_int(
        "dct-num-cep",
        5,
        "DCT number of coefficients (default: 5)",
    )
    po.register_int(
        "dct-ctx-window",
        30,
        "DCT context window (default: 30)",
    )
    po.register_bool(
        "test-plot",
        False,
        "Produces a plot for testing (default: False)",
    )

    opts = po.parse_args()

    # Two positional arguments are required. Exit with a non-zero status so
    # calling scripts can detect the usage error.
    if po.num_args() != 2:
        po.print_usage()
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    feats_wspecifier = po.get_arg(2)

    # The parsed options object is passed through to compute_vad unchanged.
    compute_vad(wav_rspecifier, feats_wspecifier, opts)
示例#3
0
    # Configure log messages to carry the kaldi package version and to label
    # level 20 (INFO) as 'LOG'.
    from kaldi import __version__
    logging.addLevelName(20, 'LOG')
    logging.basicConfig(
        format='%(levelname)s (%(module)s[{}]:%(funcName)s():'
        '%(filename)s:%(lineno)s) %(message)s'.format(__version__),
        level=logging.INFO)

    usage = """Extract segments from a large audio file in WAV format.
    Usage:
        extract-segments [options] <wav-rspecifier> <segments-file> <wav-wspecifier>
    """
    po = ParseOptions(usage)
    po.register_float(
        "min-segment-length", 0.1, "Minimum segment length "
        "in seconds (reject shorter segments)")
    # Hyphenated like every other registered option name.
    # NOTE(review): assumes the option parser treats '_' and '-' in option
    # names alike, so `opts.max_overshoot` and `--max_overshoot` keep
    # working -- confirm against the ParseOptions implementation.
    po.register_float(
        "max-overshoot", 0.5, "End segments overshooting audio "
        "by less than this (in seconds) are truncated, "
        "else rejected.")

    opts = po.parse_args()

    # Three positional arguments are required. Exit with a non-zero status
    # so calling scripts can detect the usage error.
    if po.num_args() != 3:
        po.print_usage()
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    segments_rxfilename = po.get_arg(2)
    wav_wspecifier = po.get_arg(3)

    extract_segments(wav_rspecifier, segments_rxfilename, wav_wspecifier, opts)
示例#4
0
    Note: lattices, if output, will just be linear sequences;
          use gmm-latgen-faster if you want "real" lattices.
    """
    po = ParseOptions(usage)

    # The decoder options object registers its own options on the parser;
    # the script-level options are registered right after.
    decoder_opts = FasterDecoderOptions()
    decoder_opts.register(po, True)
    po.register_float("acoustic-scale", 0.1,
                      "Scaling factor for acoustic likelihoods")
    po.register_bool("allow-partial", True,
                     "Produce output even when final state was not reached")
    po.register_str("word-symbol-table", "",
                    "Symbol table for words [for debug output]")
    opts = po.parse_args()

    # Between four and six positional arguments are accepted. Exit with a
    # non-zero status so calling scripts can detect the usage error.
    if not 4 <= po.num_args() <= 6:
        po.print_usage()
        sys.exit(1)

    model_rxfilename = po.get_arg(1)
    fst_rxfilename = po.get_arg(2)
    feature_rspecifier = po.get_arg(3)
    words_wspecifier = po.get_arg(4)
    # Arguments 5 and 6 are optional, hence get_opt_arg instead of get_arg.
    alignment_wspecifier = po.get_opt_arg(5)
    lattice_wspecifier = po.get_opt_arg(6)

    gmm_decode_faster(model_rxfilename, fst_rxfilename,
                      feature_rspecifier, words_wspecifier,
                      alignment_wspecifier, lattice_wspecifier,
                      opts.word_symbol_table, opts.acoustic_scale,
                      opts.allow_partial, decoder_opts)
示例#5
0
if __name__ == '__main__':
    # Configure log messages to look like Kaldi messages
    from kaldi import __version__
    logging.addLevelName(20, "LOG")
    logging.basicConfig(format="%(levelname)s (%(module)s[{}]:%(funcName)s():"
                               "%(filename)s:%(lineno)s) %(message)s"
                               .format(__version__), level=logging.INFO)
    usage = """save the visualization plot of 2-dimensional vectors to hard disk.

  Usage: two-dim-vector-visual.py [options] <vector-rspecifier> <utt2spk-rxfilename> <figure-rxfilename>

  e.g.
      two-dim-vector-visual.py scp:data/train/2d_vectors.scp data/train/utt2spk data/train/2d_vectors.png
  """
    po = ParseOptions(usage)
    # No script-specific options are registered; parse_args() is still
    # called before the positional arguments are counted and read.
    opts = po.parse_args()

    # Three positional arguments are required. Exit with a non-zero status
    # so calling scripts can detect the usage error.
    if po.num_args() != 3:
        po.print_usage()
        sys.exit(1)

    vector_rspecifier = po.get_arg(1)
    utt2spk_rxfilename = po.get_arg(2)
    figure_rxfilename = po.get_arg(3)
    succeeded = two_dim_vector_visual(vector_rspecifier,
                                      utt2spk_rxfilename,
                                      figure_rxfilename)
    # A falsy result is surfaced through the exit status instead of being
    # silently reported as success.
    if not succeeded:
        sys.exit(1)