示例#1
0
    # Register the command-line options. Each call passes the option name,
    # its default value and the help text shown in the usage message.
    po.register_int("frame-shift", 10, "Length of frame shift in ms "
                    "default is 10ms")
    po.register_int("nfft", 256, "Number of DFT points " "default is 256")
    po.register_int(
        "arma-order", 5, "Length of ARMA window that will be applied "
        "to the spectrogram")
    po.register_int("ltsv-ctx-window", 50,
                    "Context window for LTSV computation "
                    "default is 50")
    po.register_float(
        "threshold", 0.01, "Parameter for sigmoid scaling in LTSV "
        "default is 0.01")
    po.register_float(
        "slope", 0.001, "Parameter for sigmoid scaling in LTSV "
        "default is 0.001")
    po.register_bool("sigmoid-scale", True, "Apply sigmoid scaling in LTSV "
                     "default is True")
    po.register_int("dct-num-cep", 5, "DCT number of coefficients "
                    "default is 5")
    po.register_int("dct-ctx-window", 30, "DCT context window "
                    "default is 30")
    po.register_bool("test-plot", False, "Produces a plot for testing "
                     "default is False")

    opts = po.parse_args()

    # Exactly two positional arguments are required: the input wav
    # rspecifier and the output feature wspecifier.
    if po.num_args() != 2:
        po.print_usage()
        # Exit with a non-zero status so calling scripts can detect the
        # usage error (a bare sys.exit() reports success).
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    feats_wspecifier = po.get_arg(2)
示例#2
0
        "Length of ARMA window that will be applied to the spectrogram",
    )
    # Remaining command-line options: option name, default value, help text.
    po.register_int(
        "ltsv-ctx-window",
        50,
        "Context window for LTSV computation (default: 50)",
    )
    po.register_float(
        "threshold",
        0.01,
        "Parameter for sigmoid scaling in LTSV (default: 0.01)",
    )
    po.register_float(
        "slope",
        0.001,
        "Parameter for sigmoid scaling in LTSV (default: 0.001)",
    )
    po.register_bool(
        "sigmoid-scale",
        True,
        "Apply sigmoid scaling in LTSV (default: True)",
    )
    po.register_int(
        "dct-num-cep",
        5,
        "DCT number of coefficients (default: 5)",
    )
    po.register_int(
        "dct-ctx-window",
        30,
        "DCT context window (default: 30)",
    )
    po.register_bool(
        "test-plot",
        False,
        "Produces a plot for testing (default: False)",
    )

    opts = po.parse_args()

    # Exactly two positional arguments are required: the input wav
    # rspecifier and the output feature wspecifier.
    if po.num_args() != 2:
        po.print_usage()
        # Non-zero status so that calling scripts see the usage error.
        sys.exit(1)

    wav_rspecifier = po.get_arg(1)
    feats_wspecifier = po.get_arg(2)
示例#3
0
                               .format(__version__), level=logging.INFO)

    usage = """Decode features using GMM-based model.

    Usage:  gmm-decode-faster.py [options] model-in fst-in features-rspecifier
                words-wspecifier [alignments-wspecifier [lattice-wspecifier]]

    Note: lattices, if output, will just be linear sequences;
          use gmm-latgen-faster if you want "real" lattices.
    """
    po = ParseOptions(usage)

    # The decoder options object registers its own options with the parser.
    decoder_opts = FasterDecoderOptions()
    decoder_opts.register(po, True)

    # Script-level options: option name, default value, help text.
    po.register_float("acoustic-scale", 0.1,
                      "Scaling factor for acoustic likelihoods")
    po.register_bool("allow-partial", True,
                     "Produce output even when final state was not reached")
    po.register_str("word-symbol-table", "",
                    "Symbol table for words [for debug output]")
    opts = po.parse_args()

    # Four positional arguments are mandatory; the fifth and sixth
    # (alignment and lattice wspecifiers) are optional.
    if not 4 <= po.num_args() <= 6:
        po.print_usage()
        # Non-zero status so that calling scripts see the usage error.
        sys.exit(1)

    model_rxfilename = po.get_arg(1)
    fst_rxfilename = po.get_arg(2)
    feature_rspecifier = po.get_arg(3)
    words_wspecifier = po.get_arg(4)
    alignment_wspecifier = po.get_opt_arg(5)
    lattice_wspecifier = po.get_opt_arg(6)
示例#4
0
    return num_success != 0


if __name__ == '__main__':
    usage = """Create MFCC feature files.

    Usage:  compute-mfcc-feats [options...] <wav-rspecifier> <feats-wspecifier>
    """
    po = ParseOptions(usage)

    mfcc_opts = MfccOptions()
    mfcc_opts.register(po)

    # Script-level options: option name, default value, help text.
    # Adjacent string literals are concatenated verbatim, so every fragment
    # that continues on the next line must end with a space.
    po.register_bool(
        "subtract-mean", False, "Subtract mean of each feature "
        "file [CMS]; not recommended to do it this way.")
    po.register_float(
        "vtln-warp", 1.0, "Vtln warp factor (only applicable "
        "if vtln-map not specified)")
    po.register_str(
        "vtln-map", "", "Map from utterance or speaker-id to "
        "vtln warp factor (rspecifier)")
    po.register_str(
        "utt2spk", "", "Utterance to speaker-id map rspecifier "
        "(if doing VTLN and you have warps per speaker)")
    po.register_int(
        "channel", -1, "Channel to extract (-1 -> expect mono, "
        "0 -> left, 1 -> right)")
    po.register_float(
        "min-duration", 0.0, "Minimum duration of segments "
示例#5
0
   Posterior-formatted posterior:
     <uttid> [[(0,0.1), (1,0.89), (5,0.01)],
              [(1,0,9), (5,0.1)],
                ...
              [(0,0.8), (1,0.2)]]
       ... 

  Usage: feat-to-post.py [options] feature_rspecifier posteriors_wspecifier

  e.g.
      feat-to-post scp:feats.scp ark:post.ark

  """
  po = ParseOptions(usage)
  # Script options: option name, default value, help text.
  po.register_int("top-n", 10,
                  "only keep highest N posteriors per frame, 10 by default")
  po.register_bool("rescale", False,
                   "rescale top N posteriors to let summation equals to 1, false by default")
  opts = po.parse_args()

  # Exactly two positional arguments are required: the feature rspecifier
  # and the posterior wspecifier.
  if po.num_args() != 2:
    po.print_usage()
    # Non-zero status so that calling scripts see the usage error.
    sys.exit(1)

  feature_rspecifier = po.get_arg(1)
  posterior_wspecifier = po.get_arg(2)
  is_success = feat_to_post(feature_rspecifier, posterior_wspecifier,
                            opts.top_n, opts.rescale)
  if not is_success:
    # A bare sys.exit() would report success; signal the failure instead.
    sys.exit(1)
示例#6
0
    usage = """Copy matrices, or archives of matrices (e.g. features or transforms)
    Also see copy-feats which has other format options


    Usage: copy-matrix [options] <matrix-in-rspecifier> <matrix-out-wspecifier>
    or     copy-matrix [options] <matrix-in-rxfilename> <matrix-out-wxfilename>

    e.g.
        copy-matrix --binary=false 1.mat -
        copy-matrix ark:2.trans ark,t:-
    """

    po = ParseOptions(usage)

    # Output format.
    po.register_bool("binary", True,
                     "Write in binary mode "
                     "(only relevant if output is a wxfilename)")

    # Element-wise transformations that can be requested for the copy.
    po.register_float("scale", 1.0,
                      "This option can be used to scale the matrices "
                      "being copied.")
    po.register_bool("apply-log", False,
                     "This option can be used to apply log on the matrices. "
                     "Must be avoided if matrix has negative quantities.")
    po.register_bool(
        "apply-exp", False,
        "This option can be used to apply exp on the matrices")
    po.register_float("apply-power", 1.0,
                      "This option can be used to apply a power "
                      "on the matrices")
    po.register_bool(
        "apply-softmax-per-row", False,
示例#7
0
if __name__ == '__main__':
  # Script entry point: configure logging, parse the command line, run the
  # posterior counting and report failure through the exit status.

  # Configure log messages to look like Kaldi messages
  from kaldi import __version__
  logging.addLevelName(logging.INFO, "LOG")
  logging.basicConfig(format="%(levelname)s (%(module)s[{}]:%(funcName)s():"
                             "%(filename)s:%(lineno)s) %(message)s"
                             .format(__version__), level=logging.INFO)
  usage = """Compute the counts of *feature-formatted* posterior for each mixture. 
  If --normalize=True and --per-utt=False, the counts will be averaged by the
    number of utterances.
  Usage: post-count.py [options] feature_rspecifier posteriors_wspecifier

  e.g.
      post-count scp:feats.scp ark,t:count.txt

  """
  po = ParseOptions(usage)
  # Script options: option name, default value, help text.
  po.register_bool("normalize", False, "normalize the counts, False by default")
  po.register_bool("per-utt", False, "Count per utterance, False by default")
  opts = po.parse_args()

  # Exactly two positional arguments are required.
  if po.num_args() != 2:
    po.print_usage()
    # Non-zero status so that calling scripts see the usage error.
    sys.exit(1)

  feature_rspecifier = po.get_arg(1)
  posterior_wspecifier = po.get_arg(2)
  is_success = post_to_count(feature_rspecifier, posterior_wspecifier,
                             normalize=opts.normalize, per_utt=opts.per_utt)
  if not is_success:
    # A bare sys.exit() would report success; signal the failure instead.
    sys.exit(1)