Exemplo n.º 1
0
            print("\t can't write denominator lattice to %s" % denlatfile)

    print("ALL DONE\n")


# Command-line entry point: only runs when the file is executed as a script.
if __name__ == '__main__':
    # The program name plus eight positional arguments are required.
    expected_argc = 9
    if len(sys.argv) != expected_argc:
        usage = "Usage: %s LMFILE FILLERFILE FILELIST FILECOUNT FILEOFFSET DENLATDIR NUMLATDIR OUTLATDIR\n"
        sys.stderr.write(usage % (sys.argv[0]))
        sys.exit(1)

    # Echo the full invocation; every argument (the last one included) is
    # followed by a single space.
    command = ''.join(arg + ' ' for arg in sys.argv)
    print("%s\n" % command)

    (lmfile, fillerfile, filelist, filecount, fileoffset,
     denlatdir, numlatdir, outdir) = sys.argv[1:]

    # Filler dictionary first, then the language model used to score the
    # word hypotheses in a lattice.
    filler = load_lexicon(fillerfile)
    lm = sphinxbase.NGramModel(lmfile)

    # Convert the lattices and write the results out.
    # NOTE(review): filecount and fileoffset are forwarded as the raw
    # command-line strings; any numeric conversion must happen in write_lat.
    write_lat(lm, filler, filelist, filecount, fileoffset,
              denlatdir, numlatdir, outdir)
Exemplo n.º 2
0
#!/usr/bin/env python
# -*- py-indent-offset: 2; indent-tabs-mode: nil; coding: utf-8 -*-

import sys
import sphinxbase as sb
import pocketsphinx as ps

# Raw audio file referenced by this script (relative path).
RAW_FILE = "../test/data/goforward.raw"

# For each language model: the name it is registered under and the file it
# is loaded from.
LM1_NAME, LM1_PATH = "lm1", "../model/lm/en_US/wsj0vp.5000.DMP"
LM2_NAME, LM2_PATH = "lm2", "../model/lm/en_US/hub4.5000.DMP"

# Both models are loaded eagerly at import time, LM1 before LM2.
LM1 = sb.NGramModel(LM1_PATH)
LM2 = sb.NGramModel(LM2_PATH)

def addLM(decoder, lm, name):
  """Register the language model `lm` with `decoder` under `name`.

  Fetches the decoder's language-model set, adds `lm` to it with
  ``set_add(lm, name, 1.0, 0)`` and hands the set back to the decoder
  through ``update_lmset``.  Returns None.
  """
  # NOTE(review): 1.0 and 0 are passed through verbatim; presumably a
  # weight and a flag -- confirm against the sphinxbase bindings.
  lm_set = decoder.get_lmset()
  lm_set.set_add(lm, name, 1.0, 0)
  decoder.update_lmset(lm_set)

def decodeAudio(decoder, uttID, audioFile, lm):
  lmSet = decoder.get_lmset()
  lmSet.set_select(lm)
  decoder.update_lmset(lmSet)

  decoder.start_utt(uttID)

  fh = open(audioFile, "r")
Exemplo n.º 3
0
"""
Rescore a lattice using a language model directly
"""

__author__ = "David Huggins-Daines <*****@*****.**>"
__version__ = "$Revision $"


import sphinxbase
import lattice
import math
import sys
import os

def lat_rescore(latfile, lmfst):
    """
    Rescore a lattice using a language model.

    Loads the lattice stored in `latfile`, finds its best path under
    `lmfst`, and walks the backtrace from the final node of that path.

    latfile -- path of the lattice file handed to lattice.Dag
    lmfst   -- language model object handed to Dag.bestpath

    Returns a (words, score) tuple: the list of lattice.baseword(node.sym)
    for every node on the backtrace, and the `score` attribute of the
    final node.
    """
    dag = lattice.Dag(latfile)
    # Score with the model supplied by the caller; relying on a module-level
    # global named `lm` only works when this file is run as a script.
    end = dag.bestpath(lmfst)
    return [lattice.baseword(x.sym) for x in dag.backtrace(end)], end.score

if __name__ == '__main__':
    # Exactly three arguments are expected: control file, lattice
    # directory and language model file.
    if len(sys.argv) != 4:
        sys.stderr.write("Usage: %s CTLFILE LATDIR LMFILE\n" % sys.argv[0])
        sys.exit(1)
    ctlfile, latdir, lmfile = sys.argv[1:]

    lm = sphinxbase.NGramModel(lmfile, wip=1.0, lw=9.5)

    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the control file even if rescoring raises.
    with open(ctlfile) as ctl:
        for line in ctl:
            # Each control-file line names one utterance; its lattice is
            # expected at LATDIR/<utterance>.lat.gz.
            uttid = line.strip()
            latfile = os.path.join(latdir, uttid + ".lat.gz")
            words, score = lat_rescore(latfile, lm)
            # One pre-formatted string prints identically under the Python 2
            # print statement and the Python 3 print function.
            print("%s (%s %f)" % (" ".join(words), uttid, score))
Exemplo n.º 4
0
#!/usr/bin/env python

import sphinxbase
import sys

lm = sphinxbase.NGramModel("../test/unit/test_ngram/100.arpa.DMP")
for ng in lm.mgrams(0):
    print ng.log_prob, ng.log_bowt

hc = sphinxbase.HuffCode(
    (("foo", 42), ("bar", 4), ("baz", 5), ("quux", 6), ("argh", 225),
     ("hurf", 15001), ("burf", 3), ("blatz", 2), ("unf", 87), ("woof", 1003)))
hc.dump(sys.stdout)
data, bits = hc.encode(("hurf", "burf", "blatz", "unf", "woof"))
dstr = "".join([("%02x" % ord(b)) for b in data])
print "encoding", ("hurf", "burf", "blatz", "unf", "woof"), "=>", (dstr, bits)
print "decoded to", hc.decode(data)

hc.write("foo.huff")
hc = sphinxbase.HuffCode(infile="foo.huff")
data, bits = hc.encode(("hurf", "burf", "blatz", "unf", "woof"))
dstr = "".join([("%02x" % ord(b)) for b in data])
print "encoding", ("hurf", "burf", "blatz", "unf", "woof"), "=>", (dstr, bits)
print "decoded to", hc.decode(data)

hc.attach("foo.bin", "wb")
hc.encode_to_file(("hurf", "burf", "blatz", "unf", "woof"))
hc.encode_to_file(("burf", "blatz", "woof", "unf", "woof"))
hc.detach()

hc.attach("foo.bin", "rb")