Exemplo n.º 1
0
def main():
    """Evaluate a Phonetisaurus model against a reference dictionary.

    Loads the model named by ``args.model`` and the dictionary named by
    ``args.dict``, asks the model for a single hypothesis per word, and
    prints two error rates together with the elapsed wall-clock time:

    * ``wer`` -- fraction of entries whose predicted pronunciation differs
      from the reference pronunciation;
    * ``stressless wer`` -- the same comparison after both sides have been
      passed through ``drop_stress``.

    A word for which the model yields no hypothesis is counted as an error
    rather than aborting the run, and an empty dictionary reports rates of
    0.0 instead of raising ``ZeroDivisionError``.
    """
    args = parse_args()
    eval_start = time.time()
    model = phonetisaurus.Phonetisaurus(args.model)
    d = read_dict(args.dict)
    wer = 0.0
    stressless_wer = 0.0
    for word, pron in d:
        # Positional arguments as noted by the original author: token, beam,
        # threshold, write decoded fsts to disc, accum probs across unique
        # pronunciations, target prob mass.
        # NOTE(review): seven arguments are passed but only six are named
        # above; the literal 1 is presumably the n-best count -- confirm
        # against the Phonetisaurus binding.
        results = model.Phoneticize(word, 1, 500, 10., False, False, 0.0)
        # Only the first hypothesis is scored. If the model returned nothing,
        # fall back to an empty prediction so the entry is scored as a miss.
        result = next(iter(results), None)
        if result is None:
            pred_pron = []
        else:
            pred_pron = [model.FindOsym(u) for u in result.Uniques]
        oo = ' '.join(pron)
        pp = ' '.join(pred_pron)
        if oo != pp:
            wer += 1
        oo = ' '.join(drop_stress(pron))
        pp = ' '.join(drop_stress(pred_pron))
        if oo != pp:
            stressless_wer += 1

    # Normalise the miss counts into rates; skip the division when there is
    # nothing to normalise by.
    total = len(d)
    if total:
        wer /= float(total)
        stressless_wer /= float(total)
    eval_took = time.time() - eval_start

    print(' Eval: wer %f; stressless wer %f; eval took %f' %
          (wer, stressless_wer, eval_took))
Exemplo n.º 2
0
def create_app():
    """Build the Flask application that serves the transliteration models.

    Loads one Phonetisaurus model per supported language from the
    ``model_store`` directory located two levels above this file's
    directory, collects them in a dict keyed by language code, and
    registers ``PhonetisaurusNETransliterator`` on ``/predict`` with that
    dict passed as the ``net_models`` constructor keyword.

    Returns:
        The configured ``Flask`` application object.
    """
    import phonetisaurus
    import pathlib

    this_file = pathlib.Path(__file__).absolute()
    path = this_file.parents[2] / 'model_store'
    logger.info("model directory path = '{}'".format(path))

    # (language code, model sub-directory, log message once loaded),
    # listed in the order the models are loaded.
    model_table = (
        ("ara", 'arabic_ps_1', "packed arabic model"),
        ("chi", 'chinese_ps_1', "packed chinese model"),
        ("heb", 'hebrew_ps_1', "packed hebrew model"),
        ("jpn", 'katakana_ps_1', "packed japanese model"),
        ("kor", 'korean_ps_1', "packed korean model"),
        ("rus", 'russian_ps_1', "packed russian model"),
    )

    # Load every model up front so the resource can look them up by code.
    net_models = {}
    for lang_code, subdir, done_message in model_table:
        fst_file = str(path / subdir / 'model.fst')
        net_models[lang_code] = phonetisaurus.Phonetisaurus(fst_file)
        logger.info(done_message)

    # Wire up the Flask / REST objects.
    app = Flask(__name__)
    api = Api(app)
    # Emit non-ASCII characters in JSON responses as-is instead of escaping.
    api.app.config['RESTFUL_JSON'] = {'ensure_ascii': False}
    api.add_resource(
        PhonetisaurusNETransliterator,
        '/predict',
        resource_class_kwargs={"net_models": net_models},
    )
    return app
Exemplo n.º 3
0
                        help="IP to host the service on.",
                        default="localhost")
    parser.add_argument("--port",
                        "-p",
                        help="Port to use for hosting.",
                        default=8080,
                        type=int)
    parser.add_argument("--model",
                        "-m",
                        help="Phonetisaurus G2P model.",
                        required=True)
    parser.add_argument("--lexicon",
                        "-l",
                        help="Reference lexicon.",
                        required=True)
    parser.add_argument("--verbose",
                        "-v",
                        help="Verbose mode.",
                        default=False,
                        action="store_true")
    args = parser.parse_args()

    if args.verbose:
        for key, val in args.__dict__.iteritems():
            print >> sys.stderr, "{0}:\t{1}".format(key, val)

    _g2pmodel = phonetisaurus.Phonetisaurus(args.model)
    _loadLexicon(args.lexicon)

    run(host=args.host, port=args.port, debug=False)