Exemplo n.º 1
0
def main(SylCls):
    """Syllabify pronunciations read from stdin and print dictionary entries.

    Command-line arguments (taken from ``sys.argv``) give the path of a
    UTF-8 JSON phoneme-set file plus the optional ``--oformat`` and
    ``--defstresstone`` settings.  Every non-blank line on stdin is split
    on whitespace: the first field is the word and the remaining fields are
    its pronunciation.  The pronunciation is syllabified with
    ``SylCls(phoneset).syllabify`` and the entry is printed through
    ``dictconv.print_flat`` or ``dictconv.print_nested``.

    Args:
        SylCls: Callable that accepts the loaded phoneme set and returns an
            object exposing ``syllabify(pronun)``; the result must be an
            iterable of syllables that support ``len()``.

    Raises:
        Exception: If ``--oformat`` is neither ``"flat"`` nor ``"nested"``.
            This is checked once, before any input is read.
    """
    import json
    import argparse
    # Imported locally like the other dependencies so the function does not
    # rely on a module-level ``import sys`` for ``sys.stdin``.
    import sys

    import dictconv

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('phonesetfile',
                        metavar='PHONESETFILE',
                        type=str,
                        help="File containing the phoneme set (json utf-8).")
    parser.add_argument('--oformat',
                        metavar='OUTPUTFORMAT',
                        default=dictconv.DEF_OUTFORMAT,
                        help="output format (flat|nested)")
    parser.add_argument('--defstresstone',
                        metavar='DEFSTRESSTONE',
                        default=dictconv.DEFSTRESSTONE,
                        help="default stress/tone")
    args = parser.parse_args()

    # The output format never changes while processing, so reject a bad
    # value up front instead of after the first line has been syllabified.
    if args.oformat not in ("flat", "nested"):
        raise Exception("Invalid output format specified")

    # Load the phoneme set and build the syllabifier from it.
    with open(args.phonesetfile, encoding="utf-8") as infh:
        phoneset = json.load(infh)
    syllabifier = SylCls(phoneset)

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of input): there is
            # no word to process, so skip it instead of raising IndexError.
            continue
        word = fields[0]
        pronun = fields[1:]

        syls = syllabifier.syllabify(pronun)
        # Each syllable is described by its length, rendered as a string.
        sylspec = [str(len(syl)) for syl in syls]
        # Repeat the default stress/tone value once per syllable.
        stresspat = args.defstresstone * len(sylspec)

        if args.oformat == "flat":
            print(
                dictconv.print_flat(word, "None", stresspat, sylspec, pronun,
                                    None))
        else:  # "nested" -- the only other value accepted above
            print(
                dictconv.print_nested(word, "None", stresspat, sylspec, pronun,
                                      phoneset, args.defstresstone, None))
Exemplo n.º 2
0
        #print(lexstress)
    else:
        with open(args.decomp, encoding="utf-8") as infh:
            wordlist = infh.read().split()
        lexstress = LexStresserDecomp(phoneset, wordlist)
        #print(lexstress)

    for line in sys.stdin:
        #input format is "flat" separate fields (current stress pattern is ignored/replaced)
        fields = line.strip().split()
        word, pos, stresspat, sylspec = fields[:4]
        pronun = fields[4:]

        syls = []
        i = 0
        for syllen in map(int, sylspec):
            syls.append(pronun[i:i + syllen])
            i += syllen
        stresspat = "".join(map(str, lexstress.get_stress_word(word, syls)))

        if args.oformat == "flat":
            print(
                dictconv.print_flat(word, "None", stresspat, sylspec, pronun,
                                    None))
        elif args.oformat == "nested":
            print(
                dictconv.print_nested(word, "None", stresspat, sylspec, pronun,
                                      phoneset, args.defstresstone, None))
        else:
            raise Exception("Invalid output format specified")