Example #1
def __init__(self,
             processor,
             linear_x="",
             spatial="",
             linear_y=None,
             truncated=False):
    self.processor = processor
    self.truncated = truncated
    self.linear_x = ScriptReader(linear_x)
    # guard against the default: linear_y may be None
    self.linear_y = [ScriptReader(ly) for ly in linear_y] if linear_y else None
    self.spatial = ScriptReader(spatial) if spatial else None
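The owning class is not shown in this excerpt. A minimal construction sketch, assuming the __init__ above belongs to a dataset class (SpectraDataset is a hypothetical name, and the .scp paths are placeholders):

# "SpectraDataset" and the .scp paths are placeholders; only the keyword
# arguments come from the __init__ above.
dataset = SpectraDataset(processor,
                         linear_x="data/train/linear_x.scp",
                         spatial="data/train/ipd.scp",
                         linear_y=["data/train/spk1.scp",
                                   "data/train/spk2.scp"])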
Example #2
def __init__(self, data_dir):
    depends = [op.join(data_dir, x) for x in ["feats.scp", "spk2utt"]]
    for depend in depends:
        if not op.exists(depend):
            raise RuntimeError("Missing {}!".format(depend))
    self.reader = ScriptReader(depends[0])
    self.spk2utt = Reader(depends[1], num_tokens=-1)
Example #3
def re_decide(enroll_xvector_scp, test_xvector_scp, threshold_value):
    enroll_xvector_scp_reader = ScriptReader(enroll_xvector_scp)
    # assumes the enroll scp holds a single utterance: the loop keeps the
    # last (and only) x-vector it reads
    for utt, value in enroll_xvector_scp_reader:
        enroll_xvector = value

    re_decide_dict = {}
    test_xvector_scp_reader = ScriptReader(test_xvector_scp)
    for utt, test_xvector in test_xvector_scp_reader:
        # cosine similarity between the enrollment and test x-vectors
        dist = np.dot(enroll_xvector, test_xvector) / (
            np.linalg.norm(enroll_xvector) * np.linalg.norm(test_xvector))
        # print(dist)
        if dist >= threshold_value:
            re_decide_dict[utt] = 1
        else:
            re_decide_dict[utt] = 0
    return re_decide_dict
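re_decide thresholds cosine similarity, so higher scores mean a closer match. A minimal call sketch (the paths and the 0.6 threshold are illustrative):

decisions = re_decide("enroll_xvector.scp", "test_xvector.scp", 0.6)
for utt, accept in decisions.items():
    print(utt, "accept" if accept else "reject")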
Example #4
def test_archive_writer(ark, scp):
    # for matrix
    with ArchiveWriter(ark, scp) as writer:
        for i in range(10):
            mat = np.random.rand(100, 20)
            writer.write("mat-{:d}".format(i), mat)
    scp_reader = ScriptReader(scp)
    for key, mat in scp_reader:
        print("{0}: {1}".format(key, mat.shape))
    # for vector
    with ArchiveWriter(ark, scp) as writer:
        for i in range(10):
            vec = np.random.rand(100)
            writer.write("vec-{:d}".format(i), vec)
    scp_reader = ScriptReader(scp)
    for key, vec in scp_reader:
        print("{0}: {1}".format(key, vec.size))
    print("TEST *test_archieve_writer* DONE!")
Example #5
def run(args):
    feats_reader = ScriptReader(args.feats)
    computer = NnetComputer(args.checkpoint, args.gpu)
    if not os.path.exists(args.dump_dir):
        os.makedirs(args.dump_dir)
    for key, feats in feats_reader:
        logger.info("Compute dvector on utterance {}...".format(key))
        dvector = computer.compute(feats)
        np.save(os.path.join(args.dump_dir, key), dvector)
    logger.info("Compute over {:d} utterances".format(len(feats_reader)))
Example #6
def ark2hdf_caching(scp_file, hdf_file):
    ark_reader = ScriptReader(scp_file)
    writer = vio.HDFWriter(file_name=hdf_file)
    cnt = 0
    for fn in ark_reader.index_keys:
        feat = ark_reader[fn]
        # dump features
        writer.append(file_id=fn, feat=feat)
        cnt += 1
        print("%d. processed: %s" % (cnt, fn))
    writer.close()
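A call sketch (paths are illustrative; the .scp file must point at existing Kaldi archives, and vio is whatever I/O module provides HDFWriter in this project):

ark2hdf_caching("data/train/feats.scp", "data/train/feats.h5")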
Example #7
File: align.py Project: yt752/aps
def run(args):
    print(f"Arguments in args:\n{pprint.pformat(vars(args))}", flush=True)

    aligner = CtcAligner(args.am,
                         cpt_tag=args.am_tag,
                         device_id=args.device_id)
    if aligner.accept_raw:
        src_reader = AudioReader(args.feats_or_wav_scp,
                                 sr=args.sr,
                                 channel=args.channel)
    else:
        src_reader = ScriptReader(args.feats_or_wav_scp)
        if args.word_boundary:
            raise RuntimeError(
                "Cannot generate word boundaries when using Kaldi features")

    txt_reader = Reader(args.text, num_tokens=-1, restrict=False)
    processor = TextPreProcessor(args.dict, space=args.space, spm=args.spm)

    ali_stdout, ali_fd = io_wrapper(args.alignment, "w")

    wdb_stdout, wdb_fd = False, None
    if args.word_boundary:
        wdb_stdout, wdb_fd = io_wrapper(args.word_boundary, "w")
    done = 0
    tot_utts = len(src_reader)
    timer = SimpleTimer()
    for key, str_seq in txt_reader:
        done += 1
        logger.info(
            f"Generate alignment for utterance {key} ({done}/{tot_utts}) ...")
        int_seq = processor.run(str_seq)
        wav_or_feats = src_reader[key]
        ali = aligner.run(wav_or_feats, int_seq)
        header = f"{ali['score']:.3f}, {len(ali['align_seq'])}"
        ali_fd.write(f"{key} {ali['align_str']}\n")
        logger.info(f"{key} ({header}) {ali['align_str']}")
        if wdb_fd:
            dur = wav_or_feats.shape[-1] * 1.0 / args.sr
            wdb = gen_word_boundary(key, dur, ali["align_str"])
            wdb_fd.write("\n".join(wdb) + "\n")
    if not ali_stdout:
        ali_fd.close()
    if wdb_fd and not wdb_stdout:
        wdb_fd.close()
    cost = timer.elapsed()
    logger.info(f"Generate alignments for {tot_utts} utterance done, " +
                f"time cost = {cost:.2f}m")
Example #8
def __init__(self,
             shuffle=True,
             mix_scp="",
             ref_scp="",
             emb_scp="",
             embed_format="kaldi",
             sr=16000):
    if embed_format not in ["kaldi", "numpy"]:
        raise RuntimeError(
            "Unknown embedding format {}".format(embed_format))
    self.mix = WaveReader(mix_scp, sr=sr)
    self.ref = WaveReader(ref_scp, sr=sr)
    if embed_format == "numpy":
        self.emb = NumpyReader(emb_scp)
    else:
        self.emb = ScriptReader(emb_scp, matrix=False)
    self.shuffle = shuffle
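The owning class is not shown. A construction sketch, assuming this __init__ belongs to a separation dataset (SeparationDataset is a hypothetical name, and the paths are placeholders):

dataset = SeparationDataset(mix_scp="data/tr/mix.scp",
                            ref_scp="data/tr/spk1.scp",
                            emb_scp="data/tr/emb.scp",
                            embed_format="kaldi")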
Example #9
def run(args):
    computer = NnetComputer(args.checkpoint, args.gpu)
    num_done = 0
    feats_conf = load_json(args.checkpoint, "feats.json")
    spectra = Processor(args.spectra, **feats_conf)
    spatial = ScriptReader(args.spatial) if args.spatial else None
    dump_dir = Path(args.dump_dir)
    dump_dir.mkdir(exist_ok=True, parents=True)
    for key, feats in spectra:
        logger.info("Compute on utterance {}...".format(key))
        if spatial:
            spa = spatial[key]
            feats = np.hstack([feats, spa])
        spk_masks = computer.compute(feats)
        for i, m in enumerate(spk_masks):
            (dump_dir / f"spk{i + 1:d}").mkdir(exist_ok=True)
            np.save(dump_dir / f"spk{i + 1:d}" / key, m)
        num_done += 1
    logger.info("Compute over {:d} utterances".format(num_done))
Example #10
def run(args):
    computer = NnetComputer(args.checkpoint, args.gpu)
    num_done = 0
    feats_conf = load_json(args.checkpoint, "feats.json")
    spectra = Processor(args.spectra, **feats_conf)
    spatial = ScriptReader(args.spatial) if args.spatial else None

    for key, feats in spectra:
        logger.info("Compute on utterance {}...".format(key))
        if spatial:
            spa = spatial[key]
            feats = np.hstack([feats, spa])
        spk_masks = computer.compute(feats)
        for i, m in enumerate(spk_masks):
            fdir = os.path.join(args.dump_dir, "spk{:d}".format(i + 1))
            make_dir(fdir)
            np.save(os.path.join(fdir, key), m)
        num_done += 1
    logger.info("Compute over {:d} utterances".format(num_done))
Example #11
def test_multiprocess_script_reader(scp):
    # test ScriptReader
    scp_reader = ScriptReader(scp)
    pool = Pool(processes=2)
    try:
        utt_list = scp_reader.index_keys
        result_list = list()
        for (utt_id, utt_path) in utt_list:
            result = pool.apply_async(scp_reader.__getitem__, args=(utt_id,))
            result_list.append(result)
        pool.close()
        pool.join()

        for result in result_list:
            print(result.get())
    except TypeError as e:
        print("Using ScriptReader leads to the error:\n", e)
    finally:
        del scp_reader
        del pool

    # test SynchronizedScriptReader
    scp_reader = SynchronizedScriptReader(scp)
    pool = Pool(processes=2)
    try:
        utt_list = scp_reader.index_keys
        result_list = list()
        for (utt_id, utt_path) in utt_list:
            result = pool.apply_async(scp_reader.__getitem__, args=(utt_id,))
            result_list.append(result)
        pool.close()
        pool.join()

        for result in result_list:
            print(result.get())
    except TypeError as e:
        print("Using SynchronizedScriptReader leads to the error:\n", e)
    finally:
        del scp_reader
        del pool

    print("TEST *multiprocess_script_reader* DONE!")
Example #12
File: kaldi.py Project: yt752/aps
def __init__(self,
             feats_scp: str,
             text: str,
             utt2num_frames: str,
             vocab_dict: Optional[Dict],
             skip_utts: str = "",
             min_token_num: int = 1,
             max_token_num: int = 400,
             max_frame_num: float = 3000,
             min_frame_num: float = 40) -> None:
    feats_reader = ScriptReader(feats_scp)
    super(Dataset, self).__init__(feats_reader,
                                  text,
                                  utt2num_frames,
                                  vocab_dict,
                                  max_dur=max_frame_num,
                                  min_dur=min_frame_num,
                                  dur_axis=0,
                                  skip_utts=skip_utts,
                                  min_token_num=min_token_num,
                                  max_token_num=max_token_num)
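A construction sketch for this Dataset (paths are placeholders; vocab_dict is assumed to map tokens to integer ids):

dataset = Dataset("data/train/feats.scp",
                  "data/train/text",
                  "data/train/utt2num_frames",
                  vocab_dict,
                  max_token_num=400,
                  max_frame_num=3000)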
Example #13
def test_script_reader(scp):
    scp_reader = ScriptReader(scp)
    for key, obj in scp_reader:
        print("{0}: {1}".format(key, obj.shape))
    print("TEST *test_script_reader* DONE!")
Example #14
def read_vad(vad_scp):
    scp_reader = ScriptReader(vad_scp)
    vad_dict = {}
    for utt, vad in scp_reader:
        vad_dict[utt] = vad
    return vad_dict
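A call sketch (the path is illustrative; each value is whatever array ScriptReader yields for that utterance's VAD decisions):

vad_dict = read_vad("data/test/vad.scp")
print("loaded VAD for {:d} utterances".format(len(vad_dict)))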
Example #15
def run(args):
    print(f"Arguments in args:\n{pprint.pformat(vars(args))}", flush=True)

    decoder = FasterDecoder(args.am,
                            cpt_tag=args.am_tag,
                            function=args.function,
                            device_id=args.device_id)
    if decoder.accept_raw:
        src_reader = AudioReader(args.feats_or_wav_scp,
                                 sr=args.sr,
                                 channel=args.channel)
    else:
        src_reader = ScriptReader(args.feats_or_wav_scp)

    if args.lm:
        if Path(args.lm).is_file():
            from aps.asr.lm.ngram import NgramLM
            lm = NgramLM(args.lm, args.dict)
            logger.info(
                f"Load ngram LM from {args.lm}, weight = {args.lm_weight}")
        else:
            lm = NnetEvaluator(args.lm,
                               device_id=args.device_id,
                               cpt_tag=args.lm_tag)
            logger.info(f"Load RNN LM from {args.lm}: epoch {lm.epoch}, " +
                        f"weight = {args.lm_weight}")
            lm = lm.nnet
    else:
        lm = None

    processor = TextPostProcessor(args.dict,
                                  space=args.space,
                                  show_unk=args.show_unk,
                                  spm=args.spm)
    stdout_top1, top1 = io_wrapper(args.best, "w")
    topn = None
    if args.dump_nbest:
        stdout_topn, topn = io_wrapper(args.dump_nbest, "w")
        if args.function == "greedy_search":
            nbest = min(args.beam_size, args.nbest)
        else:
            nbest = 1
        topn.write(f"{nbest}\n")
    ali_dir = args.dump_align
    if ali_dir:
        Path(ali_dir).mkdir(exist_ok=True, parents=True)
        logger.info(f"Dump alignments to dir: {ali_dir}")
    N = 0
    timer = SimpleTimer()
    dec_args = dict(
        filter(lambda x: x[0] in beam_search_params,
               vars(args).items()))
    dec_args["lm"] = lm
    for key, src in src_reader:
        logger.info(f"Decoding utterance {key}...")
        nbest_hypos = decoder.run(src, **dec_args)
        nbest = [f"{key}\n"]
        for idx, hyp in enumerate(nbest_hypos):
            # remove SOS/EOS
            token = hyp["trans"][1:-1]
            trans = processor.run(token)
            score = hyp["score"]
            nbest.append(f"{score:.3f}\t{len(token):d}\t{trans}\n")
            if idx == 0:
                top1.write(f"{key}\t{trans}\n")
            if ali_dir:
                if hyp["align"] is None:
                    raise RuntimeError(
                        "Can not dump alignment out as it's None")
                np.save(f"{ali_dir}/{key}-nbest{idx+1}", hyp["align"].numpy())
        if topn:
            topn.write("".join(nbest))
        if not (N + 1) % 10:
            top1.flush()
            if topn:
                topn.flush()
        N += 1
    if not stdout_top1:
        top1.close()
    if topn and not stdout_topn:
        topn.close()
    cost = timer.elapsed()
    logger.info(
        f"Decode {len(src_reader)} utterance done, time cost = {cost:.2f}m")
Example #16
def run(args):
    print(f"Arguments in args:\n{pprint.pformat(vars(args))}", flush=True)
    if args.batch_size == 1:
        warnings.warn("can use decode.py instead as batch_size == 1")
    decoder = BatchDecoder(args.am,
                           device_id=args.device_id,
                           cpt_tag=args.am_tag)
    if decoder.accept_raw:
        src_reader = AudioReader(args.feats_or_wav_scp,
                                 sr=args.sr,
                                 channel=args.channel)
    else:
        src_reader = ScriptReader(args.feats_or_wav_scp)

    if args.lm:
        if Path(args.lm).is_file():
            from aps.asr.lm.ngram import NgramLM
            lm = NgramLM(args.lm, args.dict)
            logger.info(
                f"Load ngram LM from {args.lm}, weight = {args.lm_weight}")
        else:
            lm = NnetEvaluator(args.lm,
                               device_id=args.device_id,
                               cpt_tag=args.lm_tag)
            logger.info(f"Load RNN LM from {args.lm}: epoch {lm.epoch}, " +
                        f"weight = {args.lm_weight}")
            lm = lm.nnet
    else:
        lm = None

    processor = TextPostProcessor(args.dict,
                                  space=args.space,
                                  show_unk=args.show_unk,
                                  spm=args.spm)
    stdout_top1, top1 = io_wrapper(args.best, "w")
    topn = None
    if args.dump_nbest:
        stdout_topn, topn = io_wrapper(args.dump_nbest, "w")
        nbest = min(args.beam_size, args.nbest)
        topn.write(f"{nbest}\n")
    ali_dir = args.dump_align
    if ali_dir:
        Path(ali_dir).mkdir(exist_ok=True, parents=True)
        logger.info(f"Dump alignments to dir: {ali_dir}")
    done = 0
    timer = SimpleTimer()
    batches = []
    dec_args = dict(
        filter(lambda x: x[0] in beam_search_params,
               vars(args).items()))
    dec_args["lm"] = lm
    tot_utts = len(src_reader)
    for key, src in src_reader:
        done += 1
        batches.append({
            "key": key,
            "inp": src,
            "len": src.shape[-1] if decoder.accept_raw else src.shape[0]
        })
        end = (done == len(src_reader) and len(batches))
        if len(batches) != args.batch_size and not end:
            continue
        # decode
        batches = sorted(batches, key=lambda b: b["len"], reverse=True)
        batch_nbest = decoder.run([bz["inp"] for bz in batches], **dec_args)
        keys = [bz["key"] for bz in batches]
        for key, nbest in zip(keys, batch_nbest):
            logger.info(f"Decoding utterance {key} ({done}/{tot_utts}) ...")
            nbest_hypos = [f"{key}\n"]
            for idx, hyp in enumerate(nbest):
                # remove SOS/EOS
                token = hyp["trans"][1:-1]
                trans = processor.run(token)
                score = hyp["score"]
                nbest_hypos.append(f"{score:.3f}\t{len(token):d}\t{trans}\n")
                if idx == 0:
                    logger.info(f"{key} ({score:.3f}, {len(token):d}) {trans}")
                    top1.write(f"{key}\t{trans}\n")
                if ali_dir:
                    if hyp["align"] is None:
                        raise RuntimeError(
                            "Can not dump alignment out as it's None")
                    np.save(f"{ali_dir}/{key}-nbest{idx+1}",
                            hyp["align"].numpy())
            if topn:
                topn.write("".join(nbest_hypos))
        top1.flush()
        if topn:
            topn.flush()
        batches.clear()

    if not stdout_top1:
        top1.close()
    if topn and not stdout_topn:
        topn.close()
    cost = timer.elapsed()
    logger.info(f"Decode {tot_utts} utterance done, time cost = {cost:.2f}m")
Example #17
def script_reader(scp):
    reader = ScriptReader(scp, matrix=False)
    return reader
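matrix=False makes ScriptReader yield 1-D vectors (as in Example #8, where it reads speaker embeddings) instead of feature matrices. A call sketch with an illustrative path:

reader = script_reader("exp/xvector/xvector.scp")
for key, vec in reader:
    print(key, vec.size)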