def mixture_feat_extractor_test():
    """Drive the reader -> frame cutter -> MixtureExtractor pipeline on
    ``wavPath`` and print the output PIPE size plus one packet's data shape.

    NOTE(review): relies on module-level ``wavPath`` and the project's
    ``stream``/``feature`` modules being importable — verify before running.
    """
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    framer = stream.ElementFrameCutter(width=400, shift=160)
    mix_ext = feature.MixtureExtractor(
        frameDim=400,
        batchSize=100,
        mixType=["mfcc", "fbank"],
        useEnergyForFbank=False,
        useEnergyForMfcc=False,
    )
    # Chain the components: each stage consumes the previous stage's outPIPE.
    src.start()
    framer.start(inPIPE=src.outPIPE)
    mix_ext.start(inPIPE=framer.outPIPE)
    mix_ext.wait()
    print(mix_ext.outPIPE.size())
    packet = mix_ext.outPIPE.get()
    print(packet.data.shape)
def feat_extractor_test():
    """Run reader -> frame cutter -> MfccExtractor over ``wavPath`` and
    print how many packets ended up in the extractor's output PIPE."""
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    framer = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)
    # Wire the pipeline stage by stage and block until extraction finishes.
    src.start()
    framer.start(inPIPE=src.outPIPE)
    mfcc.start(inPIPE=framer.outPIPE)
    mfcc.wait()
    print(mfcc.outPIPE.size())
def stream_reader_test():
    """Read ``wavPath`` with a bare StreamReader (VAD disabled) and print
    the size of its output PIPE once reading completes."""
    detector = None  # stream.WebrtcVADetector()
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=detector,
    )
    src.start()
    src.wait()
    print(src.outPIPE.size())
def feat_estimator_test():
    """Exercise the full front-end chain plus an AcousticEstimator whose
    acoustic function is an identity that merely strips the context frames.

    Pipeline: reader -> cutter -> MFCC -> feature processor -> estimator.
    Prints the estimator's output PIPE size at the end.
    """
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    framer = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)
    proc = feature.FeatureProcessor(
        featDim=13,
        delta=2,
        spliceLeft=10,
        spliceRight=10,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
    )
    ctx_left = 5
    ctx_right = 5
    est = decode.AcousticEstimator(
        featDim=819,  # 13 * (2+1) * (10+1+10) = 819 after delta + splice
        batchSize=100,
        applySoftmax=False,
        applyLog=False,
        leftContext=ctx_left,
        rightContext=ctx_right,
    )
    # Dummy acoustic model: drop the left/right context rows, pass the rest.
    est.acoustic_function = lambda x: x[ctx_left:-ctx_right].copy()
    src.start()
    framer.start(inPIPE=src.outPIPE)
    mfcc.start(inPIPE=framer.outPIPE)
    proc.start(inPIPE=mfcc.outPIPE)
    est.start(inPIPE=proc.outPIPE)
    est.wait()
    print(est.outPIPE.size())
def cutter_test():
    """Feed ``wavPath`` through StreamReader into ElementFrameCutter and
    print the number of packets in the cutter's output PIPE."""
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    framer = stream.ElementFrameCutter(width=400, shift=160)
    src.start()
    framer.start(inPIPE=src.outPIPE)
    framer.wait()
    print(framer.outPIPE.size())
def send_value_packets():
    """Stream a local wave file and transmit its packets to a remote host.

    NOTE(review): target host/port are hard-coded (192.168.1.11:9509) —
    presumably a lab machine; confirm before reuse.
    """
    wavPath = "../examples/84-121550-0000.wav"
    assert os.path.isfile(wavPath), f"No such file: {wavPath}"
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    tx = transmit.PacketSender(
        thost="192.168.1.11",
        tport=9509,
        batchSize=1024,
    )
    # Value packets are serialized with the project's standard encoder.
    tx.encode_function = transmit.encode_value_packet
    src.start()
    tx.start(inPIPE=src.outPIPE)
    tx.wait()
def feat_processor_test():
    """Run reader -> cutter -> MFCC -> FeatureProcessor on ``wavPath``,
    then print the output PIPE size and the shape of one processed packet."""
    src = stream.StreamReader(
        waveFile=wavPath,
        chunkSize=480,
        simulate=False,
        vaDetector=None,
    )
    framer = stream.ElementFrameCutter(width=400, shift=160)
    mfcc = feature.MfccExtractor(batchSize=100, useEnergy=False)
    proc = feature.FeatureProcessor(
        featDim=13,
        delta=2,
        spliceLeft=10,
        spliceRight=10,
        cmvNormalizer=feature.FrameSlideCMVNormalizer(),
    )
    # Start every stage in dataflow order, then wait for the final one.
    src.start()
    framer.start(inPIPE=src.outPIPE)
    mfcc.start(inPIPE=framer.outPIPE)
    proc.start(inPIPE=mfcc.outPIPE)
    proc.wait()
    print(proc.outPIPE.size())
    packet = proc.outPIPE.get()
    print(packet.data.shape)
rootDir = f"{KALDI_ROOT}/egs/mini_librispeech/s5/exp" words = f"{rootDir}/tri3b/graph_tgsmall/words.txt" hmm = f"{rootDir}/tri3b_ali_train_clean_5/final.mdl" HCLG = f"{rootDir}/tri3b/graph_tgsmall/HCLG.fst" pdfDim = decode.get_pdf_dim(hmm) kerasmodel = make_DNN_acoustic_model(featDim, pdfDim) kerasmodel.load_weights(kerasModel) ########################## # Define components ########################## # 1. Create a stream reader to read realtime stream from audio file reader = stream.StreamReader(waveFile, simulate=True) # 2. Cutter to cut frame cutter = stream.ElementFrameCutter(width=400, shift=160) # 3. MFCC feature extracting extractor = feature.MfccExtractor( frameDim=400, batchSize=100, useEnergy=False, ) # 4. processing feature processor = feature.FeatureProcessor( featDim=13, batchSize=100, delta=delta, spliceLeft=spliceLeft, spliceRight=spliceRight,
# Remote endpoint configuration.
# NOTE(review): names below (reader, sender, receiver, ...) are module-level
# and are presumably referenced later in this script — do not rename.
rHostIP = "192.168.1.11"
rHostPort = 9509
bHostPort = 9510

assert os.path.isfile(waveFile), f"No such file: {waveFile}"

##########################
# Define components
##########################

# 1. Create a stream reader to read realtime stream from audio file,
#    with WebRTC voice-activity detection enabled.
vad = stream.WebrtcVADetector()
reader = stream.StreamReader(
    waveFile=waveFile,
    chunkSize=480,
    simulate=True,
    vaDetector=vad,
)

# 2. Send packets to remote host.
sender = transmit.PacketSender(
    thost=rHostIP,
    tport=rHostPort,
    batchSize=100,
)
sender.encode_function = transmit.encode_value_packet

# 3. Receive packets coming back on the local port.
receiver = transmit.PacketReceiver(bport=bHostPort)
featDim = (13 * (delta + 1)) * (spliceLeft + 1 + spliceRight) ########################## # Load DNN acoustic model ########################## pdfDim = decode.get_pdf_dim(hmm) kerasmodel = make_DNN_acoustic_model(featDim, pdfDim) kerasmodel.load_weights(kerasModel) ########################## # Define components ########################## # 1. Create a stream reader to read realtime stream from audio file reader = stream.StreamReader(waveFile, simulate=False) # 2. Cutter to cut frame cutter = stream.ElementFrameCutter(batchSize=50, width=400, shift=160) # 3. MFCC feature extracting extractor = feature.MfccExtractor(useEnergy=False, ) # 4. processing feature processor = feature.MatrixFeatureProcessor( delta=delta, spliceLeft=spliceLeft, spliceRight=spliceRight, cmvNormalizer=feature.FrameSlideCMVNormalizer(), ) # 5. acoustic probability computer def keras_compute(feats):