def run(args):
    """Score separated audio against references and print the report.

    ``args.sep_scp`` selects the mode. Without a comma it is read with
    ``WaveReader`` and each utterance is scored by ``si_snr``. With commas it
    is read with ``SpeakersReader`` and each utterance is scored by
    ``permute_si_snr``. Every score is handed to a ``Report`` built from
    ``args.spk2gender``.

    When the estimate and the reference differ in length, both are cut to the
    shorter one before scoring. In multi-speaker mode only the first entry of
    each list is compared to decide whether (and where) to cut.
    """
    is_single = len(args.sep_scp.split(",")) == 1
    reporter = Report(args.spk2gender)

    if not is_single:
        est_reader = SpeakersReader(args.sep_scp)
        tgt_reader = SpeakersReader(args.ref_scp)
        for key, est_list in tqdm(est_reader):
            tgt_list = tgt_reader[key]
            est_len = est_list[0].size
            tgt_len = tgt_list[0].size
            if est_len != tgt_len:
                keep = min(est_len, tgt_len)
                est_list = [wav[:keep] for wav in est_list]
                tgt_list = [wav[:keep] for wav in tgt_list]
            reporter.add(key, permute_si_snr(est_list, tgt_list))
    else:
        est_reader = WaveReader(args.sep_scp)
        tgt_reader = WaveReader(args.ref_scp)
        for key, est in tqdm(est_reader):
            tgt = tgt_reader[key]
            if est.size != tgt.size:
                keep = min(est.size, tgt.size)
                est, tgt = est[:keep], tgt[:keep]
            reporter.add(key, si_snr(est, tgt))

    reporter.report()
def __init__(self, shuffle=True, audio_x="", audio_y=None, sample_rate=8000):
    """Open one reader for the input audio and one per target entry.

    Args:
        shuffle: stored unchanged on ``self.shuffle``.
        audio_x: passed to ``WaveReader`` to build ``self.audio_x``.
        audio_y: iterable of items, each passed to ``WaveReader``; the
            resulting readers are stored as the list ``self.audio_y``.
            ``None`` (the default) is treated as "no targets" and yields an
            empty list rather than raising ``TypeError`` on iteration.
        sample_rate: forwarded to every ``WaveReader``.
    """
    self.audio_x = WaveReader(audio_x, sample_rate=sample_rate)
    # The default is None, which cannot be iterated; fall back to no targets.
    targets = [] if audio_y is None else audio_y
    self.audio_y = [
        WaveReader(y, sample_rate=sample_rate) for y in targets
    ]
    self.shuffle = shuffle
def __init__(self, scps):
    """Build one ``WaveReader`` per comma-separated script in ``scps``.

    Args:
        scps: comma-separated string of script paths.

    Raises:
        RuntimeError: if ``scps`` contains no comma, i.e. names a single
            script.
    """
    scp_paths = scps.split(",")
    if len(scp_paths) == 1:
        message = "Construct SpeakersReader need more than one script, got {}"
        raise RuntimeError(message.format(scps))

    readers = []
    for path in scp_paths:
        readers.append(WaveReader(path))
    self.readers = readers
class WaveReaderTestCase(unittest.TestCase):
    """Unit tests for ``WaveReader`` run against ``tests/data/sample.wav``."""

    def setUp(self):
        # A fresh reader for every test so tests cannot affect each other.
        self.wr = WaveReader('tests/data/sample.wav')

    def test_header(self):
        """Header fields parsed from the sample file."""
        self.assertEqual(self.wr.filename, 'tests/data/sample.wav')
        self.assertEqual(self.wr.nchannels, 1)
        self.assertEqual(self.wr.samplewidth, 2)  # byte width instead of bitwidth
        self.assertEqual(self.wr.framerate, 16000)
        self.assertEqual(self.wr.nframes, 28560)

    def test_duration(self):
        """28560 frames at 16000 Hz last 1.785 seconds."""
        self.assertEqual(self.wr.get_duration(), 1.785)

    def test_get_packfmt(self):
        """The pack format starts with the little-endian 16-bit prefix."""
        # Run on both POSIX and Windows. The guard used to join the two
        # checks with ``and``, which is never true, so nothing was asserted.
        if os.name in ('posix', 'nt'):
            self.assertEqual(self.wr._get_packfmt()[:5], '<hhhh')

    def test_pack(self):
        """Unpacking the byte pair 00 01 yields 256 for the first sample."""
        with patch('audio.WaveReader.content', new_callable=PropertyMock,
                   create=True) as mock_content:
            if self.wr.samplewidth == 2 and sys.byteorder == 'little':
                # Bytes 00 01 read as a little-endian 16-bit sample are
                # 0x0100 == 256, so this only holds for sample width 2 on a
                # little-endian host.
                mock_content.return_value = '\x00\x01' * self.wr.nframes
                self.assertEqual(self.wr.unpack()[0], 256)

    def test_lower_sampling(self):
        """Resampling 16000 Hz to 8000 Hz halves the number of samples."""
        self.assertEqual(len(self.wr.lower_sampling(8000)),
                         self.wr.nframes / 2)

    def test_name_section(self):
        """Section names insert ``_<index>`` before the extension."""
        self.assertEqual(self.wr.name_section(0), 'tests/data/sample_0.wav')
        self.assertEqual(self.wr.name_section(100),
                         'tests/data/sample_100.wav')

    def test_truncate(self):
        """Truncating splits the file into sections of the given length."""
        self.assertEqual(len(self.wr.truncate(1)), 2)
        self.assertEqual(len(self.wr.truncate(10)), 1)
        sections = self.wr.truncate(1)
        self.assertEqual(sections[0].samplewidth, 2)
        self.assertEqual(sections[0].nframes, self.wr.framerate)
        self.assertEqual(sections[1].nframes,
                         self.wr.nframes - self.wr.framerate)
        self.assertEqual(sections[1].unpack()[0], 134)

    def test_voice_segment(self):
        """Smoke test: ``voice_segment`` only has to run without raising."""
        wr = WaveReader('tests/data/sample_big.wav')
        wr.voice_segment(0.01, 0.005)
def split_chunk(src_file, dst_file):
    """Down-sample ``src_file`` and write it out, split into sections.

    Files larger than ``MAX_SIZE`` bytes are not processed; their path is
    appended to ``record_fp`` instead. Otherwise the file is read, passed
    through ``lower_sampling(low_framerate=8000)`` and cut with
    ``smart_truncate(MAX_DURATION)``. A single section is written as one file
    to ``dst_file``; several sections are written into the directory of
    ``dst_file`` under each section's own base name.

    Args:
        src_file: path of the wave file to read.
        dst_file: destination path; its directory is created when missing.
    """
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3.
    print("processing " + src_file)
    if os.stat(src_file).st_size > MAX_SIZE:
        record_fp.write(src_file + os.linesep)
        return

    wav = WaveReader(src_file)
    # NOTE(review): the return value is discarded, so this presumably
    # resamples ``wav`` in place - confirm against WaveReader.
    wav.lower_sampling(low_framerate=8000)
    sections = wav.smart_truncate(MAX_DURATION)

    dirname = os.path.dirname(dst_file)
    # dirname is '' when dst_file has no directory component;
    # os.makedirs('') would raise, so only create a real directory.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    if len(sections) == 1:
        WaveWriter(dst_file, wav.header, wav.content).write()
    else:
        for sec in sections:
            basename = os.path.basename(sec.filename)
            filename = os.path.join(dirname, basename)
            WaveWriter(filename, sec.header, sec.content).write()
class WaveReaderTestCase(unittest.TestCase):
    """Unit tests for ``WaveReader`` run against ``tests/data/sample.wav``."""

    def setUp(self):
        # A fresh reader for every test so tests cannot affect each other.
        self.wr = WaveReader('tests/data/sample.wav')

    def test_header(self):
        """Header fields parsed from the sample file."""
        self.assertEqual(self.wr.filename, 'tests/data/sample.wav')
        self.assertEqual(self.wr.nchannels, 1)
        self.assertEqual(self.wr.samplewidth, 2)  # byte width instead of bitwidth
        self.assertEqual(self.wr.framerate, 16000)
        self.assertEqual(self.wr.nframes, 28560)

    def test_duration(self):
        """28560 frames at 16000 Hz last 1.785 seconds."""
        self.assertEqual(self.wr.get_duration(), 1.785)

    def test_get_packfmt(self):
        """The pack format starts with the little-endian 16-bit prefix."""
        # Run on both POSIX and Windows. The guard used to join the two
        # checks with ``and``, which is never true, so nothing was asserted.
        if os.name in ('posix', 'nt'):
            self.assertEqual(self.wr._get_packfmt()[:5], '<hhhh')

    def test_pack(self):
        """Unpacking the byte pair 00 01 yields 256 for the first sample."""
        with patch('audio.WaveReader.content', new_callable=PropertyMock,
                   create=True) as mock_content:
            if self.wr.samplewidth == 2 and sys.byteorder == 'little':
                # Bytes 00 01 read as a little-endian 16-bit sample are
                # 0x0100 == 256, so this only holds for sample width 2 on a
                # little-endian host.
                mock_content.return_value = '\x00\x01' * self.wr.nframes
                self.assertEqual(self.wr.unpack()[0], 256)

    def test_lower_sampling(self):
        """Resampling 16000 Hz to 8000 Hz halves the number of samples."""
        self.assertEqual(len(self.wr.lower_sampling(8000)),
                         self.wr.nframes/2)

    def test_name_section(self):
        """Section names insert ``_<index>`` before the extension."""
        self.assertEqual(self.wr.name_section(0), 'tests/data/sample_0.wav')
        self.assertEqual(self.wr.name_section(100),
                         'tests/data/sample_100.wav')

    def test_truncate(self):
        """Truncating splits the file into sections of the given length."""
        self.assertEqual(len(self.wr.truncate(1)), 2)
        self.assertEqual(len(self.wr.truncate(10)), 1)
        sections = self.wr.truncate(1)
        self.assertEqual(sections[0].samplewidth, 2)
        self.assertEqual(sections[0].nframes, self.wr.framerate)
        self.assertEqual(sections[1].nframes,
                         self.wr.nframes-self.wr.framerate)
        self.assertEqual(sections[1].unpack()[0], 134)

    def test_voice_segment(self):
        """Smoke test: ``voice_segment`` only has to run without raising."""
        wr = WaveReader('tests/data/sample_big.wav')
        wr.voice_segment(0.01, 0.005)
def run(args):
    """Run the network on every input utterance and dump one wav per speaker.

    Each utterance read from ``args.input`` (at ``args.fs``) is passed to
    ``NnetComputer.compute``. Every returned signal is cut to the length of
    the input, rescaled so that its peak equals the peak of the input, and
    written to ``<args.dump_dir>/spk<k>/<key>.wav`` with ``k`` starting at 1.
    """
    mix_input = WaveReader(args.input, sample_rate=args.fs)
    computer = NnetComputer(args.checkpoint, args.gpu)
    for key, mix_samps in mix_input:
        logger.info("Compute on utterance {}...".format(key))
        spks = computer.compute(mix_samps)
        # Infinity norm == largest absolute sample of the mixture.
        norm = np.linalg.norm(mix_samps, np.inf)
        for idx, samps in enumerate(spks):
            samps = samps[:mix_samps.size]
            # Rescale to the mixture's peak. An all-zero estimate has no peak
            # to divide by; leave it untouched instead of producing NaN/inf.
            peak = np.max(np.abs(samps))
            if peak > 0:
                samps = samps * norm / peak
            write_wav(os.path.join(args.dump_dir,
                                   "spk{}/{}.wav".format(idx + 1, key)),
                      samps,
                      fs=args.fs)
    logger.info("Compute over {:d} utterances".format(len(mix_input)))
def test_voice_segment(self):
    """Smoke test: run ``voice_segment`` on ``tests/data/sample_big.wav``.

    Nothing is asserted; the test only fails if the call raises.
    """
    big_reader = WaveReader('tests/data/sample_big.wav')
    big_reader.voice_segment(0.01, 0.005)
def setUp(self):
    """Open the sample wave file and keep the reader on ``self.wr``."""
    sample_path = 'tests/data/sample.wav'
    self.wr = WaveReader(sample_path)
def main(filename):
    """Open ``filename`` with ``WaveReader``.

    The reader is only constructed; nothing is returned.
    """
    reader = WaveReader(filename)
def run(args):
    """Simulate ``args.num_utts`` multi-speaker mixtures and write them out.

    For every mixture, ``sample_spks`` picks the speakers and a common
    duration. The first speaker is the target; every other speaker is scaled
    to an SDR (in dB) drawn uniformly from the ``"min,max"`` range in
    ``args.sdr`` relative to the target's power. Each speaker's segment starts
    at a random multiple of ``int(args.fs * args.shift)`` samples. The mixture
    is normalised to a random peak in [0.6, 0.9] and written under
    ``<args.dump_dir>/mix``; the target goes to ``spk1`` and, unless
    ``args.target_only`` is set, the interferers go to ``spk2``, ``spk3``, ...

    When ``args.simu_stats`` is set, one line per mixture is written to that
    file: the utterance id followed by ``key shift`` pairs for each speaker.
    """
    min_sdr, max_sdr = list(map(float, args.sdr.split(",")))
    wav_reader = WaveReader(args.wav_scp, sample_rate=args.fs)
    logger.info(
        "Start simulate {:d} utterances from {}, with sdr = {} ...".format(
            args.num_utts, args.wav_scp, args.sdr))
    # e.g. 640 = 0.04 * 16000
    frame_shift = int(args.fs * args.shift)
    statsf = open(args.simu_stats, "w") if args.simu_stats else None
    # try/finally so the stats file is closed even if an iteration raises.
    try:
        for _ in tqdm.trange(args.num_utts):
            # list of dict object
            min_dur, spks = sample_spks(wav_reader, args.num_spks,
                                        args.min_dur)
            mixture = np.zeros(min_dur)
            # treat first speaker as target
            ref_pow = spks[0]["pow"]
            ref_dur = spks[0]["dur"]
            ref_spk = spks[0]["wav"]
            stats = []
            # shift for target video
            shift = random.randint(0, (ref_dur - min_dur) // frame_shift)
            stats.append((spks[0]["key"], shift))
            # target segment
            begin = shift * frame_shift
            segment = ref_spk[begin:begin + min_dur]
            mixture += segment
            # interference speakers
            sdrs = []
            infs = []
            for spk in spks[1:]:
                sdr_db = random.uniform(min_sdr, max_sdr)
                # Gain that places the interferer sdr_db below the target.
                scaler = np.sqrt(ref_pow / spk["pow"] * 10**(-sdr_db / 10))
                # video shift
                shift = random.randint(0,
                                       (spk["dur"] - min_dur) // frame_shift)
                stats.append((spk["key"], shift))
                # mixture
                begin = shift * frame_shift
                spkseg = spk["wav"][begin:begin + min_dur]
                mixture += scaler * spkseg
                infs.append(scaler * spkseg)
                sdrs.append("{:+.2f}".format(sdr_db))

            uttid = "{0}_{1}".format("_".join([d["key"] for d in spks]),
                                     "_".join(sdrs))
            # Normalise the peak of the mixture to a random level; the same
            # factor is applied to every source so they still sum to it.
            scaler = random.uniform(0.6, 0.9) / np.linalg.norm(
                mixture, np.inf)

            write_wav(os.path.join(args.dump_dir, "mix/{}.wav".format(uttid)),
                      mixture * scaler,
                      fs=args.fs)
            write_wav(os.path.join(args.dump_dir,
                                   "spk1/{}.wav".format(uttid)),
                      segment * scaler,
                      fs=args.fs)

            if not args.target_only:
                for idx, spk in enumerate(infs):
                    write_wav(os.path.join(
                        args.dump_dir,
                        "spk{}/{}.wav".format(idx + 2, uttid)),
                              spk * scaler,
                              fs=args.fs)

            if statsf:
                record = uttid + "".join(
                    " {0} {1}".format(pair[0], pair[1]) for pair in stats)
                statsf.write("{}\n".format(record))
    finally:
        if statsf:
            statsf.close()

    logger.info(
        "Start simulate {:d} utterances from {}, with sdr = {} done".format(
            args.num_utts, args.wav_scp, args.sdr))