示例#1
0
def run(args):
    """Score separated audio against references and emit the report.

    A single script in ``args.sep_scp`` (no comma) selects the one-speaker
    path, scored with ``si_snr``. A comma-separated list selects the
    multi-speaker path, scored with ``permute_si_snr``. Each score is added
    to a ``Report`` built from ``args.spk2gender``, and ``report()`` is
    called once at the end.
    """
    script_count = len(args.sep_scp.split(","))
    reporter = Report(args.spk2gender)

    if script_count != 1:
        estimates = SpeakersReader(args.sep_scp)
        references = SpeakersReader(args.ref_scp)
        for utt_id, est_list in tqdm(estimates):
            ref_list = references[utt_id]
            est_len, ref_len = est_list[0].size, ref_list[0].size
            if est_len != ref_len:
                # Lengths are compared on the first entry only; every entry
                # of both lists is then cut to the shorter length.
                common = min(est_len, ref_len)
                est_list = [wave[:common] for wave in est_list]
                ref_list = [wave[:common] for wave in ref_list]
            reporter.add(utt_id, permute_si_snr(est_list, ref_list))
    else:
        estimates = WaveReader(args.sep_scp)
        references = WaveReader(args.ref_scp)
        for utt_id, estimate in tqdm(estimates):
            reference = references[utt_id]
            if estimate.size != reference.size:
                # Cut both signals to the shorter length before scoring.
                common = min(estimate.size, reference.size)
                estimate, reference = estimate[:common], reference[:common]
            reporter.add(utt_id, si_snr(estimate, reference))
    reporter.report()
示例#2
0
 def __init__(self,
              shuffle=True,
              audio_x="",
              audio_y=None,
              sample_rate=8000):
     """Open the input reader and one reader per target script.

     Args:
         shuffle: stored unchanged as ``self.shuffle``.
         audio_x: script handed to ``WaveReader`` for ``self.audio_x``.
         audio_y: iterable of scripts; one ``WaveReader`` is built per
             entry. ``None`` (the default) produces an empty list.
         sample_rate: forwarded to every ``WaveReader``.
     """
     self.audio_x = WaveReader(audio_x, sample_rate=sample_rate)
     # Iterating the default ``None`` directly raised TypeError, so the
     # default was unusable; treat it as "no target scripts".
     targets = () if audio_y is None else audio_y
     self.audio_y = [
         WaveReader(y, sample_rate=sample_rate) for y in targets
     ]
     self.shuffle = shuffle
示例#3
0
 def __init__(self, scps):
     """Build one ``WaveReader`` per comma-separated script in ``scps``.

     Raises:
         RuntimeError: if ``scps`` holds a single script (no comma).
     """
     scripts = scps.split(",")
     if len(scripts) == 1:
         message = (
             "Construct SpeakersReader need more than one script, got {}".
             format(scps))
         raise RuntimeError(message)
     self.readers = list(map(WaveReader, scripts))
示例#4
0
class WaveReaderTestCase(unittest.TestCase):
    """Unit tests for ``WaveReader`` run against ``tests/data/sample.wav``."""

    def setUp(self):
        """Create a fresh reader for the sample file before each test."""
        self.wr = WaveReader('tests/data/sample.wav')

    def test_header(self):
        """Header attributes match the known values of the sample file."""
        self.assertEqual(self.wr.filename, 'tests/data/sample.wav')
        self.assertEqual(self.wr.nchannels, 1)
        self.assertEqual(self.wr.samplewidth,
                         2)  # byte width instead of bitwidth
        self.assertEqual(self.wr.framerate, 16000)
        self.assertEqual(self.wr.nframes, 28560)

    def test_duration(self):
        """Duration equals nframes / framerate (28560 / 16000)."""
        self.assertEqual(self.wr.get_duration(), 1.785)

    def test_get_packfmt(self):
        """Pack format starts with '<' (little-endian) followed by 'h' codes."""
        # The original condition was `os.name == 'posix' and os.name == 'nt'`,
        # which can never be true, so the assertion below never ran.
        if os.name in ('posix', 'nt'):
            self.assertEqual(self.wr._get_packfmt()[:5], '<hhhh')

    def test_pack(self):
        """Unpacking mocked content yields the expected first sample."""
        with patch('audio.WaveReader.content',
                   new_callable=PropertyMock,
                   create=True) as mock_content:
            if self.wr.samplewidth == 2 and sys.byteorder == 'little':
                # Only true for 2-byte samples read as little-endian:
                # bytes 00 01 decode to 0x0100 == 256.
                mock_content.return_value = '\x00\x01' * self.wr.nframes
                self.assertEqual(self.wr.unpack()[0], 256)

    def test_lower_sampling(self):
        """Resampling 16000 -> 8000 halves the number of frames."""
        # Floor division keeps the expected value an int on Python 3 too.
        self.assertEqual(len(self.wr.lower_sampling(8000)),
                         self.wr.nframes // 2)

    def test_name_section(self):
        """Section names insert ``_<index>`` before the file extension."""
        self.assertEqual(self.wr.name_section(0), 'tests/data/sample_0.wav')
        self.assertEqual(self.wr.name_section(100),
                         'tests/data/sample_100.wav')

    def test_truncate(self):
        """``truncate`` splits the sample into the expected sections."""
        self.assertEqual(len(self.wr.truncate(1)), 2)
        self.assertEqual(len(self.wr.truncate(10)), 1)

        sections = self.wr.truncate(1)
        self.assertEqual(sections[0].samplewidth, 2)
        self.assertEqual(sections[0].nframes, self.wr.framerate)
        self.assertEqual(sections[1].nframes,
                         self.wr.nframes - self.wr.framerate)
        self.assertEqual(sections[1].unpack()[0], 134)

    def test_voice_segment(self):
        """Smoke test: ``voice_segment`` runs on the big sample without raising."""
        wr = WaveReader('tests/data/sample_big.wav')
        wr.voice_segment(0.01, 0.005)
示例#5
0
def split_chunk(src_file, dst_file):
    """Split ``src_file`` into sections and write them next to ``dst_file``.

    Files larger than ``MAX_SIZE`` bytes are skipped and their path is
    appended to ``record_fp``. Otherwise the file is loaded with
    ``WaveReader``, ``lower_sampling(low_framerate=8000)`` is called on it,
    and it is cut with ``smart_truncate(MAX_DURATION)``. A single section is
    written to ``dst_file`` itself; several sections are each written into
    ``dst_file``'s directory under the section's own base name.
    """
    # Function-call form prints the same text on Python 2 and parses on 3.
    print("processing " + src_file)
    if os.stat(src_file).st_size > MAX_SIZE:
        record_fp.write(src_file + os.linesep)
        return

    wav = WaveReader(src_file)
    # NOTE(review): the return value is discarded; presumably this resamples
    # `wav` in place - confirm against WaveReader.lower_sampling.
    wav.lower_sampling(low_framerate=8000)
    sections = wav.smart_truncate(MAX_DURATION)
    dirname = os.path.dirname(dst_file)
    # dirname is '' when dst_file has no directory part; os.makedirs('')
    # raises, so only create a directory when there is one to create.
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    if len(sections) == 1:
        WaveWriter(dst_file, wav.header, wav.content).write()
    else:
        for sec in sections:
            basename = os.path.basename(sec.filename)
            filename = os.path.join(dirname, basename)
            WaveWriter(filename, sec.header, sec.content).write()
示例#6
0
def split_chunk(src_file, dst_file):
	"""Split ``src_file`` into sections and write them next to ``dst_file``.

	Files larger than ``MAX_SIZE`` bytes are skipped and their path is
	appended to ``record_fp``. Otherwise the file is loaded with
	``WaveReader``, ``lower_sampling(low_framerate=8000)`` is called on it,
	and it is cut with ``smart_truncate(MAX_DURATION)``. A single section is
	written to ``dst_file`` itself; several sections are each written into
	``dst_file``'s directory under the section's own base name.
	"""
	# Function-call form prints the same text on Python 2 and parses on 3.
	print("processing " + src_file)
	if os.stat(src_file).st_size > MAX_SIZE:
		record_fp.write(src_file + os.linesep)
		return

	wav = WaveReader(src_file)
	# NOTE(review): the return value is discarded; presumably this resamples
	# `wav` in place - confirm against WaveReader.lower_sampling.
	wav.lower_sampling(low_framerate=8000)
	sections = wav.smart_truncate(MAX_DURATION)
	dirname = os.path.dirname(dst_file)
	# dirname is '' when dst_file has no directory part; os.makedirs('')
	# raises, so only create a directory when there is one to create.
	if dirname and not os.path.exists(dirname):
		os.makedirs(dirname)

	if len(sections) == 1:
		WaveWriter(dst_file, wav.header, wav.content).write()
	else:
		for sec in sections:
			basename = os.path.basename(sec.filename)
			filename = os.path.join(dirname, basename)
			WaveWriter(filename, sec.header, sec.content).write()
示例#7
0
class WaveReaderTestCase(unittest.TestCase):
	"""Unit tests for ``WaveReader`` run against ``tests/data/sample.wav``."""

	def setUp(self):
		"""Create a fresh reader for the sample file before each test."""
		self.wr = WaveReader('tests/data/sample.wav')

	def test_header(self):
		"""Header attributes match the known values of the sample file."""
		self.assertEqual(self.wr.filename, 'tests/data/sample.wav')
		self.assertEqual(self.wr.nchannels, 1)
		self.assertEqual(self.wr.samplewidth, 2)	# byte width instead of bitwidth
		self.assertEqual(self.wr.framerate, 16000)
		self.assertEqual(self.wr.nframes, 28560)

	def test_duration(self):
		"""Duration equals nframes / framerate (28560 / 16000)."""
		self.assertEqual(self.wr.get_duration(), 1.785)

	def test_get_packfmt(self):
		"""Pack format starts with '<' (little-endian) followed by 'h' codes."""
		# The original condition was `os.name == 'posix' and os.name == 'nt'`,
		# which can never be true, so the assertion below never ran.
		if os.name in ('posix', 'nt'):
			self.assertEqual(self.wr._get_packfmt()[:5], '<hhhh')

	def test_pack(self):
		"""Unpacking mocked content yields the expected first sample."""
		with patch('audio.WaveReader.content', new_callable=PropertyMock, create=True) as mock_content:
			if self.wr.samplewidth == 2 and sys.byteorder == 'little':
				# Only true for 2-byte samples read as little-endian:
				# bytes 00 01 decode to 0x0100 == 256.
				mock_content.return_value = '\x00\x01' * self.wr.nframes
				self.assertEqual(self.wr.unpack()[0], 256)

	def test_lower_sampling(self):
		"""Resampling 16000 -> 8000 halves the number of frames."""
		# Floor division keeps the expected value an int on Python 3 too.
		self.assertEqual(len(self.wr.lower_sampling(8000)), self.wr.nframes // 2)

	def test_name_section(self):
		"""Section names insert ``_<index>`` before the file extension."""
		self.assertEqual(self.wr.name_section(0), 'tests/data/sample_0.wav')
		self.assertEqual(self.wr.name_section(100), 'tests/data/sample_100.wav')

	def test_truncate(self):
		"""``truncate`` splits the sample into the expected sections."""
		self.assertEqual(len(self.wr.truncate(1)), 2)
		self.assertEqual(len(self.wr.truncate(10)), 1)

		sections = self.wr.truncate(1)
		self.assertEqual(sections[0].samplewidth, 2)
		self.assertEqual(sections[0].nframes, self.wr.framerate)
		self.assertEqual(sections[1].nframes, self.wr.nframes - self.wr.framerate)
		self.assertEqual(sections[1].unpack()[0], 134)

	def test_voice_segment(self):
		"""Smoke test: ``voice_segment`` runs on the big sample without raising."""
		wr = WaveReader('tests/data/sample_big.wav')
		wr.voice_segment(0.01, 0.005)
def run(args):
    """Separate every mixture in ``args.input`` and write one wav per speaker.

    Each utterance read from ``WaveReader(args.input, sample_rate=args.fs)``
    is passed to ``NnetComputer.compute``. Every returned signal is cut to
    the mixture length, rescaled, and written to
    ``<args.dump_dir>/spk<N>/<key>.wav`` with ``N`` starting at 1.
    """
    mix_input = WaveReader(args.input, sample_rate=args.fs)
    computer = NnetComputer(args.checkpoint, args.gpu)
    for key, mix_samps in mix_input:
        logger.info("Compute on utterance {}...".format(key))
        spks = computer.compute(mix_samps)
        # Infinity norm of the mixture (peak magnitude for a 1-D signal).
        norm = np.linalg.norm(mix_samps, np.inf)
        for idx, samps in enumerate(spks):
            samps = samps[:mix_samps.size]
            # Rescale so the output peak equals the mixture norm. An all-zero
            # estimate has peak 0; dividing by it would turn every sample
            # into NaN, so it is written unscaled instead.
            peak = np.max(np.abs(samps))
            if peak > 0:
                samps = samps * norm / peak
            write_wav(os.path.join(args.dump_dir,
                                   "spk{}/{}.wav".format(idx + 1, key)),
                      samps,
                      fs=args.fs)
    logger.info("Compute over {:d} utterances".format(len(mix_input)))
示例#9
0
 def test_voice_segment(self):
     """Smoke test: ``voice_segment`` runs on the big sample without raising."""
     big_sample = WaveReader('tests/data/sample_big.wav')
     big_sample.voice_segment(0.01, 0.005)
示例#10
0
 def setUp(self):
     """Create the ``WaveReader`` for the sample wav and keep it on ``self.wr``."""
     sample_path = 'tests/data/sample.wav'
     self.wr = WaveReader(sample_path)
示例#11
0
def main(filename):
	"""Open ``filename`` with ``WaveReader``."""
	reader = WaveReader(filename)
示例#12
0
	def test_voice_segment(self):
		"""Smoke test: ``voice_segment`` runs on the big sample without raising."""
		big_sample = WaveReader('tests/data/sample_big.wav')
		big_sample.voice_segment(0.01, 0.005)
示例#13
0
	def setUp(self):
		"""Create the ``WaveReader`` for the sample wav and keep it on ``self.wr``."""
		sample_path = 'tests/data/sample.wav'
		self.wr = WaveReader(sample_path)
示例#14
0
def run(args):
    """Simulate ``args.num_utts`` speaker mixtures and write them as wav files.

    For each utterance, ``sample_spks`` picks ``args.num_spks`` speakers. The
    first is the target; every other speaker is scaled to a random SDR drawn
    from the ``"min,max"`` range in ``args.sdr`` and added at a random shift.
    The mixture goes to ``<dump_dir>/mix`` and the target to
    ``<dump_dir>/spk1``. Unless ``args.target_only`` is set, the scaled
    interferers go to ``<dump_dir>/spk2``, ``spk3``, and so on. When
    ``args.simu_stats`` is set, one line per utterance with each speaker's
    key and shift is written to that file.
    """
    min_sdr, max_sdr = [float(bound) for bound in args.sdr.split(",")]
    wav_reader = WaveReader(args.wav_scp, sample_rate=args.fs)

    logger.info(
        "Start simulate {:d} utterances from {}, with sdr = {} ...".format(
            args.num_utts, args.wav_scp, args.sdr))
    # Samples per shift unit, e.g. 640 = 0.04 * 16000
    frame_shift = int(args.fs * args.shift)
    statsf = open(args.simu_stats, "w") if args.simu_stats else None
    try:
        for _ in tqdm.trange(args.num_utts):
            # spks is a list of dict objects
            min_dur, spks = sample_spks(wav_reader, args.num_spks,
                                        args.min_dur)

            mixture = np.zeros(min_dur)
            # Treat the first speaker as the target
            target = spks[0]
            ref_pow = target["pow"]
            ref_dur = target["dur"]
            ref_spk = target["wav"]

            stats = []
            # Random shift of the target, counted in frame_shift units
            shift = random.randint(0, (ref_dur - min_dur) // frame_shift)
            stats.append((target["key"], shift))
            # Target segment
            begin = shift * frame_shift
            segment = ref_spk[begin:begin + min_dur]
            mixture += segment
            # Interference speakers
            sdrs = []
            infs = []
            for spk in spks[1:]:
                sdr_db = random.uniform(min_sdr, max_sdr)
                # Gain that puts the interferer sdr_db below the target power
                scaler = np.sqrt(ref_pow / spk["pow"] * 10**(-sdr_db / 10))
                shift = random.randint(0,
                                       (spk["dur"] - min_dur) // frame_shift)
                stats.append((spk["key"], shift))
                begin = shift * frame_shift
                scaled = scaler * spk["wav"][begin:begin + min_dur]
                mixture += scaled
                infs.append(scaled)
                sdrs.append("{:+.2f}".format(sdr_db))

            uttid = "{0}_{1}".format("_".join([d["key"] for d in spks]),
                                     "_".join(sdrs))
            # Common gain so the mixture's infinity norm lands in [0.6, 0.9]
            scaler = random.uniform(0.6, 0.9) / np.linalg.norm(
                mixture, np.inf)

            write_wav(os.path.join(args.dump_dir,
                                   "mix/{}.wav".format(uttid)),
                      mixture * scaler,
                      fs=args.fs)
            write_wav(os.path.join(args.dump_dir,
                                   "spk1/{}.wav".format(uttid)),
                      segment * scaler,
                      fs=args.fs)

            if not args.target_only:
                for idx, spk in enumerate(infs):
                    write_wav(os.path.join(
                        args.dump_dir,
                        "spk{}/{}.wav".format(idx + 2, uttid)),
                              spk * scaler,
                              fs=args.fs)

            if statsf:
                record = uttid + "".join(
                    " {0} {1}".format(pair[0], pair[1]) for pair in stats)
                statsf.write("{}\n".format(record))
    finally:
        # Close the stats file even when a simulation step raises, so
        # already-written records are flushed and the handle is not leaked.
        if statsf:
            statsf.close()
    logger.info(
        "Start simulate {:d} utterances from {}, with sdr = {} done".format(
            args.num_utts, args.wav_scp, args.sdr))