Exemplo n.º 1
0
 def __getitem__(self, index):
     """Return one fixed-length training segment as a dict of arrays.

     ``self.retrieve_index[index]`` yields ``(utt_id, offset)``.  An
     offset of ``-1`` marks an utterance shorter than
     ``self.segment_length``: it is extended by appending its own leading
     samples.  Any other offset is the first sample of a
     ``self.segment_length``-long slice.

     Returns:
         dict with 'mix' reshaped to (1, -1) and 'src' reshaped to
         (2, -1), the speaker-1 wave stacked before the speaker-2 wave.
     """
     utt_id, offset = self.retrieve_index[index]
     # assumes wavread(...)[0] is a column array of shape (n, 1), as the
     # reshape(-1, 1) / squeeze(-1) calls below imply — TODO confirm
     mixture = wavread(self.mix_path[utt_id])[0]
     spk1 = wavread(self.s1_path[utt_id])[0]
     spk2 = wavread(self.s2_path[utt_id])[0]

     if offset == -1:
         # Short utterance: repeat its head to fill up the segment.
         pad = self.segment_length - len(mixture)

         def cut(wave):
             head = wave[:pad].reshape(-1, 1)
             return np.concatenate((wave, head), axis=0)
     else:
         stop = offset + self.segment_length

         def cut(wave):
             return wave[offset:stop]

     mix_seg = cut(mixture)
     spk1_seg = cut(spk1)
     spk2_seg = cut(spk2)

     sources = np.stack((spk1_seg, spk2_seg), axis=0).squeeze(-1)
     return {
         'mix': mix_seg.reshape(1, -1),
         'src': sources.reshape(2, -1),
     }
Exemplo n.º 2
0
 def read(self):
     """Yield every utterance, one dict per entry of ``self.mix_path``.

     Each yielded dict carries the utterance 'key' together with the
     'mix', 's1' and 's2' waves as torch tensors reshaped to (1, 1, -1).
     """
     def to_tensor(wave):
         return torch.from_numpy(wave.reshape(1, 1, -1))

     for utt in range(len(self.mix_path)):
         utt_key = self.key[utt]
         mixture = wavread(self.mix_path[utt])[0]
         spk1 = wavread(self.s1_path[utt])[0]
         spk2 = wavread(self.s2_path[utt])[0]
         yield {
             'key': utt_key,
             'mix': to_tensor(mixture),
             's1': to_tensor(spk1),
             's2': to_tensor(spk2),
         }
Exemplo n.º 3
0
def _peak_normalize(wave, peak=0.7):
    """Scale ``wave`` so its largest absolute sample equals ``peak``.

    An all-zero (silent) input is returned unchanged instead of being
    divided by zero, which would fill the output with NaNs.
    """
    max_abs = np.max(np.abs(wave))
    if max_abs == 0:
        return wave
    return wave / max_abs * peak


def evaluate(model, device):
    """Decode the 'tt' set and write the two separated waves per utterance.

    For every utterance yielded by ``DataReader.read()`` the mixture is
    moved to ``device`` and passed to ``model`` together with its length.
    Rows 0 and 1 of the model output are peak-normalised to 0.7 and
    written to ``<FLAGS.model_dir>/wav/<key>_1.wav`` and ``..._2.wav``.

    Before writing, the speaker-1 reference delivered by the reader is
    compared with ``<FLAGS.test_wav_dir>/s1/<key>.wav``; any difference
    aborts the run.

    Args:
        model: called as ``model(mix, length)``; its output is indexed as
            ``output[:, 0, :]`` and ``output[:, 1, :]``.
        device: target passed to ``Tensor.to`` for the mixture.

    Raises:
        SystemExit: with a non-zero status when the reference check fails.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    mix_scp = os.path.join(FLAGS.data_dir, 'tt', 'mix.scp')
    s1_scp = os.path.join(FLAGS.data_dir, 'tt', 's1.scp')
    s2_scp = os.path.join(FLAGS.data_dir, 'tt', 's2.scp')
    dataset = DataReader(mix_scp, s1_scp, s2_scp)

    total_num = len(dataset)
    save_path = os.path.join(FLAGS.model_dir, 'wav')
    os.makedirs(save_path, exist_ok=True)
    print('=> Decoding ...')
    sys.stdout.flush()
    start_time = datetime.datetime.now()

    with torch.no_grad():
        for index, data in enumerate(dataset.read(), start=1):
            start = datetime.datetime.now()
            key = data['key']
            mix = data['mix'].to(device)
            output = model(mix, mix.size(-1))
            output1 = np.squeeze(output[:, 0, :].cpu().numpy())
            output2 = np.squeeze(output[:, 1, :].cpu().numpy())

            # Sanity check: the reference read through the scp list must be
            # the same signal as the clean file in the test directory.
            # Compare element-wise; summing the difference would let
            # positive and negative deviations cancel out.
            s1 = np.squeeze(data['s1'].numpy())
            clean_s1_path = os.path.join(
                FLAGS.test_wav_dir, 's1', key + '.wav')
            s1_clean = np.squeeze(wavread(clean_s1_path)[0])
            if not np.array_equal(s1, s1_clean):
                print('[*]:', key, s1, s1_clean)
                # A failed check is an error, so report it to the shell.
                sys.exit(1)

            save_prefix = os.path.join(save_path, key)
            wavwrite(_peak_normalize(output1), SAMPLE_RATE,
                     save_prefix + '_1.wav')
            wavwrite(_peak_normalize(output2), SAMPLE_RATE,
                     save_prefix + '_2.wav')

            elapsed = (datetime.datetime.now() - start).total_seconds()
            logger.info('{:04d}/{:04d} | time = {:.3f} s'.format(
                index, total_num, elapsed))

    elapsed = (datetime.datetime.now() - start_time).total_seconds()
    print('=> Decode done. Total time is {:.2f} mins'.format(elapsed /
                                                             60.0))
Exemplo n.º 4
0
    def __init__(self,
                 mix_c1_scp,
                 s1_c1_scp,
                 s2_c1_scp,
                 mix_c2_scp,
                 s1_c2_scp,
                 s2_c2_scp,
                 sample_rate,
                 sample_clip_size=4):
        """Initialize the TimeDomainDataset. (2 mixtures)

        Builds ``self.retrieve_index``, a list of ``(utt_index, start)``
        pairs with one entry per training clip.  ``start`` is ``-1`` for a
        wave shorter than one clip but at least half a clip long; waves
        shorter than half a clip are skipped.  Wave lengths are taken from
        the ``mix_c1`` files only.

        Args:
            mix_c1_scp: scp file for mixed waves, mixture 1 (KALDI format)
            s1_c1_scp: scp file for speaker 1, mixture 1
            s2_c1_scp: scp file for speaker 2, mixture 1
            mix_c2_scp: scp file for mixed waves, mixture 2
            s1_c2_scp: scp file for speaker 1, mixture 2
            s2_c2_scp: scp file for speaker 2, mixture 2
            sample_rate: samples per second, multiplied by
                sample_clip_size to obtain the clip length in samples
            sample_clip_size: segmental length in seconds (default: 4s)

        Raises:
            ValueError: if the resulting clip length is not positive.
        """
        check(mix_c1_scp, s1_c1_scp, s2_c1_scp, mix_c2_scp, s1_c2_scp,
              s2_c2_scp)
        self.sample_rate = sample_rate
        self.sample_clip_size = sample_clip_size
        # The clip length is used as a slice bound, so it has to be an
        # integer even when a fractional clip size is given.
        self.segment_length = int(
            round(self.sample_rate * self.sample_clip_size))
        if self.segment_length <= 0:
            # A non-positive length would make the clip loop below spin
            # forever.
            raise ValueError(
                'sample_rate * sample_clip_size must be positive, got '
                '{}'.format(self.segment_length))

        self.mix_c1_path = read_path(mix_c1_scp)
        self.s1_c1_path = read_path(s1_c1_scp)
        self.s2_c1_path = read_path(s2_c1_scp)
        self.mix_c2_path = read_path(mix_c2_scp)
        self.s1_c2_path = read_path(s1_c2_scp)
        self.s2_c2_path = read_path(s2_c2_scp)

        self.retrieve_index = []
        for i in range(len(self.mix_c1_path)):
            sample_size = len(wavread(self.mix_c1_path[i])[0])
            if sample_size < self.segment_length:
                # wave length is smaller than segmental length
                if sample_size * 2 < self.segment_length:
                    # Less than half a clip: too short to use.
                    continue
                self.retrieve_index.append((i, -1))
            else:
                # Cut wave into clips and restore the retrieve index
                sample_index = 0
                while sample_index + self.segment_length < sample_size:
                    self.retrieve_index.append((i, sample_index))
                    sample_index += self.segment_length
                # Cover the remainder with one clip aligned to the end of
                # the wave (it may overlap the previous clip).
                # NOTE(review): the clip is skipped only when sample_index
                # equals the last sample index — confirm this is intended.
                if sample_index != sample_size - 1:
                    self.retrieve_index.append(
                        (i, sample_size - self.segment_length))
 def read(self):
     """Yield every utterance as a dict of torch tensors.

     Each yielded dict contains:
         'key': ``self.key[i]`` for the utterance
         'mix': three stacked rows — ``c1 - c2``, ``c1`` and ``c2`` of
             the mixture — reshaped to (1, 3, -1)
         's1': speaker-1 wave of mixture 1, reshaped to (1, 1, -1)
         's2': speaker-2 wave of mixture 1, reshaped to (1, 1, -1)
     """
     for i in range(len(self.mix_c1_path)):
         key = self.key[i]
         mix_c1_sample = wavread(self.mix_c1_path[i])[0]
         s1_c1_sample = wavread(self.s1_c1_path[i])[0]
         s2_c1_sample = wavread(self.s2_c1_path[i])[0]
         mix_c2_sample = wavread(self.mix_c2_path[i])[0]
         # The mixture-2 source waves (s1_c2 / s2_c2) are not part of the
         # yielded sample, so they are not read from disk.
         mix_sample = np.stack(
             (
                 mix_c1_sample - mix_c2_sample,
                 mix_c1_sample,
                 mix_c2_sample,
             ),
             axis=0).squeeze(-1)
         yield {
             'key': key,
             'mix': torch.from_numpy(mix_sample.reshape(1, 3, -1)),
             's1': torch.from_numpy(s1_c1_sample.reshape(1, 1, -1)),
             's2': torch.from_numpy(s2_c1_sample.reshape(1, 1, -1)),
         }
Exemplo n.º 6
0
    def __getitem__(self, index):
        """Return one fixed-length, noise-perturbed training clip.

        ``self.retrieve_index[index]`` yields ``(utt_id, sample_index)``.
        A ``sample_index`` of ``-1`` marks a wave shorter than
        ``self.segment_length``, which is extended by appending its own
        leading samples; otherwise a ``self.segment_length``-long slice
        starting at ``sample_index`` is taken.

        One white-noise draw (standard normal scaled by 5e-4) is added to
        both mixtures before they are stacked.

        Returns:
            dict with
                'mix': rows ``c1 - c2``, ``c1`` and ``c2`` of the noisy
                    mixtures, reshaped to (3, -1)
                'src': the speaker-1 wave of mixture 1, reshaped to
                    (1, -1)
        """
        utt_id, sample_index = self.retrieve_index[index]
        # assumes wavread(...)[0] is a column array of shape (n, 1), as the
        # reshape(-1, 1) / squeeze(-1) calls below imply — TODO confirm
        mix_c1_sample = wavread(self.mix_c1_path[utt_id])[0]
        s1_c1_sample = wavread(self.s1_c1_path[utt_id])[0]
        mix_c2_sample = wavread(self.mix_c2_path[utt_id])[0]

        if sample_index == -1:
            # Short wave: repeat its head to fill up one segment.
            stack_length = self.segment_length - len(mix_c1_sample)

            def clip(wave):
                head = wave[:stack_length].reshape(-1, 1)
                return np.concatenate((wave, head), axis=0)
        else:
            end_index = sample_index + self.segment_length

            def clip(wave):
                return wave[sample_index:end_index]

        mix_c1_clipped_sample = clip(mix_c1_sample)
        s1_c1_clipped_sample = clip(s1_c1_sample)
        mix_c2_clipped_sample = clip(mix_c2_sample)

        whitenoise = np.random.normal(0, 1, len(mix_c1_clipped_sample))
        whitenoise = whitenoise.reshape(-1, 1).astype(np.float32)
        scaled_noise = 5e-4 * whitenoise
        # NOTE(review): the identical noise is added to both mixtures, so
        # it cancels (up to rounding) in the c1 - c2 row — confirm that
        # this is intended.
        mix_c1_clipped_sample = mix_c1_clipped_sample + scaled_noise
        mix_c2_clipped_sample = mix_c2_clipped_sample + scaled_noise

        mix_clipped_sample = np.stack(
            (
                mix_c1_clipped_sample - mix_c2_clipped_sample,
                mix_c1_clipped_sample,
                mix_c2_clipped_sample,
            ),
            axis=0).squeeze(-1)

        # The trailing comma matters: without it np.stack receives the
        # array itself and stacks it one sample at a time.
        src_clipped_sample = np.stack(
            (s1_c1_clipped_sample,), axis=0).squeeze(-1)

        return {
            'mix': mix_clipped_sample.reshape(3, -1),
            'src': src_clipped_sample.reshape(1, -1),
        }