Exemplo n.º 1
0
    def _adjust_time_resolution(self, batch, local_condition, max_time_steps):
        '''Adjust time resolution between audio and local condition.

        Every sample whose audio is longer than ``max_time_steps`` is
        randomly cropped; other samples are passed through (after
        ``audio.trim`` when there is no local condition).

        Args:
            batch: iterable of ``(x, c, g, l)`` tuples. ``x`` is the audio
                sequence and ``c`` the local-condition features, indexed
                as ``c[frame, :]``. ``g`` and ``l`` are returned untouched.
            local_condition: truthy when ``c`` has to be cropped together
                with ``x`` so both keep covering the same time span.
            max_time_steps: maximum number of audio steps to keep per
                sample, or ``None`` to disable cropping.

        Returns:
            A new list of ``(x, c, g, l)`` tuples.
        '''
        new_batch = []

        if local_condition:
            for x, c, g, l in batch:
                self._assert_ready_for_upsample(x, c)
                if max_time_steps is not None:
                    hop_size = audio.get_hop_size(self._hparams)
                    # Crop length is aligned to a whole number of frames so
                    # that x and c can be sliced consistently.
                    max_steps = _ensure_divisible(
                        max_time_steps, hop_size, True)
                    if len(x) > max_time_steps:
                        max_time_frames = max_steps // hop_size
                        # Pick the window in frame units, then convert the
                        # offset to audio steps.
                        start = np.random.randint(0, len(c) - max_time_frames)
                        time_start = start * hop_size
                        x = x[time_start:time_start + max_time_frames * hop_size]
                        c = c[start:start + max_time_frames, :]
                        self._assert_ready_for_upsample(x, c)

                new_batch.append((x, c, g, l))
            return new_batch

        for x, c, g, l in batch:
            x = audio.trim(x)
            if max_time_steps is not None and len(x) > max_time_steps:
                # The window is cut out of x, so its offset must be bounded
                # by len(x). c is not consulted in this branch.
                start = np.random.randint(0, len(x) - max_time_steps)
                x = x[start:start + max_time_steps]
            new_batch.append((x, c, g, l))
        return new_batch
Exemplo n.º 2
0
    def _adjust_time_resolution(self, batch, local_condition, max_time_steps):
        '''Adjust time resolution between audio and local condition.

        With a local condition, the audio ``x`` and the features ``c`` are
        first brought to matching lengths (``x`` is padded with ``_pad`` on
        both sides, or ``c`` is padded through ``self._pad_specs``). Samples
        whose audio is longer than ``max_time_steps`` are then randomly
        cropped, slicing ``x`` and ``c`` over the same window.

        Without a local condition, the audio is passed through
        ``audio.trim`` and randomly cropped to ``max_time_steps`` when it
        is longer than that.

        Args:
            batch: iterable of ``(x, c, g, l)`` tuples. ``x`` is the audio
                sequence and ``c`` the local-condition features, indexed
                as ``c[frame, :]``. ``g`` and ``l`` are returned untouched.
            local_condition: truthy when ``c`` has to be aligned and
                cropped together with ``x``.
            max_time_steps: maximum number of audio steps to keep per
                sample, or ``None`` to disable cropping.

        Returns:
            A new list of ``(x, c, g, l)`` tuples.
        '''
        new_batch = []

        if local_condition:
            for x, c, g, l in batch:
                hop_size = audio.get_hop_size(self._hparams)
                expected_length = hop_size * len(c)

                if len(x) < expected_length:
                    # Audio is shorter than the features cover: pad it on
                    # both sides, the extra sample (odd padding) going to
                    # the right.
                    pad_length = expected_length - len(x)
                    left = pad_length // 2
                    x = np.pad(x, (left, pad_length - left),
                               mode='constant',
                               constant_values=_pad)
                else:
                    c = self._pad_specs(c, len(x) // hop_size)

                self._assert_ready_for_upsample(x, c)
                if max_time_steps is not None:
                    # Crop length is aligned to a whole number of frames so
                    # that x and c can be sliced consistently.
                    max_steps = _ensure_divisible(
                        max_time_steps, hop_size, True)
                    if len(x) > max_time_steps:
                        max_time_frames = max_steps // hop_size
                        # Pick the window in frame units, then convert the
                        # offset to audio steps.
                        start = np.random.randint(0, len(c) - max_time_frames)
                        time_start = start * hop_size
                        x = x[time_start:time_start + max_time_frames * hop_size]
                        c = c[start:start + max_time_frames, :]
                        self._assert_ready_for_upsample(x, c)

                new_batch.append((x, c, g, l))
            return new_batch

        for x, c, g, l in batch:
            x = audio.trim(x)
            if max_time_steps is not None and len(x) > max_time_steps:
                # The window is cut out of x, so its offset must be bounded
                # by len(x). c is not consulted in this branch.
                start = np.random.randint(0, len(x) - max_time_steps)
                x = x[start:start + max_time_steps]
            new_batch.append((x, c, g, l))
        return new_batch