Example #1
File: base.py Project: suldier/audiotsm
    def do_transform(self, in_buffer, out_buffer):
        """Run the data of ``in_buffer`` through the
        :class:`~audiotsm.base.tsm.TSM` object and write the output to
        ``out_buffer``.

        :param in_buffer: a ``Gst.Buffer`` containing the input data.
        :param out_buffer: a ``Gst.Buffer`` where the output data will be
            written.
        """
        # A GStreamer bug increases the refcount of out_buffer, making it
        # non-writable (https://bugzilla.gnome.org/show_bug.cgi?id=727702#c4).
        # Temporarily set the refcount to 1 to work around this.
        refcount = out_buffer.mini_object.refcount
        out_buffer.mini_object.refcount = 1

        # Set the position of the output buffer
        out_buffer.pts = self._position

        # Run the TSM procedure
        reader = ArrayReader(self._gstbuffer_to_ndarray(in_buffer))
        writer = ArrayWriter(self._channels)

        self._tsm.run(reader, writer, flush=False)

        self._ndarray_to_gstbuffer(out_buffer, writer.data)
        out_buffer.duration = (
            (out_buffer.get_size() * Gst.SECOND) //
            (self._bps * self._samplerate)
        )
        self._position += out_buffer.duration

        # Reset the refcount
        out_buffer.mini_object.refcount = refcount

        return Gst.FlowReturn.OK
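The duration line above converts a byte count into nanoseconds: the buffer size divided by bytes-per-second gives seconds, scaled by Gst.SECOND. A minimal sketch of the same integer arithmetic with made-up values, assuming _bps is the byte size of one sample frame across all channels:

# Sketch of the duration arithmetic above (hypothetical values).
GST_SECOND = 10 ** 9        # Gst.SECOND: one second expressed in nanoseconds
bps = 4                     # assumed: bytes per sample frame (2 ch x 16-bit)
samplerate = 44100          # sample frames per second
buffer_size = 176400        # bytes in the buffer -- exactly one second here

duration_ns = (buffer_size * GST_SECOND) // (bps * samplerate)
print(duration_ns)          # 1000000000 ns, i.e. one second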
Example #2
def test_cbuffer_read_from(in_buffer, write_buffer, out_n, out_data):
    """Run tests for the CBuffer.write method."""
    reader = ArrayReader(np.array(write_buffer))
    n = in_buffer.read_from(reader)

    assert n == out_n
    assert_almost_equal(in_buffer.to_array(), out_data)
Example #3
def wsola_sample(sample, speed):
    """Time-stretch a mono sample by the given speed using WSOLA."""
    # audiotsm expects a (channels, samples) array, so add a channel axis.
    sample = sample.reshape(1, len(sample))
    reader = ArrayReader(sample)
    writer = ArrayWriter(channels=1)
    tsm = wsola(channels=1, speed=speed)
    tsm.run(reader, writer)
    return writer.data[0]
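A usage sketch for wsola_sample, with a made-up mono test tone (the 440 Hz signal below is purely illustrative):

import numpy as np

t = np.linspace(0, 1, 44100)
tone = np.sin(2 * np.pi * 440 * t)       # one second of A4, mono
slowed = wsola_sample(tone, speed=0.5)   # roughly twice as many samples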
Example #4
def time_stretch(data, speed):
    """Time-stretch a mono signal by the given speed using WSOLA."""
    data = data.reshape(1, -1)
    reader = ArrayReader(data)
    writer = ArrayWriter(channels=1)
    tsm = wsola(channels=1, speed=speed)
    tsm.run(reader, writer)
    # Flatten the (1, n) output back to a contiguous 1-D array
    output = np.ascontiguousarray(writer.data.T)
    output = output.flatten()
    return output
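For mono output, the transpose-and-flatten above is equivalent to taking the single row of writer.data directly; a small self-contained check of that equivalence (the array below stands in for writer.data, assumed to have shape (1, n)):

import numpy as np

data = np.arange(6, dtype=np.float32).reshape(1, -1)  # stands in for writer.data
out_a = np.ascontiguousarray(data.T).flatten()
out_b = data[0]
assert np.array_equal(out_a, out_b)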
Example #5
def test_skip(data_in, n_in, n_out, data_out):
    """Run tests for the ArrayReader.skip method."""
    reader = ArrayReader(np.array(data_in))

    n = reader.skip(n_in)
    assert n == n_out

    # Check the data remaining in the reader
    buffer = np.zeros_like(data_out)
    reader.read(buffer)
    assert_almost_equal(buffer, data_out)

    # Check that there is no more data in the reader
    buffer = np.zeros_like(data_in)
    n = reader.read(buffer)
    assert not buffer.any()
    assert n == 0
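The bare (data_in, n_in, n_out, data_out) parameters suggest these tests are driven by a pytest parametrize table. A sketch of what such a table could look like, with illustrative numbers rather than the project's real cases:

import numpy as np
import pytest
from audiotsm.io.array import ArrayReader

@pytest.mark.parametrize("data_in, n_in, n_out, data_out", [
    # Skip 2 of 4 mono samples; the last 2 remain readable.
    ([[1., 2., 3., 4.]], 2, 2, [[3., 4.]]),
    # Skipping more than is available only skips what exists.
    ([[1., 2.]], 5, 2, [[]]),
])
def test_skip_illustrative(data_in, n_in, n_out, data_out):
    reader = ArrayReader(np.array(data_in))
    assert reader.skip(n_in) == n_out
    remaining = np.zeros_like(np.array(data_out))
    reader.read(remaining)
    assert np.allclose(remaining, data_out)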
Example #6
    def execute(self):
        # Get per-frame loudness flags: 0 for silence, 1 for loudness.
        has_loud_audio = self.get_loud_frame()
        # Compute the edit points between silent and loud sections.
        edit_points = self.get_edit_points(has_loud_audio)

        start_frame = 0
        output = self.get_output()
        for edit_point in edit_points:
            audio_chunk = self.parameter.audio_data[
                          int(edit_point.start_frame * self.parameter.samples_per_frame):
                          int(edit_point.end_frame * self.parameter.samples_per_frame)
                          ]

            # need channels * frames, transpose data first.
            reader = ArrayReader(np.transpose(audio_chunk))
            writer = ArrayWriter(reader.channels)
            tsm = phasevocoder(reader.channels,
                               speed=self.parameter.new_speed[int(edit_point.should_keep)])
            tsm.run(reader, writer)
            altered_audio_data = np.transpose(writer.data)

            altered_audio_data_length = altered_audio_data.shape[0]
            if altered_audio_data_length < self.parameter.audio_fade_envelope_size:
                # Chunk is shorter than the fade envelope; silence it entirely.
                altered_audio_data[:] = 0
            else:
                self.fade_out_silence(altered_audio_data)
            end_frame = start_frame + altered_audio_data_length

            start_output_frame = int(math.ceil(start_frame / self.parameter.samples_per_frame))
            end_output_frame = int(math.ceil(end_frame / self.parameter.samples_per_frame))

            output.apply_edit_point(edit_point, altered_audio_data, start_output_frame, end_output_frame)

            start_frame = end_frame

        output.close()
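The loop body above always performs the same three steps: transpose samples-major audio into the channels-major layout audiotsm expects, run the phase vocoder, and transpose back. A minimal standalone sketch of that pattern, assuming audio_chunk has shape (samples, channels):

import numpy as np
from audiotsm import phasevocoder
from audiotsm.io.array import ArrayReader, ArrayWriter

def change_chunk_speed(audio_chunk, speed):
    # audiotsm works on (channels, samples); wavfile data is (samples, channels).
    reader = ArrayReader(np.transpose(audio_chunk))
    writer = ArrayWriter(reader.channels)
    tsm = phasevocoder(reader.channels, speed=speed)
    tsm.run(reader, writer)
    return np.transpose(writer.data)  # back to (samples, channels)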
Example #7
def test_read(data_in, read_out, n_out, data_out):
    """Run tests for the ArrayReader.read method."""
    reader = ArrayReader(np.array(data_in))

    buffer = np.zeros_like(read_out, dtype=np.float32)
    n = reader.read(buffer)
    assert_almost_equal(buffer, read_out)
    assert n == n_out

    # Check the data remaining in the reader
    buffer = np.zeros_like(data_out)
    reader.read(buffer)
    assert_almost_equal(buffer, data_out)

    # Check that there is no more data in the reader
    buffer = np.zeros_like(data_in)
    n = reader.read(buffer)
    assert not buffer.any()
    assert n == 0
Example #8
File: sine.py Project: suldier/audiotsm
"""
# pylint: disable=invalid-name

import numpy as np
import sounddevice as sd
from audiotsm import wsola
from audiotsm.io.array import ArrayReader, ArrayWriter


# The parameters of the input signal
length = 1  # in seconds
samplerate = 44100  # in Hz
frequency = 440  # an A4

# Generate the input signal (a sine at `frequency` Hz needs the 2*pi factor)
time = np.linspace(0, length, int(length * samplerate))
input_signal = np.sin(2 * np.pi * frequency * time).reshape((1, -1))

# Run the TSM procedure
reader = ArrayReader(input_signal)
writer = ArrayWriter(channels=1)

tsm = wsola(channels=1, speed=0.5)
tsm.run(reader, writer)

# Play the output
# This example was written to show how to use an ArrayWriter. If you want to
# play the output of a TSM procedure, you should use an
# audiotsm.io.stream.StreamWriter.
sd.play(np.ascontiguousarray(writer.data.T), samplerate, blocking=True)
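The closing comment above points at audiotsm.io.stream.StreamWriter for playback. A hedged sketch of that route, continuing the example; the StreamWriter(channels, samplerate) constructor and context-manager use are assumed from the library's other writers, so check the audiotsm documentation before relying on them:

from audiotsm import wsola
from audiotsm.io.array import ArrayReader
from audiotsm.io.stream import StreamWriter  # assumed import path

reader = ArrayReader(input_signal)
# Assumed signature: StreamWriter(channels, samplerate).
with StreamWriter(1, samplerate) as writer:
    tsm = wsola(channels=1, speed=0.5)
    tsm.run(reader, writer)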
Example #9
def speed_up_video(input_file: str,
                   output_file: str = None,
                   frame_rate: float = 30,
                   sample_rate: int = 44100,
                   silent_threshold: float = 0.03,
                   silent_speed: float = 5.0,
                   sounded_speed: float = 1.0,
                   frame_spreadage: int = 1,
                   audio_fade_envelope_size: int = 400,
                   temp_folder: str = 'TEMP') -> None:
    """
    Speeds up a video file with different speeds for the silent and loud sections in the video.

    :param input_file: The file name of the video to be sped up.
    :param output_file: The file name of the output file. If not given, it will be '<input_file>_ALTERED.<ext>'.
    :param frame_rate: The frame rate of the given video. Only needed if not extractable through ffmpeg.
    :param sample_rate: The sample rate of the audio in the video.
    :param silent_threshold: The volume threshold below which a chunk counts as silent.
                             Values range from 0 (silence) to 1 (max volume).
    :param silent_speed: The speed of the silent chunks.
    :param sounded_speed: The speed of the loud chunks.
    :param frame_spreadage: How many silent frames adjacent to sounded frames should be included to provide context.
    :param audio_fade_envelope_size: Audio transition smoothing duration in samples.
    :param temp_folder: The file path of the temporary working folder.
    """
    # Set output file name based on input file name if none was given
    if output_file is None:
        output_file = _input_to_output_filename(input_file)

    # Create Temp Folder
    if os.path.exists(temp_folder):
        _delete_path(temp_folder)
    _create_path(temp_folder)

    # Find out framerate and duration of the input video
    command = 'ffprobe -i "{}" -hide_banner -loglevel error -select_streams v' \
              ' -show_entries format=duration:stream=avg_frame_rate'.format(input_file)
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         bufsize=1,
                         universal_newlines=True,
                         shell=True)
    std_out, err = p.communicate()
    match_frame_rate = re.search(r'frame_rate=(\d*)/(\d*)', str(std_out))
    if match_frame_rate is not None:
        frame_rate = float(match_frame_rate.group(1)) / float(
            match_frame_rate.group(2))
        # print(f'Found Framerate {frame_rate}')

    match_duration = re.search(r'duration=([\d.]*)', str(std_out))
    original_duration = 0.0
    if match_duration is not None:
        original_duration = float(match_duration.group(1))
        # print(f'Found Duration {original_duration}')

    # Extract the audio
    command = 'ffmpeg -i "{}" -ab 160k -ac 2 -ar {} -vn {} -hide_banner' \
        .format(input_file,
                sample_rate,
                temp_folder + '/audio.wav')

    _run_timed_ffmpeg_command(command,
                              total=int(original_duration * frame_rate),
                              unit='frames',
                              desc='Extracting audio:')

    wav_sample_rate, audio_data = wavfile.read(temp_folder + "/audio.wav")
    audio_sample_count = audio_data.shape[0]
    max_audio_volume = _get_max_volume(audio_data)
    samples_per_frame = wav_sample_rate / frame_rate
    audio_frame_count = int(math.ceil(audio_sample_count / samples_per_frame))

    # Find frames with loud audio
    has_loud_audio = np.zeros(audio_frame_count, dtype=bool)

    for i in range(audio_frame_count):
        start = int(i * samples_per_frame)
        end = min(int((i + 1) * samples_per_frame), audio_sample_count)
        audio_chunk = audio_data[start:end]
        chunk_max_volume = float(
            _get_max_volume(audio_chunk)) / max_audio_volume
        if chunk_max_volume >= silent_threshold:
            has_loud_audio[i] = True

    # Chunk the frames together that are quiet or loud
    chunks = [[0, 0, 0]]
    should_include_frame = np.zeros(audio_frame_count, dtype=bool)
    for i in tqdm(range(audio_frame_count),
                  desc='Finding chunks:',
                  unit='frames'):
        start = int(max(0, i - frame_spreadage))
        end = int(min(audio_frame_count, i + 1 + frame_spreadage))
        should_include_frame[i] = np.any(has_loud_audio[start:end])
        if i >= 1 and should_include_frame[i] != should_include_frame[
                i - 1]:  # Did we flip?
            chunks.append([chunks[-1][1], i, should_include_frame[i - 1]])

    chunks.append([
        chunks[-1][1], audio_frame_count,
        should_include_frame[audio_frame_count - 1]
    ])
    chunks = chunks[1:]

    # Generate audio data with varying speed for each chunk
    new_speeds = [silent_speed, sounded_speed]
    output_pointer = 0
    audio_buffers = []
    for index, chunk in tqdm(enumerate(chunks),
                             total=len(chunks),
                             desc='Changing audio:',
                             unit='chunks'):
        audio_chunk = audio_data[int(chunk[0] *
                                     samples_per_frame):int(chunk[1] *
                                                            samples_per_frame)]

        reader = ArrayReader(np.transpose(audio_chunk))
        writer = ArrayWriter(reader.channels)
        tsm = phasevocoder(reader.channels, speed=new_speeds[int(chunk[2])])
        tsm.run(reader, writer)
        altered_audio_data = np.transpose(writer.data)

        # Smooth each transition by quickly fading the audio in and out
        if altered_audio_data.shape[0] < audio_fade_envelope_size:
            # Chunk is shorter than the fade envelope; silence it entirely.
            altered_audio_data[:] = 0
        else:
            premask = np.arange(
                audio_fade_envelope_size) / audio_fade_envelope_size
            mask = np.repeat(premask[:, np.newaxis], 2,
                             axis=1)  # make the fade-envelope mask stereo
            altered_audio_data[:audio_fade_envelope_size] *= mask
            altered_audio_data[-audio_fade_envelope_size:] *= 1 - mask

        audio_buffers.append(altered_audio_data / max_audio_volume)

        end_pointer = output_pointer + altered_audio_data.shape[0]
        start_output_frame = int(math.ceil(output_pointer / samples_per_frame))
        end_output_frame = int(math.ceil(end_pointer / samples_per_frame))
        chunks[index] = chunk[:2] + [start_output_frame, end_output_frame]

        output_pointer = end_pointer

    # print(chunks)

    output_audio_data = np.concatenate(audio_buffers)
    wavfile.write(temp_folder + "/audioNew.wav", sample_rate,
                  output_audio_data)

    # Cut the video parts to length
    expression = _get_tree_expression(chunks)

    with open(temp_folder + "/filterGraph.txt", 'w') as filter_graph_file:
        filter_graph_file.write(f'fps=fps={frame_rate},setpts=')
        filter_graph_file.write(expression.replace(',', '\\,'))

    command = 'ffmpeg -i "{}" -i "{}" -filter_script:v "{}" -map 0 -map -0:a -map 1:a -c:a aac "{}"' \
              ' -loglevel warning -stats -y -hide_banner' \
        .format(input_file,
                temp_folder + '/audioNew.wav',
                temp_folder + '/filterGraph.txt',
                output_file)

    _run_timed_ffmpeg_command(command,
                              total=chunks[-1][3],
                              unit='frames',
                              desc='Generating final:')

    _delete_path(temp_folder)
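A usage sketch for speed_up_video, with hypothetical file names and thresholds:

# Hypothetical invocation: silent parts at 6x, loud parts at normal speed.
speed_up_video('lecture.mp4',
               output_file='lecture_fast.mp4',
               silent_threshold=0.05,
               silent_speed=6.0)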
Example #10
def apply_speed_to_audio(audio, speed):
    """Change the speed of a (channels x samples) audio array with a phase vocoder."""
    reader = ArrayReader(audio)
    # Match the writer's channel count to the input instead of hard-coding stereo.
    writer = ArrayWriter(reader.channels)
    tsm = phasevocoder(reader.channels, speed=speed)
    tsm.run(reader, writer)
    return writer.data
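A usage sketch for the function above, assuming a (2 x samples) stereo float array:

import numpy as np

# Hypothetical stereo input: 2 channels x 1 second of noise at 44.1 kHz.
audio = np.random.uniform(-1, 1, (2, 44100))
faster = apply_speed_to_audio(audio, speed=2.0)  # about half as many samples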