Пример #1
0
def extract(args):
    """Compute a bundle of audio features for one file and save it as ``.npz``.

    Parameters
    ----------
    args : tuple
        ``(audio_directory, output_directory, af, overwrite)`` packed into a
        single argument.

        - ``audio_directory``: prefix of ``af`` that is stripped to obtain the
          file name.
        - ``output_directory``: directory the archive is written to.
        - ``af``: path of the audio file to process.
        - ``overwrite``: when falsy, an already existing archive is kept and
          the file is skipped.

    Returns
    -------
    None
        The features are written with ``np.savez_compressed``; nothing is
        returned.
    """
    audio_directory, output_directory, af, overwrite = args
    # Only the base name is used; the sub-directory part is discarded, so all
    # archives land directly in ``output_directory``.
    _subdir, output_file = os.path.split(af.split(audio_directory)[1])
    output_file = os.path.splitext(output_file)[0]
    output_file = os.path.join(output_directory, output_file)

    # np.savez_compressed appends '.npz' when the name does not already end
    # with it, so that is the path that must be tested for existence.
    if output_file.endswith('.npz'):
        saved_path = output_file
    else:
        saved_path = output_file + '.npz'

    if os.path.exists(saved_path) and not overwrite:
        print('Skipping {}. Already exists.'.format(saved_path))
        return

    output = dict()

    try:
        y, _sr = soundfile.read(af)
        y = to_mono(y)
        sr = 22050
        y = resample(y, _sr, sr)
    except Exception:
        # Deliberate best-effort: anything soundfile cannot handle is retried
        # through the generic loader.
        y, sr = load(af)

    output['linspec_mag'], output['linspec_phase'] = linspec(y)
    output['melspec'] = melspec(y, sr=sr)
    output['logspec'] = logspec(y, sr=sr)
    output['hcqt_mag'], output['hcqt_phase'] = hcqt(y, sr=sr)
    output['vggish_melspec'] = vggish_melspec(y, sr=sr)

    # high-level
    output['percussive_ratio'], output['percussive_rms'], output[
        'total_rms'] = percussive_ratio(y, margin=3.0)
    output['onset_strength'] = onset_strength(y, detrend=True)
    output['tempogram'] = tempogram(y)
    output['onset_patterns'] = onset_patterns(y, sr=sr)

    np.savez_compressed(output_file, **output)
Пример #2
0
def findTimbral(wave):  # 29 dimensions
    """Summarise timbre-related features of ``wave`` as scalar statistics.

    For the spectral centroid, spectral roll-off, onset strength with
    ``lag=1`` (labelled "spectral flux" by the author) and zero-crossing
    rate, the mean and the sample variance (``ddof=1``) are stored. The same
    two statistics are stored for each of the 10 MFCC rows, followed by the
    value returned by ``feature_low_energy``.

    Parameters
    ----------
    wave
        Audio signal passed unchanged to the feature extractors.

    Returns
    -------
    dict
        Keys ``mu_*`` / ``var_*`` for centroid, rolloff, flux, zcr and
        ``mfcc1`` .. ``mfcc10``, plus ``low_energy``
        (4 * 2 + 10 * 2 + 1 = 29 entries).
    """
    timbral_feature = {}

    centroid = feature.spectral_centroid(wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    flux = onset_strength(wave, lag=1)  # spectral flux
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    # Sample variance, matching every other variance computed in this function.
    timbral_feature['var_zcr'] = np.var(zero_crossing, ddof=1)

    mfccs = feature.mfcc(wave, n_mfcc=10)  # 10 coefficient rows
    for index, coef in enumerate(mfccs, start=1):
        timbral_feature['mu_mfcc' + str(index)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(index)] = np.var(coef, ddof=1)

    timbral_feature['low_energy'] = feature_low_energy(wave)  # 1 dim

    return timbral_feature
Пример #3
0
def findRhythmic(wave):  # 3 dimensions
    """Return the rhythm descriptor of ``wave`` as a one-entry dict.

    The onset strength envelope of ``wave`` is turned into a tempogram
    (using the module-level ``hop_size`` as hop length) and all tempogram
    values are summed.

    Returns
    -------
    dict
        ``{'tempo_sum': <sum of all tempogram values>}``. Only this single
        key is produced here, although the header comment mentions three
        dimensions.
    """
    onset_env = onset_strength(wave)
    tgram = feature.tempogram(onset_envelope=onset_env, hop_length=hop_size)
    return {'tempo_sum': np.sum(tgram)}
Пример #4
0
def initialize_bpf(filename, filepath, only_show=False, rewrite=False):
    """Analyse the beats of an audio file and write them to ``dat/bpf/<filename>.bpf``.

    Steps, in order:

    1. Convert the input to a mono wav via ``audioconvert.convert_to_monowav``
       and load it.
    2. Track beats twice (``tightness=100`` for the main beats,
       ``tightness=1`` for a loose set) and compute the onset-strength and
       RMS envelopes.
    3. Cache the plotting data in ``dat/plt/<filename>.plt`` (created when
       missing or when ``rewrite`` is true), read it back and start a
       ``Process`` running ``plt_show`` with it.
    4. Unless ``only_show`` is true, keep the loose beats whose onset strength
       or RMS exceeds the thresholds derived from ``ONSET_DETECT_RATIO`` /
       ``RMS_RATIO`` and write ``[tempo, beat times, distance of each beat to
       the nearest main beat, main beat times]`` to the ``.bpf`` file.
    5. Delete ``dat/<filename>.wav`` if it exists.

    Parameters
    ----------
    filename : str
        Base name used for the ``.plt``, ``.bpf`` and ``.wav`` paths.
    filepath
        Passed to ``audioconvert.convert_to_monowav`` together with
        ``filename``.
    only_show : bool
        When true, only the plot process is started; no ``.bpf`` is written.
    rewrite : bool
        When true, the ``.plt`` cache is regenerated even if it exists.

    Returns
    -------
    str
        ``"dat/bpf/<filename>.bpf"``. The path is returned even when
        ``only_show`` is true and the file was therefore not written by
        this call.
    """
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, _sr = load(wav_filename, dtype="float32", res_type=TYPE)

    print("{LOAD TIME}:%f" % (time.time() - timestart))
    # Main beat points.
    tempo, beats = beat_track(y=y, tightness=100)
    # Loose beat points: tightness is how strongly beats stick to the tempo
    # grid; the lower it is, the more irregular the result.
    _loose_tempo, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)
    # -----------RMS ENVELOPE
    tempo = normalize_tempo(tempo)
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    onset_all_beat = [onset_envelope[beat] for beat in beats1]
    frame_all_beat = list(beats1)
    AVERAGE_ONSET = np.mean(onset_all_beat)

    plt_path = "dat/plt/%s.plt" % filename
    if not os.path.exists(plt_path) or rewrite:
        print("No plt found, initializing...")
        with open(plt_path, mode="w") as plt_file:
            plt_file.write(
                repr((filename, rms_envelope.T.tolist(), onset_all_beat,
                      frame_all_beat, MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)))
    with open(plt_path, mode="r") as plt_file:
        # NOTE(review): eval() executes whatever the cache file contains.
        # It is kept because the cache is written with repr() above, but the
        # file must never come from an untrusted source.
        plt_file_content = eval(plt_file.read())
    plt_process = Process(target=plt_show, args=plt_file_content)
    plt_process.start()

    if not only_show:
        new_frames_list = [
            beat for beat in beats1
            if onset_envelope[beat] > AVERAGE_ONSET / ONSET_DETECT_RATIO
            or rms_envelope.T[beat] > MAX_RMS / RMS_RATIO
        ]
        print("{MAX_ONSET}:%f" % onset_envelope.max())
        new_beats_frame = np.array(new_frames_list)
        mainbeatlocation = frames_to_time(beats)
        beatlocation = frames_to_time(new_beats_frame).tolist()

        # Distance from each kept beat to the closest main beat, i.e. how
        # "main" that beat is.
        beatmain = [abs(mainbeatlocation - beat).min()
                    for beat in beatlocation]

        with open("dat/bpf/%s.bpf" % filename, mode="w") as bpf_file:
            bpf_file.write(
                repr([tempo, beatlocation, beatmain,
                      mainbeatlocation.tolist()]))

    # Presumably the intermediate mono wav produced by the conversion step --
    # confirm against audioconvert.
    if os.path.exists("dat/%s.wav" % filename):
        os.remove("dat/%s.wav" % filename)
    return "dat/bpf/%s.bpf" % filename
Пример #5
0
    def get_bpm(self):
        """
        Estimate the tempo of ``self.y`` in BPM (Beats Per Minute).

        The onset strength envelope is computed from ``self.y`` at sample
        rate ``self.sr``; the first tempo estimate derived from it is
        truncated to an integer.

        :return: Integer
        """
        envelope = onset_strength(self.y, sr=self.sr)
        estimates = tempo(onset_envelope=envelope, sr=self.sr)
        first_estimate = estimates[0]

        return int(first_estimate)
Пример #6
0
def plt_show_solo(filename, filepath):
    """Compute the plotting data for one audio file and show it directly.

    The file is converted to a mono wav and loaded, beats are tracked with
    ``tightness=1``, and the onset-strength and RMS envelopes are computed.
    ``plt_show`` is then called in the current process with the file name,
    the transposed RMS envelope, the onset strength at every beat, the beat
    frames, and the max RMS / mean RMS / mean beat-onset values.

    Parameters
    ----------
    filename
        Passed to the converter and forwarded to ``plt_show``.
    filepath
        Passed to ``audioconvert.convert_to_monowav`` together with
        ``filename``.
    """
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    load_started = time.time()
    y, _sr = load(wav_filename, dtype="float32", res_type=TYPE)
    elapsed = time.time() - load_started
    print("{LOAD TIME}:%f" % elapsed)

    # Beat points; tightness is how strongly beats stick to the tempo grid,
    # the lower it is, the more irregular the result.
    _, beat_frames = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)

    # -----------RMS ENVELOPE
    max_rms = np.max(rms_envelope)
    average_rms = np.mean(rms_envelope)

    onset_all_beat = [onset_envelope[frame] for frame in beat_frames]
    frame_all_beat = [frame for frame in beat_frames]
    average_onset = np.mean(onset_all_beat)

    plt_show(
        filename,
        rms_envelope.T,
        onset_all_beat,
        frame_all_beat,
        max_rms,
        average_rms,
        average_onset,
    )
Пример #7
0
def onset_detect(
    y=None,
    sr=22050,
    onset_envelope=None,
    hop_length=512,
    backtrack=False,
    energy=None,
    units="frames",
    normalize=True,
    **kwargs
):
    """Basic onset detector.  Locate note onset events by picking peaks in an
    onset strength envelope. Modified from `librosa.onset.onset_detect` to add a
    `normalize` flag.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db


    Parameters
    ----------
    y          : np.ndarray [shape=(n,)]
        audio time series

    sr         : number > 0 [scalar]
        sampling rate of `y`

    onset_envelope     : np.ndarray [shape=(m,)]
        (optional) pre-computed onset strength envelope.
        The array passed in is not modified.

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.

        This is primarily useful when using onsets as slice points for segmentation.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then the (shifted and, if requested, normalized)
        `onset_envelope` is used.

    normalize : bool (optional)
        If `True`, normalize the onset envelope before peak picking. By default
        this parameter is `True`.

    kwargs : additional keyword arguments
        Additional parameters for peak picking.

        See `librosa.util.peak_pick` for details.


    Returns
    -------

    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified.  By default, frame indices.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty integer array (regardless of `units`).


    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` are provided

        or if `units` is not one of 'frames', 'samples', or 'time'
        (checked only after peak picking, so not on the empty-result path)
    """

    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent).
    # Done out of place so a caller-supplied envelope is never mutated.
    onset_envelope = onset_envelope - onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        # `np.int` was removed in NumPy 1.24; the builtin is the same dtype.
        return np.array([], dtype=int)

    if normalize:
        # Normalize onset strength function to [0, 1] range.
        # Out of place as well: an in-place true division would fail on an
        # integer-typed envelope.
        onset_envelope = onset_envelope / onset_envelope.max()

    # These parameter settings found by large-scale search
    kwargs.setdefault("pre_max", 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)  # 100ms
    kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault("wait", 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault("delta", 0.07)

    # Peak pick the onset envelope
    onsets = util.peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope

        onsets = onset_backtrack(onsets, energy)

    if units == "frames":
        pass
    elif units == "samples":
        onsets = core.frames_to_samples(onsets, hop_length=hop_length)
    elif units == "time":
        onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError("Invalid unit type: {}".format(units))

    return onsets
Пример #8
0
def compute_tempo(y, sr):
    """Estimate the tempo of ``y`` and return it as a plain Python float.

    Parameters
    ----------
    y
        Audio signal handed to ``onset_strength``.
    sr
        Sampling rate, passed to both ``onset_strength`` and ``tempo``.

    Returns
    -------
    float
        The first tempo estimate.
    """
    onset_env = onset_strength(y, sr=sr)
    # Assumes tempo() returns a one-element array (librosa.beat.tempo does by
    # default -- confirm for the version in use). The element is taken
    # explicitly because float() on an array with ndim > 0 is deprecated in
    # NumPy >= 1.25.
    return float(tempo(onset_envelope=onset_env, sr=sr)[0])