Example No. 1
def algorithm_rhythm_percival14_mod(sound):
    """
    Percival, G., & Tzanetakis, G. (2014). Streamlined tempo estimation based on autocorrelation and cross-correlation
    with pulses. IEEE/ACM Transactions on Audio, Speech, and Language Processing, 22(12), 1765-1776.
    :param sound: sound dictionary from dataset
    :return: dictionary with results
    """
    results = dict()
    sample_rate = 44100
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=sample_rate)

    # Convert to mono and add silence at the beginning if sound is shorter than 6s (otherwise algorithm fails)
    if len(audio.shape) > 1:
        audio = audio[:, 0]
    min_duration = 6  # In seconds
    minimum_size = min_duration * sample_rate
    if audio.shape[0] < minimum_size:
        audio = np.append(np.zeros(minimum_size - audio.shape[0]), audio)
    try:
        from algorithms.Percival14Mod.defs_class import Defs
        from algorithms.Percival14Mod.onset_strength import onset_strength_signal
        from algorithms.Percival14Mod.beat_period_detection import beat_period_detection
        from algorithms.Percival14Mod.accumulator_overall import accumulator_overall
        defs = Defs()
        oss_sr, oss_data = onset_strength_signal(defs, sample_rate, audio, plot=False)
        tempo_lags = beat_period_detection(defs, oss_sr, oss_data, plot=False)
        bpm = accumulator_overall(defs, tempo_lags, oss_sr)
        results['Percival14Mod'] = {'bpm': bpm}
        # Alternative variants (disabled): limit or skip the pulse-train calculation
        # tempo_lags = beat_period_detection(defs, oss_sr, oss_data, plot=False, limit_pulse_trains=True)
        # bpm = accumulator_overall(defs, tempo_lags, oss_sr)
        # results['Percival14ModLimitPulseTrains'] = {'bpm': bpm}
        # tempo_lags = beat_period_detection(defs, oss_sr, oss_data, plot=False, skip_calc_pulse_trains=True)
        # bpm = accumulator_overall(defs, tempo_lags, oss_sr)
        # results['Percival14ModSkipPulseTrains'] = {'bpm': bpm}
    except ValueError:
        # Algorithm failed on this sound; return an empty results dict
        pass
    return results
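A minimal usage sketch for the function above. The `sound` dictionary format, `SOUND_FILE_KEY`, and `load_audio_file` come from the surrounding dataset module; the file path below is hypothetical:

def demo_percival14_mod():
    # Hypothetical dataset entry; SOUND_FILE_KEY is assumed to map to an audio file path
    sound = {SOUND_FILE_KEY: '/data/sounds/loop_0001.wav'}
    results = algorithm_rhythm_percival14_mod(sound)
    if 'Percival14Mod' in results:  # the dict is empty when a ValueError was swallowed
        print('Estimated tempo: %.1f BPM' % results['Percival14Mod']['bpm'])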
Example No. 3
def algorithm_rhythm_percival14(sound):
    """
    Percival, G., & Tzanetakis, G. (2014). Streamlined tempo estimation based on autocorrelation and cross-correlation
    with pulses. IEEE/ACM Transactions on Audio, Speech, and Language Processing, 22(12), 1765-1776.
    :param sound: sound dictionary from dataset
    :return: dictionary with results
    """
    results = dict()
    sample_rate = 44100
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=sample_rate)

    # Convert to mono and add silence at the beginning if sound is shorter than 6s (otherwise algorithm fails)
    if len(audio.shape) > 1:
        audio = audio[:, 0]
    min_duration = 6  # In seconds
    minimum_size = min_duration * sample_rate
    if audio.shape[0] < minimum_size:
        audio = np.append(np.zeros(minimum_size - audio.shape[0]), audio)
    try:
        from algorithms.Percival14.defs_class import Defs
        from algorithms.Percival14.onset_strength import onset_strength_signal
        from algorithms.Percival14.beat_period_detection import beat_period_detection
        from algorithms.Percival14.accumulator_overall import accumulator_overall
        defs = Defs()
        oss_sr, oss_data = onset_strength_signal(defs, sample_rate, audio, plot=False)
        tempo_lags = beat_period_detection(defs, oss_sr, oss_data, plot=False)
        bpm = accumulator_overall(defs, tempo_lags, oss_sr)
        results['Percival14'] = {'bpm': bpm}
    except ValueError:
        pass
    return results
Example No. 4
def algorithm_durations(sound):
    """
    Returns the duration of a file according to its length in number of samples and according to an envelope
    computation (see FFont's ISMIR paper; TODO: cite correctly).
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()
    sample_rate = 44100
    n_channels = 1
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=sample_rate)
    length_samples = len(audio)
    duration = float(len(audio))/(sample_rate * n_channels)
    # NOTE: load_audio_file will resample to the given sample_rate and downmix to mono

    # Effective duration
    env = estd.Envelope(attackTime=10, releaseTime=10)
    envelope = env(essentia.array(audio))
    threshold = envelope.max() * 0.05
    envelope_above_threshold = np.where(envelope >= threshold)
    start_effective_duration = envelope_above_threshold[0][0]
    end_effective_duration = envelope_above_threshold[0][-1]
    length_samples_effective_duration = end_effective_duration - start_effective_duration

    results['durations'] = {
        'duration': duration,
        'length_samples': length_samples,
        'length_samples_effective_duration': length_samples_effective_duration,
        'start_effective_duration': start_effective_duration,
        'end_effective_duration': end_effective_duration
    }
    return results
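The effective-duration values above are sample indices at 44.1 kHz. A small sketch (assuming the `results` dictionary returned above) converts them to seconds and to the fraction of the clip whose envelope stays above 5% of its peak:

def effective_duration_seconds(results, sample_rate=44100):
    d = results['durations']
    seconds = d['length_samples_effective_duration'] / float(sample_rate)
    fraction = d['length_samples_effective_duration'] / float(d['length_samples'])
    return seconds, fraction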
Example No. 7
def algorithm_rhythm_percival_essentia(sound):
    """
    Estimates tempo using Essentia's built-in implementation of the Percival14 estimator (PercivalBpmEstimator).
    :param sound: sound dictionary from dataset
    :return: dictionary with results
    """
    results = dict()
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)
    tempo_estimator = estd.PercivalBpmEstimator()
    bpm = tempo_estimator(audio)
    results['Percival14_essentia'] = {'bpm': bpm}

    return results
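For comparison, the same estimator can be run standalone. A minimal sketch, assuming Essentia's standard-mode bindings and a hypothetical file path:

import essentia.standard as estd

audio = estd.MonoLoader(filename='loop.wav', sampleRate=44100)()  # hypothetical path
bpm = estd.PercivalBpmEstimator(sampleRate=44100)(audio)
print('%.1f BPM' % bpm)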
Example No. 9
def algorithm_tonal_key_essentia_basic(sound):
    """
    Estimates the tonality of a given audio file.
    See http://essentia.upf.edu/documentation/reference/std_KeyExtractor.html.
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()

    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)

    key_extractor = estd.KeyExtractor()
    key, scale, strength = key_extractor(audio)
    results['EssentiaBasic'] = {
        'key': '%s %s' % (key, scale),
        'strength': strength
    }

    return results
Example No. 10
def algorithm_rhythm_essentia_basic(sound):
    """
    Estimates the BPM of a given audio file using Zapata14 and Degara12.
    * Zapata14: Jose R. Zapata, Matthew E. P. Davies, and Emilia Gomez. Multi-Feature Beat Tracking. IEEE/ACM
    Transactions on Audio, Speech, and Language Processing, 22(4):816-825, 2014.
    * Degara12: Norberto Degara, Enrique Argones Rua, Antonio Pena, Soledad Torres-Guijarro, Matthew E. P. Davies,
    and Mark D. Plumbley. Reliability-Informed Beat Tracking of Musical Signals. IEEE Transactions on Audio,
    Speech, and Language Processing, 20(1):290-301, 2012.
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()
    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)

    # Method RhythmExtractor2013 - multifeature
    rhythm_extractor_2013 = estd.RhythmExtractor2013()
    bpm, ticks, confidence, _, bpm_intervals = rhythm_extractor_2013(audio)
    results['Zapata14'] = {'bpm': bpm, 'confidence': float(confidence)}

    # Method RhythmExtractor2013 - degara
    rhythm_extractor_2013 = estd.RhythmExtractor2013(method='degara')
    bpm, ticks, confidence, _, bpm_intervals = rhythm_extractor_2013(audio)
    results['Degara12'] = {'bpm': bpm}
    return results
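The `ticks` output of RhythmExtractor2013 holds beat positions in seconds, which can be used to cross-check the reported BPM. A short sketch, assuming NumPy is available as `np`:

def bpm_from_ticks(ticks):
    # Median inter-beat interval (seconds) converted to beats per minute
    intervals = np.diff(ticks)
    return 60.0 / float(np.median(intervals)) if len(intervals) else 0.0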
def algorithm_pitch_note_essentia(sound):
    """
    Estimates the note of a given audio file using two pitch estimators (PitchYin and PitchYinFFT).
    
    :param sound: sound dictionary from dataset
    :return: dictionary with results per different methods
    """
    results = dict()

    audio = load_audio_file(file_path=sound[SOUND_FILE_KEY], sample_rate=44100)
    frameSize = 1024
    hopsize = frameSize

    # Estimate pitch using PitchYin
    frames = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopsize)
    pitchDetect = estd.PitchYin(frameSize=frameSize, sampleRate=44100)
    pitches = []
    confidence = []
    for frame in frames:
        f, conf = pitchDetect(frame)
        pitches += [f]
        confidence += [conf]

    pitches = [pitch for pitch in pitches if pitch > 0]  # Keep only voiced frames
    if not pitches:
        pitch_median = 0.1  # No pitched frames; a small non-zero value keeps the MIDI conversion well defined
    else:
        pitch_median = median(pitches)
    midi_note = frequency_to_midi_note(pitch_median)
    note = midi_note_to_note(midi_note)
    results.update({
        'EssentiaPitchYin': {
            'note': note,
            'midi_note': midi_note,
            'pitch': pitch_median
        }
    })

    # Estimate pitch using PitchYinFFT
    frames = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopsize)
    pitchDetect = estd.PitchYinFFT(frameSize=frameSize, sampleRate=44100)
    win = estd.Windowing(type='hann')
    pitches = []
    confidence = []
    for frame in frames:
        spec = estd.Spectrum()(win(frame))
        f, conf = pitchDetect(spec)
        pitches += [f]
        confidence += [conf]
    pitches = [pitch for pitch in pitches if pitch > 0]  # Keep only voiced frames
    if not pitches:
        pitch_median = 0.1  # No pitched frames; a small non-zero value keeps the MIDI conversion well defined
    else:
        pitch_median = median(pitches)
    midi_note = frequency_to_midi_note(pitch_median)
    note = midi_note_to_note(midi_note)
    results.update({
        'EssentiaPitchYinFFT': {
            'note': note,
            'midi_note': midi_note,
            'pitch': pitch_median
        }
    })

    return results
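The helpers `frequency_to_midi_note` and `midi_note_to_note` are defined elsewhere in the module. A hypothetical reimplementation based on the standard equal-temperament conversion (MIDI note 69 = A4 = 440 Hz) could look like this:

import math

NOTE_NAMES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

def frequency_to_midi_note(frequency):
    # 12 semitones per octave; MIDI 69 is A4 at 440 Hz
    return int(round(69 + 12 * math.log2(frequency / 440.0)))

def midi_note_to_note(midi_note):
    # Octave numbering where MIDI 60 is C4, e.g. 69 -> 'A4'
    octave = midi_note // 12 - 1
    return '%s%d' % (NOTE_NAMES[midi_note % 12], octave)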