def analyse(filename, resample_to=2756, bt_hop_length=128,
            chroma_hop_length=512, chroma_n_fft=1024):
    samples, sampleRate = librosa.load(filename)
    length = float(len(samples))/sampleRate
    if resample_to:
        samples = librosa.resample(samples, sampleRate, resample_to)
        sampleRate = resample_to
    tempo, beats = librosa.beat.beat_track(samples, sampleRate,
                                           hop_length=bt_hop_length)
    beat_times = librosa.frames_to_time(beats, sampleRate,
                                        hop_length=bt_hop_length)
    chromagram = librosa.feature.chromagram(samples, sampleRate,
                                            hop_length=chroma_hop_length,
                                            n_fft=chroma_n_fft)
    chromagram = numpy.transpose(chromagram)
    distances = scipy.spatial.distance.cdist(chromagram, CHORDS, "cosine")
    chords = distances.argmin(axis=1)
    chords = scipy.signal.medfilt(chords, 11)
    chord_frames = numpy.array(numpy.where(numpy.diff(chords) != 0))
    chords = chords[chord_frames][0].astype(int)
    chord_times = librosa.frames_to_time(chord_frames, sampleRate,
                                         hop_length=chroma_hop_length,
                                         n_fft=chroma_n_fft)[0]
    chord_names = CHORD_NAMES[chords]
    return {"beats": list(beat_times),
            "chords": [{"chord": chord_name, "time": chord_time} for chord_name, chord_time in zip(chord_names, chord_times)],
            "tempo": tempo}
Example #2
def features(filename):
    # print '\t[1/5] loading audio'
    y, sr = librosa.load(filename, sr=SR)

    # print '\t[2/5] Separating harmonic and percussive signals'
    y_perc, y_harm = hp_sep(y)

    # print '\t[3/5] detecting beats'
    bpm, beats = get_beats(y=y_perc, sr=sr, hop_length=HOP_LENGTH)

    # print '\t[4/5] generating CQT'
    M1 = np.abs(
        librosa.cqt(y=y_harm, sr=sr, hop_length=HOP_LENGTH, bins_per_octave=12, fmin=librosa.midi_to_hz(24), n_bins=72)
    )

    M1 = librosa.logamplitude(M1 ** 2.0, ref_power=np.max)

    # print '\t[5/5] generating MFCC'
    S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=HOP_LENGTH, n_mels=N_MELS)
    M2 = librosa.feature.mfcc(S=librosa.logamplitude(S), n_mfcc=N_MFCC)

    n = min(M1.shape[1], M2.shape[1])

    beats = beats[beats < n]

    beats = np.unique(np.concatenate([[0], beats]))

    times = librosa.frames_to_time(beats, sr=sr, hop_length=HOP_LENGTH)

    times = np.concatenate([times, [float(len(y)) / sr]])
    M1 = librosa.feature.sync(M1, beats, aggregate=np.median)
    M2 = librosa.feature.sync(M2, beats, aggregate=np.mean)
    return (M1, M2), times
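SR, HOP_LENGTH, N_MELS, N_MFCC and the helpers hp_sep / get_beats live elsewhere in that module; a minimal sketch with assumed values and thin librosa wrappers:

import librosa

SR = 22050          # assumed analysis sample rate
HOP_LENGTH = 512    # assumed hop size in samples
N_MELS = 128        # assumed number of mel bands
N_MFCC = 13         # assumed number of MFCCs

def hp_sep(y):
    # Caller unpacks (percussive, harmonic), so return in that order.
    y_harm, y_perc = librosa.effects.hpss(y)
    return y_perc, y_harm

def get_beats(y, sr, hop_length):
    # Thin wrapper around librosa's beat tracker: returns (tempo, beat frames).
    return librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)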
Example #3
def logcqt_onsets(x, fs, pre_max=0, post_max=1, pre_avg=0,
                  post_avg=1, delta=0.05, wait=50):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal

    fs : scalar
        Samplerate of the audio signal.

    pre_max, post_max, pre_avg, post_avg, delta, wait
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """
    hop_length = 1024
    x_noise = x + np.random.normal(scale=10.**-3, size=x.shape)
    cqt = librosa.cqt(x_noise.flatten(),
                      sr=fs, hop_length=hop_length, fmin=27.5,
                      n_bins=24*8, bins_per_octave=24, tuning=0,
                      sparsity=0, real=False, norm=1)
    cqt = np.abs(cqt)
    lcqt = np.log1p(5000*cqt)

    c_n = utils.canny(51, 3.5, 1)
    onset_strength = sig.lfilter(c_n, np.ones(1), lcqt, axis=1).mean(axis=0)

    peak_idx = librosa.onset.onset_detect(
        onset_envelope=onset_strength, delta=delta, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=hop_length)
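utils.canny(N, sigma, order) is a project-specific helper used here as an onset-detection kernel; a minimal sketch, assuming it returns the impulse response of a Gaussian-derivative (Canny-style) filter of length N:

import numpy as np
import scipy.ndimage

def canny(n, sigma, order):
    # Hypothetical stand-in: length-n derivative-of-Gaussian kernel.
    impulse = np.zeros(n)
    impulse[n // 2] = 1.0
    return scipy.ndimage.gaussian_filter1d(impulse, sigma, order=order)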
Example #4
def get_beat(y, PARAMETERS):
    '''Estimate beat times and tempo'''
    # Compute a log-power mel spectrogram on the percussive component
    S_p = librosa.feature.melspectrogram(y=y, 
                                         sr=PARAMETERS['load']['sr'], 
                                         n_fft=PARAMETERS['stft']['n_fft'], 
                                         hop_length=PARAMETERS['beat']['hop_length'],
                                         n_mels=PARAMETERS['mel']['n_mels'],
                                         fmax=PARAMETERS['mel']['fmax'])
    
    S_p = librosa.logamplitude(S_p, ref_power=S_p.max())
    
    # Compute the median onset aggregation
    odf = librosa.onset.onset_strength(S=S_p, aggregate=np.median)
    
    # Get beats
    tempo, beats = librosa.beat.beat_track(onset_envelope=odf, 
                                           sr=PARAMETERS['load']['sr'], 
                                           hop_length=PARAMETERS['beat']['hop_length'])
      
    beat_times = librosa.frames_to_time(beats, 
                                        sr=PARAMETERS['load']['sr'], 
                                        hop_length=PARAMETERS['beat']['hop_length'])
    
    return tempo, beat_times, odf
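get_beat expects a nested PARAMETERS dictionary; a minimal usage sketch with assumed keys and values (the file path is hypothetical):

import librosa

PARAMETERS = {'load': {'sr': 22050},
              'stft': {'n_fft': 2048},
              'beat': {'hop_length': 512},
              'mel':  {'n_mels': 128, 'fmax': 8000}}

y, _ = librosa.load('example.wav', sr=PARAMETERS['load']['sr'])
tempo, beat_times, odf = get_beat(y, PARAMETERS)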
Example #5
def compute_beats(y_percussive, sr=22050):
    """Computes the beats using librosa.

    Parameters
    ----------
    y_percussive: np.array
        Percussive part of the audio signal in samples.
    sr: int
        Sample rate.

    Returns
    -------
    beats_idx: np.array
        Indices in frames of the estimated beats.
    beats_times: np.array
        Time of the estimated beats.
    """
    logging.info("Estimating Beats...")
    tempo, beats_idx = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                               hop_length=msaf.Anal.hop_size)

    # Add first and last beat
    beats_idx = np.concatenate(([0], beats_idx,
                                [len(y_percussive) / msaf.Anal.hop_size])).\
        astype(np.int)

    # To times
    times = librosa.frames_to_time(beats_idx, sr=sr,
                                   hop_length=msaf.Anal.hop_size)

    return beats_idx, times
Example #6
    def analyse_bpm(self, y, sr):
        """
        determine le bpm d'une musique
        exemple de test:
                        analyse1 = analyse("/home/bettini/Musique/Deorro.wav", "fichier_csv")
                        y, sr = analyse1.extrairedatamusic()
                        analyse1.analyse_bpm(y, sr)

        :param pathtofile: chemin absolue du fichier audio dont on veut analyser le bpm
        :param fichier_csv: fichier csv dans lequel sera enregistre les bpms du morceau (nom de la playlist en cours)
        :Comment ecrit dans le fichier csv a la fin

        """
        # create the list that will be exported to the CSV file
        ElemCsv = []

        # run the default beat tracker
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

        # Convert the beat frame indices into timestamps matching the energy pulses over the course of the track
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # Compute the BPM at the start and at the end of the track, in case the tempo changes partway through
        bpm_d = 0
        bpm_f = 0
        for i in range(100):
            bpm_d = bpm_d + (beat_times[i + 1] - beat_times[i])
            bpm_f = bpm_f + (beat_times[len(beat_times) - i - 1] - beat_times[len(beat_times) - i - 2])

        # fill the list that will be stored in the database
        ElemCsv.append(tempo)
        ElemCsv.append(60 / (bpm_d / 100))
        ElemCsv.append(60 / (bpm_f / 100))

        return ElemCsv  # overall tempo, start BPM, end BPM
Example #7
def envelope_onsets(x, fs, wait=100):
    """
    Parameters
    ----------
    filename : str
        Path to an audiofile to split.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """

    log_env = 10 * np.log10(10. ** -4.5 + np.power(x.flatten()[:], 2.0))
    w_n = np.hanning(100)
    w_n /= w_n.sum()
    log_env_lpf = sig.filtfilt(w_n, np.ones(1), log_env)

    n_hop = 100
    kernel = utils.canny(100, 3.5, 1)
    kernel /= np.abs(kernel).sum()
    onsets_forward = sig.lfilter(
        kernel, np.ones(1),
        log_env_lpf[::n_hop] - log_env_lpf.min(), axis=0)

    onsets_pos = onsets_forward * (onsets_forward > 0)
    peak_idx = librosa.util.peak_pick(onsets_pos,
                                      pre_max=500, post_max=500, pre_avg=10,
                                      post_avg=10, delta=0.025, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=n_hop)
Example #8
def libroRMS(filepath, kRatio):
    y, sr = librosa.load(filepath) # Load the waveform as y, sr is sample rate
    clipLength = librosa.get_duration(y=y, sr=sr)
    kValue = int(clipLength/kRatio +1) #sets up relative ratio of samples

    ### get the RMS of the audio sample ###
    data = librosa.feature.rmse(y=y, hop_length=2048)
    boundaries = librosa.segment.agglomerative(data, k=kValue) # Agglomeration
    boundary_times = librosa.frames_to_time(boundaries, hop_length=2048) # ~.1s
    intervals = np.hstack([boundary_times[:-1, np.newaxis], boundary_times[1:, np.newaxis]])
    get_rms = librosa.feature.sync(data, boundaries, aggregate=np.max)

    nkValue = kValue-1 #because, for some reason, the intervals above leave out the last one
    fixedN = np.delete(get_rms, nkValue, axis=1)
    npsTurn = np.concatenate((intervals, fixedN.T), axis=1)

    #transform from np array to regular list
    flatnps = npsTurn.tolist()
    slice_value = int(kValue//3)
    rmsOut1 = sorted(flatnps, key = lambda x: int(x[2]), reverse=True)
    #rmsOut2 = slice(rmsOut1[0: slice_value])
    rmsOut2 = rmsOut1[0 : slice_value]
    rmsOut3 = sorted(rmsOut2, key = lambda x: int(x[0]))

    return rmsOut3
Example #9
def logcqt_onsets(x, fs, pre_max=0, post_max=1, pre_avg=0,
                  post_avg=1, delta=0.05, wait=50, hop_length=1024):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal

    fs : scalar
        Samplerate of the audio signal.

    pre_max, post_max, pre_avg, post_avg, delta, wait
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """
    lcqt = logcqt(x, fs, hop_length)
    c_n = utils.canny(51, 3.5, 1)
    onset_strength = sig.lfilter(c_n, np.ones(1), lcqt, axis=1).mean(axis=0)

    peak_idx = librosa.onset.onset_detect(
        onset_envelope=onset_strength, delta=delta, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=hop_length)
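This variant factors the log-CQT computation into a separate logcqt helper; based on the inlined version in Example #3 above, it could look roughly like this (some keyword arguments from the older librosa API are dropped):

import numpy as np
import librosa

def logcqt(x, fs, hop_length):
    # Log-compressed magnitude CQT, mirroring the inline computation in Example #3.
    x_noise = x + np.random.normal(scale=1e-3, size=x.shape)
    cqt = librosa.cqt(x_noise.flatten(), sr=fs, hop_length=hop_length,
                      fmin=27.5, n_bins=24 * 8, bins_per_octave=24)
    return np.log1p(5000 * np.abs(cqt))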
Example #10
    def estimate_beats(self):
        """Estimates the beats using librosa.

        Returns
        -------
        times: np.array
            Times of estimated beats in seconds.
        frames: np.array
            Frame indices of estimated beats.
        """
        # Compute harmonic-percussive source separation if needed
        if self._audio_percussive is None:
            self._audio_harmonic, self._audio_percussive = self.compute_HPSS()

        # Compute beats
        tempo, frames = librosa.beat.beat_track(
            y=self._audio_percussive, sr=self.sr,
            hop_length=self.hop_length)

        # To times
        times = librosa.frames_to_time(frames, sr=self.sr,
                                       hop_length=self.hop_length)

        # TODO: Is this really necessary?
        if len(times) > 0 and times[0] == 0:
            times = times[1:]
            frames = frames[1:]

        return times, frames
    def filter_out(self,nob,song2):
        song2.change_temp(self.tempo)
        song2.cut_song(self.length_of_songs)
        l=scipy.signal.firwin( numtaps=10, cutoff=300, nyq=self.sr/2)
        h=-l
        h[10//2]=h[10//2]+1
        fader_l=self.audio_left[int(self.bars[-nob-1][1]*self.sr):]
        fader_r=self.audio_right[int(self.bars[-nob-1][1]*self.sr):]
        fader=np.arange(float(len(fader_l)))/float(len(fader_l))
        fader_l=scipy.signal.lfilter(l,1.0,fader_l*fader[::-1])
        fader_r=scipy.signal.lfilter(l,1.0,fader_r*fader[::-1])
        haha=scipy.signal.lfilter(h,1.0,(song2.audio_left[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_left[int(self.bars[-nob-1][1]*self.sr):])]*fader))

        self.audio_left[int(self.bars[-nob-1][1]*self.sr):]=fader_l+haha

        self.audio_left=np.concatenate((self.audio_left,song2.audio_left[len(haha):]))

        haha=scipy.signal.lfilter(h,1.0,(song2.audio_right[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_right[int(self.bars[-nob-1][1]*self.sr):])]*fader))

        self.audio_right[int(self.bars[-nob-1][1]*self.sr):]=fader_r+haha

        self.audio_right=np.concatenate((self.audio_right,song2.audio_right[len(haha):]))
        tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
        self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
        bars=[]
        for i in range(len(self.beat_times)//4-1):
            bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
        self.bars=np.array(bars)
Example #12
def beat_track(input_file, output_csv):
    '''Beat tracking function
    
    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    print 'Loading ', input_file
    y, sr         = librosa.load(input_file, sr=22050)

    # Use a default hop size of 64 samples @ 22KHz ~= 2.9ms
    HOP_LENGTH  = 64

    # This is the window length used by default in stft
    N_FFT       = 2048

    print 'Tracking beats'
    tempo, beats    = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)

    print 'Estimated tempo: %0.2f beats per minute' % tempo

    # 3. save output
    # 'beats' will contain the frame numbers of beat events.

    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=HOP_LENGTH, n_fft=N_FFT)

    print 'Saving output to ', output_csv
    librosa.output.times_csv(output_csv, beat_times)
    print 'done!'
Example #13
def hpss_beats(input_file, output_csv):
    '''HPSS beat tracking
    
    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    # Load the file
    print 'Loading  ', input_file
    y, sr = librosa.load(input_file)

    # Do HPSS
    print 'Harmonic-percussive separation ... '
    y = percussive(y)

    # Construct onset envelope from percussive component
    print 'Tracking beats on percussive component'
    onsets = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH, n_fft=N_FFT, aggregate=np.median)

    # Track the beats
    tempo, beats = librosa.beat.beat_track( onsets=onsets, 
                                            sr=sr, 
                                            hop_length=HOP_LENGTH)

    beat_times  = librosa.frames_to_time(beats, 
                                         sr=sr, 
                                         hop_length=HOP_LENGTH,
                                         n_fft=N_FFT)
    # Save the output
    print 'Saving beats to ', output_csv
    librosa.output.times_csv(output_csv, beat_times)
Example #14
def extract_cqt(audio_data):
    '''
    CQT routine with default parameters filled in, and some post-processing.

    Parameters
    ----------
    audio_data : np.ndarray
        Audio data to compute CQT of

    Returns
    -------
    cqt : np.ndarray
        CQT of the supplied audio data.
    frame_times : np.ndarray
        Times, in seconds, of each frame in the CQT
    '''
    # Compute CQT
    cqt = librosa.cqt(audio_data, sr=FS, fmin=librosa.midi_to_hz(NOTE_START),
                      n_bins=N_NOTES, hop_length=HOP_LENGTH, tuning=0.)
    # Compute the time of each frame
    times = librosa.frames_to_time(
        np.arange(cqt.shape[1]), sr=FS, hop_length=HOP_LENGTH)
    # Use float32 for the cqt to save space/memory
    cqt = cqt.astype(np.float32)
    return cqt, times
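FS, NOTE_START, N_NOTES and HOP_LENGTH are module-level constants not shown; a minimal usage sketch with assumed values (the file path is hypothetical):

import librosa

FS = 22050          # assumed sample rate
NOTE_START = 36     # assumed lowest MIDI note (C2)
N_NOTES = 48        # assumed number of CQT bins (4 octaves, 12 bins each)
HOP_LENGTH = 1024   # assumed hop size in samples

audio, _ = librosa.load('example.wav', sr=FS)
cqt, frame_times = extract_cqt(audio)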
Example #15
def process_file(input_file, **kwargs):

    output_file = os.path.basename(input_file)
    output_file = os.path.splitext(output_file)[0]
    output_file = os.path.extsep.join([output_file, "log"])

    if kwargs["median"]:
        output_file = os.path.extsep.join([output_file, "med"])
    else:
        output_file = os.path.extsep.join([output_file, "sum"])

    output_file = os.path.extsep.join([output_file, kwargs["spectrogram"]])
    output_file = os.path.extsep.join([output_file, "csv"])
    output_file = os.path.join(kwargs["destination"], output_file)

    with open(input_file, "r") as f:
        S = pickle.load(f)[SPECMAP[kwargs["spectrogram"]]].astype(np.float32)

    if kwargs["median"]:
        odf = librosa.onset.onset_strength(S=S, sr=SR, hop_length=HOP, n_fft=N_FFT, aggregate=np.median)
    else:
        odf = librosa.onset.onset_strength(S=S, sr=SR, hop_length=HOP, n_fft=N_FFT, aggregate=np.mean)

    tempo, beats = librosa.beat.beat_track(onsets=odf, sr=SR, hop_length=HOP, tightness=TIGHTNESS)

    times = librosa.frames_to_time(beats, sr=SR, hop_length=HOP, n_fft=N_FFT)
    librosa.output.times_csv(output_file, times)
Example #16
def segment_audio_timeit(signal, sr):

    start_time = timeit.default_timer()
    silence_threshold = get_silence_threshold(signal, sr)
    print("getsilencethreshold: ")
    print(timeit.default_timer() - start_time)

    start_time = timeit.default_timer()
    o_env = librosa.onset.onset_strength(y=signal, sr=sr, centering=False, hop_length=HOP_LENGTH)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr, hop_length=HOP_LENGTH)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH)
    print("librosa.onset_detect: ")
    print(timeit.default_timer() - start_time)

    segments = []

    overalltime = timeit.default_timer()
    for i in range(len(onset_times)):
        segment_start = int(onset_times[i]*sr)
        if i != len(onset_times)-1:
            segment_end = int(onset_times[i+1]*sr)-HOP_LENGTH
        else:
            segment_end = len(signal)-1
        segment_end = find_segment_end(segment_start, segment_end, signal, silence_threshold)

        if (segment_end - segment_start >= MIN_SOUND_LEN*sr) and (onset_times[i] > START_TIME)\
                and (onset_times[i] < (len(signal)/sr-END_TIME)):
            segments.append((signal[segment_start: segment_end], onset_times[i]))

    print('all segments')
    print(timeit.default_timer() - overalltime)

    return segments
Example #17
def compute_beats(y_percussive, sr=22050):
    """Computes the beats using librosa.

    Parameters
    ----------
    y_percussive: np.array
        Percussive part of the audio signal in samples.
    sr: int
        Sample rate.

    Returns
    -------
    beats_idx: np.array
        Indices in frames of the estimated beats.
    beats_times: np.array
        Time of the estimated beats.
    """
    logging.info("Estimating Beats...")
    tempo, beats_idx = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                               hop_length=msaf.Anal.hop_size)
    times = librosa.frames_to_time(beats_idx, sr=sr,
                                   hop_length=msaf.Anal.hop_size)

    # Remove first beat time if 0
    if len(times) > 0 and times[0] == 0:
        times = times[1:]
        beats_idx = beats_idx[1:]
    return beats_idx, times
Example #18
def beat_track(input_file, output_csv):
    '''Beat tracking function

    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    print('Loading ', input_file)
    y, sr = librosa.load(input_file, sr=22050)

    # Use a default hop size of 512 samples @ 22KHz ~= 23ms
    hop_length = 512

    # This is the window length used by default in stft
    print('Tracking beats')
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)

    print('Estimated tempo: {:0.2f} beats per minute'.format(tempo))

    # save output
    # 'beats' will contain the frame numbers of beat events.
    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=hop_length)

    print('Saving output to ', output_csv)
    librosa.output.times_csv(output_csv, beat_times)
    print('done!')
Example #19
    def analyze(self):
        audio_path = self.path
        y, sr = librosa.load(audio_path, sr=None)
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        self.tempo = tempo
        self.beats = list(beats)
        self.times = list(librosa.frames_to_time(beats, sr=sr))
    def fade_out(self,nob,song2):
        song2.change_temp(self.tempo)
        song2.cut_song(self.length_of_songs)
        fader_l=self.audio_left[int(self.bars[-nob-1][1]*self.sr):]
        fader_r=self.audio_right[int(self.bars[-nob-1][1]*self.sr):]
        fader=np.arange(float(len(fader_l)))/float(len(fader_l))
        fader_l=fader_l*fader[::-1]
        fader_r=fader_r*fader[::-1]
        haha=song2.audio_left[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_left[int(self.bars[-nob-1][1]*self.sr):])]*fader

        self.audio_left[int(self.bars[-nob-1][1]*self.sr):]=fader_l+haha

        self.audio_left=np.concatenate((self.audio_left,song2.audio_left[len(haha):]))

        haha=song2.audio_right[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_right[int(self.bars[-nob-1][1]*self.sr):])]*fader

        self.audio_right[int(self.bars[-nob-1][1]*self.sr):]=fader_r+haha

        self.audio_right=np.concatenate((self.audio_right,song2.audio_right[len(haha):]))
        tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
        self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
        bars=[]
        for i in range(len(self.beat_times)//4-1):
            bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
        self.bars=np.array(bars)
Example #21
def ellis_bpm(fname, start_bpm, hpss=True, hop_length=512, tightness=100.0, plot=False, sound=False):
    y, sr = librosa.load(fname, sr=None)
    log.debug(u'Estimating tempo: {}'.format(TERM.cyan(fname)))
    if hpss:
        log.debug(TERM.magenta("Getting percussive elements"))
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        chunks = np.array_split(y_percussive, PLOT_SPLIT)
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr, start_bpm=start_bpm, hop_length=hop_length, tightness=tightness)
    else:
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y, sr=sr, start_bpm=start_bpm, hop_length=hop_length, tightness=tightness)
        chunks = np.array_split(y, PLOT_SPLIT)

    log.debug(u'Tempo: {:6.2f} bpm'.format(bpm))
    if plot:
        plt.figure(figsize=(16,10))

        curr_frame = 0
        for i in range(PLOT_SPLIT):
            plt.subplot(PLOT_SPLIT * 100 + 11 + i)
            plt.plot(curr_frame + np.arange(len(chunks[i])), chunks[i], 'g')
            for b in beat_frames:
                plt.axvline(x=b*hop_length, color='k')
            plt.xlim([curr_frame, len(chunks[i]) + curr_frame])
            curr_frame += len(chunks[i])
        plt.show(block=False)
    if sound:
        beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=hop_length)
        clicks = mir_eval.sonify.clicks(beat_times, sr, length=len(y))
        sd.play(y + clicks, sr)
        input('Press Return key to stop sound')
        sd.stop()
    return bpm
Example #22
def main():
	""" 
	main() - Main function for feature extraction

		Inputs: 
			None

		Outputs:
			Pickle file with feature data
	"""

	vocalData = loadmat('../../Data/firstVerseTimes.mat')
	audioPath = '../../Audio/Vocals/'
	assert isdir(audioPath), "Audio path does not exist"		# Make sure directory of audio exists

	fileList = [ join(audioPath, 'Vocals_' + str(vocalData['firstVerseTimes'][i][3][0])) for i in range(len(vocalData['firstVerseTimes'])) ]
	numFiles = len(fileList)
	vocalFeatures = np.zeros((numFiles, 8))

	for i in range(numFiles):

		print 'Working on file {} of {}'.format(i, numFiles)
		# Read in audio 
		audio, sr = librosa.load(fileList[i], sr=44100)
		S = librosa.stft(audio, n_fft = 1024, hop_length = 512)
		spec = np.abs(S)

		# Extract features
		centroids = centroid(spec, sr)														# Spectral centroid
		contrasts = contrast(spec, sr, 0.05)												# Spectral contrast
		onset_frames    = librosa.onset.onset_detect(y=audio, sr=sr, hop_length=64)			# Calculate frames of onsets
		onset_times     = librosa.frames_to_time(onset_frames, sr, hop_length=64)			# Calculate times of onsets

		# Extract feature statistics
		vocalFeatures[i,0] =  np.mean(np.diff(onset_times))						# Mean of onset durations
		vocalFeatures[i,1] = np.var(np.diff(onset_times))						# Variance of onset durations
		vocalFeatures[i,2], beats = librosa.beat.beat_track(audio, sr )			# Get beats and tempo
		vocalFeatures[i,3] = np.mean(centroids)									# Mean of centroids
		vocalFeatures[i,4] = np.var(centroids)									# Variance of centroids
		vocalFeatures[i,5] = np.mean(contrasts)									# Mean of spectral contrast
		vocalFeatures[i,6] = np.var(contrasts)									# Variance of spectral contrast
		vocalFeatures[i,7] = onset_times.shape[0] / (audio.shape[0] / float(sr))# Onset density

	# Create dictionary for features
	dataDict = {'ID': np.array([vocalData['firstVerseTimes'][i][0][0][0] for i in range(len(vocalData['firstVerseTimes']))]), 
				'onsetMean': vocalFeatures[:,0],
				'onsetVar': vocalFeatures[:,1],
				'tempo': vocalFeatures[:,2],
				'centroidMean': vocalFeatures[:,3],
				'centroidVar': vocalFeatures[:,4],
				'contrastMean': vocalFeatures[:,5],
				'contrastVar': vocalFeatures[:,6],
				'onsetDensity': vocalFeatures[:,7],
				'artist': [vocalData['firstVerseTimes'][i][1][0] for i in range(len(vocalData['firstVerseTimes']))],
				'song': np.array([vocalData['firstVerseTimes'][i][2][0] for i in range(len(vocalData['firstVerseTimes']))])}

	dump(dataDict, open('vocalFeatureData.p', 'w'))

	print ('Done')
Example #23
def extract_timing_data(filename, samplerate=22050, channels=1, hop_length=64):
    x_n, fs = marl.audio.read(filename, samplerate, channels)
    onset_env = librosa.onset.onset_strength(
        x_n.squeeze(), fs, hop_length=hop_length, aggregate=np.median)
    tempo, beat_frames = librosa.beat.beat_track(
        onset_envelope=onset_env, sr=fs, hop_length=hop_length)
    beat_times = librosa.frames_to_time(
        beat_frames, sr=fs, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=fs, hop_length=hop_length)
    onset_times = librosa.frames_to_time(
        onset_frames, sr=fs, hop_length=hop_length)
    duration = len(x_n) / fs
    return dict(onset_times=onset_times.tolist(),
                beat_times=beat_times.tolist(),
                tempo=tempo,
                duration=duration)
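marl.audio.read is an in-house loader; a rough librosa-based stand-in (an assumption, not the original implementation) would be:

import librosa

def read_audio(filename, samplerate=22050, channels=1):
    # Returns (samples, sr); samples shaped (n, 1) so the caller's .squeeze() applies.
    x, fs = librosa.load(filename, sr=samplerate, mono=(channels == 1))
    return x.reshape(-1, 1), fs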
Example #24
def analyze_frames(y, sr, debug=False):
    A = {}
    
    hop_length = 128

    # First, get the track duration
    A['duration'] = float(len(y)) / sr

    # Then, get the beats
    if debug: print "> beat tracking"
    tempo, beats = librosa.beat.beat_track(y, sr, hop_length=hop_length)

    # Push the last frame as a phantom beat
    A['tempo'] = tempo
    A['beats'] = librosa.frames_to_time(beats, sr, hop_length=hop_length).tolist()

    if debug: print "beats count: ", len(A['beats'])

    if debug: print "> spectrogram"
    S = librosa.feature.melspectrogram(y, sr,   n_fft=2048, 
                                                hop_length=hop_length, 
                                                n_mels=80, 
                                                fmax=8000)
    S = S / S.max()

    # A['spectrogram'] = librosa.logamplitude(librosa.feature.sync(S, beats)**2).T.tolist()

    # Let's make some beat-synchronous mfccs
    if debug: print "> mfcc"
    S = librosa.feature.mfcc(librosa.logamplitude(S), n_mfcc=40)
    A['timbres'] = librosa.feature.sync(S, beats).T.tolist()

    if debug: print "timbres count: ", len(A['timbres'])

    # And some chroma
    if debug: print "> chroma"
    S = N.abs(librosa.stft(y, hop_length=hop_length))

    # Grab the harmonic component
    H = librosa.decompose.hpss(S)[0]
    # H = librosa.hpss.hpss_median(S, win_P=31, win_H=31, p=1.0)[0]
    A['chroma'] = librosa.feature.sync(librosa.feature.chromagram(S=H, sr=sr),
                                        beats,
                                        aggregate=N.median).T.tolist()

    # Relative loudness
    S = S / S.max()
    S = S**2

    if debug: print "> dists"
    dists = structure(N.vstack([N.array(A['timbres']).T, N.array(A['chroma']).T]))
    A['dense_dist'] = dists

    edge_lens = [A["beats"][i] - A["beats"][i - 1]
                 for i in xrange(1, len(A["beats"]))]
    A["avg_beat_duration"] = N.mean(edge_lens)

    return A
Example #25
    def __test(units, hop_length, y, sr):

        tempo, b1 = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
        _, b2 = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length,
                                        units=units)

        t1 = librosa.frames_to_time(b1, sr=sr, hop_length=hop_length)

        if units == 'time':
            t2 = b2

        elif units == 'samples':
            t2 = librosa.samples_to_time(b2, sr=sr)

        elif units == 'frames':
            t2 = librosa.frames_to_time(b2, sr=sr, hop_length=hop_length)

        assert np.allclose(t1, t2)
Example #26
    def __test(units, hop_length, y, sr):

        b1 = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length)
        b2 = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length,
                                        units=units)

        t1 = librosa.frames_to_time(b1, sr=sr, hop_length=hop_length)

        if units == 'time':
            t2 = b2

        elif units == 'samples':
            t2 = librosa.samples_to_time(b2, sr=sr)

        elif units == 'frames':
            t2 = librosa.frames_to_time(b2, sr=sr, hop_length=hop_length)

        assert np.allclose(t1, t2)
    def __test(infile):
        DATA    = load(infile)
        
        (bpm, beats) = librosa.beat.beat_track(y=None, sr=8000, hop_length=32,
                                               onsets=DATA['onsetenv'][0], n_fft=None)

        print beats
        print DATA['beats']
        assert numpy.allclose(librosa.frames_to_time(beats, sr=8000, hop_length=32), DATA['beats'])
        pass
Example #28
	def beat_analysis(self):
		"""runs the analysis on the song to determine where the beats are, and adds a beat channel"""
		self.tempo, self.beat_frames = librosa.beat.beat_track(self.waveform,self.sample_rate)
		self.beat_times = librosa.frames_to_time(self.beat_frames, self.sample_rate)
		self.beat_channel=Channel('Beat',False)
		for second in self.beat_times:
			#rounds time to 1/10 of a second
			second = round(second, 1)
			time=datetime.timedelta(0,second)
			#saves beat in channel
			self.beat_channel.update(time, True)
Example #29
    def __test(infile):

        DATA = load(infile)

        (bpm, beats) = librosa.beat.beat_track(y=None,
                                               sr=8000,
                                               hop_length=32,
                                               onset_envelope=DATA['onsetenv'][0])

        beat_times = librosa.frames_to_time(beats, sr=8000, hop_length=32)
        assert np.allclose(beat_times, DATA['beats'])
    def direct(self,song2):
        song2.change_temp(self.tempo)
        song2.cut_song(self.length_of_songs)
        self.audio_left=np.concatenate((self.audio_left[:int(self.beat_times[-1]*self.sr)],song2.audio_left[int(song2.beat_times[0]*self.sr):]))
        self.audio_right=np.concatenate((self.audio_right[:int(self.beat_times[-1]*self.sr)],song2.audio_right[int(song2.beat_times[0]*self.sr):]))
        tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
        self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
        bars=[]
        for i in range(len(self.beat_times)//4-1):
            bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
        self.bars=np.array(bars)
Example #31
def midi_to_chroma(midi):
  return midi.get_chroma(times = librosa.frames_to_time(np.arange(midi.get_end_time()*22050/512)))
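Here midi is expected to be a pretty_midi.PrettyMIDI object; a minimal usage sketch (the MIDI path is hypothetical):

import pretty_midi

midi = pretty_midi.PrettyMIDI('example.mid')
chroma = midi_to_chroma(midi)  # one chroma column per 512-sample frame at 22050 Hz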
def test_feature():
    file = "/mnt/hgfs/vmfiles/genres/pop/pop.00003.wav"
    fp = FeaturePlan(sample_rate=22050)
    fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')  #13
    fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')  #1
    fp.addFeature('sf: SpectralFlux blockSize=512 stepSize=256')  #1
    fp.addFeature(
        'scfp: SpectralCrestFactorPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #19
    fp.addFeature(
        'sf1: SpectralFlatness FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #1
    fp.addFeature(
        'sc: SpectralShapeStatistics FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #4
    fp.addFeature(
        'sfp: SpectralFlatnessPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #19
    fp.addFeature('energy: Energy blockSize=512  stepSize=256')  #1
    fp.addFeature(
        'loudness: Loudness FFTLength=0  FFTWindow=Hanning  LMode=Relative  blockSize=512  stepSize=256'
    )  #24
    fp.addFeature(
        'ms: MagnitudeSpectrum FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #257
    fp.addFeature(
        'ps: PerceptualSharpness FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256'
    )  #1
    fp.addFeature('zcr:ZCR blockSize=512  stepSize=256')  #1
    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, file)
    feats = engine.readAllOutputs()
    ceps = feats['scfp']
    print 'scfp', ceps.shape
    print 'loudness', feats['loudness'].shape
    print 'sfp', feats['sfp'].shape
    #num_ceps = len(ceps)
    c = calc_statistical_features(ceps.transpose())
    print 'c', c.shape
    y, sr = librosa.load(file)
    print y.shape
    print sr

    hop_length = 256
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_length)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])  #384
    ac_global = librosa.util.normalize(ac_global)
    print ac_global.shape
    tempo = librosa.beat.estimate_tempo(oenv, sr=sr, hop_length=hop_length)  #1
    print "tempo", tempo
    print "tempogram", tempogram.shape  #384

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    print "tempo", tempo
    print "beat_frames", beat_frames.shape
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    print "beat_times", beat_times.shape
    print beat_times
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    print "mfcc", mfcc.shape
    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)
    print "mfcc_delta", mfcc_delta.shape
    # Stack and synchronize between beat events
    # This time, we'll use the mean value (default) instead of median
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]),
                                           beat_frames)

    print "beat_mfcc_delta", beat_mfcc_delta.shape

    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)  #12

    c = np.mean(chromagram, axis=1)
    print "c", c.shape
    print "chromagram", chromagram.shape
    r = calc_statistical_features(chromagram)
    print r.shape

    beat_chroma = librosa.feature.sync(chromagram,
                                       beat_frames,
                                       aggregate=np.median)
    print "beat_chroma", beat_chroma.shape
    #print beat_chroma
    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    print "beat_features", beat_features.shape
    beat_feature_set = np.mean(beat_features, axis=1)
    print beat_feature_set.shape
    print beat_feature_set
    #print np.mean(ceps,axis =0)
    #return np.mean(ceps[int(num_ceps*1/10):int(num_ceps*9/10)], axis=0)

    a1 = calc_statistical_features(feats['scfp'].transpose())  #19*7 = 133
    a1 = a1.reshape(a1.shape[0] * a1.shape[1])
    print a1.shape
    a2 = calc_statistical_features(feats['sfp'].transpose())  #19*7 = 133
    a2 = a2.reshape(a2.shape[0] * a2.shape[1])
    print a2.shape
    a3 = calc_statistical_features(feats['loudness'].transpose())  #24*7 = 168
    a3 = a3.reshape(a3.shape[0] * a3.shape[1])
    print a3.shape
    a4 = calc_statistical_features(tempogram)
    a4 = a4.reshape(a4.shape[0] * a4.shape[1])
    print a4.shape
    a5 = calc_statistical_features(chromagram)  #12*7 = 84
    a5 = a5.reshape(a5.shape[0] * a5.shape[1])
    print a5.shape
    feature5_set = np.hstack((a1, a2, a3, a4, a5))  #384*7 = 2688
    print "feature5_set", feature5_set.shape
Example #33
        recognize_y = onsets_frames.copy()
        onsets_frames_strength = librosa.onset.onset_strength(y=y, sr=sr)
        #onsets_frames = get_onsets_by_all_v2(y, sr,len(codes[type_index])+2)
        if len(onsets_frames) < 3:
            continue

        #print("onsets_frames is {}".format(onsets_frames))

        # reference (expected) beat frames
        base_frames = onsets_base_frames(codes[type_index],
                                         total_frames_number)
        #print("base_frames is {}".format(base_frames))

        min_d, best_y, onsets_frames = get_dtw_min(onsets_frames, base_frames,
                                                   65)
        base_onsets = librosa.frames_to_time(best_y, sr=sr)
        #print("base_onsets is {}".format(base_onsets))

        # detected beat times
        onstm = librosa.frames_to_time(onsets_frames, sr=sr)
        #print("onstm is {}".format(onstm))
        duration = librosa.get_duration(y, sr=sr)  # get the audio duration
        #print("duration is {}".format(duration))

        # difference between the number of detected and reference beats
        diff_real_base = len(onsets_frames) - len(base_frames)

        #librosa.display.waveplot(y, sr=sr)
        # plt.show()

        plt.vlines(onstm,
Example #34
def test_model_on_folk():

    # X_polovnicek = wav2cqt_spec('polovnicek.wav')
    # times = librosa.frames_to_time(np.arange(X_polovnicek.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_polovnicek = midi2labels('polovnicek.MID', times)
    # #
    # X_jedna = wav2cqt_spec('jedna.mp3')
    # times = librosa.frames_to_time(np.arange(X_jedna.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_jedna = midi2labels('jedna.MID', times)
    # #
    # X_kohutik = wav2cqt_spec('kohutik.wav')
    # times = librosa.frames_to_time(np.arange(X_kohutik.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_kohutik = midi2labels('kohutik.MID', times)
    #
    # X_marienka = wav2cqt_spec('marienka.mp3')
    # times = librosa.frames_to_time(np.arange(X_marienka.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_marienka = midi2labels('marienka.mid', times)
    #
    # X_hora = wav2cqt_spec('hora.mp3')
    # times = librosa.frames_to_time(np.arange(X_hora.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_hore = midi2labels('hora.mid', times)
    #
    # X_onvo = wav2cqt_spec('onvo.mp3')
    # times = librosa.frames_to_time(np.arange(X_marienka.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y_onvo = midi2labels('onvo.mid', times)

    # model = load_model('DNN_mp3_piano.hdf5')
    # dnn = DNN(3, 256)
    # dnn.set_model(model)
    # dnn.summary()
    #
    #
    #
    #
    # dnn.predict(X_kohutik, y_kohutik)
    #dnn.predict(X_polovnicek, y_polovnicek)
    # X = wav2cqt_spec('MAPS_MUS-alb_esp2_AkPnCGdD.flac')
    # dnn.predict(X)

    # X = wav2cqt_spec('alb_esp{0}.wav'.format(1))
    # times = librosa.frames_to_time(np.arange(X.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    # y = midi2labels('alb_esp{0}.mid'.format(1), times)
    #
    # dnn.predict(X, y)

    #exit(0)



    X_all, y_all = None, None


    for i in range(1, 7):
        X = wav2cqt_spec('alb_esp{0}.mp3'.format(i))
        times = librosa.frames_to_time(np.arange(X.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
        y = midi2labels('alb_esp{0}.mid'.format(i), times)
        print(X.shape, y.shape)

        if i == 1:
            X_all, y_all = X, y
        else:
            X_all, y_all = np.concatenate((X_all, X)), np.concatenate((y_all, y))

    # wavs = [x for x in listdir(WAV_DIR) if x.endswith('.mp3') and 'format0' not in x]
    # np.random.seed()
    # np.random.shuffle(wavs)
    #
    # i, length = 1, len(wavs)
    # X_all, y_all = None, None
    # for wav in wavs:
    #     try:
    #         X = wav2cqt_spec(wav)
    #         times = librosa.frames_to_time(np.arange(X.shape[0]), sr=SAMPLE_RATE, hop_length=HOP_LENGTH)
    #         y = midi2labels('{0}.mid'.format(wav.split('.')[0]), times)
    #
    #         if X_all is None or y_all is None:
    #             X_all, y_all = X, y
    #         elif X.shape[0] == y.shape[0]:
    #             X_all, y_all = np.concatenate((X_all, X)), np.concatenate((y_all, y))
    #
    #         print('{0}/{1} {2} {3}.mid'.format(i, length, wav, wav.split('.')[0]), X.shape, '/', X_all.shape, y.shape,
    #               '/', y_all.shape)
    #         i += 1
    #
    #         if i >= 20:
    #             break
    #
    #     except FileNotFoundError as err:
    #         print(err)
    #     except Exception as err:
    #         print(err)




    min_all, max_all = X_all.min(axis=0), X_all.max(axis=0)
    X_all = (X_all - min_all) / (max_all - min_all)

    size = X_all.shape[0]
    half_size, third_size = size // 2, size // 2 + size // 4

    X_train, y_train = X_all[:half_size], y_all[:half_size]
    X_val, y_val = X_all[half_size:third_size], y_all[half_size:third_size]
    X_test, y_test = X_all[third_size:], y_all[third_size:]



    # dnn = DNN(256, 3)
    # dnn.create()
    # dnn.train(X_train, y_train, X_val, y_val)
    # dnn.predict(X_test, y_test)

    X_train = np.array([X_train[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(X_train) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])
    y_train = np.array([y_train[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(y_train) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])

    X_val = np.array([X_val[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(X_val) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])
    y_val = np.array([y_val[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(y_val) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])

    X_test = np.array([X_test[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(X_test) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])
    y_test = np.array([y_test[i:i + LSTM_SAMPLE_SIZE, :] for i in range(0, len(y_test) - LSTM_SAMPLE_SIZE + 1, LSTM_SAMPLE_SIZE)])

    try:


        lstm = LSTM(256, 3)
        lstm.create()
        lstm.summary()
        lstm.train(X_train, y_train, X_val, y_val)
        lstm.predict(X_test, y_test)
    except Exception as ex:
        print(ex)
Example #35

praatEXE = 'C:/Users/user/Desktop/Praat.exe'
all_song = 'C:/Users/user/Desktop/mir_final/lemon.wav'
file = 'C:/Users/user/Desktop/mir_final/lemon.wav'
data, fs = librosa.load(file, sr=None, dtype='double')
all_data, fs = librosa.load(all_song, sr=None, dtype='double')
''' Param setting '''
win_len = 2048  # n of fft
hop_len = 512  # samples

rmse = np.log(
    librosa.feature.rmse(y=data, frame_length=win_len, hop_length=hop_len))
''' frame step to time step'''
time_step = librosa.frames_to_time(range(rmse.shape[-1]),
                                   sr=fs,
                                   hop_length=hop_len,
                                   n_fft=win_len)
''' ZCR, pitch and energy to find candidates for beat'''
zcr = librosa.feature.zero_crossing_rate(data,
                                         frame_length=win_len,
                                         hop_length=hop_len)
energy = extractIntensity(file,
                          'C:/Users/user/Desktop/mir_final/energy.txt',
                          praatEXE,
                          minPitch=65,
                          sampleStep=librosa.samples_to_time(hop_len, fs),
                          forceRegenerate=True,
                          undefinedValue=0)
pitch = extractPitch(file,
                     'C:/Users/user/Desktop/mir_final/pitch.txt',
                     praatEXE,
Example #36
M = np.vstack([mfcc, delta_mfcc, delta2_mfcc])

#######################
# Beat tracking
#######################
# Now, let's run the beat tracker.
# We'll use the percussive component for this part
plt.figure(figsize=(12, 6))
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr)

# Let's re-draw the spectrogram, but this time, overlay the detected beats
plt.figure(figsize=(12, 4))
librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel')

# Let's draw transparent lines over the beat frames
plt.vlines(librosa.frames_to_time(beats),
           1,
           0.5 * sr,
           colors='w',
           linestyles='-',
           linewidth=2,
           alpha=0.5)

plt.axis('tight')

plt.colorbar(format='%+02.0f dB')

plt.tight_layout()

print('Estimated tempo:        %.2f BPM' % tempo)
Example #37
def analyzeSound(filename, beatsPerMeasure):
    y, sr = librosa.load(filename)
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    print('Estimated tempo: {:.2f} beats per minute'.format(tempo))
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    return tempo  # returns the estimated tempo (BPM)
Example #38
# beats per min goes here
#from __future__ import print_function

import librosa

#add default location here
filename = 'train/edmtest.mp3'

y, s = librosa.load(filename)

temp, frames = librosa.beat.beat_track(y=y, sr=s)

print('Estimated tempo: {:.2f} beats per minute'.format(temp))

beat_times = librosa.frames_to_time(frames, sr=s)  # beat times in seconds

print('Saving output to bpm.csv')
#librosa.output.times_csv('bpm.csv', beat_times)
Example #39
librosa.display.specshow(librosa.amplitude_to_db(S_full, ref=np.max),
                         y_axis='log',
                         x_axis='time',
                         sr=sr)
plt.colorbar()
plt.tight_layout()

###########################################################
# As you can see, there are periods of silence and
# non-silence throughout this recording.
#

# As a first step, we can plot the root-mean-square (RMS) curve
rms = librosa.feature.rms(y=y)[0]

times = librosa.frames_to_time(np.arange(len(rms)))

plt.figure(figsize=(12, 4))
plt.plot(times, rms)
plt.axhline(0.02, color='r', alpha=0.5)
plt.xlabel('Time')
plt.ylabel('RMS')
plt.axis('tight')
plt.tight_layout()

# The red line at 0.02 indicates a reasonable threshold for silence detection.
# However, the RMS curve occasionally dips below the threshold momentarily,
# and we would prefer the detector to not count these brief dips as silence.
# This is where the Viterbi algorithm comes in handy!

#####################################################
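The excerpt stops before the Viterbi step it describes; a minimal sketch of how the RMS curve could be decoded into silence/non-silence states with librosa.sequence.viterbi_discriminative (the 0.02 threshold and the 0.99 self-loop probability are assumptions):

import numpy as np
import librosa

# Soft-threshold the RMS curve into a per-frame probability of being "non-silent".
p_active = 1.0 / (1.0 + np.exp(-(rms - 0.02) / 0.005))
prob = np.vstack([1.0 - p_active, p_active])      # state 0 = silence, state 1 = active

# A "sticky" transition matrix discourages rapid flips between states.
transition = librosa.sequence.transition_loop(2, [0.99, 0.99])
states = librosa.sequence.viterbi_discriminative(prob, transition)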
plt.figure(figsize=(14, 5))
plt.plot(x[n0:n1])
plt.grid()
plt.title(title)
plt.show()

#Zero Crossings
zero_crossings = librosa.zero_crossings(x[n0:n1], pad=False)
print(f'Zero Crossings: {sum(zero_crossings)}')

#Spectral Centroid
spectral_centroids = librosa.feature.spectral_centroid(x, sr=sr)[0]
spectral_centroids.shape
# Computing the time variable for visualization
frames = range(len(spectral_centroids))
t = librosa.frames_to_time(frames)

# Normalising the spectral centroid for visualisation
def normalize(x, axis=0):
    return sklearn.preprocessing.minmax_scale(x, axis=axis)

#Plotting the Spectral Centroid along the waveform
dsp.waveplot(x, sr=sr, alpha=0.4)

plt.plot(t, normalize(spectral_centroids), color='r')
plt.title(title)
plt.show()

#Spectral Rolloff
#specified percentage of the total spectral energy, e.g. 85%, lies.
spectral_rolloff = librosa.feature.spectral_rolloff(x, sr=sr)[0]
Example #41
    def start_frames(self, value):
        self.__start_frames = value
        self.__start_time = librosa.frames_to_time(value, sr=utils.SAMPLE_RATE)
        self.__start_beat = round_to_sixteenth(
            0.25 * (self.__start_time / self.quarter_note_time))
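round_to_sixteenth and self.quarter_note_time are defined elsewhere in that project; a minimal sketch of what the rounding helper might do (hypothetical, assuming the value is expressed in whole notes):

def round_to_sixteenth(value):
    # Hypothetical helper: snap to the nearest multiple of 1/16.
    return round(value * 16) / 16.0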
Example #42
    def analyze(self, inputFile, count=2):
        if not(self.loaded):
            raise UnloadedException()

        timeS = time.time()

        try:
            (signal, samplerate) = sf.read(inputFile)
        except:
            print(
                "Error with chunk file. Unable to perform features extraction on the file.")
            raise Exception()

        # The number of columns in the dataset (except for index)
        dataset_shape = (self.PARAM_FRAME_LENGTH / 10) * self.PARAM_NUMBER_MELS
        X_test_vectors = [ np.repeat(0, dataset_shape) ]

        signal = librosa.to_mono(np.transpose(signal))
        signal, _ = librosa.effects.trim(signal, top_db=50)
        #spectrogram = librosa.feature.melspectrogram(signal, sr=samplerate, n_fft=1024, hop_length=160, fmin=240, fmax=3000)
        spectrogram = librosa.feature.melspectrogram(signal, sr=samplerate, n_fft=1024, hop_length=160)

        logSpectrogram = self.refFun(spectrogram)

        signalLength = float(len(signal) / samplerate) * 1000
        indexPosition = 0
        while indexPosition < signalLength - self.PARAM_FRAME_LENGTH:
            row = np.asarray(logSpectrogram[:, int(indexPosition / 10):int((indexPosition + self.PARAM_FRAME_LENGTH) / 10)]).ravel()
            X_test_vectors.append(row)
            indexPosition += self.PARAM_FRAME_LENGTH
        X_test_vectors = X_test_vectors[1:] # We remove first row which is only 0

        X_test = []
        for i in range(len(X_test_vectors)):
            matrix = np.zeros((self.PARAM_NUMBER_MELS, int(self.PARAM_FRAME_LENGTH / 10)))
            for l in range(self.PARAM_NUMBER_MELS):
                for m in range(int(self.PARAM_FRAME_LENGTH / 10)):
                    matrix[l, m] = X_test_vectors[i][l * int(self.PARAM_FRAME_LENGTH / 10) + m]
            X_test.append([matrix])

        # Creating vector into clustering space
        cluster_space_layer = K.function([self.model.layers[0].input], [self.model.layers[7].output])
        layer_output = cluster_space_layer([X_test])[0]

        cosinus_dist = 1. - sklearn.metrics.pairwise.cosine_similarity(layer_output)
        cosinus_dist[cosinus_dist < 0] = 0
        cosine_tsne = manifold.TSNE(n_components=2, metric='precomputed').fit_transform(cosinus_dist)

        Z = linkage(layer_output, metric='cosine', method='complete')
        minDist = max([row[2] for row in Z])
        nb_clusters = len(Z)
        for i in range(len(Z)-1):
            if (minDist > Z[i+1][2] - Z[i][2]):
                minDist = Z[i+1][2] - Z[i][2]
                nb_clusters = i

        if count is None:
            count = 2
        count = int(count)
        clustering = AgglomerativeClustering(affinity='cosine', linkage="complete", n_clusters=count).fit_predict(layer_output)

        # Now we need to find indexes when current speaker changes
        flags = []
        currentSpeaker = clustering[0]
        for i in range(1, len(clustering)):
            if clustering[i] != currentSpeaker:
                currentSpeaker = clustering[i]
                flags.append(i)

        finalClustering = []
        for flag in flags:
            fragment = signal[(flag-1)*samplerate:(flag+1)*samplerate]
            chroma = librosa.feature.chroma_cens(y=fragment, sr=samplerate)
            #librosa.output.write_wav("output/test_fragment.wav", test_fragment, samplerate)
            bounds = librosa.segment.agglomerative(chroma, 3)
            speakerStartPos = (flag-1) + librosa.frames_to_time(bounds, sr=samplerate)[1]
            finalClustering.append(float("{0:.3f}".format(speakerStartPos)))

        flags.insert(0, 0)
        finalClustering.insert(0, 0)
        result = [[] for i in range(count)]

        for i in range(1, len(flags)):
            print(flags[i] - 1)
            n = clustering[flags[i] - 1]

            result[n].append((finalClustering[i-1], finalClustering[i] - 0.001))
        result[clustering[-1]].append((finalClustering[-1], "EOF"))


        #clustering = KMeans(n_clusters=4).fit_predict(layer_output)
        return {'res': result, 'exec_time': time.time() - timeS}
Example #43
    def worker(self):
        audio = pyaudio.PyAudio()

        print('\n*******************************************')
        print('RHAPSODY MODULE-I INPUT')
        print('*******************************************\n')
        print('\n===========================================')
        print('STARTED RECORDING')
        print('===========================================\n')

        for i in range(1, 4):
            print('\n===========================================')
            print(str(i) + '...')
            print('===========================================\n')
            sleep(1)

        stream = audio.open(format=self.FORMAT,
                            channels=self.CHANNELS,
                            rate=self.RATE,
                            input=True,
                            frames_per_buffer=self.CHUNK)

        f = []

        for i in range(0, int(self.RATE / self.CHUNK * self.RECORD_SECONDS)):
            data = stream.read(self.CHUNK)
            f.append(data)

        print('\n===========================================')
        print('DONE RECORDING')
        print('===========================================\n')

        stream.stop_stream()
        stream.close()
        audio.terminate()

        wf = wave.open(self.WAVE_OUTPUT_FILENAME, 'wb')
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(audio.get_sample_size(self.FORMAT))
        wf.setframerate(self.RATE)
        wf.writeframes(b''.join(f))
        wf.close()
        """""" """""" """""" """""" """""" """""" """
        1 - Loading File
        """ """""" """""" """""" """""" """""" """"""
        filename = self.WAVE_OUTPUT_FILENAME
        y, sr = librosa.load(filename)
        """""" """""" """""" """""" """""" """""" """
        2 - Get Tempo == bpm
        """ """""" """""" """""" """""" """""" """"""
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        print('\n===========================================')
        print('Estimated tempo: {:.2f} beats per minute'.format(tempo))
        print('===========================================\n')

        # generate csv files with beat times
        #CSV_FILENAME = self.WAVE_OUTPUT_FILENAME_NO_EXTENSION + ".csv"

        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        CSV_FILENAME = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "Recordings",
                         self.final + ".csv"))
        librosa.output.times_csv(CSV_FILENAME, beat_times)

        # WRITING A FILE WITH THE TEMPO
        #TEXT_FILENAME = self.WAVE_OUTPUT_FILENAME_NO_EXTENSION + ".txt"
        TEXT_FILENAME = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "Recordings",
                         self.final + ".txt"))
        bpm_value = open(TEXT_FILENAME, 'w')
        tempo_text = str(tempo) + '\n'
        bpm_value.write(tempo_text)
        """""" """""" """""" """""" """""" """""" """
        3 - Get Notes
        """ """""" """""" """""" """""" """""" """"""
        hz = librosa.feature.chroma_cqt(y=y, sr=sr)

        ## GET STRONGEST OCTAVE
        strongestOctave = 0
        strongestOctave_sum = 0
        for octave in range(len(hz)):
            sum = 0
            for frame in hz[octave]:
                sum = sum + frame
            if sum > strongestOctave_sum:
                strongestOctave_sum = sum
                strongestOctave = octave

        ## GET HIGHEST HZ FOR EACH TIME FRAME
        strongestHz = []
        for i in range(len(hz[0])):
            strongestHz.append(0)

        notes = []
        for i in range(len(hz[0])):
            notes.append(0)

        for frame_i in range(len(hz[0])):
            strongest_temp = 0
            for octave_i in range(len(hz)):

                if hz[octave_i][frame_i] > strongest_temp:
                    strongest_temp = hz[octave_i][frame_i]
                    strongestHz[frame_i] = octave_i + 1
                    notes[frame_i] = librosa.hz_to_note(hz[octave_i][frame_i])

        # C C# D D# E F F# G G# A  A# B
        # 1 2  3 4  5 6 7  8 9  10 11 12
        strongestHz_sum = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for note in strongestHz:
            strongestHz_sum[note - 1] = strongestHz_sum[note - 1] + 1

        for i in range(len(strongestHz_sum)):
            strongestHz_sum[i] = float(strongestHz_sum[i]) / len(strongestHz)

        # rank the 12 pitch classes by how often they were the strongest;
        # subtracting 0.25 pushes an already-picked class below the rest
        noteSorted = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for num in range(len(noteSorted)):
            biggest = strongestHz_sum.index(max(strongestHz_sum))
            noteSorted[num] = biggest + 1
            strongestHz_sum[biggest] = strongestHz_sum[biggest] - 0.25

        for note in noteSorted:
            noteString = str(note) + '\n'
            bpm_value.write(noteString)

        bpm_value.close()

        print('\n===========================================')
        print('RECORDING ANALYSIS COMPLETED SUCCESSFULLY!!!')
        print('===========================================\n')

        self.finished.emit()
Пример #44
0
                                      hop_length=hop_length,
                                      units='time')

######################################################################
# If you look carefully, the default onset detector (top sub-plot) has
# several false positives in high-vibrato regions, e.g., around 0.62s or
# 1.80s.
#
# The superflux method (middle plot) is less susceptible to vibrato, and
# does not detect onset events at those points.
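
######################################################################
# A minimal sketch (not part of the original script) of how a
# superflux-style envelope can be computed with
# `librosa.onset.onset_strength` by enabling its local max filter.
# `S`, `sr`, and `hop_length` are assumed to be the mel spectrogram,
# sample rate, and hop length defined earlier in this script; the
# `lag` and `max_size` values are illustrative.

odf_sf_sketch = librosa.onset.onset_strength(S=librosa.power_to_db(S, ref=np.max),
                                             sr=sr,
                                             hop_length=hop_length,
                                             lag=2,       # compare against a frame two hops back
                                             max_size=3)  # max-filter over 3 mel bands to tame vibrato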

# sphinx_gallery_thumbnail_number = 2
plt.figure(figsize=(6, 6))

frame_time = librosa.frames_to_time(np.arange(len(odf_default)),
                                    sr=sr,
                                    hop_length=hop_length)

ax = plt.subplot(2, 1, 2)
librosa.display.specshow(librosa.power_to_db(S, ref=np.max),
                         y_axis='mel',
                         x_axis='time',
                         sr=sr,
                         hop_length=hop_length,
                         fmin=fmin,
                         fmax=fmax)
plt.xlim([0, 5.0])
plt.axis('tight')

plt.subplot(4, 1, 1, sharex=ax)
plt.plot(frame_time, odf_default, label='Spectral flux')
def preprocess(args):
    #params
    path = os.path.join('models',args['model_name'])
    config = load_config(os.path.join(path,'config.json'))

    bin_multiple = int(args['bin_multiple'])
    spec_type = args['spec_type']

    framecnt = 0

    # hack to deal with high PPQ from MAPS
    # https://github.com/craffel/pretty-midi/issues/112
    pretty_midi.pretty_midi.MAX_TICK = 1e10


    for s in os.listdir(data_dir):
        subdir = os.path.join(data_dir,s)
        if not os.path.isdir(subdir):
            continue
        # recursively search in subdir
        print(subdir)
        inputs,outputs = [],[]
        addCnt, errCnt = 0,0
        for dp, dn, filenames in os.walk(subdir):
            # in each level of the directory, look at filenames ending with .mid
            for f in filenames:
                # if there exists a .wav file and .midi file with the same name

                if f.endswith('.wav'):
                    audio_fn = f
                    fprefix = audio_fn.split('.wav')[0]
                    mid_fn = fprefix + '.mid'
                    txt_fn = fprefix + '.txt'
                    if mid_fn in filenames:
                        # wav2inputnp
                        audio_fn = os.path.join(dp,audio_fn)
                        # mid2outputnp
                        mid_fn = os.path.join(dp,mid_fn)

                        pm_mid = pretty_midi.PrettyMIDI(mid_fn)

                        inputnp = wav2inputnp(audio_fn,spec_type=spec_type,bin_multiple=bin_multiple)
                        times = librosa.frames_to_time(np.arange(inputnp.shape[0]),sr=sr,hop_length=hop_length)
                        outputnp = mid2outputnp(pm_mid,times)

                        # check that the number of frames matches
                        if inputnp.shape[0] == outputnp.shape[0]:
                            print("adding to dataset fprefix {}".format(fprefix))
                            addCnt += 1
                            framecnt += inputnp.shape[0]
                            print("framecnt is {}".format(framecnt))
                            inputs.append(inputnp)
                            outputs.append(outputnp)
                        else:
                            print("error for fprefix {}".format(fprefix))
                            errCnt += 1
                            print(inputnp.shape)
                            print(outputnp.shape)

        print("{} examples in dataset".format(addCnt))
        print("{} examples couldnt be processed".format(errCnt))


        if addCnt:
            inputs = np.concatenate(inputs)
            outputs = np.concatenate(outputs)

            fn = subdir.split('/')[-1]
            if not fn:
                fn = subdir.split('/')[-2]
            datapath = joinAndCreate(path,'data')
            fnpath = joinAndCreate(datapath,fn)

            mmi = np.memmap(filename=os.path.join(fnpath,'input.dat'), mode='w+',shape=inputs.shape)
            mmi[:] = inputs[:]
            mmo = np.memmap(filename=os.path.join(fnpath,'output.dat'), mode='w+',shape=outputs.shape)
            mmo[:] = outputs[:]
            del mmi
            del mmo
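
# A minimal usage sketch for preprocess (not from the original source); the
# argument values are hypothetical, and data_dir, sr, hop_length, wav2inputnp,
# mid2outputnp, and joinAndCreate are assumed to be defined elsewhere in the module:
#
#     preprocess({'model_name': 'baseline',
#                 'bin_multiple': '3',
#                 'spec_type': 'cqt'})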
Пример #46
0
    # filename, onset_code = 'F:/项目/花城音乐项目/样式数据/7.18MP3/旋律/小学8题20190718-9728-3.wav', '[2000;250,250,250,250,1000;2000;500,500,1000]'  # 100
    # filename, onset_code = 'F:/项目/花城音乐项目/样式数据/7.18MP3/旋律/小学8题20190718-9728-4.wav', '[1000,250,250,250,250;2000;1000,500,500;2000]'  # 100
    # rhythm_code = '[1000,1000;500,500,1000;500,250,250,500,500;2000]'
    # melody_code = '[5,5,3,2,1,2,2,3,2,6-,5-]'
    print("rhythm_code is {}".format(rhythm_code))
    print("pitch_code is {}".format(pitch_code))
    # plt, total_score, onset_score, note_scroe, detail_content = draw_plt(filename, rhythm_code, pitch_code)
    # plt.show()
    # plt.clf()
    y, sr = librosa.load(filename)
    CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max)
    CQT = np.where(CQT > -22, np.max(CQT), np.min(CQT))

    plt.subplot(2,1,1)
    rms, sig_ff, max_indexs = get_cqt_diff(filename)
    times = librosa.frames_to_time(np.arange(len(rms)))
    librosa.display.specshow(CQT, x_axis='time')
    #plt.plot(times, rms)
    #plt.plot(times, sig_ff)
    plt.xlim(0, np.max(times))
    max_index_times = librosa.frames_to_time(max_indexs)
    #plt.vlines(max_index_times, 0, np.max(rms), color='r', linestyle='dashed')

    start, end, length = get_onset_frame_length(filename,onset_code)
    base_frames = onsets_base_frames(onset_code, length)
    base_frames_diff =np.diff(base_frames)

    start_indexs = get_cqt_start_indexs(filename)
    print("start_indexs is {} ,size {}".format(start_indexs, len(start_indexs)))
    # best_start_indexs = get_best_cqt_start_indexs_by_diff_level(filename,start, end,base_frames)
    # start_indexs = best_start_indexs
Пример #47
0
    parser = TestOptions()
    args = parser.parse()
    args.train = False

    thr = args.thr

    # Process music and get feature
    infile = args.aud_path
    outfile = 'style.npy'
    p.preprocess(infile, outfile)

    y, sr = librosa.load(infile)
    onset_env = librosa.onset.onset_strength(y, sr=sr, aggregate=np.median)
    times = librosa.frames_to_time(np.arange(len(onset_env)),
                                   sr=sr,
                                   hop_length=512)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    np.save('beats.npy', times[beats])
    beats = np.round(librosa.frames_to_time(beats, sr=sr) * 15)

    beats = np.load('beats.npy')
    aud = np.load('style.npy')
    os.remove('beats.npy')
    os.remove('style.npy')
    #shutil.rmtree('normalized')

    #### Pretrain network from Decomp
    initp_enc, stdp_dec, movement_enc = loadDecompModel(args)

    #### Comp network
Пример #48
0
def cqt_one(input_file, output_file, cqt_params=None, audio_params=None,
            harmonic_params=None, skip_existing=True):
    """Compute the CQT for a input/output file Pair.

    Parameters
    ----------
    input_file : str
        Audio file to apply the CQT

    output_file : str
        Path to write the output.

    cqt_params : dict, default=None
        Parameters for the CQT function. See `librosa.cqt`.

    audio_params : dict, default=None
        Parameters for reading the audio file. See `claudio.read`.

    harmonic_params : dict, default=None
        Parameters for the `harmonic_cqt` function, which will update those in
        cqt_params.

    skip_existing : bool, default=True
        Skip outputs that exist.

    Returns
    -------
    success : bool
        True if the output file was successfully created.
    """
    input_exists, output_exists = [os.path.exists(f)
                                   for f in (input_file, output_file)]
    if not input_exists:
        logger.warning("[{0}] Input file doesn't exist, skipping: {1}"
                       "".format(time.asctime(), input_file))
        return input_exists

    if skip_existing and output_exists:
        logger.info("[{0}] Output file exists, skipping: {1}"
                    "".format(time.asctime(), output_file))
        return output_exists

    logger.debug("[{0}] Starting {1}".format(time.asctime(), input_file))
    if not cqt_params:
        cqt_params = CQT_PARAMS.copy()

    if not audio_params:
        audio_params = AUDIO_PARAMS.copy()

    if not harmonic_params:
        harmonic_params = HARMONIC_PARAMS.copy()

    logger.debug("[{0}] Audio conversion {1}".format(
        time.asctime(), input_file))
    try:
        x, fs = claudio.read(input_file, **audio_params)
        if len(x) <= 0:
            logger.error("Bad Input signal length={} for audio {}".format(
                len(x), input_file))
            return False
        logger.debug("[{0}] Computing features {1}".format(
            time.asctime(), input_file))
        cqt_spectra = np.array([np.abs(librosa.cqt(x_c, sr=fs, **cqt_params).T)
                                for x_c in x.T])

        cqt_params.update(**harmonic_params)
        harm_spectra = harmonic_cqt(x, fs, **cqt_params)

        frame_idx = np.arange(cqt_spectra.shape[1])
        time_points = librosa.frames_to_time(
            frame_idx, sr=fs, hop_length=cqt_params['hop_length'])
        logger.debug("[{0}] Saving: {1}".format(time.asctime(), output_file))
        np.savez(
            output_file, time_points=time_points,
            cqt=np.abs(cqt_spectra).astype(np.float32),
            harmonic_cqt=np.abs(harm_spectra).astype(np.float32))
    except AssertionError as e:
        logger.error("Failed to load audio file: {} with error:\n{}".format(
                     input_file, e))
    logger.debug("[{0}] Finished: {1}".format(time.asctime(), output_file))
    return os.path.exists(output_file)
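
# A minimal usage sketch for cqt_one (not from the original source); the paths
# and parameter overrides are hypothetical, and the output .npz contains the
# 'time_points', 'cqt', and 'harmonic_cqt' arrays saved above.
if __name__ == '__main__':
    ok = cqt_one('audio/example.wav', 'features/example_cqt.npz',
                 cqt_params=dict(hop_length=1024, n_bins=72,
                                 bins_per_octave=12),
                 skip_existing=False)
    if ok:
        data = np.load('features/example_cqt.npz')
        print(data['cqt'].shape, data['time_points'].shape)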
Пример #49
0
def extract_feature(path):
    id = 1  # Song ID
    feature_set = pd.DataFrame()  # Feature Matrix

    # Individual Feature Vectors
    songname_vector = pd.Series()
    tempo_vector = pd.Series()
    total_beats = pd.Series()
    average_beats = pd.Series()
    chroma_stft_mean = pd.Series()
    chroma_stft_std = pd.Series()
    chroma_stft_var = pd.Series()
    chroma_cq_mean = pd.Series()
    chroma_cq_std = pd.Series()
    chroma_cq_var = pd.Series()
    chroma_cens_mean = pd.Series()
    chroma_cens_std = pd.Series()
    chroma_cens_var = pd.Series()
    mel_mean = pd.Series()
    mel_std = pd.Series()
    mel_var = pd.Series()
    mfcc_mean = pd.Series()
    mfcc_std = pd.Series()
    mfcc_var = pd.Series()
    mfcc_delta_mean = pd.Series()
    mfcc_delta_std = pd.Series()
    mfcc_delta_var = pd.Series()
    rmse_mean = pd.Series()
    rmse_std = pd.Series()
    rmse_var = pd.Series()
    cent_mean = pd.Series()
    cent_std = pd.Series()
    cent_var = pd.Series()
    spec_bw_mean = pd.Series()
    spec_bw_std = pd.Series()
    spec_bw_var = pd.Series()
    contrast_mean = pd.Series()
    contrast_std = pd.Series()
    contrast_var = pd.Series()
    rolloff_mean = pd.Series()
    rolloff_std = pd.Series()
    rolloff_var = pd.Series()
    poly_mean = pd.Series()
    poly_std = pd.Series()
    poly_var = pd.Series()
    tonnetz_mean = pd.Series()
    tonnetz_std = pd.Series()
    tonnetz_var = pd.Series()
    zcr_mean = pd.Series()
    zcr_std = pd.Series()
    zcr_var = pd.Series()
    harm_mean = pd.Series()
    harm_std = pd.Series()
    harm_var = pd.Series()
    perc_mean = pd.Series()
    perc_std = pd.Series()
    perc_var = pd.Series()
    frame_mean = pd.Series()
    frame_std = pd.Series()
    frame_var = pd.Series()

    # Traversing over each file in path
    file_data = [f for f in listdir(path) if isfile(join(path, f))]
    for line in file_data:
        if (line[-1:] == '\n'):
            line = line[:-1]

        # Reading Song
        songname = path + line
        y, sr = librosa.load(songname, duration=60)
        S = np.abs(librosa.stft(y))

        # Extracting Features
        tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
        chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
        melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        poly_features = librosa.feature.poly_features(S=S, sr=sr)
        tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)

        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        mfcc_delta = librosa.feature.delta(mfcc)

        onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
        frames_to_time = librosa.frames_to_time(onset_frames[:20], sr=sr)

        # Transforming Features
        songname_vector.set_value(id, line)  # song name
        tempo_vector.set_value(id, tempo)  # tempo
        total_beats.set_value(id, sum(beats))  # beats
        average_beats.set_value(id, np.average(beats))
        chroma_stft_mean.set_value(id, np.mean(chroma_stft))  # chroma stft
        chroma_stft_std.set_value(id, np.std(chroma_stft))
        chroma_stft_var.set_value(id, np.var(chroma_stft))
        chroma_cq_mean.set_value(id, np.mean(chroma_cq))  # chroma cq
        chroma_cq_std.set_value(id, np.std(chroma_cq))
        chroma_cq_var.set_value(id, np.var(chroma_cq))
        chroma_cens_mean.set_value(id, np.mean(chroma_cens))  # chroma cens
        chroma_cens_std.set_value(id, np.std(chroma_cens))
        chroma_cens_var.set_value(id, np.var(chroma_cens))
        mel_mean.set_value(id, np.mean(melspectrogram))  # melspectrogram
        mel_std.set_value(id, np.std(melspectrogram))
        mel_var.set_value(id, np.var(melspectrogram))
        mfcc_mean.set_value(id, np.mean(mfcc))  # mfcc
        mfcc_std.set_value(id, np.std(mfcc))
        mfcc_var.set_value(id, np.var(mfcc))
        mfcc_delta_mean.set_value(id, np.mean(mfcc_delta))  # mfcc delta
        mfcc_delta_std.set_value(id, np.std(mfcc_delta))
        mfcc_delta_var.set_value(id, np.var(mfcc_delta))
        rmse_mean.set_value(id, np.mean(rmse))  # rmse
        rmse_std.set_value(id, np.std(rmse))
        rmse_var.set_value(id, np.var(rmse))
        cent_mean.set_value(id, np.mean(cent))  # cent
        cent_std.set_value(id, np.std(cent))
        cent_var.set_value(id, np.var(cent))
        spec_bw_mean.set_value(id, np.mean(spec_bw))  # spectral bandwidth
        spec_bw_std.set_value(id, np.std(spec_bw))
        spec_bw_var.set_value(id, np.var(spec_bw))
        contrast_mean.set_value(id, np.mean(contrast))  # contrast
        contrast_std.set_value(id, np.std(contrast))
        contrast_var.set_value(id, np.var(contrast))
        rolloff_mean.set_value(id, np.mean(rolloff))  # rolloff
        rolloff_std.set_value(id, np.std(rolloff))
        rolloff_var.set_value(id, np.var(rolloff))
        poly_mean.set_value(id, np.mean(poly_features))  # poly features
        poly_std.set_value(id, np.std(poly_features))
        poly_var.set_value(id, np.var(poly_features))
        tonnetz_mean.set_value(id, np.mean(tonnetz))  # tonnetz
        tonnetz_std.set_value(id, np.std(tonnetz))
        tonnetz_var.set_value(id, np.var(tonnetz))
        zcr_mean.set_value(id, np.mean(zcr))  # zero crossing rate
        zcr_std.set_value(id, np.std(zcr))
        zcr_var.set_value(id, np.var(zcr))
        harm_mean.set_value(id, np.mean(harmonic))  # harmonic
        harm_std.set_value(id, np.std(harmonic))
        harm_var.set_value(id, np.var(harmonic))
        perc_mean.set_value(id, np.mean(percussive))  # percussive
        perc_std.set_value(id, np.std(percussive))
        perc_var.set_value(id, np.var(percussive))
        frame_mean.set_value(id, np.mean(frames_to_time))  # frames
        frame_std.set_value(id, np.std(frames_to_time))
        frame_var.set_value(id, np.var(frames_to_time))

        print(songname)
        id = id + 1

    # Concatenating Features into one csv and json format
    feature_set['song_name'] = songname_vector  # song name
    feature_set['tempo'] = tempo_vector  # tempo
    feature_set['total_beats'] = total_beats  # beats
    feature_set['average_beats'] = average_beats
    feature_set['chroma_stft_mean'] = chroma_stft_mean  # chroma stft
    feature_set['chroma_stft_std'] = chroma_stft_std
    feature_set['chroma_stft_var'] = chroma_stft_var
    feature_set['chroma_cq_mean'] = chroma_cq_mean  # chroma cq
    feature_set['chroma_cq_std'] = chroma_cq_std
    feature_set['chroma_cq_var'] = chroma_cq_var
    feature_set['chroma_cens_mean'] = chroma_cens_mean  # chroma cens
    feature_set['chroma_cens_std'] = chroma_cens_std
    feature_set['chroma_cens_var'] = chroma_cens_var
    feature_set['melspectrogram_mean'] = mel_mean  # melspectrogram
    feature_set['melspectrogram_std'] = mel_std
    feature_set['melspectrogram_var'] = mel_var
    feature_set['mfcc_mean'] = mfcc_mean  # mfcc
    feature_set['mfcc_std'] = mfcc_std
    feature_set['mfcc_var'] = mfcc_var
    feature_set['mfcc_delta_mean'] = mfcc_delta_mean  # mfcc delta
    feature_set['mfcc_delta_std'] = mfcc_delta_std
    feature_set['mfcc_delta_var'] = mfcc_delta_var
    feature_set['rmse_mean'] = rmse_mean  # rmse
    feature_set['rmse_std'] = rmse_std
    feature_set['rmse_var'] = rmse_var
    feature_set['cent_mean'] = cent_mean  # cent
    feature_set['cent_std'] = cent_std
    feature_set['cent_var'] = cent_var
    feature_set['spec_bw_mean'] = spec_bw_mean  # spectral bandwidth
    feature_set['spec_bw_std'] = spec_bw_std
    feature_set['spec_bw_var'] = spec_bw_var
    feature_set['contrast_mean'] = contrast_mean  # contrast
    feature_set['contrast_std'] = contrast_std
    feature_set['contrast_var'] = contrast_var
    feature_set['rolloff_mean'] = rolloff_mean  # rolloff
    feature_set['rolloff_std'] = rolloff_std
    feature_set['rolloff_var'] = rolloff_var
    feature_set['poly_mean'] = poly_mean  # poly features
    feature_set['poly_std'] = poly_std
    feature_set['poly_var'] = poly_var
    feature_set['tonnetz_mean'] = tonnetz_mean  # tonnetz
    feature_set['tonnetz_std'] = tonnetz_std
    feature_set['tonnetz_var'] = tonnetz_var
    feature_set['zcr_mean'] = zcr_mean  # zero crossing rate
    feature_set['zcr_std'] = zcr_std
    feature_set['zcr_var'] = zcr_var
    feature_set['harm_mean'] = harm_mean  # harmonic
    feature_set['harm_std'] = harm_std
    feature_set['harm_var'] = harm_var
    feature_set['perc_mean'] = perc_mean  # percussive
    feature_set['perc_std'] = perc_std
    feature_set['perc_var'] = perc_var
    feature_set['frame_mean'] = frame_mean  # frames
    feature_set['frame_std'] = frame_std
    feature_set['frame_var'] = frame_var

    # Converting Dataframe into CSV Excel and JSON file
    feature_set.to_csv('Emotion_features.csv')
    feature_set.to_json('Emotion_features.json')
Пример #50
0
"""
Segmentation using silence detection with spectral flatness of chroma features.
WiMIR workshop topic: Verse and chorus detection in vocal cover versions.
Author - Shreyan Chowdhury
"""

import librosa
from librosa import display
import numpy as np
from matplotlib import pyplot as plt


y, sr = librosa.core.load('/home/shreyan/PROJECTS/_data/structure_workshop/hero_vocals.wav')

chroma = librosa.feature.chroma_stft(y)

chroma_flatness = librosa.feature.spectral_flatness(S=chroma)

# 100-frame moving sum ('full' convolution, so the output is longer than the input)
smoothed_chroma_flatness = np.convolve(chroma_flatness.squeeze(), np.ones(100))

bounds = librosa.segment.agglomerative(smoothed_chroma_flatness, 20)
xtimes = librosa.frames_to_time(range(len(smoothed_chroma_flatness)), sr=sr)

fig, (ax1, ax2) = plt.subplots(2, 1)
librosa.display.specshow(chroma, ax=ax1)

ax2.plot(xtimes, smoothed_chroma_flatness)
ax2.vlines(librosa.frames_to_time(bounds, sr=sr), 0, max(smoothed_chroma_flatness), color='black', linestyle='--',linewidth=2, alpha=0.9, label='Segment boundaries')
plt.show()
Пример #51
0
    # Estimates harmonic energy
    S = np.abs(librosa.stft(y))
    freqs = librosa.core.fft_frequencies(sr)
    harms = [1, 2, 3, 4]
    weights = [1.0, 0.5, 0.33, 0.25]
    S_sal = librosa.salience(S, freqs, harms, weights, fill_value=0)

    # Estimate possible beats (to be filtered later), their timing, and magnitude
    onset_env = librosa.onset.onset_strength(y=y, sr=sr, aggregate=np.median)
    tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env,
                                                 hop_length=hop_length,
                                                 y=y_harmonic,
                                                 sr=sr,
                                                 tightness=0.1)
    timing = librosa.frames_to_time(beat_frames)
    pitches, magnitudes = librosa.core.piptrack(y=y_harmonic,
                                                sr=sr,
                                                n_fft=(hop_length * 4),
                                                hop_length=hop_length,
                                                threshold=0.1)

    # Notes corresponding to the timing variable, to be plotted -> sent to pickle
    notes, pick, mags, freq, harm = [], [], [], [], []
    for x in range(0, len(beat_frames)):
        try:
            freq.append(detect_pitch(y_harmonic, sr, beat_frames[x]))
            harm.append(get_energy(y_harmonic, sr, beat_frames[x]))
            note = librosa.hz_to_note(
                detect_pitch(y_harmonic, sr, beat_frames[x]))
            notes.append(note)
Пример #52
0
def get_detail_cqt_rms_secondary_optimised(filename):

    onset_frames_cqt, best_y, best_threshold, _ = get_detail_cqt_rms(filename)

    y, sr = librosa.load(filename)

    loss_frames = []
    for i in range(len(onset_frames_cqt) - 1):
        start = onset_frames_cqt[i]
        end = onset_frames_cqt[i + 1]

        if end - start > 30:
            start_end_time = librosa.frames_to_time([start, end], sr=sr)
            #print("start_end_time is {}".format(start_end_time))
            y1, sr1 = librosa.load(filename,
                                   offset=start_end_time[0],
                                   duration=start_end_time[1] -
                                   start_end_time[0])
            # use the RMS threshold line to find missed onsets
            if len(onset_frames_cqt) > 0:
                threshold = 0.6
                tmp = get_missing_by_best_threshod(y1, [start, end], threshold)
                if len(tmp) >= 3:
                    for j in range(1, len(tmp) - 1):
                        loss_frames.append(tmp[j])
                        #print("add is {}".format(tmp[1:-1]))
            # rms = librosa.feature.rmse(y=y1)[0]
            # rms_on_onset_frames_cqt = [rms[x] for x in [start,end]]
            # min_rms_on_onset_frames_cqt = np.min(rms_on_onset_frames_cqt)
            # rms = [1 if x >=min_rms_on_onset_frames_cqt else 0 for x in rms]
            #
            # loss = [i for i in range(len(rms)-6) if rms[i] == 0 and rms[i+1] == 1 and np.min(rms[i+1:i+6]) == 1 and i < end and i > start ]
            # for x in loss:
            #     loss_frames.append(x)

    if len(loss_frames) > 0:
        for x in loss_frames:
            onset_frames_cqt.append(x)
        onset_frames_cqt.sort()

    CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max)
    #onset_frames_cqt, best_threshold = get_onsets_by_cqt_rms_optimised(filename)
    #print("5. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    # if len(onset_frames_cqt)<topN:
    onset_frames_cqt = get_miss_onsets_by_cqt(y, onset_frames_cqt)
    onset_frames_cqt = find_false_onsets_rms_secondary_optimised(
        y, onset_frames_cqt, 0.1, 0.1)
    if onset_frames_cqt:
        min_width = 5
        # print("min_width is {}".format(min_width))
        onset_frames_cqt = del_overcrowding(onset_frames_cqt, min_width)
    #print("6. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    #onset_frames_cqt = check_onset_by_cqt_v2(y, onset_frames_cqt)
    #print("7. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    onset_frames_cqt_time = librosa.frames_to_time(onset_frames_cqt, sr=sr)

    type_index = get_onsets_index_by_filename(filename)
    total_frames_number = get_total_frames_number(filename)
    best_y = []
    # reference (standard) beat frame positions
    if len(onset_frames_cqt) > 0:
        base_frames = onsets_base_frames_for_note(filename)
        base_frames = [
            x + onset_frames_cqt[0] - base_frames[0] for x in base_frames
        ]
        min_d, best_y, onsets_frames = get_dtw_min(onset_frames_cqt,
                                                   base_frames, 65)
    else:
        base_frames = onsets_base_frames_for_note(filename)
    base_onsets = librosa.frames_to_time(base_frames, sr=sr)
    plt.close()  # close the previous figure handle

    # librosa.display.specshow(CQT)
    plt.figure(figsize=(10, 6))
    plt.subplot(4, 1, 1)  # first panel; plt.subplot(rows, cols, index)
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Constant-Q power spectrogram (note)')
    # for x in onset_frames_cqt:
    #     sub_cqt = CQT.copy()[:,x]
    #     sub_cqt[0:20] = np.min(CQT)
    #     max_index = np.where(sub_cqt==np.max(sub_cqt))[0][0]
    #     print("max_index is {}".format(max_index))
    #     #plt.axhline(max_index,color="r")
    #     CQT[max_index,:] = np.min(CQT)

    librosa.display.specshow(CQT, y_axis='cqt_note', x_axis='time')
    plt.vlines(onset_frames_cqt_time, 0, sr, color='y', linestyle='solid')
    #plt.vlines(base_onsets, 0, sr, color='r', linestyle='solid')

    # print(plt.figure)

    plt.subplot(4, 1, 2)  # second panel; plt.subplot(rows, cols, index)
    librosa.display.waveplot(y, sr=sr)
    plt.vlines(onset_frames_cqt_time,
               -1 * np.max(y),
               np.max(y),
               color='y',
               linestyle='solid')

    plt.subplot(4, 1, 3)
    rms = librosa.feature.rmse(y=y)[0]
    rms = [x / np.std(rms) for x in rms]
    max_rms = np.max(rms)
    # rms = np.diff(rms)
    times = librosa.frames_to_time(np.arange(len(rms)))
    rms_on_onset_frames_cqt = [rms[x] for x in onset_frames_cqt]
    min_rms_on_onset_frames_cqt = np.min(rms_on_onset_frames_cqt)
    rms = [1 if x >= min_rms_on_onset_frames_cqt else 0 for x in rms]
    plt.plot(times, rms)
    # plt.axhline(min_rms_on_onset_frames_cqt)
    plt.axhline(max_rms * best_threshold)
    # plt.vlines(onsets_frames_rms_best_time, 0,np.max(rms), color='y', linestyle='solid')
    plt.vlines(onset_frames_cqt_time,
               0,
               np.max(rms),
               color='y',
               linestyle='solid')
    #plt.vlines(base_onsets, 0, np.max(rms), color='r', linestyle='solid')
    plt.xlim(0, np.max(times))

    plt.subplot(4, 1, 4)
    plt.vlines(base_onsets, 0, np.max(rms), color='r', linestyle='dashed')
    plt.xlim(0, np.max(times))
    plt.axhline(max_rms * best_threshold)
    return onset_frames_cqt, best_y, best_threshold, plt
Пример #53
0
# merge all the features in a matrix as training feature
def merge(onset_strength, is_onset, beat, mfcc):
    feature = []
    for i in range(len(onset_strength)):
        feature.append(onset_strength[i] + is_onset[i] + beat[i] + mfcc[i])
    return feature


if __name__ == "__main__":
    if len(sys.argv) != 3:
        raise argparse.ArgumentTypeError('the number of arguments has to be 3')

    y, sr = librosa.load(sys.argv[1], sr=None)
    o_env = librosa.onset.onset_strength(y, sr=sr)
    times = librosa.frames_to_time(np.arange(len(o_env)), sr=sr)

    onset = getOnset(y, sr)

    with open(sys.argv[2], 'r') as my_file:
        csvreader = csv.reader(my_file)
        mis = list(csvreader)

    mis = [[int(ele[0]), float(ele[1])] for ele in mis]
    mfcc = getmfcc(y, sr, mis)
    onset_strength = [[
        o_env[librosa.core.time_to_frames(e[1] / 1000, sr=sr)[0]]
    ] for e in mis]
    is_onset = isonset(onset, mis)
    beat = isbeat(mis)
    tr_f = merge(onset_strength, is_onset, beat, mfcc)
Пример #54
0
def get_detail_cqt_rms(filename):
    y, sr = librosa.load(filename)
    CQT = librosa.amplitude_to_db(librosa.cqt(y, sr=16000), ref=np.max)
    onset_frames_cqt, best_threshold = get_onsets_by_cqt_rms_optimised(
        filename)
    #print("5. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    # if len(onset_frames_cqt)<topN:
    onset_frames_cqt = get_miss_onsets_by_cqt(y, onset_frames_cqt)
    #print("6. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    #onset_frames_cqt = check_onset_by_cqt_v2(y, onset_frames_cqt)
    #print("7. onset_frames_cqt,best_threshold is {},{}".format(onset_frames_cqt, best_threshold))
    onset_frames_cqt_time = librosa.frames_to_time(onset_frames_cqt, sr=sr)
    #print("onset_frames_cqt_time is {}".format(onset_frames_cqt_time))

    type_index = get_onsets_index_by_filename(filename)
    total_frames_number = get_total_frames_number(filename)
    best_y = []
    # reference (standard) beat frame positions
    if len(onset_frames_cqt) > 0:
        base_frames = onsets_base_frames(
            codes[type_index], total_frames_number - onset_frames_cqt[0])
        base_frames = [
            x + (onset_frames_cqt[0] - base_frames[0]) for x in base_frames
        ]
        min_d, best_y, onsets_frames = get_dtw_min(onset_frames_cqt,
                                                   base_frames, 65)
    else:
        base_frames = onsets_base_frames(codes[type_index],
                                         total_frames_number)
    base_onsets = librosa.frames_to_time(base_frames, sr=sr)

    # librosa.display.specshow(CQT)
    plt.figure(figsize=(10, 6))
    plt.subplot(4, 1, 1)  # first panel; plt.subplot(rows, cols, index)
    # plt.colorbar(format='%+2.0f dB')
    # plt.title('Constant-Q power spectrogram (note)')
    librosa.display.specshow(CQT, y_axis='cqt_note', x_axis='time')
    plt.vlines(onset_frames_cqt_time, 0, sr, color='y', linestyle='solid')
    #plt.vlines(base_onsets, 0, sr, color='r', linestyle='solid')

    # print(plt.figure)

    plt.subplot(4, 1, 2)  # second panel; plt.subplot(rows, cols, index)
    librosa.display.waveplot(y, sr=sr)
    plt.vlines(onset_frames_cqt_time,
               -1 * np.max(y),
               np.max(y),
               color='y',
               linestyle='solid')

    plt.subplot(4, 1, 3)
    rms = librosa.feature.rmse(y=y)[0]
    rms = [x / np.std(rms) for x in rms]
    max_rms = np.max(rms)
    # rms = np.diff(rms)
    times = librosa.frames_to_time(np.arange(len(rms)))
    # rms_on_onset_frames_cqt = [rms[x] for x in onset_frames_cqt]
    # min_rms_on_onset_frames_cqt = np.min(rms_on_onset_frames_cqt)
    # rms = [1 if x >=min_rms_on_onset_frames_cqt else 0 for x in rms]
    plt.plot(times, rms)
    # plt.axhline(min_rms_on_onset_frames_cqt)
    plt.axhline(max_rms * best_threshold)
    # plt.vlines(onsets_frames_rms_best_time, 0,np.max(rms), color='y', linestyle='solid')
    plt.vlines(onset_frames_cqt_time,
               0,
               np.max(rms),
               color='y',
               linestyle='solid')
    #plt.vlines(base_onsets, 0, np.max(rms), color='r', linestyle='solid')
    plt.xlim(0, np.max(times))

    plt.subplot(4, 1, 4)
    plt.vlines(base_onsets, 0, np.max(rms), color='r', linestyle='dashed')
    plt.xlim(0, np.max(times))
    plt.axhline(max_rms * best_threshold)
    return onset_frames_cqt, best_y, best_threshold, plt
Пример #55
0
def analyze_audio(original, recording, tempo):
    o_y, o_sr = librosa.load(original)
    r_y, r_sr = librosa.load(recording)

    # calculate tempos and tempo evaluation (TODO: use the given tempo here;
    # the np.repeat workaround below is clumsy and should be replaced)
    o_tempo, o_beat_frames = librosa.beat.beat_track(y=o_y,
                                                     sr=o_sr,
                                                     start_bpm=tempo)
    r_tempo, r_beat_frames = librosa.beat.beat_track(y=r_y,
                                                     sr=r_sr,
                                                     start_bpm=tempo)
    o_beats, r_beats = librosa.frames_to_time(
        o_beat_frames, sr=o_sr), librosa.frames_to_time(r_beat_frames, sr=r_sr)
    ref_weight = 0.5
    # mir_eval.tempo.validate(np.repeat(o_tempo,2),ref_weight,np.repeat(r_tempo,2))
    tempo_score = mir_eval.tempo.evaluate(np.repeat(o_tempo, 2), ref_weight,
                                          np.repeat(r_tempo, 2))['P-score']

    # beat calculations using DP
    o_beats, r_beats = mir_eval.beat.trim_beats(
        o_beats), mir_eval.beat.trim_beats(r_beats)
    beat_metrics = mir_eval.beat.evaluate(o_beats, r_beats)
    beat_p, beat_kl = beat_metrics['P-score'], beat_metrics['Information gain']

    # onset calculation (between [0,1])
    o_onsets = librosa.onset.onset_detect(y=o_y, sr=o_sr, units='time')
    r_onsets = librosa.onset.onset_detect(y=r_y, sr=r_sr, units='time')
    onset_precision = mir_eval.onset.evaluate(o_onsets, r_onsets)['Precision']

    # cosine similarity between spectral centroids
    o_centroids = librosa.feature.spectral_centroid(y=o_y, sr=o_sr)
    r_centroids = librosa.feature.spectral_centroid(y=r_y, sr=r_sr)
    o_len, r_len = o_centroids.shape[1], r_centroids.shape[1]
    if o_len < r_len:
        r_centroids = r_centroids[:, (r_len - o_len):]
    else:
        o_centroids = o_centroids[:, (o_len - r_len):]
    centroid_sim = np.sum(o_centroids * r_centroids) / (
        np.linalg.norm(o_centroids) * np.linalg.norm(r_centroids))

    # chroma freq (12 pitch classes per frame, compute freq for max per frame using Short Time FT)
    o_chroma, r_chroma = librosa.feature.chroma_stft(
        y=o_y, sr=o_sr), librosa.feature.chroma_stft(y=r_y, sr=r_sr)
    o_mchroma, r_mchroma = np.argmax(o_chroma, axis=0), np.argmax(r_chroma,
                                                                  axis=0)
    o_counts, r_counts = collections.Counter(o_mchroma), collections.Counter(
        r_mchroma)
    oc_len, rc_len = len(o_mchroma), len(r_mchroma)
    if oc_len < rc_len:
        r_mchroma = r_mchroma[(rc_len - oc_len):]
    elif oc_len > rc_len:
        o_mchroma = o_mchroma[(oc_len - rc_len):]
    nmse = 0
    for i in range(len(o_mchroma)):
        if abs(o_mchroma[i] - r_mchroma[i]) > 2:
            nmse += np.sign(o_mchroma[i] - r_mchroma[i])
    nmse /= oc_len
    nmse = 1 - abs(nmse)

    # probabilistic YIN (HMM model on pitch classes for computing fundamental frequencies)
    """
    o_f0, ovf, ovp = librosa.pyin(o_y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))
    r_f0, rvf, rvp = librosa.pyin(r_y, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))
    o_f0 = np.array([el for el in o_f0 if not math.isnan(el)])
    r_f0 = np.array([el for el in r_f0 if not math.isnan(el)])
    of_len, rf_len = len(o_f0), len(r_f0)
    if of_len < rf_len:
        r_f0 = r_f0[(rf_len-of_len):]
    else:
        o_f0 = o_f0[(of_len-rf_len):]
    f0_sim = np.sum(o_f0 * r_f0) / (np.linalg.norm(o_f0) * np.linalg.norm(r_f0))
    """
    f0_sim = centroid_sim

    # MFCC (mel freq cepstral coefficients for ML)
    return (tempo_score, beat_p, beat_kl, onset_precision, nmse, centroid_sim,
            f0_sim)
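
# The closing comment in analyze_audio mentions MFCCs but never computes them;
# a minimal sketch of an MFCC-based similarity, mirroring the centroid
# comparison above (the helper name and n_mfcc value are hypothetical):
def mfcc_similarity_sketch(o_y, o_sr, r_y, r_sr, n_mfcc=13):
    # summarize each recording by its mean MFCC vector
    o_vec = np.mean(librosa.feature.mfcc(y=o_y, sr=o_sr, n_mfcc=n_mfcc), axis=1)
    r_vec = np.mean(librosa.feature.mfcc(y=r_y, sr=r_sr, n_mfcc=n_mfcc), axis=1)
    # cosine similarity between the two mean MFCC vectors
    return float(np.dot(o_vec, r_vec) /
                 (np.linalg.norm(o_vec) * np.linalg.norm(r_vec)))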
Пример #56
0
import librosa
import sys

filename = 'humble.mp3'
#load humble
y, sr = librosa.load(filename)

#get beats
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
#print
print(beat_times)
librosa.output.times_csv('beat_times.csv', beat_times)
Пример #57
0
    return audio, sr


savepath = 'e:/test_image/'
filename = 'F:/项目/花城音乐项目/样式数据/2.27MP3/旋律/视唱1-02(90).wav'

#y, sr = librosa.load(filename)
y, sr = load_and_trim(filename)
chromagram = librosa.feature.chroma_cqt(y, sr=sr)
librosa.display.specshow(chromagram,
                         x_axis='time',
                         y_axis='chroma',
                         cmap='coolwarm')

onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
onset_times = librosa.frames_to_time(onset_frames, sr=sr)
plt.vlines(onset_times, 0, y.max(), color='r', linestyle='--')
onset_samples = librosa.time_to_samples(onset_times)
print(onset_samples)
#plt.subplot(len(onset_times),1,1)
plt.show()

plt.figure(figsize=(5, 80))
for i in range(0, len(onset_times)):
    start = onset_samples[i] - sr / 2
    if start < 0:
        start = 0
    end = onset_samples[i] + sr / 2
    #y2 = [x if i> start and i<end else 0 for i,x in enumerate(y)]
    y2 = [x for i, x in enumerate(y) if i > start and i < end]
    y2[int(len(y2) / 2)] = np.max(y)  # normalize the display scale across plots
Пример #58
0
def midi_to_piano_cqt(midi):
  piano_roll = midi.get_piano_roll(
      times=librosa.frames_to_time(np.arange(midi.get_end_time() * 22050 / 512)))
  piano_subset = piano_roll[36:96] + 1e-10  # keep MIDI notes 36-95 (C2 to B6) of the piano roll
  return piano_subset
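
# A minimal usage sketch for midi_to_piano_cqt (not from the original source);
# the .mid path is hypothetical and pretty_midi is assumed to be imported:
#
#     pm = pretty_midi.PrettyMIDI('example.mid')
#     piano_subset = midi_to_piano_cqt(pm)   # shape: (60, n_frames)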
Пример #59
0
    img.itemset((c_max[x], x), 1)

# strongest-timbre image (marks the max chroma bin per frame)
# librosa.display.specshow(img, x_axis='time',  cmap='coolwarm')

# audio duration
time = librosa.get_duration(y)
print("time is {}".format(time))
# onset frames
onsets_frames = librosa.onset.onset_detect(y)
print(onsets_frames)

# onset times
onstm = librosa.frames_to_time(onsets_frames, sr=sr)
print(onstm)
#plt.rcParams['figure.figsize'] = (2.0, 2.0) # set the figure size
#plt.rcParams['savefig.dpi'] = 28 # saved-image pixel density
#plt.rcParams['figure.dpi'] = 28 # display resolution
#librosa.display.specshow(librosa.amplitude_to_db(D))
#plt.vlines(onstm, 0, sr, color='r', linestyle='dashed')
#plt.colorbar()

code = '[500,500,1000;500,500,1000;500,500,750,250;2000]'
pitch_code = '[3,3,3,3,3,3,3,5,1,2,3]'
pitch_v = get_chroma_pitch(pitch_code)
# use a distinct name so the onsets_base_frames() helper is not shadowed
base_frames = onsets_base_frames(code, h)
base_frames[-1] = base_frames[-1] - 1
print(base_frames)
print(np.diff(base_frames))
Пример #60
0
librosa.display.specshow(C,
                         y_axis='cqt_hz',
                         sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
plt.tight_layout()

##########################################################
# To reduce dimensionality, we'll beat-synchronize the CQT
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
Csync = librosa.util.sync(C, beats, aggregate=np.median)

# For plotting purposes, we'll need the timing of the beats
# we fix_frames to include non-beat frames 0 and C.shape[1] (final frame)
beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats,
                                                            x_min=0,
                                                            x_max=C.shape[1]),
                                    sr=sr)

plt.figure(figsize=(12, 4))
librosa.display.specshow(Csync,
                         bins_per_octave=12 * 3,
                         y_axis='cqt_hz',
                         x_axis='time',
                         x_coords=beat_times)
plt.tight_layout()

#####################################################################
# Let's build a weighted recurrence matrix using beat-synchronous CQT
# (Equation 1)
# width=3 prevents links within the same bar
# mode='affinity' here implements S_rep (after Eq. 8)
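
# A minimal sketch of the recurrence-matrix step described above; Csync is the
# beat-synchronous CQT computed earlier, and sym=True is an assumed setting:
R = librosa.segment.recurrence_matrix(Csync, width=3, mode='affinity',
                                      sym=True)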