Exemplo n.º 1
0
def compute_chroma(data):
    """Compute a chromagram from an audio signal.

    :param data: 1-D audio time series, as accepted by
        ``librosa.feature.chroma_stft``.
    :return: chroma energy matrix of shape (n_chroma, n_frames).
    """
    # Renamed from `chr`, which shadowed the builtin of the same name.
    chromagram = feature.chroma_stft(data)
    return chromagram
Exemplo n.º 2
0
def chromagram(audio, fs, params):
	"""Compute a log-compressed, column-normalized chromagram.

	`params` supplies the keyword settings for librosa's `chroma_stft`
	(n_fft, hop_length, win_length, window, center) plus the
	post-processing settings: gamma (compression), norm (p-norm order)
	and epsilon (silence threshold).
	"""
	# The FFT size must be able to hold the analysis window.
	if params["n_fft"] < params["win_length"]:
		raise ParamInputError("Window too large! For {} FFT samples and {}Hz sampling frequency, maximum window size is {:.2f}ms".format(params["n_fft"], fs, params["n_fft"]*1000/fs))

	raw = chroma_stft(
		audio,
		fs,
		norm = None,
		n_fft = params["n_fft"],
		hop_length = params["hop_length"],
		win_length = params["win_length"],
		window = params["window"],
		center = params["center"]
	)

	# Logarithmic compression tames large dynamic differences.
	compressed = np.log10(1 + params["gamma"] * raw)

	# Column-wise normalization; near-silent frames are replaced by a
	# flat chroma vector of unit p-norm.
	order = params["norm"]
	flat = (1 / np.linalg.norm(np.ones(12), order)) * np.ones(12)
	for frame in compressed.T:
		magnitude = np.linalg.norm(frame, order)
		if magnitude < params["epsilon"]:
			frame[...] = flat
		else:
			frame[...] = frame / magnitude

	return compressed
Exemplo n.º 3
0
    def get_chroma_frequencies(self, outside_series=None, outside_sr=None):
        """Return (hop_length, chromagram) for the selected series and rate.

        Falls back to the object's own series/sample-rate when the
        `outside_*` arguments are None (via select_series / select_sr).
        """
        hop_length = 512  # frames advance by 512 samples

        series = self.select_series(outside_series)
        rate = self.select_sr(outside_sr)
        return hop_length, chroma_stft(series, sr=rate, hop_length=hop_length)
Exemplo n.º 4
0
def chroma_stft(args):
    """Chromagram computed from a pre-computed power spectrogram.

    Reads fs/nfft/noverlap from `args` and feeds the PSD straight into
    librosa's chroma_stft (y=None means "use S instead of raw audio").
    """
    spectrogram = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    hop = nfft - noverlap  # hop size implied by the frame overlap
    return rosaft.chroma_stft(y=None, sr=fs, S=spectrogram,
                              n_fft=nfft, hop_length=hop)
Exemplo n.º 5
0
    def STFT(self, audio):
        '''Computes a chromagram of the audio signal after downmixing to mono.

        \naudio -> audio signal in form of numpy array (downmixed to mono here)

        NOTE(review): despite the name, this returns the result of
        `chroma_stft`, i.e. a chroma matrix, not a raw STFT.  `n_chroma`
        and `hop_len` are not defined in this method -- presumably
        module-level globals; verify they are in scope.'''

        audio = self._make_mono(audio)
        stft = chroma_stft(audio,
                           sr=self.sr,
                           n_chroma=n_chroma,
                           hop_length=hop_len)
        return stft
Exemplo n.º 6
0
def create_features():
    """Extract a feature matrix from every .wav file in `file_names`.

    The extraction method is chosen by the module-level
    `audio_processing_choice` ("chroma", "cqt", "mfcc", "fbank"; anything
    else falls through to logfbank).  Returns one feature array per file.
    """
    print("\nConverting LM data to features...")
    features_list = []
    for i, file_name in enumerate(file_names):
        print("File " + str(i+1) + "...")
        full_file_name = source_location + "\\" + file_name
        rate, data = wvf.read(full_file_name)  # sample rate + raw samples

        if audio_processing_choice == "chroma":
            # Flatten to 1-D, keep every other sample, cast to float.
            data = np.asarray([float(datum) for datum in data.flatten()[0::2]])
            # (frames x 12) chroma, tripled horizontally to 36 features
            # (enough for CNNv3 to work with).
            features = chroma_stft(y=data, sr=rate).T
            features = np.repeat(features, 3, axis=1)

        elif audio_processing_choice == "cqt":
            # Same preprocessing as the chroma branch.
            data = np.asarray([float(datum) for datum in data.flatten()[0::2]])
            # (frames x reduced_dim) constant-Q chroma.
            features = chroma_cqt(y=data, sr=rate, n_chroma=reduced_dim).T

        elif audio_processing_choice == "mfcc":
            # (frames x reduced_dim) MFCCs straight from the raw samples.
            features = mfcc(signal=data, samplerate=rate,
                            winlen=frame_time_len, winstep=frame_time_len,
                            numcep=reduced_dim, nfilt=reduced_dim*2,
                            nfft=frame_len)

        elif audio_processing_choice == "fbank":
            # fbank returns (features, frame_energies); keep the features.
            features = fbank(signal=data, samplerate=rate,
                             winlen=frame_time_len, winstep=frame_time_len,
                             nfilt=reduced_dim, nfft=frame_len)[0]

        else:
            # Default: log filter-bank energies.
            features = logfbank(signal=data, samplerate=rate,
                                winlen=frame_time_len, winstep=frame_time_len,
                                nfilt=reduced_dim, nfft=frame_len)

        features_list.append(features)
    return features_list
Exemplo n.º 7
0
def get_chromagram(y, sr, chroma):
	"""Compute a 12-bin chromagram using the method encoded in `chroma`.

	Parameters
	----------
	y : np.ndarray
		audio samples
	sr : number > 0 [scalar]
		target sampling rate
	chroma : str
		"chroma-samplerate-framesize-overlap" specification string

	Returns
	-------
	np.ndarray or None
		the chromagram (None for an unrecognized method name)
	"""
	params = get_parameters_chroma(chroma)
	method = params["chroma"]
	result = None
	if method == 'nnls':
		result = get_nnls(y, params["sr"], params["fr"], params["off"])
	elif method == 'cqt':
		win = get_window('blackmanharris', params["fr"])
		# Only 4 octaves for the special cqt @ 5525 Hz configuration.
		octaves = 4 if params["chroma"] == "cqt" and params["sr"] == 5525 else 5
		result = chroma_cqt(y=y, sr=params["sr"], C=None,
		                    hop_length=params["off"], norm=None,
		                    window=win, fmin=110, n_chroma=12,
		                    n_octaves=octaves, bins_per_octave=36)
	elif method == 'cens':
		win = get_window('blackmanharris', params["fr"])
		result = chroma_cens(y=y, sr=params["sr"], C=None,
		                     hop_length=params["off"], norm=None,
		                     window=win, fmin=110, n_chroma=12,
		                     n_octaves=5, bins_per_octave=36)
	elif method == 'stft':
		win = get_window('blackmanharris', params["fr"])
		result = chroma_stft(y=y, sr=params["sr"], hop_length=params["off"],
		                     norm=None, window=win, n_chroma=12)
	return result
Exemplo n.º 8
0
def feature_extractor(y, sr):
	"""Stack several librosa features frame-by-frame.

	Returns an (n_frames x 37) array: RMS, spectral centroid, bandwidth,
	rolloff and zero-crossing rate (one row each), 12 chroma bins and
	20 MFCCs, all transposed so rows are frames.
	"""
	print('вошли в процедyрy feature_extractor')
	from librosa import feature as f

	print('либрозy как f загрyзили')
	# Frame-wise scalar descriptors.
	rmse = f.rms(y=y)[0]  # formerly f.rmse(y=y)
	spec_cent = f.spectral_centroid(y=y, sr=sr)
	spec_bw = f.spectral_bandwidth(y=y, sr=sr)
	rolloff = f.spectral_rolloff(y=y, sr=sr)
	zcr = f.zero_crossing_rate(y)
	# Multi-band descriptors.
	mfcc = f.mfcc(y=y, sr=sr)
	chroma = f.chroma_stft(y=y, sr=sr)
	# Stack everything, then transpose: one row per frame.
	output = np.vstack([rmse, spec_cent, spec_bw, rolloff, zcr, chroma, mfcc]).T
	print('feature_extractor закончил работy')
	return output
Exemplo n.º 9
0
 def extract_features(self):
     """Return a 1-D vector of frame-averaged spectral features.

     Concatenates averaged MFCCs (40), chroma, mel spectrogram,
     spectral contrast and tonnetz features of self.waveform.
     """
     # Magnitude STFT shared by the chroma and contrast computations.
     stft = np.abs(librosa.stft(self.waveform))
     mfccs = mfcc(y=self.waveform, sr=self.sample_rate,
                  n_mfcc=40).mean(axis=1)
     chroma = chroma_stft(S=stft, sr=self.sample_rate).mean(axis=1)
     mel = melspectrogram(self.waveform, sr=self.sample_rate).mean(axis=1)
     contrast = spectral_contrast(S=stft, sr=self.sample_rate).mean(axis=1)
     # Tonnetz is computed on the harmonic component only.
     harmonic = librosa.effects.harmonic(self.waveform)
     tonn = tonnetz(y=harmonic, sr=self.sample_rate).mean(axis=1)
     parts = [mfccs, chroma, mel, contrast, tonn]
     return np.concatenate(parts, axis=0)
def get_beat_sync_chroma(audio):
    """
    Get a beat synchronous chroma
    :param audio: The path to the audio file
    :return: A beat synchronous chroma (12 x n_segments array)
    """
    y, sr = core.load(audio, sr=44100)
    tempo, framed_dbn = self_tempo_estimation(y, sr)
    # BUG FIX: np.append returns a NEW array instead of mutating its
    # argument; the result was previously discarded, silently dropping the
    # final beat-to-end-of-track segment (the sibling
    # get_beat_sync_chroma_and_spectrum assigns it, confirming the intent).
    framed_dbn = np.append(framed_dbn, np.array(len(y)/sr))
    # Calculate chroma semitone spectrum: one averaged chroma vector per
    # beat-to-beat segment, computed from the segment's power spectrogram.
    chromas = []
    for i in range(1, len(framed_dbn)):
        stft = abs(core.stft(y[int(framed_dbn[i-1]*sr):int(framed_dbn[i]*sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft**2), axis=1)
        chromas.append(chroma)
    chromas = np.array(chromas).transpose()
    return chromas
Exemplo n.º 11
0
def plot_chroma(file, title):
    """Display a chromagram of `file`, titled "<title> Chromagram"."""
    # kaiser_fast resampling keeps loading quick.
    signal, sr = librosa.load(file, sr=42000, res_type='kaiser_fast')

    # Power spectrogram feeding the chroma computation.
    power = np.abs(librosa.stft(signal, n_fft=4096))**2
    chroma = chroma_stft(S=power, sr=sr)

    plt.figure(figsize=(18, 4))
    librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
    plt.colorbar()
    plt.title(title + ' Chromagram')
    plt.tight_layout()
    plt.show()
Exemplo n.º 12
0
 def extract_coefficents(self, type, hasLabel=True):
     """Write averaged chroma coefficients for every .au file under data/<type>.

     For each file the chroma frames are transposed, the outer 10% of
     frames on each side is discarded, and the remainder is averaged.
     """
     out = General_Out(type, 'raw', 'chroma')
     load_dir = os.path.join(os.getcwd(), 'data/' + type)
     for subdir, dirs, files in os.walk(load_dir):
         for file in files:
             if not file.endswith(".au"):
                 continue
             # sr=None keeps the file's original sample rate.
             y, sr = load(os.path.join(subdir, file), sr=None)
             frames = np.transpose(chroma_stft(y, sr))
             n = frames.shape[0]
             # Average only the middle 80% of frames.
             x = np.mean(frames[int(n * 1 / 10):int(n * 9 / 10)], axis=0)
             print(file, x)
             out.add(file, x)
     out.write()
    def _calc_feat(self, window, feat_name):
        """Compute the named librosa feature for `window`, pool its frames
        according to self.feature_pool, and optionally L2-normalize."""
        # --- feature computation -------------------------------------
        if feat_name == 'mfcc':
            feat = FT.mfcc(y=window, sr=self.sr, n_mfcc=_N_MFCC)
        elif feat_name == 'chroma_stft':
            feat = FT.chroma_stft(y=window, sr=self.sr)
        elif feat_name == 'melspectrogram':
            mel = FT.melspectrogram(y=window,
                                    sr=self.sr,
                                    n_mels=128,
                                    n_fft=1024,
                                    hop_length=512)
            feat = L.power_to_db(mel)  # log-scale the mel energies
        elif feat_name == 'spectral_centroid':
            feat = FT.spectral_centroid(y=window, sr=self.sr)
        elif feat_name == 'spectral_rolloff':
            feat = FT.spectral_rolloff(y=window, sr=self.sr)
        elif feat_name == 'tonnetz':
            feat = FT.tonnetz(y=window, sr=self.sr)
        elif feat_name == 'zero_crossing_rate':
            feat = FT.zero_crossing_rate(y=window)
        else:
            assert False, 'Invalid feature'

        # --- pool the per-frame values into one vector ---------------
        pool = self.feature_pool
        if pool == 'sum':
            feat = feat.sum(axis=1)
        elif pool == 'max':
            feat = feat.max(axis=1)
        elif pool == 'mean':
            feat = feat.mean(axis=1)
        elif pool == 'flatten':
            feat = feat.flatten()
        elif pool != 'none':
            assert False, 'Invalid feature pooling scheme'

        # --- optional L2 normalization -------------------------------
        if self.l2_norm and feat.shape[0] > 1:
            feat /= np.linalg.norm(feat)
        return feat
def get_dbeat_sync_chroma(audio):
    """
    Get a downbeat synchronous chroma
    :param audio: The path to the audio file
    :return: (chromas, semitones, downbeats, tempo)
    """
    y, sr = core.load(audio, sr=44100)
    tempo, beats = self_tempo_estimation(y, sr)
    # NOTE(review): the original `np.append(beats, ...)` call here was a
    # no-op -- np.append returns a new array that was discarded, and
    # `beats` is overwritten below anyway -- so it has been removed.
    act = beatrnn()(audio)
    beats = downbeattrack(beats_per_bar=[4, 4], fps=100)(act)
    # Keep only beat times flagged as downbeats (bar position == 1).
    downbeats = beats[beats[:, 1] == 1][:][:, 0]
    framed_dbn = np.concatenate([np.array([0]), downbeats])

    # Calculate chroma and semitone spectra, one averaged vector per
    # downbeat-to-downbeat segment.
    semitones = []
    chromas = []
    for i in range(1, len(framed_dbn)):
        stft = abs(core.stft(y[int(framed_dbn[i-1]*sr):int(framed_dbn[i]*sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft**2), axis=1)
        semitone = np.mean(hz_to_pitch(stft, sr=sr), axis=1)
        chromas.append(chroma)
        semitones.append(semitone)
    chromas = np.array(chromas).transpose()
    semitones = np.array(semitones).transpose()

    # Plot waveform with downbeat markers, the chroma map and semitone map.
    time = np.arange(len(y)) / sr
    fig, ax = plt.subplots(3, 1)
    ax[0].plot(time, y)
    ax[0].vlines(framed_dbn, -1, 1, colors='r', linestyles='dashdot')
    ax[0].set_xlim(framed_dbn[0], framed_dbn[-1])
    plt.sca(ax[1])
    plt.pcolor(framed_dbn, np.arange(13), chromas)
    plt.yticks(np.arange(13)+0.5, ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"])
    plt.ylim(0, 12)
    plt.sca(ax[2])
    plt.pcolor(semitones)
    print(tempo)

    return chromas, semitones, downbeats, tempo
def get_beat_sync_chroma_and_spectrum(audio, sr=None, bpm=None):
    """
    Returns the beat_sync_chroma and the beat_sync_spectrums
    :param audio: Path to the song, or numpy array
    :param sr: Sample rate in case the audio param is numpy array.
        NOTE(review): when `audio` is an ndarray, `sr` must be provided --
        it is used for the band limits and segment boundaries below and
        has no fallback.
    :param bpm: Precalculated bpm
    :return: (beat_sync_chroma, beat_sync_spec)
    """
    if not isinstance(audio, np.ndarray):
        # Loading from file forces a fixed 44.1 kHz sample rate.
        sr = 44100
        y = std.MonoLoader(filename=audio, samplerate=44100)()
    else:
        y = audio
    # Equal-loudness filtered copy, used only for the band-energy analysis.
    eql_y = std.EqualLoudness()(y)
    tempo, framed_dbn = self_tempo_estimation(y, sr, tempo=bpm)
    # NOTE(review): the end-of-track timestamp is appended only when the
    # beat count is a multiple of 4 -- presumably to complete a bar;
    # confirm this condition is intended.
    if framed_dbn.shape[0] % 4 == 0:
        framed_dbn = np.append(framed_dbn, np.array(len(y)/sr))
    # Low / mid / high frequency bands (Hz).
    band1 = (0, 220)
    band2 = (220, 1760)
    band3 = (1760, sr / 2)
    band1list = []
    band2list = []
    band3list = []
    chromas = []
    # Per beat segment: RMS energy in each band (from the FFT of the
    # equal-loudness signal) and a time-averaged chroma vector.
    for i in range(1, len(framed_dbn)):
        fft_eq = abs(np.fft.fft(eql_y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        freqs = np.fft.fftfreq(len(fft_eq), 1 / sr)
        band1list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band1[0], freqs < band1[1]))]**2))))
        band2list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band2[0], freqs < band2[1]))]**2))))
        band3list.append(np.sqrt(np.mean(sum(fft_eq[np.where(np.logical_and(freqs > band3[0], freqs < band3[1]))]**2))))
        stft = abs(core.stft(y[int(framed_dbn[i - 1] * sr):int(framed_dbn[i] * sr)]))
        chroma = np.mean(feature.chroma_stft(y=None, S=stft ** 2), axis=1)
        chromas.append(chroma)
    chromas = np.array(chromas).transpose()
    band1list = np.array(band1list).transpose()
    band2list = np.array(band2list).transpose()
    band3list = np.array(band3list).transpose()
    return (chromas, np.vstack([band1list, band2list, band3list]))
Exemplo n.º 16
0
'''This program is used to plot chromagrams'''
import matplotlib.pyplot as plt
import librosa.display as ld
import scipy.io.wavfile as sc
import librosa.feature as lf
import librosa as lb

# Load the rock sample and show its raw waveform.
file_name = 'Training_Data_Set/rock/rock.00001.wav'
rate, data = sc.read(file_name)
plt.figure(1)  # BUG FIX: plt.figure(1) was called twice; one call suffices
plt.plot(data)
plt.show()

# 12-bin chromagram (4096-sample FFT, 512-sample hop), shown per frame.
M = lf.chroma_stft(data, sr=rate, n_fft=4096, hop_length=512)
ld.specshow(M, x_axis='frames', y_axis='chroma')
plt.colorbar()
plt.title('rock_1')
plt.show()
def main(aud):
    """Classify a test audio file and return (predicted_species, description_row).

    Splits the audio into fixed-size windows, extracts spectral-centroid
    and chroma features per window, then predicts with Gaussian Naive
    Bayes trained on the module-level `new_dataset` table.
    """
    window_size = int(6.144000e+03)  # samples per analysis window

    waves = {}
    sg, mask, data, audio_mask, sample_rate = load_audio(str(aud))
    waves['audio'] = data[audio_mask]
    length = len(data[audio_mask])

    w = myfunc()
    windo = w(length)

    # Cut the signal into consecutive, non-overlapping windows.
    windows = {}
    wave = waves['audio']
    species = 'gens_specie'
    windows[species] = []
    for i in range(0, len(wave) // window_size):
        # BUG FIX: the original sliced wave[i:i+6144], producing heavily
        # overlapping windows; consecutive chunks were clearly intended.
        windows[species].append(wave[i * window_size:(i + 1) * window_size])

    # Build a DataFrame of features for the test audio.
    new_dataset_test = pd.DataFrame()
    for species in windows.keys():
        # BUG FIX: iterate over this species' windows, not over the number
        # of dict keys (len(windows) is always 1 here).
        for i in range(0, len(windows[species])):
            data_point = {
                'species': species.split('_')[1],
                'genus': species.split('_')[0]
            }
            spec_centroid = feature.spectral_centroid(windows[species][i])[0]
            chroma = feature.chroma_stft(windows[species][i], sample_rate)
            # 13 centroid frames x 12 chroma bins per window -- assumes each
            # window yields at least 13 frames; TODO confirm.
            for j in range(0, 13):
                data_point['spec_centr_' + str(j)] = spec_centroid[j]
                for k in range(0, 12):
                    data_point['chromogram_' + str(k) + "_" +
                               str(j)] = chroma[k, j]
            new_dataset_test = new_dataset_test.append(data_point,
                                                       ignore_index=True)

    # Classification of the test audio; `new_dataset` is the module-level
    # training table -- TODO confirm it is populated before main() runs.
    features = list(new_dataset.columns)
    features.remove('species')
    features.remove('genus')

    X = new_dataset[features].values
    y = new_dataset['species'].values
    X_test = new_dataset_test[features].values
    y_test = new_dataset_test['species'].values

    NB = naive_bayes.GaussianNB()
    SSS = sklearn.model_selection.StratifiedShuffleSplit(n_splits=5,
                                                         test_size=0.2)

    # Refit on each shuffle-split; the last fit's prediction is kept.
    for train_index, val_index in SSS.split(X, y):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        NB.fit(X_train, y_train)
        y_pred = NB.predict(X_test)

    # Look up the human-readable description for the predicted class.
    check = pd.DataFrame()
    df = pd.read_csv("/home/megha/Desktop/Audio_website/templates/descr.csv",
                     delimiter=';')
    check = df.loc[df['check'] == y_pred[0]]
    return y_pred[0], check
Exemplo n.º 18
0
import IPython.display
import matplotlib.style as ms

# Load a 5-second excerpt starting at 30 s.
#y, sr = librosa.load('MoanaAudio2016.wav')
y, sr = librosa.load('SecondHalfOfMoana.wav', offset=30.0, duration=5.0)
from librosa.feature import chroma_stft
# Stream the same file in 1024-sample blocks.
block_gen = sf.blocks('SecondHalfOfMoana.wav', blocksize=1024)
print(block_gen)

# NOTE(review): `samplerate` is computed but never used below; the chroma
# calls use `sr` from the librosa.load above instead -- confirm intended.
samplerate = sf.info('SecondHalfOfMoana.wav').samplerate
chromas = []
for bl in block_gen:
    # downmix frame to mono (averaging out the channel dimension)
    y = np.mean(bl, axis=1)
    # compute chroma feature
    chromas.append(chroma_stft(y, sr=sr))
# NOTE(review): "%d" is not interpolated here -- print receives two
# arguments and outputs the format string literally.
print("This is Y:%d \n", y)

ms.use('seaborn-muted')
#%matplotlib inline
#Load the example track
y, sr = librosa.load(librosa.util.example_audio_file())
# How about something more advanced?  Let's decompose a spectrogram with NMF, and then resynthesize an individual component
D = librosa.stft(y)

# Separate the magnitude and phase
S, phase = librosa.magphase(D)

# Decompose by nmf
components, activations = librosa.decompose.decompose(S,
                                                      n_components=8,
Exemplo n.º 19
0
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa
    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: list of features to compute (None/empty = all)
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave, stacked row-wise
    """
    print("Computing features for ", sound_name)

    # BUG FIX: the original used a mutable default argument ([]), which is
    # shared across calls; use None and fall back to the full list instead.
    # An explicitly passed empty list still means "all features", as before.
    if not feature_list:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens",
                        "melspectrogram", "mfcc", "rmse",
                        "spectral_centroid", "spectral_bandwidth",
                        "spectral_contrast", "spectral_flatness",
                        "spectral_rolloff", "poly_features", "tonnetz",
                        "zero_crossing_rate"]

    features = []

    # Each selected feature appends a (n_bins, n_frames) block; the bin
    # count per feature is noted inline.
    if "chroma_stft" in feature_list:          # 12 bins
        features.append(feat.chroma_stft(soundwave, sampling_rate))

    if "chroma_cqt" in feature_list:           # 12 bins
        features.append(feat.chroma_cqt(soundwave, sampling_rate))

    if "chroma_cens" in feature_list:          # 12 bins
        features.append(feat.chroma_cens(soundwave, sampling_rate))

    if "melspectrogram" in feature_list:       # 128 bins
        features.append(feat.melspectrogram(soundwave, sampling_rate))

    if "mfcc" in feature_list:                 # 20 bins
        features.append(feat.mfcc(soundwave, sampling_rate))

    if "rmse" in feature_list:                 # 1 bin
        # NOTE(review): feat.rmse was renamed feat.rms in librosa >= 0.7;
        # kept as-is for the pinned librosa version -- confirm.
        features.append(feat.rmse(soundwave))

    if "spectral_centroid" in feature_list:    # 1 bin
        features.append(feat.spectral_centroid(soundwave, sampling_rate))

    if "spectral_bandwidth" in feature_list:   # 1 bin
        features.append(feat.spectral_bandwidth(soundwave, sampling_rate))

    if "spectral_contrast" in feature_list:    # 7 bins
        features.append(feat.spectral_contrast(soundwave, sampling_rate))

    if "spectral_flatness" in feature_list:    # 1 bin
        features.append(feat.spectral_flatness(soundwave))

    if "spectral_rolloff" in feature_list:     # 1 bin
        features.append(feat.spectral_rolloff(soundwave, sampling_rate))

    if "poly_features" in feature_list:        # 2 bins
        features.append(feat.poly_features(soundwave, sampling_rate))

    if "tonnetz" in feature_list:              # 6 bins
        features.append(feat.tonnetz(soundwave, sampling_rate))

    if "zero_crossing_rate" in feature_list:   # 1 bin
        features.append(feat.zero_crossing_rate(soundwave))

    return np.concatenate(features)
Exemplo n.º 20
0
        # Pair audio files in <audio_dir>/<genre> with label files in
        # <label_dir>/<genre>; file stems are sorted by their last 5 chars.
        adir = os.path.join(audio_dir, genre)
        ldir = os.path.join(label_dir, genre)
        file_names = [".".join(f.split(".")[:-1]) for f in os.listdir(adir)]
        file_names = sorted(file_names, key=last_5chars)

        acc = []
        count = 0
        print("Running genre", genre, "...")
        for f in file_names:
            with open(os.path.join(ldir, f + label_ext)) as label_file:
                # Negative reference key -> skip this track.
                t = int(label_file.readline())
                if t < 0:
                    continue
                count += 1
                data, sr = load(os.path.join(adir, f + audio_ext), sr=None)
                # Time-averaged chroma, log-compressed with gain g.
                chroma = chroma_stft(y=data, sr=sr, n_fft=4096, base_c=False)
                chroma = np.mean(chroma, axis=1)
                chroma = np.log(1 + g * chroma)

                # Correlate against each template row; tiling the chroma 4x
                # presumably matches 4 template groups of 24 keys -- verify
                # against the shape of `template`.
                prob = np.apply_along_axis(pearsonr, 1, template, chroma)[:, 0]
                weight = np.tile(chroma, 4)
                y = np.argmax(prob * weight) % 24

                acc.append(mirex_evaluate(y, t))
                print(f + "\t" + str(y))

        table.add_row([
            genre,
            acc.count(1),
            acc.count(0.5),
            acc.count(0.3),
Exemplo n.º 21
0
    # Second moving-average filter, roughly half the window w wide in units
    # of d-decimated frames.
    mean_filt2 = np.ones(w // 2 // d + 1) / (w // 2 // d + 1)
    overall_acc = []

    # Vectorized key-symbol <-> key-number converters and MIREX scorer.
    sym2num = np.vectorize(inv_key_map.get)
    num2sym = np.vectorize(key_map.get, otypes=[np.str])
    evaluate_vec = np.vectorize(mirex_evaluate, otypes=[float])

    for f in file_names:
        # One reference key symbol per labelled segment.
        label = np.loadtxt(os.path.join(data_dir, ref_prefix + f + '.txt'), dtype='str')
        t = sym2num(label[:, 1])

        data, sr = load(os.path.join(data_dir, f + '.wav'), sr=None)
        # d chroma frames per labelled segment (hop = sr/d samples).
        hop_size = int(sr / d)
        window_size = hop_size * 2

        chroma_a = chroma_stft(y=data, sr=sr, hop_length=hop_size, n_fft=window_size, base_c=False)
        # Smooth each chroma bin over time.
        chroma_a = np.apply_along_axis(fftconvolve, 1, chroma_a, mean_filt, 'same')

        # Pad or trim to exactly d frames per label.
        if chroma_a.shape[1] > len(label) * d:
            chroma_a = chroma_a[:, :len(label) * d]
        elif chroma_a.shape[1] < len(label) * d:
            chroma_a = np.column_stack((chroma_a, np.zeros((12, len(label) * d - chroma_a.shape[1]))))

        # chroma_a = decimate(chroma_a[:, int(d/2):], d, axis=1)
        # Average the d frames of each segment, log-compress (gain g), then
        # smooth again with the second filter.
        chroma_a = chroma_a.reshape(12, len(label), d).mean(axis=2)
        chroma_a = np.log(1 + g * chroma_a)

        chroma_a = np.apply_along_axis(fftconvolve, 1, chroma_a, mean_filt2, 'same')

        # Per-segment score for every key template row.
        prob = np.zeros((ks_template.shape[0], chroma_a.shape[1]))
        for n in range(chroma_a.shape[1]):
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt
# Bhairav Block Wise Reading

## Bhairav 1

# Stream bhairav1 in 2,646,000-sample blocks (60 s at 44.1 kHz).
block_gen = sf.blocks('data/Hindustani/wav/Bhairav/bhairav1.wav', blocksize=2646000)
rate = sf.info("data/Hindustani/wav/Bhairav/bhairav1.wav").samplerate
info = sf.info("data/Hindustani/wav/Bhairav/bhairav1.wav")
print(info)
chromas = []

# One chromagram per block, after downmixing to mono.
for bl in block_gen:
    y = np.mean(bl, axis=1)
    chromas.append(chroma_stft(y, sr=rate))

len(chromas)  # NOTE(review): result unused -- likely a notebook-cell leftover
# Save one chroma plot per block.
for j, chroma in enumerate(chromas):
    specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
    plt.title(f"Chromagram of Bhairav1_{j}")
    plt.savefig(f"data/chroma_files/bhairav-chromas/bhairav1/bhairav1_{j}.png")



## Bhairav 2

# Same pattern for bhairav2 (continuation truncated in this excerpt).
block_gen = sf.blocks('data/Hindustani/wav/Bhairav/bhairav2.wav', blocksize=2646000)
rate = sf.info("data/Hindustani/wav/Bhairav/bhairav2.wav").samplerate
info = sf.info("data/Hindustani/wav/Bhairav/bhairav2.wav")
print(info)
Exemplo n.º 23
0
    # Vectorized key-symbol <-> key-number converters and MIREX scorer.
    sym2num = np.vectorize(inv_key_map.get)
    num2sym = np.vectorize(key_map.get, otypes=[np.str])
    evaluateVec = np.vectorize(mirex_evaluate, otypes=[float])

    for f in file_names:
        # One reference key symbol per labelled segment.
        label = np.loadtxt(os.path.join(data_dir, f + '.txt'), dtype='str')
        t = sym2num(label[:, 1])

        data, sr = load(os.path.join(data_dir, f + '.wav'), sr=None)
        # d chroma frames per labelled segment (hop = sr/d samples).
        hopSize = int(sr / d)
        windowSize = hopSize * 2

        chromaVec = chroma_stft(y=data,
                                sr=sr,
                                hop_length=hopSize,
                                n_fft=windowSize,
                                base_c=False)
        # Smooth each chroma bin over time.
        chromaVec = np.apply_along_axis(fftconvolve, 1, chromaVec, meanFilt,
                                        'same')

        # Pad or trim so there are exactly d frames per label.
        if chromaVec.shape[1] > len(label) * d:
            chromaVec = chromaVec[:, :len(label) * d]
        elif chromaVec.shape[1] < len(label) * d:
            chromaVec = np.column_stack(
                (chromaVec, np.zeros(
                    (12, len(label) * d - chromaVec.shape[1]))))

        # Average the d frames of each segment, then log-compress (gain g).
        chromaVec = chromaVec.reshape(12, len(label), d).mean(axis=2)
        chromaVec = np.log(1 + g * chromaVec)
Exemplo n.º 24
0
import os
import numpy as np
import soundfile as sf
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt

# One output directory per Bhup recording.
bhup_files = os.listdir("data/Hindustani/wav/Bhup")
print(bhup_files)

for h, _ in enumerate(bhup_files):
    # BUG FIX: `os.system(f"mkdir ...")` is shell-dependent and fails when
    # the directory already exists; os.makedirs(..., exist_ok=True) is
    # portable and idempotent.
    os.makedirs(f"data/chroma_files/bhup-chromas/bhup{h+1}", exist_ok=True)

# Compute one chromagram per 60-second block of each recording and save a
# chroma plot per block.
chroma_dict = {}
for j in range(len(bhup_files)):
    rate = sf.info(f"data/Hindustani/wav/Bhup/bhup{j+1}.wav").samplerate
    block_gen = sf.blocks(f"data/Hindustani/wav/Bhup/bhup{j+1}.wav",
                          blocksize=rate * 60)
    chroma_dict[f"bhup{j+1}"] = []

    for bl in block_gen:
        y = np.mean(bl, axis=1)  # downmix to mono
        chroma_dict[f"bhup{j+1}"].append(chroma_stft(y, sr=rate))

    for k, chroma in enumerate(chroma_dict[f"bhup{j+1}"]):
        specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
        plt.title(f"Chromagram of Bhup{j+1}_{k+1}")
        plt.savefig(
            f"data/chroma_files/bhup-chromas/bhup{j+1}/bhup{j+1}_{k+1}.png")
feature=dict()  # feature vectors per genre
# NOTE(review): dict.fromkeys returns a NEW dict; this result is discarded,
# so the call below is a no-op.
feature.fromkeys(geners)
for gener in geners:  # iterate over the directory of each genre


    for music in Train_files[gener]:  # iterate over each file in the genre
        final=np.empty((1,12),float)
        name=working_dir + '\\' + gener + '\\' + music
        rate,data=sc.read(name)

        #print rate,data
        if (len(data.shape) == 2):  # stereo files: keep channel 0 only
            data = data[:,0]


        # Chroma from STFT: 4096-sample FFT, 2048-sample hop.
        C_DFT=lf.chroma_stft(y=data,sr=rate,n_fft=4096,hop_length=2048)
        '''select=range(0,C_DFT.shape[1],6)
        for i in range(len(select)-1):
            C_DFT_temp=C_DFT[:,select[i]:select[i+1]]

            col=np.mean(C_DFT_temp,axis=1)

            bins=np.array(range(1,13,1))

            col2=np.reshape(col,(1,len(col)))
            cen=util.centorid(col,bins)
            var=util.spread(col,bins,cen)
            max=np.argmax(col)
            min=np.argmin(col)
            if(i!=len(select)-2):
                C_DFT_temp=C_DFT[:,select[i+1]:select[i+2]]
Exemplo n.º 26
0
# Preview the first training rows.
train_data_1.head()  # NOTE(review): return value unused -- notebook leftover?
print(np.array(train_data_1)[0])

train_data = np.array([])

# Build one feature row per sample by time-averaging each librosa feature;
# the trailing element of each row (thing1[-1]) is excluded -- presumably
# the label column; confirm against the CSV schema.
counter = 0
train_data = np.array([])  # NOTE(review): re-initialized; the assignment above is redundant
for chunk in train_data_reader:
    #print(chunk)
    chunk1 = np.array(chunk)
    for thing in chunk1:
        print(counter)
        thing1 = np.array(thing)
        #print(thing1)
        row = np.array([])
        cstft = np.mean(lf.chroma_stft(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cstft))
        cqt = np.mean(lf.chroma_cqt(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cqt))
        sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, sens))
        spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
Exemplo n.º 27
0
# NOTE(review): dict.fromkeys returns a NEW dict; the results of both
# `.fromkeys(geners)` calls below are discarded, so they are no-ops.
# This chunk uses Python 2 print statements.
Train_files.fromkeys(geners)
for x in geners:  # Build the dictionary of training files per genre.
    data = os.listdir(working_dir + '\\' + x)
    Train_files[x] = data

fig = plt.figure(1)  # figure reserved for later plotting
feature = dict()  # feature vectors per genre
feature.fromkeys(geners)
for gener in geners:  # iterate over the directory of each genre

    for music in Train_files[
            gener]:  # iterate over each file in the given genre
        name = working_dir + '\\' + gener + '\\' + music
        rate, data = sc.read(name)

        print rate, data
        if (len(data.shape) == 2):  # stereo files: keep channel 0 only
            data = data[:, 0]

        # Chroma (12 x frames) from the short-time Fourier transform.
        C_DFT = lf.chroma_stft(y=data, sr=rate)

        # One labelled row per frame: [genre, 12 chroma values].
        C_DF = C_DFT.T
        label = [[gener] * C_DFT.shape[1]]
        label = np.array(label)
        print label.shape, C_DF.shape
        out = np.concatenate((label.T, C_DF), axis=1)
        print out.shape
        tds_write.writerows(out)

tds.close()
Exemplo n.º 28
0
import os
import numpy as np
import soundfile as sf
from librosa.feature import chroma_stft
from librosa.display import specshow
import matplotlib.pyplot as plt
# Compute one chromagram per 60-second block of each "Des" raga recording and
# save the plots under data/chroma_files/des-chromas/des<N>/.
des_files = os.listdir("data/Hindustani/wav/Des")
print(des_files)

# One output directory per input file.
# FIX: os.makedirs replaces the original os.system(f"mkdir ..."), which
# spawned a shell, was not portable, and only reported failures on stderr.
for idx in range(len(des_files)):
    os.makedirs(f"data/chroma_files/des-chromas/des{idx+1}", exist_ok=True)

chroma_dict = {}
for j in range(len(des_files)):
    # NOTE(review): assumes the files are literally named des1.wav, des2.wav,
    # ... — the os.listdir result above is only used for its count.
    rate = sf.info(f"data/Hindustani/wav/Des/des{j+1}.wav").samplerate
    block_gen = sf.blocks(f"data/Hindustani/wav/Des/des{j+1}.wav", blocksize=rate*60)
    chroma_dict[f"des{j+1}"] = []

    for bl in block_gen:
        # Down-mix to mono (assumes 2-D (frames, channels) blocks — TODO
        # confirm the source wavs are multi-channel).
        y = np.mean(bl, axis=1)
        # FIX: pass audio by keyword — `y` is keyword-only in librosa >= 0.10.
        chroma_dict[f"des{j+1}"].append(chroma_stft(y=y, sr=rate))

    for k, chroma in enumerate(chroma_dict[f"des{j+1}"]):
        specshow(chroma, x_axis="time", y_axis="chroma", vmin=0, vmax=1)
        plt.title(f"Chromagram of Des{j+1}_{k+1}")
        plt.savefig(f"data/chroma_files/des-chromas/des{j+1}/des{j+1}_{k+1}.png")
        plt.clf()  # FIX: clear the figure so artists don't accumulate across saves
def get_feature_from_librosa(wave_name, window):
    """Extract a stacked frame-level feature matrix from a wav file.

    Parameters
    ----------
    wave_name : str
        Path to the input wav file (read via `wav.read`, presumably
        scipy.io.wavfile — TODO confirm the import).
    window : int
        Analysis window length in samples; the hop is half a window.

    Returns
    -------
    numpy.ndarray
        Feature matrix with one row per feature dimension and one column per
        frame: chroma (12), MFCC 2-13 (12), delta and delta-delta MFCC
        (12 each), zero-crossing rate, RMS energy, spectral centroid,
        bandwidth, contrast, rolloff and poly features.
    """
    (rate, sig) = wav.read(wave_name)

    # BUG FIX: every hop below was written `window / 2`, which is a float
    # under Python 3 division; librosa requires an integer hop_length.
    # Floor division preserves the original Python 2 semantics for ints.
    hop = window // 2

    chroma_stft_feat = feature.chroma_stft(sig,
                                           rate,
                                           n_fft=window,
                                           hop_length=hop)
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=hop)
    mfcc_feat = mfcc_feat[1:, :]  # drop MFCC 0 (overall log-energy term)
    d_mfcc_feat = feature.delta(mfcc_feat)
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig,
                                                         frame_length=window,
                                                         hop_length=hop)

    # Magnitude spectrogram, reused for the RMS energy computation.
    S = librosa.magphase(
        librosa.stft(sig,
                     hop_length=hop,
                     win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)  # NOTE: renamed `feature.rms` in librosa >= 0.7

    centroid_feat = feature.spectral_centroid(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=hop)

    bandwith_feat = feature.spectral_bandwidth(sig,
                                               rate,
                                               n_fft=window,
                                               hop_length=hop)

    contrast_feat = feature.spectral_contrast(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=hop)
    # Roll-off frequency per frame.
    rolloff_feat = feature.spectral_rolloff(sig,
                                            rate,
                                            n_fft=window,
                                            hop_length=hop)

    # Coefficients of a polynomial fitted to each spectrogram column.
    poly_feat = feature.poly_features(sig,
                                      rate,
                                      n_fft=window,
                                      hop_length=hop)

    # Stack per-frame rows, then transpose so each column is one frame.
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T, centroid_feat.T,
         bandwith_feat.T, contrast_feat.T, rolloff_feat.T, poly_feat.T))
    feat = feat.T
    return feat
Exemplo n.º 30
0
    # Tail of an if-branch whose condition starts above this chunk:
    # rebuild the filename and export the (presumably pydub) `song`
    # segment as wav — TODO confirm against the missing head.
    songname = '.'.join(songname)
    song.export(songname, format = "wav")
else:
    # Already a wav: just rebuild the filename string.
    songname = '.'.join(songname)

# Python 2 script body: load the audio and its duration.
print 'Start reading file'
# read file
src, samplerate = load(songname)
dur = get_duration(y=src, sr=samplerate)

# Start timestamp consumed by printDt() to report elapsed time.
stime = time()

# Chromagram with a coarse hop (512 * 8 samples) to keep the frame count —
# and hence the correlation matrix below — small.
print 'get chromagram'
chromagram = chroma_stft(y = src, sr = samplerate, hop_length = 512 * 8)

printDt(stime, time())

# Frame-to-frame similarity: correlation of the chroma covariance matrix,
# used further down to select a thumbnail segment.
print 'count correlation'
correlation = np.corrcoef(
    np.cov(np.transpose(chromagram)))

corsize = correlation.shape[0]

printDt(stime, time())

# Convert 22 s (thumbnail length) and 5 s (start offset) from seconds to
# correlation-matrix index units; assumes `dur` is the track length in seconds.
thumbnailSize = int(22 / dur * corsize)
startSec = int(5 / dur * corsize)
# Column names for data_training.csv: filename, five summary features,
# twenty MFCC means, and the class label.
header = (
    'filename chroma_stft rmse spectral_centroid spectral_bandwidth zero_crossing_rate'.split()
    + [f'mfcc{idx}' for idx in range(1, 21)]
    + ['label']
)

# Create the CSV (truncating any previous run) and write the header row.
with open('data_training.csv', 'w', newline='') as out_csv:
    csv.writer(out_csv).writerow(header)
# Extract per-file audio features for each ethnic-group ("suku") folder and
# append one row per file to data_training.csv (header written above by the
# setup code earlier in this script).
sukus = 'banjar_hulu banjar_kuala dayak_bakumpai dayak_ngaju'.split()
# FIX: open the CSV once instead of re-opening it in append mode per file.
with open('data_training.csv', 'a', newline='') as out_file:
    writer = csv.writer(out_file)
    for g in sukus:
        for filename in os.listdir(f'data_training/{g}'):
            # BUG FIX: the loop variable `filename` was never used — the path
            # and the first CSV column were hard-coded placeholders, so every
            # iteration pointed at the same non-existent file.
            songname = f'data_training/{g}/{filename}'
            # First 30 s, down-mixed to mono.
            y, sr = librosa.load(songname, mono=True, duration=30)
            chroma = fitur.chroma_stft(y=y, sr=sr)
            spec_cent = fitur.spectral_centroid(y=y, sr=sr)
            spec_bw = fitur.spectral_bandwidth(y=y, sr=sr)
            rmse = fitur.rmse(y)  # NOTE: renamed `rms` in librosa >= 0.7
            zcr = fitur.zero_crossing_rate(y)
            mfcc = fitur.mfcc(y=y, sr=sr)
            # Build the row as a list (the original joined a string and
            # re-split it, which would also break filenames with spaces).
            row = [filename, np.mean(chroma), np.mean(rmse),
                   np.mean(spec_cent), np.mean(spec_bw), np.mean(zcr)]
            row.extend(np.mean(e) for e in mfcc)
            row.append(g)
            writer.writerow(row)