def Get_Frames(wavfilepath, flength, fshift,
               base_dir="/home/matsui-pc/matsui/sound/newsound_section01~10/"):
    """Load a WAV file and split it into Hamming-windowed frames.

    Parameters
    ----------
    wavfilepath : str
        WAV file name, resolved relative to *base_dir*.
    flength : int
        Frame length in samples.
    fshift : int
        Frame shift (hop size) in samples.
    base_dir : str, optional
        Directory holding the WAV files.  Previously hard-coded; exposed as
        a parameter (with the old value as default) so the function works
        outside the original machine.

    Returns
    -------
    The list of frames produced by pymir's AudioFile.frames().
    """
    wav_file_path = base_dir + wavfilepath
    wavefile = AudioFile.open(wav_file_path)  # load the WAV file
    # Frame with an explicit shift.  This three-argument form requires the
    # locally patched pymir Frame.py; the unpatched two-argument form
    # (wavefile.frames(flength, np.hamming)) produces non-overlapping frames.
    frames = wavefile.frames(flength, fshift, np.hamming)
    return frames
def calcLPCC(wav_path, frame_size=1024, lpcc_order=12):
    """Compute per-frame LPCC features for a WAV file.

    The file is cut into Hamming-windowed frames of *frame_size* samples;
    each frame yields an LPCC vector of order *lpcc_order*.  Frames whose
    LPCC vector contains inf or NaN are discarded.

    Returns a 2-D numpy array with one row per kept frame.
    """
    audio = AudioFile.open(wav_path)
    frames = audio.frames(frame_size, numpy.hamming)
    kept = []
    for frame in frames:
        coeffs = frame.lpcc(lpcc_order)
        # Keep the frame only when every coefficient is a finite number.
        if all(not (math.isinf(c) or math.isnan(c)) for c in coeffs):
            kept.append(coeffs)
    return numpy.array(kept)
def Get_Centroid(wavfilepath, fs, flength, fshift, freqbin):
    """Compute the spectral centroid of one WAV file.

    wavfilepath: file name, joined onto a hard-coded data directory.
    fs: sampling rate in Hz.
    flength: frame length in samples.
    fshift: frame shift (hop size) in samples.
    freqbin: frequency-bin parameter forwarded to Centroid().
    Returns whatever Centroid() (defined elsewhere in this file) returns.
    """
    wavFilePath = "/home/matsui-pc/matsui/sound/newsound_section01~10/" + wavfilepath
    wavefile = AudioFile.open(wavFilePath)  # load the WAV file
    slength = len(wavefile)  # signal length in samples
    # print "******************"
    # print slength
    # Number of full frames that fit given the frame length and shift.
    fnum = int(math.floor((slength - flength) / fshift + 1))
    """ pymir/Frame.py """
    # NOTE(review): `frames` is computed but never used below — presumably a
    # leftover from an earlier version; confirm before removing.
    frames = wavefile.frames(flength, fshift, np.hamming)  # framing
    """ 関数preprocess """
    # preprocess() is defined elsewhere; judging by the names below it
    # returns (complex spectrum, magnitude spectrum) — TODO confirm.
    preprocess_result = preprocess(wavefile, fs, flength, fshift, fnum)
    spectrum_complex = preprocess_result[0]
    spectrum_abs = preprocess_result[1]
    powerspec = spectrum_abs ** 2  # power spectrum from the magnitude spectrum
    """ 関数Centroid """
    centroid = Centroid(powerspec, freqbin, fs, wavfilepath)
    return centroid
def visioning(name, art):
    """Render an abstract PNG visualisation of an audio file.

    name: audio file name inside tempaudio/ (input).
    art: base name of the PNG written to static/picture/ (output).

    Per 8820-sample frame, a chord/color is taken from the Pitch module and
    a random shape is drawn (minor chords: filled shapes; major chords:
    outlines/curves).  Beats add white points; tempo selects smoothing
    filters.  Output depends on random state — not reproducible by design.
    """
    filename = "tempaudio/%s" % name
    # filename = "tempaudio/test.mp3" ##for testing
    audiofile = AudioFile.open(filename)
    y, sr = librosa.load(filename)
    # Beat tracking: global tempo estimate plus beat positions.
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    # print tempo
    # print beat_times
    numberbeat = len(beat_frames)
    frames = audiofile.frames(8820, numpy.hamming)
    size = len(frames)
    # Image side length scales with the number of frames.
    # NOTE(review): relies on Python 2 integer division — confirm.
    width = size / 2
    height = width
    # print height
    # print width
    # NOTE(review): PIL's Image.new takes (width, height); (height, width)
    # is passed here — harmless only because the two are equal.
    visual = Image.new("RGB", (height, width), "black")
    draw = ImageDraw.Draw(visual)
    frameIndex = 0
    startIndex = 0
    # getting the chords and colors from the modified pitch module
    for frame in frames:
        spectrum = frame.spectrum()
        chroma = spectrum.chroma()
        chord, coloring, mode = Pitch.getChord(chroma)
        # Start/end of this frame in seconds.
        endIndex = startIndex + len(frame)
        startTime = startIndex / frame.sampleRate
        endTime = endIndex / frame.sampleRate
        # print startTime
        # Randomised angles and coordinates derived from the frame timing.
        start = startTime / randint(1,2)
        end = endTime * randint(1,2)
        defx = (height/randint(1,3) - endTime)
        defy = (width/randint(1,3) - startTime)
        x = (defy, startTime)
        w = (endTime, defx)
        # x = (int(startTime), randint(1, height))
        # w = (randint(1, width), int(endTime))
        # start = height / randint(1,2)
        # end = width / randint(1,2)
        # print "%s | %s " % (chord, coloring)
        frameIndex = frameIndex + 1
        startIndex = startIndex + len(frame)
        if mode == 0:
            ## chord is minor: draw a randomly chosen filled shape
            randomnumber = randint(0,3)
            if randomnumber==0:
                draw.pieslice((x, w), start, end, fill = coloring)
            elif randomnumber==1:
                draw.line((x, w), fill = coloring)
            elif randomnumber==2:
                draw.polygon((x, w), fill = coloring)
            else:
                draw.rectangle((x, w), fill = coloring)
        elif mode == 1:
            ## chord is major: draw a randomly chosen curve/outline
            randomnumber = randint(0,3)
            if randomnumber==0:
                draw.arc((x, w), start, end, coloring)
            elif randomnumber==1:
                draw.chord((x, w), start, end, fill = coloring)
            elif randomnumber==2:
                draw.ellipse((x, w), fill = coloring)
            else:
                draw.point((x, w), fill = coloring)
    i= 0
    ## Beat creates a white point in the image to show disruption
    while i < numberbeat:
        if i+1 > numberbeat:
            break
        x = (beat_times[i], randint(1,height))
        w = ((randint(1, width), beat_times[i]))
        draw.point((x, w), fill = "white")
        i += 1
    ## Tempo of song affects smoothness of the image
    tempo1= int(tempo)
    visual = visual.filter(ImageFilter.EDGE_ENHANCE)
    if 50<=tempo1<90:
        visual = visual.filter(ImageFilter.SMOOTH_MORE)
    elif 90<=tempo1<110:
        visual = visual.filter(ImageFilter.SMOOTH)
    elif tempo1>=130:
        # Fast songs get repeated edge enhancement.
        for i in range(0,5):
            visual = visual.filter(ImageFilter.EDGE_ENHANCE)
    else:
        pass
    path= "static/picture/%s.png" % art
    visual.save(path, "PNG")
def FeatureVectorForAudioFile(filename):
    """Build a feature vector for one audio file.

    Behavior depends on the module-level ``version`` flag:
      version == 1: whole-file features (rms, spectral mean, zcr, centroid,
                    variance, rolloff, 12 MFCCs), normalised together.
      version == 2: per-frame (1024-sample Hamming frames) rms, spectral
                    mean and MFCCs; returns mean and std of each, plus
                    per-coefficient MFCC mean/std (12 each).
    Returns None for any other ``version`` value (unchanged behavior).
    """
    # jank random parameter version
    if version == 1:
        featureVector = []
        wavData = AudioFile.open(filename)
        spectrumData = wavData.spectrum()
        featureVector.append(wavData.rms())
        featureVector.append(spectrumData.mean())
        featureVector.append(wavData.zcr())
        featureVector.append(spectrumData.centroid())
        featureVector.append(spectrumData.variance())
        featureVector.append(spectrumData.rolloff())
        mfcc = spectrumData.mfcc2()
        for i in range(12):
            featureVector.append(mfcc[i])
        return NormalizeVector(featureVector)

    # more thought out - frame-by-frame and mean/std of rms,pitch,mfcc
    if version == 2:
        featureVector = []
        wavData = AudioFile.open(filename)
        fixedFrames = wavData.frames(1024, np.hamming)
        rms = []
        mfcc = []
        spectralMean = []
        for frame in fixedFrames:
            try:
                spectrum = frame.spectrum()
                rms.append(frame.rms())
                mfcc.append(spectrum.mfcc2())
                spectralMean.append(spectrum.mean())
            except Exception:
                # Best-effort: skip frames whose spectrum/MFCC computation
                # fails (previously a bare except).
                pass
        # Normalize each feature series.
        rms = NormalizeVector(rms)
        # BUG FIX: the original looped `for m in mfcc: m = NormalizeVector(m)`,
        # which only rebound the loop variable and left the list untouched;
        # rebuild the list so the normalisation actually applies.
        mfcc = [NormalizeVector(m) for m in mfcc]
        spectralMean = NormalizeVector(spectralMean)
        # Mean of each feature over time.
        rmsMean = np.mean(rms, axis=0)
        mfccMean = np.mean(mfcc, axis=0)
        spectralMeanMean = np.mean(spectralMean, axis=0)
        # Standard deviation of each feature over time.
        rmsStd = np.std(rms, axis=0)
        mfccStd = np.std(mfcc, axis=0)
        spectralMeanStd = np.std(spectralMean, axis=0)
        # Assemble: scalar stats first, then 12 MFCC mean/std pairs.
        featureVector.append(rmsMean)
        featureVector.append(rmsStd)
        featureVector.append(spectralMeanMean)
        featureVector.append(spectralMeanStd)
        for i in range(12):
            featureVector.append(mfccMean[i])
            featureVector.append(mfccStd[i])
        return featureVector
from pymir import AudioFile
import glob
import numpy as np
import librosa
import os
from python_speech_features import mfcc

# Script: walk Data/<i>/<y>/sorted/ directories, frame each WAV file and
# compute per-frame LPCC features (saving is currently commented out).
wavs = []
maxSize = 949  # NOTE(review): unused in the visible code — confirm before removing
for i in range(1, 2):
    for y in range(1, 2):
        # With the current ranges only Data/1/1/sorted/ is scanned.
        dirName = 'Data/' + str(i) + '/' + str(y) + '/sorted/'
        for filename in glob.glob(dirName + '*.wav'):
            features = []
            wavData = AudioFile.open(filename)
            # Fixed 1024-sample frames, no explicit window function.
            fixedFrames = wavData.frames(1024)
            print(len(fixedFrames))
            # Only short files (< 350 frames) are processed.
            if ((len(fixedFrames)) < 350):
                for frame in fixedFrames:
                    features.append(frame.lpcc())  # default LPCC order
                featuresNP = np.array(features)
                print(featuresNP.shape)
                # wavs.append(featuresNP.ravel())
            # print (dirName + ' '+ str(len(fixedFrames)))
# print(len(wavs))
# np.savetxt('lpc/lpcFrameLens512.csv',wavs,delimiter=',')
# print((fixedFrames[1].lpc()) ) # LPC, with order = len(fixedFrames[0])-1
# print(len(fixedFrames))
# print((fixedFrames[0].lpc()))
from pymir import AudioFile
from pymir import Onsets
from pymir import SpectralFlux
import numpy as np
import csv

# Exploratory script: load one WAV from the dataset and compute the
# spectrum of each fixed-size frame.  Most other calls are left
# commented out from experimentation.
feature = []  # NOTE(review): unused in the visible code
wavData = AudioFile.open("dataset_audio/9.wav")
#mp3Data = AudioFile.open("test-stereo.mp3")
# Large frames of 44688 samples each (about 1 s at 44.1 kHz), no window.
fixedFrames = wavData.frames(44688)
windowFunction = np.hamming  # NOTE(review): defined but never passed to frames()
#fixedFrames = AudioFile.frames(1024,windowFunction)
#energyOnsets = Onsets.onsetsByEnergy(wavData)
#framesFromOnsets = wavData.framesFromOnsets(energyOnsets)
#print(fixedFrames[0].cqt()) # Constant Q Transform
#print(fixedFrames[0].dct()) # Discrete Cosine Transform
#print(np.sum(fixedFrames[0].energy(windowSize = 256))) # Energy
# fixedFrames[0].play() # Playback using pyAudio
#print(np.size(fixedFrames))
#fixedFrames[0].plot() # Plot using matplotlib
#print(fixedFrames[0].rms()) # Root-mean-squared amplitude
#print(fixedFrames[0].zcr())

# Compute the spectra of each frame
i = 0  # NOTE(review): unused
spectra = [f.spectrum() for f in fixedFrames]
    # NOTE(review): this chunk begins inside make_job_list(); the function's
    # def line is outside the visible region.
    # Recursively collect the paths of all .wav files under folderPath.
    for root, directories, filenames in os.walk(folderPath):
        for filename in filenames:
            path = os.path.join(root, filename)
            if fnmatch.fnmatch(filename, '*.wav'):
                job_list.append(str(path))
    return job_list


job_list = make_job_list('/Users/kamal/Developpement/Explosound/features_extraction_/ExplosoundSamples-master')
analyzed = 0  # NOTE(review): never incremented in the visible code
audiofile_features = {}
# Best-effort scan of the first 30 jobs; any failure silently skips the file.
for i in range(30):
    try:
        audiofile = AudioFile.open(job_list[i])
        spectrum = audiofile.spectrum()
        # Features per file: [rms, spectral centroid, flatness (as string)].
        features = [audiofile.rms(), spectrum.centroid(), str(spectrum.flatness())]
        audiofile_features[job_list[i]] = features
    except:
        pass
    else:
        pass
    finally:
        pass
rms = []
centroid = []
flatness = []
# Loop body continues beyond the visible region.
for i in audiofile_features.keys():
Currently under development

Last updated: 9 December 2012
"""
import sys
sys.path.append('..')

from pymir import AudioFile
from pymir import Energy
from pymir import Onsets

import matplotlib.pyplot as plt

# Demo script: plot a drum loop, detect onsets from the energy envelope,
# then plot each inter-onset frame.
filename = "../audio_files/drum_loop_01.wav"
print "Opening File: " + filename
audiofile = AudioFile.open(filename)
plt.plot(audiofile)
plt.show()

# Time-based methods
print "Finding onsets using Energy function (temporal domain)"
o = Onsets.onsetsByEnergy(audiofile)
print o
# Slice the signal at the detected onset positions.
frames = audiofile.framesFromOnsets(o)
for i in range(0, len(frames)):
    print "Frame " + str(i)
    plt.plot(frames[i])
    plt.show()
def get_features(job_list, force_rescan): print "get_features" if (force_rescan == True): print ">force_rescan on " + str(len(job_list)) + " jobs" analysed = 0 audiofile_features = {} for i in range(len(job_list)): try: print "> " + str(i) + " / " + str(len(job_list)), audiofile = AudioFile.open(job_list[i]) print ",s", spectrum = audiofile.spectrum() rms = audiofile.rms() print ",rms", centroid = spectrum.centroid print ",centroid", flatness = -1 try: flatness = str(spectrum.flatness()) pass except: pass finally: pass print ",flatness", features = [rms, centroid, flatness] print ",done!" audiofile_features[job_list[i]] = features analysed = analysed + 1 except: #print "Exception: " + job_list[i] pass else: pass finally: pass print "<force_rescan " + str(analysed) + "/" + str( len(job_list)) + " scanned." rms = [] centroid = [] flatness = [] for i in audiofile_features.keys(): rms.append(audiofile_features[i][0]) centroid.append(audiofile_features[i][1]) flatness.append(float(audiofile_features[i][2])) #serialisation avec pickle #centrer et reduire rms = ppr.scale(rms) centroid = np.log(centroid) m = max(centroid) n = min(centroid) centroid = [(c - n) / (m - n) for c in centroid] flatness = ppr.scale(flatness) m = max(flatness) n = min(flatness) flatness = [(c - n) / (m - n) for c in flatness] filtered_features = [] for i in range(len(rms)): filtered_features.append( [job_list[i], rms[i], centroid[i], flatness[i]]) return filtered_features
                           delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    # NOTE(review): this chunk begins mid-call — the csv.writer(csvfile, ...)
    # opening and the enclosing `with` are outside the visible region.
    # header = ['File','SC_mu','SC_sigma','Bandwidth_mean', 'Bandwidth_sigma','ZCR', 'STE','Weiner']
    # outwriter.writerow(header)
    for root, _, files in os.walk(directory):
        print "# of files: " + str(len(files))
        ctr = 0
        for f in files:
            print str(ctr) + " :" + f
            ctr += 1
            fullpath = os.path.join(root, f)
            row = []
            row.append(f)  # first column of the CSV row: file name
            wavData = AudioFile.open(fullpath)
            windowFunction = numpy.hamming
            fixedFrames = wavData.frames(1024, windowFunction)
            #
            # Temporal Features
            #
            #
            # Zero-crossing rate per frame; NaN results are replaced by 0.
            ZCR = []
            for i in range(0, len(fixedFrames)):
                zcr = fixedFrames[i].zcr()
                if numpy.isnan(numpy.min(zcr)):
                    zcr = 0
                ZCR.append(zcr)
def load(path):
    """Open the audio file at *path* and return the pymir AudioFile."""
    audio = AudioFile.open(path)
    return audio
""" from __future__ import division import sys sys.path.append('..') from pymir import AudioFile from pymir import Pitch from pymir import Onsets import matplotlib.pyplot as plt # Load the audio print "Loading Audio" audiofile = AudioFile.open("../audio_files/test-stereo.mp3") plt.plot(audiofile) plt.show() print "Finding onsets using Spectral Flux (spectral domain)" o = Onsets.onsetsByFlux(audiofile) print o print "Extracting Frames" frames = audiofile.framesFromOnsets(o) #for i in range(0, len(frames)): # print "Frame " + str(i) # plt.plot(frames[i]) # plt.show()
""" Tests of different onset detection methods Currently under development Last updated: 9 December 2012 """ import sys sys.path.append('..') from pymir import AudioFile from pymir import Energy from pymir import Onsets import matplotlib.pyplot as plt import numpy filename = "../audio_files/drum_loop_01.wav" print "Opening File: " + filename audiofile = AudioFile.open(filename) #plt.plot(audiofile) #plt.show() frames = audiofile.frames(2048, numpy.hamming) print len(frames)
def get_features(job_list, force_rescan): print "get_features" if(force_rescan == True): print ">force_rescan on " + str(len(job_list)) + " jobs" analysed = 0 audiofile_features = {} for i in range(len(job_list)): try: print "> " + str(i) + " / " + str(len(job_list)), audiofile = AudioFile.open(job_list[i]) print ",s", spectrum = audiofile.spectrum() rms = audiofile.rms() print ",rms", centroid = spectrum.centroid print ",centroid", flatness = -1 try: flatness = str(spectrum.flatness()) pass except: pass finally: pass print ",flatness", features = [rms, centroid, flatness] print ",done!" audiofile_features[job_list[i]] = features analysed = analysed + 1 except: #print "Exception: " + job_list[i] pass else: pass finally: pass print "<force_rescan " + str(analysed) + "/" + str(len(job_list)) + " scanned." rms = [] centroid = [] flatness = [] for i in audiofile_features.keys(): rms.append(audiofile_features[i][0]) centroid.append(audiofile_features[i][1]) flatness.append(float(audiofile_features[i][2])) #serialisation avec pickle #centrer et reduire rms = ppr.scale(rms) centroid = np.log(centroid) m = max(centroid) n = min(centroid) centroid = [(c-n)/(m-n) for c in centroid] flatness = ppr.scale(flatness) m = max(flatness) n = min(flatness) flatness = [(c-n)/(m-n) for c in flatness] filtered_features = [] for i in range(len(rms)): filtered_features.append([job_list[i], rms[i], centroid[i], flatness[i]]) return filtered_features
with open('BirdsDataset.csv', 'w') as csvfile: outwriter = csv.writer(csvfile, delimiter=',',quotechar='"', quoting=csv.QUOTE_MINIMAL) # header = ['File','SC_mu','SC_sigma','Bandwidth_mean', 'Bandwidth_sigma','ZCR', 'STE','Weiner'] # outwriter.writerow(header) for root, _, files in os.walk(directory): print "# of files: " + str(len(files)) ctr = 0 for f in files: print str(ctr) + " :" + f ctr += 1 fullpath = os.path.join(root, f) row = [] row.append(f) wavData = AudioFile.open(fullpath) windowFunction = numpy.hamming fixedFrames = wavData.frames(1024, windowFunction) # # Temporal Features # # ZCR = [] for i in range(0,len(fixedFrames)): zcr = fixedFrames[i].zcr() if numpy.isnan(numpy.min(zcr)):
    # NOTE(review): the next two statements belong to a definition whose
    # opening is outside the visible region.
    prev_ste = frames[i].energy(windowSize)[0];
    print "The result is written on %s" % output

# Script entry: require the target WAV file as the first CLI argument.
try:
    target = sys.argv[1]
except:
    print "Usage: %s [wav file] " % sys.argv[0]
    exit(0)

#TODO read from configuration file
frameSize = 882  # 20ms
freq = 44100

# Output base name = input file name without its extension.
token = os.path.basename(target).split(".")
filename = ".".join(token[0:len(token)-1])

wav = AudioFile.open(target)

# Data set 2 - Using sliding Window
# Sample positions every quarter frame across the whole signal;
# frange/slidingWindow/writeCsv are defined elsewhere in this file.
sampling = [s for s in frange(frameSize/4.0,len(wav),frameSize/4.0)]
frames = slidingWindow(wav,sampling,frameSize)
output = '%s-sampling.csv' % filename
writeCsv(sampling,frames,output)
Last updated: 9 December 2012
"""
from __future__ import division
import sys
sys.path.append('..')

from pymir import AudioFile
from pymir import Pitch
from pymir import Onsets

import matplotlib.pyplot as plt

# Demo script: load an MP3, plot it, detect onsets via spectral flux and
# slice the signal at the onsets (per-frame plotting commented out; the
# chunk is truncated inside that commented loop).

# Load the audio
print "Loading Audio"
audiofile = AudioFile.open("../audio_files/test-stereo.mp3")
#audiofile = AudioFile.open("/Users/jsawruk/mir-samples/foo-fighters/01-The-Pretender.mp3")
plt.plot(audiofile)
plt.show()

#audiofile = audiofile[:100000]

print "Finding onsets using Spectral Flux (spectral domain)"
o = Onsets.onsetsByFlux(audiofile)
print o

print "Extracting Frames"
frames = audiofile.framesFromOnsets(o)
#for i in range(0, len(frames)):
#    print "Frame " + str(i)