import os

# calcLPCC and wavwrite are assumed to live in baseZhang alongside the
# calcMFCC/wavread helpers imported elsewhere in this repo.
from baseZhang import calcLPCC, wavread, wavwrite


def extract_lpcc_feat(audiopath):
    """Extract frame-level LPCC features and labels for one annotated song."""
    songXlpcc = []
    songYlabel = []
    audioData, fs = wavread(audiopath)
    # Map the audio path to its annotation: .../train/foo.wav -> .../lab/foo.lab
    if '/train/' in audiopath:
        lab_path = audiopath.replace('/train/', '/lab/')[:-3] + 'lab'
    elif '/test/' in audiopath:
        lab_path = audiopath.replace('/test/', '/lab/')[:-3] + 'lab'
    else:
        lab_path = audiopath.replace('/valid/', '/lab/')[:-3] + 'lab'
    with open(lab_path, 'r') as lab_file:
        lab_content = lab_file.readlines()
    for segment in lab_content:
        # Each .lab line reads "<start_sec> <end_sec> <label>".
        list_segment = segment.split(' ')
        start = float(list_segment[0])
        end = float(list_segment[1])
        label = list_segment[2].strip()
        segmentData = audioData[int(start * fs):int(end * fs)]
        # calcLPCC takes a file path, so round-trip the segment through a
        # temporary wav file.
        temp_path = 'tempSegment.wav'
        wavwrite(temp_path, segmentData, fs)
        segmentLPCC = calcLPCC(temp_path)
        os.remove(temp_path)
        # One label per LPCC frame.
        for lpcc_item in segmentLPCC:
            songXlpcc.append(lpcc_item)
            songYlabel.append(label)
    return songXlpcc, songYlabel
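# --- Usage sketch (not from the original source): collect frame-level LPCC
# features across a split. The 'dataset/train/*.wav' layout is an assumption
# about how the corpus is organized.
import glob

train_X, train_Y = [], []
for wav_path in glob.glob('dataset/train/*.wav'):  # assumed layout
    X, Y = extract_lpcc_feat(wav_path)
    train_X.extend(X)
    train_Y.extend(Y)
print(len(train_X), len(train_Y))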
from baseZhang import calcMFCC, wavread


def extract_feat_from_wav(wav_path):
    """Extract frame-level MFCC features and labels for one annotated song."""
    # Map the audio path to its annotation: .../train/foo.wav -> .../lab/foo.lab
    if '/train/' in wav_path:
        lab_path = wav_path.replace('/train/', '/lab/')[:-3] + 'lab'
    elif '/test/' in wav_path:
        lab_path = wav_path.replace('/test/', '/lab/')[:-3] + 'lab'
    elif '/valid/' in wav_path:
        lab_path = wav_path.replace('/valid/', '/lab/')[:-3] + 'lab'
    else:
        lab_path = 'null'
    with open(lab_path, 'r') as label_file:
        labels = label_file.readlines()
    audioData, fs = wavread(wav_path)
    song_mfcc_X = []
    song_label_Y = []
    for item_label in labels:
        # Each .lab line reads "<start_sec> <end_sec> <label>".
        startTime, endTime, labelY = item_label.split(' ')
        startTime = float(startTime)
        endTime = float(endTime)
        labelY = labelY.strip()
        audio_part_data = audioData[int(startTime * fs):int(endTime * fs)]
        mfcc = calcMFCC(audio_part_data, fs)
        # One label per MFCC frame.
        for mfcc_item in mfcc:
            song_mfcc_X.append(mfcc_item)
            song_label_Y.append(labelY)
    return song_mfcc_X, song_label_Y
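# --- Training sketch (not from the original source): the pickled models
# loaded in singing_voice_detection below are presumably fitted on these
# frame-level MFCCs. The dataset layout, the classifier choice, and the
# output filename are assumptions; only the Models/ directory name is taken
# from the code below.
import glob

import joblib  # or: from sklearn.externals import joblib on older scikit-learn
from sklearn.tree import DecisionTreeClassifier

train_X, train_Y = [], []
for wav_path in glob.glob('dataset/train/*.wav'):  # assumed layout
    X, Y = extract_feat_from_wav(wav_path)
    train_X.extend(X)
    train_Y.extend(Y)

clf = DecisionTreeClassifier()
clf.fit(train_X, train_Y)
joblib.dump(clf, 'Models/dt.pkl')  # hypothetical filename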
import joblib  # or: from sklearn.externals import joblib on older scikit-learn
from baseZhang import calcMFCC, wavread


def singing_voice_detection(audio_path):
    """Label each annotated segment 'sing' or 'nosing' by majority vote over
    five pre-trained frame-level classifiers."""
    predict_song_label = []
    true_song_label = []
    if '/train/' in audio_path:
        lab_path = audio_path.replace('/train/', '/lab/')[:-3] + 'lab'
    elif '/test/' in audio_path:
        lab_path = audio_path.replace('/test/', '/lab/')[:-3] + 'lab'
    elif '/valid/' in audio_path:
        lab_path = audio_path.replace('/valid/', '/lab/')[:-3] + 'lab'
    else:
        lab_path = 'null'
    with open(lab_path, 'r') as label_file:
        labels = label_file.readlines()
    audioData, fs = wavread(audio_path)
    # Load the five-model ensemble once up front.
    models = [
        'Models/dt0.58.pkl',
        'Models/NB0.59.pkl',
        'Models/NC0.57.pkl',
        'Models/NNP0.61.pkl',
        'Models/sgd0.54.pkl',
    ]
    clfs = [joblib.load(model) for model in models]
    for item_label in labels:
        startTime, endTime, labelY = item_label.split(' ')
        startTime = float(startTime)
        endTime = float(endTime)
        true_song_label.append(labelY.strip())
        audio_part_data = audioData[int(startTime * fs):int(endTime * fs)]
        segment_mfcc = list(calcMFCC(audio_part_data, fs))
        # Each classifier predicts one label per frame; voteIt fuses the five
        # prediction sequences into a single per-frame label list.
        all_pre = [clf.predict(segment_mfcc) for clf in clfs]
        voteRes = voteIt(all_pre)
        # The segment label is the majority over the fused frame labels.
        if voteRes.count('sing') > voteRes.count('nosing'):
            segmentLabel = 'sing'
        else:
            segmentLabel = 'nosing'
        predict_song_label.append(segmentLabel)
    return predict_song_label, true_song_label
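# --- voteIt is called above but not defined in this excerpt. A plausible
# sketch: fuse the per-model frame predictions into one per-frame label list
# by majority vote, which is what the voteRes.count('sing') usage implies.
from collections import Counter


def voteIt(all_pre):
    # all_pre holds one prediction sequence per model, each with one label
    # per MFCC frame; return the majority label for every frame.
    return [Counter(frame_labels).most_common(1)[0][0]
            for frame_labels in zip(*all_pre)]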
import numpy
from baseZhang import calcMFCC, wavread
from pydub import AudioSegment

# Smoke test: read the first annotated segment of one song and inspect the
# shape of its MFCC matrix.
lab = '01 - 01 Les Jardins Japonais.lab'
with open(lab, 'r') as label:
    labels = label.readlines()
# Each .lab line reads "<start_sec> <end_sec> <label>".
startTime, endTime, labelY = labels[0].split(' ')
startTime = float(startTime)
endTime = float(endTime)
labelY = labelY.strip()
# Decode the ogg and downmix to mono. Note that set_channels returns a new
# AudioSegment, so its result must be kept.
song = AudioSegment.from_file(lab.replace('.lab', '.ogg'))
song = song.set_channels(1)
song.export(lab.replace('.lab', '.wav'), 'wav')
audioData, fs = wavread(lab.replace('.lab', '.wav'))
part1 = audioData[int(startTime * fs):int(endTime * fs)]
mfcc = calcMFCC(part1, fs)
print(numpy.shape(mfcc))