def readGroundTruth(fileList, gtExt='.gtruth', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """
    Returns the start and end times (in seconds) of the ground truth phrases,
    together with their pitch contours (in cents) and time-stamp indices.
    Note: with more than one file in fileList, only the last file's data is returned.
    """
    lines = open(fileList, 'r').readlines()
    for ii, line in enumerate(lines):
        gt_filename = line.strip() + gtExt
        gt_file = np.loadtxt(gt_filename)
        pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
        tonic = np.loadtxt(line.strip() + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        segment = []
        time_stamps = []
        count = 4    # number of annotated ground-truth phrases per file
        st, en = [0.0] * count, [0.0] * count
        for jj in range(count):
            s, e = gt_file[jj][0], gt_file[jj][1]
            st[jj], en[jj] = s, e
            start_ind = find_ind(time, s)
            end_ind = find_ind(time, e)
            time_stamp = np.arange(start_ind, end_ind)
            pitch_vals = pcents[start_ind:end_ind]
            time_stamps.append(time_stamp)
            segment.append(pitch_vals)
    return segment, time_stamps, st, en, Hop, pcents
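# find_ind is used throughout this file but defined elsewhere in the project.
# A minimal sketch of an equivalent (an assumption, not the project's actual
# implementation): map a time value in seconds to the index of the nearest
# entry in the sorted 1-D array of time stamps.
def find_ind_sketch(time, t):
    # index of the time stamp closest to t
    return int(np.argmin(np.abs(time - t)))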
def plotFoundMatches(fileList, pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """
    Plots the pitch contours of the matched (aligned) segments for each query.
    """
    song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt='.fullTrans')
    centroids = readCentroids(centroids_file='centroids.npy')
    aligned = getAlignment(fileList)
    lines = open(fileList, 'r').readlines()
    for ii, line in enumerate(lines):
        pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
        tonic = np.loadtxt(line.strip() + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)
        for jj in range(len(aligned)):
            matches = aligned[jj]
            print "Query index:", jj + 1
            count = 0
            for s, e in matches:
                st = st_seg[s]
                en = en_seg[e]
                # convert start/end times to sample indices before slicing
                contour = pcents[int(st / Hop):int(en / Hop)]
                plt.plot(np.arange(len(contour)) * Hop, contour)
                plt.ylim((-300, 1100))
                #plt.show()
                count += 1
            print "# motifs found: ", count
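# readCentroids is also defined elsewhere in the project; given the .npy
# extension it is presumably little more than a numpy load of the cluster
# centroids. A hypothetical sketch:
def read_centroids_sketch(centroids_file='centroids.npy'):
    return np.load(centroids_file)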
def get_transients(fileList, map_file, segExt='.seg', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    """
    Collects the pitch segments labelled as transients in the .seg files,
    normalizes their shapes, and saves the shapes and their ids to disk.
    """
    map_data = pickle.load(open(map_file, 'r'))
    lines = open(fileList, 'r').readlines()
    ids_data = []
    cnt = 0
    for ii, line in enumerate(lines):
        print line.strip()
        seg_filename = line.strip() + segExt
        seg_file = np.loadtxt(seg_filename)
        pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
        tonic = np.loadtxt(line.strip() + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        for jj in range(seg_file.shape[0]):
            # -20000 marks a transient segment in the .seg annotation
            if seg_file[jj][2] == -20000:
                ids_data.append(map_data['file_row_to_id_map'][ii][jj])
                start_time = seg_file[jj][0]
                end_time = seg_file[jj][1]
                start_ind = find_ind(time, start_time)
                end_ind = find_ind(time, end_time)
                segment = pitch[start_ind:end_ind]
                # keep only segments long enough to fit a stable shape
                if len(segment) >= 60:
                    segment_norm = polyfit_shapes_norm(segment)
                    if cnt == 0:
                        aggregate = np.array([segment_norm])
                    else:
                        aggregate = np.vstack((aggregate, segment_norm))
                    cnt += 1
    # assumes at least one transient segment was found, else aggregate is undefined
    print aggregate.shape

    # For training data
    #------------------
    #np.save('transientIds', np.array(ids_data))
    #np.save('transientShapes', aggregate)

    # For unknown data
    #-----------------
    np.save('transientIds_eval', np.array(ids_data))
    np.save('transientShapes_eval', aggregate)
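# polyfit_shapes_norm is defined elsewhere in the project. Judging by its use
# above (np.vstack requires every output to have the same length), it turns a
# variable-length pitch segment into a fixed-length shape descriptor. A
# hypothetical sketch; the resampling length and polynomial order here are
# assumptions, not the project's actual values:
def polyfit_shape_sketch(segment, n_points=60, order=3):
    x_old = np.linspace(0, 1, len(segment))
    x_new = np.linspace(0, 1, n_points)
    resampled = np.interp(x_new, x_old, segment)   # length-normalize the segment
    resampled = resampled - np.mean(resampled)     # remove the pitch offset
    return np.polyfit(x_new, resampled, order)     # fixed-length descriptor (order+1 coeffs)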
def generateLinearDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, min_nyas_dur=-1):
    pitch = np.array([])
    timeInfo = np.array([])
    fileInfo = {}
    filenames = BP.GetFileNamesInDir(root_dir, pitchExt)
    for filename in filenames:
        fname, ext = os.path.splitext(filename)

        # reading pitch and tonic data
        pitchData, timeStamps, pHop = BPO.readPitchFile(fname + pitchExt)
        tonic = np.loadtxt(open(fname + tonicExt, "r"))
        pCents = BPO.PitchHz2Cents(pitchData, tonic)

        # preprocessing: mask flat (nyas) regions if a minimum duration is given
        if min_nyas_dur > 0:
            msObj = MS.nyasSegmentation()
            msObj.ComputeNyasCandidates(pitchData, tonic.tolist(), pHop)
            msObj.FilterNyasCandidates(min_nyas_duration=min_nyas_dur)
            for swar in msObj.nyasInfo.keys():
                for seg in msObj.nyasInfo[swar]:
                    pCents[seg[0]:seg[1]] = -5000

        # downsampling
        pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents, pHop, timeStamps, downsampleFactor)

        # removing silence regions
        ind_silence = np.where(pCents < -4000)[0]  # TODO: revise this silence condition once log eps is used
        pCents = np.delete(pCents, ind_silence)
        timeStamps = np.delete(timeStamps, ind_silence)

        # accumulating; fileInfo stores each file's [start, end] indices in the aggregate
        pitch = np.append(pitch, pCents)
        timeInfo = np.append(timeInfo, timeStamps)
        fileInfo[filename] = [timeInfo.size - timeStamps.size, timeInfo.size]

    np.savetxt(output_dir + '/' + 'AggPitch.txt', pitch, fmt='%.2f')
    np.savetxt(output_dir + '/' + 'AggTime.txt', timeInfo, fmt='%.2f')
    stream = open(output_dir + '/' + 'fileInfo.yaml', 'w')
    yaml.dump(fileInfo, stream)
    stream.close()
def get_quantized_ts_onlySteadyNotes(fileList, pitchExt='.pitch'):
    """
    Builds a quantized (stylized) time series that keeps only the steady notes
    (multiples of 100 cents) of the full transcription, and writes it to a
    '.steadyPitch' file for every entry in fileList.
    """
    lines = open(fileList, 'r').readlines()
    for ii, line in enumerate(lines):
        pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
        #tonic = np.loadtxt(line.strip() + tonicExt)
        #pcents = BO.PitchHz2Cents(pitch, tonic)
    song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt='.fullTrans')
    # convert segment times to sample indices
    st, en = (st_seg / Hop).astype(int), (en_seg / Hop).astype(int)
    qts = np.array([None] * max(en))
    for jj in range(len(st)):
        # -100000 marks silence; multiples of 100 cents are steady (quantized) notes
        if song_str[jj] != -100000 and song_str[jj] % 100 == 0:
            qts[st[jj]:en[jj]] = song_str[jj]

    #plt.plot(qts, linewidth=2)
    #plt.ylim((-500, 1700))
    #plt.show()

    # Overlay against ground truth (kept for reference):
    #segment, time_stamps, st, en, Hop, pcents = readGroundTruth(fileList)
    #for jj in range(len(segment)):
        #plt.subplot(2, 2, jj + 1)
        #plt.plot(time_stamps[jj] * Hop, segment[jj])
        #plt.plot(time_stamps[jj] * Hop, qts[time_stamps[jj]], 'r', linewidth=3)
        #plt.ylim((-300, 1100))
    #plt.show()

    stylized_pitch = np.array(qts, dtype=np.float)
    lines = open(fileList, 'r').readlines()
    for ii, line in enumerate(lines):
        filename = line.strip()
        steadyContourFilename = filename + '.steadyPitch'
        fid = open(steadyContourFilename, 'w')
        # write every other sample (factor-2 downsampling): time <tab> pitch
        for jj in range(len(stylized_pitch))[::2]:
            fid.write("%f\t%f" % ((Hop * jj), stylized_pitch[jj]))
            fid.write('\n')
        fid.close()
    return qts
def generateSubsequenceDataset(self, pitchFile, tonicFile, outputCandidateFile):
    # reading pitch data
    tonic = np.loadtxt(open(tonicFile, "r"))
    pitchData, timeData, pHop = BPO.readPitchFile(pitchFile)
    pCents = BPO.PitchHz2Cents(pitchData, tonic)

    # preprocessing pitch data (factor-3 downsampling)
    factor = 3
    pCents, pHop, timeData = BPO.downsamplesPitchData(pCents, pHop, timeData, factor)

    if self.params.keys()[0] == 'slidingWindow':
        segments = self.slidingWindowCandidates(pCents, pHop, self.params['slidingWindow']['windowLength'], pHop * 3)
        dataset = [pCents[segment[0]:segment[1]] for segment in segments]
        np.savetxt(outputCandidateFile, np.array(dataset), fmt='%.2f')
def batchProc(root_dir, audioExt='.mp3', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine'):
    filenames = BP.GetFileNamesInDir(root_dir, audioExt)
    segObj = seg.melodySegmentation()
    #fig = plt.figure(figsize=(15,10), dpi=80)

    for filename in filenames[:]:
        print "Processing file %s" % filename

        #======================
        ## This is done for all
        #======================
        fname, ext = os.path.splitext(filename)
        pitch, time, Hop = BO.readPitchFile(fname + pitchExt)
        tonic = np.loadtxt(fname + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        pdata = (time, pcents, Hop)

        ## Extract Breath Phrases
        #------------------------
        breathPhrases = findBreathPhrases(segObj, fname, pcents, Hop)

        ## Histogram processing to extract note locations
        #------------------------------------------------
        svaraSemitone, ignoreNotes = findValidSvaras(pitch, tonic)
        print "Notes being ignored are: %s" % ignoreNotes

        ## Read valid region for evolution
        #---------------------------------
        #endTime = readValidVistarRegion(fname)

        ## Svara transcription
        #---------------------
        transcription = transcribePitch(fname, pdata, ignoreNotes)

        print "-------\nDone !!\n-------"
def batchProc(fileList, centroids_file='centroids.npy', audioExt='.mp3', pitchExt='.pitchSilIntrpPP', tonicExt='.tonicFine', fullTransExt='.fullTrans', gtExt='.gtruth'):
    """
    Full pipeline: reads pitch and transcription data for each file, builds the
    query and search strings, aligns them, and plots the matched contour segments.
    """
    centroids = readCentroids(centroids_file)
    lines = open(fileList, 'r').readlines()
    for ii, line in enumerate(lines):
        filename = line.strip()
        print "Processing file: %s" % filename

        # Read pitch data
        #----------------
        pitch, time, Hop = BO.readPitchFile(filename + pitchExt)
        tonic = np.loadtxt(filename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

        # Read transcription
        #-------------------
        song_str, st_seg, en_seg = readFullTransFile(filename, fullTransExt=fullTransExt)

        # Read ground truth
        #------------------
        st_gt, en_gt, str_gt = visualizeGroundTruth(filename, pcents, time, Hop, song_str, st_seg, en_seg)

        # Get query strings
        #------------------
        note_sym = getQuertString(str_gt)

        # Get song string
        #----------------
        search_str = getSearchString(song_str)

        # Get aligned contour indices by SW (Smith-Waterman)
        #---------------------------------------------------
        aligned = getAlignment(song_str, note_sym)

        # Get contour segments
        #---------------------
        plotFoundMatches(aligned, st_seg, en_seg, pcents, Hop)

        print "-------\nDone !!\n-------"
def generateSubsequenceDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, windowLength,
                               combineData=0, writeBinary=1, meanNormalize=0, flatnessThreshold=0.8,
                               binsPOctave=120, fixPointData=1):
    if combineData:
        timeInfo = np.array([])
        fileInfo = {}

    # obtaining all the files in the directory
    filenames = BP.GetFileNamesInDir(root_dir, pitchExt)

    # iterating over each file
    for kk, filename in enumerate(filenames):
        # separate file name from extension
        fname, ext = os.path.splitext(filename)
        audiofile = fname.split('/')[-1]

        # if data is not to be combined, the output goes into individual directories (dirname = filename)
        if not combineData:
            out_dir = output_dir + '/' + audiofile
            # creating directory if it doesn't exist
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

        # reading pitch and tonic data
        pitchData, timeStamps, pHop = BPO.readPitchFile(fname + pitchExt)
        tonic = np.loadtxt(open(fname + tonicExt, "r"))

        # Convert the pitch values into cents. Note that we add an offset of one octave
        # to make everything positive, assuming the pitch won't go below one octave under the tonic.
        pCents = np.round(binsPOctave * np.log2((eps + pitchData) / tonic)).astype(np.int) + binsPOctave

        # downsampling pitch data
        pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents, pHop, timeStamps, downsampleFactor)

        # removing silence regions from the pitch sequence
        ind_silence = np.where(pCents < 0)[0]  # TODO: revise this silence condition once log eps is used
        pCents = np.delete(pCents, ind_silence)
        timeStamps = np.delete(timeStamps, ind_silence)

        # computing all the indices which have a non-flat region of pitch
        # (binsPOctave is an input used to decide the threshold)
        nonFlatIndexes = self.nonFlatIndexes(pCents, pHop, binsPOctave)

        # making an array which is 1 for non-flat and 0 for flat regions
        flatNonflat = np.zeros(pCents.shape[0])
        flatNonflat[nonFlatIndexes] = 1

        # given the windowLength (motif length), compute how many samples it corresponds to
        windowSamples = int(np.round(windowLength / pHop))

        # to create a MATLAB-buffer-like matrix of sliding-window indices:
        row = np.array([np.arange(windowSamples)])
        col = np.array([np.arange(pCents.size - windowSamples)])
        col = np.transpose(col)
        col2 = copy.deepcopy(col)
        col2[:] = 1
        ind = row * col2 + col

        # each row of the index matrix is a subsequence; reject subsequences whose
        # fraction of non-flat samples falls below the flatness threshold
        mtx = flatNonflat[ind]
        mean_array = np.mean(mtx, axis=1)
        ind_Invalid = np.where(mean_array < flatnessThreshold)[0]
        ind = np.delete(ind, [ind_Invalid], axis=0)

        # finally obtain the pitch matrix and timestamp array
        mtx = pCents[ind]
        timeStamps = timeStamps[ind[:, 0]]

        ### TODO: implement mean normalization

        ### A crucial check!!
        if timeStamps.shape[0] != mtx.shape[0]:
            print filename

        if combineData:  # keep appending the data to combine it
            if kk == 0:
                pitch = copy.deepcopy(mtx)
                timeInfo = copy.deepcopy(timeStamps)
            else:
                pitch = np.append(pitch, mtx, axis=0)
                timeInfo = np.append(timeInfo, timeStamps)
            fileInfo[filename] = [timeInfo.size - timeStamps.size, timeInfo.size]
        else:  # just dump per file
            if writeBinary:
                if fixPointData:
                    mtx.astype(np.uint32).tofile(out_dir + '/' + audiofile + '.pitchSubDBbin')
                else:
                    mtx.astype(np.float).tofile(out_dir + '/' + audiofile + '.pitchSubDBbin')
                timeStamps.astype(np.float).tofile(out_dir + '/' + audiofile + '.timeSubDBbin')
            else:
                np.savetxt(out_dir + '/' + audiofile + '.pitchSubDBtxt', mtx, fmt='%d')
                np.savetxt(out_dir + '/' + audiofile + '.timeSubDBtxt', timeStamps, fmt='%.3f')

        if combineData:
            # another crucial check at each step
            if pitch.shape[0] != timeInfo.shape[0]:
                print filename

    if combineData:
        if writeBinary:
            if fixPointData:
                pitch.astype(np.uint32).tofile(output_dir + '/' + 'AggPitch.bin')
            else:
                pitch.astype(np.float).tofile(output_dir + '/' + 'AggPitch.bin')
            timeInfo.astype(np.float).tofile(output_dir + '/' + 'AggTime.bin')
        else:
            np.savetxt(output_dir + '/' + 'AggPitch.txt', pitch, fmt='%d')
            np.savetxt(output_dir + '/' + 'AggTime.txt', timeInfo, fmt='%.3f')
        stream = open(output_dir + '/' + 'fileInfo.yaml', 'w')
        yaml.dump(fileInfo, stream)
        stream.close()
        print "Total number of time series : " + str(pitch.shape[0])
        print "Length of single Sub sequence : " + str(pitch.shape[1])
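# Hypothetical driver for the pipeline above, assuming the file list contains
# one base path per line with the companion .pitchSilIntrpPP / .tonicFine /
# .fullTrans / .gtruth files next to each recording ('filelist.txt' is a
# placeholder name, not a file shipped with the project).
if __name__ == '__main__':
    batchProc('filelist.txt')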