def readGroundTruth(fileList, gtExt = '.gtruth', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Cut the annotated ground-truth phrases out of a pitch contour.

  fileList is a text file holding one extension-less file path per line.
  For each listed path the first four rows of <path> + gtExt are read;
  columns 0 and 1 of a row are the phrase start and end (in seconds,
  according to the original documentation), which are mapped to
  pitch-frame indices with find_ind.

  NOTE(review): every result is rebuilt for each listed file, so only the
  values belonging to the LAST file are returned -- confirm that callers
  always pass a single-entry list.

  Returns the tuple (segment, time_stamps, st, en, Hop, pcents):
    segment     -- list with one slice of pcents per phrase
    time_stamps -- list with the matching arrays of frame indices
    st, en      -- lists with the start / end values read from the file
    Hop         -- hop value returned by BO.readPitchFile
    pcents      -- complete contour returned by BO.PitchHz2Cents

  Raises ValueError if fileList contains no file names.
  """
  # Close the list file deterministically and ignore blank lines.
  with open(fileList, 'r') as list_fh:
    basenames = [entry.strip() for entry in list_fh if entry.strip()]
  if not basenames:
    raise ValueError("no file names found in %s" % fileList)

  # Number of annotated phrases read from each ground-truth file.
  # NOTE(review): a file with fewer rows raises IndexError below.
  count = 4

  for basename in basenames:
    gt_file = np.loadtxt(basename + gtExt)

    pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
    tonic = np.loadtxt(basename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)

    segment = []
    time_stamps = []
    st, en = [0.0]*count, [0.0]*count
    # A dedicated index is used here; the original reused the outer loop
    # variable, silently clobbering it.
    for jj in range(count):
      s, e = gt_file[jj][0], gt_file[jj][1]
      st[jj], en[jj] = s, e

      start_ind = find_ind(time_axis, s)
      end_ind = find_ind(time_axis, e)

      time_stamps.append(np.arange(start_ind, end_ind))
      segment.append(pcents[start_ind:end_ind])

  return segment, time_stamps, st, en, Hop, pcents
def plotFoundMatches(fileList, pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Plot the pitch contour of every aligned match onto the current figure.

  fileList is a text file holding one extension-less file path per line.
  The transcription segments come from readFullTransFile(fileList) and the
  matches from getAlignment(fileList); each match is a (s, e) pair of
  indices into the segment start / end arrays.

  NOTE(review): the contour is re-read for every listed file, so only the
  LAST file's contour is plotted -- confirm callers pass one file.

  Nothing is returned; per-query match counts are printed.
  Raises ValueError if fileList contains no file names.
  """
  song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt = '.fullTrans')
  # NOTE(review): centroids are only consumed by the disabled
  # get_quantized_ts call below; the load is kept so a missing centroid
  # file is still reported here.
  centroids = readCentroids(centroids_file = 'centroids.npy')
  aligned = getAlignment(fileList)

  # Close the list file deterministically and ignore blank lines.
  with open(fileList, 'r') as list_fh:
    basenames = [entry.strip() for entry in list_fh if entry.strip()]
  if not basenames:
    raise ValueError("no file names found in %s" % fileList)

  for basename in basenames:
    pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
    tonic = np.loadtxt(basename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)

  #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)

  for ii in range(len(aligned)):
    matches = aligned[ii]
    print("Query index: %d" % (ii + 1))
    count = 0
    for s, e in matches:
      # Slice bounds must be integers: current NumPy rejects float
      # indices. int() truncates, matching the old implicit cast.
      first = int(st_seg[s] / Hop)
      last = int(en_seg[e] / Hop)
      contour = pcents[first:last]
      plt.plot(np.arange(len(contour))*Hop, contour)
      plt.ylim((-300,1100))
      #plt.show()

      count += 1
    # Two spaces after the colon reproduce the old "print a, b" output.
    print("# motifs found:  %d" % count)
def plotFoundMatches(fileList,
                     pitchExt='.pitchSilIntrpPP',
                     tonicExt='.tonicFine'):
    """
    Plot the pitch contour of every aligned match onto the current figure.

    fileList is a text file holding one extension-less file path per line.
    The transcription segments come from readFullTransFile(fileList) and
    the matches from getAlignment(fileList); each match is a (s, e) pair
    of indices into the segment start / end arrays.

    NOTE(review): the contour is re-read for every listed file, so only
    the LAST file's contour is plotted -- confirm callers pass one file.

    Nothing is returned; per-query match counts are printed.
    Raises ValueError if fileList contains no file names.
    """
    song_str, st_seg, en_seg, spl_arr = readFullTransFile(
        fileList, fullTransExt='.fullTrans')
    # NOTE(review): centroids are only consumed by the disabled
    # get_quantized_ts call below; the load is kept so a missing centroid
    # file is still reported here.
    centroids = readCentroids(centroids_file='centroids.npy')
    aligned = getAlignment(fileList)

    # Close the list file deterministically and ignore blank lines.
    with open(fileList, 'r') as list_fh:
        basenames = [entry.strip() for entry in list_fh if entry.strip()]
    if not basenames:
        raise ValueError("no file names found in %s" % fileList)

    for basename in basenames:
        pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
        tonic = np.loadtxt(basename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

    #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)

    for ii in range(len(aligned)):
        matches = aligned[ii]
        print("Query index: %d" % (ii + 1))
        count = 0
        for s, e in matches:
            # Slice bounds must be integers: current NumPy rejects float
            # indices. int() truncates, matching the old implicit cast.
            first = int(st_seg[s] / Hop)
            last = int(en_seg[e] / Hop)
            contour = pcents[first:last]
            plt.plot(np.arange(len(contour)) * Hop, contour)
            plt.ylim((-300, 1100))
            #plt.show()

            count += 1
        # Two spaces after the colon reproduce the old "print a, b" output.
        print("# motifs found:  %d" % count)
def get_transients(fileList,
                   map_file,
                   segExt='.seg',
                   pitchExt='.pitchSilIntrpPP',
                   tonicExt='.tonicFine'):
    """
    Collect the normalised shapes of transient segments and save them.

    fileList is a text file holding one extension-less file path per line.
    For each listed path, the rows of <path> + segExt whose third column
    equals -20000 are treated as transients. The pitch samples between the
    row's start (column 0) and end (column 1) are passed to
    polyfit_shapes_norm, and the id of the row is looked up in
    map_data['file_row_to_id_map'][file_index][row_index], where map_data
    is the pickled object stored in map_file.

    Segments shorter than 60 samples are skipped together with their id,
    so the saved ids and shapes always line up row by row. (Previously a
    short segment re-appended the preceding shape, or raised NameError
    when it was the first one.)

    Side effects: writes transientIds_eval.npy and
    transientShapes_eval.npy via np.save and prints progress. Returns None.
    """
    transient_label = -20000  # value of column 2 that marks a transient row
    min_samples = 60          # shorter segments are not normalised

    # NOTE(security): unpickling runs arbitrary code -- only load map files
    # from a trusted source. Binary mode is what pickle expects.
    with open(map_file, 'rb') as map_fh:
        map_data = pickle.load(map_fh)
    # Lines are NOT filtered: the line index is a key into the id map.
    with open(fileList, 'r') as list_fh:
        lines = list_fh.readlines()

    ids_data = []
    shapes = []

    for ii, line in enumerate(lines):
        basename = line.strip()
        print(basename)
        seg_file = np.loadtxt(basename + segExt)

        pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
        tonic = np.loadtxt(basename + tonicExt)
        # NOTE(review): pcents is computed but the segments below are cut
        # from the raw `pitch` -- confirm which one is intended.
        pcents = BO.PitchHz2Cents(pitch, tonic)

        for jj in range(seg_file.shape[0]):
            if seg_file[jj][2] != transient_label:
                continue

            start_ind = find_ind(time_axis, seg_file[jj][0])
            end_ind = find_ind(time_axis, seg_file[jj][1])
            segment = pitch[start_ind:end_ind]

            if len(segment) < min_samples:
                # Too short to normalise: drop id and shape together.
                continue

            ids_data.append(map_data['file_row_to_id_map'][ii][jj])
            shapes.append(polyfit_shapes_norm(segment))

    # Stack once at the end instead of growing an array inside the loop.
    aggregate = np.vstack(shapes) if shapes else np.empty((0, 0))

    print(aggregate.shape)
    #plt.show()

    # For training data
    #------------------
    #np.save('transientIds',np.array(ids_data))
    #np.save('transientShapes',aggregate)

    # For unknown data
    #-----------------
    np.save('transientIds_eval', np.array(ids_data))
    np.save('transientShapes_eval', aggregate)
def get_transients(fileList, map_file, segExt = '.seg', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Collect the normalised shapes of transient segments and save them.

  fileList is a text file holding one extension-less file path per line.
  For each listed path, the rows of <path> + segExt whose third column
  equals -20000 are treated as transients. The pitch samples between the
  row's start (column 0) and end (column 1) are passed to
  polyfit_shapes_norm, and the id of the row is looked up in
  map_data['file_row_to_id_map'][file_index][row_index], where map_data is
  the pickled object stored in map_file.

  Segments shorter than 60 samples are skipped together with their id, so
  the saved ids and shapes always line up row by row. (Previously a short
  segment re-appended the preceding shape, or raised NameError when it was
  the first one.)

  Side effects: writes transientIds_eval.npy and transientShapes_eval.npy
  via np.save and prints progress. Returns None.
  """
  transient_label = -20000  # value of column 2 that marks a transient row
  min_samples = 60          # shorter segments are not normalised

  # NOTE(security): unpickling runs arbitrary code -- only load map files
  # from a trusted source. Binary mode is what pickle expects.
  with open(map_file, 'rb') as map_fh:
    map_data = pickle.load(map_fh)
  # Lines are NOT filtered: the line index is a key into the id map.
  with open(fileList, 'r') as list_fh:
    lines = list_fh.readlines()

  ids_data = []
  shapes = []

  for ii, line in enumerate(lines):
    basename = line.strip()
    print(basename)
    seg_file = np.loadtxt(basename + segExt)

    pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
    tonic = np.loadtxt(basename + tonicExt)
    # NOTE(review): pcents is computed but the segments below are cut from
    # the raw `pitch` -- confirm which one is intended.
    pcents = BO.PitchHz2Cents(pitch, tonic)

    for jj in range(seg_file.shape[0]):
      if seg_file[jj][2] != transient_label:
        continue

      start_ind = find_ind(time_axis, seg_file[jj][0])
      end_ind = find_ind(time_axis, seg_file[jj][1])
      segment = pitch[start_ind:end_ind]

      if len(segment) < min_samples:
        # Too short to normalise: drop id and shape together.
        continue

      ids_data.append(map_data['file_row_to_id_map'][ii][jj])
      shapes.append(polyfit_shapes_norm(segment))

  # Stack once at the end instead of growing an array inside the loop.
  aggregate = np.vstack(shapes) if shapes else np.empty((0, 0))

  print(aggregate.shape)
  #plt.show()

  # For training data
  #------------------
  #np.save('transientIds',np.array(ids_data))
  #np.save('transientShapes',aggregate)

  # For unknown data
  #-----------------
  np.save('transientIds_eval',np.array(ids_data))
  np.save('transientShapes_eval',aggregate)
Пример #6
0
 def generateLinearDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, min_nyas_dur=-1):
     """
     Concatenate the pitch contours of all files under root_dir into one
     linear dataset and write it to output_dir.

     root_dir         -- directory searched (via BP.GetFileNamesInDir) for
                         files ending in pitchExt
     output_dir       -- existing directory that receives the output files
     pitchExt         -- extension of the pitch files
     tonicExt         -- extension of the tonic files sharing the same stem
     downsampleFactor -- factor handed to BPO.downsamplesPitchData
     min_nyas_dur     -- when > 0, segments reported by MS.nyasSegmentation
                         (filtered with this minimum duration) are set to
                         -5000 before downsampling; presumably this makes
                         them fall under the silence threshold below --
                         TODO confirm how downsampling treats the marker

     Output files:
       AggPitch.txt  -- all retained pitch values, '%.2f' per line
       AggTime.txt   -- the matching time stamps, '%.2f' per line
       fileInfo.yaml -- {filename: [start, end]} index range of each file
                        inside the aggregated arrays
     """
     # The leading empty float array keeps the result dtype identical to
     # the former np.append chain and makes an empty file list harmless.
     pitch_chunks = [np.array([])]
     time_chunks = [np.array([])]
     fileInfo = {}
     total = 0  # number of samples accumulated so far

     filenames = BP.GetFileNamesInDir(root_dir, pitchExt)

     for filename in filenames:
         fname, ext = os.path.splitext(filename)
         # reading pitch and tonic data (np.loadtxt closes the file itself)
         pitchData, timeStamps, pHop = BPO.readPitchFile(fname + pitchExt)
         tonic = np.loadtxt(fname + tonicExt)
         pCents = BPO.PitchHz2Cents(pitchData, tonic)

         # removing flat regions
         if min_nyas_dur > 0:
             msObj = MS.nyasSegmentation()
             msObj.ComputeNyasCandidates(pitchData, tonic.tolist(), pHop)
             msObj.FilterNyasCandidates(min_nyas_duration=min_nyas_dur)

             for swar in msObj.nyasInfo.keys():
                 for seg in msObj.nyasInfo[swar]:
                     pCents[seg[0]:seg[1]] = -5000

         # downsampling
         pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents, pHop, timeStamps, downsampleFactor)

         # removing silence regions
         ind_silence = np.where(pCents < -4000)[0]  ###Please correct this silence condition once log eps is used
         pCents = np.delete(pCents, ind_silence)
         timeStamps = np.delete(timeStamps, ind_silence)

         # accumulating: collect chunks and join once after the loop rather
         # than re-copying the growing array for every file
         pitch_chunks.append(np.ravel(pCents))
         time_chunks.append(np.ravel(timeStamps))
         fileInfo[filename] = [total, total + timeStamps.size]
         total += timeStamps.size

     pitch = np.concatenate(pitch_chunks)
     timeInfo = np.concatenate(time_chunks)

     np.savetxt(os.path.join(output_dir, 'AggPitch.txt'), pitch, fmt='%.2f')
     np.savetxt(os.path.join(output_dir, 'AggTime.txt'), timeInfo, fmt='%.2f')
     # A context manager guarantees the YAML is flushed and closed.
     with open(os.path.join(output_dir, 'fileInfo.yaml'), 'w') as stream:
         yaml.dump(fileInfo, stream)
def get_quantized_ts_onlySteadyNotes(fileList, pitchExt='.pitch'):
    """
    Build a stylised pitch track that contains only the steady notes of
    the transcription and write it next to every listed file.

    fileList is a text file holding one extension-less file path per line.
    The transcription (values, segment starts, segment ends) comes from
    readFullTransFile(fileList). Segment bounds are divided by the hop
    returned by BO.readPitchFile and truncated to frame indices. A segment
    is copied into the track only if its value is not -100000 and is a
    multiple of 100; all other frames stay None.

    NOTE(review): the hop of the LAST listed pitch file is used and the
    same track is written for every listed file -- confirm callers pass a
    single file.

    Side effect: writes <path>.steadyPitch with one "time<TAB>value" line
    for every second frame (unset frames are written as nan).

    Returns the object array of per-frame values (None where unset).
    Raises ValueError if fileList contains no file names.
    """
    # Close the list file deterministically and ignore blank lines.
    with open(fileList, 'r') as list_fh:
        basenames = [entry.strip() for entry in list_fh if entry.strip()]
    if not basenames:
        raise ValueError("no file names found in %s" % fileList)

    for basename in basenames:
        pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
        #tonic = np.loadtxt(line.strip()  + tonicExt)
        #pcents = BO.PitchHz2Cents(pitch, tonic)

    song_str, st_seg, en_seg, spl_arr = readFullTransFile(
        fileList, fullTransExt='.fullTrans')

    # Frame indices must be integers: floats are rejected both as a list
    # repeat count and as slice bounds. astype(int) truncates, matching
    # the implicit cast older NumPy versions performed.
    st = (np.asarray(st_seg) / Hop).astype(int)
    en = (np.asarray(en_seg) / Hop).astype(int)

    qts = np.array([None] * int(max(en)))
    for idx, (first, last) in enumerate(zip(st, en)):
        note = song_str[idx]
        if note != -100000 and note % 100 == 0:
            qts[first:last] = note

    #plt.plot(qts,linewidth=2)
    #plt.ylim((-500,1700))
    #plt.show()

    # `float` is the builtin that the removed alias np.float pointed to.
    stylized_pitch = np.array(qts, dtype=float)

    for basename in basenames:
        with open(basename + '.steadyPitch', 'w') as fid:
            # every second frame is written
            for frame in range(0, len(stylized_pitch), 2):
                fid.write("%f\t%f\n" % ((Hop * frame), stylized_pitch[frame]))

    return qts
def get_quantized_ts_onlySteadyNotes(fileList, pitchExt = '.pitch'):
  """
  Build a stylised pitch track that contains only the steady notes of the
  transcription and write it next to every listed file.

  fileList is a text file holding one extension-less file path per line.
  The transcription (values, segment starts, segment ends) comes from
  readFullTransFile(fileList). Segment bounds are divided by the hop
  returned by BO.readPitchFile and truncated to frame indices. A segment
  is copied into the track only if its value is not -100000 and is a
  multiple of 100; all other frames stay None.

  NOTE(review): the hop of the LAST listed pitch file is used and the same
  track is written for every listed file -- confirm callers pass a single
  file.

  Side effect: writes <path>.steadyPitch with one "time<TAB>value" line
  for every second frame (unset frames are written as nan).

  Returns the object array of per-frame values (None where unset).
  Raises ValueError if fileList contains no file names.
  """
  # Close the list file deterministically and ignore blank lines.
  with open(fileList, 'r') as list_fh:
    basenames = [entry.strip() for entry in list_fh if entry.strip()]
  if not basenames:
    raise ValueError("no file names found in %s" % fileList)

  for basename in basenames:
    pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
    #tonic = np.loadtxt(line.strip()  + tonicExt)
    #pcents = BO.PitchHz2Cents(pitch, tonic)

  song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt = '.fullTrans')

  # Frame indices must be integers: floats are rejected both as a list
  # repeat count and as slice bounds. astype(int) truncates, matching the
  # implicit cast older NumPy versions performed.
  st = (np.asarray(st_seg) / Hop).astype(int)
  en = (np.asarray(en_seg) / Hop).astype(int)

  qts = np.array([None] * int(max(en)))
  for idx, (first, last) in enumerate(zip(st, en)):
    note = song_str[idx]
    if note != -100000 and note % 100 == 0:
      qts[first:last] = note

  #plt.plot(qts,linewidth=2)
  #plt.ylim((-500,1700))
  #plt.show()

  # `float` is the builtin that the removed alias np.float pointed to.
  stylized_pitch = np.array(qts, dtype=float)

  for basename in basenames:
    with open(basename + '.steadyPitch', 'w') as fid:
      # every second frame is written
      for frame in range(0, len(stylized_pitch), 2):
        fid.write("%f\t%f\n" % ((Hop*frame), stylized_pitch[frame]))

  return qts
Пример #9
0
 def generateSubsequenceDataset(self, pitchFile, tonicFile, outputCandidateFile):
     """
     Write the candidate subsequences of one pitch file as a text matrix.

     pitchFile           -- pitch file read with BPO.readPitchFile
     tonicFile           -- text file holding the tonic (np.loadtxt)
     outputCandidateFile -- destination; one candidate per row, '%.2f'

     The contour is converted with BPO.PitchHz2Cents, downsampled by a
     fixed factor of 3, and cut into the (start, end) index pairs returned
     by self.slidingWindowCandidates. The window length is taken from
     self.params['slidingWindow']['windowLength'].

     Raises ValueError when self.params has no 'slidingWindow' entry
     (previously this surfaced as a NameError on `segments`).
     """
     # reading pitch data (np.loadtxt opens and closes the file itself)
     tonic = np.loadtxt(tonicFile)
     pitchData, timeData, pHop = BPO.readPitchFile(pitchFile)
     pCents = BPO.PitchHz2Cents(pitchData, tonic)

     # preprocessing pitch data
     factor = 3
     pCents, pHop, timeData = BPO.downsamplesPitchData(pCents, pHop, timeData, factor)

     # Look the method up by key: relying on keys()[0] depended on dict
     # ordering and does not work on dict views.
     if 'slidingWindow' not in self.params:
         raise ValueError("unsupported candidate method(s) %s; only 'slidingWindow' is implemented" % list(self.params))

     # NOTE(review): the last argument is 3x the already-downsampled hop --
     # confirm this is the intended window step.
     segments = self.slidingWindowCandidates(pCents, pHop, self.params['slidingWindow']['windowLength'], pHop*3)

     dataset = [pCents[segment[0]:segment[1]] for segment in segments]
     np.savetxt(outputCandidateFile, np.array(dataset), fmt='%.2f')
def batchProc(root_dir,
              audioExt='.mp3',
              pitchExt='.pitchSilIntrpPP',
              tonicExt='.tonicFine'):
    """
    Run breath-phrase extraction, svara validation and transcription for
    every audio file found under root_dir.

    root_dir -- directory searched with BP.GetFileNamesInDir
    audioExt -- extension of the audio files to look for (this used to be
                ignored in favour of a hard-coded '.mp3')
    pitchExt -- extension of the pitch file sharing the audio file's stem
    tonicExt -- extension of the tonic file sharing the audio file's stem

    Progress is printed for each file. The intermediate results are kept
    in locals because the remainder of this function is not visible here.
    """
    filenames = BP.GetFileNamesInDir(root_dir, audioExt)
    segObj = seg.melodySegmentation()

    #fig = plt.figure(figsize=(15,10), dpi=80)

    for filename in filenames:
        print("Processing file %s" % filename)

        #======================
        ## This is done for all
        #======================

        fname, ext = os.path.splitext(filename)
        pitch, time, Hop = BO.readPitchFile(fname + pitchExt)
        tonic = np.loadtxt(fname + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        pdata = (time, pcents, Hop)

        ## Extract Breath Phrases
        #------------------------
        breathPhrases = findBreathPhrases(segObj, fname, pcents, Hop)

        ## Histogram processing to extract note locations
        #------------------------------------------------
        svaraSemitone, ignoreNotes = findValidSvaras(pitch, tonic)
        #print svaraSemitone, ignoreNotes
        print("Notes being ignored are: %s" % ignoreNotes)

        ## Read valid region for evolution
        #---------------------------------
        #endTime = readValidVistarRegion(fname)

        ## Svara transcription
        #---------------------
        transcription = transcribePitch(fname, pdata, ignoreNotes)
        #print transcription

        print("-------\nDone !!\n-------")
        '''
Пример #11
0
def batchProc(fileList, centroids_file = 'centroids.npy', audioExt = '.mp3', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine', fullTransExt = '.fullTrans', gtExt = '.gtruth'):
  """
  For every file in fileList, align the ground-truth query strings with
  the transcription and plot the matches that were found.

  fileList       -- text file with one extension-less file path per line
  centroids_file -- file handed to readCentroids
  pitchExt       -- extension of the pitch file of each entry
  tonicExt       -- extension of the tonic file of each entry
  fullTransExt   -- extension handed to readFullTransFile
  audioExt, gtExt -- accepted for interface compatibility; not used here

  Nothing is returned; progress is printed for each file.
  """
  # NOTE(review): centroids and search_str are computed but not consumed
  # below; the calls are kept in case the helpers validate their input.
  centroids = readCentroids(centroids_file)

  # Close the list file deterministically and ignore blank lines.
  with open(fileList, 'r') as list_fh:
    basenames = [entry.strip() for entry in list_fh if entry.strip()]

  for filename in basenames:
    print("Processing file: %s" % filename)

    # Read pitch data
    #----------------
    pitch, time_axis, Hop = BO.readPitchFile(filename + pitchExt)
    tonic = np.loadtxt(filename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)

    # Read transcription
    #-------------------
    song_str, st_seg, en_seg = readFullTransFile(filename, fullTransExt = fullTransExt)

    # Read ground truth
    #------------------
    st_gt, en_gt, str_gt = visualizeGroundTruth(filename, pcents, time_axis, Hop, song_str, st_seg, en_seg)

    # Get query strings
    #------------------
    note_sym = getQuertString(str_gt)

    # Get song string
    #----------------
    search_str = getSearchString(song_str)

    # Get aligned contour indices by SW
    #----------------------------------
    aligned = getAlignment(song_str, note_sym)

    # Get contour segments
    #---------------------
    plotFoundMatches(aligned, st_seg, en_seg, pcents, Hop)

    print("-------\nDone !!\n-------")
def readGroundTruth(fileList,
                    gtExt='.gtruth',
                    pitchExt='.pitchSilIntrpPP',
                    tonicExt='.tonicFine'):
    """
    Cut the annotated ground-truth phrases out of a pitch contour.

    fileList is a text file holding one extension-less file path per line.
    For each listed path the first four rows of <path> + gtExt are read;
    columns 0 and 1 of a row are the phrase start and end (in seconds,
    according to the original documentation), which are mapped to
    pitch-frame indices with find_ind.

    NOTE(review): every result is rebuilt for each listed file, so only
    the values belonging to the LAST file are returned -- confirm that
    callers always pass a single-entry list.

    Returns the tuple (segment, time_stamps, st, en, Hop, pcents):
      segment     -- list with one slice of pcents per phrase
      time_stamps -- list with the matching arrays of frame indices
      st, en      -- lists with the start / end values read from the file
      Hop         -- hop value returned by BO.readPitchFile
      pcents      -- complete contour returned by BO.PitchHz2Cents

    Raises ValueError if fileList contains no file names.
    """
    # Close the list file deterministically and ignore blank lines.
    with open(fileList, 'r') as list_fh:
        basenames = [entry.strip() for entry in list_fh if entry.strip()]
    if not basenames:
        raise ValueError("no file names found in %s" % fileList)

    # Number of annotated phrases read from each ground-truth file.
    # NOTE(review): a file with fewer rows raises IndexError below.
    count = 4

    for basename in basenames:
        gt_file = np.loadtxt(basename + gtExt)

        pitch, time_axis, Hop = BO.readPitchFile(basename + pitchExt)
        tonic = np.loadtxt(basename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

        segment = []
        time_stamps = []
        st, en = [0.0] * count, [0.0] * count
        # A dedicated index is used here; the original reused the outer
        # loop variable, silently clobbering it.
        for jj in range(count):
            s, e = gt_file[jj][0], gt_file[jj][1]
            st[jj], en[jj] = s, e

            start_ind = find_ind(time_axis, s)
            end_ind = find_ind(time_axis, e)

            time_stamps.append(np.arange(start_ind, end_ind))
            segment.append(pcents[start_ind:end_ind])

    return segment, time_stamps, st, en, Hop, pcents
Пример #13
0
 def generateSubsequenceDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, windowLength, combineData = 0, writeBinary = 1, meanNormalize = 0, flatnessThreshold=0.8, binsPOctave=120, fixPointData=1):
     """
     Build fixed-length pitch subsequences for every pitch file under
     root_dir and dump them, either per file or as one combined dataset.

     root_dir          -- directory searched (BP.GetFileNamesInDir) for
                          files ending in pitchExt
     output_dir        -- destination directory
     pitchExt/tonicExt -- extensions of the pitch / tonic files of a stem
     downsampleFactor  -- factor handed to BPO.downsamplesPitchData
     windowLength      -- subsequence length, in the unit of the hop size
     combineData       -- truthy: aggregate all files into AggPitch.* /
                          AggTime.* plus fileInfo.yaml in output_dir;
                          falsy: write into output_dir/<stem>/ per file
     writeBinary       -- truthy: raw binary via ndarray.tofile; falsy:
                          text via np.savetxt
     meanNormalize     -- accepted but not implemented yet (see TODO)
     flatnessThreshold -- a subsequence is dropped when the fraction of
                          its samples flagged non-flat is below this
     binsPOctave       -- pitch resolution; also added as an offset so
                          that voiced values become positive
     fixPointData      -- truthy: binary pitch is written as uint32,
                          otherwise as float

     Raises ValueError when combineData is set and no pitch file is found
     (previously a NameError on `pitch`).
     """
     if combineData:
         pitch = None  # becomes the aggregated matrix after the first file
         timeInfo = np.array([])
         fileInfo = {}

     # obtaining all the files in the directory
     filenames = BP.GetFileNamesInDir(root_dir, pitchExt)

     # iterating over each file
     for kk, filename in enumerate(filenames):

         # separate file name from extension
         fname, ext = os.path.splitext(filename)

         audiofile = fname.split('/')[-1]

         # if data is not to be combined, the output goes into individual
         # directories (dirname = filename)
         if not combineData:
             out_dir = os.path.join(output_dir, audiofile)
             # creating directory if it doesn't exist
             if not os.path.isdir(out_dir):
                 os.makedirs(out_dir)

         # reading pitch and tonic data (np.loadtxt closes the file itself)
         pitchData, timeStamps, pHop = BPO.readPitchFile(fname + pitchExt)
         tonic = np.loadtxt(fname + tonicExt)

         # Convert the pitch values into bins relative to the tonic. An
         # offset of one octave is added to make everything positive,
         # assuming the pitch won't go more than one octave below the
         # tonic. `eps` is expected to be defined at module level. The
         # builtin int replaces the removed alias np.int.
         pCents = np.round(binsPOctave*np.log2((eps+pitchData)/tonic)).astype(int) + binsPOctave

         # downsampling pitch data
         pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents, pHop, timeStamps, downsampleFactor)

         # removing silence regions from the pitch sequence
         ind_silence = np.where(pCents < 0)[0]  ###Please correct this silence condition once log eps is used
         pCents = np.delete(pCents, ind_silence)
         timeStamps = np.delete(timeStamps, ind_silence)

         # indices of the non-flat regions of pitch (binsPOctave is only
         # used to derive a threshold)
         nonFlatIndexes = self.nonFlatIndexes(pCents, pHop, binsPOctave)

         # array which is 1 for non-flat and 0 for flat samples
         flatNonflat = np.zeros(pCents.shape[0])
         flatNonflat[nonFlatIndexes] = 1

         # number of samples that windowLength corresponds to
         windowSamples = int(np.round(windowLength/pHop))

         # Index matrix in which row i holds i, i+1, ..., i+windowSamples-1
         # (a MATLAB "buffer"-like layout), built by broadcasting.
         starts = np.arange(pCents.size - windowSamples)
         ind = starts[:, np.newaxis] + np.arange(windowSamples)[np.newaxis, :]

         # reject the subsequences whose non-flat fraction is too low
         mtx = flatNonflat[ind]
         mean_array = np.mean(mtx, axis=1)
         ind_Invalid = np.where(mean_array < flatnessThreshold)[0]
         ind = np.delete(ind, ind_Invalid, axis=0)

         # finally obtain the pitch matrix and the time stamp of the first
         # sample of every subsequence
         mtx = pCents[ind]
         timeStamps = timeStamps[ind[:, 0]]

         ###TODO implement mean normalization

         ### Adding a crucial check!!
         if timeStamps.shape[0] != mtx.shape[0]:
             print(filename)

         if combineData:  # keep on appending the data to combine it

             if pitch is None:
                 pitch = mtx.copy()
                 timeInfo = timeStamps.copy()
             else:
                 pitch = np.append(pitch, mtx, axis=0)
                 timeInfo = np.append(timeInfo, timeStamps)
             fileInfo[filename] = [timeInfo.size - timeStamps.size, timeInfo.size]

             # another crucial check at each step
             if pitch.shape[0] != timeInfo.shape[0]:
                 print(filename)

         else:  # just dump for each file

             if writeBinary:
                 if fixPointData:
                     mtx.astype(np.uint32).tofile(os.path.join(out_dir, audiofile + '.pitchSubDBbin'))
                 else:
                     mtx.astype(float).tofile(os.path.join(out_dir, audiofile + '.pitchSubDBbin'))

                 timeStamps.astype(float).tofile(os.path.join(out_dir, audiofile + '.timeSubDBbin'))
             else:
                 np.savetxt(os.path.join(out_dir, audiofile + '.pitchSubDBtxt'), mtx, fmt='%d')
                 np.savetxt(os.path.join(out_dir, audiofile + '.timeSubDBtxt'), timeStamps, fmt='%.3f')

     if combineData:

         if pitch is None:
             raise ValueError("no files with extension %s found in %s" % (pitchExt, root_dir))

         if writeBinary:
             if fixPointData:
                 pitch.astype(np.uint32).tofile(os.path.join(output_dir, 'AggPitch.bin'))
             else:
                 pitch.astype(float).tofile(os.path.join(output_dir, 'AggPitch.bin'))
             timeInfo.astype(float).tofile(os.path.join(output_dir, 'AggTime.bin'))
         else:
             np.savetxt(os.path.join(output_dir, 'AggPitch.txt'), pitch, fmt='%d')
             np.savetxt(os.path.join(output_dir, 'AggTime.txt'), timeInfo, fmt='%.3f')

         # A context manager guarantees the YAML is flushed and closed.
         with open(os.path.join(output_dir, 'fileInfo.yaml'), 'w') as stream:
             yaml.dump(fileInfo, stream)

         print("Total number of time series : " + str(pitch.shape[0]))
         print("Length of single Sub sequence : " + str(pitch.shape[1]))