def readGroundTruth(fileList, gtExt = '.gtruth', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Read ground-truth phrase boundaries and cut the matching pitch contours.

  Returns the start and end time of ground truth phrases in seconds,
  together with the pitch contour (in cents) and the frame indices that fall
  inside each phrase.  The annotated phrase label is not returned.

  fileList is a text file holding one base path per line.  For each base path
  the ground-truth file (base + gtExt), the pitch file (base + pitchExt) and
  the tonic file (base + tonicExt) are read.  Only the first four rows of the
  ground-truth file are used.

  The result lists are rebuilt for every listed file, so the returned values
  describe the LAST file in fileList.

  Returns a tuple (segment, time_stamps, st, en, Hop, pcents):
    segment     -- list of slices of pcents, one per phrase
    time_stamps -- list of np.arange(start_ind, end_ind), one per phrase
    st, en      -- lists with columns 0 and 1 of the ground-truth rows
    Hop         -- third value returned by BO.readPitchFile
    pcents      -- output of BO.PitchHz2Cents for the whole file

  Raises ValueError when fileList contains no file names.
  """
  # Read the list up front and close the handle; blank lines are ignored.
  with open(fileList, 'r') as list_fh:
    basenames = [entry.strip() for entry in list_fh if entry.strip()]
  if not basenames:
    raise ValueError("no file names found in %s" % fileList)

  count = 4  # number of annotated phrases read from each ground-truth file
  for basename in basenames:
    gt_file = np.loadtxt(basename + gtExt)
    pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
    tonic = np.loadtxt(basename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)
    segment = []
    time_stamps = []
    st, en = [0.0]*count, [0.0]*count
    # A separate index name keeps the phrase loop from clobbering the file loop.
    for jj in range(count):
      s, e = gt_file[jj][0], gt_file[jj][1]
      st[jj], en[jj] = s, e
      # find_ind presumably maps a time to an index into `time` -- TODO confirm
      start_ind = find_ind(time, s)
      end_ind = find_ind(time, e)
      # Store the per-phrase results; previously they were computed and dropped,
      # so the returned lists were always empty.
      time_stamps.append(np.arange(start_ind, end_ind))
      segment.append(pcents[start_ind:end_ind])
  return segment, time_stamps, st, en, Hop, pcents
def plotFoundMatches(fileList, pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Plot the pitch contour of every aligned match and report counts per query.

  fileList is a text file holding one base path per line.  The transcription
  (readFullTransFile), the centroids (readCentroids) and the alignment
  (getAlignment) are loaded first.  The pitch of every listed file is then
  read and converted to cents; the contour and hop of the LAST listed file
  are the ones used for plotting.

  For each query in the alignment, every (s, e) pair selects the span from
  st_seg[s] to en_seg[e], which is divided by Hop to obtain frame indices
  (assumes st_seg/en_seg and Hop share the same time unit -- TODO confirm).
  Each contour is drawn with plt.plot on the current figure.

  Returns None.  Prints the 1-based query index and the number of motifs
  found for it.
  """
  song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt = '.fullTrans')
  # Only needed by the disabled reconstruction below; the load is kept as-is.
  centroids = readCentroids(centroids_file = 'centroids.npy')
  aligned = getAlignment(fileList)
  with open(fileList, 'r') as list_fh:
    lines = list_fh.readlines()
  for line in lines:
    basename = line.strip()
    pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
    tonic = np.loadtxt(basename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)
  #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)
  for ii, matches in enumerate(aligned):
    print("Query index: %s" % (ii + 1))
    count = 0
    for s, e in matches:
      # Slice bounds must be integers; truncate the frame positions explicitly
      # instead of relying on float indexing.
      first = int(st_seg[s] / Hop)
      last = int(en_seg[e] / Hop)
      contour = pcents[first:last]
      plt.plot(np.arange(len(contour))*Hop, contour)
      count += 1
    print("# motifs found:  %s" % count)
def plotFoundMatches(fileList,
                     pitchExt='.pitchSilIntrpPP',
                     tonicExt='.tonicFine'):
    """
    Plot the pitch contour of every aligned match and report counts per query.

    fileList is a text file holding one base path per line.  The
    transcription (readFullTransFile), the centroids (readCentroids) and the
    alignment (getAlignment) are loaded first.  The pitch of every listed
    file is then read and converted to cents; the contour and hop of the LAST
    listed file are the ones used for plotting.

    For each query in the alignment, every (s, e) pair selects the span from
    st_seg[s] to en_seg[e], which is divided by Hop to obtain frame indices
    (assumes st_seg/en_seg and Hop share the same time unit -- TODO confirm).
    Each contour is drawn with plt.plot and the y axis is fixed to
    (-300, 1100).

    Returns None.  Prints the 1-based query index and the number of motifs
    found for it.
    """
    song_str, st_seg, en_seg, spl_arr = readFullTransFile(
        fileList, fullTransExt='.fullTrans')
    # Only needed by the disabled reconstruction below; the load is kept as-is.
    centroids = readCentroids(centroids_file='centroids.npy')
    aligned = getAlignment(fileList)

    with open(fileList, 'r') as list_fh:
        lines = list_fh.readlines()

    for line in lines:
        basename = line.strip()
        pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
        tonic = np.loadtxt(basename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

    #recons = get_quantized_ts(st_seg/Hop, en_seg/Hop, song_str, centroids, pcents)

    for ii, matches in enumerate(aligned):
        print("Query index: %s" % (ii + 1))
        count = 0
        for s, e in matches:
            # Slice bounds must be integers; truncate the frame positions
            # explicitly instead of relying on float indexing.
            first = int(st_seg[s] / Hop)
            last = int(en_seg[e] / Hop)
            contour = pcents[first:last]
            plt.plot(np.arange(len(contour)) * Hop, contour)
            plt.ylim((-300, 1100))

            count += 1
        print("# motifs found:  %s" % count)
def get_transients(fileList,
                   map_file,
                   segExt='.seg',
                   pitchExt='.pitchSilIntrpPP',
                   tonicExt='.tonicFine'):
    """
    Collect normalised shapes of the segments flagged with -20000.

    fileList is a text file holding one base path per line; map_file is a
    pickle file.  For each base path the segmentation file (base + segExt) is
    loaded with np.loadtxt and every row whose third column equals -20000 is
    treated as a segment of interest (presumably a transient, going by the
    function name -- TODO confirm).  Columns 0 and 1 give its start and end
    time.  The raw pitch between those times is passed to
    polyfit_shapes_norm when it is at least 60 samples long, and the results
    are stacked row-wise.

    Returns None.  Prints each base path and finally the shape of the
    stacked array ((0, 0) when no segment qualified).

    NOTE(review): the statements that stored the ids and shapes appear to be
    missing from this copy; only the names 'transientIds_eval' and
    'transientShapes_eval' survive in a damaged trailing comment.
    """
    # SECURITY: pickle.load can execute arbitrary code; only pass trusted files.
    # Binary mode is what pickle expects for every protocol.
    with open(map_file, 'rb') as map_fh:
        map_data = pickle.load(map_fh)
    with open(fileList, 'r') as list_fh:
        lines = list_fh.readlines()

    shapes = []
    for line in lines:
        basename = line.strip()
        print(basename)
        seg_file = np.loadtxt(basename + segExt)

        pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
        # NOTE(review): the cents contour is computed but the segments below
        # are cut from the raw pitch -- confirm which one is intended.
        tonic = np.loadtxt(basename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

        for jj in range(seg_file.shape[0]):
            if seg_file[jj][2] != -20000:
                continue
            start_ind = find_ind(time, seg_file[jj][0])
            end_ind = find_ind(time, seg_file[jj][1])
            #trans_id = map_data[][ii][jj]

            segment = pitch[start_ind:end_ind]
            # Short segments are skipped outright.  Previously a stale (or
            # undefined) shape was reused for them, and only the very first
            # flagged segment was ever stacked.
            if len(segment) >= 60:
                shapes.append(polyfit_shapes_norm(segment))

    aggregate = np.vstack(shapes) if shapes else np.empty((0, 0))
    print(aggregate.shape)
def get_transients(fileList, map_file, segExt = '.seg', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine'):
  """
  Collect normalised shapes of the segments flagged with -20000.

  fileList is a text file holding one base path per line; map_file is a
  pickle file.  For each base path the segmentation file (base + segExt) is
  loaded with np.loadtxt and every row whose third column equals -20000 is
  treated as a segment of interest (presumably a transient, going by the
  function name -- TODO confirm).  Columns 0 and 1 give its start and end
  time.  The raw pitch between those times is passed to polyfit_shapes_norm
  when it is at least 60 samples long, and the results are stacked row-wise.

  Returns None.  Prints each base path and finally the shape of the stacked
  array ((0, 0) when no segment qualified).

  NOTE(review): the statements that stored the results for training or
  unknown data appear to be missing from this copy.
  """
  # SECURITY: pickle.load can execute arbitrary code; only pass trusted files.
  # Binary mode is what pickle expects for every protocol.
  with open(map_file, 'rb') as map_fh:
    map_data = pickle.load(map_fh)
  with open(fileList, 'r') as list_fh:
    lines = list_fh.readlines()

  shapes = []
  for line in lines:
    basename = line.strip()
    print(basename)
    seg_file = np.loadtxt(basename + segExt)
    pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
    # NOTE(review): the cents contour is computed but the segments below are
    # cut from the raw pitch -- confirm which one is intended.
    tonic = np.loadtxt(basename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)
    for jj in range(seg_file.shape[0]):
      if seg_file[jj][2] != -20000:
        continue
      start_ind = find_ind(time, seg_file[jj][0])
      end_ind = find_ind(time, seg_file[jj][1])
      #trans_id = map_data[][ii][jj]
      segment = pitch[start_ind:end_ind]
      # Short segments are skipped outright.  Previously a stale (or undefined)
      # shape was reused for them, and only the very first flagged segment was
      # ever stacked.
      if len(segment) >= 60:
        shapes.append(polyfit_shapes_norm(segment))

  aggregate = np.vstack(shapes) if shapes else np.empty((0, 0))
  print(aggregate.shape)
Exemplo n.º 6
 # Build one aggregated pitch/time dataset from every pitch file in root_dir.
 #
 # For each file with extension pitchExt found under root_dir, the pitch and
 # the tonic (same base name + tonicExt) are read, the pitch is converted to
 # cents, downsampled, stripped of values below -4000 cents, and appended to
 # the running `pitch` / `timeInfo` arrays.  `fileInfo` records, per file, the
 # [start, end) positions of its samples inside the aggregate.  The results
 # are written to AggPitch.txt, AggTime.txt and fileInfo.yaml in output_dir.
 #
 # NOTE(review): this copy looks incomplete --
 #   * `pitch`, `timeInfo` and `fileInfo` are used but never initialised here;
 #   * `factor` is not defined in this method while the `downsampleFactor`
 #     parameter is never used -- presumably they are meant to be the same;
 #   * the innermost loop of the flat-region removal has no body.
 def generateLinearDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, min_nyas_dur=-1):
     filenames = BP.GetFileNamesInDir(root_dir,pitchExt)
     for filename in filenames:
         fname, ext = os.path.splitext(filename)
         #reading pitch and tonic data
         pitchData,timeStamps,pHop = BPO.readPitchFile(fname+pitchExt)
         tonic = np.loadtxt(open(fname+tonicExt,"r"))
         pCents = BPO.PitchHz2Cents(pitchData, tonic)
         #some preprocessing
         #removing flat regions
         # Only performed when a positive minimum nyas duration is requested.
         if (min_nyas_dur>0):
             msObj = MS.nyasSegmentation()
             msObj.ComputeNyasCandidates(pitchData, tonic.tolist(), pHop)
             for swar in msObj.nyasInfo.keys():
                 for seg in msObj.nyasInfo[swar]:
         pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents,pHop,timeStamps, factor)
         #removing silence regions
         ind_silence = np.where(pCents<-4000)[0] ###Please correct this silence condition once log eps is used
         pCents = np.delete(pCents,ind_silence)
         timeStamps = np.delete(timeStamps,ind_silence)
         pitch = np.append(pitch, pCents)
         timeInfo = np.append(timeInfo, timeStamps)
         # Range of this file's samples inside the aggregated arrays.
         fileInfo[filename]= [timeInfo.size-timeStamps.size, timeInfo.size]
     np.savetxt(output_dir+'/'+'AggPitch.txt', pitch, fmt='%.2f')
     np.savetxt(output_dir+'/'+'AggTime.txt', timeInfo, fmt='%.2f')
     # `file` is the Python 2 builtin; the stream is not closed explicitly.
     stream = file(output_dir+'/'+'fileInfo.yaml','w')
     yaml.dump(fileInfo, stream)
def get_quantized_ts_onlySteadyNotes(fileList, pitchExt='.pitch'):
    """
    Build a quantised pitch track that keeps only the steady notes.

    fileList is a text file holding one base path per line.  The pitch file
    of every listed path is read only to obtain the hop size (the value from
    the LAST listed file is used; the list must not be empty).  The
    transcription returned by readFullTransFile supplies a symbol per segment
    plus segment start and end positions, which are divided by the hop and
    truncated to integer frame indices.

    A segment is copied into the output when its symbol is not -100000 and is
    a multiple of 100; all other frames stay None.

    Every second frame of the resulting track is written, as tab-separated
    "time value" lines, to base + '.steadyPitch' for each listed path (the
    same track is written for every path).  Unset frames are written as nan.

    Returns the object array with one entry per frame (None where unset).
    """
    with open(fileList, 'r') as list_fh:
        lines = list_fh.readlines()

    for line in lines:
        pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
        #tonic = np.loadtxt(line.strip()  + tonicExt)
        #pcents = BO.PitchHz2Cents(pitch, tonic)

    song_str, st_seg, en_seg, spl_arr = readFullTransFile(
        fileList, fullTransExt='.fullTrans')

    # Frame positions are used as sizes and slice bounds, so make them ints.
    st = (np.asarray(st_seg) / Hop).astype(int)
    en = (np.asarray(en_seg) / Hop).astype(int)

    qts = np.array([None] * int(max(en)))
    for kk in range(len(st)):
        if song_str[kk] != -100000 and song_str[kk] % 100 == 0:
            qts[st[kk]:en[kk]] = song_str[kk]

    #segment, time_stamps, st, en, Hop, pcents = readGroundTruth(fileList)

    #for ii in range(len(segment)):
    #plt.plot(time_stamps[ii]*Hop, segment[ii])
    #plt.plot(time_stamps[ii]*Hop, qts[time_stamps[ii]], 'r', linewidth=3)

    # The builtin float replaces the removed np.float alias; None becomes nan.
    stylized_pitch = np.array(qts, dtype=float)

    for line in lines:
        steadyContourFilename = line.strip() + '.steadyPitch'
        # The context manager guarantees the data is flushed and the handle
        # released; each record gets its own line so the file can be parsed.
        with open(steadyContourFilename, 'w') as fid:
            for frame in range(0, len(stylized_pitch), 2):
                fid.write("%f\t%f\n" % ((Hop * frame), stylized_pitch[frame]))

    return qts
def get_quantized_ts_onlySteadyNotes(fileList, pitchExt = '.pitch'):
  """
  Build a quantised pitch track that keeps only the steady notes.

  fileList is a text file holding one base path per line.  The pitch file of
  every listed path is read only to obtain the hop size (the value from the
  LAST listed file is used; the list must not be empty).  The transcription
  returned by readFullTransFile supplies a symbol per segment plus segment
  start and end positions, which are divided by the hop and truncated to
  integer frame indices.

  A segment is copied into the output when its symbol is not -100000 and is a
  multiple of 100; all other frames stay None.

  Every second frame of the resulting track is written, as tab-separated
  "time value" lines, to base + '.steadyPitch' for each listed path (the same
  track is written for every path).  Unset frames are written as nan.

  Returns the object array with one entry per frame (None where unset).
  """
  with open(fileList, 'r') as list_fh:
    lines = list_fh.readlines()
  for line in lines:
    pitch, time, Hop = BO.readPitchFile(line.strip() + pitchExt)
    #tonic = np.loadtxt(line.strip()  + tonicExt)
    #pcents = BO.PitchHz2Cents(pitch, tonic)
  song_str, st_seg, en_seg, spl_arr = readFullTransFile(fileList, fullTransExt = '.fullTrans')
  # Frame positions are used as sizes and slice bounds, so make them ints.
  st = (np.asarray(st_seg)/Hop).astype(int)
  en = (np.asarray(en_seg)/Hop).astype(int)
  qts = np.array([None]*int(max(en)))
  for kk in range(len(st)):
    if song_str[kk] != -100000 and song_str[kk]%100 == 0:
      qts[st[kk]:en[kk]] = song_str[kk]
  #segment, time_stamps, st, en, Hop, pcents = readGroundTruth(fileList)
  #for ii in range(len(segment)):
    #plt.plot(time_stamps[ii]*Hop, segment[ii])
    #plt.plot(time_stamps[ii]*Hop, qts[time_stamps[ii]], 'r', linewidth=3)
  # The builtin float replaces the removed np.float alias; None becomes nan.
  stylized_pitch = np.array(qts, dtype=float)
  for line in lines:
    steadyContourFilename = line.strip() + '.steadyPitch'
    # The write loop had lost its body; it is restored here with one record
    # per line, and the handle is closed by the context manager.
    with open(steadyContourFilename, 'w') as fid:
      for frame in range(0, len(stylized_pitch), 2):
        fid.write("%f\t%f\n" % ((Hop*frame), stylized_pitch[frame]))
  return qts
Exemplo n.º 9
 def generateSubsequenceDataset(self, pitchFile, tonicFile, outputCandidateFile):
     """
     Cut candidate subsequences out of one pitch track.

     The tonic is loaded from tonicFile and the pitch from pitchFile; the
     pitch is converted to cents and downsampled.  When the first key of
     self.params is 'slidingWindow', candidate (start, end) index pairs are
     obtained from self.slidingWindowCandidates and the matching slices of the
     cents contour are gathered into a local list.

     Nothing is returned and outputCandidateFile is not used in the visible
     code.

     NOTE(review): `factor` is not defined in this method -- presumably a
     module-level value; confirm.  `self.params.keys()[0]` relies on keys()
     returning a list.
     """
     # tonic first, then the pitch track, exactly as read from disk
     tonicValue = np.loadtxt(open(tonicFile,"r"))
     rawPitch, frameTimes, hop = BPO.readPitchFile(pitchFile)
     cents = BPO.PitchHz2Cents(rawPitch, tonicValue)
     # reduce the temporal resolution before generating candidates
     cents, hop, frameTimes = BPO.downsamplesPitchData(cents, hop, frameTimes, factor)
     if self.params.keys()[0] == 'slidingWindow':
         segments = self.slidingWindowCandidates(
             cents, hop, self.params['slidingWindow']['windowLength'], hop*3)
     # one slice of the cents contour per candidate
     dataset = [cents[bounds[0]:bounds[1]] for bounds in segments]
# Batch-process every '.mp3' entry found under root_dir.
#
# For each audio file name the pitch and tonic files sharing its base name are
# read, the pitch is converted to cents, and three steps run in sequence:
# findBreathPhrases, findValidSvaras and transcribePitch.  Progress is printed;
# nothing is returned by the visible code.
#
# NOTE(review): the parameter list is cut off after `root_dir,` in this copy.
# The body reads `pitchExt` and `tonicExt`, which presumably were keyword
# parameters -- restore the signature from the original file.
def batchProc(root_dir,

    filenames = BP.GetFileNamesInDir(root_dir, '.mp3')
    segObj = seg.melodySegmentation()

    #fig = plt.figure(figsize=(15,10), dpi=80)

    for filename in filenames[:]:
        print "Processing file %s" % filename

        ## This is done for all

        # Strip the audio extension to get the base path shared by the
        # pitch and tonic files.
        fname, ext = os.path.splitext(filename)
        pitch, time, Hop = BO.readPitchFile(fname + pitchExt)
        tonic = np.loadtxt(fname + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)
        # Bundle handed to transcribePitch below.
        pdata = (time, pcents, Hop)

        ## Extract Breath Phrases
        breathPhrases = findBreathPhrases(segObj, fname, pcents, Hop)

        ## Histogram processing to extract note locations
        svaraSemitone, ignoreNotes = findValidSvaras(pitch, tonic)
        #print svaraSemitone, ignoreNotes
        print "Notes being ignored are: %s" % ignoreNotes

        ## Read valid region for evolution
        #endTime = readValidVistarRegion(fname)

        ## Svara transcription
        transcription = transcribePitch(fname, pdata, ignoreNotes)
        #print transcription

        print "-------\nDone !!\n-------"
Exemplo n.º 11
def batchProc(fileList, centroids_file = 'centroids.npy', audioExt = '.mp3', pitchExt = '.pitchSilIntrpPP', tonicExt = '.tonicFine', fullTransExt = '.fullTrans', gtExt = '.gtruth'):
  """
  Run the motif search pipeline on every file named in fileList.

  fileList is a text file holding one base path per line.  For each path the
  pitch and tonic are read and converted to cents, the transcription is
  loaded, the ground truth is visualised, query and search strings are
  derived, the alignment is computed and the matching contours are plotted.

  audioExt and gtExt are accepted but not used in the visible code.
  Returns None; progress messages are printed.

  NOTE(review): the helper calls below use different argument lists than the
  definitions of readFullTransFile, getAlignment and plotFoundMatches that
  appear elsewhere in this file -- presumably this snippet targets another
  revision of those helpers; confirm before running.
  """
  centroids = readCentroids(centroids_file)
  # Read the list up front so the handle is closed before the heavy work.
  with open(fileList, 'r') as list_fh:
    lines = list_fh.readlines()
  for line in lines:
    filename = line.strip()
    print("Processing file: %s" % filename)
    # Read pitch data
    pitch, time, Hop = BO.readPitchFile(filename + pitchExt)
    tonic = np.loadtxt(filename + tonicExt)
    pcents = BO.PitchHz2Cents(pitch, tonic)
    # Read transcription
    song_str, st_seg, en_seg = readFullTransFile(filename, fullTransExt = fullTransExt)
    # Read ground truth
    st_gt, en_gt, str_gt = visualizeGroundTruth(filename, pcents, time, Hop, song_str, st_seg, en_seg)
    # Get query strings
    note_sym = getQuertString(str_gt)
    # Get song string
    search_str = getSearchString(song_str)
    # Get aligned contour indices by SW
    aligned = getAlignment(song_str, note_sym)
    # Get contour segments
    plotFoundMatches(aligned, st_seg, en_seg, pcents, Hop)
    print("-------\nDone !!\n-------")
def readGroundTruth(fileList,
                    gtExt='.gtruth',
                    pitchExt='.pitchSilIntrpPP',
                    tonicExt='.tonicFine'):
    """
    Read ground-truth phrase boundaries and cut the matching pitch contours.

    Returns the start and end time of ground truth phrases in seconds,
    together with the pitch contour (in cents) and the frame indices that
    fall inside each phrase.  The annotated phrase label is not returned.

    fileList is a text file holding one base path per line.  For each base
    path the ground-truth file (base + gtExt), the pitch file
    (base + pitchExt) and the tonic file (base + tonicExt) are read.  Only
    the first four rows of the ground-truth file are used.

    The result lists are rebuilt for every listed file, so the returned
    values describe the LAST file in fileList.

    Returns a tuple (segment, time_stamps, st, en, Hop, pcents):
      segment     -- list of slices of pcents, one per phrase
      time_stamps -- list of np.arange(start_ind, end_ind), one per phrase
      st, en      -- lists with columns 0 and 1 of the ground-truth rows
      Hop         -- third value returned by BO.readPitchFile
      pcents      -- output of BO.PitchHz2Cents for the whole file

    Raises ValueError when fileList contains no file names.
    """
    # Read the list up front and close the handle; blank lines are ignored.
    with open(fileList, 'r') as list_fh:
        basenames = [entry.strip() for entry in list_fh if entry.strip()]
    if not basenames:
        raise ValueError("no file names found in %s" % fileList)

    count = 4  # number of annotated phrases read from each ground-truth file
    for basename in basenames:
        gt_file = np.loadtxt(basename + gtExt)

        pitch, time, Hop = BO.readPitchFile(basename + pitchExt)
        tonic = np.loadtxt(basename + tonicExt)
        pcents = BO.PitchHz2Cents(pitch, tonic)

        segment = []
        time_stamps = []
        st, en = [0.0] * count, [0.0] * count
        # A separate index name keeps the phrase loop from clobbering the
        # file loop.
        for jj in range(count):
            s, e = gt_file[jj][0], gt_file[jj][1]
            st[jj], en[jj] = s, e

            # find_ind presumably maps a time to an index into `time`
            # -- TODO confirm
            start_ind = find_ind(time, s)
            end_ind = find_ind(time, e)

            # Store the per-phrase results; previously they were computed
            # and dropped, so the returned lists were always empty.
            time_stamps.append(np.arange(start_ind, end_ind))
            segment.append(pcents[start_ind:end_ind])

    return segment, time_stamps, st, en, Hop, pcents
Exemplo n.º 13
 # Generate fixed-length pitch subsequences for every pitch file in root_dir.
 #
 # Per file: read pitch and tonic, quantise the pitch to `binsPOctave` bins per
 # octave relative to the tonic, downsample, drop negative (silence) values,
 # build an index matrix with one row per candidate subsequence of
 # `windowLength` duration, discard rows whose mean "non-flat" flag is below
 # `flatnessThreshold`, and either append the result to a combined dataset
 # (combineData) or dump it per file under output_dir/<audiofile>/.
 #
 # NOTE(review): this copy is damaged and does not parse --
 #   * several `if` statements have lost their bodies and the `else:` lines
 #     that separated alternative branches are gone;
 #   * the cents conversion line is cut in the middle of `.astype(`;
 #   * `factor` is not defined here while `downsampleFactor` is never used;
 #   * `fileInfo` is used without being initialised in the visible code.
 # Restore these from the original file before relying on this method.
 def generateSubsequenceDataset(self, root_dir, output_dir, pitchExt, tonicExt, downsampleFactor, windowLength, combineData = 0, writeBinary = 1, meanNormalize = 0, flatnessThreshold=0.8, binsPOctave=120, fixPointData=1):
     if combineData:
     #obtaining all the files in the directory
     filenames = BP.GetFileNamesInDir(root_dir,pitchExt)
     # iterating over each file
     for kk, filename in enumerate(filenames):
         #separate file name from extension
         fname, ext = os.path.splitext(filename)
         audiofile = fname.split('/')[-1]
         #if data is not to be combined, then output goes into individual directories (dirname = filename)
         if not combineData:
             out_dir = output_dir + '/' + audiofile
             #creating directory if doesn't exist
             if not os.path.isdir(out_dir):
         #reading pitch and tonic data
         pitchData,timeStamps,pHop = BPO.readPitchFile(fname+pitchExt)
         tonic = np.loadtxt(open(fname+tonicExt,"r"))            
         #Convert the pitch values into cents, Note that we add a offset of 1 octave to make everything positive, assuming that pitch wont go below one octave to tonic
         pCents=np.round(binsPOctave*np.log2((eps+pitchData)/tonic)).astype( + binsPOctave 
         #downsampling pitch data
         pCents, pHop, timeStamps = BPO.downsamplesPitchData(pCents,pHop,timeStamps, factor)
         #removing silence regions from the pitch sequence
         ind_silence = np.where(pCents<0)[0] ###Please correct this silence condition once log eps is used
         pCents = np.delete(pCents,ind_silence)
         timeStamps = np.delete(timeStamps,ind_silence)
         #computing all the indices which have non flat region of pitch (binsPOctave is an input to decide threshold, thats it!!)
         nonFlatIndexes = self.nonFlatIndexes(pCents, pHop,binsPOctave)
         #making an array which will be 1 for nonflat and 0 for flat regions
         # NOTE(review): nothing visible sets the non-flat positions to 1, so
         # the array stays all zeros here -- a line appears to be missing.
         flatNonflat = np.zeros(pCents.shape[0])
         #given the windowLength or motigLength compute how many samples do they correspond to
         windowSamples = int(np.round(windowLength/pHop))
         #to create a matlab buffer like thing, we do the following
         row = np.array([np.arange(windowSamples)])
         col = np.array([np.arange(pCents.size-windowSamples)])
         col = np.transpose(col)
         col2 = copy.deepcopy(col)
         # NOTE(review): as written ind[i, j] = j*i + i, not the sliding-window
         # index i + j; presumably col2 was meant to be filled with ones first.
         ind = row*col2 + col
         #so after creating matrix where each row is a subsequence, look which subsequence to reject based on flatness threshold
         mtx = flatNonflat[ind]            
         mean_array = np.mean(mtx,axis=1)            
         ind_Invalid = np.where(mean_array<flatnessThreshold)[0]
         ind = np.delete(ind,[ind_Invalid],axis=0)
         #finally obtain pitch matrix and timestamp array
         mtx = pCents[ind]
         # Each subsequence is stamped with the time of its first sample.
         timeStamps = timeStamps[ind[:,0]]
         ###TODO impement mean normalizatoin
         ### Adding a crucial check!!
         if timeStamps.shape[0] != mtx.shape[0]:
             print filename
         if combineData:#keep on appending the data to combine it
             # NOTE(review): without an `else:` the first file is copied and
             # then appended again.
             if kk==0:
                 pitch = copy.deepcopy(mtx)
                 timeInfo = copy.deepcopy(timeStamps)
                 pitch = np.append(pitch, mtx,axis=0)
                 timeInfo = np.append(timeInfo, timeStamps)
             fileInfo[filename]= [timeInfo.size-timeStamps.size, timeInfo.size]
         else:#just dump for each file
             if writeBinary:
                 # NOTE(review): both writes target the same path, so the
                 # float dump overwrites the uint32 one as written.
                 if fixPointData:
                     mtx.astype(np.uint32).tofile(out_dir+'/'+ audiofile +'.pitchSubDBbin')
                     mtx.astype(np.float).tofile(out_dir+'/'+ audiofile +'.pitchSubDBbin')
                 timeStamps.astype(np.float).tofile(out_dir+'/'+ audiofile +'.timeSubDBbin')
                 np.savetxt(out_dir+'/'+ audiofile +'.pitchSubDBtxt', mtx , fmt='%d')
                 np.savetxt(out_dir+'/'+ audiofile +'.timeSubDBtxt', timeStamps, fmt='%.3f')
         if combineData:
             #another crucial check at each step
             if pitch.shape[0] != timeInfo.shape[0]:
                 print filename
     # After all files: write the combined dataset and its per-file index.
     if combineData:
         if writeBinary:
             if fixPointData:
             np.savetxt(output_dir+'/'+'AggPitch.txt', pitch , fmt='%d')
             np.savetxt(output_dir+'/'+'AggTime.txt', timeInfo, fmt='%.3f')
         # `file` is the Python 2 builtin; the stream is not closed explicitly.
         stream = file(output_dir+'/'+'fileInfo.yaml','w')
         yaml.dump(fileInfo, stream)
         print "Total number of time series : " + str(pitch.shape[0])
         print "Length of single Sub sequence : " + str(pitch.shape[1])