Пример #1
0
def get_syls(bird_directory,target_syllable,prepad=10,postpad=10,shuffle=False,n=50, notbatch='notbatch'):
    """Export every occurrence of ``target_syllable`` as its own WAV snippet.

    The batch file ``notbatch`` inside ``bird_directory`` is read as one
    ``.not.mat`` file name per line.  Each annotation is loaded with
    ``evfuncs.load_notmat`` and the matching audio (same path without the
    ``.not.mat`` suffix) with ``AudioSegment.from_wav``.  Every label equal to
    ``target_syllable`` is sliced out and written to
    ``<bird_directory>/syllables/<target_syllable>/<f>_<i>.wav``, where ``f``
    counts the song files whose audio could be loaded so far and ``i`` counts
    the snippets already exported from the current file.

    Args:
        bird_directory: Folder holding the batch file, annotations and audio.
        target_syllable: Label to extract (compared to each entry of
            ``ndict['labels']`` with ``==``).
        prepad: Amount of audio kept before the onset, in the same unit as the
            onsets (presumably milliseconds, which is what pydub slicing
            expects -- confirm against the annotation files).
        postpad: Amount of audio kept after the offset, same unit.
        shuffle: If true, process the song files in random order.
        n: Unused; kept so existing callers keep working.
        notbatch: Name of the batch file inside ``bird_directory``.

    Returns:
        None.  The snippets are written to disk.
    """
    import warnings  # local import: only this function needs it

    suffix = '.not.mat'

    # Blank lines are skipped and a missing final newline is tolerated.
    with open(os.path.join(bird_directory, notbatch)) as batch_file:
        notmat_list = [os.path.join(bird_directory, line.rstrip('\n'))
                       for line in batch_file if line.strip()]
    if shuffle:
        random.shuffle(notmat_list)

    syllable_directory = os.path.join(bird_directory, 'syllables', target_syllable)
    os.makedirs(syllable_directory, exist_ok=True)

    f = 0  # number of song files whose audio was loaded successfully
    for notmat_path in notmat_list:
        ndict = evfuncs.load_notmat(notmat_path)
        # Remove the literal suffix.  str.strip('.not.mat') would instead strip
        # any of the characters ". n o t m a" from BOTH ends of the path.
        if notmat_path.endswith(suffix):
            wav_file = notmat_path[:-len(suffix)]
        else:
            wav_file = notmat_path
        try:
            file_audio = AudioSegment.from_wav(wav_file)
        except Exception as err:
            # Best effort: a song whose audio cannot be read is skipped (and
            # not counted in ``f``), but the failure is no longer silent.
            warnings.warn('skipping %s: %s' % (wav_file, err))
            continue

        exported = 0  # snippets written for this file; used in the file name
        for index, syl in enumerate(ndict['labels']):
            if syl != target_syllable:
                continue
            try:
                # ``index`` is the position of this label, so the onset and
                # offset belong to the syllable that was actually matched.
                onset = ndict['onsets'][index]
                offset = ndict['offsets'][index]
                # pydub treats a negative slice position as an offset from
                # the END of the audio, so clamp the padded start at zero.
                start = max(0, onset - prepad)
                syl_snippet = file_audio[start:offset + postpad]
                ID = str(f) + '_' + str(exported)
                snippet_filename = os.path.join(syllable_directory, ID + '.wav')
                syl_snippet.export(snippet_filename, format='wav')
            except Exception as err:
                warnings.warn('could not export syllable %d of %s: %s'
                              % (index, wav_file, err))
                continue
            exported += 1
        f += 1
Пример #2
0
 def test_load_notmat(self):
     """Every ``.not.mat`` fixture loads into a dict with the expected keys and value types."""
     # Keys that must hold float numpy arrays.
     array_keys = ('onsets', 'offsets')
     # Keys that must hold a plain Python value of exactly this type.
     scalar_types = (
         ('labels', str),
         ('Fs', int),
         ('fname', str),
         ('min_int', int),
         ('min_dur', int),
         ('threshold', int),
         ('sm_win', int),
     )
     pattern = os.path.join(self.test_data_dir, '*.not.mat')
     for notmat_path in sorted(glob(pattern)):
         loaded = evfuncs.load_notmat(notmat_path)
         self.assertIs(type(loaded), dict)
         for key in array_keys:
             self.assertIn(key, loaded)
             self.assertIs(type(loaded[key]), np.ndarray)
             self.assertEqual(loaded[key].dtype, float)
         for key, expected_type in scalar_types:
             self.assertIn(key, loaded)
             self.assertIs(type(loaded[key]), expected_type)
Пример #3
0
 def test_segment_song(self):
     """``segment_song`` reproduces the onsets/offsets stored in the reference ``.mat`` files."""
     def fixtures(pattern):
         # Sorted so the three file lists line up when zipped together.
         return sorted(glob(os.path.join(self.test_data_dir, pattern)))

     # Tolerances for numpy.allclose.  The difference between computed and
     # stored values is usually ~0.00003125; the aim is only to keep the error
     # below a millisecond.  These values were found by trial and error, and
     # give roughly that ceiling: atol + rtol * reference ~ 0.0005.
     abs_tol = 0.0005
     rel_tol = 0.00001

     triples = zip(fixtures('*.cbin'),
                   fixtures('*.not.mat'),
                   fixtures('*unedited_SegmentNotes_output.mat'))
     for cbin_path, notmat_path, reference_path in triples:
         raw, fs = evfuncs.load_cbin(cbin_path)
         smoothed = evfuncs.smooth_data(raw, fs)
         params = evfuncs.load_notmat(notmat_path)
         # Stored durations are divided by 1000 before being passed on
         # (presumably milliseconds -> seconds; confirm against evfuncs).
         syl_dur = params['min_dur'] / 1000
         silent_dur = params['min_int'] / 1000
         onsets, offsets = evfuncs.segment_song(
             smoothed, fs, params['threshold'], syl_dur, silent_dur)
         reference = loadmat(reference_path, squeeze_me=True)
         expected_onsets = reference['onsets']
         expected_offsets = reference['offsets']
         self.assertTrue(
             np.allclose(onsets, expected_onsets, rtol=rel_tol, atol=abs_tol))
         self.assertTrue(
             np.allclose(offsets, expected_offsets, rtol=rel_tol, atol=abs_tol))
Пример #4
0
def get_syls(directory,
             prepad=20,
             postpad=20,
             shuffle=True,
             n=50,
             notbatch='notbatch'):
    """Export up to ``n`` labelled syllables as WAV snippets plus spectrograms.

    The ``snippets`` and ``spectrograms`` folders inside ``directory`` are
    emptied and recreated.  The batch file ``notbatch`` is then read as one
    ``.not.mat`` file name per line.  For each song, the annotation is loaded
    with ``evfuncs.load_notmat`` and the audio (same path without the
    ``.not.mat`` suffix) with ``AudioSegment.from_wav``.  Every label that is
    an ASCII letter is sliced out, written to
    ``<directory>/snippets/<f>_<i>.wav`` and passed to ``graph_spectrogram``.
    ``f`` is the number of song files processed so far and ``i`` is a 1-based
    count of the snippets exported from the current file.

    Args:
        directory: Folder holding the batch file, annotations and audio.
        prepad: Amount of audio kept before the onset, in the same unit as the
            onsets (presumably milliseconds, which is what pydub slicing
            expects -- confirm against the annotation files).
        postpad: Amount of audio kept after the offset, same unit.
        shuffle: If true, process the song files in random order.
        n: Maximum number of snippets/spectrograms to create in total.
        notbatch: Name of the batch file inside ``directory``.

    Returns:
        None.  The snippets and spectrograms are written to disk.
    """
    import warnings  # local import: only this function needs it

    suffix = '.not.mat'

    # Start from empty output folders so stale results never mix with new ones.
    for folder in (os.path.join(directory, 'snippets'),
                   os.path.join(directory, 'spectrograms')):
        if os.path.exists(folder):
            for stale in os.listdir(folder):
                os.remove(os.path.join(folder, stale))
            os.rmdir(folder)
        os.makedirs(folder)

    # Blank lines are skipped and a missing final newline is tolerated.
    with open(os.path.join(directory, notbatch)) as batch_file:
        notmat_list = [os.path.join(directory, line.rstrip('\n'))
                       for line in batch_file if line.strip()]
    if shuffle:
        random.shuffle(notmat_list)

    spectrograms_created = 0  # total across all song files, capped at n
    f = 0  # number of song files processed
    for notmat_path in notmat_list:
        if spectrograms_created >= n:
            break  # quota reached: no need to load any further songs
        ndict = evfuncs.load_notmat(notmat_path)
        # Remove the literal suffix.  str.strip('.not.mat') would instead strip
        # any of the characters ". n o t m a" from BOTH ends of the path.
        if notmat_path.endswith(suffix):
            wav_file = notmat_path[:-len(suffix)]
        else:
            wav_file = notmat_path
        file_audio = AudioSegment.from_wav(wav_file)

        snippet_number = 1  # 1-based count of snippets exported for this file
        for index, syl in enumerate(ndict['labels']):
            if spectrograms_created >= n:
                break
            # Only alphabetic labels are exported, so "-" and "0" are ignored.
            if syl not in string.ascii_letters:
                continue
            try:
                # ``index`` is the position of this label, so the onset and
                # offset belong to the syllable that is being examined.
                onset = ndict['onsets'][index]
                offset = ndict['offsets'][index]
                # pydub treats a negative slice position as an offset from
                # the END of the audio, so clamp the padded start at zero.
                start = max(0, onset - prepad)
                syl_snippet = file_audio[start:offset + postpad]
                ID = str(f) + '_' + str(snippet_number)
                snippet_filename = os.path.join(directory, 'snippets',
                                                ID + '.wav')
                syl_snippet.export(snippet_filename, format='wav')
                graph_spectrogram(snippet_filename, directory, ID=ID)
            except Exception as err:
                # Best effort: one bad syllable must not abort the run, but
                # the failure is reported and Ctrl-C still propagates.
                warnings.warn('could not export syllable %d of %s: %s'
                              % (index, wav_file, err))
                continue
            snippet_number += 1
            spectrograms_created += 1
        f += 1
Пример #5
0
    label = input_str

# Folder to process, taken from the first command-line argument.  It is used
# as a plain string prefix below, so it needs to end with a path separator.
directory=sys.argv[1]

songs_dict = {}

# Current user label; the file loop below stops as soon as it equals 'quit'.
label = '-'

# Read the batch file "notbatch": one labelled songfile (.not.mat) per line.
# NOTE(review): the file handle is never closed, and value.index('\n') raises
# ValueError if the last line has no trailing newline.
notmat_lines_list = list(open(directory+'notbatch'))
# Prefix every listed name (minus its newline) with the directory.
notmat_list = [directory+value[:value.index('\n')] for value in notmat_lines_list]
# Counter for generated spectrograms (not modified in the lines shown here).
spectrograms_created = 0
# Counter for processed songfiles; it is used in the syllable ID below and is
# not incremented in the lines shown here.
f=0
for file in notmat_list:
    if label == 'quit':
        break
    # Load the variables stored in the notmat file for the current songfile.
    ndict = evfuncs.load_notmat(file)
    # The wav file is expected to have the same path minus ".not.mat".
    # NOTE(review): str.strip removes any of the characters ". n o t m a" from
    # BOTH ends of the path rather than removing the suffix -- confirm the
    # paths in use are not mangled by this.
    wav_file = file.strip('.not.mat')
    # AudioSegment of the whole wave file, so that it can be sliced up.
    file_audio = AudioSegment.from_wav(wav_file)
    # Initial value only: the for-loop below rebinds i to each syllable index.
    i=1
    #print(type(ndict['onsets']))
    # Skip files where 'onsets' is a bare int (presumably a file without a
    # usable onset array -- TODO confirm what load_notmat returns in that case).
    if type(ndict['onsets']) is not int:
        # Visit the syllables of this file in random order.
        indices = list(range(len(ndict['onsets'])))
        random.shuffle(indices)
        for i in indices:
            # Onset/offset of syllable i within the file (presumably in
            # milliseconds, which is what AudioSegment slicing expects).
            onset=ndict['onsets'][i]
            offset=ndict['offsets'][i]
            # Slice from 20 before the onset to 20 after the offset (fixed
            # padding).  NOTE(review): a negative start would be counted from
            # the end of the audio by pydub -- confirm onsets are never < 20.
            syl_snippet = file_audio[onset-20:offset+20]
            # ID for the current syllable: "<songfile counter>_<syllable index>".
            ID = str(f)+'_'+str(i)
            # Every syllable is exported to the same file, which is
            # overwritten on each iteration.
            snippet_filename = directory+'syl'+'.wav'
            syl_snippet.export(snippet_filename,format='wav')
            # Build the spectrogram of the exported snippet and keep its axes.
            ax = make_spectrogram(snippet_filename)['ax']