def test_scaper_init():
    """Verify Scaper construction: invalid arguments raise, defaults are set."""
    # A negative duration is rejected at construction time.
    sc = pytest.raises(ScaperError, scaper.Scaper, -5, FG_PATH, BG_PATH)

    # A fully valid call stores the provided paths.
    sc = scaper.Scaper(10.0, FG_PATH, BG_PATH)
    assert sc.fg_path == FG_PATH
    assert sc.bg_path == BG_PATH

    # Nonexistent foreground / background folders are rejected.
    sc = pytest.raises(ScaperError, scaper.Scaper, 10.0,
                       'tests/data/audio/wrong', BG_PATH)
    sc = pytest.raises(ScaperError, scaper.Scaper, 10.0, FG_PATH,
                       'tests/data/audio/wrong')

    # Label lists are discovered from the folder structure.
    sc = scaper.Scaper(10.0, FG_PATH, BG_PATH)
    assert sc.fg_labels == FB_LABELS
    assert sc.bg_labels == BG_LABELS

    # Default attribute values.
    assert sc.sr == 44100
    assert sc.ref_db == -12
    assert sc.n_channels == 1
    assert sc.fade_in_len == 0.01   # 10 ms
    assert sc.fade_out_len == 0.01  # 10 ms
def mix_subset(self, subset):
    """Generate and return the list of mixed examples for one subset folder."""
    total = self.num_examples[subset]
    print('Preparing {} examples.'.format(subset))
    # exist_ok=False: fail loudly rather than overwrite a previous run.
    os.makedirs(os.path.join(self.output_root, subset), exist_ok=False)

    fg_file_list = self.get_file_list(subset, 'foreground')
    bg_file_list = self.get_file_list(subset, 'background')

    fg_folder = os.path.join(self.fg_root, subset)
    bg_folder = os.path.join(self.bg_root, subset)

    # Create a scaper object; the folders are used to derive the label lists.
    self.sc = scaper.Scaper(self.duration, fg_folder, bg_folder,
                            random_state=self.random_seed)
    self.sc.protected_labels = []
    self.sc.ref_db = self.ref_db
    self.sc.sr = self.sample_rate

    examples = []
    ind_fg = 0
    ind_bg = 0
    for idx in range(total):
        print('Generating example: {:d}/{:d}'.format(idx + 1, total))
        # generate_example advances the fg/bg cursors so files are consumed
        # sequentially across examples.
        example, ind_fg, ind_bg = self.generate_example(
            subset, idx, total, bg_file_list, fg_file_list, ind_fg, ind_bg)
        examples.append(example)
    return examples
def create_scaper_generator(foreground_directory, scene_duration, max_sources,
                            background_directory=None):
    """Return a Scaper configured with ``max_sources`` random stem events."""
    # The foreground folder doubles as the background folder by default.
    if background_directory is None:
        background_directory = foreground_directory

    sc = scaper.Scaper(scene_duration, foreground_directory,
                       background_directory)
    sc.protected_labels = []
    sc.ref_db = -50
    sc.n_channels = 1
    sc.sr = 44100
    sc.min_silence_duration = None

    for _ in range(max_sources):
        sc.add_event(
            label=('choose', ['vocals', 'drums', 'bass', 'other']),
            source_file=('choose', []),
            source_time=('uniform', 0, 300),
            event_time=('const', 0),
            event_duration=('const', float(scene_duration)),
            # uniform(25, 25) degenerates to a constant 25 dB SNR.
            snr=('uniform', 25, 25),
            pitch_shift=None,
            time_stretch=None)
    return sc
def test_scaper_add_event():
    """Adding a foreground event should append one matching EventSpec."""
    sc = scaper.Scaper(10.0, FG_PATH, BG_PATH)
    # The foreground spec starts out empty.
    assert sc.fg_spec == []

    # Add one event from a shared parameter set.
    spec = dict(label=('const', 'siren'),
                source_file=('choose', []),
                source_time=('const', 0),
                event_time=('uniform', 0, 9),
                event_duration=('truncnorm', 2, 1, 1, 3),
                snr=('uniform', 10, 20),
                pitch_shift=('normal', 0, 1),
                time_stretch=('uniform', 0.8, 1.2))
    sc.add_event(**spec)

    # Exactly one event, with the role filled in as 'foreground'.
    assert len(sc.fg_spec) == 1
    fg_event_expected = EventSpec(role='foreground', **spec)
    assert sc.fg_spec[0] == fg_event_expected
def scaper_folder(toy_datasets):
    """Fixture: synthesize 10 two-source scaper mixtures and yield their folder."""
    wsj_sources = toy_datasets['babywsj_oW0F0H9.zip']
    fg_path = os.path.join(wsj_sources, 'babywsj', 'dev')
    n_sources = 2
    n_mixtures = 10
    duration = 3
    ref_db = -40

    with tempfile.TemporaryDirectory() as tmp_dir:
        # A module-level fix_dir, when set, pins the output location;
        # otherwise the temporary directory is used.
        base_dir = tmp_dir if fix_dir is None else fix_dir
        out_dir = os.path.join(base_dir, 'scaper')
        os.makedirs(out_dir, exist_ok=True)

        for i in range(n_mixtures):
            # Foreground folder is reused as the background folder.
            sc = scaper.Scaper(duration, fg_path, fg_path, random_state=i)
            sc.ref_db = ref_db
            sc.sr = 16000
            for _ in range(n_sources):
                sc.add_event(label=('choose', []),
                             source_file=('choose', []),
                             source_time=('const', 0),
                             event_time=('const', 0),
                             event_duration=('const', duration),
                             snr=('const', 0),
                             pitch_shift=None,
                             time_stretch=None)
            audio_path = os.path.join(out_dir, f'{i}.wav')
            jams_path = os.path.join(out_dir, f'{i}.jams')
            sc.generate(audio_path, jams_path, save_isolated_events=True)
        yield out_dir
def build_scape(thisscape, outdir, scape_dur, sourcedir, bg_label, fg_label,
                junk_label):
    # Render one planned soundscape (one row of the scape plan, `thisscape`)
    # to <outdir>/<Scape Name>.wav, a matching .jams annotation, and a .csv
    # copy of the plan itself.
    # print(thisscape)
    sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                       f"{sourcedir}/background")
    sc.ref_db = -52  # TODO: hand-tuned reference level; revisit.
    fname = thisscape['Scape Name']
    audiofile = f"{outdir}/{fname}.wav"
    jamsfile = f"{outdir}/{fname}.jams"
    # print(f"fname: {fname}, audiofile: {audiofile}, jamsfile: {jamsfile}")
    # Background start time is drawn so the excerpt fits inside the source;
    # background files are 1min long for rats as of 2019/05/08.
    sc.add_background(label=("const", bg_label),
                      source_file=("choose", []),
                      source_time=("uniform", 0, 60 - scape_dur))
    # Add each planned vocalization at its scheduled start/end time.
    for i in range(len(thisscape['Start Times'])):
        sc.add_event(
            label=('const', fg_label),
            source_file=('const', f'{sourcedir}/foreground/{fg_label}/' +
                         thisscape['Source Files'][i]),
            source_time=('const', 0),
            event_time=('const', thisscape['Start Times'][i]),
            event_duration=('const',
                            thisscape['End Times'][i] -
                            thisscape['Start Times'][i]),
            # Was (-10, 6). TODO: this always needs tested in case something
            # is different about the foreground files.
            snr=('uniform', 14, 20),
            pitch_shift=None,
            time_stretch=None)
    # Optional distractor ("junk") events, currently disabled:
    # num_junk = random.randint(0,2)  # 2 for 5s rats, 10 for easyjunk pnre, 5 for shortjunk pnre
    # for j in range(num_junk):
    #     sc.add_event(label=('const', junk_label),
    #                  source_file=('choose', []),
    #                  source_time=('const', 0),
    #                  event_time=('uniform', 0, scape_dur-.5),
    #                  event_duration=('const', 5),  # TODO: get length so don't have to deal with the warnings.
    #                  snr=('uniform', -5, 2),
    #                  pitch_shift=('normal', -.5, .5),
    #                  time_stretch=('uniform', .5, 2))
    sc.generate(audiofile, jamsfile,
                allow_repeated_label=True,
                allow_repeated_source=True,
                reverb=0,
                disable_sox_warnings=True,
                no_audio=False)
    # Persist the plan alongside the audio for later reference.
    df = pd.DataFrame(thisscape)
    df = df.transpose()
    df.to_csv(f'{outdir}/{fname}.csv')
    print(f"Scape {fname} generated.")
def mix_closure(dataset, i):
    """Create one coherent-style mixture for ``dataset`` with seed index ``i``.

    Configuration (scene_duration, paths, event templates, flags, ...) is
    taken from the enclosing scope. The first label's event is instantiated
    once and its sampled timing / pitch / stretch values are reused for every
    other label, so stems line up; per-label SNR comes from the templates.
    Returns whatever ``_convert_to_output`` produces.
    """
    sc = scaper.Scaper(
        scene_duration,
        fg_path=fg_path,
        bg_path=bg_path,
        random_state=i,
    )
    sc.ref_db = ref_db
    sc.sr = sample_rate
    sc.n_channels = num_channels
    sc.bitdepth = bitdepth
    sc.fade_in_len = 0
    sc.fade_out_len = 0

    # A list supplies one template per label; a single dict is shared.
    if isinstance(fg_event_parameters, list):
        assert len(fg_event_parameters) == len(labels)
        event_parameters = copy.deepcopy(fg_event_parameters)
    else:
        event_parameters = [fg_event_parameters for _ in labels]

    with warnings.catch_warnings():
        if ignore_warnings:
            warnings.simplefilter("ignore")
        # Sample the first label's event once; its values anchor all labels.
        sc, event = instantiate_and_get_event_spec(
            sc, labels[0], event_parameters[0], loud_regions)
        for idx, label in enumerate(labels):
            _pitch_shift = ('const', event.pitch_shift)
            if event.pitch_shift is None:
                _pitch_shift = None
            _time_stretch = ('const', event.time_stretch)
            if event.time_stretch is None:
                _time_stretch = None
            try:
                sc.add_event(label=('const', label),
                             source_file=('const',
                                          event.source_file.replace(
                                              labels[0], label)),
                             source_time=('const', event.source_time),
                             event_time=('const', event.event_time),
                             event_duration=('const', event.event_duration),
                             snr=event_parameters[idx]['snr'],
                             pitch_shift=_pitch_shift,
                             time_stretch=_time_stretch)
            except Exception:
                # BUGFIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Fall back to the raw template
                # when the coherent event cannot be added (e.g. no matching
                # stem file for this label).
                sc.add_event(**event_parameters[idx])
        for _ in range(num_bg_sources):
            sc.add_background(**bg_event_parameters)

        jam, soundscape_audio_data, source_audio_data = sc.generate(
            None,
            None,
            quick_pitch_time=quick_pitch_time,
            disable_instantiation_warnings=ignore_warnings,
        )
    return _convert_to_output(dataset, jam, soundscape_audio_data,
                              source_audio_data, sample_rate)
def test_scaper_instantiate():
    """Regression: a fixed spec must instantiate to the stored JAMS file."""
    for sr in (44100, 22050):
        reg_jam_path = TEST_PATHS[sr]['REG'].jams
        # Instantiate a known fixed spec and compare the resulting jams
        # against the stored regression annotation.
        sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
        sc.ref_db = -50
        sc.sr = sr

        # background
        sc.add_background(
            label=('const', 'park'),
            source_file=('const',
                         'tests/data/audio/background/park/'
                         '268903__yonts__city-park-tel-aviv-israel.wav'),
            source_time=('const', 0))

        # foreground events, expressed as data and added in order
        events = [
            dict(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 event_time=('const', 2),
                 event_duration=('const', 5),
                 snr=('const', 5),
                 pitch_shift=None,
                 time_stretch=None),
            dict(label=('const', 'car_horn'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'car_horn/17-CAR-Rolls-Royce-Horn.wav'),
                 source_time=('const', 0),
                 event_time=('const', 5),
                 event_duration=('const', 2),
                 snr=('const', 20),
                 pitch_shift=('const', 1),
                 time_stretch=None),
            dict(label=('const', 'human_voice'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'human_voice/'
                              '42-Human-Vocal-Voice-taxi-2_edit.wav'),
                 source_time=('const', 0),
                 event_time=('const', 7),
                 event_duration=('const', 2),
                 snr=('const', 10),
                 pitch_shift=None,
                 time_stretch=('const', 1.2)),
        ]
        for ev in events:
            sc.add_event(**ev)

        jam = sc._instantiate(disable_instantiation_warnings=True)
        regjam = jams.load(reg_jam_path)
        _compare_scaper_jams(jam, regjam)
def incoherent(fg_folder, bg_folder, event_template, seed):
    """
    Build an INCOHERENT mixture from the MUSDB18 source material: stems may
    come from different songs and are not temporally aligned.

    Parameters
    ----------
    fg_folder : str
        Path to the foreground source material for MUSDB18
    bg_folder : str
        Path to the background material for MUSDB18 (empty folder)
    event_template : dict
        Dictionary containing a template of probabilistic event parameters
    seed : int or np.random.RandomState()
        Seed for the Scaper object's random state. Different seeds generate
        different mixtures for the same source material and template.

    Returns
    -------
    The tuple returned by ``scaper.Scaper.generate``: mixture audio, JAMS
    annotation, simple annotation list, and the list of stem audio signals.
    """
    # Seeded Scaper over the MUSDB18 folders.
    sc = scaper.Scaper(duration=10.0,
                       fg_path=str(fg_folder),
                       bg_path=str(bg_folder),
                       random_state=seed)

    # Mono output at CD sample rate, -20 dB reference level.
    sc.sr = 44100
    sc.ref_db = -20
    sc.n_channels = 1

    # Copy so the caller's template is not mutated.
    event_parameters = event_template.copy()

    # One independently sampled event per stem type => incoherent mixture.
    for stem_label in ['vocals', 'acc']:
        event_parameters['label'] = ('const', stem_label)
        sc.add_event(**event_parameters)

    # NOTE(review): the original comment claimed clipping is prevented, but
    # fix_clipping=False disables that behavior — confirm intent.
    return sc.generate(fix_clipping=False)
def _create_scaper_object(self, state):
    """Build a Scaper seeded with ``state``; fg folder is reused as bg folder."""
    sc = scaper.Scaper(self.duration, self.fg_path, self.fg_path,
                       random_state=state)
    sc.sr = self.sample_rate
    sc.n_channels = self.n_channels
    # A [lo, hi] ref_db is sampled uniformly per call; a scalar is used as-is.
    db = self.ref_db
    sc.ref_db = state.uniform(db[0], db[1]) if isinstance(db, List) else db
    return sc
def build_scape(scape_def):
    # Render one soundscape described by `scape_def` (a table of calls with
    # Label/Filename/Start/Duration columns) to <name>.wav / <name>.jams,
    # then generate its spectrogram image via gen_image().
    # print(scape_def)
    # Output basename: <output_dir>/<unique labels joined by '_'>_<index>.
    fname = f"{args['--output_dir']}/{'_'.join(np.unique(scape_def['Label'].values))}_{scape_def['Index'].iloc[0]}"
    audiofile = f"{fname}.wav"
    jamsfile = f"{fname}.jams"
    foreground_folder = f"{base}/foreground"
    background_folder = f"{base}/background"
    # NOTE(review): docopt-style `args` values are typically strings —
    # confirm "--duration" / "--number" are converted to numbers upstream,
    # since scape_dur is used in arithmetic (60 - scape_dur) below.
    scape_dur = args["--duration"]
    scape_count = args["--number"]
    sc = scaper.Scaper(scape_dur, foreground_folder, background_folder)
    sc.ref_db = -52  # TODO: hand-tuned reference level; revisit.
    # Background start time is drawn so the excerpt fits inside the source;
    # background files are 1min long for rats as of 2019/05/08.
    sc.add_background(
        label=("const", bg_label),
        source_file=("choose", []),
        source_time=("uniform", 0, 60 - scape_dur)
    )
    # Add each call from the plan at its scheduled start time.
    for _, call in scape_def.iterrows():
        sc.add_event(
            label=('const', call["Label"]),
            source_file=('const',
                         f"{foreground_folder}/{call['Label']}/{call['Filename']}"),
            source_time=('const', 0),
            event_time=('const', call["Start"]),
            event_duration=('const', call["Duration"]),
            snr=('uniform', 1, 10),  # TODO: check for new datasets!
            pitch_shift=None,
            time_stretch=None
        )
    sc.generate(
        audiofile,
        jamsfile,
        allow_repeated_label=True,
        allow_repeated_source=True,
        reverb=0,
        disable_sox_warnings=True,
        no_audio=False
    )
    gen_image(fname)
def mix_closure(dataset, i):
    """Generate one scaper mixture for ``dataset`` using seed index ``i``.

    Configuration (paths, durations, event templates, flags, ...) comes from
    the enclosing scope; the result is post-processed by _convert_to_output.
    """
    sc = scaper.Scaper(
        scene_duration,
        fg_path=fg_path,
        bg_path=bg_path,
        random_state=i,
    )
    sc.ref_db = ref_db
    sc.sr = sample_rate
    sc.bitdepth = bitdepth
    sc.n_channels = num_channels
    sc.fade_in_len = 0
    sc.fade_out_len = 0

    # A list supplies one template per label; a single dict is reused for
    # every source (intentionally shared, not copied).
    if isinstance(fg_event_parameters, list):
        assert len(fg_event_parameters) == len(labels)
        event_parameters = fg_event_parameters
    else:
        event_parameters = [
            fg_event_parameters for _ in range(num_sources)
        ]

    with warnings.catch_warnings():
        if ignore_warnings:
            warnings.simplefilter("ignore")
        for src_idx in range(num_sources):
            sc.add_event(**event_parameters[src_idx])
        for _ in range(num_bg_sources):
            sc.add_background(**bg_event_parameters)
        jam, soundscape_audio_data, source_audio_data = sc.generate(
            None,
            None,
            allow_repeated_label=allow_repeated_label,
            quick_pitch_time=quick_pitch_time,
            disable_instantiation_warnings=ignore_warnings,
        )
    return _convert_to_output(dataset, jam, soundscape_audio_data,
                              source_audio_data, sample_rate)
def test_scaper_add_background():
    """Adding a background should record an EventSpec with fixed defaults."""
    sc = scaper.Scaper(10.0, FG_PATH, BG_PATH)

    # Add a concrete background (label, source_file, source_time).
    sc.add_background(label=("const", "park"),
                      source_file=("choose", []),
                      source_time=("const", 0))

    # event_time, event_duration, snr and role are filled in automatically:
    # backgrounds start at 0, span the full soundscape, and sit at 0 SNR.
    expected = EventSpec(label=("const", "park"),
                         source_file=("choose", []),
                         source_time=("const", 0),
                         event_time=("const", 0),
                         event_duration=("const", sc.duration),
                         snr=("const", 0),
                         role='background',
                         pitch_shift=None,
                         time_stretch=None)
    assert sc.bg_spec == [expected]
def test_generate_audio(atol=1e-4, rtol=1e-8):
    # Regression test: the same spec must produce the same audio. (Note:
    # this will fail if we update any of the audio processing techniques
    # used, e.g. change the time stretching algorithm.)
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    sc.ref_db = -50
    # background
    sc.add_background(
        label=('const', 'park'),
        source_file=('const', 'tests/data/audio/background/park/'
                     '268903__yonts__city-park-tel-aviv-israel.wav'),
        source_time=('const', 0))
    # foreground events
    sc.add_event(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 event_time=('const', 2),
                 event_duration=('const', 5),
                 snr=('const', 5),
                 pitch_shift=None,
                 time_stretch=None)
    sc.add_event(label=('const', 'car_horn'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'car_horn/17-CAR-Rolls-Royce-Horn.wav'),
                 source_time=('const', 0),
                 event_time=('const', 5),
                 event_duration=('const', 2),
                 snr=('const', 20),
                 pitch_shift=('const', 1),
                 time_stretch=None)
    sc.add_event(
        label=('const', 'human_voice'),
        source_file=('const', 'tests/data/audio/foreground/'
                     'human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav'),
        source_time=('const', 0),
        event_time=('const', 7),
        event_duration=('const', 2),
        snr=('const', 10),
        pitch_shift=None,
        time_stretch=('const', 1.2))

    tmpfiles = []
    with _close_temp_files(tmpfiles):
        wav_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=True)
        tmpfiles.append(wav_file)

        jam = sc._instantiate(disable_instantiation_warnings=True)
        sc._generate_audio(wav_file.name, jam.annotations[0])

        # validate audio against the stored regression waveform
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

        # with reverb
        sc._generate_audio(wav_file.name, jam.annotations[0], reverb=0.2)
        # validate audio
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_REVERB_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

        # Don't disable sox warnings (just to cover that code path)
        sc._generate_audio(wav_file.name, jam.annotations[0],
                           disable_sox_warnings=False)
        # validate audio
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

        # namespace must be scaper
        jam.annotations[0].namespace = 'tag_open'
        pytest.raises(ScaperError, sc._generate_audio, wav_file.name,
                      jam.annotations[0])

        # unsupported event role must raise error
        jam.annotations[0].namespace = 'scaper'
        jam.annotations[0].data[3].value['role'] = 'ewok'
        pytest.raises(ScaperError, sc._generate_audio, wav_file.name,
                      jam.annotations[0])

        # soundscape with no events will raise warning and won't generate
        # audio
        sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
        sc.ref_db = -50
        jam = sc._instantiate(disable_instantiation_warnings=True)
        pytest.warns(ScaperWarning, sc._generate_audio, wav_file.name,
                     jam.annotations[0])

        # soundscape with only one event will use transformer (regression
        # test)
        sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
        sc.ref_db = -20
        # background
        sc.add_background(
            label=('const', 'park'),
            source_file=('const', 'tests/data/audio/background/park/'
                         '268903__yonts__city-park-tel-aviv-israel.wav'),
            source_time=('const', 0))
        jam = sc._instantiate(disable_instantiation_warnings=True)
        sc._generate_audio(wav_file.name, jam.annotations[0], reverb=0.2)
        # validate audio
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_BGONLY_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)
def test_scaper_instantiate_event():
    # FG event template used throughout this test.
    fg_event = EventSpec(label=('const', 'siren'),
                         source_file=('choose', []),
                         source_time=('const', 0),
                         event_time=('uniform', 0, 9),
                         event_duration=('truncnorm', 2, 1, 1, 3),
                         snr=('uniform', 10, 20),
                         role='foreground',
                         pitch_shift=('normal', 0, 1),
                         time_stretch=('uniform', 0.8, 1.2))

    # test valid case: every sampled value must respect its distribution.
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    instantiated_event = sc._instantiate_event(
        fg_event,
        isbackground=False,
        allow_repeated_label=True,
        allow_repeated_source=True,
        used_labels=[],
        used_source_files=[],
        disable_instantiation_warnings=True)
    assert instantiated_event.label == 'siren'
    assert instantiated_event.source_file == (
        'tests/data/audio/foreground/siren/69-Siren-1.wav')
    assert instantiated_event.source_time == 0
    assert 0 <= instantiated_event.event_time <= 9
    assert 1 <= instantiated_event.event_duration <= 3
    assert 10 <= instantiated_event.snr <= 20
    assert instantiated_event.role == 'foreground'
    assert scaper.util.is_real_number(instantiated_event.pitch_shift)
    assert 0.8 <= instantiated_event.time_stretch <= 1.2

    # when a label needs to be replaced because it's used already
    fg_event8 = fg_event._replace(label=('choose', []))
    # repeat several times to increase chance of hitting the line we need
    # to test
    for _ in range(20):
        instantiated_event = sc._instantiate_event(
            fg_event8,
            isbackground=False,
            allow_repeated_label=False,
            allow_repeated_source=True,
            used_labels=['siren', 'human_voice'],
            disable_instantiation_warnings=True)
        # 'car_horn' is the only remaining unused label
        assert instantiated_event.label == 'car_horn'

    # when a source file needs to be replaced because it's used already
    fg_event9 = fg_event._replace(label=('const', 'human_voice'))
    # repeat several times to increase chance of hitting the line we need
    # to test
    for _ in range(20):
        instantiated_event = sc._instantiate_event(
            fg_event9,
            isbackground=False,
            allow_repeated_label=True,
            allow_repeated_source=False,
            used_labels=[],
            used_source_files=([
                'tests/data/audio/foreground/human_voice/'
                '42-Human-Vocal-Voice-all-aboard_edit.wav',
                'tests/data/audio/foreground/human_voice/'
                '42-Human-Vocal-Voice-taxi-1_edit.wav'
            ]),
            disable_instantiation_warnings=True)
        # taxi-2 is the only remaining unused human_voice source
        assert instantiated_event.source_file == (
            'tests/data/audio/foreground/human_voice/'
            '42-Human-Vocal-Voice-taxi-2_edit.wav')

    # Protected labels must have original source duration and source time 0
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH,
                       protected_labels='human_voice')
    fg_event10 = fg_event._replace(
        label=('const', 'human_voice'),
        source_file=('const',
                     'tests/data/audio/foreground/human_voice/'
                     '42-Human-Vocal-Voice-taxi-2_edit.wav'),
        source_time=('const', 0.3),
        event_duration=('const', 0.4))
    instantiated_event = sc._instantiate_event(
        fg_event10, disable_instantiation_warnings=True)
    assert instantiated_event.source_time == 0
    # full duration of the protected source file
    assert instantiated_event.event_duration == 0.806236

    # repeated label when not allowed throws error
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    pytest.raises(ScaperError, sc._instantiate_event, fg_event,
                  isbackground=False,
                  allow_repeated_label=False,
                  allow_repeated_source=True,
                  used_labels=['siren'])

    # repeated source when not allowed throws error
    pytest.raises(ScaperError, sc._instantiate_event, fg_event,
                  isbackground=False,
                  allow_repeated_label=True,
                  allow_repeated_source=False,
                  used_labels=['siren'],
                  used_source_files=([
                      'tests/data/audio/foreground/siren/69-Siren-1.wav'
                  ]))

    # event duration longer than source duration: warning
    fg_event2 = fg_event._replace(label=('const', 'car_horn'),
                                  event_duration=('const', 5))
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event2)

    # event duration longer than soundscape duration: warning
    fg_event3 = fg_event._replace(event_time=('const', 0),
                                  event_duration=('const', 15),
                                  time_stretch=None)
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event3)

    # stretched event duration longer than soundscape duration: warning
    fg_event4 = fg_event._replace(event_time=('const', 0),
                                  event_duration=('const', 6),
                                  time_stretch=('const', 2))
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event4)

    # source_time + event_duration > source_duration: warning
    fg_event5 = fg_event._replace(event_time=('const', 0),
                                  event_duration=('const', 8),
                                  source_time=('const', 20))
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event5)

    # event_time + event_duration > soundscape duration: warning
    fg_event6 = fg_event._replace(event_time=('const', 8),
                                  event_duration=('const', 5),
                                  time_stretch=None)
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event6)

    # event_time + stretched event_duration > soundscape duration: warning
    fg_event7 = fg_event._replace(event_time=('const', 5),
                                  event_duration=('const', 4),
                                  time_stretch=('const', 2))
    pytest.warns(ScaperWarning, sc._instantiate_event, fg_event7)
def build_scapes(outdir, scape_dur, sourcedir, bg_label, junk_label):
    # Synthesize four categories of labeled soundscapes (rat-only, empty,
    # parrot-only, rat+parrot) and log each file's binary labels
    # (Rat, A Farinosa) to a CSV for training.
    csvfile = "ratfiles_inception.csv"
    with open(csvfile, 'w', newline='') as csvfile:
        # NOTE(review): `csvfile` (the path string) is shadowed here by the
        # open file handle; this works but a distinct name would be clearer.
        writer = csv.writer(csvfile, delimiter=',')
        header = ['File', 'Rat', 'A Farinosa']
        writer.writerow(header)
        # Per-category soundscape counts.
        ratcount = 1000  # 1000
        parrotcount = 3000  # 3000
        emptycount = 4000  # 4000
        ratparrotcount = 2000  # 2000
        total = ratcount + parrotcount + emptycount + ratparrotcount
        ############################################################ RATS
        for i in range(ratcount):
            sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                               f"{sourcedir}/background")
            sc.ref_db = -40  # TODO
            fname = f"rat_scape{str(i)}"
            audiofile = f"{outdir}/{fname}.wav"
            jamsfile = f"{outdir}/{fname}.jams"
            writer.writerow([f"{audiofile}", 1, 0])  # Rat=1, Farinosa=0
            sc.add_background(label=("const", bg_label),
                              source_file=("choose", []),
                              source_time=("const", 0))
            sc.add_event(
                label=('const', "clean-rats"),  # one rat per file
                source_file=('choose', []),
                source_time=('const', 0),
                event_time=('uniform', 0, 59),
                # duration of event in synthesized soundscape - you will get
                # warnings
                event_duration=('const', 25),
                snr=('uniform', -10, 2),  # TODO - decide
                # pitch_shift: number of semitones (can be fractional) to
                # shift sound up/down
                pitch_shift=None,
                # time_stretch: factor to stretch sound by
                # (<1 = shorter, >1 = longer)
                time_stretch=None)
            num_junk = random.randint(0, 10)  # 10 for just rats
            for j in range(num_junk):
                sc.add_event(label=('const', junk_label),
                             source_file=('choose', []),
                             source_time=('const', 0),
                             event_time=('uniform', 0, scape_dur - .5),
                             event_duration=('const', 5),
                             snr=('uniform', -5, 2),
                             pitch_shift=('normal', -.5, .5),
                             time_stretch=('uniform', .5, 2))
            sc.generate(audiofile, jamsfile,
                        allow_repeated_label=True,
                        allow_repeated_source=True,
                        reverb=0,
                        disable_sox_warnings=True,
                        no_audio=False)
            print(f"Rat scape {i} generated.")
        ############################################################ EMPTY
        for i in range(emptycount):
            sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                               f"{sourcedir}/background")
            sc.ref_db = -40  # TODO
            fname = f"empty_scape{str(i)}"
            audiofile = f"{outdir}/{fname}.wav"
            jamsfile = f"{outdir}/{fname}.jams"
            writer.writerow([f"{audiofile}", 0, 0])  # neither species
            sc.add_background(label=("const", bg_label),
                              source_file=("choose", []),
                              source_time=("const", 0))
            num_junk = random.randint(0, 10)
            for j in range(num_junk):
                sc.add_event(label=('const', junk_label),
                             source_file=('choose', []),
                             source_time=('const', 0),
                             event_time=('uniform', 0, scape_dur - .5),
                             event_duration=('const', 5),
                             snr=('uniform', -5, 2),
                             pitch_shift=('normal', -.5, .5),
                             time_stretch=('uniform', .5, 2))
            sc.generate(audiofile, jamsfile,
                        allow_repeated_label=True,
                        allow_repeated_source=True,
                        reverb=0,
                        disable_sox_warnings=True,
                        no_audio=False)
            print(f"Empty scape {i} generated.")
        ############################################################ A. FARINOSA
        for i in range(parrotcount):
            sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                               f"{sourcedir}/background")
            sc.ref_db = -40  # TODO
            fname = f"farinosa_scape{str(i)}"
            audiofile = f"{outdir}/{fname}.wav"
            jamsfile = f"{outdir}/{fname}.jams"
            writer.writerow([f"{audiofile}", 0, 1])  # Farinosa only
            # Parrot audio serves as the background layer here.
            sc.add_background(label=("const", "farinosa-minutes"),
                              source_file=("choose", []),
                              source_time=("const", 0))
            num_junk = random.randint(0, 10)
            for j in range(num_junk):
                sc.add_event(label=('const', junk_label),
                             source_file=('choose', []),
                             source_time=('const', 0),
                             event_time=('uniform', 0, scape_dur - .5),
                             event_duration=('const', 5),
                             snr=('uniform', -5, 2),
                             pitch_shift=('normal', -.5, .5),
                             time_stretch=('uniform', .5, 2))
            sc.generate(audiofile, jamsfile,
                        allow_repeated_label=True,
                        allow_repeated_source=True,
                        reverb=0,
                        disable_sox_warnings=True,
                        no_audio=False)
            print(f"Farinosa scape {i} generated.")
        ############################################################ A. FARINOSA AND RAT
        for i in range(ratparrotcount):
            sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                               f"{sourcedir}/background")
            sc.ref_db = -40  # TODO
            fname = f"rat_and_farinosa_scape{str(i)}"
            audiofile = f"{outdir}/{fname}.wav"
            jamsfile = f"{outdir}/{fname}.jams"
            writer.writerow([f"{audiofile}", 1, 1])  # both species present
            sc.add_background(label=("const", "farinosa-minutes"),
                              source_file=("choose", []),
                              source_time=("const", 0))
            sc.add_event(
                label=('const', "clean-rats"),  # one rat per file
                source_file=('choose', []),
                source_time=('const', 0),
                event_time=('uniform', 0, 58),
                # duration of event in synthesized soundscape - you will get
                # warnings
                event_duration=('const', 40),
                snr=('uniform', -10, 2),  # TODO - decide
                # pitch_shift: number of semitones (can be fractional) to
                # shift sound up/down
                pitch_shift=None,
                # time_stretch: factor to stretch sound by
                # (<1 = shorter, >1 = longer)
                time_stretch=None)
            num_junk = random.randint(0, 10)  # 10 for farinosas and rat
            for j in range(num_junk):
                sc.add_event(label=('const', junk_label),
                             source_file=('choose', []),
                             source_time=('const', 0),
                             event_time=('uniform', 0, scape_dur - .5),
                             event_duration=('const', 5),
                             snr=('uniform', -5, 2),
                             pitch_shift=('normal', -.5, .5),
                             time_stretch=('uniform', .5, 2))
            sc.generate(audiofile, jamsfile,
                        allow_repeated_label=True,
                        allow_repeated_source=True,
                        reverb=0,
                        disable_sox_warnings=True,
                        no_audio=False)
            print(f"Rat and farinosa scape {i} generated.")
def gen_scapes_labels(sounds_toUse, source_dir, scape_count, base_name,
                      background_label, scape_dur):
    """Generate ``scape_count`` soundscapes plus YOLO-format label files.

    Relies on module-level names used by the original: ``sounds_list``,
    ``out_dir``, ``max_freq``, ``junk`` and ``all_files``. For each scape,
    foreground calls are sampled from per-class CSVs, mixed with scaper,
    and a .txt of bounding boxes (class, x-center, y-center, width, height
    as fractions of the spectrogram) is written alongside the audio.
    """
    # Load the per-class foreground metadata CSVs.
    for i in range(len(sounds_toUse)):
        sounds_list[i] = pd.read_csv(
            f"{source_dir}/audio/foreground_csvs/{sounds_toUse[i][0]}.csv",
            header=None)
        sounds_list[i].columns = [
            "Src_file", "Duration", "Low_freq", "High_freq"
        ]

    for scape in range(scape_count):
        # set up scape info
        scape_name = f"{base_name}_scape{scape}"
        print(f"scape{scape}:")
        label = ""
        row = f"{out_dir}/{base_name}/JPEGImages/{scape_name}.jpg,\"["

        # set up scaper
        sc = scaper.Scaper(scape_dur, f"{source_dir}/audio/foreground",
                           f"{source_dir}/audio/background")
        sc.ref_db = -52  # TODO: hand-tuned reference level; revisit.
        audiofile = f"{out_dir}/scapes/{scape_name}.wav"
        jamsfile = f"{out_dir}/jams/{scape_name}.jams"
        # background source files are 60 seconds long  # TODO
        sc.add_background(
            label=("const", background_label),
            source_file=("choose", []),
            source_time=("uniform", 0, 60 - scape_dur))

        for i in range(len(sounds_toUse)):  # for each type of call
            df = sounds_list[i]
            # choose how many calls of this type in the file
            j = random.randint(sounds_toUse[i][1], sounds_toUse[i][2])
            if j > 0:
                # for all_files.csv - if there is an instance of this class,
                # add it to the row
                if row.endswith("["):
                    row = f"{row}\'{sounds_toUse[i][3]}\'"
                else:
                    row = f"{row}, \'{sounds_toUse[i][3]}\'"
            for k in range(j):
                foreground_label = sounds_toUse[i][0]
                # choose a random sound from the list of this call type
                choice = df.sample()
                src = choice["Src_file"].iloc[0]
                # BUGFIX: round()'s second argument is the number of decimal
                # places; it was mistakenly `scape_dur`, which raises
                # TypeError for float durations and is meaningless for int
                # ones. Round to 3 decimals to match `dur` below.
                t = round(random.uniform(0, scape_dur - .25), 3)  # start time
                dur = round(choice["Duration"].iloc[0], 3)
                end = t + dur
                # clip events that would run past the end of the scape
                if end > scape_dur:
                    end = scape_dur
                    dur = end - t
                lo_freq = choice["Low_freq"].iloc[0]
                hi_freq = choice["High_freq"].iloc[0]
                if hi_freq > max_freq:  # just in case
                    hi_freq = max_freq
                # YOLO bounding box: time on x, frequency on y, as fractions.
                YOLO_class = i
                xCenter_percent = round((end + t) / (2 * scape_dur), 6)
                yCenter_percent = round((hi_freq + lo_freq) / (2 * max_freq), 6)
                width_percent = round((end - t) / scape_dur, 6)
                height_percent = round((hi_freq - lo_freq) / max_freq, 6)
                if label == "":
                    label = f"{YOLO_class} {xCenter_percent} {yCenter_percent} {width_percent} {height_percent}"
                else:
                    label = f"{label}\n{YOLO_class} {xCenter_percent} {yCenter_percent} {width_percent} {height_percent}"
                sc.add_event(
                    label=("const", sounds_toUse[i][0]),
                    source_file=("const",
                                 f"{source_dir}/audio/foreground/{foreground_label}/{src}"),
                    source_time=("const", 0),
                    event_time=("const", t),
                    event_duration=("const", dur),  # might get warnings
                    # TODO: this always needs tested in case something is
                    # different about the foreground files
                    snr=("uniform", 5, 10),
                    pitch_shift=None,
                    time_stretch=None)

        # save labels to .txt file
        with open(f"{out_dir}/labels/{scape_name}.txt", "w") as text_file:
            text_file.write(label)
        # print(label)

        # add junk sounds, if any
        if junk is not None:
            junk_count = random.randint(junk[1], junk[2])
            for j in range(junk_count):
                # NOTE(review): `t` and `dur` here are leftovers from the
                # last foreground call above, so every junk event reuses that
                # call's time/duration (and they are undefined if no calls
                # were added). Confirm whether random placement was intended.
                sc.add_event(
                    label=("const", junk[0]),
                    source_file=("choose", []),
                    source_time=("const", 0),
                    event_time=("const", t),
                    event_duration=("const", dur),  # might get warnings
                    snr=("uniform", 10, 20),
                    pitch_shift=None,
                    time_stretch=None)

        sc.generate(audiofile, jamsfile,
                    allow_repeated_label=True,
                    allow_repeated_source=True,
                    reverb=0,
                    disable_sox_warnings=True,
                    no_audio=False)
        row = f"{row}]\""
        all_files[scape + 1] = row
def coherent(fg_folder, bg_folder, event_template, seed):
    """Create a COHERENT MUSDB18 mixture (audio + annotations).

    All stems in a coherent mixture come from the same song and are
    temporally aligned: they share one sampled source offset, pitch
    shift and time stretch.

    Parameters
    ----------
    fg_folder : str
        Path to the foreground source material for MUSDB18.
    bg_folder : str
        Path to the background material for MUSDB18 (empty folder).
    event_template : dict
        Template of probabilistic event parameters.
    seed : int or np.random.RandomState()
        Random state for the Scaper object; different seeds give
        different mixtures from the same material and template.

    Returns
    -------
    The result of ``Scaper.generate`` for the assembled mixture:
    mixture audio, JAMS annotation, simple annotation list and the
    per-stem audio signals.
    """
    # Build and seed the Scaper object (fixed 10 s scenes).
    sc = scaper.Scaper(duration=10.0,
                       fg_path=str(fg_folder),
                       bg_path=str(bg_folder),
                       random_state=seed)
    sc.sr = 44100     # sample rate
    sc.ref_db = -20   # reference loudness
    sc.n_channels = 1 # mono output

    # Work on a copy so the caller's template is never mutated.
    params = event_template.copy()

    # Instantiate the template once: this samples the song, the source
    # start time, a pitch shift and a time stretch. These sampled values
    # must be shared by every stem for the mixture to be coherent.
    sc.add_event(**params)
    sampled = sc._instantiate_event(sc.fg_spec[0])

    # Throw away the probe event; we re-add the stems below.
    sc.reset_fg_event_spec()

    # Pin the sampled values as constants so all added stems agree.
    params.update(source_time=('const', sampled.source_time),
                  pitch_shift=('const', sampled.pitch_shift),
                  time_stretch=('const', sampled.time_stretch))

    # Add one coherent event per stem. Stems of the same song share a
    # filename and differ only by their parent folder name, so swapping
    # 'vocals' for the current label in the sampled source path yields
    # the matching stem file.
    for stem in ['vocals', 'acc']:
        params['label'] = ('const', stem)
        params['source_file'] = ('const',
                                 sampled.source_file.replace('vocals', stem))
        sc.add_event(**params)

    # Generate and return the mixture audio, stem audio, and annotations.
    return sc.generate(fix_clipping=False)
def test_generate(atol=1e-4, rtol=1e-8):
    """Regression-test Scaper.generate against stored reference outputs.

    Builds a fixed soundscape spec, generates audio/JAMS/txt into temp
    files, and compares each against the checked-in regression files.
    """
    # Final regression test on all files
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    sc.ref_db = -50
    # background
    sc.add_background(
        label=('const', 'park'),
        source_file=('const', 'tests/data/audio/background/park/'
                     '268903__yonts__city-park-tel-aviv-israel.wav'),
        source_time=('const', 0))
    # foreground events
    sc.add_event(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 event_time=('const', 2),
                 event_duration=('const', 5),
                 snr=('const', 5),
                 pitch_shift=None,
                 time_stretch=None)
    sc.add_event(label=('const', 'car_horn'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'car_horn/17-CAR-Rolls-Royce-Horn.wav'),
                 source_time=('const', 0),
                 event_time=('const', 5),
                 event_duration=('const', 2),
                 snr=('const', 20),
                 pitch_shift=('const', 1),
                 time_stretch=None)
    sc.add_event(
        label=('const', 'human_voice'),
        source_file=('const', 'tests/data/audio/foreground/'
                     'human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav'),
        source_time=('const', 0),
        event_time=('const', 7),
        event_duration=('const', 2),
        snr=('const', 10),
        pitch_shift=None,
        time_stretch=('const', 1.2))

    tmpfiles = []
    with _close_temp_files(tmpfiles):
        # Temp outputs for audio, JAMS and the simplified txt annotation.
        wav_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=True)
        jam_file = tempfile.NamedTemporaryFile(suffix='.jams', delete=True)
        txt_file = tempfile.NamedTemporaryFile(suffix='.txt', delete=True)
        tmpfiles.append(wav_file)
        tmpfiles.append(jam_file)
        tmpfiles.append(txt_file)

        sc.generate(wav_file.name,
                    jam_file.name,
                    txt_path=txt_file.name,
                    disable_instantiation_warnings=True)

        # validate audio
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

        # validate jams
        jam = jams.load(jam_file.name)
        regjam = jams.load(REG_JAM_PATH)
        _compare_scaper_jams(jam, regjam)

        # validate txt
        # read in both files (tab-separated: start, end, label)
        txt_data = []
        with open(txt_file.name) as file:
            reader = csv.reader(file, delimiter='\t')
            for row in reader:
                txt_data.append(row)
        txt_data = np.asarray(txt_data)

        regtxt_data = []
        with open(REG_TXT_PATH) as file:
            reader = csv.reader(file, delimiter='\t')
            for row in reader:
                regtxt_data.append(row)
        regtxt_data = np.asarray(regtxt_data)

        # compare start and end times
        assert np.allclose([float(x) for x in txt_data[:, 0]],
                           [float(x) for x in regtxt_data[:, 0]])
        assert np.allclose([float(x) for x in txt_data[:, 1]],
                           [float(x) for x in regtxt_data[:, 1]])
        # compare labels
        assert (txt_data[:, 2] == regtxt_data[:, 2]).all()

        # reverb value must be in (0, 1) range
        for reverb in [-1, 2]:
            pytest.raises(ScaperError,
                          sc.generate,
                          wav_file.name,
                          jam_file.name,
                          reverb=reverb,
                          disable_instantiation_warnings=True)
# Distribution parameters for the randomized event properties used by
# the generation loop below.
snr_min = 6
snr_max = 30

pitch_dist = 'uniform'
pitch_min = -3.0
pitch_max = 3.0

time_stretch_dist = 'uniform'
time_stretch_min = 0.8
time_stretch_max = 1.2

# generate a random seed for this Scaper object
seed = 123

# create a scaper that will be used below
# (duration, fg_folder, bg_folder, ref_db, n_soundscapes are defined
# earlier in this file)
sc = scaper.Scaper(duration, fg_folder, bg_folder, random_state=seed)
sc.protected_labels = []
sc.ref_db = ref_db

# Generate 100 soundscapes using a truncated normal distribution of start times
start_time = time.time()
for n in tqdm.trange(n_soundscapes):
    print('Generating soundscape: {:d}/{:d}'.format(n + 1, n_soundscapes))

    # reset the event specifications for foreground and background at the
    # beginning of each loop to clear all previously added events
    sc.reset_bg_event_spec()
    sc.reset_fg_event_spec()

    # add background
def test_generate(atol=1e-4, rtol=1e-8):
    """Regression-test Scaper.generate output against stored reference files.

    Same fixed spec as the regression reference; compares audio samples,
    the full JAMS (modulo scaper version), and the txt annotation table.
    """
    # Final regression test on all files
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    sc.ref_db = -50
    # background
    sc.add_background(
        label=('const', 'park'),
        source_file=('const', 'tests/data/audio/background/park/'
                     '268903__yonts__city-park-tel-aviv-israel.wav'),
        source_time=('const', 0))
    # foreground events
    sc.add_event(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 event_time=('const', 2),
                 event_duration=('const', 5),
                 snr=('const', 5),
                 pitch_shift=None,
                 time_stretch=None)
    sc.add_event(label=('const', 'car_horn'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'car_horn/17-CAR-Rolls-Royce-Horn.wav'),
                 source_time=('const', 0),
                 event_time=('const', 5),
                 event_duration=('const', 2),
                 snr=('const', 20),
                 pitch_shift=('const', 1),
                 time_stretch=None)
    sc.add_event(
        label=('const', 'human_voice'),
        source_file=('const', 'tests/data/audio/foreground/'
                     'human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav'),
        source_time=('const', 0),
        event_time=('const', 7),
        event_duration=('const', 2),
        snr=('const', 10),
        pitch_shift=None,
        time_stretch=('const', 1.2))

    tmpfiles = []
    with _close_temp_files(tmpfiles):
        # Temp outputs for audio, JAMS and the txt annotation.
        wav_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=True)
        jam_file = tempfile.NamedTemporaryFile(suffix='.jams', delete=True)
        txt_file = tempfile.NamedTemporaryFile(suffix='.txt', delete=True)
        tmpfiles.append(wav_file)
        tmpfiles.append(jam_file)
        tmpfiles.append(txt_file)

        sc.generate(wav_file.name,
                    jam_file.name,
                    txt_path=txt_file.name,
                    disable_instantiation_warnings=True)

        # validate audio
        wav, sr = soundfile.read(wav_file.name)
        regwav, sr = soundfile.read(REG_WAV_PATH)
        assert np.allclose(wav, regwav, atol=atol, rtol=rtol)

        # validate jams
        jam = jams.load(jam_file.name)
        regjam = jams.load(REG_JAM_PATH)
        # version might change, rest should be the same
        regjam.annotations[0].sandbox.scaper['scaper_version'] = \
            scaper.__version__
        assert jam == regjam

        # validate txt
        txt = pd.read_csv(txt_file.name, header=None, sep='\t')
        regtxt = pd.read_csv(REG_TXT_PATH, header=None, sep='\t')
        assert (txt == regtxt).all().all()

        # reverb value must be in (0, 1) range
        for reverb in [-1, 2]:
            pytest.raises(ScaperError,
                          sc.generate,
                          wav_file.name,
                          jam_file.name,
                          reverb=reverb,
                          disable_instantiation_warnings=True)
# Regenerate reference files for every supported sample rate and print
# the path-constant definitions used by the regression tests.
for rate in SAMPLE_RATES:
    test_names(REG_NAME, rate)

    print("==========USING BELOW FOR TESTS==============")
    VAR_NAMES_PARTIAL = ('REG', 'REG_BGONLY', 'REG_REVERB')
    FILE_BASENAMES = (REG_NAME, REG_BGONLY_NAME, REG_REVERB_NAME)
    FILE_TYPES = ('WAV', 'JAM', 'TXT')
    # Print one PATH constant per (variant, file type) pair, ready to be
    # pasted into the test module.
    for var, name in zip(VAR_NAMES_PARTIAL, FILE_BASENAMES):
        for type, path in zip(FILE_TYPES, test_names(name, rate)):
            print("{}_{}_PATH = '{}'".format(var, type, path))
        print()
    print("==========USING ABOVE FOR TESTS==============")

    # Fixed soundscape spec at this sample rate.
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    sc.ref_db = -50
    sc.sr = rate
    # background
    sc.add_background(
        label=('const', 'park'),
        source_file=('const', 'tests/data/audio/background/park/'
                     '268903__yonts__city-park-tel-aviv-israel.wav'),
        source_time=('const', 0))
    # foreground events
    sc.add_event(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 # (source chunk is truncated here mid-call)
def build_scape(birds, curr, outdir, scape_dur, sourcedir, bg_label, fg_label,
                junk_label):
    """Synthesize one soundscape: background + bird calls + random junk.

    Parameters
    ----------
    birds : iterable
        Rows describing the birds in this scape; each row has the source
        file name at [0], a list of call start times (possibly empty) at
        [1], and the call duration at [5].
    curr : int
        Index of the current scape, used in output file names.
    outdir : str
        Directory receiving <outdir>/scape<curr>.wav and .jams.
    scape_dur : float
        Soundscape duration in seconds.
    sourcedir : str
        Root containing foreground/ and background/ audio folders.
    bg_label, fg_label, junk_label : str
        scaper labels for background, bird-call and junk material.
    """
    # birds = df with info about this scape. curr = which scape.
    back_list = get_audio_info(f"{sourcedir}/background/{bg_label}")
    junk_list = get_audio_info(f"{sourcedir}/foreground/{junk_label}")

    sc = scaper.Scaper(scape_dur, f"{sourcedir}/foreground",
                       f"{sourcedir}/background")
    sc.ref_db = -30  #TODO

    fname = f"scape{str(curr)}"
    audiofile = f"{outdir}/{fname}.wav"
    jamsfile = f"{outdir}/{fname}.jams"
    # print(f"fname: {fname}, audiofile: {audiofile}, jamsfile: {jamsfile}")

    bg = random.choice(back_list)  # random choice from background list
    sc.add_background(label=("const", bg_label),
                      source_file=("const",
                                   f"{sourcedir}/background/{bg_label}/{bg}"),
                      source_time=("const", 0))
    # print(f"background: {bg}")
    # print(f"junk_list: {junk_list}")

    # bird = row in birds, containing list of call times (if any, else
    # empty list []) in file and boxing info
    for bird in birds:
        # print(f"for bird: {bird[0]}")
        if len(bird[1]) > 0:  # if bird calls in this scape
            for t in bird[1]:  # for each start time
                dur = bird[5]  # length of call
                sc.add_event(
                    label=('const', fg_label),
                    source_file=(
                        'const',
                        f"{sourcedir}/foreground/{fg_label}/{bird[0]}"),
                    source_time=('const', 0),
                    event_time=('const', t),
                    # duration of event in synthesized soundscape - you
                    # will get warnings
                    event_duration=('const', dur),
                    snr=('uniform', -15, 6),  #TODO - decide
                    # number of semitones (can be fractional) to shift
                    # sound up/down
                    pitch_shift=None,
                    # factor to stretch sound by (<1 = shorter, >1 = longer)
                    time_stretch=None)
                # print(f"  added a {bird[0]}")

    # Sprinkle in a random number of junk (non-target) sounds.
    num_junk = random.randint(0, 5)  #10 for easyjunk, 5 for shortjunk
    for j in range(num_junk):
        sound = random.choice(junk_list)
        # (removed unused `noiseindex = junk_list.index(sound)` lookup)
        sc.add_event(
            label=('const', junk_label),
            source_file=('const',
                         f"{sourcedir}/foreground/{junk_label}/{sound}"),
            source_time=('const', 0),
            event_time=('uniform', 0, scape_dur - .5),
            event_duration=('const', 5),
            snr=('uniform', -5, 2),
            pitch_shift=('normal', -.5, .5),
            time_stretch=('uniform', .5, 2))

    sc.generate(audiofile,
                jamsfile,
                allow_repeated_label=True,
                allow_repeated_source=True,
                reverb=0,
                disable_sox_warnings=True,
                no_audio=False)  #, txt_path=None)
    print(f"Scape {curr} generated.")
# Script entry: generate `scapecount` soundscapes into `outdir`.
# Usage: script.py <outdir> <scapecount>
outdir = sys.argv[1]
scapecount = int(sys.argv[2])

df = pd.DataFrame()

for i in range(scapecount):
    fname = 'scape' + str(i)
    audiofile = outdir + '/' + fname + '.wav'
    jamsfile = outdir + '/' + fname + '.jams'

    # BUGFIX: the Scaper object must exist before a background can be
    # added to it. Previously sc.add_background() ran before
    # sc = scaper.Scaper(...), so the first iteration raised NameError
    # (and later iterations attached the background to a stale object).
    sc = scaper.Scaper(scape_dur, sourcedir + '/foreground',
                       sourcedir + '/background')
    sc.ref_db = -30  #TODO

    bg = random.choice(back_list)  # random choice from background list
    sc.add_background(label=('const', bg_label),
                      source_file=('const', sourcedir + '/background/' +
                                   bg_label + '/' + bg),
                      source_time=('const', 0))

    # build data and populate csv
    birds = build_scapeData(PNRE_calls, birdcount, scape_dur, outdir, i)

#build_scapes(outdir, 60, count, 30, sourcedir, 'noisy_clean', 'PNRE_birdcalls', PNRE_calls)
build_scapes(outdir, 15, count, 4, sourcedir, 'noisy_clean',
             'PNRE_birdcalls', PNRE_calls)  #birdcount = 30 for easyjunk

print("Done! Completed in " + str(datetime.now()-start) + " seconds.\n")

#def build_scapes(outdir, scape_dur, scapecount, birdcount (max calls per file), sourcedir, bg_label, fg_label, call_list):
# outfile = path/to/and/filename.
#
# note: birdcount = max number of each call to happen per scape file
# to do this on your machine. # + path_to_audio = os.path.join(download_path, 'audio') output_folder = os.path.join(download_path, 'generated') os.makedirs(output_folder, exist_ok=True) soundscape_duration = 10.0 seed = 123 num_mixtures = 5 foreground_folder = os.path.join(path_to_audio, 'foreground') background_folder = os.path.join(path_to_audio, 'background') sc = scaper.Scaper(soundscape_duration, foreground_folder, background_folder, random_state=seed) sc.ref_db = -20 sc.add_background(label=('const', 'park'), source_file=('choose', []), source_time=('const', 0)) sc.add_event(label=('const', 'siren'), source_file=('choose', []), source_time=('const', 0), event_time=('uniform', 0, 9), event_duration=('truncnorm', 3, 1, 0.5, 5), snr=('normal', 10, 3), pitch_shift=('uniform', -2, 2), time_stretch=('uniform', 0.8, 1.2))
def test_generate_from_jams(atol=1e-5, rtol=1e-8):
    """Test that scaper.generate_from_jams reproduces audio exactly.

    Covers: invalid JAMS input (no annotations), plain regeneration,
    regeneration after one/two/three trims, regeneration with alternate
    FG/BG paths, and JAMS output round-tripping.
    """
    # Test for invalid jams: no annotations
    tmpfiles = []
    with _close_temp_files(tmpfiles):
        jam = jams.JAMS()
        jam.file_metadata.duration = 10

        jam_file = tempfile.NamedTemporaryFile(suffix='.jams', delete=True)
        gen_file = tempfile.NamedTemporaryFile(suffix='.jams', delete=True)

        jam.save(jam_file.name)

        pytest.raises(ScaperError, scaper.generate_from_jams, jam_file.name,
                      gen_file.name)

    # Test for valid jams files
    tmpfiles = []
    with _close_temp_files(tmpfiles):
        # Create all necessary temp files
        orig_wav_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                    delete=True)
        orig_jam_file = tempfile.NamedTemporaryFile(suffix='.jams',
                                                    delete=True)
        gen_wav_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                   delete=True)
        gen_jam_file = tempfile.NamedTemporaryFile(suffix='.jams',
                                                   delete=True)
        tmpfiles.append(orig_wav_file)
        tmpfiles.append(orig_jam_file)
        tmpfiles.append(gen_wav_file)
        tmpfiles.append(gen_jam_file)

        # --- Define scaper --- *
        sc = scaper.Scaper(10, FG_PATH, BG_PATH)
        sc.protected_labels = []
        sc.ref_db = -50
        sc.add_background(label=('choose', []),
                          source_file=('choose', []),
                          source_time=('const', 0))
        # Add 5 events
        for _ in range(5):
            sc.add_event(label=('choose', []),
                         source_file=('choose', []),
                         source_time=('const', 0),
                         event_time=('uniform', 0, 9),
                         event_duration=('choose', [1, 2, 3]),
                         snr=('uniform', 10, 20),
                         pitch_shift=('uniform', -1, 1),
                         time_stretch=('uniform', 0.8, 1.2))

        # generate, then generate from the jams and compare audio files
        # repeat 5 time
        for _ in range(5):
            sc.generate(orig_wav_file.name,
                        orig_jam_file.name,
                        disable_instantiation_warnings=True)
            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

            # validate audio
            orig_wav, sr = soundfile.read(orig_wav_file.name)
            gen_wav, sr = soundfile.read(gen_wav_file.name)
            assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

        # Now add in trimming!
        for _ in range(5):
            sc.generate(orig_wav_file.name,
                        orig_jam_file.name,
                        disable_instantiation_warnings=True)
            # Trim in place, then regenerate from the trimmed JAMS.
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 5), np.random.uniform(5, 10))
            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

            # validate audio
            orig_wav, sr = soundfile.read(orig_wav_file.name)
            gen_wav, sr = soundfile.read(gen_wav_file.name)
            assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

        # Double trimming
        for _ in range(2):
            sc.generate(orig_wav_file.name,
                        orig_jam_file.name,
                        disable_instantiation_warnings=True)
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 2), np.random.uniform(8, 10))
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 2), np.random.uniform(4, 6))
            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

        # Tripple trimming
        for _ in range(2):
            sc.generate(orig_wav_file.name,
                        orig_jam_file.name,
                        disable_instantiation_warnings=True)
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 2), np.random.uniform(8, 10))
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 1), np.random.uniform(5, 6))
            scaper.trim(orig_wav_file.name, orig_jam_file.name,
                        orig_wav_file.name, orig_jam_file.name,
                        np.random.uniform(0, 1), np.random.uniform(3, 4))
            scaper.generate_from_jams(orig_jam_file.name, gen_wav_file.name)

            # validate audio
            orig_wav, sr = soundfile.read(orig_wav_file.name)
            gen_wav, sr = soundfile.read(gen_wav_file.name)
            assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

        # Test with new FG and BG paths
        for _ in range(5):
            sc.generate(orig_wav_file.name,
                        orig_jam_file.name,
                        disable_instantiation_warnings=True)
            scaper.generate_from_jams(orig_jam_file.name,
                                      gen_wav_file.name,
                                      fg_path=ALT_FG_PATH,
                                      bg_path=ALT_BG_PATH)
            # validate audio
            orig_wav, sr = soundfile.read(orig_wav_file.name)
            gen_wav, sr = soundfile.read(gen_wav_file.name)
            assert np.allclose(gen_wav, orig_wav, atol=atol, rtol=rtol)

        # Ensure jam file saved correctly
        scaper.generate_from_jams(orig_jam_file.name,
                                  gen_wav_file.name,
                                  jams_outfile=gen_jam_file.name)
        orig_jam = jams.load(orig_jam_file.name)
        gen_jam = jams.load(gen_jam_file.name)
        assert orig_jam == gen_jam
def test_scaper_instantiate():
    """Instantiate a fixed spec and compare the JAMS to the regression file.

    Compares file metadata, sandboxes, annotation metadata, specs and
    data, skipping only the fields that legitimately vary across library
    versions.
    """
    # Here we just instantiate a known fixed spec and check if that jams
    # we get back is as expected.
    sc = scaper.Scaper(10.0, fg_path=FG_PATH, bg_path=BG_PATH)
    sc.ref_db = -50

    # background
    sc.add_background(
        label=('const', 'park'),
        source_file=('const', 'tests/data/audio/background/park/'
                     '268903__yonts__city-park-tel-aviv-israel.wav'),
        source_time=('const', 0))

    # foreground events
    sc.add_event(label=('const', 'siren'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'siren/69-Siren-1.wav'),
                 source_time=('const', 5),
                 event_time=('const', 2),
                 event_duration=('const', 5),
                 snr=('const', 5),
                 pitch_shift=None,
                 time_stretch=None)
    sc.add_event(label=('const', 'car_horn'),
                 source_file=('const', 'tests/data/audio/foreground/'
                              'car_horn/17-CAR-Rolls-Royce-Horn.wav'),
                 source_time=('const', 0),
                 event_time=('const', 5),
                 event_duration=('const', 2),
                 snr=('const', 20),
                 pitch_shift=('const', 1),
                 time_stretch=None)
    sc.add_event(
        label=('const', 'human_voice'),
        source_file=('const', 'tests/data/audio/foreground/'
                     'human_voice/42-Human-Vocal-Voice-taxi-2_edit.wav'),
        source_time=('const', 0),
        event_time=('const', 7),
        event_duration=('const', 2),
        snr=('const', 10),
        pitch_shift=None,
        time_stretch=('const', 1.2))

    jam = sc._instantiate(disable_instantiation_warnings=True)
    regjam = jams.load(REG_JAM_PATH)
    # print(jam)
    # print(regression_jam)

    # Note: can't compare directly, since:
    # 1. scaper/and jams liberary versions may change
    # 2. raw annotation sandbox stores specs as OrderedDict and tuples,
    #    whereas loaded ann (regann) simplifies those to dicts and lists
    # assert jam == regression_jam

    # Must compare each part "manually"
    # 1. compare file metadata
    for k, kreg in zip(jam.file_metadata.keys(), regjam.file_metadata.keys()):
        assert k == kreg
        if k != 'jams_version':
            assert jam.file_metadata[k] == regjam.file_metadata[kreg]

    # 2. compare jams sandboxes
    assert jam.sandbox == regjam.sandbox

    # 3. compare annotations
    assert len(jam.annotations) == len(regjam.annotations) == 1
    ann = jam.annotations[0]
    regann = regjam.annotations[0]

    # 3.1 compare annotation metadata
    assert ann.annotation_metadata == regann.annotation_metadata

    # 3.2 compare sandboxes
    # Note: can't compare sandboxes directly, since in raw jam scaper sandbox
    # stores event specs in EventSpec object (named tuple), whereas in loaded
    # jam these will get converted to list of lists.
    # assert ann.sandbox == regann.sandbox
    assert len(ann.sandbox.keys()) == len(regann.sandbox.keys()) == 1
    assert 'scaper' in ann.sandbox.keys()
    assert 'scaper' in regann.sandbox.keys()

    # everything but the specs and version can be compared directly:
    for k, kreg in zip(sorted(ann.sandbox.scaper.keys()),
                       sorted(regann.sandbox.scaper.keys())):
        assert k == kreg
        if k not in ['bg_spec', 'fg_spec', 'scaper_version']:
            assert ann.sandbox.scaper[k] == regann.sandbox.scaper[kreg]

    # to compare specs need to covert raw specs to list of lists
    assert ([[list(x) if type(x) == tuple else x for x in e]
             for e in ann.sandbox.scaper['bg_spec']
             ] == regann.sandbox.scaper['bg_spec'])
    assert ([[list(x) if type(x) == tuple else x for x in e]
             for e in ann.sandbox.scaper['fg_spec']
             ] == regann.sandbox.scaper['fg_spec'])

    # 3.3. compare namespace, time and duration
    assert ann.namespace == regann.namespace
    assert ann.time == regann.time
    assert ann.duration == regann.duration

    # 3.4 compare data
    # BUGFIX: the comparison result was previously discarded (missing
    # `assert`), so a data mismatch could never fail the test.
    assert (ann.data == regann.data).all().all()
def test_trim(atol=1e-5, rtol=1e-8):
    """Test scaper.trim: JAMS slicing, value dicts, sandbox and audio."""
    # Things we want to test:
    # 1. Jam trimmed correctly (mainly handled by jams.slice)
    # 2. value dict updated correctly (event_time, event_duration,
    #    source_time)
    # 3. scaper sandbox updated correctly (n_events, poly, gini, duration)
    # 4. audio trimmed correctly
    tmpfiles = []
    with _close_temp_files(tmpfiles):
        # Create all necessary temp files
        orig_wav_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                    delete=True)
        orig_jam_file = tempfile.NamedTemporaryFile(suffix='.jams',
                                                    delete=True)
        trim_wav_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                    delete=True)
        trim_jam_file = tempfile.NamedTemporaryFile(suffix='.jams',
                                                    delete=True)
        trimstrict_wav_file = tempfile.NamedTemporaryFile(suffix='.wav',
                                                          delete=True)
        trimstrict_jam_file = tempfile.NamedTemporaryFile(suffix='.jams',
                                                          delete=True)
        tmpfiles.append(orig_wav_file)
        tmpfiles.append(orig_jam_file)
        tmpfiles.append(trim_wav_file)
        tmpfiles.append(trim_jam_file)
        tmpfiles.append(trimstrict_wav_file)
        tmpfiles.append(trimstrict_jam_file)

        # --- Create soundscape and save to tempfiles --- #
        sc = scaper.Scaper(10, FG_PATH, BG_PATH)
        sc.protected_labels = []
        sc.ref_db = -50
        sc.add_background(label=('const', 'park'),
                          source_file=('choose', []),
                          source_time=('const', 0))
        # Add 5 events, evenly spaced 1 s sirens.
        start_times = [0.5, 2.5, 4.5, 6.5, 8.5]
        for event_time in start_times:
            sc.add_event(label=('const', 'siren'),
                         source_file=('choose', []),
                         source_time=('const', 5),
                         event_time=('const', event_time),
                         event_duration=('const', 1),
                         snr=('const', 10),
                         pitch_shift=None,
                         time_stretch=None)
        sc.generate(orig_wav_file.name,
                    orig_jam_file.name,
                    disable_instantiation_warnings=True)

        # --- Trim soundscape using scaper.trim with strict=False --- #
        # Keep the 3 s..7 s window of the 10 s soundscape.
        scaper.trim(orig_wav_file.name,
                    orig_jam_file.name,
                    trim_wav_file.name,
                    trim_jam_file.name,
                    3,
                    7,
                    no_audio=False)

        # --- Validate output --- #
        # validate JAMS
        trimjam = jams.load(trim_jam_file.name)
        trimann = trimjam.annotations.search(namespace='scaper')[0]

        # Time and duration of annotation observation must be changed, but
        # values in the value dict must remained unchanged!
        for event in trimann.data:
            if event.value['role'] == 'background':
                # Background spans the whole trimmed window.
                assert (event.time == 0 and event.duration == 4
                        and event.value['event_time'] == 0
                        and event.value['event_duration'] == 10
                        and event.value['source_time'] == 0)
            else:
                # Events at 2.5/4.5/6.5 survive the 3-7 s trim; the ones
                # straddling the window edges are shortened to 0.5 s.
                if event.time == 0:
                    assert (event.duration == 0.5
                            and event.value['event_time'] == 2.5
                            and event.value['event_duration'] == 1
                            and event.value['source_time'] == 5)
                elif event.time == 1.5:
                    assert (event.duration == 1
                            and event.value['event_time'] == 4.5
                            and event.value['event_duration'] == 1
                            and event.value['source_time'] == 5)
                elif event.time == 3.5:
                    assert (event.duration == 0.5
                            and event.value['event_time'] == 6.5
                            and event.value['event_duration'] == 1
                            and event.value['source_time'] == 5)
                else:
                    assert False

        # validate audio: trimmed samples equal the 3 s..7 s slice.
        orig_wav, sr = soundfile.read(orig_wav_file.name)
        trim_wav, sr = soundfile.read(trim_wav_file.name)
        assert np.allclose(trim_wav, orig_wav[3 * sr:7 * sr], atol=atol,
                           rtol=rtol)
# Distribution parameters for randomized event properties used in the
# generation loop below.
event_time_dist = 'truncnorm'
event_time_mean = 5.0
event_time_std = 2.0
event_time_min = 0.0
event_time_max = 10.0

event_duration_dist = 'const'
event_duration_value = 0.2

snr_dist = 'const'
snr_min = 20

# Num_files, duration, fg_folder, bg_folder, ref_db, source_time_dist
# and source_time are defined earlier in this file.
for i in range(Num_files):
    # create a scaper
    sc = scaper.Scaper(duration, fg_folder, bg_folder)
    sc.protected_labels = []
    sc.ref_db = ref_db

    # add background
    sc.add_background(label=('choose', []),
                      source_file=('choose', []),
                      source_time=('const', 0))

    # add random number of foreground events
    sc.add_event(label=('choose', []),
                 source_file=('choose', []),
                 source_time=(source_time_dist, source_time),
                 event_time=(event_time_dist, event_time_mean,
                             event_time_std, event_time_min,
                             event_time_max),
                 # (source chunk is truncated here mid-call)
# NOTE(review): this chunk starts with an `else:` whose matching `if`
# is earlier in the file and not visible here.
else:
    # Map raw labels through lbl_map and derive inverse-frequency class
    # weights: weight = count of the most common class / per-class count.
    mapped_labels = [lbl_map[y] for y in labels]
    cnt = Counter(mapped_labels)
    most_common = cnt.most_common()[0][1]
    ws = []
    # NOTE(review): indexing cnt[i] assumes the mapped labels are exactly
    # the integers 0..len(cnt)-1 -- confirm lbl_map guarantees this.
    for i in range(len(cnt)):
        ws.append(most_common / cnt[i])
    cw = torch.tensor(ws)
    # Per-instance weight = the weight of that instance's class.
    instance_weights = torch.tensor([cw[lbl_map[ix]] for ix in labels])

print(files[:5])
print(labels[:5])
print(instance_weights[:5])

# Build the Scaper used for soundscape generation below.
sc = scaper.Scaper(soundscape_duration, args.fg_path, args.bg_path,
                   random_state=seed)
sc.ref_db = -60
sc.sr = 22050
sc.protected_labels = []

if not os.path.exists(outfolder):
    os.makedirs(outfolder)

for n in tqdm.tqdm(range(n_soundscapes)):
    # print('Generating soundscape: {:d}/{:d}'.format(n+1, n_soundscapes))

    # reset the event specifications for foreground and background at the
    # beginning of each loop to clear all previously added events
    sc.reset_bg_event_spec()