def extract_spectra(self, offset=50, scaling=80):
    # Loop over subsets
    for subset in ["Train", "Val", "Test"]:
        # Get audio files
        dir_audio = os.path.join(self._dir_root, "Ego-Noise Prediction",
                                 "Dataset", subset, "Audio")
        files_audio = retrieve_files(dir_audio)

        # Directory for the unsynchronized spectra
        dir_output = os.path.join(self._dir_root_set, "Unsynchronized",
                                  subset, "Spectra")

        # Refresh directory
        refresh_directory(dir_output)

        # Loop through files in set
        for f in files_audio:
            # Extract spectrum
            Z = fh.extract_spectrum(f, self._feature)

            # Scale spectrum
            Z += offset
            Z /= scaling

            # Save to appropriate directory
            fn = os.path.split(f)[-1].replace(".wav", ".csv")
            fp = os.path.join(dir_output, fn)
            pd.DataFrame(Z).to_csv(fp, index=False, header=False)

        print_verbose(
            self.super_verbose,
            "Finished extracting feature for '%s' set." % subset,
        )
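# A minimal sketch (not part of the pipeline) of how the affine scaling above
# could be inverted when a scaled spectrum needs to be mapped back to its raw
# values. It assumes fh.extract_spectrum returns dB-like magnitudes (implied by
# the default offset/scaling constants) and that the same defaults were used;
# the helper name 'unscale_spectrum' is hypothetical.
import pandas as pd

def unscale_spectrum(fp_csv, offset=50, scaling=80):
    # Load a scaled spectrum as written by extract_spectra
    Z_scaled = pd.read_csv(fp_csv, header=None).to_numpy()
    # Invert the scaling applied in extract_spectra: Z_scaled = (Z + offset) / scaling
    return Z_scaled * scaling - offset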
def generate_silence_category(self, n_instances, duration):
    """Generates a 'silence' category consisting of white noise.

    Keyword arguments:
    n_instances -- number of instances (recordings) generated,
    duration -- duration of each recording in seconds.
    """
    # Generate silence
    np.random.seed(42)
    silence = np.random.uniform(low=-1.0, high=1.0,
                                size=(n_instances, duration * self._sample_rate))

    # Loop over instances
    for i in range(n_instances):
        # Export to file
        fn = "silence_%02d.wav" % (i + 1)
        fp = os.path.join(self._dir_root, "Aircraft Classification",
                          "Audio", "Full", fn)
        sf.write(fp, silence[i], samplerate=self._sample_rate)
        print_verbose(self.super_verbose, "Generated file: '%s'" % fp)

    print_verbose(
        self.verbose,
        "Finished generating %d instances of silence." % n_instances)
def save_network(self, network, loss, overwrite=False):
    # Generate output filename and directory for model and config
    network_id = "%.6f_c%d" % (loss, self._states["context_frames"])
    dir_model = os.path.join(self._dir_root_set, "Models", network_id)
    fn_model = "enp_model.pt"
    fn_config = "enp_config.json"

    # Create or overwrite directory
    if os.path.exists(dir_model) and not overwrite:
        print_verbose(self.verbose, "Network already exists.")
        return dir_model
    refresh_directory(dir_model)

    # Save network
    torch.save(network.state_dict(), os.path.join(dir_model, fn_model))

    # Save network config and settings
    config_file = open(os.path.join(dir_model, fn_config), "w")
    json.dump(
        [self._net_config, self._spectrum, self._states, self._train_settings],
        config_file,
    )
    config_file.close()

    return dir_model
def _synchronize_pair(self, file_spectrum, file_states, dir_root_output,
                      skip_takeoff):
    # Load spectrum
    Z = pd.read_csv(file_spectrum, header=None).to_numpy()

    # Get time vector from the spectrum
    t_mic = librosa.times_like(
        Z,
        sr=self._feature["fft_sample_rate"],
        hop_length=self._feature["stft_hop_length"],
    )

    # Load states
    S = pd.read_csv(file_states, header=0)

    # Get time vector from state data
    t_mav = S["delta_t"].to_numpy()

    # Scale the states and transpose: (time, states) -> (states, time)
    S.drop(columns=["delta_t"], inplace=True)
    S = fh.scale_states(S).to_numpy().transpose()

    # Pair each time in t_mic with the closest found in t_mav
    assert len(t_mic) < len(t_mav)  # requires hop_size / fft_freq > 100
    idx = np.searchsorted(t_mav, t_mic)

    # Only keep the matched pairs from t_mav
    S = S[:, idx]

    if skip_takeoff:
        # Identify take-off by delta-rpm spike (rows 4-7)
        delta_rpms = S[4:8]
        max_spike = abs(delta_rpms).max()
        _, spikes = np.where(
            abs(delta_rpms) > 0.5 * max_spike)  # get column idx only
        last_spike = spikes.max()  # last column

        # Cut off spikes + buffer frames
        buf = 5
        S = S[:, last_spike + 1 + buf:]
        Z = Z[:, last_spike + 1 + buf:]

    # Filename of output identical to input
    fn = os.path.split(file_states)[-1]

    # Export the synchronized spectra
    fp = os.path.join(dir_root_output, "Spectra", fn)
    pd.DataFrame(Z).to_csv(fp, header=False, index=False)

    # Export the synchronized states
    fp = os.path.join(dir_root_output, "States", fn)
    pd.DataFrame(S).to_csv(fp, header=False, index=False)

    print_verbose(
        self.super_verbose,
        "Synchronized '%s' (%d datapoints)" % (fn, min(len(t_mic), len(t_mav))),
    )
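# Illustrative sketch (hypothetical timestamps) of the time-matching step used
# above: np.searchsorted pairs each microphone frame time with the index of the
# first state timestamp that is not smaller than it, so every audio frame ends
# up with exactly one state vector.
import numpy as np

t_mic = np.array([0.00, 0.25, 0.50, 0.75])  # audio frame times (s)
t_mav = np.array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])  # state times (s)

idx = np.searchsorted(t_mav, t_mic)
print(idx)  # -> [0 3 5 8], i.e. one state sample selected per audio frame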
def import_audio_esc50(self):
    """Download and extract the ESC-50 dataset."""
    # Set destination
    dir_dest = os.path.join(self._dir_root, "Raw", "Aircraft")
    if not os.path.exists(dir_dest):
        os.makedirs(dir_dest)

    # Download
    fp_dest = os.path.join(dir_dest, "ESC-50-master.zip")
    fp_unzipped = os.path.join(dir_dest, "ESC-50-master")
    if not os.path.exists(fp_unzipped):
        print_verbose(self.verbose, "Downloading...")
        url = "https://github.com/karoldvl/ESC-50/archive/master.zip"
        wget.download(url, dir_dest)
        print_verbose(self.verbose, "Download finished.")

        # Unzip
        with zipfile.ZipFile(fp_dest, "r") as zip_ref:
            zip_ref.extractall(dir_dest)
        print_verbose(self.verbose,
                      "Extracted ESC-50 to %s" % os.path.abspath(fp_dest))
    else:
        print_verbose(
            self.verbose,
            "ESC-50 has already been extracted to %s" % os.path.abspath(fp_dest),
        )
def save_network(self, network, loss, overwrite=False):
    # Generate output filename and directory for model and config
    network_id = "%.6f" % loss
    dir_model = os.path.join(self._dir_root_set, "Models", network_id)
    fn_model = "ac_model.pt"
    fn_config = "ac_config.json"

    # Create or overwrite directory
    if os.path.exists(dir_model) and not overwrite:
        print_verbose(self.verbose, "Network already exists.")
        return dir_model
    refresh_directory(dir_model)

    # Save network
    torch.save(network.state_dict(), os.path.join(dir_model, fn_model))

    # Save network config and settings
    config_file = open(os.path.join(dir_model, fn_config), "w")
    if not hasattr(self, "_states"):
        json.dump(
            [
                self._net_config,
                self._spectrum,
                self._feature,
                self._classification,
                self._train_settings,
            ],
            config_file,
        )
    else:
        json.dump(
            [
                self._net_config,
                self._spectrum,
                self._feature,
                self._states,
                self._classification,
                self._train_settings,
            ],
            config_file,
        )
    config_file.close()

    return dir_model
def _create_network(net_config, verbose=True, super_verbose=False):
    # Set up network
    network = Net(*net_config.values())
    network.to(net_config["device"])

    print_verbose(verbose, "Device: %s." % net_config["device"])
    print_verbose(super_verbose, network)
    print_verbose(
        verbose,
        "Number of trainable parameters in network: %d."
        % sum([p.numel() for p in network.parameters()]),
    )

    return network
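# Minimal sketch of how a classifier saved by save_network could be restored,
# assuming the Net class defined in this module (see _create_network), the
# config list layout written by save_network, and a string "device" entry in
# the network config; the helper name 'load_network' is hypothetical.
import json
import os
import torch

def load_network(dir_model, fn_model="ac_model.pt", fn_config="ac_config.json"):
    # Read back the list of config dicts written by save_network
    with open(os.path.join(dir_model, fn_config), "r") as config_file:
        config = json.load(config_file)
    net_config = config[0]  # first entry is the network config

    # Rebuild the network exactly as in _create_network and load the weights
    network = Net(*net_config.values())
    network.load_state_dict(
        torch.load(os.path.join(dir_model, fn_model),
                   map_location=net_config["device"]))
    network.to(net_config["device"])
    network.eval()
    return network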
def split_features(
    self, subset=None, augmentations=None, noise_set=None, noise_ratio=None
):
    # Default 'noise' is no noise (clean)
    if noise_set is None:
        noise_set = "Clean"

    if subset is not None:
        if type(subset) == str:
            subset = [subset]
        subsets = subset
    else:
        # Split 'Train', 'Val', 'Test' set if no specific subset is given
        if augmentations is not None:
            if type(augmentations) == str:
                augmentations = [augmentations]
            # Add specific augmentation(s) to default sets
            subsets = ["Train", "Val", "Test"]
            subsets += ["Train " + a for a in augmentations]
        else:
            # Use all available augmentations
            if noise_set == "Clean":
                subsets = os.listdir(
                    os.path.join(self._dir_root_set, "Features", "Clean", "Spectra")
                )
            else:
                subsets = os.listdir(
                    os.path.join(
                        self._dir_root_set,
                        "Features",
                        noise_set,
                        "Spectra",
                        "Ratio_%.2f" % noise_ratio,
                    )
                )

    # Root input directory spectra
    dir_root_spectra_in = os.path.join(
        self._dir_root_set, "Features", noise_set, "Spectra"
    )
    if noise_set != "Clean":
        dir_root_spectra_in = os.path.join(
            dir_root_spectra_in, "Ratio_%.2f" % noise_ratio
        )

    for subset in sorted(subsets):
        # Load 5-second spectra belonging to categories
        dir_in_spectra = os.path.join(dir_root_spectra_in, subset)
        files_spectra = retrieve_files(dir_in_spectra)
        files_spectra = [
            f
            for f in files_spectra
            if os.path.split(f)[-1].split("_")[0]
            in self._classification["categories"]
        ]

        # Set output directory (augmentations i.e. 'Train Denoised' go in 'Train')
        dir_out_spectra = os.path.join(
            self._dir_root_set, "Dataset", subset.split(" ")[0], "Spectra"
        )

        # Refresh directories only for non-augmented sets
        if subset in ["Train", "Val", "Test"]:
            refresh_directory(dir_out_spectra)

        # Split spectra
        part = partial(self._split_spectra, dir_output=dir_out_spectra)
        with Pool(processes=os.cpu_count() - 1) as pool:
            pool.map(part, files_spectra)

        # Split the states in case of implicit denoising
        if hasattr(self, "_states"):
            # Load states
            dir_in_states = os.path.join(
                self._dir_root_set, "Features", "Mixed", "States", subset
            )
            files_states = retrieve_files(dir_in_states)
            files_states = [
                f
                for f in files_states
                if os.path.split(f)[-1].split("_")[0]
                in self._classification["categories"]
            ]

            # Refresh output directory only for non-augmented sets
            dir_out_states = os.path.join(
                self._dir_root_set, "Dataset", subset.split(" ")[0], "States"
            )
            if subset in ["Train", "Val", "Test"]:
                refresh_directory(dir_out_states)

            # Split states
            part = partial(self._split_states, dir_output=dir_out_states)
            with Pool(processes=os.cpu_count() - 1) as pool:
                pool.map(part, files_states)

        print_verbose(
            self.verbose,
            "Split %d files (%d categories) into %d files"
            % (
                len(files_spectra),
                len(self._classification["categories"]),
                len(os.listdir(dir_out_spectra)),
            ),
        )
def train_network(
    train_settings,
    train_set,
    val_set,
    net_config,
    loss_fn,
    verbose=True,
    super_verbose=False,
):
    # Reset the seed
    torch.manual_seed(42)

    # Create network from config
    network = _create_network(net_config)

    # Copy optimizer settings to avoid modifying train_settings
    dict_optimizer = train_settings["optimizer"].copy()
    # Select the optimizer in torch.optim from settings
    optimizer = getattr(torch.optim, dict_optimizer.pop("optimizer"))
    # Bind network, unpack optimizer settings
    optimizer = optimizer(network.parameters(), **dict_optimizer)

    if "lr_scheduler" in train_settings:
        # Copy scheduler settings to avoid modifying train_settings
        dict_scheduler = train_settings["lr_scheduler"].copy()
        # Select the lr scheduler in torch.optim from settings
        lr_scheduler = getattr(torch.optim.lr_scheduler,
                               dict_scheduler.pop("scheduler"))
        # Bind optimizer, unpack scheduler settings
        lr_scheduler = lr_scheduler(optimizer, **dict_scheduler)

    # Create train dataloader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=train_settings["batch_size"],
        shuffle=True,
        drop_last=False,
    )

    # Create validation dataloader
    if len(val_set) > 2048:
        val_batch_size = 2048  # cap batch size to avoid memory issues
    else:
        val_batch_size = len(val_set)
    val_loader = torch.utils.data.DataLoader(dataset=val_set,
                                             batch_size=val_batch_size,
                                             drop_last=False)

    if "es_patience" in train_settings:
        # Set up early stopping checkpoint
        fp_checkpoint = "checkpoint-es.pt"
        early_stopping = es.EarlyStopping(
            patience=train_settings["es_patience"],
            delta=1e-7,
            verbose=super_verbose,
            output_fp=fp_checkpoint,
        )

    training_loss_history = []
    validation_loss_history = []

    # Loop over epochs
    for epoch in range(train_settings["epochs"]):
        train_losses = []

        # Set in training mode
        network.train()
        for data in train_loader:
            # To device (gpu/cpu)
            x_train = data[0].to(net_config["device"])
            if len(data) > 2:
                x2_train = data[1].to(net_config["device"])
            y_train = data[-1].to(net_config["device"])

            # Clear gradient of optimizer
            optimizer.zero_grad()

            # Forward pass
            if len(data) == 2:
                yhat = network(x_train)
            else:
                yhat = network(x_train, x2_train)

            # Compute loss
            loss = loss_fn(yhat, y_train)

            # Backward pass
            loss.backward()

            # Record loss
            train_losses.append(loss.item())

            # Update parameters
            optimizer.step()

        # Record loss and update loss history
        training_loss = np.mean(train_losses)
        training_loss_history.append(training_loss)

        # Validation loss
        with torch.no_grad():
            val_losses = []

            # Set in eval mode
            network.eval()
            for data in val_loader:
                # To device (gpu/cpu)
                x_val = data[0].to(net_config["device"])
                if len(data) > 2:
                    x2_val = data[1].to(net_config["device"])
                y_val = data[-1].to(net_config["device"])

                # Forward pass
                if len(data) == 2:
                    yhat = network(x_val)
                else:
                    yhat = network(x_val, x2_val)

                # Compute loss
                val_loss = loss_fn(yhat, y_val)

                # Record loss
                val_losses.append(val_loss.item())

            # Record loss and update loss history
            validation_loss = np.mean(val_losses)
            validation_loss_history.append(validation_loss)

        print_verbose(
            super_verbose,
            "epoch %d: training loss = %.6f, validation loss = %.6f"
            % (epoch + 1, training_loss, validation_loss),
        )

        if "es_patience" in train_settings:
            # Check early stopping criterion
            early_stopping(validation_loss, network)
            if early_stopping.early_stop:
                # Get training loss at best epoch
                training_loss = training_loss_history[
                    epoch - train_settings["es_patience"]]
                # Get validation loss at best epoch
                validation_loss = early_stopping.val_loss_min
                print_verbose(
                    super_verbose,
                    "Early stopping (using model at epoch %d with val. loss %.5f)"
                    % (epoch + 1 - train_settings["es_patience"], validation_loss),
                )
                # End training
                break

        if "lr_scheduler" in train_settings:
            # Update learning rate
            lr_scheduler.step(validation_loss)

    if "es_patience" in train_settings:
        # Load network from checkpoint
        network.load_state_dict(torch.load(early_stopping.output_fp))
        # Delete the temporary early-stopping checkpoint file
        os.remove(early_stopping.output_fp)

    loss = (training_loss, validation_loss)
    loss_history = (training_loss_history, validation_loss_history)

    return network, loss, loss_history
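# Hypothetical usage sketch of train_network. The dictionary keys mirror the
# ones read inside the function (optimizer, lr_scheduler, batch_size, epochs,
# es_patience), but the concrete values below are illustrative only, as is the
# choice of MSELoss; train_set, val_set and net_config come from the rest of
# the pipeline.
import torch

example_train_settings = {
    "optimizer": {"optimizer": "Adam", "lr": 1e-3},
    "lr_scheduler": {"scheduler": "ReduceLROnPlateau", "factor": 0.5, "patience": 3},
    "batch_size": 64,
    "epochs": 100,
    "es_patience": 10,
}

# network, (train_loss, val_loss), history = train_network(
#     example_train_settings, train_set, val_set, net_config,
#     loss_fn=torch.nn.MSELoss(),
# )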
def extract_audio_from_esc50(self, categories, db_trim=30, overwrite=False):
    """Export relevant categories from the ESC-50 dataset.

    Keyword arguments:
    categories -- iterable containing the categories from the ESC-50
        dataset that should be extracted,
    db_trim -- threshold for the trimming of silence (default: 30 dB),
    overwrite -- whether to overwrite existing data (default: False).
    """
    # Set directories
    dir_esc50 = os.path.join(self._dir_root, "Raw", "Aircraft", "ESC-50-master")
    fp_esc50_csv = os.path.join(dir_esc50, "meta", "esc50.csv")
    dir_input = os.path.join(dir_esc50, "audio")
    dir_output = os.path.join(self._dir_root, "Aircraft Classification",
                              "Audio", "Full")

    # Check if output directory exists
    if os.path.exists(dir_output):
        if overwrite:
            shutil.rmtree(dir_output)
        else:
            print_verbose(self.verbose, "Output directory already exists.")
            return
    os.makedirs(dir_output)

    # Get dataframe with filenames, categories
    df = pd.read_csv(fp_esc50_csv)
    df.drop(["fold", "target", "esc10", "src_file", "take"], axis=1, inplace=True)
    df.replace("_", "-", inplace=True, regex=True)  # less tedious later

    # Extract relevant categories
    df_binary = df.loc[df["category"].isin(categories)]
    categories = df_binary["category"].unique()

    # Loop over categories
    for cat in categories:
        # Load files belonging to category
        files_src = df_binary.loc[df_binary["category"] == cat]["filename"].to_list()

        # Loop over files
        for i, file in enumerate(files_src):
            src = os.path.join(dir_input, file)

            # Load audio
            y, sr = librosa.load(src, sr=self._sample_rate)

            # Trim audio
            y_trim, _ = librosa.effects.trim(y, top_db=db_trim)

            # Export audio
            fn_out = "%s_%02d.wav" % (cat, i + 1)
            dest = os.path.join(dir_output, fn_out)
            sf.write(dest, y_trim, samplerate=sr)

            # Printing
            if self.super_verbose:
                # Set trim message
                if len(y_trim) < len(y):
                    trim_msg = " (trimmed to %.3f sec.)" % (len(y_trim) / sr)
                else:
                    trim_msg = ""
                print("%s ---> %s%s" % (file, dest, trim_msg))

    print_verbose(
        self.verbose,
        "Finished exporting %d files (sr = %d Hz)" % (len(df_binary), sr),
    )
def augment_training_data(self, overwrite=False):
    """Augment the training data.

    Keyword arguments:
    overwrite -- whether to overwrite existing data (default: False).

    Augmentations include:
    Pitch Shift at [-2, -1, 1, 2] octaves,
    Time Stretch with ratios of [0.70, 0.85, 1.15, 1.30],
    Intra-category mixing with four random files belonging to the same category.
    The 'silence' category (if generated) is omitted from the augmentation.
    """
    # Set directories
    dir_input = os.path.join(self._dir_root, "Aircraft Classification",
                             "Audio", "Train")
    dir_root_output = os.path.join(self._dir_root, "Aircraft Classification",
                                   "Audio")

    # Get files, but ignore augmentation for 'silence' category
    files = [
        os.path.join(dir_input, f)
        for f in sorted(os.listdir(dir_input))
        if os.path.split(f)[-1].split("_")[0] != "silence"
    ]

    # Loop through augmentations
    augmentations = ["Pitch Shift", "Time Stretch", "Class Mix"]
    do_augmentations = []
    for aug in augmentations:
        # Set output directory
        dir_output = os.path.join(dir_root_output, "Train " + aug)

        # Check if it exists or should be overwritten
        if overwrite or not os.path.exists(dir_output):
            refresh_directory(dir_output)
            do_augmentations.append(aug)

    # Do augmentations
    if len(do_augmentations) > 0:
        for aug in do_augmentations:
            dir_output = os.path.join(dir_root_output, "Train " + aug)

            if aug == "Class Mix":
                # Generate a list of directory-specific 'seeds' from the given seed
                # to preserve reproducible randomness while multiprocessing
                np.random.seed(42)
                seeds = np.random.randint(0, 10 * len(files), len(files))
                part = partial(self._augment_class_mix,
                               dir_out=dir_output,
                               all_files=files)
                with Pool(processes=os.cpu_count() - 1) as pool:
                    pool.starmap(part, list(zip(files, seeds)))
            elif aug == "Pitch Shift":
                part = partial(self._augment_pitch_shift, dir_out=dir_output)
                with Pool(processes=os.cpu_count() - 1) as pool:
                    pool.map(part, files)
            elif aug == "Time Stretch":
                part = partial(self._augment_time_stretch, dir_out=dir_output)
                with Pool(processes=os.cpu_count() - 1) as pool:
                    pool.map(part, files)

        print_verbose(
            self.verbose,
            "Augmentation: %d --> %d files using %s augmentation"
            % (
                len(files),
                len(do_augmentations) * 4 * len(files),
                do_augmentations,
            ),
        )
    else:
        print_verbose(self.verbose, "Augmentation has already been done.")
def split_dataset(self, train_test_ratio=0.8, train_val_ratio=0.8,
                  overwrite=False):
    """Split the dataset into a training, validation and test subset.

    Keyword arguments:
    train_test_ratio -- ratio of the training set over the complete set,
        the remainder will be assigned to the test subset (default: 0.8),
    train_val_ratio -- ratio of the actual training set over the training
        set, the remainder will be assigned to the validation subset
        (default: 0.8),
    overwrite -- whether to overwrite existing data (default: False).
    """
    # Directories
    dir_input = os.path.join(self._dir_root, "Aircraft Classification",
                             "Audio", "Full")
    dir_root_output = os.path.join(self._dir_root, "Aircraft Classification",
                                   "Audio")

    # Check if data should be overwritten if it exists
    if os.path.exists(os.path.join(dir_root_output, "Train")) and not overwrite:
        print_verbose(
            self.verbose,
            "Dataset already exists and should not be overwritten.")
        return

    # Refresh the output directories
    subdirs = ["Train", "Val", "Test"]
    for subdir in subdirs:
        refresh_directory(os.path.join(dir_root_output, subdir))

    # Read files into array for easy slicing
    files = np.array(retrieve_files(dir_input))

    # Get categories
    file_categories = np.array(
        [os.path.split(f)[-1].split("_")[0] for f in files])
    categories = np.unique(file_categories)
    files_per_category = len(files) // len(categories)

    # Get train, val, test indices per category
    train_idcs, test_idcs = train_test_split(np.arange(files_per_category),
                                             train_size=train_test_ratio,
                                             random_state=42)
    train_idcs, val_idcs = train_test_split(train_idcs,
                                            train_size=train_val_ratio,
                                            random_state=42)
    print_verbose(
        self.verbose,
        "Split per category (Train, Val, Test): (%d, %d, %d)"
        % (len(train_idcs), len(val_idcs), len(test_idcs)),
    )

    # Extract train, val, test files using indices and export to subdirs
    indices = [train_idcs, val_idcs, test_idcs]
    for idcs, subdir in zip(indices, subdirs):
        files_set = [
            f
            for f in files
            if int(os.path.split(f)[-1].split("_")[-1].split(".")[0]) - 1 in idcs
        ]
        for file in files_set:
            dest = os.path.join(dir_root_output, subdir,
                                os.path.split(file)[-1])
            shutil.copyfile(file, dest)

    # Remove the now redundant 'Full' input directory
    shutil.rmtree(dir_input)
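# Illustrative sketch (hypothetical filename) of how split_dataset assigns a
# file to a subset: the trailing "_NN" in "<category>_NN.wav" is converted to a
# zero-based per-category index and compared against the train/val/test index
# arrays produced by train_test_split.
import os

fn = "helicopter_07.wav"  # hypothetical filename following the naming scheme above
category = os.path.split(fn)[-1].split("_")[0]                          # "helicopter"
file_idx = int(os.path.split(fn)[-1].split("_")[-1].split(".")[0]) - 1  # 6
print(category, file_idx)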