def extract(dataset, recompute=False):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
        recompute (bool): Whether to recompute existing features.
    """
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(cfg.sample_rate,
                                         cfg.n_window,
                                         cfg.hop_length,
                                         cfg.n_mels,
                                         )

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel,
                         os.path.join(cfg.extraction_path, 'parameters.json'))

    # Extract features for each audio clip in the dataset
    df = io.read_metadata(dataset.metadata_path)
    features.extract_dataset(dataset_path=dataset.path,
                             file_names=df.index.tolist(),
                             extractor=extractor,
                             output_path=output_path,
                             recompute=recompute,
                             )
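
# Illustrative only (not part of the source): a quick way to sanity-check the
# extracted features afterwards. This assumes features.extract_dataset writes
# one HDF5 dataset per audio clip to output_path, which is an assumption about
# the storage layout rather than documented behaviour.
import h5py

def _inspect_features(output_path):
    """Print the name and shape of each feature matrix in the HDF5 file."""
    with h5py.File(output_path, 'r') as f:
        for name in f:
            print(name, f[name].shape)
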
def train():
    """Train the neural network model.

    See Also:
        :func:`training.train`

    Note:
        For reproducibility, the random seed is set to a fixed value.
    """
    import training

    # Ensure output directories exist
    os.makedirs(os.path.dirname(cfg.scaler_path), exist_ok=True)
    os.makedirs(cfg.model_path, exist_ok=True)
    os.makedirs(cfg.log_path, exist_ok=True)

    # Load (standardized) input data and target values
    tr_x, tr_y, _ = _load_data(cfg.training_set, is_training=True)
    val_x, val_y, _ = _load_data(cfg.validation_set)

    # Try to create reproducible results
    np.random.seed(cfg.initial_seed)

    # Save free parameters to disk
    utils.log_parameters(cfg.training,
                         os.path.join(cfg.model_path, 'parameters.json'))

    training.train(tr_x, tr_y, val_x, val_y)
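
# Note on the reproducibility remark above: np.random.seed() only seeds NumPy.
# A fuller (still not exhaustive) approach, assuming a TensorFlow/Keras
# backend, would also seed Python's and TensorFlow's generators. Sketch only:
import random
import tensorflow as tf

def _set_seeds(seed):
    """Seed the Python, NumPy and TensorFlow random number generators."""
    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)  # TF 1.x; use tf.random.set_seed(seed) on TF 2.x
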
def predict(dataset, fold):
    """Generate predictions for audio tagging.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
        fold (int): The specific fold to generate predictions for. Only
            applicable for the training dataset.
    """
    import inference

    # Load input data and associated metadata
    x, df = _load_data(dataset)

    dataset_name = dataset.name
    if dataset.name == 'training':
        if fold == -1:
            raise ValueError('Invalid fold: %d' % fold)

        dataset_name += str(fold)
        mask = df.fold == fold
        tr_x = x[~mask]
        x = x[mask]
        df = df[mask]
    else:
        tr_x, tr_df = _load_data(cfg.to_dataset('training'))
        if fold >= 0:
            dataset_name += str(fold)
            tr_x = tr_x[tr_df.fold != fold]

    generator = utils.fit_scaler(tr_x)
    x = generator.standardize(x)

    # Predict class probabilities for each model (epoch)
    preds = []
    for epoch in _determine_epochs(cfg.prediction_epochs, fold, n=4):
        pred = utils.timeit(
            lambda: _load_model(fold, epoch).predict(x),
            '[Epoch %d] Predicted class probabilities' % epoch)

        preds.append(inference.merge_predictions(pred, df.index))

    pred_mean = pd.concat(preds).groupby(level=0).mean()

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset_name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    pred_mean.to_csv(predictions_path % 'predictions')
    io.write_predictions(pred_mean, predictions_path % 'submission')
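
# Hypothetical sketch of the _determine_epochs helper used above; the real
# implementation lives elsewhere in the project. The sketch assumes the helper
# either passes through an explicit list of epochs or picks the n best epochs
# from a per-fold training log. The 'history.csv' file name, the metric column
# addressed by `spec`, and the use of the row index as the epoch number are
# all assumptions made purely for illustration.
def _determine_epochs_sketch(spec, fold, n=4):
    if isinstance(spec, list):
        return spec  # an explicit list of epochs in the config

    log_dir = cfg.log_path.format(str(fold) if fold >= 0 else 'all')
    history = pd.read_csv(os.path.join(log_dir, 'history.csv'))
    return history[spec].nlargest(n).index.tolist()
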
def predict(dataset):
    """Generate predictions for audio tagging and sound event detection.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
    """
    import capsnet

    # Load (standardized) input data and associated file names
    test_x, _, names = _load_data(dataset)

    # Predict class probabilities for each model (epoch)
    at_preds, sed_preds = [], []
    for epoch in _determine_epochs(cfg.prediction_epochs):
        model = _load_model(epoch)
        at_pred, sed_pred = utils.timeit(
            lambda: capsnet.gccaps_predict(test_x, model),
            '[Epoch %d] Predicted class probabilities' % epoch)

        at_preds.append(at_pred)
        sed_preds.append(sed_pred)

    # Average predictions to give an overall output
    total_at_pred = np.mean(at_preds, axis=0)
    total_sed_pred = np.mean(sed_preds, axis=0)

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset.name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    utils.write_predictions(names, total_at_pred, predictions_path % 'at')
    utils.write_predictions(names, total_sed_pred, predictions_path % 'sed')
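
# Hypothetical sketch of the _load_model helper used above; the checkpoint
# directory comes from the config module, but the file naming pattern
# ('gccaps.%02d.h5') is an assumption for illustration only. Any custom layers
# used by the capsule network would additionally need to be registered via
# load_model's custom_objects argument; no specific layer names are assumed.
from keras.models import load_model

def _load_model_sketch(epoch):
    """Load the model checkpoint saved for the given training epoch."""
    path = os.path.join(cfg.model_path, 'gccaps.%02d.h5' % epoch)
    return load_model(path)
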
def extract(dataset):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
    """
    import data_augmentation as aug
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(sample_rate=cfg.sample_rate,
                                         n_window=cfg.n_window,
                                         hop_length=cfg.hop_length,
                                         n_mels=cfg.n_mels,
                                         )

    # Prepare for data augmentation if enabled
    file_names, target_values = utils.read_metadata(dataset.metadata_path)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        n_transforms_iter = aug.transform_counts(target_values)
        file_names = aug.expand_metadata((file_names, target_values))[0]
    else:
        n_transforms_iter = None

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel,
                         os.path.join(cfg.extraction_path, 'parameters.json'))

    # Generate features for each audio clip in the dataset
    features.extract_dataset(dataset.path,
                             file_names,
                             extractor,
                             cfg.clip_duration,
                             output_path,
                             n_transforms_iter=n_transforms_iter,
                             )
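
# Illustrative usage (assumed, not taken from the source): run extraction for
# each dataset in turn. cfg.training_set is referenced above; cfg.test_set is
# a placeholder attribute used here purely for illustration.
def _extract_all():
    for dataset in [cfg.training_set, cfg.test_set]:
        extract(dataset)
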
def train(model, fold, use_class_weight, noisy_sample_weight):
    """Train the neural network model.

    Args:
        model (str): The neural network architecture.
        fold (int): The fold to use for validation.
        use_class_weight (bool): Whether to use class-wise weights.
        noisy_sample_weight (float): Examples that are not verified are
            weighted according to this value.

    Note:
        For reproducibility, the random seed is set to a fixed value.
    """
    import training

    # Try to create reproducible results
    np.random.seed(cfg.initial_seed)

    # Load training data and associated metadata
    x, df = _load_data(cfg.to_dataset('training'))

    # Get one-hot representation of target values
    y = utils.to_categorical(df.label)

    # Split training data into training and validation
    if fold >= 0:
        mask = df.fold == fold
    else:
        mask = np.zeros(len(df), dtype=bool)
    val_mask = mask & (df.manually_verified == 1)

    tr_x = x[~mask]
    tr_y = y[~mask]
    val_x = x[val_mask]
    val_y = y[val_mask]
    val_index = df.index[val_mask]

    # Compute class weights based on number of class examples
    if use_class_weight:
        group = utils.group_by_name(df)
        n_examples = group.first().groupby('label').size().values
        class_weight = len(group) / (len(n_examples) * n_examples)
    else:
        class_weight = None

    # Assign a specific sample weight to unverified examples
    if noisy_sample_weight:
        sample_weight = df[~mask].manually_verified.values.astype(float)
        sample_weight[sample_weight == 0] = noisy_sample_weight
    else:
        sample_weight = None

    # Ensure output directories exist
    fold_dir = str(fold) if fold >= 0 else 'all'
    os.makedirs(os.path.join(cfg.model_path, fold_dir), exist_ok=True)
    os.makedirs(cfg.log_path.format(fold_dir), exist_ok=True)

    # Save free parameters to disk
    utils.log_parameters(cfg.training,
                         os.path.join(cfg.model_path, 'parameters.json'))

    training.train(tr_x, tr_y, val_x, val_y, val_index, model, fold,
                   class_weight=class_weight,
                   sample_weight=sample_weight)
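
# Quick numeric illustration (made-up numbers) of the class-weight heuristic
# above: with 100 clips split across two classes as [75, 25], the weights are
# 100 / (2 * [75, 25]), so the rarer class is up-weighted.
def _class_weight_example():
    n_examples = np.array([75, 25])   # clips per class
    n_total = n_examples.sum()        # 100 clips in total
    return n_total / (len(n_examples) * n_examples)  # array([0.667, 2.0])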