Example #1
def extract(dataset, recompute=False):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
        recompute (bool): Whether to recompute existing features.
    """
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(
        cfg.sample_rate,
        cfg.n_window,
        cfg.hop_length,
        cfg.n_mels,
    )

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel,
                         os.path.join(cfg.extraction_path, 'parameters.json'))

    # Extract features for each audio clip in the dataset
    df = io.read_metadata(dataset.metadata_path)
    features.extract_dataset(
        dataset_path=dataset.path,
        file_names=df.index.tolist(),
        extractor=extractor,
        output_path=output_path,
        recompute=recompute,
    )
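
A minimal usage sketch. cfg.to_dataset and the dataset names are assumptions borrowed from the later snippets, not part of this example:

for name in ('training', 'test'):
    extract(cfg.to_dataset(name), recompute=False)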
Example #2
def train():
    """Train the neural network model.

    See Also:
        :func:`training.train`

    Note:
        For reproducibility, the random seed is set to a fixed value.
    """
    import training

    # Ensure output directories exist
    os.makedirs(os.path.dirname(cfg.scaler_path), exist_ok=True)
    os.makedirs(cfg.model_path, exist_ok=True)
    os.makedirs(cfg.log_path, exist_ok=True)

    # Load (standardized) input data and target values
    tr_x, tr_y, _ = _load_data(cfg.training_set, is_training=True)
    val_x, val_y, _ = _load_data(cfg.validation_set)

    # Try to create reproducible results
    np.random.seed(cfg.initial_seed)

    # Save free parameters to disk
    utils.log_parameters(cfg.training, os.path.join(cfg.model_path,
                                                    'parameters.json'))

    training.train(tr_x, tr_y, val_x, val_y)
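
The _load_data helper is not shown. A minimal sketch of what it could look like, reusing the fit_scaler/standardize interface seen in the prediction snippet below; io.load_features, utils.save_scaler, and utils.load_scaler are hypothetical names introduced only for illustration:

def _load_data(dataset, is_training=False):
    # Hypothetical loader: read the features written by extract() from disk
    x, y, index = io.load_features(dataset)
    if is_training:
        scaler = utils.fit_scaler(x)                 # fit on training data only
        utils.save_scaler(scaler, cfg.scaler_path)   # hypothetical persistence
    else:
        scaler = utils.load_scaler(cfg.scaler_path)  # hypothetical
    return scaler.standardize(x), y, index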
Example #3
def predict(dataset, fold):
    """Generate predictions for audio tagging.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
        fold (int): The specific fold to generate predictions for. Only
            applicable for the training dataset.
    """
    import inference

    # Load input data and associated metadata
    x, df = _load_data(dataset)
    dataset_name = dataset.name
    if dataset.name == 'training':
        if fold == -1:
            raise ValueError('Invalid fold: %d' % fold)

        dataset_name += str(fold)
        mask = df.fold == fold
        tr_x = x[~mask]
        x = x[mask]
        df = df[mask]
    else:
        tr_x, tr_df = _load_data(cfg.to_dataset('training'))
        if fold >= 0:
            dataset_name += str(fold)
            tr_x = tr_x[tr_df.fold != fold]

    generator = utils.fit_scaler(tr_x)
    x = generator.standardize(x)

    # Predict class probabilities for each model (epoch)
    preds = []
    for epoch in _determine_epochs(cfg.prediction_epochs, fold, n=4):
        pred = utils.timeit(lambda: _load_model(fold, epoch).predict(x),
                            '[Epoch %d] Predicted class probabilities' % epoch)

        preds.append(inference.merge_predictions(pred, df.index))

    pred_mean = pd.concat(preds).groupby(level=0).mean()

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset_name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    pred_mean.to_csv(predictions_path % 'predictions')
    io.write_predictions(pred_mean, predictions_path % 'submission')
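
The ensemble average relies on pandas index alignment: pd.concat stacks the per-epoch prediction frames, and groupby(level=0).mean() takes the arithmetic mean per clip. A self-contained illustration with made-up numbers:

import pandas as pd

p1 = pd.DataFrame({'dog': [0.9, 0.2]}, index=['clip1', 'clip2'])
p2 = pd.DataFrame({'dog': [0.7, 0.4]}, index=['clip1', 'clip2'])
pred_mean = pd.concat([p1, p2]).groupby(level=0).mean()
print(pred_mean)
#        dog
# clip1  0.8
# clip2  0.3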
Example #4
def predict(dataset):
    """Generate predictions for audio tagging and sound event detection.

    This function uses an ensemble of trained models to generate the
    predictions, with the averaging function being an arithmetic mean.
    Computed predictions are then saved to disk.

    Args:
        dataset: Dataset to generate predictions for.
    """
    import capsnet

    # Load (standardized) input data and associated file names
    test_x, _, names = _load_data(dataset)

    # Predict class probabilities for each model (epoch)
    at_preds, sed_preds = [], []

    for epoch in _determine_epochs(cfg.prediction_epochs):
        model = _load_model(epoch)
        at_pred, sed_pred = utils.timeit(
            lambda: capsnet.gccaps_predict(test_x, model),
            '[Epoch %d] Predicted class probabilities' % epoch)

        at_preds.append(at_pred)
        sed_preds.append(sed_pred)

    # Average predictions to give an overall output
    total_at_pred = np.mean(at_preds, axis=0)
    total_sed_pred = np.mean(sed_preds, axis=0)

    # Ensure output directory exists and set file path format
    os.makedirs(os.path.dirname(cfg.predictions_path), exist_ok=True)
    predictions_path = cfg.predictions_path.format('%s', dataset.name)

    # Save free parameters to disk
    utils.log_parameters({'prediction_epochs': cfg.prediction_epochs},
                         os.path.join(os.path.dirname(cfg.predictions_path),
                                      'parameters.json'))

    # Write predictions to disk
    utils.write_predictions(names, total_at_pred, predictions_path % 'at')
    utils.write_predictions(names, total_sed_pred, predictions_path % 'sed')
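
A minimal usage sketch (cfg.to_dataset and the dataset name are assumptions carried over from the other snippets; this project's own dataset accessor may differ):

predict(cfg.to_dataset('test'))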
Example #5
def extract(dataset):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
    """
    import data_augmentation as aug
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(sample_rate=cfg.sample_rate,
                                         n_window=cfg.n_window,
                                         hop_length=cfg.hop_length,
                                         n_mels=cfg.n_mels,
                                         )

    # Prepare for data augmentation if enabled
    file_names, target_values = utils.read_metadata(dataset.metadata_path)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        n_transforms_iter = aug.transform_counts(target_values)
        file_names = aug.expand_metadata((file_names, target_values))[0]
    else:
        n_transforms_iter = None

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel, os.path.join(cfg.extraction_path,
                                                  'parameters.json'))

    # Generate features for each audio clip in the dataset
    features.extract_dataset(dataset.path,
                             file_names,
                             extractor,
                             cfg.clip_duration,
                             output_path,
                             n_transforms_iter=n_transforms_iter,
                             )
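
Usage sketch: augmentation applies only to the training set, and only when cfg.enable_augmentation is set; other datasets are extracted as-is. cfg.training_set appears above, while cfg.to_dataset and the 'test' name are assumptions borrowed from the other snippets:

extract(cfg.training_set)        # augmented if cfg.enable_augmentation
extract(cfg.to_dataset('test'))  # never augmented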
Example #6
def train(model, fold, use_class_weight, noisy_sample_weight):
    """Train the neural network model.

    Args:
        model (str): The neural network architecture.
        fold (int): The fold to use for validation.
        use_class_weight (bool): Whether to use class-wise weights.
        noisy_sample_weight (float): Examples that are not verified are
            weighted according to this value.

    Note:
        For reproducibility, the random seed is set to a fixed value.
    """
    import training

    # Try to create reproducible results
    np.random.seed(cfg.initial_seed)

    # Load training data and associated metadata
    x, df = _load_data(cfg.to_dataset('training'))
    # Get one-hot representation of target values
    y = utils.to_categorical(df.label)

    # Split training data into training and validation
    if fold >= 0:
        mask = df.fold == fold
    else:
        mask = np.zeros(len(df), dtype=bool)
    val_mask = mask & (df.manually_verified == 1)

    tr_x = x[~mask]
    tr_y = y[~mask]
    val_x = x[val_mask]
    val_y = y[val_mask]
    val_index = df.index[val_mask]

    # Compute class weights based on number of class examples
    if use_class_weight:
        group = utils.group_by_name(df)
        n_examples = group.first().groupby('label').size().values
        class_weight = len(group) / (len(n_examples) * n_examples)
    else:
        class_weight = None

    # Assign a specific sample weight to unverified examples
    if noisy_sample_weight:
        sample_weight = df[~mask].manually_verified.values.astype(float)
        sample_weight[sample_weight == 0] = noisy_sample_weight
    else:
        sample_weight = None

    # Ensure output directories exist
    fold_dir = str(fold) if fold >= 0 else 'all'
    os.makedirs(os.path.join(cfg.model_path, fold_dir), exist_ok=True)
    os.makedirs(cfg.log_path.format(fold_dir), exist_ok=True)

    # Save free parameters to disk
    utils.log_parameters(cfg.training,
                         os.path.join(cfg.model_path, 'parameters.json'))

    training.train(tr_x,
                   tr_y,
                   val_x,
                   val_y,
                   val_index,
                   model,
                   fold,
                   class_weight=class_weight,
                   sample_weight=sample_weight)
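
The class-weight expression is the usual "balanced" heuristic, n_samples / (n_classes * n_examples_per_class), so rare classes receive proportionally larger weights. A worked example with made-up counts:

import numpy as np

n_examples = np.array([100, 50, 10])   # clips per class (hypothetical)
class_weight = 160 / (3 * n_examples)  # 160 clips total, 3 classes
print(class_weight)                    # [0.53333333 1.06666667 5.33333333]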