def audio_to_chroma(input_wavfile, hopsize, fx, norm=0):
    """Compute chroma features for a wavefile.

    The signal is read in batches via ``signal_buffer``; each batch is passed
    through ``CT.cqt_pool`` and the results are stacked along the first axis
    before being handed to ``fx``.

    Parameters
    ----------
    input_wavfile : str
        Path to a wavefile.
    hopsize : int
        Number of samples between frames.
    fx : function
        Function that consumes 2D matrices of DFT coefficients and outputs
        chroma features.
    norm : scalar, default=0
        Lp norm to apply to the features; skipped if not > 0.

    Returns
    -------
    features : np.ndarray
        Matrix of time-aligned chroma vectors, shaped (num_frames, 12).
    """
    pooled_batches = []
    for chunk in signal_buffer(input_wavfile, hopsize=hopsize):
        pooled_batches.append(CT.cqt_pool(chunk))

    # Stack the per-batch outputs along the first axis before applying fx.
    chroma = fx(np.concatenate(pooled_batches, axis=0))

    # Normalization is opt-in: only a strictly positive `norm` triggers it.
    if norm > 0:
        return CT.lp_norm(chroma, norm)
    return chroma
def prepare_training_data(train_file, label_file, label_map, batch_size=100):
    """Create a data generator from input data and label files.

    Labels are translated to integers through ``label_map``; observations
    whose label is missing from the map are discarded. The remaining data is
    passed through ``CT.cqt_pool`` and per-coefficient statistics are computed
    on the result.

    Parameters
    ----------
    train_file : str
        Path to a numpy file of data observations.
    label_file : str
        Path to a numpy file of data labels.
    label_map : dict
        Dictionary mapping string labels to integers.
    batch_size : int, default=100
        Number of datapoints to return for each batch.

    Returns
    -------
    shuffler : generator
        Data generator that returns an (x,y) tuple for each call to next().
    stats : dict
        Coefficient means and standard deviations, keyed by 'mu' and 'sigma'.

    Raises
    ------
    ValueError
        If none of the labels in ``label_file`` are present in ``label_map``.
    """
    data, labels = np.load(train_file), np.load(label_file)

    # Labels missing from the map are marked with the sentinel -1.
    y_true = np.array([label_map.get(label, -1) for label in labels])

    # Drop all labels that don't exist in the label map, i.e. negative.
    # The comparison must be >= 0: index 0 is a legitimate class and must
    # not be discarded along with the -1 sentinels.
    valid_idx = y_true >= 0
    data, y_true = data[valid_idx], y_true[valid_idx]
    if y_true.size == 0:
        raise ValueError(
            "No labels in %r are present in label_map." % (label_file,))

    data = CT.cqt_pool(data)

    # Compute standardization statistics over the observation axis.
    stats = {'mu': data.mean(axis=0), 'sigma': data.std(axis=0)}

    # The number of chord qualities is derived from the largest label index,
    # assuming 12 labels per quality -- NOTE(review): confirm against
    # generate_chroma_templates.
    num_qualities = int(y_true.max() / 12)
    templates = generate_chroma_templates(num_qualities)

    # Targets are the template rows selected by each integer label.
    shuffler = data_shuffler(data, templates[y_true], batch_size=batch_size)
    return shuffler, stats