Example #1
from omnizart import utils  # assumed import; the excerpt calls utils.get_logger


def test_logger(caplog):
    logger = utils.get_logger("MyLogger")
    logger.info("hello")
    for record in caplog.records:
        assert record.levelname == "INFO"
        assert "hello" in record.message

    logger = utils.get_logger("MyLogger")
    assert len(logger.handlers) == 1
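The second assertion checks that repeated ``get_logger`` calls do not attach duplicate handlers. A minimal sketch of a helper with that behaviour (an illustration under that assumption, not omnizart's actual implementation):

import logging


def get_logger_sketch(name, level=logging.INFO):
    # Reuse the named logger; attach a handler only on the first call so
    # that len(logger.handlers) stays 1 across repeated calls.
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(levelname)s %(message)s"))
        logger.addHandler(handler)
    logger.setLevel(level)
    return logger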
Example #2
import numpy as np

from omnizart.utils import get_logger


logger = get_logger("Vocal Predict")


def create_batches(feature, ctx_len=9, batch_size=64):
    feat_pad = np.pad(feature, ((ctx_len, ctx_len), (0, 0), (0, 0)))

    slices = [feat_pad[idx - ctx_len:idx + ctx_len + 1] for idx in range(ctx_len, len(feat_pad) - ctx_len)]
    # Note: when len(slices) is already a multiple of batch_size, this still
    # pads one full extra batch of zeros (pad_size == batch_size).
    pad_size = batch_size - len(slices) % batch_size
    payload = np.zeros_like(slices[0])
    for _ in range(pad_size):
        slices.append(payload)
    slices = np.array(slices)
    assert len(slices) % batch_size == 0

    batches = [slices[idx:idx + batch_size] for idx in range(0, len(slices), batch_size)]
    return np.array(batches, dtype=np.float32), pad_size


def merge_batches(batch_pred):
    assert len(batch_pred.shape) == 4

    batches, batch_size, frm_len, out_classes = batch_pred.shape
    total_len = batches * batch_size + frm_len - 1
    output = np.zeros((total_len, out_classes))
    for bidx, batch in enumerate(batch_pred):
        for fidx, frame in enumerate(batch):
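            # Assumed continuation (the excerpt is truncated here): each
            # window's prediction covers frm_len consecutive frames starting
            # at its global offset; overlapping windows are accumulated and
            # averaged afterwards. Inferred from the total_len formula above,
            # not copied verbatim from the source.
            start = bidx * batch_size + fidx
            output[start:start + frm_len] += frame

    counts = np.zeros(total_len)
    for idx in range(batches * batch_size):
        counts[idx:idx + frm_len] += 1
    return output / counts[:, None]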
Example #3
import tensorflow as tf

from omnizart.feature.wrapper_func import extract_patch_cqt
from omnizart.drum.prediction import predict
from omnizart.drum.labels import extract_label_13_inst
from omnizart.drum.inference import inference
from omnizart.models.spectral_norm_net import drum_model, ConvSN2D
from omnizart.utils import get_logger, ensure_path_exists, parallel_generator
from omnizart.io import write_yaml
from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.setting_loaders import DrumSettings
from omnizart.train import get_train_val_feat_file_list
from omnizart.constants.datasets import PopStructure
from omnizart.constants.feature import NOTE_PRIORITY_ARRAY

logger = get_logger("Drum Transcription")


class DrumTranscription(BaseTranscription):
    """Application class for drum transcriptions."""
    def __init__(self):
        super().__init__(DrumSettings)
        self.custom_objects = {"ConvSN2D": ConvSN2D}

    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe drum in the audio.

        This function transcribes drum activations in the music. Currently the model
        predicts 13 different classes of drum sounds, and 3 of them will be written to
        the MIDI file.
Example #4
import numpy as np
import pretty_midi
from scipy.stats import norm

from omnizart.utils import get_logger

logger = get_logger("Vocal Inference")


def _conv(seq, window):
    # Weighted moving average: frames within half the window length of either
    # border are copied through unchanged.
    half_len = len(window) // 2
    end_idx = len(seq) - half_len
    total = sum(window)
    container = []
    for val in seq[:half_len]:
        container.append(val)
    for idx in range(half_len, end_idx):
        container.append(
            np.dot(seq[idx - half_len:idx + half_len + 1], window) /
            total)  # noqa: E226
    for val in seq[-half_len:]:
        container.append(val)
    return np.array(container)
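For instance, smoothing with a symmetric 3-tap window (the values are illustrative):

seq = np.array([0.0, 0.2, 0.9, 1.0, 0.8, 0.1, 0.0])
smoothed = _conv(seq, window=[1, 2, 1])
# Border frames pass through unchanged; interior frames become the
# window-weighted average of themselves and their neighbours.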


def _find_peaks(seq, ctx_len=2, threshold=0.5):
    # Discard the first and the last <ctx_len> frames.
    peaks = []
    for idx in range(ctx_len, len(seq) - ctx_len - 1):
        cur_val = seq[idx]
        if cur_val < threshold:
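            # Assumed continuation (the excerpt is truncated here): skip
            # sub-threshold frames, then keep frames that are the maximum of
            # their +/- ctx_len neighbourhood. An illustrative
            # reconstruction, not the verbatim source.
            continue
        if cur_val == np.max(seq[idx - ctx_len:idx + ctx_len + 1]):
            peaks.append(idx)
    return np.array(peaks)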
Example #5
# pylint: disable=E1101
import numpy as np

from omnizart.utils import get_logger

logger = get_logger("Drum Prediction")


def create_batches(feature, mini_beat_per_seg, b_size=6):
    """Create a 4D input for model prediction.

    Parameters
    ----------
    feature: 3D numpy array
        Should be in shape [mini_beat_pos x time x freq].
    mini_beat_per_seg: int
        Number of mini beats in one segment (a beat).
    b_size: int
        Output batch size.

    Returns
    -------
    batch_feature: 5D numpy array
        Dimensions are [batches x b_size x time x freq x mini_beat_per_seg].
    pad_size: int
        The additional padded size at the end of the batch.
    """
    assert (
        len(feature.shape) == 3
    ), f"Invalid feature shape: {feature.shape}. Should be three dimensional."
Example #6
import os
import abc

import numpy as np

from omnizart.constants import datasets as dset
from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, LOWEST_MIDI_NOTE
from omnizart.io import dump_pickle
from omnizart.utils import get_logger

logger = get_logger("Music Labels")


class LabelType:
    """Defines different types of `music` label for training.

    Defines functions that convert the customized label format into numpy
    arrays. The customized format makes it flexible to transform labels into
    different numpy formats according to the usage scenario, and also saves
    a lot of storage space.

    Parameters
    ----------
    mode: ['note', 'note-stream', 'pop-note-stream', 'frame', 'frame-stream']
        Mode of label conversion.

        * note: outputs onset and duration channel
        * note-stream: outputs onset and duration channel of instruments (for MusicNet)
        * pop-note-stream: similar to ``note-stream`` mode, but is for ``Pop`` dataset
        * frame: same as ``note`` mode. To truly output duration channel only, use \
Example #7
import tensorflow as tf

from omnizart.io import write_yaml
from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.train import get_train_val_feat_file_list
from omnizart.utils import get_logger, ensure_path_exists, parallel_generator
from omnizart.constants.datasets import MusicNetStructure
from omnizart.setting_loaders import BeatSettings
from omnizart.beat.features import extract_musicnet_feature, extract_musicnet_label, extract_feature_from_midi
from omnizart.beat.prediction import predict
from omnizart.beat.inference import inference
from omnizart.models.rnn import blstm, blstm_attn
from omnizart.models.t2t import MultiHeadAttention


logger = get_logger("Beat Transcription")


class BeatTranscription(BaseTranscription):
    """Application class for beat tracking in MIDI domain."""
    def __init__(self, conf_path=None):
        super().__init__(BeatSettings, conf_path=conf_path)

        self.custom_objects = {"MultiHeadAttention": MultiHeadAttention}

    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe beat positions in the given MIDI.

        Tracks beats in the symbolic domain. Outputs three files if the output path is given:
        *<filename>.mid*, *<filename>_beat.csv*, and *<filename>_down_beat.csv*, where *filename*
        is the name of the input MIDI without its extension. The *.csv* files record the beat
Example #8
# pylint: disable=W0201
import os
import abc

import six
import numpy as np
import tensorflow as tf
from tensorflow.python.keras.utils import tf_utils

from omnizart.io import write_yaml
from omnizart.utils import get_logger, ensure_path_exists

logger = get_logger("Callbacks")


class Callback(metaclass=abc.ABCMeta):
    """Base class of all callback classes"""
    def __init__(self, monitor=None):
        if monitor is not None:
            self.monitor = monitor
            if "acc" in monitor:
                self.monitor_op = np.greater
            else:
                self.monitor_op = np.less

    def on_train_begin(self, history=None):
        pass

    def on_train_end(self, history=None):
        pass
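As a usage sketch, a subclass can use ``monitor_op`` to decide whether the monitored metric improved. The ``SimpleEarlyStop`` name and the ``on_epoch_end`` hook are illustrative assumptions, not part of the excerpt:

class SimpleEarlyStop(Callback):
    """Illustrative subclass: stop training when the metric stops improving."""
    def __init__(self, monitor="val_loss", patience=3):
        super().__init__(monitor=monitor)
        self.patience = patience
        self.best = np.inf if self.monitor_op is np.less else -np.inf
        self.wait = 0
        self.stop_training = False

    def on_epoch_end(self, epoch, history=None):
        # Assumes the training loop reports the latest metrics in `history`.
        current = history[self.monitor]
        if self.monitor_op(current, self.best):
            self.best, self.wait = current, 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stop_training = True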
Example #9
# pylint: disable=W0102,R0914

import math

import pretty_midi
import numpy as np
from scipy.interpolate import CubicSpline
from scipy.signal import find_peaks
from librosa import note_to_midi

from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, MIDI_PROGRAM_NAME_MAPPING
from omnizart.utils import get_logger

logger = get_logger("Music Inference")


def roll_down_sample(data, occur_num=3, base=88):
    """Down sample feature size for a single pitch.

    Down sample the feature size from 354 to 88 for inferring the notes.

    Parameters
    ----------
    data: 2D numpy array
        The thresholded 2D prediction.
    occur_num: int
        For each pitch, the original prediction expands 4 bins wide. This value
        determines how many positive bins there should be after down sampling
        to count as a real activation.
    base: int
        Should be constant, as there are 88 pitches on a piano.
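The function body is truncated in this excerpt. A sketch of the described reduction, assuming the 4-bins-per-pitch grouping from the docstring (an illustrative reconstruction, not the verbatim source):

def roll_down_sample_sketch(data, occur_num=3, base=88):
    # Group every `bins_per_pitch` frequency bins into one piano pitch; a
    # pitch counts as active when at least `occur_num` bins are positive.
    bins_per_pitch = data.shape[1] // base
    output = np.zeros((data.shape[0], base))
    for pitch in range(base):
        group = data[:, pitch * bins_per_pitch:(pitch + 1) * bins_per_pitch]
        output[:, pitch] = (group > 0).sum(axis=1) >= occur_num
    return output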
Example #10
# -*- coding: utf-8 -*-
"""
Author: Lisu

Maintainer: BreezeWhite
"""
# pylint: disable=C0103,W0102,R0914
import numpy as np
import scipy

from omnizart.io import load_audio
from omnizart.utils import get_logger, parallel_generator

logger = get_logger("CFP Feature")


def STFT(x, fr, fs, Hop, h):
    t = np.arange(Hop, np.ceil(len(x) / float(Hop)) * Hop, Hop)
    N = int(fs / float(fr))
    window_size = len(h)
    f = fs * np.linspace(0, 0.5, np.round(N / 2).astype("int"), endpoint=True)
    Lh = int(np.floor(float(window_size - 1) / 2))
    tfr = np.zeros((int(N), len(t)), dtype=np.float64)  # np.float is removed in NumPy >= 1.24

    for icol, ti in enumerate(t):
        ti = int(ti)
        tau = np.arange(int(-min([round(N / 2.0) - 1, Lh, ti - 1])),
                        int(min([round(N / 2.0) - 1, Lh,
                                 len(x) - ti])))
        indices = np.mod(N + tau, N) + 1
        # Assumed completion of the truncated statement: apply the normalized
        # window around each time index (standard CFP-style STFT frame filling).
        tfr[indices - 1, icol] = x[ti + tau - 1] * h[Lh + tau] / np.linalg.norm(h[Lh + tau])
Example #11
import librosa
import numpy as np

from omnizart.io import load_audio
from omnizart.utils import get_logger

logger = get_logger("CQT Feature")


def post_process_cqt(gram):
    """
    Normalize and log-scale a Constant-Q spectrogram

    Parameters
    ----------
    gram: np.ndarray
        Constant-Q spectrogram, constructed from ``librosa.cqt``.

    Returns
    -------
    log_normalized_gram: np.ndarray
        Log-magnitude, L2-normalized constant-Q spectrogram.
    """
    # Compute log amplitude
    gram = (librosa.amplitude_to_db(np.abs(gram), amin=1e-06, top_db=80.0) +
            80.001) * (100.0 / 80.0)  # noqa: E226

    # and L2 normalize
    gram = librosa.util.normalize(gram.T, norm=2.0, axis=1)
    return gram.astype(np.float32)
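A usage sketch (the audio file name is a placeholder):

y, sr = librosa.load("song.wav", sr=None, mono=True)  # hypothetical input file
cqt = librosa.cqt(y, sr=sr)
gram = post_process_cqt(cqt)  # shape (frames, bins), float32, rows L2-normalized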
Example #12
from mir_eval import sonify
from mir_eval.util import midi_to_hz
from scipy.io.wavfile import write as wavwrite

from omnizart.io import write_yaml, write_agg_f0_results
from omnizart.utils import get_logger, parallel_generator, get_filename, ensure_path_exists, aggregate_f0_info
from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.constants import datasets as d_struct
from omnizart.feature.cfp import extract_patch_cfp
from omnizart.setting_loaders import PatchCNNSettings
from omnizart.models.patch_cnn import patch_cnn_model
from omnizart.patch_cnn.inference import inference
from omnizart.vocal.labels import MIR1KlabelExtraction
from omnizart.train import get_train_val_feat_file_list

logger = get_logger("Patch CNN Transcription")


class PatchCNNTranscription(BaseTranscription):
    """Application class of PatchCNN module."""
    def __init__(self, conf_path=None):
        super().__init__(PatchCNNSettings, conf_path=conf_path)

    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe frame-level fundamental frequency of vocal from the given audio.

        Parameters
        ----------
        input_audio: Path
            Path to the wav audio file.
        model_path: Path
Example #13
"""Utility functions for Music module"""

import numpy as np
from scipy.special import expit

from omnizart.utils import get_logger

logger = get_logger("Music Prediction")


def cut_frame(frm, ori_feature_size=352, feature_num=384):
    feat_num = frm.shape[1]
    assert feat_num == feature_num

    cut_start = (feat_num - ori_feature_size) // 2  # noqa: E226
    c_range = range(cut_start, cut_start + ori_feature_size)

    return frm[:, c_range]


def cut_batch_pred(b_pred):
    t_len = len(b_pred[0])
    cut_rr = range(round(t_len * 0.25), round(t_len * 0.75))
    cut_pp = []
    for pred in b_pred:
        cut_pp.append(pred[cut_rr])

    return np.array(cut_pp)


def create_batches(feature, b_size, timesteps, feature_num=384):
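    # Assumed continuation (the excerpt is truncated here): zero-pad the
    # frequency axis up to feature_num, slice the time axis into
    # timesteps-long chunks, and group the chunks into batches of b_size.
    # An illustrative reconstruction, not the verbatim source.
    pad_feat = feature_num - feature.shape[1]
    pad_width = ((0, 0), (0, pad_feat)) + ((0, 0),) * (feature.ndim - 2)
    feature = np.pad(feature, pad_width)
    slices = [feature[idx:idx + timesteps] for idx in range(0, len(feature) - timesteps + 1, timesteps)]
    return [np.array(slices[idx:idx + b_size]) for idx in range(0, len(slices), b_size)]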
Example #14
from concurrent.futures import ProcessPoolExecutor

import scipy
import numpy as np
from madmom.features import (
    DBNDownBeatTrackingProcessor,
    RNNDownBeatProcessor,
    DBNBeatTrackingProcessor,
    RNNBeatProcessor,
    BeatTrackingProcessor,
)

from omnizart.io import load_audio
from omnizart.utils import get_logger

logger = get_logger("Beat Extraction")


class MadmomBeatTracking:
    """Extract beat information with madmom library.

    Three different beat tracking methods are used together for producing a more
    stable beat tracking result.
    """
    def __init__(self, num_threads=3):
        self.num_threads = num_threads

    def _get_dbn_down_beat(self,
                           audio_data_in1,
                           min_bpm_in=50,
                           max_bpm_in=230):
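        # Assumed continuation (the excerpt is truncated here): compute the
        # RNN down-beat activations, then decode them with the DBN tracker.
        # An illustrative reconstruction using the imported processors.
        act = RNNDownBeatProcessor()(audio_data_in1)
        proc = DBNDownBeatTrackingProcessor(
            beats_per_bar=[3, 4], min_bpm=min_bpm_in, max_bpm=max_bpm_in, fps=100)
        return proc(act)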
Example #15
import math
import numpy as np

from omnizart.utils import get_logger

logger = get_logger("Beat Prediction")

#: Step size for slicing the feature. Ratio to the timesteps of the model input feature.
STEP_SIZE_RATIO = 0.5


def create_batches(feature, timesteps, batch_size=8):
    """Create a 4D output from the 2D feature for model prediciton.

    Create overlapped input features, and collect feature slices into batches.
    The overlap size is 1/4 length to the timesteps.

    Parameters
    ----------
    feature: 2D numpy array
        The feature representation for the model.
    timesteps: int
        Size of the input feature dimension.
    batch_size: int
        Batch size.

    Returns
    -------
    batches: 4D numpy array
        Batched feature slices with dimension: batches x batch_size x timesteps x feat.
    """
Example #16
import math

import numpy as np
import pretty_midi
from scipy.interpolate import interp1d

from omnizart.utils import get_logger
from omnizart.base import Label
from omnizart.constants.midi import LOWEST_MIDI_NOTE
from omnizart.constants.datasets import MusicNetStructure

logger = get_logger("Beat features")


def extract_feature_from_midi(midi_path, t_unit=0.01):
    """Extract feature for beat module from MIDI file.

    See Also
    --------
    omnizart.beat.features.extract_feature:
        The main feature extraction function of beat module.
    """
    midi = pretty_midi.PrettyMIDI(midi_path)
    labels = []
    for inst in midi.instruments:
        for note in inst.notes:
            labels.append(
                Label(start_time=note.start,
                      end_time=note.end,
                      note=note.pitch))
    return extract_feature(labels, t_unit=t_unit)
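A usage sketch (the MIDI path is a placeholder):

feature = extract_feature_from_midi("performance.mid", t_unit=0.01)  # hypothetical input file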
Example #17
import os
import glob
import random
from os.path import join as jpath
from abc import ABCMeta, abstractmethod

import h5py
import tensorflow as tf
from tensorflow.keras.models import model_from_yaml

from omnizart import MODULE_PATH
from omnizart.utils import get_logger, ensure_path_exists, get_filename
from omnizart.constants.midi import LOWEST_MIDI_NOTE, HIGHEST_MIDI_NOTE

logger = get_logger("Base Class")


class BaseTranscription(metaclass=ABCMeta):
    """Base class of transcription applications."""
    def __init__(self, setting_class, conf_path=None):
        self.setting_class = setting_class
        self.settings = setting_class(conf_path=conf_path)
        self.custom_objects = {}

    @abstractmethod
    def transcribe(self, input_audio, model_path, output="./"):
        raise NotImplementedError

    def get_model(self, settings):
        """Get the model from the python source file.
Example #18
# pylint: disable=C0112
import os
import csv
import glob
from os.path import join as jpath
from shutil import copy

from omnizart.io import load_yaml
from omnizart.utils import ensure_path_exists, get_logger
from omnizart.remote import download_large_file_from_google_drive


logger = get_logger("Constant Datasets")


def _get_file_list(dataset_path, dirs, ext):
    files = []
    for _dir in dirs:
        files += glob.glob(os.path.join(dataset_path, _dir, "*" + ext))
    return files


class BaseStructure:
    """Defines the necessary attributes and common functions for each sub-dataset structure class.

    All sub-dataset structure classes should inherit this base class to ensure
    the necessary attributes and methods are overridden.
    """
Example #19
import os
import csv
import pickle

import yaml
import librosa

from omnizart.utils import ensure_path_exists, LazyLoader, get_logger

# Lazy load the Spleeter package to avoid pulling in large dependencies
# and to speed up imports.
adapter = LazyLoader("adapter", globals(), "spleeter.audio.adapter")
logger = get_logger("IO")


def dump_pickle(data, save_to):
    """Dump data to the given path.

    Parameters
    ----------
    data: python object
        Data to store. Should be a Python built-in type such as `dict`, `list`, `str`, or `int`.
    save_to: Path
        The full path to store the pickle file, including file name.
        Will create the directory if the given path doesn't exist.

    """
    base_dir = os.path.dirname(save_to)
    ensure_path_exists(base_dir)
    with open(save_to, "wb") as pkl_file:
        pickle.dump(data, pkl_file)
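A usage sketch (the output path is a placeholder):

dump_pickle({"notes": [60, 64, 67]}, "./output/labels.pkl")  # creates ./output/ if missing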
Example #20
from spleeter.separator import Separator
from spleeter.utils.logging import get_logger as sp_get_logger

from omnizart.io import load_audio, write_yaml
from omnizart.utils import get_logger, resolve_dataset_type, parallel_generator, ensure_path_exists, LazyLoader
from omnizart.constants import datasets as d_struct
from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.feature.cfp import extract_vocal_cfp, _extract_vocal_cfp
from omnizart.setting_loaders import VocalSettings
from omnizart.vocal import labels as lextor
from omnizart.vocal.prediction import predict
from omnizart.vocal.inference import infer_interval, infer_midi
from omnizart.train import get_train_val_feat_file_list
from omnizart.models.pyramid_net import PyramidNet

logger = get_logger("Vocal Transcription")
vcapp = LazyLoader("vcapp", globals(), "omnizart.vocal_contour")


class VocalTranscription(BaseTranscription):
    """Application class for vocal note transcription.

    This application implements the training procedure in a semi-supervised way.
    """
    def __init__(self, conf_path=None):
        super().__init__(VocalSettings, conf_path=conf_path)

        # Disable logging information of Spleeter
        sp_logger = sp_get_logger()
        sp_logger.setLevel(40)  # logging.ERROR
Example #21
import h5py
import numpy as np
import tensorflow as tf

from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.setting_loaders import ChordSettings
from omnizart.io import write_yaml
from omnizart.utils import get_logger, ensure_path_exists, parallel_generator
from omnizart.constants.datasets import McGillBillBoard
from omnizart.feature.chroma import extract_chroma
from omnizart.chord.features import extract_feature_label
from omnizart.chord.inference import inference, write_csv
from omnizart.train import get_train_val_feat_file_list
from omnizart.models.chord_model import ChordModel, ReduceSlope

logger = get_logger("Chord Application")


class ChordTranscription(BaseTranscription):
    """Application class for chord transcription."""
    def __init__(self, conf_path=None):
        super().__init__(ChordSettings, conf_path=conf_path)

    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe chords in the audio.

        This function transcribes the chord progression in the audio and outputs MIDI
        and CSV files. The MIDI file is provided for quick validation by listening
        directly to the chords. The complete transcription results are listed in the CSV
        file, which contains each chord's name and its start and end times.
Example #22
from omnizart.music.prediction import predict
from omnizart.music.labels import (LabelType, MaestroLabelExtraction,
                                   MapsLabelExtraction,
                                   MusicNetLabelExtraction, PopLabelExtraction)
from omnizart.music.losses import focal_loss, smooth_loss
from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.utils import get_logger, parallel_generator, ensure_path_exists, resolve_dataset_type
from omnizart.io import dump_pickle, write_yaml
from omnizart.train import train_epochs, get_train_val_feat_file_list
from omnizart.callbacks import EarlyStopping, ModelCheckpoint
from omnizart.setting_loaders import MusicSettings
from omnizart.constants.midi import MUSICNET_INSTRUMENT_PROGRAMS, POP_INSTRUMENT_PROGRAMES
from omnizart.constants.feature import FEATURE_NAME_TO_NUMBER
import omnizart.constants.datasets as d_struct

logger = get_logger("Music Transcription")


class MusicTranscription(BaseTranscription):
    """Application class for music transcription.

    Inherits from the BaseTranscription class to make sure everything
    needed gets overridden.
    """
    def __init__(self, conf_path=None):
        super().__init__(MusicSettings, conf_path=conf_path)
        self.mode_inst_mapping = {
            "Piano": MUSICNET_INSTRUMENT_PROGRAMS,
            "Stream": MUSICNET_INSTRUMENT_PROGRAMS,
            "Pop": POP_INSTRUMENT_PROGRAMES
        }
Example #23
from mir_eval import sonify

from omnizart.base import BaseTranscription, BaseDatasetLoader
from omnizart.setting_loaders import VocalContourSettings
from omnizart.feature.wrapper_func import extract_cfp_feature
from omnizart.utils import get_logger, ensure_path_exists, parallel_generator, resolve_dataset_type, aggregate_f0_info
from omnizart.io import write_yaml, write_agg_f0_results
from omnizart.train import train_epochs, get_train_val_feat_file_list
from omnizart.callbacks import EarlyStopping, ModelCheckpoint
from omnizart.vocal_contour.inference import inference
from omnizart.vocal_contour import labels as lextor
from omnizart.constants import datasets as d_struct
from omnizart.models.u_net import semantic_segmentation
from omnizart.music.losses import focal_loss

logger = get_logger("Vocal Contour")


class VocalContourTranscription(BaseTranscription):
    """Application class for vocal-contour transcription."""
    def __init__(self, conf_path=None):
        super().__init__(VocalContourSettings, conf_path=conf_path)

    def transcribe(self, input_audio, model_path=None, output="./"):
        """Transcribe frame-level fundamental frequency of vocal from the given audio.

        Parameters
        ----------
        input_audio: Path
            Path to the wav audio file.
        model_path: Path