Exemplo n.º 1
0
def parse_args(argv):
    """Parse the command-line arguments of the "bittner" (HCQT) model.

    Starts from the shared parser returned by
    ``common.common_arguments_parser()``, registers the model-specific
    options, parses ``argv`` and then hands the namespace together with the
    ``defaults`` mapping below to ``common.argument_defaults``.  The value
    returned by that call is forwarded to ``common.name`` with the tag
    ``"bittner"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args`` (after the calls to
        ``common.argument_defaults`` and ``common.name``).
    """
    parser = common.common_arguments_parser()
    # Help text corrected: it used to read "Postprocessing layer", which does
    # not describe this option (the default value below is "hcqt").
    parser.add_argument("--spectrogram", type=str, help="Spectrogram method")
    # NOTE(review): this option carries an argparse-level default, so it is
    # never None after parsing -- confirm that common.argument_defaults
    # handles that the way the "capacity_multiplier" entry below expects.
    parser.add_argument("--capacity_multiplier",
                        default=8,
                        type=int,
                        help="Capacity")
    parser.add_argument("--voicing",
                        action='store_true',
                        help="Add voicing model.")

    args = parser.parse_args(argv)

    # Frame hop in samples; context_width and frame_width are derived from it.
    hop_length = 512
    defaults = {
        "samplerate": 44100,
        "context_width": 14 * hop_length,
        "annotations_per_window": 20,
        "hop_size": 1,
        "frame_width": hop_length,
        "note_range": 72,
        "min_note": 24,
        "batch_size": 8,
        "evaluate_every": 5000,
        "evaluate_small_every": 1000,
        "annotation_smoothing": 0.177,
        "spectrogram": "hcqt",
        "capacity_multiplier": 8,
        "voicing": False
    }
    specified_args = common.argument_defaults(args, defaults)
    common.name(args, specified_args, "bittner")

    return args
Exemplo n.º 2
0
def parse_args(argv):
    """Parse the command-line arguments of the "bittner" (CQT) model.

    Extends the parser from ``common.common_arguments_parser()`` with the
    spectrogram and stacking options, parses ``argv`` and passes the result
    plus the ``defaults`` mapping to ``common.argument_defaults``; the value
    returned by that call goes to ``common.name`` with the tag ``"bittner"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # (flag, value type, help text) -- registered in this exact order.
    model_options = (
        ("--capacity_multiplier", int, "Capacity multiplier"),
        ("--spectrogram", str, "Spectrogram method"),
        ("--spectrogram_top_db", float, "Spectrogram top_db"),
        ("--spectrogram_filter_scale", float, "Spectrogram filter_scale"),
        ("--spectrogram_undertone_stacking", int, "spectrogram undertone stacking"),
        ("--spectrogram_overtone_stacking", int, "spectrogram overtone stacking"),
        ("--undertone_stacking", int, "Undertone stacking in the model"),
        ("--overtone_stacking", int, "Overtone stacking in the model"),
    )
    for flag, value_type, description in model_options:
        parser.add_argument(flag, type=value_type, help=description)

    args = parser.parse_args(argv)

    defaults = {
        "samplerate": 44100,
        "context_width": 0,
        "annotations_per_window": 50,
        "hop_size": 1,
        # HOP_LENGTH is a module-level name defined outside this function.
        "frame_width": HOP_LENGTH,
        "note_range": 72,
        "min_note": 24,
        "evaluate_every": 5000,
        "evaluate_small_every": 1000,
        "spectrogram": "cqt",
        "learning_rate": 0.001,
        "learning_rate_decay": 0.85,
        "learning_rate_decay_steps": 5000,
        "undertone_stacking": 0,
        "overtone_stacking": 1,
        "spectrogram_undertone_stacking": 1,
        "spectrogram_overtone_stacking": 5,
        "spectrogram_top_db": 80,
        "spectrogram_filter_scale": 1.0,
        "capacity_multiplier": 64,
    }
    common.name(args, common.argument_defaults(args, defaults), "bittner")

    return args
Exemplo n.º 3
0
def parse_args(argv):
    """Parse the command-line arguments of the "crepe" model.

    Adds the model-specific options to the parser obtained from
    ``common.common_arguments_parser()``, parses ``argv`` and passes the
    namespace with the ``defaults`` mapping to ``common.argument_defaults``.
    The value returned by that call is given to ``common.name`` together
    with the tag ``"crepe"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword arguments).
    option_specs = [
        ("--input_normalization",
         dict(type=int, help="Enable normalizing each input example")),
        ("--capacity_multiplier",
         dict(type=int, help="Capacity multiplier of the model")),
        ("--multiresolution_convolution",
         dict(type=int,
              help="Number of different resolution of the first convolution layer")),
        ("--variable_stride",
         dict(action='store_true', help="Variable stride")),
        ("--first_layer_capacity",
         dict(type=int, help="Capacity multiplier")),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args = parser.parse_args(argv)

    defaults = dict(
        # Change some of the common defaults
        context_width=978,
        input_normalization=1,
        capacity_multiplier=16,
        multiresolution_convolution=0,
        variable_stride=False,
        first_layer_capacity=1,
    )
    overridden = common.argument_defaults(args, defaults)
    common.name(args, overridden, "crepe")

    return args
Exemplo n.º 4
0
def parse_args(argv):
    """Parse the command-line arguments of the "spctrgrm" model.

    Registers the input, model, context, residual, regularization and
    voicing options on the parser from ``common.common_arguments_parser()``,
    parses ``argv`` and passes the namespace with the ``defaults`` mapping to
    ``common.argument_defaults``.  The value returned by that call is given
    to ``common.name`` together with the tag ``"spctrgrm"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword arguments);
    # they are registered in the order listed.
    option_table = [
        # input
        ("--spectrogram", {"type": str, "help": "Spectrogram method"}),
        ("--spectrogram_top_db", {"type": float, "help": "Spectrogram top_db"}),
        ("--spectrogram_filter_scale",
         {"type": float, "help": "Spectrogram filter_scale"}),
        ("--spectrogram_undertone_stacking",
         {"type": int, "help": "spectrogram undertone stacking"}),
        ("--spectrogram_overtone_stacking",
         {"type": int, "help": "spectrogram overtone stacking"}),
        ("--cut_context",
         {"type": int,
          "help": "Cut unnecessary context, doesn't work with dilations!"}),
        # model
        ("--architecture", {"type": str, "help": "Model architecture"}),
        ("--filters", {"type": int, "help": "Filters in convolutions"}),
        ("--stacks", {"type": int, "help": "Stacks"}),
        ("--conv_range",
         {"type": int, "help": "Stack kernel size in frequency axis"}),
        ("--undertone_stacking",
         {"type": int, "help": "Undertone stacking in the model"}),
        ("--overtone_stacking",
         {"type": int, "help": "Overtone stacking in the model"}),
        ("--activation",
         {"type": str,
          "help": "Activation function for the convolution stack"}),
        # context
        ("--conv_ctx",
         {"nargs": "+", "type": int,
          "help": "Stack kernel sizes in time axis"}),
        ("--dilations",
         {"nargs": "+", "type": int,
          "help": "Dilation rate for the convolutions"}),
        ("--last_conv_kernel", {"nargs": 2, "type": int}),
        # residual
        ("--residual_hop",
         {"type": int,
          "help": "Size of one block around which there is a residual connection"}),
        ("--residual_end",
         {"type": int, "help": "No residual connection in last N layers"}),
        ("--residual_op",
         {"type": str,
          "help": "Residual connection operation (add for ResNet, concat for DenseNet)"}),
        # regularization
        ("--batchnorm", {"type": int}),
        ("--dropout", {"type": float}),
        ("--specaugment_prob", {"type": float}),
        ("--specaugment_freq_mask_num", {"type": int}),
        ("--specaugment_freq_mask_max", {"type": int}),
        ("--specaugment_time_mask_num", {"type": int}),
        ("--specaugment_time_mask_max", {"type": int}),
        # voicing module
        ("--voicing", {"type": int}),
        ("--voicing_input", {"type": str}),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args(argv)

    # Hop in samples; context_width and frame_width are derived from it.
    hop = 512

    # Change some of the common defaults
    common_overrides = {
        "samplerate": 44100,
        "context_width": 10 * hop,
        "annotations_per_window": 5,
        "hop_size": 1,
        "frame_width": hop,
        "note_range": 72,
        "min_note": 24,
        "evaluate_every": 5000,
        "evaluate_small_every": 1000,
        "annotation_smoothing": 0.18,
        "batch_size": 8,
    }
    # Model specific defaults
    model_defaults = {
        "learning_rate_decay_steps": 10000,
        "learning_rate_decay": 0.8,
        "spectrogram": "cqt",
        "spectrogram_top_db": 80,
        "spectrogram_filter_scale": 1.0,
        "spectrogram_undertone_stacking": 1,
        "spectrogram_overtone_stacking": 5,
        "cut_context": 1,
        "architecture": "deep_hcnn",
        "filters": 16,
        "stacks": 10,
        "conv_range": 3,
        "undertone_stacking": 0,
        "overtone_stacking": 1,
        "activation": "relu",
        "conv_ctx": [1],
        "dilations": [1],
        "last_conv_kernel": [1, 1],
        "residual_hop": 1,
        "residual_end": 0,
        "residual_op": "add",
        "batchnorm": 0,
        "dropout": 0.3,
        "specaugment_prob": 0.0,
        "specaugment_freq_mask_num": 2,
        "specaugment_freq_mask_max": 27,
        "specaugment_time_mask_num": 1,
        "specaugment_time_mask_max": 5,
        "voicing": 0,
        "voicing_input": "spectrogram_salience",
    }
    # The two groups share no keys; merging keeps the original key order.
    defaults = dict(common_overrides)
    defaults.update(model_defaults)

    common.name(args, common.argument_defaults(args, defaults), "spctrgrm")

    return args
def parse_args(argv):
    """Parse the command-line arguments of the "mir" model.

    Registers the input, model, context, residual and regularization options
    on the parser from ``common.common_arguments_parser()``, parses ``argv``
    and passes the namespace with the ``defaults`` mapping to
    ``common.argument_defaults``.  The value returned by that call is given
    to ``common.name`` together with the tag ``"mir"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword arguments);
    # they are registered in the order listed.
    option_table = [
        # input
        ("--spectrogram", dict(type=str, help="Spectrogram method")),
        ("--spectrogram_top_db", dict(type=float, help="Spectrogram top_db")),
        ("--spectrogram_filter_scale",
         dict(type=float, help="Spectrogram filter_scale")),
        ("--spectrogram_undertone_stacking",
         dict(type=int, help="spectrogram undertone stacking")),
        ("--spectrogram_overtone_stacking",
         dict(type=int, help="spectrogram overtone stacking")),
        ("--cut_context",
         dict(type=int,
              help="Cut unnecessary context, doesn't work with dilations!")),
        # model
        ("--architecture", dict(type=str, help="Model architecture")),
        ("--faster_hcnn", dict(type=int, help="HCNN implementation")),
        ("--use_bias", dict(type=int, help="use bias in conv2d")),
        ("--class_weighting", dict(type=int, help="use class weighting")),
        ("--filters", dict(type=int, help="Filters in convolutions")),
        ("--stacks", dict(type=int, help="Stacks")),
        ("--conv_range",
         dict(type=int, help="Stack kernel size in frequency axis")),
        ("--undertone_stacking",
         dict(type=int, help="Undertone stacking in the model")),
        ("--overtone_stacking",
         dict(type=int, help="Overtone stacking in the model")),
        ("--stacking_until",
         dict(type=int,
              help="Harmonic stacking in the model until Nth layer")),
        ("--activation",
         dict(type=str,
              help="Activation function for the convolution stack")),
        # context
        ("--conv_ctx",
         dict(nargs="+", type=int, help="Stack kernel sizes in time axis")),
        ("--dilations",
         dict(nargs="+", type=int,
              help="Dilation rate for the convolutions")),
        ("--last_conv_kernel", dict(nargs=2, type=int)),
        ("--last_pooling", dict(type=str)),
        # residual
        ("--residual_hop",
         dict(type=int,
              help="Size of one block around which there is a residual connection")),
        ("--residual_end",
         dict(type=int, help="No residual connection in last N layers")),
        ("--residual_op",
         dict(type=str,
              help="Residual connection operation (add for ResNet, concat for DenseNet)")),
        # regularization
        ("--batchnorm", dict(type=int)),
        ("--dropout", dict(type=float)),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args(argv)

    # hop_length = 256
    hop_length = 512  # FRAME-LEVEL INSTRUMENT RECOGNITION BY TIMBRE AND PITCH

    # Change some of the common defaults
    common_overrides = {
        "samplerate": 44100,
        "context_width": 4 * hop_length,
        "annotations_per_window": 1,
        "hop_size": 1,
        "frame_width": hop_length,
        "note_range": 11,
        "min_note": 0,
        "evaluate_every": 30000,
        "evaluate_small_every": 1000,
        "annotation_smoothing": 0.0,
        "batch_size": 32,
        "bins_per_semitone": 1,
        "unvoiced_loss_weight": 1.0,
        "datasets": ["musicnet_mir"],
    }
    # Model specific defaults
    model_defaults = {
        "learning_rate_decay_steps": 10000,
        "learning_rate_decay": 0.8,
        "spectrogram": "YunNingHung_cqt",
        "spectrogram_top_db": 110,
        "spectrogram_filter_scale": 1.0,
        "spectrogram_undertone_stacking": 1,
        "spectrogram_overtone_stacking": 5,
        "cut_context": 1,
        "architecture": "baseline",
        "faster_hcnn": 0,
        "use_bias": 1,
        "class_weighting": 1,
        "filters": 12,
        "stacks": 6,
        "conv_range": 3,
        "undertone_stacking": 1,
        "overtone_stacking": 3,
        "stacking_until": 999,
        "activation": "relu",
        "conv_ctx": [1],
        "dilations": [1],
        "last_conv_kernel": [1, 72],
        "last_pooling": "avg",
        "residual_hop": 1,
        "residual_end": 0,
        "residual_op": "add",
        "batchnorm": 0,
        "dropout": 0.3,
    }
    # The two groups share no keys; merging keeps the original key order.
    defaults = dict(common_overrides)
    defaults.update(model_defaults)

    common.name(args, common.argument_defaults(args, defaults), "mir")

    return args
Exemplo n.º 6
0
def parse_args(argv):
    """Parse the command-line arguments of the "wavenet" model.

    Registers the model-specific options on the parser from
    ``common.common_arguments_parser()``, parses ``argv`` and passes the
    namespace with the ``defaults`` mapping to ``common.argument_defaults``.
    The value returned by that call is given to ``common.name`` together
    with the tag ``"wavenet"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword arguments);
    # they are registered in the order listed.
    option_table = (
        ("--use_biases",
         dict(action='store_true', default=False,
              help="Use biases in the convolutions")),
        ("--input_normalization",
         dict(type=int, help="Enable normalizing each input example")),
        ("--initial_filter_width",
         dict(type=int, help="First conv layer filter width")),
        ("--initial_filter_padding",
         dict(type=str, help="First conv layer padding")),
        ("--filter_width",
         dict(type=int, help="Dilation stack filter width (2 or 3)")),
        ("--skip_channels", dict(type=int, help="Skip channels")),
        ("--residual_channels", dict(type=int, help="Residual channels")),
        ("--stack_number", dict(type=int, help="Number of dilated stacks")),
        ("--max_dilation", dict(type=int, help="Maximum dilation rate")),
        ("--dilation_layer_dropout",
         dict(type=float, help="Dropout in dilation layer")),
        ("--skip_layer_dropout",
         dict(type=float, help="Dropout in skip connections")),
        ("--skip", dict(type=str, help="Skip add or concat")),
        ("--postprocessing", dict(type=str, help="Postprocessing layer")),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args(argv)

    # Postprocessing default, kept as one literal so the value is unchanged.
    postprocessing_default = (
        "avgpool_p93_s93_Psame--conv_f256_k16_s8_Psame_arelu--conv_f256_k16_s8_Psame_arelu"
    )
    defaults = {
        "note_range": 72,
        "min_note": 24,
        "evaluate_every": 5000,
        "evaluate_small_every": 5000,
        "batch_size": 8,
        "annotations_per_window": 10,
        "context_width": 94,
        "annotation_smoothing": 0.18,
        "input_normalization": 1,
        "initial_filter_width": 32,
        "initial_filter_padding": "same",
        "filter_width": 3,
        "skip_channels": 64,
        "residual_channels": 32,
        "stack_number": 1,
        "max_dilation": 512,
        "dilation_layer_dropout": 0.0,
        "skip_layer_dropout": 0.0,
        "skip": "add",
        "postprocessing": postprocessing_default,
    }
    overridden = common.argument_defaults(args, defaults)
    common.name(args, overridden, "wavenet")

    return args
Exemplo n.º 7
0
def parse_args(argv):
    """Parse the command-line arguments of the "kelz" model.

    Registers the input, model and regularization options on the parser from
    ``common.common_arguments_parser()``, parses ``argv`` and passes the
    namespace with the ``defaults`` mapping to ``common.argument_defaults``.
    The value returned by that call is given to ``common.name`` together
    with the tag ``"kelz"``.

    Args:
        argv: Sequence of argument strings passed to ``parser.parse_args``.

    Returns:
        The namespace produced by ``parser.parse_args``.
    """
    parser = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword arguments);
    # they are registered in the order listed.
    option_table = [
        # input
        ("--spectrogram", {"type": str, "help": "Spectrogram method"}),
        # model
        ("--architecture", {"type": str, "help": "Model architecture"}),
        ("--filters", {"type": int, "help": "Filters in convolutions"}),
        ("--spectrogram_undertone_stacking",
         {"type": int, "help": "Undertone stacking in the spectrogram"}),
        ("--spectrogram_overtone_stacking",
         {"type": int, "help": "Overtone stacking in the spectrogram"}),
        ("--undertone_stacking",
         {"type": int, "help": "Undertone stacking in the model"}),
        ("--overtone_stacking",
         {"type": int, "help": "Overtone stacking in the model"}),
        ("--activation",
         {"type": str,
          "help": "Activation function for the convolution stack"}),
        # regularization
        ("--dropout", {"type": float}),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    args = parser.parse_args(argv)

    hop = 441 * 4  # 25 fps
    # context_width: 10*hop_length

    # Change some of the common defaults
    common_overrides = {
        "samplerate": 44100,
        "context_width": 2 * hop,
        "annotations_per_window": 1,
        "hop_size": 1,
        "frame_width": hop,
        "note_range": 88,
        "min_note": 21,
        "evaluate_every": 5000,
        "evaluate_small_every": 1000,
        "annotation_smoothing": 0.0,
        "batch_size": 128,
        "batch_size_evaluation": 1024,
        "bins_per_semitone": 1,
        "datasets": ["maps"],
    }
    # Model specific defaults
    model_defaults = {
        "learning_rate_decay_steps": 10000,
        "learning_rate_decay": 0.8,
        "spectrogram": "kelz",
        "architecture": "allconv",
        "filters": 16,
        "undertone_stacking": 0,
        "overtone_stacking": 1,
        "spectrogram_undertone_stacking": 0,
        "spectrogram_overtone_stacking": 1,
        "activation": "relu",
        "dropout": 0.25,
    }
    # The two groups share no keys; merging keeps the original key order.
    defaults = dict(common_overrides)
    defaults.update(model_defaults)

    common.name(args, common.argument_defaults(args, defaults), "kelz")

    return args