def testing(args, model=None):
    # load wav
    song = args.input_file
    x, fs = sf.read(song)
    results = None
    if args.jetson:
        sample_ptr = 0
        while sample_ptr < x.shape[0]:
            chunk_end = min(sample_ptr + MAX_LEN, x.shape[0])  # slice end is exclusive
            chunk = x[sample_ptr:chunk_end, :]
            sample_ptr += MAX_LEN

            # Feature extraction
            feature = feature_extraction(chunk, fs)
            feature = np.transpose(feature[0:4], axes=(2, 1, 0))

            # load model
            if model is None:
                model = load_model(args.model_path)

            # Inference
            print(feature[:, :, 0].shape)
            extract_result = inference(feature=feature[:, :, 0],
                                       model=model,
                                       batch_size=args.batch_size_test)

            # Output
            r = matrix_parser(extract_result)

            if results is None:
                results = r
            else:
                results = np.concatenate((results, r))
    else:
        # Feature extraction
        feature = feature_extraction(x, fs)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        if model is None:
            model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        results = matrix_parser(extract_result)

    np.savetxt(args.output_file + ".txt", results)
    print("FINISHED")
Example #2
    def predict_hdf(cls, hdf_paths, model_path, pred_batch_size=4):
        """
        This is a generator function.
        Asserts that corresponding label files with the extension .pickle exist in the
        same directory as the given hdf_paths.
        """

        if not isinstance(hdf_paths, list):
            hdf_paths = [hdf_paths]

        model = load_model(model_path)
        feature_type, channels, out_class, timesteps = model_info(model_path)

        for hdf_path in hdf_paths:
            with h5py.File(hdf_path, "r") as feat:
                label_path = hdf_path.replace(".hdf", ".pickle")
                label = pickle.load(open(label_path, "rb"))
                for key, ff in feat.items():
                    ll = label[key]
                    #pred = predict(ff[:,:,channels], model, timesteps, out_class, batch_size=pred_batch_size)
                    pred = predict_v1(ff[:, :, channels],
                                      model,
                                      timesteps,
                                      batch_size=pred_batch_size)

                    yield pred, ll, key
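
Since predict_hdf is a generator, a caller iterates over it. A short consumption sketch; the enclosing class name Predictor and the file paths are hypothetical:

# Each iteration yields the prediction, the ground-truth label, and the HDF
# key of one piece, as defined by the yield statement above.
for pred, label, key in Predictor.predict_hdf("song01.hdf", "model/seg"):
    print(key, pred.shape)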
Example #3
def main(args):
    # Pre-process features
    assert os.path.isfile(args.input_audio), \
        "The given path is not a file! Please check your input again. Given input: {}".format(
            args.input_audio)
    print("Processing features of input audio: {}".format(args.input_audio))
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))

        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)
    print("Predicting...")
    #pred = predict(feature[:,:,channels], model, timesteps, out_class, batch_size=4, overlap_ratio=2/4)
    pred = predict_v1(feature[:, :, channels], model, timesteps, batch_size=4)

    #p_out = h5py.File("pred.hdf", "w")
    #p_out.create_dataset("0", data=pred)
    #p_out.close()

    midi = MultiPostProcess(pred,
                            mode="note",
                            onset_th=args.onset_th,
                            dura_th=0.5,
                            frm_th=3,
                            inst_th=1.1,
                            t_unit=0.02)

    if args.to_midi is not None:
        midi.write(args.to_midi)
        print("Midi written as {}".format(args.to_midi))
def extract_melody(y, sr, model="Seg"):

    # Feature extraction
    feature = feature_extraction(y, sr)
    feature = np.transpose(feature[0:4], axes=(2, 1, 0))

    # load model
    model = load_model(model)

    # Inference
    print(feature[:, :, 0].shape)
    extract_result = inference(feature=feature[:, :, 0],
                               model=model,
                               batch_size=10)

    # Output
    r = matrix_parser(extract_result)

    return r
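
A minimal sketch of calling extract_melody, assuming soundfile is available and "Seg" names a saved model that load_model understands; the input file is hypothetical:

import soundfile as sf

y, sr = sf.read("train01.wav")       # y is the waveform, sr the sample rate
melody = extract_melody(y, sr, model="Seg")
print(melody.shape)                  # matrix_parser output; two columns per frame,
                                     # as used in the predictOne example below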
def main(args):
    # Pre-process features
    assert os.path.isfile(args.input_audio), "The given path is not a file! Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)
    
    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert(len(channels) == (args.num_harmonics*2+2))
        
        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))
        
        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))
        
        feature = np.dstack((spec, ceps))
    else:
        assert(len(channels) <= 4)
        
        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))
    
    feature = create_batches(feature[:,:,channels], b_size=16, timesteps=timesteps)
    model = load_model(args.model_path)
    

    print("Predicting...")
    pred = predict(feature, model)
    
    p_out = h5py.File("pred.hdf", "w")
    p_out.create_dataset("0", data=pred)
    p_out.close()

    notes, midi = PostProcess(pred)
    
    if args.to_midi is not None:
        midi.write(args.to_midi)
Example #6
    def predictOne(self, path: str):
        """
        method copied from the main file in the project
        """
        # pkg_resources.()
        # project = importlib.import_module("vendors.Vocal-Melody-Extraction.project")
        from project.MelodyExt import feature_extraction
        from project.utils import load_model, save_model, matrix_parser
        from project.test import inference
        from project.model import seg, seg_pnn, sparse_loss
        from project.train import train_audio

        # load wav
        song = path

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model

        model = load_model(
            resource_filename(
                __name__,
                "../../../vendors/Vocal-Melody-Extraction/Pretrained_models/" +
                self.parameters["model"].value))
        batch_size_test = 10
        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=batch_size_test)

        # Output
        r = matrix_parser(extract_result)
        return (Signal(r[:, 0], sampleRate=50), Signal(r[:, 1], sampleRate=50))
def main():
    # Arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--phase',
        help='phase: training or testing (default: %(default)s)',
        type=str,
        default='testing')

    #arguments for training
    parser.add_argument('-t',
                        '--model_type',
                        help='model type: seg or pnn (default: %(default)s)',
                        type=str,
                        default='seg')
    parser.add_argument(
        '-d',
        '--data_type',
        help='data type: audio or symbolic (default: %(default)s)',
        type=str,
        default='audio')
    parser.add_argument('-da',
                        '--dataset_path',
                        nargs='+',
                        help='path to dataset (default: %(default)s)',
                        type=str,
                        default='dataset')
    parser.add_argument('-la',
                        '--label_path',
                        nargs='+',
                        help='path to dataset labels (default: %(default)s)',
                        type=str,
                        default='dataset_label')
    parser.add_argument('-ms',
                        '--model_path_symbolic',
                        help='path to symbolic model (default: %(default)s)',
                        type=str,
                        default='model_symbolic')

    parser.add_argument(
        '-w',
        '--window_width',
        help='width of the input feature (default: %(default)s)',
        type=int,
        default=128)
    parser.add_argument(
        '-b',
        '--batch_size_train',
        help='batch size during training (default: %(default)s)',
        type=int,
        default=12)
    parser.add_argument('-e',
                        '--epoch',
                        help='number of epochs (default: %(default)s)',
                        type=int,
                        default=5)
    parser.add_argument('-n',
                        '--steps',
                        help='number of steps per epoch (default: %(default)s)',
                        type=int,
                        default=6000)

    parser.add_argument('-o',
                        '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str,
                        default="out")

    #arguments for testing
    parser.add_argument('-m',
                        '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str,
                        default='transfer_audio_directly')
    parser.add_argument('-i',
                        '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str,
                        default='train01.wav')
    parser.add_argument('-bb',
                        '--batch_size_test',
                        help='batch size during testing (default: %(default)s)',
                        type=int,
                        default=10)

    args = parser.parse_args()
    print(args)

    if (args.phase == "training"):
        #arguments setting
        TIMESTEPS = args.window_width

        #dataset_path = ["medleydb_48bin_all_4features", "mir1k_48bin_all_4features"]
        #label_path = ["medleydb_48bin_all_4features_label", "mir1k_48bin_all_4features_label"]
        dataset_path = args.dataset_path
        label_path = args.label_path

        # load or create model
        if ("seg" in args.model_type):
            model = seg(multi_grid_layer_n=1,
                        feature_num=384,
                        input_channel=1,
                        timesteps=TIMESTEPS)
        elif ("pnn" in args.model_type):
            model = seg_pnn(multi_grid_layer_n=1,
                            feature_num=384,
                            timesteps=TIMESTEPS,
                            prev_model=args.model_path_symbolic)

        model.compile(optimizer="adam",
                      loss={'prediction': sparse_loss},
                      metrics=['accuracy'])

        #train
        train_audio(model, args.epoch, args.steps, args.batch_size_train,
                    args.window_width, dataset_path, label_path)

        #save model
        save_model(model, args.output_model_name)
    else:
        # load wav
        song = args.input_file

        # Feature extraction
        feature = feature_extraction(song)
        feature = np.transpose(feature[0:4], axes=(2, 1, 0))

        # load model
        model = load_model(args.model_path)

        # Inference
        print(feature[:, :, 0].shape)
        extract_result = inference(feature=feature[:, :, 0],
                                   model=model,
                                   batch_size=args.batch_size_test)

        # Output
        r = matrix_parser(extract_result)

        np.savetxt("out_seg.txt", r)
def seg_pnn(feature_num=128,
            timesteps=256,
            multi_grid_layer_n=5,
            multi_grid_n=3,
            prev_model="melody_transfer_transpose"):

    layer_out = []
    input_score_48 = Input(shape=(timesteps, feature_num, 1),
                           name="input_score_48")
    input_score_12 = Input(shape=(timesteps, feature_num // 3, 1),
                           name="input_score_12")

    me_transfer_seg = seg(multi_grid_layer_n=1, timesteps=timesteps, prog=True)
    me_seg = load_model(prev_model)
    model_copy(me_seg, me_transfer_seg)

    #TODO: move inside model_copy
    for index, layer in enumerate(me_transfer_seg.layers):
        me_transfer_seg.layers[index].trainable = False

    o_p = me_transfer_seg([input_score_12])

    en_l = Conv2D(2**5, (7, 7), strides=(1, 1), padding="same")(input_score_48)

    o = adapter(o_p[0], 2**(5), dropout_rate=0.2)
    en_l = add([en_l, o])

    en_l1 = conv_block(en_l, 2**5, (3, 3), strides=(2, 2))
    en_l1 = conv_block(en_l1, 2**5, (3, 3), strides=(1, 1))
    layer_out.append(en_l1)

    o = adapter(o_p[1], 2**(5), dropout_rate=0.2)
    en_l1 = add([en_l1, o])

    en_l2 = conv_block(en_l1, 2**6, (3, 3), strides=(2, 2))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    layer_out.append(en_l2)

    o = adapter(o_p[2], 2**(6), dropout_rate=0.2)
    en_l2 = add([en_l2, o])

    en_l3 = conv_block(en_l2, 2**7, (3, 3), strides=(2, 2))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    layer_out.append(en_l3)

    o = adapter(o_p[3], 2**(7), dropout_rate=0.2)
    en_l3 = add([en_l3, o])

    en_l4 = conv_block(en_l3, 2**8, (3, 3), strides=(2, 2))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    layer_out.append(en_l4)

    o = adapter(o_p[4], 2**(8), dropout_rate=0.2)
    en_l4 = add([en_l4, o])

    feature = en_l4

    for i in range(multi_grid_layer_n):

        feature = BatchNormalization()(Activation("relu")(feature))
        feature = Dropout(0.3)(feature)
        m = BatchNormalization()(Conv2D(2**9, (1, 1),
                                        strides=(1, 1),
                                        padding="same",
                                        activation="relu")(feature))
        multi_grid = m
        for ii in range(multi_grid_n):
            m = BatchNormalization()(Conv2D(2**9, (3, 3),
                                            strides=(1, 1),
                                            dilation_rate=2**ii,
                                            padding="same",
                                            activation="relu")(feature))
            multi_grid = concatenate([multi_grid, m])
        multi_grid = Dropout(0.3)(multi_grid)
        feature = Conv2D(2**9, (1, 1), strides=(1, 1),
                         padding="same")(multi_grid)

        o = adapter(o_p[5], 2**(9), dropout_rate=0.3)
        feature = add([feature, o])

    feature = BatchNormalization()(Activation("relu")(feature))

    feature = Dropout(0.4)(feature)
    feature = Conv2D(2**8, (1, 1), strides=(1, 1), padding="same")(feature)
    feature = add([feature, layer_out[3]])
    de_l1 = transpose_conv_block(feature, 2**7, (3, 3), strides=(2, 2))

    o = adapter(o_p[6], 2**(7), kernel_size=(1, 5), dropout_rate=0.4)
    de_l1 = add([de_l1, o])

    skip = de_l1
    de_l1 = BatchNormalization()(Activation("relu")(de_l1))
    de_l1 = concatenate(
        [de_l1, BatchNormalization()(Activation("relu")(layer_out[2]))])
    de_l1 = Dropout(0.4)(de_l1)
    de_l1 = Conv2D(2**7, (1, 1), strides=(1, 1), padding="same")(de_l1)
    de_l1 = add([de_l1, skip])
    de_l2 = transpose_conv_block(de_l1, 2**6, (3, 3), strides=(2, 2))

    o = adapter(o_p[7], 2**(6), kernel_size=(1, 5), dropout_rate=0.4)
    de_l2 = add([de_l2, o])

    skip = de_l2
    de_l2 = BatchNormalization()(Activation("relu")(de_l2))
    de_l2 = concatenate(
        [de_l2, BatchNormalization()(Activation("relu")(layer_out[1]))])
    de_l2 = Dropout(0.4)(de_l2)
    de_l2 = Conv2D(2**6, (1, 1), strides=(1, 1), padding="same")(de_l2)
    de_l2 = add([de_l2, skip])
    de_l3 = transpose_conv_block(de_l2, 2**5, (3, 3), strides=(2, 2))

    o = adapter(o_p[8], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l3 = add([de_l3, o])

    skip = de_l3
    de_l3 = BatchNormalization()(Activation("relu")(de_l3))
    de_l3 = concatenate(
        [de_l3, BatchNormalization()(Activation("relu")(layer_out[0]))])
    de_l3 = Dropout(0.4)(de_l3)
    de_l3 = Conv2D(2**5, (1, 1), strides=(1, 1), padding="same")(de_l3)
    de_l3 = add([de_l3, skip])
    de_l4 = transpose_conv_block(de_l3, 2**5, (3, 3), strides=(2, 2))

    o = adapter(o_p[9], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l4 = add([de_l4, o])

    de_l4 = BatchNormalization()(Activation("relu")(de_l4))
    de_l4 = Dropout(0.4)(de_l4)
    out = Conv2D(2, (1, 1), strides=(1, 1), padding="same",
                 name='prediction')(de_l4)

    model = Model(inputs=[input_score_48, input_score_12], outputs=out)

    return model
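
seg_pnn wires a frozen, pretrained seg network into a second column through the adapter blocks. The training phase earlier builds it as below; a sketch, where "model_symbolic" is a hypothetical saved-model path:

# Instantiate and compile the progressive network the same way the training
# phase above does (see the seg_pnn call with prev_model=args.model_path_symbolic).
model = seg_pnn(multi_grid_layer_n=1,
                feature_num=384,
                timesteps=128,
                prev_model="model_symbolic")
model.compile(optimizer="adam",
              loss={'prediction': sparse_loss},
              metrics=['accuracy'])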
Example #9
def main(args):
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))
    
    # Hyper parameters that will be stored for future reuse
    hparams = {}

    # Parameters that will be passed to dataflow
    df_params = {}
    
    # Handling root path to the dataset
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert(os.path.isdir(args.dataset_path))
        d_path = args.dataset_path
    df_params["dataset_path"] = d_path
    
    # Number of channels that model need to know about
    ch_num = len(args.channels)
    channels = args.channels
    
    # Type of feature to use
    feature_type = "CFP"
    
    # Number of output classes
    out_classes = 3

    # Output model name
    out_model_name = args.output_model_name
    
    # Feature length on time dimension
    timesteps = args.timesteps

    # Continue to train on a pre-trained model
    if args.input_model is not None:
        # output model name is the same as input model
        out_model_name = args.input_model
        
        # load configuration of previous training
        feature_type, channels, out_classes, timesteps = model_info(args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments present
            if args.use_harmonic:
                ch_num = Harmonic_num * 2
                channels = [i for i in range(ch_num)]
                feature_type = "HCFP"
            if args.multi_instruments:
                out_classes = 12  # There are 11 instrument types in MusicNet in total

        
    df_params["b_sz"]      = args.train_batch_size
    df_params["phase"]     = "train"
    df_params["use_ram"]   = args.use_ram
    df_params["channels"]  = channels
    df_params["mpe_only"]  = not args.multi_instruments
    df_params["timesteps"] = timesteps

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    df_params["b_sz"]  = args.val_batch_size
    df_params["phase"] = "val"

    print("Loading validation data")
    val_df = df_cls(**df_params)

    
    hparams["channels"]       = channels
    hparams["timesteps"]      = timesteps
    hparams["feature_type"]   = feature_type
    hparams["output_classes"] = out_classes

    
    print("Creating/loading model")
    # Create model
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create new model
        #model = seg(multi_grid_layer_n=1, feature_num=384, input_channel=ch_num, timesteps=timesteps,
        #            out_class=out_classes)
        model = model_attn.seg(feature_num=384, input_channel=ch_num, timesteps=timesteps,
                               out_class=out_classes)
        
        out_model_name = os.path.join(default_model_path, out_model_name)
        # Save model and configurations
        if not os.path.exists(out_model_name):
            os.makedirs(out_model_name)
        save_model(model, out_model_name, **hparams)

    model.compile(optimizer="adam", loss={'prediction': sparse_loss}, metrics=['accuracy'])


    # create callbacks
    earlystop   = callbacks.EarlyStopping(monitor="val_acc", patience=args.early_stop)
    checkpoint  = callbacks.ModelCheckpoint(os.path.join(out_model_name, "weights.h5"), 
                                            monitor="val_acc", save_best_only=True, save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join("tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]
    
    print("Start training")
    # Start training
    train(model, train_df, val_df,
          epoch     = args.epoch,
          callbacks = callback_list,
          steps     = args.steps,
          v_steps   = args.val_steps)
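
The hparams dict above round-trips through save_model and model_info. A sketch of the convention, with a hypothetical model path and example values:

# Hyper-parameters go in as keyword arguments and come back out in a fixed
# order via model_info, as used throughout these examples.
hparams = {"channels": [1, 3],
           "timesteps": 128,
           "feature_type": "CFP",
           "output_classes": 2}
save_model(model, "model/my_out", **hparams)          # hypothetical path
feature_type, channels, out_classes, timesteps = model_info("model/my_out")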
Example #10
def main(args):
    if args.dataset not in dataflow_cls:
        raise TypeError("Unknown dataset: {}".format(args.dataset))

    # Hyper parameters that will be stored for future reuse
    hparams = {}

    # Parameters that will be passed to dataflow
    df_params = {}

    # Handling root path to the dataset
    d_path = dataset_paths[args.dataset]
    if args.dataset_path is not None:
        assert (os.path.isdir(args.dataset_path))
        d_path = args.dataset_path

    # Number of channels that model need to know about
    ch_num = len(args.channels)
    channels = args.channels

    # Type of feature to use
    feature_type = "CFP"

    # Output model name
    out_model_name = args.output_model_name

    # Feature length on time dimension
    timesteps = args.timesteps

    # Label type
    mode = "frame_onset"
    l_type = MusicNetLabelType(mode, timesteps=timesteps)

    # Number of output classes
    out_classes = l_type.get_out_classes()

    # Continue to train on a pre-trained model
    if args.input_model is not None:
        # load configuration of previous training
        feature_type, channels, out_classes, timesteps = model_info(
            args.input_model)
        ch_num = len(channels)
    else:
        if args.dataset == "MusicNet":
            # Special settings for MusicNet, which has multiple instruments present
            if args.use_harmonic:
                ch_num = HarmonicNum * 2
                channels = [i for i in range(ch_num)]
                feature_type = "HCFP"

    df_params["b_sz"] = args.train_batch_size
    df_params["phase"] = "train"
    df_params["use_ram"] = args.use_ram
    df_params["channels"] = channels
    df_params["timesteps"] = timesteps
    df_params["out_classes"] = out_classes
    df_params["dataset_path"] = d_path
    df_params["label_conversion_func"] = l_type.get_conversion_func()

    print("Loading training data")
    df_cls = dataflow_cls[args.dataset]
    train_df = df_cls(**df_params)

    print("Loading validation data")
    df_params["b_sz"] = args.val_batch_size
    df_params["phase"] = "val"
    val_df = df_cls(**df_params)

    hparams["channels"] = channels
    hparams["timesteps"] = timesteps
    hparams["feature_type"] = feature_type
    hparams["output_classes"] = out_classes

    print("Creating/loading model")
    # Create model
    if args.input_model is not None:
        model = load_model(args.input_model)
    else:
        # Create new model
        model = seg(feature_num=384,
                    input_channel=ch_num,
                    timesteps=timesteps,
                    out_class=out_classes,
                    multi_grid_layer_n=1,
                    multi_grid_n=3)
        #model = model_attn.seg(feature_num=384, input_channel=ch_num, timesteps=timesteps,
        #                       out_class=out_classes)

    # Save model and configurations
    out_model_name = os.path.join(default_model_path, out_model_name)
    if not os.path.exists(out_model_name):
        os.makedirs(out_model_name)
    save_model(model, out_model_name, **hparams)

    # Weighted loss
    weight = None  # Frame mode
    if weight is not None:
        assert (len(weight) == out_classes
                ), "Weight length: {}, out classes: {}".format(
                    len(weight), out_classes)
    #loss_func = lambda label,pred: sparse_loss(label, pred, weight=weight)
    loss_func = lambda label, pred: mctl_loss(
        label, pred, out_classes=out_classes, weight=weight)

    # Use multi-gpu to train the model (currently disabled; flip the flag to enable)
    if False:
        para_model = multi_gpu_model(model, gpus=2, cpu_merge=False)
        para_model.compile(optimizer="adam",
                           loss={'prediction': loss_func},
                           metrics=['accuracy'])
        model = para_model
    else:
        model.compile(optimizer="adam",
                      loss={'prediction': loss_func},
                      metrics=['accuracy'])

    # create callbacks
    earlystop = callbacks.EarlyStopping(monitor="val_loss",
                                        patience=args.early_stop)
    checkpoint = callbacks.ModelCheckpoint(os.path.join(
        out_model_name, "weights.h5"),
                                           monitor="val_loss",
                                           save_best_only=False,
                                           save_weights_only=True)
    tensorboard = callbacks.TensorBoard(log_dir=os.path.join(
        "tensorboard", args.output_model_name),
                                        write_images=True)
    callback_list = [checkpoint, earlystop, tensorboard]

    print("Start training")
    # Start training
    train(model,
          train_df,
          val_df,
          epoch=args.epoch,
          callbacks=callback_list,
          steps=args.steps,
          v_steps=args.val_steps)
def FullTest(model_path,
             test_path,
             label_path=None,
             pred_save_path="./predictions",
             use_ram=True,
             MAX_FRAME=1800):

    # Load files
    print("Loading files")
    features = parse_path(test_path)
    # Deleting from a list while iterating over it skips elements; filter instead.
    features = [ff for ff in features if ff.endswith(".hdf")]

    if label_path is not None:
        # Assume there are label files corresponding exactly to the test audios
        #labels = parse_path(label_path, label=True)
        labels = []
        for ff in features:
            ext = ff[ff.rfind("."):]
            if ext != ".hdf" and ext != ".pickle":
                continue

            ll = ff[(ff.rfind("/") + 1):]
            if "_label" not in ll:
                ll = ll[:ll.rfind(".")] + "_label.pickle"
            labels.append(os.path.join(label_path, ll))
        labels = load_files(labels, use_ram=use_ram)

    features = load_files(features, use_ram=use_ram)
    model = load_model(model_path)

    # Validate on model/feature configurations
    f_type, channels, out_classes, timesteps = model_info(model_path)
    key = list(features.keys())
    if f_type == "HCFP" and features[key[0]].shape[2] < 12:
        assert (
            False
        ), "The model uses HCFP as input feature, but loaded features are not."
    if f_type == "CFP" and features[key[0]].shape[2] == 12:
        assert (len(channels) == 2 and 1 in channels
                and 3 in channels), """The 
             The given feature are HCFP, but the model uses more feature types.
             Model input feature types: """ + str(
                    channels) + " ({0: Z, 1: Spec, 2: GCoS, 3: Ceps})"
        channels = [0, 6]
    mpe = False
    if out_classes == 2:
        mpe = True

    # Limit the number of frames per prediction to avoid running out of memory.
    # 9000 is suitable for 32 GB of RAM with one instrument only and all 4 channels used (peak RAM usage near 100%).
    #MAX_FRAME = 1800
    print("Max frame per prediction: ", MAX_FRAME)

    # Start to predict
    pred_out = h5py.File(os.path.join(pred_save_path, "pred.hdf"), "w")
    label_out = h5py.File(os.path.join(pred_save_path, "label.hdf"), "w")
    len_data = len(features)
    for idx in trange(len_data, desc='Dataset'):
        i = key[idx]
        feature = features[i][:]

        pred = predict(feature,
                       model,
                       MAX_FRAME=MAX_FRAME,
                       channels=list(channels),
                       instruments=out_classes - 1,
                       timesteps=timesteps)

        # Save to output
        pred_out.create_dataset(str(i),
                                data=pred,
                                compression="gzip",
                                compression_opts=5)
        del feature, features[i]

        # Process corresponding label
        if label_path is not None:
            ll = labels[0]
            if not isinstance(ll, np.ndarray):
                ll = label_conversion(ll, 352, 128, mpe=mpe)[:, :, 1:]
            label_out.create_dataset(str(i),
                                     data=ll,
                                     compression="gzip",
                                     compression_opts=5)
            del labels[0]

    pred_out.close()
    label_out.close()
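
A hedged usage sketch for FullTest. The directory layout is an assumption: ./features is taken to hold .hdf feature files with matching *_label.pickle files alongside:

# Hypothetical paths; predictions land in ./predictions/pred.hdf and
# ./predictions/label.hdf, as written by the function above.
FullTest("model/my_model",
         "./features",
         label_path="./features",
         pred_save_path="./predictions",
         use_ram=True,
         MAX_FRAME=1800)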
Example #12
def main():
    # Arguments
    parser = argparse.ArgumentParser()

    parser.add_argument('-p', '--phase',
                        help='phase: training or testing (default: %(default)s)',
                        type=str, default='testing')
    # arguments for training
    parser.add_argument('-d', '--dataset_path',
                        help='path to dataset (default: %(default)s)',
                        type=str, default='bach_dataset.pickle')

    parser.add_argument('-e', '--epoch',
                        help='number of epochs (default: %(default)s)',
                        type=int, default=80)
    parser.add_argument('-n', '--steps',
                        help='number of steps per epoch (default: %(default)s)',
                        type=int, default=6000)
    parser.add_argument('-b', '--batch_size_train',
                        help='batch size (default: %(default)s)',
                        type=int, default=88*3)
    parser.add_argument('-o', '--output_model_name',
                        help='name of the output model (default: %(default)s)',
                        type=str, default="out")
    # arguments for testing
    parser.add_argument('-m', '--model_path',
                        help='path to existing model (default: %(default)s)',
                        type=str, default='bach')
    parser.add_argument('-i', '--input_file',
                        help='path to input file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_all.mid")
    parser.add_argument('-ii', '--input_file_melody',
                        help='path to input melody file (default: %(default)s)',
                        type=str, default="LiveAndLetDie_main.mid")
    parser.add_argument('-s', '--subdivision',
                        help='subdivision within one beat (default: %(default)s)',
                        type=int, default=4)

    args = parser.parse_args()
    print(args)

    if(args.phase == "training"):
        #set arguments

        timesteps = 32
        step = 4
        subdivision = args.subdivision
        batch_size = args.batch_size_train
        dataset_path = args.dataset_path

        #create model

        model = lstm_wavenet(num_features_lr=91, timesteps=timesteps,
                             step=step, num_units_lstm=[150, 150, 150, 150],
                             num_dense=150,
                             conv_layers=5,
                             skip_layers=2)

        model.compile(optimizer="adam", loss={'prediction': 'binary_crossentropy'}, metrics=['accuracy'])

        #train

        model = train(model,
                      dataset_path,
                      subdivision,
                      epoch=args.epoch,
                      steps=args.steps,
                      timesteps=timesteps,
                      step=step,
                      batch_size=batch_size)
        #save model

        save_model(model, args.output_model_name)

    else:
        #load input file

        subdivision = args.subdivision
        path = args.input_file
        path_melody = args.input_file_melody
        score = midi2score(path, subdivision)

        if(path_melody == "none"):
            score_melody = np.zeros(score.shape)
        else:
            score_melody = midi2score(path_melody, subdivision)

        score = add_beat(score, subdivision)
        score_melody = add_beat(score_melody, subdivision)

        score = np.array(score[0:640])
        score_melody = np.array(score_melody[0:640])

        extended_score = padding(score, 32, 4)

        #load model

        model = load_model(model_name=args.model_path)

        #generation

        result = style_transfer(extended_score, score_melody, model, iter_num=25)

        #save result

        score2midi("test.mid", result, subdivision, 120, melody_constraint=True, melody=score_melody)
        print("saved")
def main(args):

    model = load_model(args.model_path)
    feature_type, channels, out_classes, timesteps = model_info(
        args.model_path)

    d_path = dataset_paths[args.dataset]
    df_cls = dataflow_cls[args.dataset]
    df = df_cls(d_path,
                "test",
                timesteps=timesteps,
                channels=channels,
                b_sz=16)
    eval_flow = EvalFlow(df)

    wr_f = None
    wr_l = None
    if args.save_pred is not None:
        if not os.path.exists(args.save_pred):
            os.makedirs(args.save_pred)
        out_f = h5py.File(os.path.join(args.save_pred, "pred.hdf"), "w")
        out_l = h5py.File(os.path.join(args.save_pred, "label.hdf"), "w")

        wr_f = lambda i, d: out_f.create_dataset(
            str(i), data=d, compression="gzip", compression_opts=5)
        wr_l = lambda i, l: out_l.create_dataset(
            str(i), data=l, compression="gzip", compression_opts=5)

    preds = []
    lls = []
    results = {"l_prec": [], "l_rec": [], "l_f": []}
    for i in range(10):  # NOTE: only the first 10 pieces; use range(len(eval_flow)) for all
        # This loop go through pieces
        print("{}/{}".format(i + 1, len(eval_flow)))

        features = []
        labels = []
        for x, y in eval_flow:
            # Collect batches from a single piece
            features.append(x)
            labels.append(y)
            #print(y.shape)

        pred, ll = predict(features, labels, model)
        """
        p = np.where(pred[:,:,1]>pred[:,:,0], 1, 0)
        l = ll[:,:,1]
        prec, rec, f, l_prec, l_rec, l_f = evaluation([p], [ll])
        results["l_prec"] += l_prec
        results["l_rec"]  += l_rec
        results["l_f"]    += l_f

        if len(preds)%2 == 0:
            eval_stats(l_prec, l_rec, l_f)
        """

        if args.save_pred is not None:
            wr_f(i, pred)
            wr_l(i, ll)


#for i in range(len(preds)):
#    p = preds[i]
#    a = np.where(p[:,:,1]>p[:,:,0], 1, 0)
#    preds[i] = roll_down_sample(a)
#    lls[i] = roll_down_sample(lls[i])

#eval_stats(l_prec, l_rec, l_f)

    if args.save_pred is not None:
        out_f.close()
        out_l.close()
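
The --save_pred branch above writes one gzip-compressed dataset per piece. A sketch of reading the output back; the directory name is hypothetical:

import h5py

# "predictions" stands in for whatever path was passed as --save_pred.
with h5py.File("predictions/pred.hdf", "r") as f:
    for key in f:
        print(key, f[key].shape)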
def arguments_post_process(args):

    # path to dataset
    if args.MusicNet_feature_path is not None:
        base_path = args.MusicNet_feature_path
        dataset_type = "MusicNet"
    elif args.MAPS_feature_path is not None:
        base_path = args.MAPS_feature_path
        dataset_type = "MAPS"
    else:
        raise ValueError(
            "Please assign at least one of the flags: --MAPS-feature-path or --MusicNet-feature-path")

    # Continue to train on a pre-trained model
    if args.input_model is not None:
        # output model name is the same as input model
        args.output_model_name = args.input_model

        # load configuration of previous training
        feature_type, channels, out_classes = model_info(
            os.path.join("model", args.input_model))
        ch_num = len(channels)
        args.channels = channels

        # load model
        model = load_model(os.path.join("model", args.input_model))

    # Train a new model
    else:
        # setup output model name
        if " + " in args.output_model_name:
            args.output_model_name = args.output_model_name[0:13] + str(
                args.channel)

        # Number of channels to use
        ch_num = len(args.channels)

        # Train on MusicNet
        if dataset_type == "MusicNet":
            # Input parameters
            if args.no_harmonic:
                ch_num = 2
                args.channels = [0, 6]  # Spec. and Ceps. channels
                args.channels = [1, 3]  # for training on MAESTRO (overrides the line above)
                feature_type = "CFP"
            else:
                ch_num = Harmonic_Num * 2
                args.channels = [i for i in range(ch_num)]  # including harmonic channels
                feature_type = "HCFP"
            # Output parameters
            if args.mpe_only:
                out_classes = 2
            else:
                out_classes = 12
        # Train on MAPS
        elif dataset_type == "MAPS":
            base_path = args.MAPS_feature_path
            out_classes = 2
            dataset_type = "MAPS"
            feature_type = "CFP"
            args.no_harmonic = True

        # Create new model
        model = seg(multi_grid_layer_n=1,
                    feature_num=384,
                    input_channel=ch_num,
                    timesteps=args.window_width,
                    out_class=out_classes)

        path = os.path.join("./model", args.output_model_name)
        # Save model and configurations
        if not os.path.exists(path):
            os.makedirs(path)
        save_model(model,
                   path,
                   feature_type=feature_type,
                   input_channels=args.channels,
                   output_classes=out_classes)

    model.compile(optimizer="adam",
                  loss={'prediction': sparse_loss},
                  metrics=['accuracy'])

    # Load files according to the recordings listed in the SongList.csv file
    distinct_file = set()
    with open(os.path.join(base_path, "SongList.csv"), newline='') as config:
        reader = csv.DictReader(config)
        for row in reader:
            distinct_file.add(row["File name"])
    dataset_path = list(distinct_file)[0:args.num_datasets]
    label_path = [i + "_label.pickle" for i in dataset_path]
    dataset_path = [i + ".hdf" for i in dataset_path]
    print("Datasets chosen: ", dataset_path)

    dataset_path = [os.path.join(base_path, dp) for dp in dataset_path]
    label_path = [os.path.join(base_path, lp) for lp in label_path]

    return model, dataset_path, label_path, dataset_type
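
arguments_post_process relies on a SongList.csv with a "File name" column, where each name maps to <name>.hdf and <name>_label.pickle in the same directory. A minimal reading sketch; base_path is a hypothetical dataset root:

import csv
import os

base_path = "./features"  # hypothetical
with open(os.path.join(base_path, "SongList.csv"), newline='') as config:
    names = {row["File name"] for row in csv.DictReader(config)}
print(sorted(names)[:5])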
def main(args):
    # Pre-process features
    assert os.path.isfile(args.input_audio), \
        "The given path is not a file! Please check your input again."
    print("Processing features")
    Z, tfrL0, tfrLF, tfrLQ, t, cenf, f = feature_extraction(args.input_audio)

    # Post-process feature according to the configuration of model
    feature_type, channels, out_class, timesteps = model_info(args.model_path)
    if feature_type == "HCFP":
        assert (len(channels) == (args.num_harmonics * 2 + 2))

        spec = []
        ceps = []
        for i in range(args.num_harmonics):
            spec.append(fetch_harmonic(tfrL0, cenf, i))
            ceps.append(fetch_harmonic(tfrLQ, cenf, i))

        spec = np.transpose(np.array(spec), axes=(2, 1, 0))
        ceps = np.transpose(np.array(ceps), axes=(2, 1, 0))

        feature = np.dstack((spec, ceps))
    else:
        assert (len(channels) <= 4)

        feature = np.array([Z, tfrL0, tfrLF, tfrLQ])
        feature = np.transpose(feature, axes=(2, 1, 0))

    model = load_model(args.model_path)

    print("Predicting...")
    pred = predict(feature,
                   model,
                   timesteps=timesteps,
                   channels=channels,
                   instruments=out_class - 1)
    p_out = h5py.File("pred.hdf", "w")
    p_out.create_dataset("0", data=pred)
    p_out.close()

    for i in range(pred.shape[2]):
        pred[:, :88, i] = peak_picking(pred[:, :, i])
    pred = pred[:, :88]

    # Print figure
    base_path = args.input_audio[:args.input_audio.rfind("/")]
    save_name = os.path.join(base_path, args.output_fig_name)

    plot_range = range(500, 1500)
    if max(plot_range) >= len(pred):  # avoid indexing past the end of pred
        plot_range = range(0, len(pred))
    pp = pred[plot_range]

    if out_class >= 11:
        assert out_class == 12, (
            "There is something wrong with the configuration. "
            "Expected value: 12, current value: {}".format(out_class))
        titles = MusicNet_Instruments
    else:
        assert out_class == 2, (
            "There is something wrong with the configuration. "
            "Expected value: 2, current value: {}".format(out_class))
        titles = ["Piano"]

    print("Ploting figure...")
    #PLOT(pp, save_name, plot_range, titles=titles)
    print("Output figure to {}".format(base_path))

    if args.to_midi is not None:
        midi_path = args.to_midi

        threshold = [0.45, 0.5]
        for th in threshold:
            midi = to_midi(pred, midi_path + "_" + str(th), threshold=th)

            roll = midi.get_piano_roll()
            print("Shape of output midi roll: ", roll.shape)