Python MusicTaggerCRNN示例，music_tagger_crnn.MusicTaggerCRNN Python示例

示例#1

0

显示文件

文件： music_classifier.py 项目： Kirill380/MusicAnalyzer

def genrePrediction(filePath):
    """Predict genre tags for one audio file with the 'msd' CRNN tagger.

    *WARNING*
    This model uses Batch Normalization, so the prediction
    is affected by batch. Use multiple, different data
    samples together (at least 4) for reliable prediction.

    The scores for all entries of ``tags`` are computed, ordered by
    ``sort_result`` and then restricted to the names listed in ``genres``.
    Each kept entry and the sum of the kept scores are printed.

    Args:
        filePath: path of the audio file passed to ``ap.compute_melgram``.

    Returns:
        list: the entries of ``sort_result(...)`` whose first element is
        one of ``genres``, in the order ``sort_result`` returned them.
    """
    print('Running genrePrediction() with network: crnn and backend: %s' %
          (K._BACKEND))
    # setting
    audio_paths = [filePath]

    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # Subset of ``tags`` that is reported as genres.
    genres = [
        'rock', 'pop', 'alternative', 'indie', 'electronic', 'dance',
        'alternative rock', 'jazz', 'metal', 'classic rock', 'soul',
        'indie rock', 'electronica', 'folk', 'punk', 'blues', 'hard rock',
        'experimental', 'Hip-Hop', 'heavy metal', 'country', 'funk', 'electro',
        'Progressive rock', 'rnb', 'indie pop', 'House'
    ]

    # Start from an empty batch and append one melgram per audio file along
    # axis 0.
    melgrams = np.zeros((0, 1, 96, 1366))

    for audio_path in audio_paths:
        melgram = ap.compute_melgram(audio_path)
        melgrams = np.concatenate((melgrams, melgram), axis=0)

    model = MusicTaggerCRNN(weights='msd')

    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Prediction is done. It took %d seconds." % (time.time() - start))

    sorted_result = sort_result(tags, pred_tags[0, :].tolist())
    print(filePath)

    # Build a real list: a lazy filter() object would be exhausted by the
    # print loop below, leaving nothing to sum or to return on Python 3.
    genre_result = [item for item in sorted_result if item[0] in genres]
    for item in genre_result:
        print(item)
    print(' ')

    print('Total = ' + str(sum(float(item[1]) for item in genre_result)))
    return genre_result

示例#2

0

显示文件

文件： train.py 项目： secklowsounds-stuff/music_vs_speech

def main(net_type, epochs=10):
    """Train the network named by ``net_type`` and save it to an .h5 file.

    The data returned by ``load_data(data.MELGRAM_LOCATION)`` is split 80/20
    (fixed ``random_state=42``); the 20% part is used as validation data
    during fitting.

    Args:
        net_type: 'cnn', 'small_cnn', 'smallest_cnn' or 'crnn'.
        epochs: number of epochs passed to ``model.fit``.

    Raises:
        ValueError: if ``net_type`` is not one of the names above.
    """
    features, labels, class_names = load_data(data.MELGRAM_LOCATION)
    print(class_names)

    n_classes = len(class_names)

    # Pick the network class first; every variant takes the same arguments.
    if net_type == 'cnn':
        network = MusicTaggerCNN
    elif net_type == 'small_cnn':
        network = SmallCNN
    elif net_type == 'smallest_cnn':
        network = SmallestCNN
    elif net_type == 'crnn':
        network = MusicTaggerCRNN
    else:
        raise ValueError(net_type)
    model = network(data.N_FRAMES, data.N_MELS, n_classes)

    model.summary()
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # TODO change batch size
    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    train_x, val_x, train_y, val_y = split
    model.fit(train_x, train_y, epochs=epochs,
              validation_data=(val_x, val_y))

    output_path = 'music_{}_epochs:{}.h5'.format(net_type, epochs)
    model.save(output_path)

示例#3

0

显示文件

def main(net):
    """Tag the four bundled demo tracks and print their top-10 tags.

    Melgrams are computed from the audio files when ``librosa_exists`` is
    true; otherwise the pre-computed ``.npy`` files are loaded instead.

    Args:
        net: 'cnn' to use ``MusicTaggerCNN`` or 'crnn' to use
            ``MusicTaggerCRNN``; both are built with ``weights='msd'``.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    # Fail before any melgram is computed; previously an unknown name only
    # surfaced later as an unbound ``model`` variable.
    if net not in ('cnn', 'crnn'):
        raise ValueError("net must be 'cnn' or 'crnn', got %r" % (net,))

    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = [
        'data/bensound-cute.mp3', 'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3', 'data/bensound-thejazzpiano.mp3'
    ]
    # Pre-computed melgrams, listed in the same order as ``audio_paths``.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # prepare data like this: start from an empty batch and append one
    # melgram per track along axis 0.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    else:
        model = MusicTaggerCRNN(weights='msd')
    model.summary()
    # predict the tags like this
    print('Predicting... with melgrams: ', melgrams.shape)
    start = time.time()
    pred_tags = model.predict(melgrams)
    # print like this... (single-argument print(...) works on Python 2 and 3)
    print("Prediction is done. It took %d seconds." % (time.time() - start))
    print('Printing top-10 tags for each track...')
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')

    return

示例#4

0

显示文件

文件： example_feat_extract.py 项目： capt4ce/music-auto_tagging-keras

def main(net):
    """Print the features the selected network produces for the demo tracks.

    *WARNING*
    This model uses Batch Normalization, so the prediction
    is affected by batch. Use multiple, different data
    samples together (at least 4) for reliable prediction.

    Melgrams are computed from the audio files when ``librosa_exists`` is
    true; otherwise the pre-computed ``.npy`` files are loaded instead.

    Args:
        net: 'cnn' to use ``MusicTaggerCNN`` or 'crnn' to use
            ``MusicTaggerCRNN``; both are built with ``weights=None`` and
            ``include_top=False``.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    # Fail before any melgram is computed; previously an unknown name only
    # surfaced later as an unbound ``model`` variable.
    if net not in ('cnn', 'crnn'):
        raise ValueError("net must be 'cnn' or 'crnn', got %r" % (net,))

    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = [
        'data/bensound-cute.mp3', 'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3', 'data/bensound-thejazzpiano.mp3'
    ]
    # Pre-computed melgrams, listed in the same order as ``audio_paths``.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    # prepare data like this: start from an empty batch and append one
    # melgram per track along axis 0.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    # NOTE(review): weights=None presumably skips loading the 'msd' weights
    # used by the commented-out alternatives - confirm which one is intended.
    if net == 'cnn':
        # model = MusicTaggerCNN(weights='msd', include_top=False)
        model = MusicTaggerCNN(weights=None, include_top=False)
    else:
        # model = MusicTaggerCRNN(weights='msd', include_top=False)
        model = MusicTaggerCRNN(weights=None, include_top=False)
    # predict the features like this
    print('Predicting features...')
    features = model.predict(melgrams)
    # print(features[:, :10])
    print(features[:, :])
    print(len(features))
    return

示例#5

0

显示文件

文件： example_tagging.py 项目： albinpaul/mainproject

def main(net):
    """Tag the tracks given on the command line and write each top tag.

    *WARNING*
    This model uses Batch Normalization, so the prediction
    is affected by batch. Use multiple, different data
    samples together (at least 4) for reliable prediction.

    Audio paths are taken from ``sys.argv[1:]``. When ``librosa_exists`` is
    true their melgrams are computed; otherwise the bundled ``.npy`` melgrams
    are loaded instead. ``output.txt`` is overwritten with one line per
    predicted track, holding the first element of the first entry returned
    by ``sort_result``.

    Args:
        net: 'cnn' to use ``MusicTaggerCNN`` or 'crnn' to use
            ``MusicTaggerCRNN``; both are built with ``weights='msd'``.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    # Fail before any melgram is computed; previously an unknown name only
    # surfaced later as an unbound ``model`` variable.
    if net not in ('cnn', 'crnn'):
        raise ValueError("net must be 'cnn' or 'crnn', got %r" % (net,))

    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = []
    # Fallback melgrams used only when librosa is unavailable.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    for arg in sys.argv[1:]:
        print(arg)
        audio_paths.append(arg)
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # prepare data like this: start from an empty batch and append one
    # melgram per track along axis 0.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    else:
        model = MusicTaggerCRNN(weights='msd')

    # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # print like this...
    print("Prediction is done. It took %d seconds." % (time.time() - start))
    print('Printing top-10 tags for each track...')
    # Open the file once in write mode (truncating it) instead of truncating
    # and then re-opening it in append mode for every track.
    with open('output.txt', 'w') as f:
        # Iterate over the prediction rows rather than over audio_paths: in
        # the fallback branch the predictions come from melgram_paths, so
        # indexing by audio path position could run past the last row.
        for track_scores in pred_tags:
            sorted_result = sort_result(tags, track_scores.tolist())
            f.write(sorted_result[0][0])
            f.write('\n')
    return

示例#6

0

显示文件

    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    
    # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # print like this...
    print "Prediction is done. It took %d seconds." % (time.time()-start)
    print('Printing top-10 tags for each track...')
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')