def genrePrediction(filePath):
    """Predict genre tags for one audio file with the 'msd' CRNN tagger.

    *WARNING*
    This model uses Batch Normalization, so the prediction is affected by
    the batch. Use multiple, different data samples together (at least 4)
    for reliable prediction.
    NOTE(review): this function feeds a single file only, so per the
    warning above its scores should be treated with care.

    Args:
        filePath: path of the audio file handed to ``ap.compute_melgram``.

    Returns:
        A list holding the entries of ``sort_result(...)`` whose first
        element is one of the genre names, in the order ``sort_result``
        produced them.
    """
    print('Running genrePrediction() with network: crnn and backend: %s' %
          (K._BACKEND))

    # Labels passed to sort_result together with the model's output row.
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # Subset of ``tags`` that name a genre; everything else is filtered out.
    genres = [
        'rock', 'pop', 'alternative', 'indie', 'electronic', 'dance',
        'alternative rock', 'jazz', 'metal', 'classic rock', 'soul',
        'indie rock', 'electronica', 'folk', 'punk', 'blues', 'hard rock',
        'experimental', 'Hip-Hop', 'heavy metal', 'country', 'funk', 'electro',
        'Progressive rock', 'rnb', 'indie pop', 'House'
    ]

    # Append the file's mel-spectrogram to an empty (0, 1, 96, 1366) batch.
    melgrams = np.zeros((0, 1, 96, 1366))
    melgrams = np.concatenate((melgrams, ap.compute_melgram(filePath)),
                              axis=0)

    model = MusicTaggerCRNN(weights='msd')

    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Prediction is done. It took %d seconds." % (time.time() - start))

    sorted_result = sort_result(tags, pred_tags[0, :].tolist())
    print(filePath)

    # Build a real list: a lazy filter() object would be exhausted by the
    # printing loop on Python 3, leaving nothing to total or to return.
    genre_result = [item for item in sorted_result if item[0] in genres]

    total = 0
    for item in genre_result:
        print(item)
        total += float(item[1])
    print(' ')

    print('Total = ' + str(total))
    return genre_result
def main(net_type, epochs=10):
    """Train the selected network on the stored mel-spectrograms and save it.

    Args:
        net_type: one of 'cnn', 'small_cnn', 'smallest_cnn' or 'crnn'.
        epochs: value forwarded to ``model.fit`` and used in the name of
            the saved ``.h5`` file.

    Raises:
        ValueError: if ``net_type`` is not one of the names listed above.
    """
    features, labels, class_names = load_data(data.MELGRAM_LOCATION)
    print(class_names)
    num_classes = len(class_names)

    # Pick the constructor first; every network takes the same arguments.
    if net_type == 'cnn':
        build_network = MusicTaggerCNN
    elif net_type == 'small_cnn':
        build_network = SmallCNN
    elif net_type == 'smallest_cnn':
        build_network = SmallestCNN
    elif net_type == 'crnn':
        build_network = MusicTaggerCRNN
    else:
        raise ValueError(net_type)
    model = build_network(data.N_FRAMES, data.N_MELS, num_classes)

    model.summary()
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # TODO change batch size
    # Hold out 20% of the samples; the fixed seed keeps the split repeatable.
    train_x, val_x, train_y, val_y = train_test_split(
        features, labels, test_size=0.2, random_state=42)
    model.fit(train_x, train_y, epochs=epochs,
              validation_data=(val_x, val_y))

    output_name = 'music_{}_epochs:{}.h5'.format(net_type, epochs)
    model.save(output_name)
示例#3
0
def main(net):
    """Tag the four bundled demo tracks and print the top-10 tags of each.

    Args:
        net: 'cnn' or 'crnn', selecting which pretrained ('msd') tagger
            is loaded.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = [
        'data/bensound-cute.mp3', 'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3', 'data/bensound-thejazzpiano.mp3'
    ]
    # Precomputed mel-spectrograms, used when librosa is unavailable.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # Stack one mel-spectrogram per track onto an empty (0, 1, 96, 1366) batch.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    else:
        # Without this branch an unsupported name left ``model`` unbound
        # and failed on the next line with an unrelated-looking error.
        raise ValueError(net)
    model.summary()

    # Format into one string so the line reads the same on Python 2 and 3
    # (a two-argument print(...) prints a tuple repr on Python 2).
    print('Predicting... with melgrams: %s' % (melgrams.shape,))
    start = time.time()
    pred_tags = model.predict(melgrams)

    print("Prediction is done. It took %d seconds." % (time.time() - start))
    print('Printing top-10 tags for each track...')
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        # The ten leading entries, shown as two rows of five.
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')

    return
def main(net):
    """Run the demo tracks through a top-less tagger and print the features.

    *WARNING*
    This model uses Batch Normalization, so the prediction is affected by
    the batch. Use multiple, different data samples together (at least 4)
    for reliable prediction.

    Args:
        net: 'cnn' or 'crnn', selecting which network is built.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = [
        'data/bensound-cute.mp3', 'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3', 'data/bensound-thejazzpiano.mp3'
    ]
    # Precomputed mel-spectrograms, used when librosa is unavailable.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    # Stack one mel-spectrogram per track onto an empty (0, 1, 96, 1366) batch.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # NOTE(review): both networks are built with weights=None here; the
    # weights='msd' variant was previously commented out. Confirm that
    # skipping the 'msd' weights is intended for feature extraction.
    if net == 'cnn':
        model = MusicTaggerCNN(weights=None, include_top=False)
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights=None, include_top=False)
    else:
        # Without this branch an unsupported name left ``model`` unbound
        # and failed at predict() with an unrelated-looking error.
        raise ValueError(net)

    print('Predicting features...')
    features = model.predict(melgrams)
    print(features[:, :])
    print(len(features))
    return
示例#5
0
def main(net):
    """Tag the tracks named on the command line; write top tags to a file.

    The audio paths are read from ``sys.argv[1:]``. For each of them the
    first element of the leading ``sort_result`` entry is written on its
    own line to ``output.txt``, which is overwritten on every run.

    *WARNING*
    This model uses Batch Normalization, so the prediction is affected by
    the batch. Use multiple, different data samples together (at least 4)
    for reliable prediction.

    Args:
        net: 'cnn' or 'crnn', selecting which pretrained ('msd') tagger
            is loaded.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s' %
          (net, K._BACKEND))
    # setting
    audio_paths = []
    # Precomputed mel-spectrograms, used when librosa is unavailable.
    # NOTE(review): in that fallback the batch holds these four tracks
    # while the output loop still walks the command-line paths -- confirm
    # the two are meant to line up.
    melgram_paths = [
        'data/bensound-cute.npy', 'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy', 'data/bensound-thejazzpiano.npy'
    ]

    for arg in sys.argv[1:]:
        print(arg)
        audio_paths.append(arg)
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock', '60s',
        'rnb', 'indie pop', 'sad', 'House', 'happy'
    ]

    # Stack one mel-spectrogram per track onto an empty (0, 1, 96, 1366) batch.
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    else:
        # Without this branch an unsupported name left ``model`` unbound
        # and failed at predict() with an unrelated-looking error.
        raise ValueError(net)

    # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    print("Prediction is done. It took %d seconds." % (time.time() - start))

    # Open once in 'w' mode: this truncates any previous output and avoids
    # reopening the file in append mode for every track.
    with open('output.txt', 'w') as f:
        print('Printing top-10 tags for each track...')
        for song_idx, audio_path in enumerate(audio_paths):
            sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
            # Only the first element of the leading entry is recorded.
            f.write(sorted_result[0][0])
            f.write('\n')
    return
示例#6
0
    melgrams = np.zeros((0, 1, 96, 1366))

    if librosa_exists:
        for audio_path in audio_paths:
            melgram = ap.compute_melgram(audio_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)
    else:
        for melgram_path in melgram_paths:
            melgram = np.load(melgram_path)
            melgrams = np.concatenate((melgrams, melgram), axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    
    # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # print like this...
    print "Prediction is done. It took %d seconds." % (time.time()-start)
    print('Printing top-10 tags for each track...')
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')