def genrePrediction(filePath):
    """Predict genre tags for a single audio file with the 'msd' CRNN tagger.

    *WARNING* This model uses Batch Normalization, so the prediction is
    affected by the batch. Use multiple, different data samples together
    (at least 4) for reliable prediction. Note that this function feeds a
    batch of exactly one track.

    Args:
        filePath: path of the audio file passed to ``ap.compute_melgram``.

    Returns:
        list: the entries of ``sort_result(tags, scores)`` whose first
        element is one of the genre names below, in the order
        ``sort_result`` produced them. Entries are presumably
        ``(tag, score)`` pairs -- confirm against ``sort_result``.
    """
    print('Running genrePrediction() with network: crnn and backend: %s'
          % (K._BACKEND))

    # The 50 tags handed to sort_result together with the model's score
    # row; the order must match the order the model was trained with.
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock',
        '60s', 'rnb', 'indie pop', 'sad', 'House', 'happy',
    ]
    # Subset of `tags` that is kept in the returned result.
    genres = [
        'rock', 'pop', 'alternative', 'indie', 'electronic', 'dance',
        'alternative rock', 'jazz', 'metal', 'classic rock', 'soul',
        'indie rock', 'electronica', 'folk', 'punk', 'blues', 'hard rock',
        'experimental', 'Hip-Hop', 'heavy metal', 'country', 'funk',
        'electro', 'Progressive rock', 'rnb', 'indie pop', 'House',
    ]

    # Concatenating onto an empty (0, 1, 96, 1366) array keeps the original
    # shape check and float64 result for the single-track batch.
    melgram = ap.compute_melgram(filePath)
    melgrams = np.concatenate(
        (np.zeros((0, 1, 96, 1366)), melgram), axis=0)

    model = MusicTaggerCRNN(weights='msd')

    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Prediction is done. It took %d seconds." % (time.time() - start))

    sorted_result = sort_result(tags, pred_tags[0, :].tolist())
    print(filePath)

    # A real list (not a lazy filter object) so it can be iterated for
    # printing, summed, and still returned intact on Python 3.
    sorted_result = [item for item in sorted_result if item[0] in genres]
    for item in sorted_result:
        print(item)
    print(' ')

    total = sum(float(item[1]) for item in sorted_result)
    print('Total = ' + str(total))
    return sorted_result
def main(net_type, epochs=10):
    """Train one of the tagger networks on the stored mel-spectrograms.

    Loads the data set from ``data.MELGRAM_LOCATION``, builds the network
    selected by ``net_type``, trains it with an 80/20 train/validation
    split and saves the result to ``music_<net_type>_epochs:<epochs>.h5``.

    Args:
        net_type: one of 'cnn', 'small_cnn', 'smallest_cnn' or 'crnn'.
        epochs: number of training epochs passed to ``model.fit``.

    Raises:
        ValueError: if ``net_type`` is not one of the names above.
    """
    features, labels, class_names = load_data(data.MELGRAM_LOCATION)
    print(class_names)
    n_classes = len(class_names)

    # Scan the known architectures in order; the for/else raises when
    # nothing matched.
    known_networks = (
        ('cnn', MusicTaggerCNN),
        ('small_cnn', SmallCNN),
        ('smallest_cnn', SmallestCNN),
        ('crnn', MusicTaggerCRNN),
    )
    for name, build_network in known_networks:
        if net_type == name:
            break
    else:
        raise ValueError(net_type)

    model = build_network(data.N_FRAMES, data.N_MELS, n_classes)
    model.summary()
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # TODO change batch size
    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split
    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_test, y_test),
    )

    output_name = 'music_{}_epochs:{}.h5'.format(net_type, epochs)
    model.save(output_name)
def main(net):
    """Tag the four bundled example tracks and print their top-10 tags.

    Mel-spectrograms are computed from the mp3 files when
    ``librosa_exists`` is true; otherwise the precomputed ``.npy`` files
    are loaded instead.

    Args:
        net: 'cnn' or 'crnn', selecting which pretrained ('msd') tagger
            is built.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s'
          % (net, K._BACKEND))

    # setting
    audio_paths = [
        'data/bensound-cute.mp3',
        'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3',
        'data/bensound-thejazzpiano.mp3',
    ]
    melgram_paths = [
        'data/bensound-cute.npy',
        'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy',
        'data/bensound-thejazzpiano.npy',
    ]
    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock',
        '60s', 'rnb', 'indie pop', 'sad', 'House', 'happy',
    ]

    # prepare data like this
    if librosa_exists:
        per_track = [ap.compute_melgram(path) for path in audio_paths]
    else:
        per_track = [np.load(path) for path in melgram_paths]
    # One concatenation instead of one per track. The empty leading array
    # keeps the (n, 1, 96, 1366) shape check and float64 result.
    melgrams = np.concatenate(
        [np.zeros((0, 1, 96, 1366))] + per_track, axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    else:
        # Without this branch `model` would be unbound below.
        raise ValueError(net)
    model.summary()

    # predict the tags like this
    # A single formatted argument prints the same text on Python 2 and 3.
    print('Predicting... with melgrams: %s' % (melgrams.shape,))
    start = time.time()
    pred_tags = model.predict(melgrams)

    # print like this...
    print("Prediction is done. It took %d seconds." % (time.time() - start))
    print('Printing top-10 tags for each track...')
    for song_idx, audio_path in enumerate(audio_paths):
        sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
        print(audio_path)
        print(sorted_result[:5])
        print(sorted_result[5:10])
        print(' ')
    return
def main(net):
    """Run the four bundled example tracks through a headless tagger.

    *WARNING* This model uses Batch Normalization, so the prediction is
    affected by the batch. Use multiple, different data samples together
    (at least 4) for reliable prediction.

    The network is built with ``weights=None`` and ``include_top=False``;
    the array returned by ``model.predict`` and its length are printed.

    Args:
        net: 'cnn' or 'crnn', selecting which network class is built.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s'
          % (net, K._BACKEND))

    # setting
    audio_paths = [
        'data/bensound-cute.mp3',
        'data/bensound-actionable.mp3',
        'data/bensound-dubstep.mp3',
        'data/bensound-thejazzpiano.mp3',
    ]
    melgram_paths = [
        'data/bensound-cute.npy',
        'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy',
        'data/bensound-thejazzpiano.npy',
    ]

    # prepare data like this
    if librosa_exists:
        per_track = [ap.compute_melgram(path) for path in audio_paths]
    else:
        per_track = [np.load(path) for path in melgram_paths]
    # One concatenation instead of one per track. The empty leading array
    # keeps the (n, 1, 96, 1366) shape check and float64 result.
    melgrams = np.concatenate(
        [np.zeros((0, 1, 96, 1366))] + per_track, axis=0)

    # load model like this
    # NOTE(review): weights=None presumably means no pretrained weights are
    # loaded; the previous variant passed weights='msd' -- confirm which
    # one is intended.
    if net == 'cnn':
        model = MusicTaggerCNN(weights=None, include_top=False)
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights=None, include_top=False)
    else:
        # Without this branch `model` would be unbound below.
        raise ValueError(net)

    # predict the features like this
    print('Predicting features...')
    features = model.predict(melgrams)
    print(features[:, :])
    print(len(features))
    return
def main(net):
    """Tag the tracks named on the command line; write top tags to a file.

    *WARNING* This model uses Batch Normalization, so the prediction is
    affected by the batch. Use multiple, different data samples together
    (at least 4) for reliable prediction.

    Audio paths are taken from ``sys.argv[1:]``. When ``librosa_exists``
    is false the bundled precomputed ``.npy`` mel-spectrograms are used
    instead. ``output.txt`` is overwritten with one line per predicted
    track, holding the first element of the first ``sort_result`` entry.

    Args:
        net: 'cnn' or 'crnn', selecting which pretrained ('msd') tagger
            is built.

    Raises:
        ValueError: if ``net`` is neither 'cnn' nor 'crnn'.
    """
    print('Running main() with network: %s and backend: %s'
          % (net, K._BACKEND))

    # setting
    audio_paths = []
    melgram_paths = [
        'data/bensound-cute.npy',
        'data/bensound-actionable.npy',
        'data/bensound-dubstep.npy',
        'data/bensound-thejazzpiano.npy',
    ]
    for arg in sys.argv[1:]:
        print(arg)
        audio_paths.append(arg)

    tags = [
        'rock', 'pop', 'alternative', 'indie', 'electronic',
        'female vocalists', 'dance', '00s', 'alternative rock', 'jazz',
        'beautiful', 'metal', 'chillout', 'male vocalists', 'classic rock',
        'soul', 'indie rock', 'Mellow', 'electronica', '80s', 'folk', '90s',
        'chill', 'instrumental', 'punk', 'oldies', 'blues', 'hard rock',
        'ambient', 'acoustic', 'experimental', 'female vocalist', 'guitar',
        'Hip-Hop', '70s', 'party', 'country', 'easy listening', 'sexy',
        'catchy', 'funk', 'electro', 'heavy metal', 'Progressive rock',
        '60s', 'rnb', 'indie pop', 'sad', 'House', 'happy',
    ]

    # prepare data like this
    if librosa_exists:
        per_track = [ap.compute_melgram(path) for path in audio_paths]
    else:
        per_track = [np.load(path) for path in melgram_paths]
    # One concatenation instead of one per track. The empty leading array
    # keeps the (n, 1, 96, 1366) shape check and float64 result.
    melgrams = np.concatenate(
        [np.zeros((0, 1, 96, 1366))] + per_track, axis=0)

    # load model like this
    if net == 'cnn':
        model = MusicTaggerCNN(weights='msd')
    elif net == 'crnn':
        model = MusicTaggerCRNN(weights='msd')
    else:
        # Without this branch `model` would be unbound below.
        raise ValueError(net)

    # predict the tags like this
    print('Predicting...')
    start = time.time()
    pred_tags = model.predict(melgrams)

    # print like this...
    print("Prediction is done. It took %d seconds." % (time.time() - start))
    # NOTE: the message says top-10, but only the top entry per track is
    # written to output.txt; the text is kept unchanged for existing users.
    print('Printing top-10 tags for each track...')

    # Open the file once in 'w' mode (truncate + write) instead of
    # truncating and then re-opening in append mode for every track.
    with open('output.txt', 'w') as f:
        # Iterate over the rows actually predicted rather than over
        # audio_paths: in the precomputed-melgram fallback the two can
        # differ in length, which used to give mismatched rows or an
        # IndexError.
        for song_scores in pred_tags:
            sorted_result = sort_result(tags, song_scores.tolist())
            f.write(sorted_result[0][0])
            f.write('\n')
    return
# Script fragment: build the input batch, load the pretrained tagger chosen
# by `net`, predict, and print the top-10 tags per track. It relies on
# `audio_paths`, `melgram_paths`, `tags`, `net` and `librosa_exists` being
# defined earlier.

# Stack one mel-spectrogram per track along axis 0; every item must have
# shape (k, 1, 96, 1366) for the concatenation to succeed.
melgrams = np.zeros((0, 1, 96, 1366))
if librosa_exists:
    for audio_path in audio_paths:
        melgram = ap.compute_melgram(audio_path)
        melgrams = np.concatenate((melgrams, melgram), axis=0)
else:
    # Fall back to the precomputed .npy mel-spectrograms.
    for melgram_path in melgram_paths:
        melgram = np.load(melgram_path)
        melgrams = np.concatenate((melgrams, melgram), axis=0)

# load model like this
if net == 'cnn':
    model = MusicTaggerCNN(weights='msd')
elif net == 'crnn':
    model = MusicTaggerCRNN(weights='msd')
else:
    # Without this branch `model` would be undefined below.
    raise ValueError(net)

# predict the tags like this
print('Predicting...')
start = time.time()
pred_tags = model.predict(melgrams)

# print like this...
# Single-argument print(...) behaves identically on Python 2 and 3.
print("Prediction is done. It took %d seconds." % (time.time() - start))
print('Printing top-10 tags for each track...')
for song_idx, audio_path in enumerate(audio_paths):
    sorted_result = sort_result(tags, pred_tags[song_idx, :].tolist())
    print(audio_path)
    print(sorted_result[:5])
    print(sorted_result[5:10])
    print(' ')