def optimize(classifier, feature_dict):
    """Score every non-empty subset of the feature list and rank the subsets.

    Builds the power set of a fixed feature list, shuffles it, and for each
    non-empty subset runs ``leave_one_out`` followed by
    ``vis.present_results``. The running best accuracy is printed whenever it
    improves, and a progress line with an estimated remaining time is
    rewritten in place on stdout.

    Args:
        classifier: Object handed to ``leave_one_out``; its ``__name__`` is
            used as the display label.
        feature_dict: Feature description forwarded unchanged to
            ``leave_one_out``.

    Returns:
        The evaluated feature subsets, sorted by accuracy, best first.
    """
    # full feature set
    all_features = ['eng', 'chroma', 'keystrength', 'brightness', 'zerocross',
                    'mfc', 'roughness', 'inharmonic', 'tempo', 'key']

    # get the power set of all_features, in random order
    all_features = list(powerset(all_features))
    random.shuffle(all_features)

    # Drop empty subsets up front (after the shuffle, so the random order of
    # the remaining subsets is untouched). Counting them in the total made the
    # progress counter stop short of its denominator and skewed the estimate.
    candidates = [subset for subset in all_features if len(subset) > 0]
    total = len(candidates)

    # initialize
    results = {}
    max_acc = 0
    max_feat = []
    label = classifier.__name__

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring elapsed time.
    start = time.perf_counter()

    # loop through each possible feature set
    for idx, feature_set in enumerate(candidates, start=1):
        # NOTE(review): feature_set is not passed to leave_one_out, so every
        # iteration evaluates the same feature_dict. The intended mapping from
        # a subset to a feature dict is not visible here -- confirm and fix.
        p1, r1 = leave_one_out(feature_dict, glob, classifier, label)
        acc = vis.present_results(p1, r1, label,
                                  print_results=False, show_results=True)

        # print the running best combination
        if acc > max_acc:
            max_acc = acc
            max_feat = feature_set
            print()
            print(max_acc, max_feat)

        results[feature_set] = acc

        # Linear extrapolation: average time per subset so far, multiplied by
        # the number of subsets still to go.
        elapsed = time.perf_counter() - start
        remaining = elapsed * (total / idx) - elapsed
        sys.stdout.write("\r%d / %d permutations (%d:%02d left)"
                         % (idx, total, remaining // 60, remaining % 60))

    print("")
    return sorted(results, key=results.get, reverse=True)
# Reduce every entry to its 'key' and 'tempo' parameters; the 'class'
# parameter is extracted separately and used below as the genre labels.
data_trimmed = extract_params(['key', 'tempo'], data_raw)
data_genres = extract_params(['class'], data_raw)

# Randomly choose half of the rows (rounded down) for training and record the
# choice as a boolean mask, so the complement can serve as the test set.
row_count = data_trimmed.shape[0]
training_indices = random.sample(range(row_count), row_count // 2)
mask = np.zeros(row_count, dtype=bool)
mask[training_indices] = True
held_out = ~mask

training_params = data_trimmed[mask, :]
training_values = data_genres[mask]

# Build the model from the training half.
model = build_model(training_params, training_values)

# ** TEST **
# Predict only for the rows that were not selected by the training mask,
# then convert both predictions and ground truth from integer codes to
# genres via int_as_genre.
predicted_codes = model.predict(data_trimmed[held_out])
actual_codes = data_genres[held_out]
guessed_genres = list(map(int_as_genre, map(int, predicted_codes)))
correct_genres = list(map(int_as_genre, map(int, actual_codes)))

# Prints a single raw 'keystrength' value from the first entry of data_raw.
# NOTE(review): looks like leftover debug output -- confirm before removing.
print(data_raw[0]['keystrength'][0][0])

# Evaluate and graph.
vis.present_results(guessed_genres, correct_genres)
# All available parameters.
params = ['eng', 'chroma', 'keystrength', 'zerocross', 'tempo', 'mfc',
          'brightness', 'roughness', 'inharmonic']

# Every parameter maps to a triple of boolean flags. Most use all three;
# the exceptions are listed explicitly.
# NOTE(review): the meaning of each flag is defined by leave_one_out, which is
# not visible here -- confirm before changing any of them.
flag_overrides = {
    'inharmonic': (True, True, False),
    'chroma': (True, False, True),
    'keystrength': (True, False, True),
}
param_dict = dict()
for param in params:
    param_dict[param] = flag_overrides.get(param, (True, True, True))

# (third argument to leave_one_out, display label) for each run. The integer
# presumably selects the classifier -- verify against leave_one_out.
runs = [
    (1, "K Neighbors (Opt)"),
    (2, "Voting"),
    (3, GaussianNB.__name__),
    (4, "Stochastic Gradient Descent (Opt)"),
]

for classifier_id, label in runs:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring elapsed time.
    start = time.perf_counter()
    p, r = leave_one_out(param_dict, glob, classifier_id, label)
    t = time.perf_counter() - start
    vis.present_results(p, r, label, t, print_results=True, show_results=True)