def _load_annotated_dataset(datasets_dir, name, features):
    """Read the merged features and Audacity labels of one combined dataset.

    The files are expected at ``<datasets_dir>/<name>/<name>_combined.wav``
    and ``<datasets_dir>/<name>/<name>_combined.txt``.

    Returns a tuple ``(timestamps, labels, x, y)`` where ``labels`` is the raw
    result of ``Util.read_audacity_labels`` and ``x``/``y`` are the first two
    values returned by ``Util.get_annotated_data_x_y``.
    """
    base_path = datasets_dir + "/" + name + "/" + name + "_combined"
    timestamps, feature_vectors = Util.read_merged_features(
        base_path + ".wav", features)
    labels = Util.read_audacity_labels(base_path + ".txt")
    # The third return value of get_annotated_data_x_y is not used by callers.
    x, y, _ = Util.get_annotated_data_x_y(timestamps, feature_vectors, labels)
    return timestamps, labels, x, y


def get_features(features, datasets_dir, pca=False):
    """Load, scale and optionally PCA-reduce the gtzan/labrosa/mirex datasets.

    A single ``StandardScaler`` is fitted on the concatenation of all three
    feature matrices and pickled to ``pickled/scaler.pickle`` (relative to the
    current working directory) before being applied to each dataset.

    Args:
        features: Feature selection forwarded to ``Util.read_merged_features``.
        datasets_dir: Directory containing the ``gtzan``, ``labrosa`` and
            ``mirex`` sub-directories.
        pca: When truthy, a ``PCA(n_components=20)`` is fitted on the scaled,
            concatenated data and applied to each dataset.

    Returns:
        dict with the keys ``x_<ds>``, ``y_<ds>``, ``labels_<ds>`` and
        ``timestamps_<ds>`` for ``<ds>`` in ``gtzan``, ``labrosa``, ``mirex``.
        ``labels_<ds>`` holds the raw Audacity labels as read from disk.
    """
    timestamps_gtzan, labels_gtzan, X_gtzan, Y_gtzan = _load_annotated_dataset(
        datasets_dir, "gtzan", features)
    timestamps_labrosa, labels_labrosa, X_labrosa, Y_labrosa = _load_annotated_dataset(
        datasets_dir, "labrosa", features)
    timestamps_mirex, labels_mirex, X_mirex, Y_mirex = _load_annotated_dataset(
        datasets_dir, "mirex", features)

    scaler = StandardScaler()
    scaler.fit(np.concatenate((X_labrosa, X_gtzan, X_mirex)))

    # pickle writes bytes: the file must be opened in binary mode, otherwise
    # pickle.dump raises TypeError on Python 3.
    with open("pickled/scaler.pickle", 'wb') as f:
        pickle.dump(scaler, f)

    X_gtzan = scaler.transform(X_gtzan)
    X_labrosa = scaler.transform(X_labrosa)
    X_mirex = scaler.transform(X_mirex)

    if pca:
        # Use a separate name so the boolean ``pca`` argument is not shadowed.
        pca_model = PCA(n_components=20)
        pca_model.fit(np.concatenate((X_labrosa, X_gtzan, X_mirex)))
        X_gtzan = pca_model.transform(X_gtzan)
        X_labrosa = pca_model.transform(X_labrosa)
        X_mirex = pca_model.transform(X_mirex)

    data = {
        "x_gtzan": X_gtzan,
        "y_gtzan": Y_gtzan,
        "labels_gtzan": labels_gtzan,
        "x_labrosa": X_labrosa,
        "y_labrosa": Y_labrosa,
        "labels_labrosa": labels_labrosa,
        "x_mirex": X_mirex,
        "y_mirex": Y_mirex,
        "labels_mirex": labels_mirex,
        "timestamps_gtzan": timestamps_gtzan,
        "timestamps_labrosa": timestamps_labrosa,
        "timestamps_mirex": timestamps_mirex,
    }
    return data
def read_features(features, wavfile, scale=False):
    """Read the merged features of one audio file, optionally scaling them.

    Args:
        features: Feature selection forwarded to ``Util.read_merged_features``.
        wavfile: Path of the audio file whose features are read.
        scale: When truthy, the pickled scaler is loaded and applied to the
            feature vectors.

    Returns:
        Tuple ``(timestamps, feature_vectors)``.
    """
    timestamps, feature_vectors = Util.read_merged_features(wavfile, features)
    if not scale:
        return timestamps, feature_vectors

    # NOTE(review): this absolute path differs from the relative
    # "pickled/scaler.pickle" that get_features writes; confirm that both
    # refer to the same file in deployment.
    # pickle data is binary, so the file must be opened with 'rb'; text mode
    # fails on Python 3. Only load pickles from a trusted location.
    with open("/opt/speech-music-discrimination/pickled/scaler.pickle", 'rb') as f:
        scaler = pickle.load(f)
    feature_vectors = scaler.transform(feature_vectors)
    return timestamps, feature_vectors