def clustering_by_chords(musics, k, num_states=3):
    """Cluster songs with k-means on parameters of per-song chord HMMs.

    For every song an ``HMM`` is trained on the song's chord sequence and
    its ``Pi``, ``A`` and ``B`` arrays are flattened into one feature
    vector; the stacked vectors are then clustered with ``KMeans``.

    Args:
        musics: Iterable of song objects.  Each must expose ``dat`` (an
            iterable whose items are indexed so that ``item[1]`` is the
            chord observation) and, for the NaN diagnostic, ``title`` and
            ``keys``.
        k: Number of clusters handed to ``KMeans(n_clusters=k)``.
        num_states: Number of hidden states for each per-song HMM.

    Returns:
        A ``(labels, feature_vectors)`` tuple: the ``labels_`` attribute of
        the fitted k-means model and the array of per-song feature vectors
        it was fitted on (one row per song; each row is expected to hold
        ``num_states + num_states**2 + 12 * num_states`` values).
    """
    # Observation-alphabet size passed to HMM; presumably the number of
    # distinct chord symbols -- confirm against the HMM class.
    nc = 12

    feature_vectors = []
    for idx, music in enumerate(musics):
        # Only the second field of each entry is used for the chord model.
        chords = [dat[1] for dat in music.dat]

        hmm_c = HMM(nc, num_states=num_states)
        hmm_c.learning(chords)

        # Order states by descending initial probability so that vectors
        # from different songs line up despite arbitrary state labelling.
        order = hmm_c.Pi.argsort()[::-1]
        # NOTE(review): only the columns of A are permuted here.  If A is a
        # state-by-state matrix, a full relabelling would permute its rows
        # as well -- confirm against the HMM implementation.
        f = np.concatenate((
            hmm_c.Pi[order].flatten(),
            hmm_c.A[:, order].flatten(),
            hmm_c.B[:, order].flatten(),
        ))

        # Report songs whose training produced NaN parameters; the vector
        # is still kept so that rows stay aligned with ``musics``.
        if np.isnan(f).any():
            print("%s %s %s" % (idx, music.title, music.keys[0]))
        feature_vectors.append(f)

    feature_vectors = np.array(feature_vectors)
    kmeans_model = KMeans(n_clusters=k, random_state=RS).fit(feature_vectors)
    return kmeans_model.labels_, feature_vectors
def clustering_by_chords_and_lyrics(musics, k, num_states=3):
    """Cluster songs with k-means on chord-HMM parameters plus lyric scores.

    Each song contributes one feature vector made of (a) the flattened
    ``Pi``, ``A`` and ``B`` arrays of an ``HMM`` trained on its chord
    sequence and (b) its min-max normalised lyric score computed through
    ``parse_lyrics``.

    Side effects on every element of ``musics``:
        * ``a_words`` is set to ``parse_lyrics.get_affective_words(lyrics)``.
        * ``emo4`` is set to ``parse_lyrics.calc_4emotions`` of the raw
          (un-normalised) score.
        * ``score`` is set to the normalised score.

    Args:
        musics: Sequence of song objects (it is iterated several times and
            indexed).  Each must expose ``dat`` (items indexed so that
            ``item[1]`` is the chord observation) and ``lyrics``.
        k: Number of clusters handed to ``KMeans(n_clusters=k)``.
        num_states: Number of hidden states for each per-song HMM.

    Returns:
        A ``(labels, feature_vectors)`` tuple: the ``labels_`` attribute of
        the fitted k-means model and the array it was fitted on, one row
        per song holding the chord features followed by the lyric scores.
    """
    # Observation-alphabet size passed to HMM; presumably the number of
    # distinct chord symbols -- confirm against the HMM class.
    nc = 12

    # --- chord features -------------------------------------------------
    chord_features = []
    for idx, music in enumerate(musics):
        # Only the second field of each entry is used for the chord model.
        chords = [dat[1] for dat in music.dat]

        hmm_c = HMM(nc, num_states=num_states)
        hmm_c.learning(chords)

        # Order states by descending initial probability so that vectors
        # from different songs line up despite arbitrary state labelling.
        order = hmm_c.Pi.argsort()[::-1]
        # NOTE(review): only the columns of A are permuted here.  If A is a
        # state-by-state matrix, a full relabelling would permute its rows
        # as well -- confirm against the HMM implementation.
        f = np.concatenate((
            hmm_c.Pi[order].flatten(),
            hmm_c.A[:, order].flatten(),
            hmm_c.B[:, order].flatten(),
        ))

        # Report songs whose training produced NaN parameters; the vector
        # is still kept so that rows stay aligned with ``musics``.
        if np.isnan(f).any():
            print("%s %s" % (idx, music))
        chord_features.append(f)

    # --- lyric features -------------------------------------------------
    docs = []
    for music in musics:
        a_words = parse_lyrics.get_affective_words(music.lyrics)
        music.a_words = a_words
        docs.append(a_words)

    collection = nltk.TextCollection(docs)
    terms = list(set(collection))

    raw_scores = []
    for music, doc in zip(musics, docs):
        score = parse_lyrics.calc_score(collection, terms, doc)
        raw_scores.append(score)
        music.score = score
        music.emo4 = parse_lyrics.calc_4emotions(score)

    # Min-max normalise every score dimension across the whole song set.
    Scores = np.array(raw_scores)
    min_scores = Scores.min(axis=0)
    span = Scores.max(axis=0) - min_scores
    # A dimension that is constant over all songs has zero range; divide by
    # 1.0 instead so it normalises to 0 rather than NaN/inf.
    span = np.where(span == 0, 1.0, span)
    Scores = (Scores - min_scores) / span
    for music in musics:
        music.score = (music.score - min_scores) / span

    # np.append returns a new array rather than extending its argument, so
    # the combined rows have to be collected explicitly.
    feature_vectors = np.array(
        [np.append(f, s) for f, s in zip(chord_features, Scores)]
    )

    kmeans_model = KMeans(n_clusters=k, random_state=RS).fit(feature_vectors)
    return kmeans_model.labels_, feature_vectors