示例#1
0
def clustering_by_chords(musics, k, num_states=3):
    """Cluster songs with k-means on the parameters of per-song chord HMMs.

    For every song an ``HMM`` is fitted to its chord sequence and the
    learned ``Pi``, ``A`` and ``B`` arrays are flattened into one feature
    vector.  The vectors of all songs are then clustered with ``KMeans``.

    Args:
        musics: Iterable of song objects.  Each needs a ``dat`` iterable
            whose items hold the chord at index 1; ``title`` and ``keys``
            are only read when a NaN feature vector is reported.
        k: Number of clusters passed to ``KMeans``.
        num_states: Number of hidden states of each HMM.

    Returns:
        Tuple ``(labels, feature_vectors)``: the ``labels_`` of the fitted
        k-means model and the array of feature vectors, one row per song.
    """
    # First HMM argument; presumably the number of chord classes -- TODO confirm.
    nc = 12

    feature_vectors = []
    for idx, music in enumerate(musics):
        # Only the chord (index 1) of each entry feeds the chord model.
        chord = [dat[1] for dat in music.dat]

        hmm_c = HMM(nc, num_states=num_states)
        hmm_c.learning(chord)

        # Hidden-state numbering is arbitrary after training, so order the
        # states by descending Pi to make vectors of different songs
        # comparable.
        a_sort = hmm_c.Pi.argsort()[::-1]
        # NOTE(review): only the columns of A are permuted here.  If A is the
        # state-by-state transition matrix, a full relabelling would permute
        # rows as well (A[np.ix_(a_sort, a_sort)]) -- confirm against HMM.
        f = np.concatenate([
            hmm_c.Pi[a_sort].flatten(),
            hmm_c.A[:, a_sort].flatten(),
            hmm_c.B[:, a_sort].flatten(),
        ])

        if np.isnan(f).any():
            # Report the song whose model produced NaN parameters; the vector
            # is still kept so that row i keeps matching musics[i].
            print("%s %s %s" % (idx, music.title, music.keys[0]))
        feature_vectors.append(f)

    feature_vectors = np.array(feature_vectors)
    kmeans_model = KMeans(n_clusters=k, random_state=RS).fit(feature_vectors)
    return kmeans_model.labels_, feature_vectors
示例#2
0
def clustering_by_chords_and_lyrics(musics, k, num_states=3):
    """Cluster songs with k-means on chord-HMM parameters plus lyric scores.

    Each song contributes one feature vector made of two parts:

    * the flattened ``Pi``, ``A`` and ``B`` arrays of an ``HMM`` fitted to
      the song's chord sequence, and
    * the song's lyric score from ``parse_lyrics.calc_score``, min-max
      normalised over all songs.

    Side effects: every song object gets the attributes ``a_words``,
    ``score`` (the normalised score) and ``emo4`` assigned.

    Args:
        musics: Sequence of song objects.  Each needs a ``dat`` iterable
            whose items hold the chord at index 1, and a ``lyrics``
            attribute accepted by ``parse_lyrics.get_affective_words``.
        k: Number of clusters passed to ``KMeans``.
        num_states: Number of hidden states of each HMM.

    Returns:
        Tuple ``(labels, feature_vectors)``: the ``labels_`` of the fitted
        k-means model and the array of combined feature vectors, one row
        per song.
    """
    # First HMM argument; presumably the number of chord classes -- TODO confirm.
    nc = 12

    # --- chord part: one flattened HMM parameter vector per song ----------
    chord_features = []
    for idx, music in enumerate(musics):
        # Only the chord (index 1) of each entry feeds the chord model.
        chord = [dat[1] for dat in music.dat]

        hmm_c = HMM(nc, num_states=num_states)
        hmm_c.learning(chord)

        # Hidden-state numbering is arbitrary after training, so order the
        # states by descending Pi to make vectors of different songs
        # comparable.
        a_sort = hmm_c.Pi.argsort()[::-1]
        # NOTE(review): only the columns of A are permuted here.  If A is the
        # state-by-state transition matrix, a full relabelling would permute
        # rows as well (A[np.ix_(a_sort, a_sort)]) -- confirm against HMM.
        f = np.concatenate([
            hmm_c.Pi[a_sort].flatten(),
            hmm_c.A[:, a_sort].flatten(),
            hmm_c.B[:, a_sort].flatten(),
        ])

        if np.isnan(f).any():
            # Report the song whose model produced NaN parameters.
            print("%s %s" % (idx, music))
        chord_features.append(f)

    # --- lyric part: score every song against the whole collection --------
    docs = []
    for music in musics:
        a_words = parse_lyrics.get_affective_words(music.lyrics)
        music.a_words = a_words
        docs.append(a_words)
    collection = nltk.TextCollection(docs)
    terms = list(set(collection))

    raw_scores = []
    for music, doc in zip(musics, docs):
        score = parse_lyrics.calc_score(collection, terms, doc)
        raw_scores.append(score)
        music.score = score
        music.emo4 = parse_lyrics.calc_4emotions(score)

    # Min-max normalise over all songs.  Casting to float keeps integer
    # scores from being floor-divided on Python 2.
    Scores = np.array(raw_scores, dtype=float)
    min_scores = Scores.min(axis=0)
    diff = Scores.max(axis=0) - min_scores
    # A score that is identical for every song has zero range; divide by 1
    # so it normalises to 0 instead of NaN (which KMeans cannot handle).
    diff = np.where(diff == 0, 1.0, diff)
    Scores = (Scores - min_scores) / diff
    for idx, music in enumerate(musics):
        music.score = Scores[idx]

    # np.append returns a new array rather than extending its argument, so
    # the combined vectors have to be collected explicitly.
    feature_vectors = np.array([
        np.append(f, Scores[idx]) for idx, f in enumerate(chord_features)
    ])
    kmeans_model = KMeans(n_clusters=k, random_state=RS).fit(feature_vectors)
    return kmeans_model.labels_, feature_vectors