示例#1
0
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

# Load the video "momentum" features and reduce them to 5 PCA components,
# keeping the sequence identifier next to the components.
#
# '§' encodes to more than one byte in UTF-8, which pandas' C parser does not
# support; naming the python engine explicitly gives the same parse without
# the fallback ParserWarning.
momentum = pd.read_csv('features/video/df_momentum.csv', sep='§',
                       engine='python')
# 'Unnamed: 0' is the name pandas gives to a header-less first column
# (presumably the index written by the script that produced the CSV).
momentum.rename(columns={'Unnamed: 0': 'Sequence'}, inplace=True)
# Drop the trailing '_VIDEO' marker (last 6 characters) from every identifier.
momentum['Sequence'] = momentum['Sequence'].str[:-len('_VIDEO')]
assert len(momentum) == 308, f'expected 308 rows, got {len(momentum)}'
momentum.sort_values('Sequence', inplace=True)

pca_momentum = PCA(n_components=5)
# sort_values() reorders the rows but keeps their original index labels, and
# pd.concat(axis=1) aligns on those labels.  The PCA output is a bare array in
# sorted row order, so it must carry momentum's index; with a fresh 0..n-1
# index each Sequence would be paired with another row's components.
pca_momentum = pd.DataFrame(
    pca_momentum.fit_transform(normalize(momentum.drop('Sequence', axis=1))),
    index=momentum.index)
pca_momentum = pca_momentum.add_prefix('Momentum_')
pca_momentum = pd.concat([momentum['Sequence'], pca_momentum], axis=1)
print(pca_momentum.head())

# Load the descriptive audio statistics, one row per sequence.
#
# '§' encodes to more than one byte in UTF-8, which pandas' C parser does not
# support; naming the python engine explicitly gives the same parse without
# the fallback ParserWarning.
audio_stat_desc = pd.read_csv('features/audio/Statistique_desc.csv', sep='§',
                              engine='python')
# Drop the trailing '_AUDIO' marker (last 6 characters) from every identifier.
audio_stat_desc['Sequence'] = (
    audio_stat_desc['Sequence'].str[:-len('_AUDIO')])
assert len(audio_stat_desc) == 308, (
    f'expected 308 rows, got {len(audio_stat_desc)}')
# Rows are reordered by identifier; the original index labels are kept.
audio_stat_desc.sort_values('Sequence', inplace=True)
print(audio_stat_desc.head())

# Load the audio sequence-segmentation features, one row per sequence.
#
# '§' encodes to more than one byte in UTF-8, which pandas' C parser does not
# support; naming the python engine explicitly gives the same parse without
# the fallback ParserWarning.
decoupage_seq_son = pd.read_csv('features/audio/Decoupage_Sequence_son.csv',
                                sep='§', engine='python')
# Drop the trailing '_AUDIO' marker (last 6 characters) from every identifier.
decoupage_seq_son['Sequence'] = (
    decoupage_seq_son['Sequence'].str[:-len('_AUDIO')])
assert len(decoupage_seq_son) == 308, (
    f'expected 308 rows, got {len(decoupage_seq_son)}')
# Rows are reordered by identifier; the original index labels are kept.
decoupage_seq_son.sort_values('Sequence', inplace=True)
print(decoupage_seq_son.head())
示例#2
0
# Cluster the per-document emotion features with k-means (3 clusters) and
# visualise the clusters on the first two principal components.
import pandas as pd  # used by read_csv / DataFrame below
from sklearn.cluster import KMeans  # used below but was never imported
from sklearn.decomposition import PCA
import matplotlib as mpl
mpl.use('TkAgg')  # or whatever other backend that you want
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()  # for plot styling
import warnings
warnings.filterwarnings('ignore')
# NOTE(review): an import statement resolves `plt` as a module name, not as
# the pyplot alias bound above -- presumably a project-local plt.py; confirm.
from plt import plot_cluster

# '§' encodes to more than one byte in UTF-8, which pandas' C parser does not
# support, so the python engine is requested explicitly.
data = pd.read_csv(
    '/home/mickael/Documents/Challenge_Video_Audio_Text/features/text/emotion_doc.csv',
    sep='§', engine='python')

# Everything except the identifier column is fed to both PCA and k-means;
# build that matrix once instead of dropping the column twice.
features = data.drop(['Sequence'], axis='columns')

# 2-D projection, used only for plotting (columns PCA_0 and PCA_1).
pca = PCA(n_components=2)
pca = pd.DataFrame(pca.fit_transform(features))
pca = pca.add_prefix('PCA_')

model = KMeans(n_clusters=3, random_state=42, n_init=30)

# Clustering runs on the full feature matrix, not on the 2-D projection.
cluster = model.fit_predict(features)

plt.scatter(pca['PCA_0'], pca['PCA_1'], c=cluster, s=50, cmap='viridis')
plt.show()

plot_cluster(
    pca[['PCA_0', 'PCA_1']].values, data['Sequence'], cluster,
    '/home/mickael/Documents/Challenge_Video_Audio_Text/result/plot_cluster_sentiments.html'
)
# NOTE(review): `f` is not defined in this script; if the static figure should
# be saved, call savefig on the pyplot figure before plt.show().
#f.savefig("/home/mickael/Documents/Challenge_Video_Audio_Text/result/kmeans_sentiments.png", bbox_inches='tight')