Пример #1
0
# Mini-batch k-means is used because the dataset is large.
clf = MiniBatchKMeans(n_clusters=3,
                      init_size=1000,
                      batch_size=500,
                      max_iter=100)
# fit_predict() both fits the model and returns the cluster index of every
# sample, so a separate fit() call beforehand would only train the model a
# second time for no benefit.
labels = clf.fit_predict(X)

# Data visualisation:

# Construct the project's Visual helper from the data (X), the cluster labels,
# `features` and the fitted model, then call each of its plotting methods in
# turn. NOTE(review): what each plot shows is defined in Visual, not here.
dv = Visual(X, labels, features, clf)
dv.raw_plot()
dv.cluster_plot()
dv.bargraph()

# Insights:

# Print the analyst's conclusion as a fixed, hard-coded message.
print(
    "\nThe names PHILLIP and JOHN have come up numerous times, and thus it can be assumed that they had a strong relation with the workings of the company, and thus might have known about/orchestrated the fraud.\n"
)

# Querying on the insights:

# Header line for the query output that follows.
print("Querying Phillip:\n")

# Instantiate the project's EmailData class with the e-mail bodies
# (email_df.body) and `vect`.
# NOTE(review): `vect` is presumably the fitted text vectorizer - confirm
# against its definition earlier in the file.
dq = EmailData(email_df.body, vect)

#Querying.