# Cluster with MiniBatchKMeans because the dataset is large.
clf = MiniBatchKMeans(n_clusters=3, init_size=1000, batch_size=500, max_iter=100)
# fit_predict() both fits the model and returns the cluster label of every
# sample, so a separate clf.fit(X) beforehand would only train the model twice.
labels = clf.fit_predict(X)

# Data visualisation: draw the graphs through the Visual class.
dv = Visual(X, labels, features, clf)
dv.raw_plot()
dv.cluster_plot()
dv.bargraph()

# Insights:
print(
    "\nThe names PHILLIP and JOHN have come up numerous times, and thus it can be assumed that they had a strong relation with the workings of the company, and thus might have known about/orchestrated the fraud.\n"
)

# Querying on the insights:
print("Querying Phillip:\n")

# Instantiate EmailData with the email bodies and `vect`
# (presumably the vectorizer used to build X -- TODO confirm at its definition).
dq = EmailData(email_df.body, vect)

# Querying.