for category, category_id in sorted(category_to_id.items()): features_chi2 = chi2(features, labels == category_id) indices = np.argsort(features_chi2[0]) feature_names = np.array(tfidf.get_feature_names())[indices] unigrams = [v for v in feature_names if len(v.split(' ')) == 1] bigrams = [v for v in feature_names if len(v.split(' ')) == 2] rfmodel = RandomForestClassifier(n_estimators=600, max_features='sqrt', max_depth=60, random_state=42) rfmodel._name = 'RandomForest' rfmodel.fit(features_train, labels_train) nbmodel = MultinomialNB() nbmodel._name = 'MultinomialNB' nbmodel.fit(features_train, labels_train) svModel = LinearSVC() svModel._name = 'SVC' svModel.fit(features_train, labels_train) print("Testing results are as follows- ") rfPred = rfmodel.predict(features_test) print("Random forest classifier accuracy is ") print(accuracy_score(labels_test, rfPred)) conf_mat_rf = confusion_matrix(labels_test, rfPred) ax = sns.heatmap(conf_mat_rf, annot=True, fmt='d', xticklabels=category_id_df.category.values,